Werktechnisch zit ik tegen bakken data (denk aan terabytes) aan te kijken die we bijna nooit nodig hebben - alleen om bij "problemen" dingen uit te zoeken. Daarnaast heeft die data ook nog eens geen vast formaat (geen vaste veldindeling). Te veel en te onhandig dus om een database mee lastig te vallen, maar alles zomaar in een file dumpen maakt het gestructureerd doorzoeken er ook niet handiger op. Enter
JSON lines: een text-based formaat met op elke regel een JSON-object. Een beetje een soort CSV zonder vaste kolomindeling. En daar moet je dan wat mee:
<?php
namespace Rsi\Object;
/**
 * Manipulate a JSON lines file: a text file holding one JSON encoded record per line.
 *
 * The file is always read line by line, so it never has to fit in memory. The modifying operations (filter, edit) stream
 * their output into a temporary file next to the original, which replaces the original once it is completely written.
 */
class JsonLinesFile{

  /**
   * Line delimiter written after each record. Lines are read with fgets(), which splits on "\n", so a custom delimiter
   * should end with a line feed.
   */
  public $delimiter = "\n";
  public $assoc = true; //!< Return data as assoc.array (passed as 2nd parameter to json_decode()).

  protected $_filename = null; //!< Path of the JSON lines file.

  /**
   * @param string $filename Path of the JSON lines file (does not have to exist yet).
   */
  public function __construct($filename){
    $this->_filename = $filename;
  }
  /**
   * Encode a record.
   * @param mixed $data Data to encode.
   * @return string Encoded JSON (without the line delimiter).
   * @throws \Exception When the data can not be encoded as JSON.
   */
  protected function encode($data){
    //json_encode() returns false on failure; writing that would add an empty line that can not be decoded afterwards
    if(($line = json_encode($data)) === false) throw new \Exception(json_last_error_msg());
    return $line;
  }
  /**
   * Decode a line.
   * @param string $line Encoded JSON line.
   * @return mixed Decoded data.
   * @throws \Exception When the line does not contain valid JSON.
   */
  protected function decode($line){
    //a null result is only an error if the JSON parser says so (the line could be a literal "null")
    if((($data = json_decode($line,$this->assoc)) === null) && (json_last_error() != JSON_ERROR_NONE))
      throw new \Exception(json_last_error_msg());
    return $data;
  }
  /**
   * Append data to the file.
   * @param mixed $data
   * @return bool True on success.
   * @throws \Exception When the data can not be encoded as JSON (nothing is written in that case).
   */
  public function append($data){
    return file_put_contents($this->_filename,$this->encode($data) . $this->delimiter,LOCK_EX | FILE_APPEND) !== false;
  }
  /**
   * Clear (empty) the file.
   * @return bool True on success.
   */
  public function clear(){
    return file_put_contents($this->_filename,'') !== false;
  }
  /**
   * Walk through all lines in the file. A non-existing file is treated as an empty file.
   * @param callable $callback Function to call for each line. The line (including its line ending) is in the first
   *   parameter, and the index in the file (base-0) in the second. Return false to break.
   * @throws \RuntimeException When the file exists but can not be opened.
   */
  protected function lines($callback){
    if(!is_file($this->_filename)) return;
    //without this check a failed fopen() ends in an endless feof() loop (PHP 7) or a TypeError (PHP 8)
    if(($f = fopen($this->_filename,'r')) === false)
      throw new \RuntimeException("Unable to open '{$this->_filename}' for reading");
    try{
      $index = 0;
      //fgets() returns false at the end of the file (and on a read error), which ends the walk
      while(($line = fgets($f)) !== false)
        if(call_user_func($callback,$line,$index++) === false) break;
    }
    finally{
      fclose($f); //also release the handle when the callback throws
    }
  }
  /**
   * Number of records in the file.
   * @return int
   */
  public function count(){
    $count = 0;
    $this->lines(function() use (&$count){
      $count++;
    });
    return $count;
  }
  /**
   * Walk through all records in the file.
   * @param callable $callback Function to call for each record. The data is in the first parameter, and the index in the file
   *   (base-0) in the second. Return false to break.
   * @throws \Exception When a line does not contain valid JSON.
   */
  public function each($callback){
    $this->lines(function($line,$index) use ($callback){
      return call_user_func($callback,$this->decode($line),$index);
    });
  }
  /**
   * Retrieve a slice of the records in the file.
   * @param $offset int Record to start from.
   * @param $length int Number of records to return (when available).
   * @return array The selected records (key = index, value = data).
   * @throws \Exception When a selected line does not contain valid JSON.
   */
  public function slice($offset,$length){
    $end = $offset + $length; //index of the first record after the slice
    $result = [];
    $this->lines(function($line,$index) use ($offset,$end,&$result){
      if($index < $offset) return null; //not there yet (lines before the slice are not decoded)
      if($index >= $end) return false; //past the slice: stop reading
      $result[$index] = $this->decode($line);
      return null;
    });
    return $result;
  }
  /**
   * Rewrite the file line by line through a temporary file that replaces the original when done.
   * @param callable $callback Function to call for each line (line in the first parameter, base-0 index in the second). It
   *   returns the text to write in place of the line, or null to drop the line.
   * @return bool True on success. On failure (or an exception) the original file is left untouched.
   * @throws \RuntimeException When the original can not be read or the temporary file can not be written.
   */
  protected function rewrite($callback){
    $temp = $this->_filename . '-' . bin2hex(random_bytes(16));
    if(($f = fopen($temp,'w')) === false) return false;
    $replaced = false;
    try{
      $this->lines(function($line,$index) use ($callback,$f,$temp){
        $output = call_user_func($callback,$line,$index);
        //an incomplete write (e.g. disk full) must never end up replacing the original
        if(($output !== null) && (fwrite($f,$output) !== strlen($output)))
          throw new \RuntimeException("Unable to write to temporary file '$temp'");
      });
      $replaced = fclose($f) && rename($temp,$this->_filename);
    }
    finally{
      if(is_resource($f)) fclose($f); //still open when the walk was interrupted; an open file can not be removed on Windows
      if(!$replaced && is_file($temp)) unlink($temp); //never leave a stray temporary file behind
    }
    return $replaced;
  }
  /**
   * Filter the records in the file.
   * @param callable $callback If this function returns true, the record is preserved. The data is in the first parameter, and
   *   the index in the file (base-0) in the second.
   * @return bool True on success.
   * @throws \Exception When a line does not contain valid JSON (the file is left untouched).
   */
  public function filter($callback){
    return $this->rewrite(function($line,$index) use ($callback){
      return call_user_func($callback,$this->decode($line),$index) ? $line : null;
    });
  }
  /**
   * Edit the records in the file.
   * @param callable $callback Function that returns the modified record. The original data is in the first parameter, and the
   *   index in the file (base-0) in the second. Return false to remove the record, or true to keep the original record.
   * @return bool True on success.
   * @throws \Exception When a line does not contain valid JSON or a modified record can not be encoded (the file is left
   *   untouched).
   */
  public function edit($callback){
    return $this->rewrite(function($line,$index) use ($callback){
      $data = call_user_func($callback,$this->decode($line),$index);
      if($data === false) return null; //remove the record
      return $data === true ? $line : $this->encode($data) . $this->delimiter;
    });
  }
}
Rob, zaterdag 27 april 2024, 22:18