Last active
August 29, 2015 14:10
-
-
Save paunin/7507c9c5989d36034faa to your computer and use it in GitHub Desktop.
Compare 2 files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace Compare; | |
/**
 * Task: write a program that compares two (terabyte-sized) text files and
 * writes to a third file the lines that are present in the first file but
 * absent from the second one.
 *
 * Lines are never loaded whole: they are read and compared block by block,
 * so memory use stays bounded even for extremely long lines. The price is
 * time: for every line of the first file the second file is rescanned from
 * its beginning.
 *
 * To run this utility just run `php ./Compare.php input_file1.txt input_file2.txt output_file.txt`
 */
class Compare
{
    /**
     * @var int Length argument handed to fgets(); a single block therefore
     *          holds at most $readLength - 1 bytes
     */
    private $readLength = 1048576; // 1024 * 1024

    /**
     * @var resource first input file handler
     */
    private $inputFile1;

    /**
     * @var resource second input file handler
     */
    private $inputFile2;

    /**
     * @var resource output file handler
     */
    private $outputFile;

    /**
     * Open both inputs for reading and the (truncated) output for writing.
     *
     * @param string $inputFile1 Path of the file whose lines are filtered
     * @param string $inputFile2 Path of the file the lines are looked up in
     * @param string $outputFile Path of the result file; existing content is discarded
     * @throws \Exception When a file cannot be opened, or when the output path
     *                    resolves to the same file as one of the inputs
     */
    public function __construct($inputFile1, $inputFile2, $outputFile) // here you can add configs for redefine params
    {
        // Opening the output truncates it, so never let it alias an input:
        // that would wipe the data before a single line has been compared.
        $outputPath = realpath($outputFile);
        if ($outputPath !== false
            && ($outputPath === realpath($inputFile1) || $outputPath === realpath($inputFile2))
        ) {
            throw new \Exception('Output file must differ from the input files');
        }

        if (!$this->inputFile1 = @fopen($inputFile1, 'r')) {
            throw new \Exception('Can\'t open first input file');
        }
        if (!$this->inputFile2 = @fopen($inputFile2, 'r')) {
            throw new \Exception('Can\'t open second input file');
        }
        // 'w' creates the file or truncates it to zero length in one step.
        if (!$this->outputFile = @fopen($outputFile, 'w')) {
            throw new \Exception('Can\'t open output file');
        }
    }

    /**
     * Close open resources
     */
    public function __destruct()
    {
        $this->closeHandle($this->inputFile1);
        $this->closeHandle($this->inputFile2);
        $this->closeHandle($this->outputFile);
    }

    /**
     * Main runner: walk the first file line by line and copy to the output
     * every line that cannot be found in the second file.
     *
     * @throws \Exception When writing to the output file fails
     */
    public function run()
    {
        $fpos = 0;
        while (!feof($this->inputFile1)) {
            if ($this->existString($fpos)) {
                $this->skipString($fpos);
            } else {
                $this->writeString($fpos);
            }
            // Both helpers stop right behind the line they handled.
            $fpos = ftell($this->inputFile1);
        }
    }

    /**
     * Find the line of the first file that starts at $pos in the second file.
     *
     * The second file is scanned from its beginning. Lines are compared block
     * by block; a line whose only difference is that one side is the final,
     * unterminated line of its file is still considered equal.
     *
     * @param int $pos Byte offset in the first file where the line starts
     * @return bool True when an equal line exists in the second file
     */
    public function existString($pos)
    {
        $equalPart = true;
        fseek($this->inputFile1, $pos);
        rewind($this->inputFile2);

        while (!feof($this->inputFile2)) { // read every line of file 2
            $rightBlock = fgets($this->inputFile2, $this->readLength);
            $rightEnded = $this->eol($rightBlock) || feof($this->inputFile2);

            // The current line of file 2 already differs: just consume the
            // rest of it, then restart the comparison on its next line.
            if (!$equalPart) {
                if ($rightEnded) {
                    fseek($this->inputFile1, $pos);
                    $equalPart = true;
                }
                continue;
            }

            $leftBlock = fgets($this->inputFile1, $this->readLength);

            if ($leftBlock === $rightBlock) {
                if ($this->eol($leftBlock) || feof($this->inputFile1)) {
                    return true; // every block up to the end of the line matched
                }
                continue; // equal so far, compare the next block of both lines
            }

            if ($this->differOnlyByFinalEol($leftBlock, $rightBlock)) {
                return true;
            }

            if ($rightEnded) {
                // The line of file 2 is over: retry against its next line.
                fseek($this->inputFile1, $pos);
            } else {
                $equalPart = false;
            }
        }

        return false; // So we have no string in second file at all
    }

    /**
     * Copy the line of the first file that starts at $pos to the output file.
     *
     * @param int $pos Byte offset in the first file where the line starts
     * @throws \Exception When fwrite() reports a failure
     */
    public function writeString($pos)
    {
        fseek($this->inputFile1, $pos);
        while (!feof($this->inputFile1)) {
            $block = fgets($this->inputFile1, $this->readLength);
            if ($block === false) {
                break; // nothing left to read (end of file or read error)
            }
            if (fwrite($this->outputFile, $block) === false) {
                throw new \Exception('Can\'t write to output file');
            }
            if ($this->eol($block) || feof($this->inputFile1)) {
                break;
            }
        }
    }

    /**
     * Move the read position of the first file behind the line starting at $pos.
     *
     * @param int $pos Byte offset in the first file where the line starts
     */
    public function skipString($pos)
    {
        fseek($this->inputFile1, $pos);
        while (!feof($this->inputFile1)) {
            $block = fgets($this->inputFile1, $this->readLength);
            if ($block === false) {
                break; // nothing left to read (end of file or read error)
            }
            if ($this->eol($block) || feof($this->inputFile1)) {
                break;
            }
        }
    }

    /**
     * Check if block has end of line, i.e. its last byte is "\n" or "\r".
     *
     * @param string|false $block Block as returned by fgets()
     * @return bool False for an empty or non-string block
     */
    public function eol($block)
    {
        if (!is_string($block) || $block === '') {
            return false;
        }
        $lastByte = substr($block, -1);

        return $lastByte === "\n" || $lastByte === "\r";
    }

    /**
     * Tell whether two blocks are equal except that one of them is the
     * unterminated last data of its file while the other carries a terminator.
     *
     * @param string|false $leftBlock  Block just read from the first file
     * @param string|false $rightBlock Block just read from the second file
     * @return bool
     */
    private function differOnlyByFinalEol($leftBlock, $rightBlock)
    {
        if (!is_string($leftBlock) || !is_string($rightBlock)) {
            return false;
        }
        $leftEol = $this->eol($leftBlock);
        $rightEol = $this->eol($rightBlock);

        if ($leftEol && !$rightEol && feof($this->inputFile2)) {
            return $this->isBlockPlusEol($rightBlock, $leftBlock);
        }
        if ($rightEol && !$leftEol && feof($this->inputFile1)) {
            return $this->isBlockPlusEol($leftBlock, $rightBlock);
        }

        return false;
    }

    /**
     * Check that $terminated is exactly $bare followed by one line terminator.
     *
     * @param string $bare       Block without a terminator
     * @param string $terminated Block ending with a terminator
     * @return bool
     */
    private function isBlockPlusEol($bare, $terminated)
    {
        $bareLength = strlen($bare);
        $extra = strlen($terminated) - $bareLength;
        if ($extra < 1 || $extra > 2) {
            return false; // cheap rejection before touching the payload
        }
        if (strncmp($terminated, $bare, $bareLength) !== 0) {
            return false;
        }
        $tail = substr($terminated, $bareLength);

        return $tail === "\n" || $tail === "\r\n" || $tail === "\r";
    }

    /**
     * Close a stream if it was actually opened.
     *
     * @param resource|null $handle
     */
    private function closeHandle($handle)
    {
        if (is_resource($handle)) {
            fclose($handle);
        }
    }
}
//-------------------------------- <<< PROGRAM ------------------------------------
// Command-line entry point: expects the first input file, the second input
// file and the output file as the three positional arguments. Problems are
// reported on STDERR and signalled with exit status 1 so that calling shell
// scripts can detect the failure.

// A plain emptiness test would also reject a file literally named "0",
// so only a missing or empty argument counts as "not given".
if (!isset($argv[1]) || $argv[1] === '') {
    fwrite(STDERR, "Please show me first input file\n");
    exit(1);
}
$input1 = $argv[1];

if (!isset($argv[2]) || $argv[2] === '') {
    fwrite(STDERR, "Please show me second input file\n");
    exit(1);
}
$input2 = $argv[2];

if (!isset($argv[3]) || $argv[3] === '') {
    fwrite(STDERR, "Please show me output file\n");
    exit(1);
}
$output = $argv[3];

try {
    $comparer = new Compare($input1, $input2, $output);
    $comparer->run();
} catch (\Exception $e) { //use native Exceptions
    fwrite(STDERR, "Error: {$e->getMessage()}\n");
    exit(1);
}
//-------------------------------- >>> PROGRAM ------------------------------------
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment