Skip to content

Instantly share code, notes, and snippets.

@RajithaKumara
Created December 11, 2023 09:05
Show Gist options
  • Save RajithaKumara/8103e1ae03dc2456a3f8534223fff2e6 to your computer and use it in GitHub Desktop.
Save RajithaKumara/8103e1ae03dc2456a3f8534223fff2e6 to your computer and use it in GitHub Desktop.
Merge CSV
{
"require": {
"league/csv": "^9.0",
"symfony/finder": "^5.4"
}
}
<?php
require_once './vendor/autoload.php';
use League\Csv\Reader;
use League\Csv\Writer;
use Symfony\Component\Finder\Finder;
// ---------------------------------------------------------------------------
// Merge every CSV file named "Pattern*" found under <script dir>/data into
// <script dir>/data/merged.csv.
//
// * The header of the merged file is taken from the first data row
//   encountered (files are visited in name order), plus a trailing
//   "__filename" column holding the relative path of the file each row
//   came from.
// * Columns of later files are matched to that header by name; columns the
//   header lacks are left empty, and columns the header does not know are
//   dropped from the output and reported via var_dump() at the end.
// * Rows are streamed to the output one at a time, so the whole data set is
//   never held in memory.
// ---------------------------------------------------------------------------

// Resolve both input and output against the script directory so the result
// does not depend on the current working directory.
$dataDir = __DIR__ . '/data';
$outputPath = $dataDir . '/merged.csv';

$finder = new Finder();
// files(): ignore directories whose name happens to match the pattern.
// sortByName(): make the visiting order (and thus the file that supplies the
// header) deterministic.
$finder->files()->name('Pattern*')->in($dataDir)->sortByName();

$stat = [];          // rows read per input file, keyed by relative path
$rowCount = 0;       // total rows written to the merged file
$mainHeader = null;  // list of column names; null until the first row is seen
$headerIndex = [];   // column name => position in $mainHeader
$dummyRecord = [];   // template row: one empty string per header column
$extraHeaders = [];  // set (name => true) of columns missing from $mainHeader
$writer = null;      // created lazily, once the header is known

foreach ($finder as $file) {
    $fileNameWithExtension = $file->getRelativePathname();

    $csv = Reader::createFromPath($file->getRealPath(), 'r');
    $csv->setDelimiter(',');
    $csv->setHeaderOffset(0);

    $stat[$fileNameWithExtension] = 0;

    foreach ($csv->getRecords() as $record) {
        $record['__filename'] = $fileNameWithExtension;

        if ($mainHeader === null) {
            // First data row overall: it defines the output columns.
            $mainHeader = array_keys($record);
            $headerIndex = array_flip($mainHeader);
            $dummyRecord = array_fill(0, count($mainHeader), '');

            $writer = Writer::createFromPath($outputPath, 'w+');
            $writer->insertOne($mainHeader);
        }

        // Re-order the row to the merged header's column positions.
        $newRecord = $dummyRecord;
        foreach ($record as $key => $value) {
            if (isset($headerIndex[$key])) {
                $newRecord[$headerIndex[$key]] = $value;
            } else {
                // Keyed by name so each unknown column is stored only once.
                $extraHeaders[$key] = true;
            }
        }

        $writer->insertOne($newRecord);
        $stat[$fileNameWithExtension]++;
        $rowCount++;
    }
}

if ($writer === null) {
    // No matching file, or none of them contained a data row: there is no
    // header to write, so report it instead of failing on a null header.
    fwrite(STDERR, "No CSV rows found in files matching 'Pattern*' under {$dataDir}; nothing written.\n");
    exit(1);
}

// Columns that appeared in some input file but not in the merged header.
var_dump(array_keys($extraHeaders));

// Summary on STDERR so it does not mix with the var_dump() output above.
foreach ($stat as $name => $count) {
    fprintf(STDERR, "%s: %d rows\n", $name, $count);
}
fprintf(STDERR, "Merged %d rows from %d files into %s\n", $rowCount, count($stat), $outputPath);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment