Last active
February 9, 2022 17:32
-
-
Save mRoca/2193c84a03924fb062f95e6b0623a5eb to your computer and use it in GitHub Desktop.
Export all MySQL related data - One row and all its relationships
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace App\Service; | |
use Doctrine\DBAL\Connection; | |
use Doctrine\DBAL\Schema\AbstractSchemaManager; | |
use Doctrine\DBAL\Schema\ForeignKeyConstraint; | |
use Doctrine\DBAL\Schema\Table; | |
use Doctrine\ORM\EntityManagerInterface; | |
use Psr\Log\LoggerInterface; | |
class EntityExporter | |
{ | |
private Connection $conn; | |
private AbstractSchemaManager $schemaManager; | |
private LoggerInterface $logger; | |
private array $cachedTables = []; | |
private array $cachedAlreadyIncludedIDs = []; | |
private int $cachedAlreadyIncludedRowsCount = 0; // We could count $cachedAlreadyIncludedIDs, but it's less efficient | |
private ?\Closure $currentExportCallback = null; | |
public function __construct(Connection $conn, LoggerInterface $logger) | |
{ | |
$this->conn = $conn; | |
$this->schemaManager = $this->conn->getSchemaManager(); | |
$this->logger = $logger; | |
} | |
/** | |
* Get a Doctrine ORM entity table name and ID. | |
* This method should remain static, as we don't want Doctrine ORM to be a requirement. | |
* | |
* Usage: | |
* $exporter->exportRowAndRelationships(...EntityExporter::getEntityTableAndId($myEntity)) | |
*/ | |
public static function getEntityTableAndId(EntityManagerInterface $em, object $entity): array | |
{ | |
try { | |
$metadata = $em->getClassMetadata(get_class($entity)); | |
} catch (\Exception $e) { | |
throw new \InvalidArgumentException('Unable to get the entity class metadata.'); | |
} | |
return [$metadata->getTableName(), $metadata->getIdentifierValues($entity)]; | |
} | |
/** | |
* Export all data related to a row identied by a table and a primary key. | |
* This method only uses Doctrine DBAL. | |
* | |
* @param array<string, mixed>|int|string $id | |
* @param null|\Closure $progressCallback A callback called after each exported row | |
*/ | |
public function exportRowAndRelationships(string $tableName, $id, ?\Closure $progressCallback = null): array | |
{ | |
// Flushing the cache, as this class is stateful. It would be better to use a Factory instead, but it's okay for now. | |
$this->flushPreviousCache(); | |
$this->currentExportCallback = $progressCallback; | |
if (!$this->schemaManager->tablesExist($tableName)) { | |
throw new \InvalidArgumentException("The `{$tableName}` table does not exist"); | |
} | |
$table = $this->schemaManager->listTableDetails($tableName); | |
if (!is_array($id)) { | |
// Checking if the table has a primary key with at least one column | |
if (null === ($primaryKey = $table->getPrimaryKey()) || empty($primaryKey->getColumns())) { | |
throw new \InvalidArgumentException('Unable to get the table primary key'); | |
} | |
if (1 !== count($primaryKey->getColumns())) { | |
throw new \InvalidArgumentException('You must provide an array as ID for a table with multiple primary key columns'); | |
} | |
// If the $id value is a string or an int, tranforming it into an array | |
$id = [$primaryKey->getColumns()[0] => $id]; | |
} | |
set_time_limit(0); | |
return $this->exportTableRowAndRelationships($table, $id); | |
} | |
/** | |
* @param array<string, mixed> $id The row to export primary key, as an array in order to deal with one or many columns | |
* | |
* @return array<string, array<string, mixed>> The list of all rows to export, indexed per table | |
*/ | |
private function exportTableRowAndRelationships(Table $table, array $id): array | |
{ | |
$this->logger->debug('Exporting row', ['table' => $table->getName(), 'id' => $id, 'count' => $this->cachedAlreadyIncludedRowsCount]); | |
// Infinite loop protection | |
if (in_array($id, $this->cachedAlreadyIncludedIDs[$table->getName()] ?? [], true)) { | |
$this->logger->debug('Row already exported, ignoring', ['table' => $table->getName(), 'id' => $id]); | |
return []; | |
} | |
$this->cachedAlreadyIncludedIDs[$table->getName()][] = $id; | |
// Checking if the table has a primary key with at least one column | |
if (null === ($primaryKey = $table->getPrimaryKey()) || empty($primaryKey->getColumns())) { | |
throw new \RuntimeException(sprintf("Unable to get the '%s' table primary key", $table->getName())); | |
} | |
// Checking if the given ID contains all required primary key columns | |
if (array_keys($id) != $primaryKey->getUnquotedColumns()) { // Caution: using "==" and not "===" here, as we don't care about the order | |
$this->logger->error('The given ID does not contain all table primary key columns', ['table' => $table->getName(), 'id' => $id]); | |
return []; | |
} | |
// Avoiding a SELECT if the table only contains the primary key columns, e.g. for a n-n table. | |
if (count($table->getColumns()) === count($primaryKey->getColumns())) { | |
$this->logger->debug('The table only contains primary key columns, skiping select', ['table' => $table->getName()]); | |
$rowData = $id; | |
} else { | |
// Get the current row | |
try { | |
$rowData = $this->conn->fetchAssociative(...$this->getFetchWhereArguments('*', $table, $id)); | |
} catch (\Exception $e) { | |
$this->logger->error('Exception when selecting the row', ['table' => $table->getName(), 'id' => $id, 'exception' => $e]); | |
return []; | |
} | |
} | |
if (empty($rowData)) { | |
$this->logger->error('Empty row', ['table' => $table->getName(), 'id' => $id]); | |
return []; | |
} | |
// Building an array containing all exported rows | |
/** @var array<int, array<string, array<string, mixed>>> $rowsArrays an array of data indexed per table */ | |
$rowsArrays = [ | |
[$table->getName() => [$rowData]], | |
]; | |
$this->callRowCallback(); | |
// Get all related rows | |
$foreignKeys = $this->listReferencingForeignKeys($table); | |
foreach ($foreignKeys as $foreignKey) { | |
$localTable = $foreignKey->getLocalTable(); | |
if (null === $localPrimaryKey = $localTable->getPrimaryKey()) { | |
$this->logger->error('The linked table does not have any primary key', ['table' => $localTable->getName()]); | |
continue; | |
} | |
// Building the "WHERE ? = ? AND ? = ?" part | |
$localColumns = $foreignKey->getLocalColumns(); | |
$foreignColumns = $foreignKey->getForeignColumns(); | |
$foreignWhere = []; | |
foreach ($localColumns as $i => $localColumn) { | |
$foreignWhere[$localColumn] = $rowData[$foreignColumns[$i]]; | |
} | |
$selectedCols = implode(', ', $localPrimaryKey->getQuotedColumns($this->conn->getDatabasePlatform())); | |
$linkedRowsIds = $this->conn->fetchAllAssociative(...$this->getFetchWhereArguments($selectedCols, $localTable, $foreignWhere)); | |
foreach ($linkedRowsIds as $linkedRowId) { | |
$rowsArrays[] = $this->exportTableRowAndRelationships($localTable, $linkedRowId); | |
} | |
} | |
return array_merge_recursive(...$rowsArrays); | |
} | |
/** | |
* Returns an array containing 3 values: the WHERE sql, the params and the params types. | |
*/ | |
private function getFetchWhereArguments(string $select, Table $table, array $where): array | |
{ | |
if (empty($where)) { | |
throw new \InvalidArgumentException('The "where" argument cannot be empty'); | |
} | |
$selectSql = sprintf('SELECT %s FROM %s WHERE 1 = 1', $select, $table->getQuotedName($this->conn->getDatabasePlatform())); | |
$selectParams = $selectTypes = []; | |
foreach ($where as $columnName => $value) { | |
$column = $table->getColumn($columnName); | |
$selectSql .= sprintf(' AND %s = ?', $column->getQuotedName($this->conn->getDatabasePlatform())); | |
$selectParams[] = $value; | |
$selectTypes[] = $column->getType(); | |
} | |
return [$selectSql, $selectParams, $selectTypes]; | |
} | |
/** | |
* @return ForeignKeyConstraint[] returns a list of all tables having a foreign key targetting the source one | |
*/ | |
private function listReferencingForeignKeys(Table $sourceTable): array | |
{ | |
if (empty($this->cachedTables)) { | |
$this->cachedTables = $this->schemaManager->listTables(); | |
} | |
$matchingForeignKeys = []; | |
foreach ($this->cachedTables as $table) { | |
$foreignKeys = $table->getForeignKeys(); | |
foreach ($foreignKeys as $foreignKey) { | |
if ($foreignKey->getForeignTableName() !== $sourceTable->getName()) { | |
continue; | |
} | |
// We don't want to deal with custom foreign keys for now | |
if ($foreignKey->getForeignColumns() !== $sourceTable->getPrimaryKeyColumns()) { | |
throw new \LogicException(sprintf('The [%s] foreign key columns must match the foreign table primary keys', $foreignKey->getName())); | |
} | |
$matchingForeignKeys[] = $foreignKey; | |
} | |
} | |
return $matchingForeignKeys; | |
} | |
private function callRowCallback(): void | |
{ | |
++$this->cachedAlreadyIncludedRowsCount; | |
if (null !== $this->currentExportCallback) { | |
($this->currentExportCallback)($this->cachedAlreadyIncludedRowsCount); | |
} | |
} | |
private function flushPreviousCache(): void | |
{ | |
$this->cachedTables = []; | |
$this->cachedAlreadyIncludedIDs = []; | |
$this->cachedAlreadyIncludedRowsCount = 0; | |
$this->currentExportCallback = null; | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace App\Command; | |
use App\Service\EntityExporter; | |
use Doctrine\ORM\EntityManagerInterface; | |
use Symfony\Component\Console\Command\Command; | |
use Symfony\Component\Console\Helper\ProgressBar; | |
use Symfony\Component\Console\Input\InputArgument; | |
use Symfony\Component\Console\Input\InputInterface; | |
use Symfony\Component\Console\Output\OutputInterface; | |
use Symfony\Component\Console\Style\SymfonyStyle; | |
use Symfony\Component\Filesystem\Filesystem; | |
use Symfony\Component\Yaml\Yaml; | |
/** | |
* This command dumps all the SQL data related to a main row (e.g. a User or a Parish) in a YAML file. | |
*/ | |
class ExportEntityCommand extends Command | |
{ | |
protected static $defaultName = 'app:export-entity'; | |
protected static $defaultDescription = 'Export an entity and all its relationships'; | |
private EntityManagerInterface $em; | |
private EntityExporter $exporter; | |
private string $cacheDir; | |
public function __construct(EntityManagerInterface $em, EntityExporter $exporter, string $cacheDir) | |
{ | |
$this->em = $em; | |
$this->exporter = $exporter; | |
$this->cacheDir = $cacheDir; | |
parent::__construct(); | |
} | |
protected function configure(): void | |
{ | |
$this | |
->addArgument('entityOrTable', InputArgument::REQUIRED, 'The entity FQDN or its table name, example: "App/Entity/Entite", or "entite"') | |
->addArgument('id', InputArgument::REQUIRED, 'The entity primary key, example: "42"') | |
; | |
} | |
protected function execute(InputInterface $input, OutputInterface $output): int | |
{ | |
$io = new SymfonyStyle($input, $output); | |
$entityOrTable = $input->getArgument('entityOrTable'); | |
$scalarId = $input->getArgument('id'); | |
if (empty($entityOrTable) || empty($scalarId)) { | |
throw new \InvalidArgumentException('You must provide an entity or a table, and an ID'); | |
} | |
if (class_exists($entityOrTable)) { | |
$io->comment('Getting the entity'); | |
$entity = $this->em->getRepository($entityOrTable)->find($scalarId); | |
if (null === $entity) { | |
throw new \InvalidArgumentException('Unable to find the entity'); | |
} | |
[$tableName, $id] = EntityExporter::getEntityTableAndId($this->em, $entity); | |
$io->comment("Entity table found: {$tableName}"); | |
} else { | |
$tableName = $entityOrTable; | |
$id = $scalarId; | |
} | |
$io->comment('Exporting all the SQL data. Caution: you\'ll need 500 MB of free memory per 100.000 exported rows.'); | |
$progressBar = new ProgressBar($io, 0, 1); | |
$progressBar->setFormat('[%bar%] Rows: %current% - Time: %elapsed:6s% - Memory: %memory:6s%'); | |
$progressBar->start(); | |
$progressCallback = function ($rowsCount) use ($progressBar): void { | |
$progressBar->setProgress($rowsCount); | |
}; | |
$data = $this->exporter->exportRowAndRelationships($tableName, $id, $progressCallback); | |
$progressBar->finish(); | |
$exportDir = implode(DIRECTORY_SEPARATOR, [$this->cacheDir, 'export', date('Y-m-d_H-i-s')."_{$tableName}_{$scalarId}"]); | |
$io->comment("Creating the export directory: {$exportDir}"); | |
$fs = new Filesystem(); | |
if ($fs->exists($exportDir)) { | |
throw new \RuntimeException("The {$exportDir} directory already exists, please try again after 1 second"); | |
} | |
$fs->mkdir($exportDir); | |
$io->comment('Dumping data into the export directory'); | |
$fs->dumpFile($exportDir.DIRECTORY_SEPARATOR.'data.yaml', Yaml::dump($data)); | |
$io->success("Export finished with success into {$exportDir}"); | |
return Command::SUCCESS; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment