-
-
Save miklcct/c7d40d502ed33c481943942f558b2734 to your computer and use it in GitHub Desktop.
Split GTFS files into one file per transport operator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env php
<?php
/**
 * Split a GTFS feed into one feed per transport operator (agency).
 *
 * Usage: pass the path of the GTFS zip archive as the first command line argument.
 * The archive is extracted to GTFS_PATH; for every agency that owns at least one
 * route, a directory TARGET_DIR/<agency_id>/ and an archive TARGET_DIR/<agency_id>.zip
 * are written containing only the rows that belong to that agency.
 */
const GTFS_PATH = '/tmp/gtfs/';
const TARGET_DIR = './splitGtfs/';

if (!isset($argv[1])) {
    die('Usage: ' . $argv[0] . ' <gtfs.zip>' . PHP_EOL);
}

foreach ([GTFS_PATH, TARGET_DIR] as $dir) {
    if (!is_dir($dir) && !mkdir($dir, 0777, true)) {
        die('Could not create directory ' . $dir);
    }
}

$zip = new ZipArchive;
$res = $zip->open($argv[1]);
if ($res === true) {
    $zip->extractTo(GTFS_PATH);
    $zip->close();
} else {
    die("Failed to get GTFS");
}

// Files that a valid GTFS feed may omit are loaded as empty tables instead of aborting the run.
$loadOptional = function ($name) {
    $path = GTFS_PATH . $name;
    return is_file($path) ? deserializeCSV($path) : [];
};

$gtfs_agency = deserializeCSV(GTFS_PATH . 'agency.txt');
$gtfs_calendar = $loadOptional('calendar.txt');
$gtfs_calendar_dates = $loadOptional('calendar_dates.txt');
$gtfs_feed_info = $loadOptional('feed_info.txt');
$gtfs_routes = deserializeCSV(GTFS_PATH . 'routes.txt');
$gtfs_stops = deserializeCSV(GTFS_PATH . 'stops.txt');
$gtfs_stop_times = deserializeCSV(GTFS_PATH . 'stop_times.txt');
$gtfs_transfers = $loadOptional('transfers.txt');
$gtfs_trips = deserializeCSV(GTFS_PATH . 'trips.txt');

foreach ($gtfs_agency as $agency) {
    // Ids are compared as strings: a loose comparison would treat "1" and "01" as the same agency.
    $agencyId = (string)($agency['agency_id'] ?? '');
    $agencyName = $agency['agency_name'];
    echo PHP_EOL . 'parsing... ' . $agencyName . PHP_EOL;

    // Routes owned by this agency. The $used_* arrays are id => id maps for O(1) membership tests.
    $part_routes = [];
    $used_routes = [];
    foreach ($gtfs_routes as $route) {
        if ((string)($route['agency_id'] ?? '') !== $agencyId) {
            continue;
        }
        $part_routes[] = $route;
        $used_routes[$route['route_id']] = $route['route_id'];
    }
    echo 'Kept ' . count($part_routes) . ' out of ' . count($gtfs_routes) . ' routes' . PHP_EOL;
    if (count($part_routes) == 0) {
        echo 'No routes, skipped!' . PHP_EOL;
        continue;
    }

    // Trips on those routes, plus the services they run on.
    $part_trips = [];
    $used_services = [];
    $used_stops = [];
    $used_trips = [];
    foreach ($gtfs_trips as $trip) {
        if (!isset($used_routes[$trip['route_id']])) {
            continue;
        }
        $part_trips[] = $trip;
        $used_services[$trip['service_id']] = $trip['service_id'];
        // Keyed by trip_id only: appending with [] would add numeric keys 0..n-1
        // that falsely match numeric trip ids of other agencies.
        $used_trips[$trip['trip_id']] = $trip['trip_id'];
    }
    echo $agencyName . ' has ' . count($part_trips) . ' trips' . PHP_EOL;

    // Stop times of those trips, plus the stops they call at.
    $part_stop_times = [];
    foreach ($gtfs_stop_times as $stop_time) {
        if (!isset($used_trips[$stop_time['trip_id']])) {
            continue;
        }
        $part_stop_times[] = $stop_time;
        $used_stops[$stop_time['stop_id']] = $stop_time['stop_id'];
    }
    echo $agencyName . ' has ' . count($part_stop_times) . ' stop times' . PHP_EOL;

    $part_calendar = [];
    foreach ($gtfs_calendar as $calendar) {
        if (isset($used_services[$calendar['service_id']])) {
            $part_calendar[] = $calendar;
        }
    }

    $part_calendar_dates = [];
    foreach ($gtfs_calendar_dates as $calendar_date) {
        if (isset($used_services[$calendar_date['service_id']])) {
            $part_calendar_dates[] = $calendar_date;
        }
    }

    $part_stops = [];
    foreach ($gtfs_stops as $stop) {
        if (isset($used_stops[$stop['stop_id']])) {
            $part_stops[] = $stop;
        }
    }
    echo $agencyName . ' has ' . count($part_stops) . ' stops' . PHP_EOL;

    // A transfer is only valid in the split feed when BOTH of its stops are exported;
    // otherwise it would reference a stop missing from this agency's stops.txt.
    $part_transfers = [];
    foreach ($gtfs_transfers as $transfer) {
        if (!isset($used_stops[$transfer['from_stop_id']]) || !isset($used_stops[$transfer['to_stop_id']])) {
            continue;
        }
        $part_transfers[] = $transfer;
    }

    echo 'exporting... ' . $agencyName . PHP_EOL;
    $agencyDir = TARGET_DIR . $agencyId . '/';
    if (!is_dir($agencyDir)) {
        mkdir($agencyDir, 0777, true);
    }

    // File name => rows, in the order the files are written and archived.
    $parts = [
        'agency.txt' => [$agency],
        'calendar.txt' => $part_calendar,
        'calendar_dates.txt' => $part_calendar_dates,
        'feed_info.txt' => $gtfs_feed_info,
        'routes.txt' => $part_routes,
        'stops.txt' => $part_stops,
        'stop_times.txt' => $part_stop_times,
        'transfers.txt' => $part_transfers,
        'trips.txt' => $part_trips,
    ];
    foreach ($parts as $fileName => $rows) {
        serializeCSV($agencyDir . $fileName, $rows);
    }

    $zipPath = TARGET_DIR . $agencyId . '.zip';
    $zip = new ZipArchive;
    echo 'Saving ' . $zipPath . '...' . PHP_EOL;
    // OVERWRITE discards the entries of an archive left over from a previous run.
    if ($zip->open($zipPath, ZipArchive::CREATE | ZipArchive::OVERWRITE) === true) {
        foreach ($parts as $fileName => $rows) {
            // serializeCSV() writes nothing for an empty table, so there is no fresh file to add.
            if ($rows !== []) {
                $zip->addFile($agencyDir . $fileName, $fileName);
            }
        }
        // All files are added, so close the zip file.
        $zip->close();
    } else {
        echo 'Could not create ' . $zipPath . PHP_EOL;
    }
}
/**
 * Write a list of rows to a CSV file.
 *
 * The keys of the first row become the header line; every row is then written
 * in that column order. When $data is empty nothing is written and no file is
 * created, because the column names are only known from the first row.
 *
 * @param $path string Path of the CSV file to create or overwrite
 * @param $data array List of rows, each an associative array (header => value)
 * @return void
 */
function serializeCSV($path, $data)
{
    if ($data === []) {
        return;
    }
    // reset() returns the first row whatever its key is; $data is a by-value copy, so callers are unaffected.
    $headers = array_keys(reset($data));
    echo 'serializing... ' . $path . ' with ' . count($data) . ' rows' . PHP_EOL;
    $file = fopen($path, 'w');
    if ($file === false) {
        // Same failure style as deserializeCSV(): stop instead of passing false to fputcsv().
        die($path . ' could not be opened!');
    }
    fputcsv($file, $headers);
    foreach ($data as $dataRow) {
        fputcsv($file, serializeCSVLine($headers, $dataRow));
    }
    fclose($file);
}
/**
 * Build one CSV row from an associative array, in header order.
 *
 * A header that has no matching key in $station yields an empty field, so the
 * remaining values stay aligned with their columns. Keys of $station that are
 * not listed in $headers are ignored.
 *
 * @param $headers array The column names, in output order
 * @param $station array The data as an associative array (header => value)
 * @return array the row values, one per header
 */
function serializeCSVLine($headers, $station): array
{
    $row = [];
    foreach ($headers as $header) {
        // Emit an empty field rather than skipping it: skipping would shift every later value one column left.
        $row[] = array_key_exists($header, $station) ? $station[$header] : '';
    }
    return $row;
}
/**
 * Load a CSV file and store it in an array with incremental keys.
 * Each line is stored as an associative array using column headers as key and the fields as value.
 *
 * The first line is taken as the header; a leading UTF-8 byte order mark is removed from it.
 * Blank lines are skipped. A row with fewer fields than the header is padded with empty
 * strings and a row with more fields is truncated, so every entry has exactly the header's keys.
 * The script is terminated with a message when the file cannot be opened.
 *
 * @param $csvPath string File path leading to the CSV file
 * @return array the deserialized data (empty when the file has no header line)
 */
function deserializeCSV($csvPath): array
{
    $fileReadHandle = fopen($csvPath, 'r');
    if (!$fileReadHandle) {
        die($csvPath . ' could not be opened!');
    }
    echo 'Deserializing ' . $csvPath . PHP_EOL;

    $entries = [];
    // Read the original headers; fgetcsv() gives false on an empty file and [null] on a blank line.
    $headers = fgetcsv($fileReadHandle);
    if (is_array($headers) && $headers !== [null]) {
        // A byte order mark would otherwise become part of the first column name.
        if (strncmp((string)$headers[0], "\xEF\xBB\xBF", 3) === 0) {
            $headers[0] = substr($headers[0], 3);
        }
        $columnCount = count($headers);

        // Go through all rows
        while (($entry = fgetcsv($fileReadHandle)) !== false) {
            if ($entry === [null]) {
                // Blank line
                continue;
            }
            if (count($entry) !== $columnCount) {
                // array_combine() requires both arrays to have the same length.
                $entry = array_slice(array_pad($entry, $columnCount, ''), 0, $columnCount);
            }
            $entries[] = array_combine($headers, $entry);
        }
    }

    fclose($fileReadHandle);
    return $entries;
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment