Skip to content

Instantly share code, notes, and snippets.

@miklcct
Forked from Bertware/splitGtfs.php
Created January 6, 2025 17:15
Show Gist options
  • Save miklcct/c7d40d502ed33c481943942f558b2734 to your computer and use it in GitHub Desktop.
Save miklcct/c7d40d502ed33c481943942f558b2734 to your computer and use it in GitHub Desktop.
Split GTFS files into one file per transport operator
#!/usr/bin/env php
<?php
// Split one GTFS feed (a zip archive given as the first CLI argument) into one
// feed per agency. For every row of agency.txt that owns at least one route,
// the routes, trips, stop times, calendars, stops and transfers reachable from
// that agency are written to TARGET_DIR/<agency_id>/ and zipped into
// TARGET_DIR/<agency_id>.zip.

// Directory the input archive is unpacked into.
const GTFS_PATH = '/tmp/gtfs/';
// Directory the per-agency feeds are written to.
const TARGET_DIR = './splitGtfs/';

// Report a fatal problem on stderr and stop with a non-zero exit status.
$fail = static function (string $message): void {
    fwrite(STDERR, $message . PHP_EOL);
    exit(1);
};

// Load a GTFS table that a feed is allowed to omit; an absent file is an empty table.
$load_optional = static function (string $file_name): array {
    return is_file(GTFS_PATH . $file_name) ? deserializeCSV(GTFS_PATH . $file_name) : [];
};

if (!isset($argv[1])) {
    $fail('Usage: ' . ($argv[0] ?? 'splitGtfs.php') . ' <gtfs.zip>');
}

foreach ([GTFS_PATH, TARGET_DIR] as $dir) {
    if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
        $fail('Failed to create directory ' . $dir);
    }
}

// Remove tables left behind by a previous run so that a file missing from this
// archive is not silently replaced by stale data.
$gtfs_files = [
    'agency.txt',
    'calendar.txt',
    'calendar_dates.txt',
    'feed_info.txt',
    'routes.txt',
    'stops.txt',
    'stop_times.txt',
    'transfers.txt',
    'trips.txt',
];
foreach ($gtfs_files as $file_name) {
    if (is_file(GTFS_PATH . $file_name)) {
        unlink(GTFS_PATH . $file_name);
    }
}

$zip = new ZipArchive;
$res = $zip->open($argv[1]);
if ($res !== true) {
    $fail('Failed to get GTFS');
}
$extracted = $zip->extractTo(GTFS_PATH);
$zip->close();
if (!$extracted) {
    $fail('Failed to get GTFS');
}

$gtfs_agency = deserializeCSV(GTFS_PATH . 'agency.txt');
$gtfs_calendar = $load_optional('calendar.txt');
$gtfs_calendar_dates = $load_optional('calendar_dates.txt');
$gtfs_feed_info = $load_optional('feed_info.txt');
$gtfs_routes = deserializeCSV(GTFS_PATH . 'routes.txt');
$gtfs_stops = deserializeCSV(GTFS_PATH . 'stops.txt');
$gtfs_stop_times = deserializeCSV(GTFS_PATH . 'stop_times.txt');
$gtfs_transfers = $load_optional('transfers.txt');
$gtfs_trips = deserializeCSV(GTFS_PATH . 'trips.txt');

foreach ($gtfs_agency as $agency) {
    $agency_id = $agency['agency_id'] ?? '';
    $part_feed_info = $gtfs_feed_info;
    $part_agency = [$agency];
    echo PHP_EOL . 'parsing... ' . $agency['agency_name'] . PHP_EOL;

    // Routes owned by this agency. The $used_* arrays are sets keyed by id so
    // that membership tests are exact and O(1).
    $part_routes = [];
    $used_routes = [];
    foreach ($gtfs_routes as $route) {
        // Strict comparison: ids are opaque strings, "1" and "01" are different agencies.
        if (($route['agency_id'] ?? '') !== $agency_id) {
            continue;
        }
        $part_routes[] = $route;
        $used_routes[$route['route_id']] = true;
    }
    echo 'Kept ' . count($part_routes) . ' out of ' . count($gtfs_routes) . ' routes' . PHP_EOL;
    if (count($part_routes) == 0) {
        echo 'No routes, skipped!' . PHP_EOL;
        continue;
    }

    // Trips on those routes, plus the services they run on.
    $part_trips = [];
    $used_services = [];
    $used_stops = [];
    $used_trips = [];
    foreach ($gtfs_trips as $trip) {
        if (!isset($used_routes[$trip['route_id']])) {
            continue;
        }
        $part_trips[] = $trip;
        $used_services[$trip['service_id']] = true;
        // Only keyed inserts: an auto-indexed append would add integer keys
        // that a numeric trip_id of another agency could match.
        $used_trips[$trip['trip_id']] = true;
    }
    echo $agency['agency_name'] . ' has ' . count($part_trips) . ' trips' . PHP_EOL;

    // Stop times of those trips, plus the stops they call at.
    $part_stop_times = [];
    foreach ($gtfs_stop_times as $stop_time) {
        if (!isset($used_trips[$stop_time['trip_id']])) {
            continue;
        }
        $part_stop_times[] = $stop_time;
        $used_stops[$stop_time['stop_id']] = true;
    }
    echo $agency['agency_name'] . ' has ' . count($part_stop_times) . ' stop times' . PHP_EOL;

    $part_calendar = [];
    foreach ($gtfs_calendar as $calendar) {
        if (isset($used_services[$calendar['service_id']])) {
            $part_calendar[] = $calendar;
        }
    }

    $part_calendar_dates = [];
    foreach ($gtfs_calendar_dates as $calendar_date) {
        if (isset($used_services[$calendar_date['service_id']])) {
            $part_calendar_dates[] = $calendar_date;
        }
    }

    $part_stops = [];
    foreach ($gtfs_stops as $stop) {
        if (isset($used_stops[$stop['stop_id']])) {
            $part_stops[] = $stop;
        }
    }
    echo $agency['agency_name'] . ' has ' . count($part_stops) . ' stops' . PHP_EOL;

    // Keep a transfer only when both endpoints are exported; otherwise the
    // split feed would reference a stop that is missing from its stops.txt.
    $part_transfers = [];
    foreach ($gtfs_transfers as $transfer) {
        if (isset($used_stops[$transfer['from_stop_id']]) && isset($used_stops[$transfer['to_stop_id']])) {
            $part_transfers[] = $transfer;
        }
    }

    echo 'exporting... ' . $agency['agency_name'] . PHP_EOL;
    $agency_dir = TARGET_DIR . $agency_id . '/';
    if (!is_dir($agency_dir) && !mkdir($agency_dir, 0777, true) && !is_dir($agency_dir)) {
        $fail('Failed to create directory ' . $agency_dir);
    }

    $parts = [
        'agency.txt' => $part_agency,
        'calendar.txt' => $part_calendar,
        'calendar_dates.txt' => $part_calendar_dates,
        'feed_info.txt' => $part_feed_info,
        'routes.txt' => $part_routes,
        'stops.txt' => $part_stops,
        'stop_times.txt' => $part_stop_times,
        'transfers.txt' => $part_transfers,
        'trips.txt' => $part_trips,
    ];
    foreach ($parts as $file_name => $rows) {
        // serializeCSV() writes nothing for an empty table, so a file left by
        // an earlier run has to be removed here or it would end up in the zip.
        if ($rows === [] && is_file($agency_dir . $file_name)) {
            unlink($agency_dir . $file_name);
        }
        serializeCSV($agency_dir . $file_name, $rows);
    }

    $zip_path = TARGET_DIR . $agency_id . '.zip';
    $zip = new ZipArchive;
    echo 'Saving ' . $zip_path . '...' . PHP_EOL;
    // OVERWRITE starts from an empty archive instead of updating an old one in place.
    if ($zip->open($zip_path, ZipArchive::CREATE | ZipArchive::OVERWRITE) !== true) {
        fwrite(STDERR, 'Failed to create ' . $zip_path . PHP_EOL);
        continue;
    }
    foreach (array_keys($parts) as $file_name) {
        // Empty tables were not written, so only add what exists on disk.
        if (is_file($agency_dir . $file_name)) {
            $zip->addFile($agency_dir . $file_name, $file_name);
        }
    }
    // All files are added, so close the zip file.
    $zip->close();
}
/**
 * Write a table to a CSV file, one line per row, preceded by a header line.
 *
 * The column order is taken from the keys of the first row. An empty table is
 * not written at all: without a row there are no column names to emit.
 *
 * @param $path string File path the CSV is written to (overwritten if it exists)
 * @param $data array  Rows, each an associative array (header => value)
 * @return void
 * @throws RuntimeException when the file cannot be opened or written
 */
function serializeCSV($path, array $data): void
{
    if ($data === []) {
        return;
    }
    // reset() yields the first row whatever its key is, not only key 0.
    $headers = array_keys(reset($data));
    echo 'serializing... ' . $path . ' with ' . count($data). ' rows' . PHP_EOL;
    $file = fopen($path, 'w');
    if ($file === false) {
        throw new RuntimeException($path . ' could not be opened for writing!');
    }
    try {
        // An empty escape character switches off PHP's proprietary backslash
        // escaping, so quotes are always doubled as RFC 4180 requires.
        if (fputcsv($file, $headers, ',', '"', '') === false) {
            throw new RuntimeException($path . ' could not be written!');
        }
        foreach ($data as $dataRow) {
            if (fputcsv($file, serializeCSVLine($headers, $dataRow), ',', '"', '') === false) {
                throw new RuntimeException($path . ' could not be written!');
            }
        }
    } finally {
        // Release the handle even when a write fails half-way through.
        fclose($file);
    }
}
/**
 * Arrange one associative row into the column order given by the headers.
 *
 * The result always has exactly one field per header: a header the row does
 * not contain yields an empty string, so later columns never shift left.
 * Keys of the row that are not listed in the headers are dropped.
 *
 * @param $headers array The headers which should be written, in output order
 * @param $station array The data as an associative array (header => value) to serialize to CSV
 * @return string[] the row containing the data
 */
function serializeCSVLine(array $headers, array $station): array
{
    $row = [];
    foreach ($headers as $header) {
        // Missing (or null) values become an empty field to keep the columns aligned.
        $row[] = $station[$header] ?? '';
    }
    return $row;
}
/**
 * Load a CSV file and store it in an array with incremental keys.
 * Each line is stored as an associative array using column headers as key and the fields as value.
 *
 * A leading UTF-8 byte order mark is ignored, header names are trimmed, blank
 * lines are skipped, rows shorter than the header are padded with empty
 * strings and surplus fields of longer rows are dropped. A file without a
 * header line yields an empty array.
 *
 * @param $csvPath string File path leading to the CSV file
 * @return array the deserialized data
 * @throws RuntimeException when the file cannot be opened
 */
function deserializeCSV($csvPath): array
{
    $fileReadHandle = fopen($csvPath, 'r');
    if (!$fileReadHandle) {
        throw new RuntimeException($csvPath . ' could not be opened!');
    }
    echo 'Deserializing ' . $csvPath . PHP_EOL;

    try {
        // Consume a UTF-8 BOM if present; otherwise it would become part of
        // the first header name and every lookup of that column would fail.
        if (fread($fileReadHandle, 3) !== "\xEF\xBB\xBF") {
            rewind($fileReadHandle);
        }

        // Read the original headers. The empty escape character disables
        // PHP's proprietary backslash escaping (plain RFC 4180 parsing).
        $headers = fgetcsv($fileReadHandle, 0, ',', '"', '');
        if ($headers === false || $headers === [null]) {
            return [];
        }
        $headers = array_map('trim', $headers);
        $width = count($headers);

        $entries = [];
        // Go through all rows
        while (($entry = fgetcsv($fileReadHandle, 0, ',', '"', '')) !== false) {
            // fgetcsv() reports a blank line as an array holding a single null.
            if ($entry === [null]) {
                continue;
            }
            // array_combine() needs as many values as keys.
            $fields = count($entry);
            if ($fields < $width) {
                $entry = array_pad($entry, $width, '');
            } elseif ($fields > $width) {
                $entry = array_slice($entry, 0, $width);
            }
            $entries[] = array_combine($headers, $entry);
        }
        return $entries;
    } finally {
        fclose($fileReadHandle);
    }
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment