Last active
November 20, 2018 16:19
-
-
Save Bertware/ad3c5f16cf8602e8cb9193a8a24d5bd3 to your computer and use it in GitHub Desktop.
Prototype PHP script to convert a GTFS archive into a
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Create a stations list, agencies list, stations list per agency, stops files from GTFS.
 * Due to a lack of time this doesn't output 100% valid linked data (yet), but it's good enough for now
 *
 * Free to use, adapt, modify, redistribute however you want at your own responsibility.
 *
 * Requirements:
 * php-zip
 * allow_url_fopen = On
 *
 */
// Constants, example GTFS from Sweden

// Location of the GTFS archive. A local path works as well as a URL.
const GTFS_ZIP = 'https://transitfeeds.com/p/trafiklab/50/latest/download';
//const GTFS_ZIP = '/home/bert/Desktop/splitGtfs/SJ.zip';

// Temporary working locations used while downloading and extracting the archive.
const TMP_UNZIP_PATH = 'gtfs';
const TMP_ZIPFILE = 'sv-latest-gtfs.zip';

// Names of the GTFS files inside the archive.
const GTFS_AGENCIES = 'agency.txt';
const GTFS_STOP_TIMES = 'stop_times.txt';
const GTFS_TRIPS = 'trips.txt';
const GTFS_ROUTES = 'routes.txt';
const GTFS_STOPS = 'stops.txt';
const GTFS_CAL_DATES = 'calendar_dates.txt';
const GTFS_TRANSFER_TIMES = 'transfers.txt';

// CSV output names and column headers.
const STATIONS_CSV = 'stations.csv';
const STOPS_CSV = 'stops.csv';
const CSV_HEADER_URI = 'URI';
const CSV_HEADER_NAME = 'name';
const CSV_HEADER_COUNTRY = 'country-code';
const CSV_HEADER_LONGITUDE = 'longitude';
const CSV_HEADER_LATITUDE = 'latitude';
const CSV_HEADER_AVG_STOP_TIMES = 'avg_stop_times';
const CSV_HEADER_TRANSFER_TIME = 'official_transfer_time';
const CSV_WRITE_HEADERS = [
    CSV_HEADER_URI,
    CSV_HEADER_NAME,
    CSV_HEADER_COUNTRY,
    CSV_HEADER_LONGITUDE,
    CSV_HEADER_LATITUDE,
    CSV_HEADER_AVG_STOP_TIMES,
    CSV_HEADER_TRANSFER_TIME,
];

// Base URIs under which the generated resources are identified.
const BASE_URI = 'http://se.lc.bertmarcelis.be/';
const BASE_URI_STOPS = BASE_URI . 'stops/';
const BASE_URI_AGENCY = BASE_URI . 'agency/';
const BASE_URI_ROUTE = BASE_URI . 'route/';

// Translation table for strtr(): transliterates accented characters to ASCII and
// drops spaces, dashes, ampersands and slashes so that names can be used in URIs
// and file names.
const UNWANTED_ARRAY = [
    'Š' => 'S', 'š' => 's', 'Ž' => 'Z', 'ž' => 'z',
    'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'A',
    'Ç' => 'C',
    'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E',
    'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I',
    'Ñ' => 'N',
    'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O', 'Ø' => 'O',
    'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U',
    'Ý' => 'Y', 'Þ' => 'B', 'ß' => 'Ss',
    'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 'æ' => 'a',
    'ç' => 'c',
    'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e',
    'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
    'ð' => 'o', 'ñ' => 'n',
    'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ø' => 'o',
    // 'ü' was missing from the original table, so it leaked into generated URIs.
    'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u',
    'ý' => 'y', 'þ' => 'b', 'ÿ' => 'y',
    ' ' => '', '-' => '', '&' => '', '/' => '',
];
// Human-readable labels for GTFS route_type values.
// Contains both the basic GTFS route types (0-7, 11, 12) and the extended
// (TPEG/HVT based) route types (100-1702). The basic types were missing from the
// original table, which made every lookup for a standard feed fail.
const GTFS_TYPES = [
    // Basic GTFS route types
    '0' => 'Tram, Streetcar, Light rail',
    '1' => 'Subway, Metro',
    '2' => 'Rail',
    '3' => 'Bus',
    '4' => 'Ferry',
    '5' => 'Cable tram',
    '6' => 'Aerial lift, suspended cable car',
    '7' => 'Funicular',
    '11' => 'Trolleybus',
    '12' => 'Monorail',
    // Extended route types
    '100' => 'Railway Service',
    '101' => 'High Speed Rail Service',
    '102' => 'Long Distance Trains',
    '103' => 'Inter Regional Rail Service',
    '104' => 'Car Transport Rail Service',
    '105' => 'Sleeper Rail Service',
    '106' => 'Regional Rail Service',
    '107' => 'Tourist Railway Service',
    '108' => 'Rail Shuttle (Within Complex)',
    '109' => 'Suburban Railway',
    '110' => 'Replacement Rail Service',
    '111' => 'Special Rail Service',
    '112' => 'Lorry Transport Rail Service',
    '113' => 'All Rail Services',
    '114' => 'Cross-Country Rail Service',
    '115' => 'Vehicle Transport Rail Service',
    '116' => 'Rack and Pinion Railway',
    '117' => 'Additional Rail Service',
    '200' => 'Coach Service',
    '201' => 'International Coach Service',
    '202' => 'National Coach Service',
    '203' => 'Shuttle Coach Service',
    '204' => 'Regional Coach Service',
    '205' => 'Special Coach Service',
    '206' => 'Sightseeing Coach Service',
    '207' => 'Tourist Coach Service',
    '208' => 'Commuter Coach Service',
    '209' => 'All Coach Services',
    '300' => 'Suburban Railway Service',
    '400' => 'Urban Railway Service',
    '401' => 'Metro Service',
    '402' => 'Underground Service',
    '403' => 'Urban Railway Service',
    '404' => 'All Urban Railway Services',
    '405' => 'Monorail',
    '500' => 'Metro Service',
    '600' => 'Underground Service',
    '700' => 'Bus Service',
    '701' => 'Regional Bus Service',
    '702' => 'Express Bus Service',
    '703' => 'Stopping Bus Service',
    '704' => 'Local Bus Service',
    '705' => 'Night Bus Service',
    '706' => 'Post Bus Service',
    '707' => 'Special Needs Bus',
    '708' => 'Mobility Bus Service',
    '709' => 'Mobility Bus for Registered Disabled',
    '710' => 'Sightseeing Bus',
    '711' => 'Shuttle Bus',
    '712' => 'School Bus',
    '713' => 'School and Public Service Bus',
    '714' => 'Rail Replacement Bus Service',
    '715' => 'Demand and Response Bus Service',
    '716' => 'All Bus Services',
    '800' => 'Trolleybus Service',
    '900' => 'Tram Service',
    '901' => 'City Tram Service',
    '902' => 'Local Tram Service',
    '903' => 'Regional Tram Service',
    '904' => 'Sightseeing Tram Service',
    '905' => 'Shuttle Tram Service',
    '906' => 'All Tram Services',
    '1000' => 'Water Transport Service',
    '1001' => 'International Car Ferry Service',
    '1002' => 'National Car Ferry Service',
    '1003' => 'Regional Car Ferry Service',
    '1004' => 'Local Car Ferry Service',
    '1005' => 'International Passenger Ferry Service',
    '1006' => 'National Passenger Ferry Service',
    '1007' => 'Regional Passenger Ferry Service',
    '1008' => 'Local Passenger Ferry Service',
    '1009' => 'Post Boat Service',
    '1010' => 'Train Ferry Service',
    '1011' => 'Road-Link Ferry Service',
    '1012' => 'Airport-Link Ferry Service',
    '1013' => 'Car High-Speed Ferry Service',
    '1014' => 'Passenger High-Speed Ferry Service',
    '1015' => 'Sightseeing Boat Service',
    '1016' => 'School Boat',
    '1017' => 'Cable-Drawn Boat Service',
    '1018' => 'River Bus Service',
    '1019' => 'Scheduled Ferry Service',
    '1020' => 'Shuttle Ferry Service',
    '1021' => 'All Water Transport Services',
    '1100' => 'Air Service',
    '1101' => 'International Air Service',
    '1102' => 'Domestic Air Service',
    '1103' => 'Intercontinental Air Service',
    '1104' => 'Domestic Scheduled Air Service',
    '1105' => 'Shuttle Air Service',
    '1106' => 'Intercontinental Charter Air Service',
    '1107' => 'International Charter Air Service',
    '1108' => 'Round-Trip Charter Air Service',
    '1109' => 'Sightseeing Air Service',
    '1110' => 'Helicopter Air Service',
    '1111' => 'Domestic Charter Air Service',
    '1112' => 'Schengen-Area Air Service',
    '1113' => 'Airship Service',
    '1114' => 'All Air Services',
    '1200' => 'Ferry Service',
    '1300' => 'Telecabin Service',
    '1301' => 'Telecabin Service',
    '1302' => 'Cable Car Service',
    '1303' => 'Elevator Service',
    '1304' => 'Chair Lift Service',
    '1305' => 'Drag Lift Service',
    '1306' => 'Small Telecabin Service',
    '1307' => 'All Telecabin Services',
    '1400' => 'Funicular Service',
    '1401' => 'Funicular Service',
    '1402' => 'All Funicular Service',
    '1500' => 'Taxi Service',
    '1501' => 'Communal Taxi Service',
    '1502' => 'Water Taxi Service',
    '1503' => 'Rail Taxi Service',
    '1504' => 'Bike Taxi Service',
    '1505' => 'Licensed Taxi Service',
    '1506' => 'Private Hire Service Vehicle',
    '1507' => 'All Taxi Services',
    '1600' => 'Self Drive',
    '1601' => 'Hire Car',
    '1602' => 'Hire Van',
    '1603' => 'Hire Motorbike',
    '1604' => 'Hire Cycle',
    '1700' => 'Miscellaneous Service',
    '1701' => 'Cable Car',
    '1702' => 'Horse-drawn Carriage',
];
/*
 * Geonames URIs and ISO 3166-1 alpha-2 codes of the countries that stops are
 * assigned to (see the stop id prefix switch in the station writers).
 * All geonames URIs use the canonical form with a trailing slash; the original
 * constants mixed both forms.
 */
const HTTP_SWS_GEONAMES_ORG_660013 = 'http://sws.geonames.org/660013/';   // used for CC_FI
const HTTP_SWS_GEONAMES_ORG_2623032 = 'http://sws.geonames.org/2623032/'; // used for CC_DK
const HTTP_SWS_GEONAMES_ORG_3144096 = 'http://sws.geonames.org/3144096/'; // used for CC_NO
const HTTP_SWS_GEONAMES_ORG_2661886 = 'http://sws.geonames.org/2661886/'; // used for CC_SE
const CC_SE = 'SE';
const CC_NO = 'NO';
const CC_DK = 'DK';
const CC_FI = 'FI';
/*
 * Script entry point.
 *
 * 1. Fetch and unpack the GTFS archive.
 * 2. Index agencies, routes and the route types served at every stop.
 */
echo 'Gathering resources...', PHP_EOL;
downloadGTFS();
[$providers, $routes, $stopdata] = getProviderAndTransportTypeInfo();

/*
 * 3. Derive the figures that are attached to every stop:
 *    the number of calls per stop, the number of days covered and the
 *    minimum transfer times.
 */
echo 'Calculating derivates...', PHP_EOL;
[$handledDaysCount, $stopFrequencies] = getStopTimes();
$transferTimes = parseTransferTimes();

// 4. Load the stop master data, keyed by stop URI.
$gtfsStations = getGTFSStops();

/*
 * 5. Write the JSON-LD documents: one file per stop plus the full stations list,
 *    one stations list per agency, one description per agency plus the master
 *    catalog, and finally the routes.
 */
echo 'Compiling JSON-LD file...', PHP_EOL;
createStationsDataStructure($gtfsStations, $stopdata, $providers, $stopFrequencies, $handledDaysCount, $transferTimes);
createAgenciesStopLists($gtfsStations, $stopdata, $providers, $stopFrequencies, $handledDaysCount, $transferTimes);
createAgenciesDataStructure($gtfsStations, $stopdata, $providers, $stopFrequencies, $handledDaysCount, $transferTimes);
createRoutesDataStructure($gtfsStations, $stopdata, $providers, $stopFrequencies, $handledDaysCount, $transferTimes);
/**
 * Download and extract the latest GTFS data set.
 *
 * The archive is only downloaded when TMP_ZIPFILE does not exist yet; an existing
 * file is reused as a cache. After extraction the GTFS files this script needs are
 * moved into the working directory and the extraction folder is removed again.
 * The zip file itself is kept.
 *
 * Terminates the script when the archive cannot be downloaded or extracted.
 */
function downloadGTFS(): void
{
    echo 'Downloading data...' . PHP_EOL;
    if (!file_exists(TMP_ZIPFILE)) {
        // Download zip file with GTFS data. Check the result so that a failed
        // download is not cached as an empty, unreadable archive.
        $archive = file_get_contents(GTFS_ZIP);
        if ($archive === false || file_put_contents(TMP_ZIPFILE, $archive) === false) {
            die('Could not download GTFS data');
        }
    }

    echo 'Extracting data...' . PHP_EOL;
    // Load the zip file. ZipArchive::open() returns true on success and an
    // integer error code otherwise, so the result must be compared strictly.
    $zip = new ZipArchive();
    if ($zip->open(TMP_ZIPFILE) !== true) {
        die('Could not extract downloaded GTFS data');
    }
    $extracted = $zip->extractTo(TMP_UNZIP_PATH);
    $zip->close();
    if (!$extracted) {
        die('Could not extract downloaded GTFS data');
    }

    // Get the files we need.
    $neededFiles = [
        GTFS_STOP_TIMES,
        GTFS_ROUTES,
        GTFS_AGENCIES,
        GTFS_TRIPS,
        GTFS_CAL_DATES,
        GTFS_STOPS,
        GTFS_TRANSFER_TIMES,
    ];
    foreach ($neededFiles as $gtfsFile) {
        $source = TMP_UNZIP_PATH . '/' . $gtfsFile;
        if (file_exists($source)) {
            rename($source, $gtfsFile);
        } else {
            echo "Warning: $gtfsFile is missing from the GTFS archive" . PHP_EOL;
        }
    }

    echo 'Cleaning up resources...' . PHP_EOL;
    // Remove everything that is left in the extraction folder, children first so
    // that nested directories inside the archive can be removed as well.
    $leftovers = new RecursiveIteratorIterator(
        new RecursiveDirectoryIterator(TMP_UNZIP_PATH, FilesystemIterator::SKIP_DOTS),
        RecursiveIteratorIterator::CHILD_FIRST
    );
    foreach ($leftovers as $entry) {
        if ($entry->isDir()) {
            rmdir($entry->getPathname());
        } else {
            unlink($entry->getPathname());
        }
    }
    // Remove the now empty folder.
    rmdir(TMP_UNZIP_PATH);
}
/**
 * Index the agencies and routes of the feed and determine, for every stop, which
 * agencies call there and with which route types.
 *
 * Reads agency.txt, routes.txt and trips.txt through deserializeCSV() and streams
 * stop_times.txt row by row.
 *
 * @return array A list with three elements:
 *               0 => agencies keyed by agency_id, each ['name' => ..., 'url' => ...]
 *               1 => routes keyed by route_id, each ['name', 'number', 'provider', 'route_type']
 *               2 => stop data: stop_id => agency_id => list of distinct route_type values
 */
function getProviderAndTransportTypeInfo()
{
    echo 'Gathering agencies...' . PHP_EOL;
    // Link Providers to stop ids
    $providers = [];
    foreach (deserializeCSV(GTFS_AGENCIES) as $provider) {
        // agency_id is optional in GTFS; fall back to the empty id.
        $providers[$provider['agency_id'] ?? ''] = [
            'name' => $provider['agency_name'],
            'url' => $provider['agency_url'],
        ];
    }

    echo 'Gathering routes...' . PHP_EOL;
    $routes = [];
    foreach (deserializeCSV(GTFS_ROUTES) as $route) {
        $routes[$route['route_id']] = [
            'name' => $route['route_long_name'],
            'number' => $route['route_short_name'],
            'provider' => $route['agency_id'] ?? '',
            'route_type' => $route['route_type'],
        ];
    }

    echo 'Gathering trips...' . PHP_EOL;
    $trips = [];
    foreach (deserializeCSV(GTFS_TRIPS) as $trip) {
        $trips[$trip['trip_id']] = ['route_id' => $trip['route_id']];
    }
    echo(count($trips) . " trips " . PHP_EOL);

    echo 'Gathering stop times...' . PHP_EOL;
    $stopdata = [];
    echo "reading GTFS_STOP_TIMES streaming" . PHP_EOL;
    // stop_times.txt is by far the largest file, so it is streamed instead of
    // being loaded into memory as a whole.
    $handle = @fopen(GTFS_STOP_TIMES, "r");
    if ($handle) {
        $fields = [];
        while (($row = fgetcsv($handle)) !== false) {
            if (empty($fields)) {
                $fields = $row;
                // A UTF-8 byte order mark would otherwise become part of the first column name.
                $fields[0] = str_replace("\xEF\xBB\xBF", '', (string)$fields[0]);
                continue;
            }
            // Start from a clean record so that a short row cannot inherit values
            // from the previous row.
            $stoptime = [];
            foreach ($row as $k => $value) {
                if (isset($fields[$k])) {
                    $stoptime[$fields[$k]] = $value;
                }
            }
            if (!isset($stoptime['trip_id'], $stoptime['stop_id'])) {
                continue;
            }
            // Skip stop times that reference a trip or route that is not in the feed.
            $routeId = $trips[$stoptime['trip_id']]['route_id'] ?? null;
            if ($routeId === null || !isset($routes[$routeId])) {
                continue;
            }
            $route = $routes[$routeId];
            // Keyed by route type so that duplicates are dropped while reading,
            // instead of storing one element per stop time.
            $stopdata[$stoptime['stop_id']][$route['provider']][$route['route_type']] = $route['route_type'];
        }
        if (!feof($handle)) {
            echo "Error: unexpected fgets() fail\n";
        }
        fclose($handle);
    } else {
        echo 'Warning: could not open ' . GTFS_STOP_TIMES . PHP_EOL;
    }
    echo "read GTFS_STOP_TIMES" . PHP_EOL;

    // Turn the per-agency sets into plain lists, in order of first appearance.
    foreach ($stopdata as $stop => $stopProviders) {
        foreach ($stopProviders as $provider => $types) {
            $stopdata[$stop][$provider] = array_values($types);
        }
    }
    return [$providers, $routes, $stopdata];
}
/**
 * Read the minimum transfer times from transfers.txt.
 *
 * Only transfers within one and the same stop (from_stop_id equal to to_stop_id)
 * are kept. The transfers file is deleted afterwards.
 *
 * @return array min_transfer_time values keyed by stop URI
 */
function parseTransferTimes(): array
{
    // Columns used: from_stop_id, to_stop_id, min_transfer_time
    $transferTimes = [];
    foreach (deserializeCSV(GTFS_TRANSFER_TIMES) as $transfer) {
        // Transfers between two different stops are of no interest here.
        if ($transfer['from_stop_id'] === $transfer['to_stop_id']) {
            $stopUri = BASE_URI_STOPS . $transfer['from_stop_id'];
            $transferTimes[$stopUri] = $transfer['min_transfer_time'];
        }
    }

    // The file has served its purpose, remove it.
    unlink(GTFS_TRANSFER_TIMES);

    return $transferTimes;
}
/**
 * Open a GTFS CSV file, consume its header line and resolve column positions.
 *
 * @param string $path              File to open
 * @param string $errorMessage      Message the script terminates with when the file cannot be opened
 * @param array  $fallbackPositions Column name => position to use when the header does not name that column
 * @return array [file handle, column name => position]
 */
function openGtfsCsv(string $path, string $errorMessage, array $fallbackPositions): array
{
    $handle = fopen($path, 'r');
    if (!$handle) {
        die($errorMessage);
    }
    $columns = $fallbackPositions;
    $header = fgetcsv($handle);
    if (is_array($header)) {
        foreach ($header as $position => $name) {
            // Drop a UTF-8 byte order mark and stray whitespace around the name.
            $name = trim(str_replace("\xEF\xBB\xBF", '', (string)$name));
            if (array_key_exists($name, $fallbackPositions)) {
                $columns[$name] = $position;
            }
        }
    }
    return [$handle, $columns];
}

/**
 * Get the number of stops made on each station, as well as the number of days which were handled.
 * This can be used to calculate both the stop times per station and the average stop times per station.
 *
 * Reads calendar_dates.txt, trips.txt and stop_times.txt and deletes each of them
 * once it has been processed. Columns are located through the header line; the
 * positions the original implementation assumed are used as a fallback.
 *
 * NOTE(review): every calendar_dates row counts as a day of service, regardless of
 * exception_type, and calendar.txt is not read at all. Confirm this matches the feed.
 *
 * @return array [number of distinct dates seen, stop URI => number of calls at that stop]
 */
function getStopTimes(): array
{
    echo 'Creating service id frequency table...' . PHP_EOL;
    [$fileReadHandle, $columns] = openGtfsCsv(
        GTFS_CAL_DATES,
        GTFS_CAL_DATES . ' could not be opened!',
        ['service_id' => 0, 'date' => 1]
    );
    // Number of dates on which each service runs.
    $serviceFrequency = [];
    // The dates we've handled.
    $isDateHandled = [];
    while (($row = fgetcsv($fileReadHandle)) !== false) {
        // Blank or truncated lines do not carry the fields we need.
        if (!isset($row[$columns['service_id']], $row[$columns['date']])) {
            continue;
        }
        $serviceId = trim($row[$columns['service_id']]);
        $date = trim($row[$columns['date']]);
        $serviceFrequency[$serviceId] = ($serviceFrequency[$serviceId] ?? 0) + 1;
        $isDateHandled[$date] = 1;
    }
    // Close this handle. Important!
    fclose($fileReadHandle);
    // We don't need this file anymore. Cleanup.
    unlink(GTFS_CAL_DATES);

    // Use the calendar frequencies to calculate the frequency of each trip
    echo 'Creating trip id frequency table...' . PHP_EOL;
    [$fileReadHandle, $columns] = openGtfsCsv(
        GTFS_TRIPS,
        GTFS_TRIPS . ' could not be opened!',
        ['service_id' => 1, 'trip_id' => 2]
    );
    // Number of times each trip is made, which equals the frequency of its service.
    $tripFrequencies = [];
    while (($row = fgetcsv($fileReadHandle)) !== false) {
        if (!isset($row[$columns['service_id']], $row[$columns['trip_id']])) {
            continue;
        }
        $serviceId = trim($row[$columns['service_id']]);
        $tripId = trim($row[$columns['trip_id']]);
        // A service without any calendar date never runs.
        $tripFrequencies[$tripId] = $serviceFrequency[$serviceId] ?? 0;
    }
    // Close this handle. Important!
    fclose($fileReadHandle);
    // We don't need this file anymore. Cleanup.
    unlink(GTFS_TRIPS);

    // Use the trip frequencies to count the calls at each stop
    echo 'Creating frequency table...' . PHP_EOL;
    [$fileReadHandle, $columns] = openGtfsCsv(
        GTFS_STOP_TIMES,
        'GTFS stop times file could not be opened!',
        ['trip_id' => 0, 'stop_id' => 3]
    );
    $stopFrequencies = [];
    while (($row = fgetcsv($fileReadHandle)) !== false) {
        if (!isset($row[$columns['trip_id']], $row[$columns['stop_id']])) {
            continue;
        }
        $uri = BASE_URI_STOPS . trim($row[$columns['stop_id']]);
        $tripId = trim($row[$columns['trip_id']]);
        // The amount of times this trip is made; unknown trips do not contribute.
        $tripFrequency = $tripFrequencies[$tripId] ?? 0;
        $stopFrequencies[$uri] = ($stopFrequencies[$uri] ?? 0) + $tripFrequency;
    }
    // Close this handle. Important!
    fclose($fileReadHandle);
    unlink(GTFS_STOP_TIMES);

    // Get the number of days that were handled. We need this to calculate the average later on.
    $handledDaysCount = count($isDateHandled);
    return [$handledDaysCount, $stopFrequencies];
}
/**
 * Load the stops of the GTFS dataset, sorted by name, then id, then platform code.
 *
 * The stops file is deleted afterwards.
 *
 * @return array stops.txt rows (column name => value) keyed by stop URI
 */
function getGTFSStops(): array
{
    // Columns used: stop_id, stop_name and, when present, platform_code
    $parsedCsv = deserializeCSV(GTFS_STOPS);

    // usort() expects an integer smaller than, equal to or greater than zero.
    // The original comparator returned a boolean, which cannot express "smaller"
    // and therefore did not sort reliably.
    usort($parsedCsv, static function (array $a, array $b): int {
        return [$a['stop_name'] ?? '', $a['stop_id'] ?? '', $a['platform_code'] ?? '']
            <=> [$b['stop_name'] ?? '', $b['stop_id'] ?? '', $b['platform_code'] ?? ''];
    });

    $gtfsStations = [];
    foreach ($parsedCsv as $csvRow) {
        $gtfsStations[BASE_URI_STOPS . $csvRow['stop_id']] = $csvRow;
    }

    unlink(GTFS_STOPS);
    return $gtfsStations;
}
/**
 * Load a CSV file into a list of rows.
 *
 * The first non-blank line is taken as the header. Every following line becomes an
 * associative array using the column headers as keys and the fields as values.
 * Rows are numbered sequentially from 0. Blank lines are skipped, cells without a
 * matching header column are ignored and a UTF-8 byte order mark in front of the
 * header is removed.
 *
 * A file that cannot be opened yields an empty array and a warning on standard output.
 *
 * @param $csvPath string File path leading to the CSV file
 * @return array the deserialized data
 */
function deserializeCSV($csvPath): array
{
    echo "reading $csvPath" . PHP_EOL;
    $rows = [];
    $fields = [];
    $handle = @fopen($csvPath, "r");
    if ($handle) {
        while (($row = fgetcsv($handle)) !== false) {
            // fgetcsv() reports a blank line as an array holding a single null.
            if ($row === [null]) {
                continue;
            }
            if (empty($fields)) {
                $fields = $row;
                // Without this the first column name would start with the byte order mark.
                $fields[0] = str_replace("\xEF\xBB\xBF", '', (string)$fields[0]);
                continue;
            }
            $record = [];
            foreach ($row as $k => $value) {
                if (isset($fields[$k])) {
                    $record[$fields[$k]] = $value;
                }
            }
            $rows[] = $record;
        }
        if (!feof($handle)) {
            echo "Error: unexpected fgets() fail\n";
        }
        fclose($handle);
    } else {
        echo "Warning: could not open $csvPath" . PHP_EOL;
    }
    echo "read $csvPath" . PHP_EOL;
    return $rows;
}
/**
 * Write one JSON-LD document per stop (stops/<stop_id>.json) and one document
 * containing all stops (stations.json).
 *
 * Stops whose URI is empty or contains "_" or "S8" are skipped. Lookups that have
 * no data for a stop (no agencies, no calls, no transfer time) produce an empty
 * agency list, 0 calls and a null transfer time respectively.
 *
 * @param $gtfsStations     array stops.txt rows keyed by stop URI
 * @param $stopdata         array stop_id => agency_id => list of route types
 * @param $providers        array agencies keyed by agency_id, each with a 'name'
 * @param $stopFrequencies  array number of calls keyed by stop URI
 * @param $handledDaysCount int number of days the call counts were gathered over
 * @param $transferTimes    array minimum transfer times keyed by stop URI
 * @return array the JSON-LD structure that was written to stations.json
 */
function createStationsDataStructure(&$gtfsStations, &$stopdata, &$providers, &$stopFrequencies, $handledDaysCount, $transferTimes): array
{
    echo 'Saving...' . PHP_EOL;
    $jsonFlags = JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES;

    $jsonLd = [
        'version' => 0.1,
        'timestamp' => time(),
        '@context' => [
            'dct' => 'http://purl.org/dc/terms/',
            'longitude' => 'http://www.w3.org/2003/01/geo/wgs84_pos#long',
            'latitude' => 'http://www.w3.org/2003/01/geo/wgs84_pos#lat',
            'country' => [
                '@type' => '@id',
                '@id' => 'http://www.geonames.org/ontology#parentCountry'
            ],
            'gtfs' => 'http://vocab.gtfs.org/terms#',
            'stop' => 'gtfs:Stop',
            'hafasCode' => 'gtfs:code',
            'official_transfer_time' => 'gtfs:minimumTransferTime',
            'name' => 'foaf:name',
            'foaf' => 'http://xmlns.com/foaf/0.1/'
        ],
        '@graph' => []
    ];

    // Context of the stand-alone stop documents. It is the same for every stop,
    // so it is built once instead of once per stop.
    $stopContext = [
        'longitude' => 'http://www.w3.org/2003/01/geo/wgs84_pos#long',
        'latitude' => 'http://www.w3.org/2003/01/geo/wgs84_pos#lat',
        'dct' => 'http://purl.org/dc/terms/',
        'country' => [
            '@type' => '@id',
            '@id' => 'http://www.geonames.org/ontology#parentCountry'
        ],
        'foaf' => 'http://xmlns.com/foaf/0.1/',
        'gtfs' => 'http://vocab.gtfs.org/terms#',
        'stop' => 'gtfs:Stop',
        'hafasCode' => 'gtfs:code',
        'official_transfer_time' => 'gtfs:minimumTransferTime',
        'name' => 'foaf:name'
    ];

    if (!is_dir('stops')) {
        mkdir('stops');
    }

    foreach ($gtfsStations as $uri => $gtfsStation) {
        if (empty($uri) || strpos($uri, "_") !== false || strpos($uri, "S8") !== false) {
            continue; // Invalid data
        }

        $ldValue = [
            '@context' => $stopContext,
            '@id' => $uri,
            '@type' => 'stop',
            'latitude' => $gtfsStation['stop_lat'],
            'longitude' => $gtfsStation['stop_lon'],
            'name' => $gtfsStation['stop_name'],
            'hafasCode' => $gtfsStation['stop_id'],
        ];

        // The first two characters of the stop id select the country; anything
        // that is not recognised is treated as Swedish.
        switch (substr($ldValue['hafasCode'], 0, 2)) {
            case '10':
                $ldValue['countryCode'] = CC_FI;
                $ldValue['country'] = HTTP_SWS_GEONAMES_ORG_660013;
                break;
            case '86':
                $ldValue['countryCode'] = CC_DK;
                $ldValue['country'] = HTTP_SWS_GEONAMES_ORG_2623032;
                break;
            case '76':
                $ldValue['countryCode'] = CC_NO;
                $ldValue['country'] = HTTP_SWS_GEONAMES_ORG_3144096;
                break;
            case '74':
            default:
                $ldValue['countryCode'] = CC_SE;
                $ldValue['country'] = HTTP_SWS_GEONAMES_ORG_2661886;
                break;
        }

        // A stop without any stop times has no entry in $stopdata.
        foreach ($stopdata[$ldValue['hafasCode']] ?? [] as $agency => $transporttypes) {
            $readableTransportTypes = [];
            foreach ($transporttypes as $value) {
                // Route types without a known label are reported as null.
                $readableTransportTypes[] = GTFS_TYPES[$value] ?? null;
            }
            $agencyName = $providers[$agency]['name'] ?? null;
            $ldValue['operatedBy'][] = [
                '@id' => BASE_URI_AGENCY . strtr(strtolower((string)$agencyName), UNWANTED_ARRAY),
                'name' => $agencyName,
                'gtfs:routeType' => $transporttypes,
                'routeType' => $readableTransportTypes
            ];
        }

        // Guard against feeds without any calendar dates (division by zero).
        $ldValue['avg_stop_times'] = $handledDaysCount > 0
            ? round(($stopFrequencies[$uri] ?? 0) / $handledDaysCount, 4)
            : 0.0;
        $ldValue['official_transfer_time'] = $transferTimes[$uri] ?? null;

        file_put_contents("stops/" . $ldValue['hafasCode'] . ".json", json_encode($ldValue, $jsonFlags));

        // Inside the collection the context of the collection applies.
        unset($ldValue['@context']);
        $jsonLd['@graph'][] = $ldValue;
    }

    file_put_contents("stations.json", json_encode($jsonLd, $jsonFlags));
    return $jsonLd;
}
/**
 * Write one JSON-LD description per agency (agency/<slug>.json) and a DCAT master
 * catalog (master-catalog.json) that lists a connections dataset and a stops
 * dataset for every agency.
 *
 * For every agency the stops it serves are collected together with their bounding
 * box and the readable names of the route types operated there. An agency without
 * stops keeps the initial bounding box values (999 / -999).
 *
 * @param $gtfsStations     array stops.txt rows keyed by stop URI
 * @param $stopdata         array stop_id => agency_id => list of route types
 * @param $providers        array agencies keyed by agency_id, each with 'name' and 'url'
 * @param $stopFrequencies  array unused
 * @param $handledDaysCount int unused
 * @param $transferTimes    array unused
 */
function createAgenciesDataStructure($gtfsStations, $stopdata, $providers, $stopFrequencies, $handledDaysCount, $transferTimes)
{
    $jsonFlags = JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES;

    // Term definitions for the bounding box, shared by the catalog and the agency
    // documents. The agency documents used to contain 'http=>//...' here, a
    // search-and-replace accident that produced invalid IRIs.
    $boundingBoxContext = [
        'minLatitude' => [
            '@id' => 'http://aims.fao.org/aos/geopolitical.owl#hasMinLatitude',
            '@type' => '@id'
        ],
        'maxLatitude' => [
            '@id' => 'http://aims.fao.org/aos/geopolitical.owl#hasMaxLatitude',
            '@type' => '@id'
        ],
        'minLongitude' => [
            '@id' => 'http://aims.fao.org/aos/geopolitical.owl#hasMinLongitude',
            '@type' => '@id'
        ],
        'maxLongitude' => [
            '@id' => 'http://aims.fao.org/aos/geopolitical.owl#hasMaxLongitude',
            '@type' => '@id'
        ],
    ];

    $masterCatalog = [
        '@context' => array_merge(
            [
                'xsd' => 'http://www.w3.org/2001/XMLSchema#',
                'dcat' => 'http://www.w3.org/ns/dcat#',
                'dct' => 'http://purl.org/dc/terms/',
                'foaf' => 'http://xmlns.com/foaf/0.1/',
                'owl' => 'http://www.w3.org/2002/07/owl#',
                'schema' => 'http://schema.org/',
                'dct:modified' => [
                    '@type' => 'xsd:dateTime'
                ],
                'dct:issued' => [
                    '@type' => 'xsd:dateTime'
                ],
                'dct:spatial' => [
                    '@type' => '@id'
                ],
                'dct:license' => [
                    '@type' => '@id'
                ],
                'dct:conformsTo' => [
                    '@type' => '@id'
                ],
                'dcat:mediaType' => [
                    '@type' => 'xsd:string'
                ],
                'schema:startDate' => [
                    '@type' => 'xsd:dateTime'
                ],
                'schema:endDate' => [
                    '@type' => 'xsd:dateTime'
                ],
            ],
            $boundingBoxContext
        ),
        '@id' => 'http://se.lc.bertmarcelis.be/catalog',
        '@type' => 'dcat:Catalog',
        'dct:title' => 'Catalog of Samtrafiken datasets',
        'dct:description' => 'Catalog of datasets published by Samtrafiken / Trafiklab',
        'dct:modified' => '2018-11-01T10:00:00.000+01:00',
        'dct:license' => 'http://creativecommons.org/publicdomain/zero/1.0/',
        'dct:rights' => 'public',
        'dct:publisher' => [
            '@id' => 'http://samtrafiken.se',
            '@type' => 'foaf:Organization',
            'foaf:name' => 'Samtrafiken'
        ],
        'dcat:dataset' => [
        ]
    ];

    foreach ($providers as $agencyId => $provider) {
        // Name as used in URIs and file names, and as used in titles.
        $slug = strtr(strtolower($provider['name']), UNWANTED_ARRAY);
        $plainName = strtr($provider['name'], UNWANTED_ARRAY);

        $stations = [];
        $readableTransportTypes = [];
        $minLat = 999;
        $maxLat = -999;
        $minLong = 999;
        $maxLong = -999;
        // One pass over the stops gathers everything that is needed for this agency.
        foreach ($gtfsStations as $gtfsStation) {
            // Stops without stop times have no entry in $stopdata at all.
            if (!isset($stopdata[$gtfsStation['stop_id']][$agencyId])) {
                continue;
            }
            $stations[] = BASE_URI_STOPS . $gtfsStation['stop_id'];
            $minLat = min($gtfsStation['stop_lat'], $minLat);
            $maxLat = max($gtfsStation['stop_lat'], $maxLat);
            $minLong = min($gtfsStation['stop_lon'], $minLong);
            $maxLong = max($gtfsStation['stop_lon'], $maxLong);
            foreach ($stopdata[$gtfsStation['stop_id']][$agencyId] as $value) {
                // Route types without a known label are reported as null.
                $readableTransportTypes[] = GTFS_TYPES[$value] ?? null;
            }
        }
        $readableTransportTypes = array_values(array_unique($readableTransportTypes));

        $jsonLd = [
            'version' => 0.1,
            'timestamp' => time(),
            '@context' => array_merge(
                [
                    'geo' => 'http://www.w3.org/2003/01/geo/wgs84_pos#',
                    'dct' => 'http://purl.org/dc/terms/',
                    'foaf' => 'http://xmlns.com/foaf/0.1/',
                ],
                $boundingBoxContext,
                [
                    'country' => [
                        '@type' => '@id',
                        '@id' => 'http://www.geonames.org/ontology#parentCountry'
                    ]
                ]
            ),
            '@id' => BASE_URI_AGENCY . $slug,
            "dct:publisher" => [
                '@id' => 'http://samtrafiken.se',
                '@type' => 'foaf:Organization',
                'foaf:name' => 'Samtrafiken'
            ],
            'foaf:name' => $provider['name'],
            'foaf:homepage' => $provider['url'],
            'linkedconnections' => 'https://se.lc.bertmarcelis.be/' . $slug . '/connections',
            'minLatitude' => $minLat,
            'maxLatitude' => $maxLat,
            'minLongitude' => $minLong,
            'maxLongitude' => $maxLong,
            'stations' => $stations
        ];
        if (!is_dir('agency')) {
            mkdir('agency');
        }
        file_put_contents("agency/" . $slug . ".json", json_encode($jsonLd, $jsonFlags));

        $catalogEntry = [
            '@id' => 'http://se.lc.bertmarcelis.be/datasets/' . $slug . '/connections',
            '@type' => 'dcat:Dataset',
            'dct:description' => 'Linked Connections dataset for ' . $plainName,
            'dct:title' => 'Linked Connections - ' . $plainName,
            //'dct:spatial' => 'http://sws.geonames.org/2673722/',
            'minLatitude' => $minLat,
            'maxLatitude' => $maxLat,
            'minLongitude' => $minLong,
            'maxLongitude' => $maxLong,
            'dcat:keyword' => $readableTransportTypes,
            'dct:conformsTo' => 'http://linkedconnections.org/specification/1-0',
            'dct:accessRights' => 'public',
            'dcat:distribution' => [
                [
                    '@id' => 'http://se.lc.bertmarcelis.be/' . $slug . '/connections',
                    '@type' => 'dcat:Distribution',
                    'dcat:accessURL' => 'http://se.lc.bertmarcelis.be/' . $slug . '/connections',
                    'dct:license' => 'http://creativecommons.org/publicdomain/zero/1.0/',
                    'dcat:mediaType' => 'application/ld+json',
                    'dct:issued' => (new DateTime())->format(DATE_RFC3339),
                ]
            ]
        ];
        $stopsCatalogEntry = [
            '@id' => 'http://se.lc.bertmarcelis.be/datasets/' . $slug . '/stops',
            '@type' => 'dcat:Dataset',
            'dct:description' => 'Stop locations dataset for ' . $plainName,
            'dct:title' => 'Stop locations - ' . $plainName,
            'dct:spatial' => 'http://sws.geonames.org/2673722/',
            'minLatitude' => $minLat,
            'maxLatitude' => $maxLat,
            'minLongitude' => $minLong,
            'maxLongitude' => $maxLong,
            'dcat:keyword' => $readableTransportTypes,
            'dct:accessRights' => 'public',
            'dcat:distribution' => [
                [
                    // NOTE(review): there is no separator between "stops" and the agency
                    // name here; kept as it was, confirm whether "stops/" was intended.
                    '@id' => 'http://se.lc.bertmarcelis.be/stops' . $slug,
                    '@type' => 'dcat:Distribution',
                    'dcat:accessURL' => 'http://se.lc.bertmarcelis.be/stations-' . $slug . '.json',
                    'dct:license' => 'http://creativecommons.org/publicdomain/zero/1.0/',
                    'dcat:mediaType' => 'application/ld+json',
                    'dct:issued' => (new DateTime())->format(DATE_RFC3339),
                ]
            ]
        ];
        $masterCatalog['dcat:dataset'][] = $catalogEntry;
        $masterCatalog['dcat:dataset'][] = $stopsCatalogEntry;
    }

    file_put_contents("master-catalog.json", json_encode($masterCatalog, $jsonFlags));
}
/**
 * Write one JSON-LD stations list per agency (stations-<slug>.json), containing
 * every stop at which that agency calls.
 *
 * Lookups that have no data for a stop (no calls, no transfer time) produce 0
 * calls and a null transfer time respectively.
 *
 * @param $gtfsStations     array stops.txt rows keyed by stop URI
 * @param $stopdata         array stop_id => agency_id => list of route types
 * @param $providers        array agencies keyed by agency_id, each with a 'name'
 * @param $stopFrequencies  array number of calls keyed by stop URI
 * @param $handledDaysCount int number of days the call counts were gathered over
 * @param $transferTimes    array minimum transfer times keyed by stop URI
 */
function createAgenciesStopLists($gtfsStations, $stopdata, $providers, $stopFrequencies, $handledDaysCount, $transferTimes)
{
    $jsonFlags = JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES;

    foreach ($providers as $agencyId => $provider) {
        $jsonLd = [
            'version' => 0.1,
            'timestamp' => time(),
            '@context' => [
                'dct' => 'http://purl.org/dc/terms/',
                'longitude' => 'http://www.w3.org/2003/01/geo/wgs84_pos#long',
                'latitude' => 'http://www.w3.org/2003/01/geo/wgs84_pos#lat',
                'country' => [
                    '@type' => '@id',
                    '@id' => 'http://www.geonames.org/ontology#parentCountry'
                ],
                'gtfs' => 'http://vocab.gtfs.org/terms#',
                'stop' => 'gtfs:Stop',
                'hafasCode' => 'gtfs:code',
                'official_transfer_time' => 'gtfs:minimumTransferTime',
                'name' => 'foaf:name',
                'foaf' => 'http://xmlns.com/foaf/0.1/'
            ],
            '@graph' => []
        ];

        foreach ($gtfsStations as $gtfsStation) {
            // Stops without stop times have no entry in $stopdata at all; passing
            // that missing entry to key_exists() is a TypeError on PHP 8.
            if (!isset($stopdata[$gtfsStation['stop_id']][$agencyId])) {
                continue;
            }

            $uri = BASE_URI_STOPS . $gtfsStation['stop_id'];
            $ldValue = [
                '@id' => $uri,
                '@type' => 'stop',
                'latitude' => $gtfsStation['stop_lat'],
                'longitude' => $gtfsStation['stop_lon'],
                'name' => $gtfsStation['stop_name'],
                'hafasCode' => $gtfsStation['stop_id'],
            ];

            // The first two characters of the stop id select the country; anything
            // that is not recognised is treated as Swedish.
            switch (substr($ldValue['hafasCode'], 0, 2)) {
                case '10':
                    $ldValue['countryCode'] = CC_FI;
                    $ldValue['country'] = HTTP_SWS_GEONAMES_ORG_660013;
                    break;
                case '86':
                    $ldValue['countryCode'] = CC_DK;
                    $ldValue['country'] = HTTP_SWS_GEONAMES_ORG_2623032;
                    break;
                case '76':
                    $ldValue['countryCode'] = CC_NO;
                    $ldValue['country'] = HTTP_SWS_GEONAMES_ORG_3144096;
                    break;
                case '74':
                default:
                    $ldValue['countryCode'] = CC_SE;
                    $ldValue['country'] = HTTP_SWS_GEONAMES_ORG_2661886;
                    break;
            }

            // All agencies calling at this stop are listed, not only the current one.
            foreach ($stopdata[$ldValue['hafasCode']] as $agency => $transporttypes) {
                $readableTransportTypes = [];
                foreach ($transporttypes as $value) {
                    // Route types without a known label are reported as null.
                    $readableTransportTypes[] = GTFS_TYPES[$value] ?? null;
                }
                $agencyName = $providers[$agency]['name'] ?? null;
                $ldValue['agency'][] = [
                    '@id' => BASE_URI_AGENCY . strtr(strtolower((string)$agencyName), UNWANTED_ARRAY),
                    'name' => $agencyName,
                    'gtfs:routeType' => $transporttypes,
                    'routeType' => $readableTransportTypes
                ];
            }

            // Guard against feeds without any calendar dates (division by zero).
            $ldValue['avg_stop_times'] = $handledDaysCount > 0
                ? round(($stopFrequencies[$uri] ?? 0) / $handledDaysCount, 4)
                : 0.0;
            $ldValue['official_transfer_time'] = $transferTimes[$uri] ?? null;
            $jsonLd['@graph'][] = $ldValue;
        }

        file_put_contents(
            "stations-" . strtr(strtolower($provider['name']), UNWANTED_ARRAY) . ".json",
            json_encode($jsonLd, $jsonFlags)
        );
    }
}
/**
 * Placeholder for the routes export. It accepts the same arguments as the other
 * writers but has no body yet: nothing is read, written or returned.
 */
function createRoutesDataStructure($gtfsStations, $stopdata, $providers, $stopFrequencies, $handledDaysCount, $transferTimes)
{
    // Intentionally empty.
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Route indexing for Linked Connections, outputting Linked Data Fragments
//
// Every command line argument is the name of an agency. For each agency the
// connection pages within the time window below are walked through their
// hydra:next links, the connections are grouped per trip and one JSON-LD file
// is written per trip.

// Time window to index. The timestamps share one format, so they can be
// compared as plain strings.
$windowStart = '2018-11-05T03:00:00.000Z';
$windowEnd = '2018-12-01T01:00:00.000Z';
$jsonFlags = JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES;

// Extracts the departureTime query parameter of a page URL ('' when absent).
// Comparing this instead of the complete URL keeps the end-of-window check
// independent of the scheme: the original compared an "http://" page against
// an "https://" bound, which is always smaller, so the bound never applied.
$departureTimeOf = static function (string $url): string {
    parse_str((string)parse_url($url, PHP_URL_QUERY), $query);
    return isset($query['departureTime']) && is_string($query['departureTime'])
        ? $query['departureTime']
        : '';
};

foreach (array_slice($argv, 1) as $agency) {
    $trips = [];
    $page = "http://se.lc.bertmarcelis.be/$agency/connections?departureTime=$windowStart";
    while (!empty($page) && $departureTimeOf($page) < $windowEnd) {
        echo "page " . $page . PHP_EOL;
        $response = file_get_contents($page);
        $connections = $response === false ? null : json_decode($response, true);
        if (!is_array($connections) || !isset($connections['@graph'])) {
            echo "Warning: could not read $page, stopping" . PHP_EOL;
            break;
        }
        foreach ($connections['@graph'] as $connection) {
            $trips[$connection['gtfs:trip']][] = [
                'departureStop' => $connection['departureStop'],
                'arrivalStop' => $connection['arrivalStop'],
                'connection' => $connection['@id'],
                'page' => $page
            ];
        }
        // The last page has no successor.
        $page = $connections['hydra:next'] ?? null;
        //echo "next: " . $page . PHP_EOL;
    }

    @mkdir('vehicle');
    @mkdir('vehicle/' . $agency);
    foreach ($trips as $id => $trip) {
        $tripLd = [
            "@context" => [
                "xsd" => "http://www.w3.org/2001/XMLSchema#",
                "lc" => "http://semweb.mmlab.be/ns/linkedconnections#",
                "hydra" => "http://www.w3.org/ns/hydra/core#",
                "gtfs" => "http://vocab.gtfs.org/terms#",
                "Connection" => "lc:Connection",
                "arrivalStop" => [
                    "@type" => "@id",
                    "@id" => "lc:arrivalStop"
                ],
                "departureStop" => [
                    "@type" => "@id",
                    "@id" => "lc:departureStop"
                ],
                // NOTE(review): this redefines the JSON-LD keyword "@id" as a term;
                // kept as it was, confirm whether the key "connection" was intended.
                "@id" => [
                    "@type" => "@id",
                    "@id" => "lc:Connection"
                ]
            ],
            '@id' => $id,
            '@graph' => $trip
        ];

        // The output path is the part of the trip URI after the host, i.e. after
        // the first slash that follows the scheme.
        $tripUri = (string)$id;
        $pathStart = strlen($tripUri) > 10 ? strpos($tripUri, '/', 10) : false;
        if ($pathStart === false) {
            echo "Warning: cannot derive a file name from trip id $tripUri" . PHP_EOL;
            continue;
        }
        $file = substr($tripUri, $pathStart + 1);
        $folder = dirname($file);
        // Trip URIs may be nested deeper than vehicle/<agency>, so create the whole path.
        if ($folder !== '.' && !is_dir($folder)) {
            mkdir($folder, 0777, true);
        }
        file_put_contents($file . ".json", json_encode($tripLd, $jsonFlags));
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment