Created
May 6, 2022 13:49
-
-
Save benpackard/fa0140a1951aeb2b4a3c3d1a2cc2617f to your computer and use it in GitHub Desktop.
This PHP script is supposed to scrape MLB scores from ESPN but stopped working recently.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Plain record for one completed game taken from an ESPN scoreboard page.
 *
 * Every field is public and starts out as null; get_games() fills them in.
 */
class Game {
    /** League abbreviation, a dash, then the ESPN event uid. */
    public $espn_id;

    /** DateTime of the game, converted to the America/New_York zone. */
    public $date;

    /** League abbreviation immediately followed by the first competitor's id. */
    public $home_team;

    /** League abbreviation immediately followed by the second competitor's id. */
    public $visiting_team;

    /** Score value reported for the first competitor. */
    public $home_score;

    /** Score value reported for the second competitor. */
    public $visiting_score;

    /** 1 when the scoreboard's season type is "pre", otherwise 0. */
    public $is_preseason;
}
// Script entry point: scrape every scoreboard URL and gather all completed
// games into one flat list, in the order the pages are visited.
$urls = build_URLs();
$games = array();
foreach ($urls as $url) {
    // Append one game at a time; the end result matches merging the lists.
    foreach (get_games($url) as $scraped_game) {
        $games[] = $scraped_game;
    }
}
/**
 * Builds the ESPN MLB scoreboard URLs for yesterday and today.
 *
 * Dates are formatted as Ymd using PHP's default timezone.
 *
 * @return array Two URL strings: yesterday's scoreboard first, then today's.
 */
function build_URLs() {
    $prefix = "http://www.espn.com/mlb/scoreboard/_/date/";

    $current = date('Ymd');
    $previous = date('Ymd', strtotime("-1 days"));

    return array(
        $prefix . $previous,
        $prefix . $current,
    );
}
/**
 * Downloads one ESPN scoreboard page and extracts its completed games.
 *
 * The page is expected to embed its data as a JSON literal assigned to
 * window.espn.scoreboardData inside a <script> tag. When the page cannot be
 * downloaded, or that literal is missing or is not the expected JSON shape
 * (for example because the page layout changed), a diagnostic line is echoed
 * and an empty array is returned instead of dereferencing null.
 *
 * @param string $url Scoreboard page URL, as produced by build_URLs().
 * @return array List of Game objects, one per completed event on the page.
 */
function get_games($url) {
    $games = array();

    $page = file_get_contents($url);
    if ($page === false) {
        echo "\nCould not download scoreboard page " . $url;
        return $games;
    }

    $substring = get_string_between($page, "<script>window.espn.scoreboardData = ", ";window.espn.scoreboardSettings");
    $json = json_decode($substring);

    // json_decode() yields null for an empty or malformed payload; also make
    // sure the pieces read below actually exist before touching them.
    if (!is_object($json)
        || !isset($json->leagues[0]->abbreviation)
        || !isset($json->events)
        || !is_array($json->events)) {
        echo "\nCould not find scoreboard data in page " . $url;
        return $games;
    }

    // game IDs are not unique across leagues, so everything is prefixed with the league
    $league = $json->leagues[0]->abbreviation;
    $is_preseason = isset($json->leagues[0]->season->type->abbreviation)
        && $json->leagues[0]->season->type->abbreviation === "pre";

    // The target zone is the same for every game, so build it once.
    $time_zone = new DateTimeZone('America/New_York');

    foreach ($json->events as $event) {
        // skip games that aren't completed (or that carry no status at all)
        if (!isset($event->status->type->completed) || $event->status->type->completed !== true) {
            continue;
        }

        // check that each event has one competition with two teams
        if (!isset($event->competitions) || !is_array($event->competitions) || count($event->competitions) != 1) {
            echo "\nMultiple or zero competitions found for event " . $event->uid . " in league " . $league;
            continue;
        }
        $competition = $event->competitions[0];
        if (!isset($competition->competitors) || !is_array($competition->competitors) || count($competition->competitors) != 2) {
            echo "\nMore or fewer than two competitors found for event " . $event->uid . " in league " . $league;
            continue;
        }
        $competitors = $competition->competitors;

        $game = new Game();
        $game->date = new DateTime($event->date);
        $game->date->setTimezone($time_zone);
        $game->espn_id = $league . "-" . $event->uid;
        $game->is_preseason = ($is_preseason ? 1 : 0);
        // NOTE(review): this assumes the feed lists the home team first and the
        // visiting team second - confirm against the current ESPN payload.
        $game->home_team = $league . $competitors[0]->id;
        $game->visiting_team = $league . $competitors[1]->id;
        $game->home_score = $competitors[0]->score;
        $game->visiting_score = $competitors[1]->score;
        $games[] = $game;
    }

    return $games;
}
/**
 * Returns the text found between the first occurrence of $start and the
 * next occurrence of $end that follows it.
 *
 * @param string $string Text to search.
 * @param string $start  Delimiter that precedes the wanted text.
 * @param string $end    Delimiter that follows the wanted text.
 * @return string The text between the delimiters, or '' when $start is empty
 *                or either delimiter cannot be found.
 */
function get_string_between($string, $start, $end) {
    // An empty start delimiter has always produced an empty result.
    if ($start === '') {
        return '';
    }

    $ini = strpos($string, $start);
    // Strict comparison: a match at offset 0 is valid, only false means "absent".
    if ($ini === false) {
        return '';
    }
    $ini += strlen($start);

    $stop = strpos($string, $end, $ini);
    // Without this check a missing end delimiter became a negative length and
    // substr() returned an arbitrary slice of the remaining text.
    if ($stop === false) {
        return '';
    }

    return substr($string, $ini, $stop - $ini);
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I'm no PHP expert, but I took a look and I don't see anything wrong with the code.
I'm not sure what the rest of your bot code looks like, but my guess is that the MLB website changed a bit and the output no longer works with your other code?
Your Game Objects look like this, which seems like an output that's workable for whatever code it feeds into