User:Sajt/Script to rip game logs from nhl.com

This is a work in progress.

 array('num' => 1,  'city' => "Anaheim",      'wiki' => "Anaheim Ducks",                'hbg' => "#000000", 'htx' => "#F47937", 'mbg' => "#000000", 'mtx' => "#F47937"),	'atl' => array('num' => 2,  'city' => "Atlanta",      'wiki' => "Atlanta Thrashers"),	'bos' => array('num' => 3,  'city' => "Boston",       'wiki' => "Boston Bruins"),	'buf' => array('num' => 4,  'city' => "Buffalo",      'wiki' => "Buffalo Sabres",               'hbg' => "#002D62", 'htx' => "#FDBB30", 'mbg' => "#FDBB30", 'mtx' => "#FFFFFF"),	'cal' => array('num' => 5,  'city' => "Calgary",      'wiki' => "Calgary Flames"), 'car' => array('num' => 6, 'city' => "Carolina",     'wiki' => "Carolina Hurricanes"), 'chi' => array('num' => 7, 'city' => "Chicago",      'wiki' => "Chicago Blackhawks",           'hbg' => "#BF2F38", 'htx' => "#231F20", 'mbg' => "#231F20", 'mtx' => "#FFFFFF"), 'col' => array('num' => 8, 'city' => "Colorado",     'wiki' => "Colorado Avalanche"), 'cbj' => array('num' => 9, 'city' => "Columbus",     'wiki' => "Columbus Blue Jackets"), 'dal' => array('num' => 10, 'city' => "Dallas",      'wiki' => "Dallas Stars",                 'hbg' => "#000000", 'htx' => "#D5A10E", 'mbg' => "#D5A10E", 'mtx' => "#FFFFFF"), 'det' => array('num' => 11, 'city' => "Detroit",     'wiki' => "Detroit Red Wings"), 'edm' => array('num' => 12, 'city' => "Edmonton",    'wiki' => "Edmonton Oilers"), 'flo' => array('num' => 13, 'city' => "Florida",     'wiki' => "Florida Panthers"), 'lak' => array('num' => 14, 'city' => "Los Angeles", 'wiki' => "Los Angeles Kings",            'hbg' => "#231F20", 'htx' => "#C0C0C0", 'mbg' => "#393997", 'mtx' => "#FFFFFF"), 'min' => array('num' => 15, 'city' => "Minnesota",   'wiki' => "Minnesota Wild",               'hbg' => "#166D5F", 'htx' => "#EEE1C5", 'mbg' => "#EEE1C5", 'mtx' => "#000000"), 'mtl' => array('num' => 16, 'city' => "Montreal",    'wiki' => "Montreal Canadiens"), 'nsh' => array('num' => 17, 'city' => "Nashville",   'wiki' => "Nashville Predators",          
'hbg' => "#FDBB30", 'htx' => "#002D62", 'mbg' => "#002D62", 'mtx' => "#FFFFFF"), 'njd' => array('num' => 18, 'city' => "New Jersey",  'wiki' => "New Jersey Devils"), 'nyi' => array('num' => 19, 'city' => "NY Islanders", 'wiki' => "New York Islanders"), 'nyr' => array('num' => 20, 'city' => "NY Rangers",  'wiki' => "New York Rangers"), 'ott' => array('num' => 21, 'city' => "Ottawa",      'wiki' => "Ottawa Senators"), 'phi' => array('num' => 22, 'city' => "Philadelphia", 'wiki' => "Philadelphia Flyers"), 'phx' => array('num' => 23, 'city' => "Phoenix",     'wiki' => "Phoenix Coyotes",              'hbg' => "#900028", 'htx' => "#EEE1C5", 'mbg' => "#EEE1C5", 'mtx' => "#900028"), 'pit' => array('num' => 24, 'city' => "Pittsburgh",  'wiki' => "Pittsburgh Penguins"), 'sjs' => array('num' => 25, 'city' => "San Jose",    'wiki' => "San Jose Sharks"), 'stl' => array('num' => 26, 'city' => "St Louis",    'wiki' => "St. Louis Blues (ice hockey)", 'hbg' => "#092c57", 'htx' => "#ffc322", 'mbg' => "#ffc322", 'mtx' => "#092c57"), 'tbl' => array('num' => 27, 'city' => "Tampa Bay",   'wiki' => "Tampa Bay Lightning"), 'tor' => array('num' => 28, 'city' => "Toronto",     'wiki' => "Toronto Maple Leafs"), 'van' => array('num' => 29, 'city' => "Vancouver",   'wiki' => "Vancouver Canucks",            'hbg' => "#003e7e", 'htx' => "#c9c0bb", 'mbg' => "#003e7e", 'mtx' => "#c9c0bb"), 'wsh' => array('num' => 30, 'city' => "Washington",  'wiki' => "Washington Capitals") );

// Stop immediately when the requested team abbreviation is not a key of $teams.
if (!isset($teams[$this_team])) {
	die("bad team");
}

// sigh
// Goalie lookup table. The key is the goalie's name exactly as it is matched
// in the game summary ("LAST, FIRST" in capitals); the value is
// array(short name printed in the table, wiki article title to link to).
$goalies = array(
	'ANDERSON, CRAIG' => array('Anderson', 'Craig Anderson (ice hockey)'),
	'AULD, ALEX' => array('Auld', 'Alex Auld'),
	// The goaltender's article is spelled "Niklas"; "Nicklas Bäckström" is a different player.
	'BACKSTROM, NIKLAS' => array('Backstrom', 'Niklas Bäckström'),
	'BIRON, MARTIN' => array('Biron', 'Martin Biron'),
	'BISHOP, BEN' => array('Bishop', 'Ben Bishop'),
	'BOUCHER, BRIAN' => array('Boucher', 'Brian Boucher'),
	'BRODEUR, MARTIN' => array('Brodeur', 'Martin Brodeur'),
	'BRYZGALOV, ILJA' => array('Bryzgalov', 'Ilya Bryzgalov'),
	'BUDAJ, PETER' => array('Budaj', 'Peter Budaj'),
	'CLIMIE, MATT' => array('Climie', 'Matt Climie'),
	'CONKLIN, TY' => array('Conklin', 'Ty Conklin'),
	'CRAWFORD, COREY' => array('Crawford', 'Corey Crawford'),
	'DIPIETRO, RICK' => array('DiPietro', 'Rick DiPietro'),
	'DROUIN-DESLAURIERS, JEFF' => array('Deslauriers', 'Jeff Drouin-Deslauriers'),
	'DUBIELEWICZ, WADE' => array('Dubielewicz', 'Wade Dubielewicz'),
	'ELLIOTT, BRIAN' => array('Elliott', 'Brian Elliott'),
	'ELLIS, DAN' => array('Ellis', 'Dan Ellis'),
	'ENROTH, JHONAS' => array('Enroth', 'Jhonas Enroth'),
	'ERSBERG, ERIK' => array('Ersberg', 'Erik Ersberg'),
	'FERNANDEZ, MANNY' => array('Fernandez', 'Manny Fernandez (ice hockey)'),
	'FLEURY, MARC-ANDRE' => array('Fleury', 'Marc-André Fleury'),
	'GARON, MATHIEU' => array('Garon', 'Mathieu Garon'),
	'GERBER, MARTIN' => array('Gerber', 'Martin Gerber'),
	'GIGUERE, JEAN-SEBASTIEN' => array('Giguere', 'Jean-Sébastien Giguère'),
	'HALAK, JAROSLAV' => array('Halak', 'Jaroslav Halák'),
	'HARDING, JOSH' => array('Harding', 'Josh Harding'),
	'HEDBERG, JOHAN' => array('Hedberg', 'Johan Hedberg'),
	'HILLER, JONAS' => array('Hiller', 'Jonas Hiller'),
	'HUET, CRISTOBAL' => array('Huet', 'Cristobal Huet'),
	'JOHNSON, BRENT' => array('Johnson', 'Brent Johnson'),
	'JOSEPH, CURTIS' => array('Joseph', 'Curtis Joseph'),
	'KHABIBULIN, NIKOLAI' => array('Khabibulin', 'Nikolai Khabibulin'),
	'KIPRUSOFF, MIIKKA' => array('Kiprusoff', 'Miikka Kiprusoff'),
	'KOLZIG, OLAF' => array('Kolzig', 'Olaf Kölzig'),
	'LABARBERA, JASON' => array('LaBarbera', 'Jason LaBarbera'),
	'LALIME, PATRICK' => array('Lalime', 'Patrick Lalime'),
	'LECLAIRE, PASCAL' => array('Leclaire', 'Pascal Leclaire'),
	'LEGACE, MANNY' => array('Legace', 'Manny Legacé'),
	'LEHTONEN, KARI' => array('Lehtonen', 'Kari Lehtonen'),
	'LEIGHTON, MICHAEL' => array('Leighton', 'Michael Leighton'),
	'LUNDQVIST, HENRIK' => array('Lundqvist', 'Henrik Lundqvist'),
	'LENEVEU, DAVID' => array('LeNeveu', 'David LeNeveu'),
	'LUONGO, ROBERTO' => array('Luongo', 'Roberto Luongo'),
	'MACDONALD, JOEY' => array('MacDonald', 'Joey MacDonald'),
	'MACINTYRE, DREW' => array('MacIntyre', 'Drew MacIntyre'),
	'MANNINO, PETER' => array('Mannino', 'Peter Mannino'),
	'MASON, CHRIS' => array('Mason', 'Chris Mason'),
	'MASON, STEVE' => array('Mason', 'Steve Mason (ice hockey)'),
	'MCKENNA, MIKE' => array('McKenna', 'Mike McKenna (ice hockey)'),
	'MILLER, RYAN' => array('Miller', 'Ryan Miller (ice hockey)'),
	'MONTOYA, AL' => array('Montoya', 'Al Montoya'),
	'NABOKOV, EVGENI' => array('Nabokov', 'Evgeni Nabokov'),
	'NIEMI, ANTTI' => array('Niemi', 'Antti Niemi (ice hockey)'),
	'NIITTYMAKI, ANTERO' => array('Niittymaki', 'Antero Niittymäki'),
	'NORRENA, FREDRIK' => array('Norrena', 'Fredrik Norrena'),
	'OSGOOD, CHRIS' => array('Osgood', 'Chris Osgood'),
	'PAVELEC, ONDREJ' => array('Pavelec', 'Ondrej Pavelec'), // note- the 'r' in Ondrej is actually supposed to have a caron on it
	'PRICE, CAREY' => array('Price', 'Carey Price'),
	'QUICK, JONATHAN' => array('Quick', 'Jonathan Quick'),
	'RAYCROFT, ANDREW' => array('Raycroft', 'Andrew Raycroft'),
	'RINNE, PEKKA' => array('Rinne', 'Pekka Rinne'),
	'ROLOSON, DWAYNE' => array('Roloson', 'Dwayne Roloson'),
	'SABOURIN, DANY' => array('Sabourin', 'Dany Sabourin'),
	'SANFORD, CURTIS' => array('Sanford', 'Curtis Sanford'),
	'SCHNEIDER, CORY' => array('Schneider', 'Cory Schneider'),
	'SMITH, MIKE' => array('Smith', 'Mike Smith (ice hockey b. 1982)'),
	'STEPHAN, TOBIAS' => array('Stephan', 'Tobias Stephan'),
	'TAYLOR, DANIEL' => array('Taylor', 'Daniel Taylor (ice hockey)'),
	'TELLQVIST, MIKAEL' => array('Tellqvist', 'Mikael Tellqvist'),
	'THEODORE, JOSE' => array('Theodore', 'José Théodore'),
	'THOMAS, TIM' => array('Thomas', 'Tim Thomas (ice hockey)'),
	'TORDJMAN, JOSH' => array('Tordjman', 'Josh Tordjman'),
	'TOSKALA, VESA' => array('Toskala', 'Vesa Toskala'),
	'TURCO, MARTY' => array('Turco', 'Marty Turco'),
	'VALIQUETTE, STEPHEN' => array('Valiquette', 'Stephen Valiquette'),
	'VOKOUN, TOMAS' => array('Vokoun', 'Tomáš Vokoun'),
	'WARD, CAM' => array('Ward', 'Cam Ward')
);

/**
 * Find the key in the global $teams table whose 'city' entry equals $city,
 * ignoring letter case.
 *
 * Returns the key of the first matching team. If no team matches, the script
 * is terminated with a "find_team failed" message.
 */
function find_team($city) {
	global $teams;

	$wanted = strtolower($city);

	foreach ($teams as $abbr => $info) {
		if (strtolower($info['city']) == $wanted) {
			return $abbr;
		}
	}

	die("find_team failed ($city)");
}

// Download the team's schedule page from nhl.com into $contents.
// $preseason and $this_team are read here but set outside this part of the script.
$ch = curl_init();

if ($preseason) {
	// the pre-season schedule is requested by POSTing the schedule form
	$fields = array();
	$fields['formids'] = 'PropertySelection,PropertySelection_0,PropertySelection_1,PropertySelection_2';
	$fields['component'] = '$SimpleForm';
	$fields['page'] = 'schedulebyseason';
	$fields['service'] = 'direct';
	$fields['submitmode'] = '';
	$fields['submitname'] = '';
	$fields['PropertySelection'] = '0'; // 2008-2009 season
	$fields['PropertySelection_0'] = '0'; // pre-season
	$fields['PropertySelection_1'] = $teams[$this_team]['num']; // team number
	$fields['PropertySelection_2'] = 0; // all networks

	curl_setopt($ch, CURLOPT_URL, "http://www.nhl.com/ice/app");
	curl_setopt($ch, CURLOPT_POST, TRUE);
	curl_setopt($ch, CURLOPT_POSTFIELDS, $fields);
}
else {
	curl_setopt($ch, CURLOPT_URL, "http://www.nhl.com/ice/schedulebyseason.htm?team=$this_team");
}

curl_setopt($ch, CURLOPT_FAILONERROR, TRUE); // fail silently if http code returned is >= 400
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE); // allow redirects ("Location: " in header)
curl_setopt($ch, CURLOPT_HEADER, FALSE); // don't include header in output
curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE); // return value of curl_exec instead of printing to stdout

$contents = curl_exec($ch);

// curl_exec returns FALSE when the transfer fails; without a page there is
// nothing to parse, so stop here instead of printing an empty game log
if ($contents === FALSE) {
	$error = curl_error($ch); // must be read before the handle is closed
	curl_close($ch);
	die("could not download schedule: $error");
}

curl_close($ch);

/**
 * Search $contents for $pattern, starting at byte offset $pos.
 *
 * On a match, $pos (passed by reference) is moved to the first byte after the
 * whole match and the text of the first capture group is returned.
 * When nothing matches, FALSE is returned and $pos is left unchanged.
 *
 * $crash_on_fail is accepted but never read by this function.
 */
function get_field($pattern, $contents, &$pos, $crash_on_fail = TRUE) {
	$found = preg_match($pattern, $contents, $matches, PREG_OFFSET_CAPTURE, $pos);
	if (!$found) {
		return FALSE;
	}

	// with PREG_OFFSET_CAPTURE every entry is array(text, byte offset)
	$whole_text = $matches[0][0];
	$whole_offset = $matches[0][1];
	$pos = $whole_offset + strlen($whole_text);

	return $matches[1][0];
}

/**
 * Walk through the schedule page and build one record per game.
 *
 * Each record is an array with the keys 'date' (unix timestamp), 'away',
 * 'final', 'awayscore' / 'homescore' / 'overtime' (only for finished games),
 * 'id' (FALSE when there is no recap) and the placeholders 'number',
 * 'decision', 'attendance', 'result', 'record' and 'pts' that the rest of the
 * script fills in. The list is returned once no further date can be found.
 *
 * NOTE(review): this copy of the script went through a wiki renderer that
 * removed everything that looked like an HTML tag, so several patterns below
 * lost the markup they are supposed to match. They are kept exactly as they
 * survived and marked; they must be restored against the real schedule page
 * before this function can work.
 */
function get_games($contents) {
	$games = array();
	$pos = 0;

	for (;;) {
		$game = array();

		// NOTE(review): the markup in front of the capture group was stripped from this pattern
		$game['date'] = get_field("/(.*?)<\/div>/", $contents, $pos);

		if ($game['date'] === FALSE) return $games;

		// NOTE(review): this looks like the remains of three separate lookups fused together
		// by the tag stripping. $result (used just below) and $game['home'] (read by the code
		// that processes the returned games) are never assigned in the surviving text —
		// presumably they were the second and third lookup. Restore from the original script.
		$game['away'] = get_field("/(.*?)(.*?)(.*?)<\/div>/s", $contents, $pos);

		// The surviving text read "FINAL ]*?>[A-Z]{3} ...": everything from a "<" up to the
		// ">" inside "[^>]" had been removed, twice. "<span[^>" is put back here; the tag name
		// is inferred from the closing <\/span> in the same pattern — confirm against the page.
		if (preg_match("/FINAL <span[^>]*?>[A-Z]{3} \(([0-9]+?)\)<\/span> \- <span[^>]*?>[A-Z]{3} \(([0-9]+?)\)<\/span>/", $result, $matches)) {
			$game['final'] = TRUE;
			$game['awayscore'] = $matches[1];
			$game['homescore'] = $matches[2];

			if (strpos($result, " SO") !== FALSE) $game['overtime'] = "SO";
			else if (strpos($result, " OT") !== FALSE) $game['overtime'] = "OT";
			else $game['overtime'] = FALSE;
		}
		else {
			$game['final'] = FALSE;
		}

		// get recap... which might be missing (in some preseason games)
		// note that if the recap is missing, we have no game id and can't look up the scoresheet for goalies and attendance
		// so if you know those, fill them in by hand from some other source
		if ($game['final']) {
			$old_pos = $pos;

			$alerts_pos = strpos($contents, 'ALERTS', $pos); // this is supposed to be after the recap

			// NOTE(review): the whole pattern (including its capture group) was stripped; only the delimiters survive
			$game['id'] = get_field("//", $contents, $pos);

			if ($pos > $alerts_pos) {
				// no recap... rewind
				$pos = $old_pos;
				$game['id'] = FALSE;
			}
		}
		else $game['id'] = FALSE;

		// date is in "Sun Mar 29, 2009" format... chop it to "Mar 29, 2009"
		$game['date'] = substr($game['date'], strpos($game['date'], ' ') + strlen(' '));
		// now parse it
		date_default_timezone_set("America/Toronto"); // well, nhl.com runs on crappy eastern time
		$game['date'] = strtotime($game['date']);
		//die($game['date'] . " to " . $date . " is " . strftime('%B %d, %Y', $date));

		// filled in later
		$game['number'] = -1;
		$game['decision'] = '?';
		$game['attendance'] = '?';
		$game['result'] = -1;
		$game['record'] = -1;
		$game['pts'] = -1;

		$games[] = $game;
	}
}

$games = get_games($contents);

// sort games by date because the nhl schedule page puts the upcoming games at the top
// (a closure is used because create_function() no longer exists in PHP 8; the
// comparator also returns 0 for equal dates, as usort expects)
usort($games, function ($a, $b) {
	if ($a['date'] == $b['date']) return 0;
	return ($a['date'] < $b['date']) ? -1 : 1;
});

// determine whether each game is home or away, and calculate the team record at each game
$number = 1;
$record = array(0, 0, 0); // [0] wins, [1] regulation losses, [2] overtime/shootout losses
foreach ($games as &$game) {
	$game['number'] = $number++;

	// it is a home game when the home city resolves to the team being processed
	$teamabbr = find_team($game['home']);
	$game['homeaway'] = ($teamabbr == $this_team) ? 'home' : 'away';

	// games that have not been played keep their placeholder result/record/pts
	if (!$game['final']) continue;

	// result: 2 = win, 1 = loss in overtime or shootout, 0 = loss in regulation
	if (($game['homeaway'] == 'home' && $game['homescore'] > $game['awayscore']) ||
	    ($game['homeaway'] == 'away' && $game['awayscore'] > $game['homescore'])) $game['result'] = 2;
	else if ($game['overtime']) $game['result'] = 1;
	else $game['result'] = 0;

	if ($game['result'] == 2) $record[0]++;
	if ($game['result'] == 1) $record[2]++;
	if ($game['result'] == 0) $record[1]++;

	// running record and points after this game (2 per win, 1 per overtime/shootout loss)
	$game['record'] = $record[0]. '–' . $record[1]. '–' . $record[2];
	$game['pts'] = $record[0] * 2 + $record[2];
}
unset($game); // drop the reference left behind by the by-reference foreach

// now... download the game summaries of every game so we can get the attendance and goalie names...

$goalie_error = FALSE;

foreach ($games as &$game) {
	if (!$game['final']) continue;

	// without a game id there is no summary to look up; attendance and decision stay '?'
	if (!$game['id']) continue;

	// the report file name uses the id without its first four characters
	$thing = substr($game['id'], 4);
	$url = "http://www.nhl.com/scores/htmlreports/20082009/GS$thing.HTM";

	$ch = curl_init();

	curl_setopt($ch, CURLOPT_URL, $url);
	curl_setopt($ch, CURLOPT_FAILONERROR, TRUE); // fail silently if http code returned is >= 400
	curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE); // allow redirects ("Location: " in header)
	curl_setopt($ch, CURLOPT_HEADER, FALSE); // don't include header in output
	curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE); // return value of curl_exec instead of printing to stdout

	$contents = curl_exec($ch);

	curl_close($ch);

	// a failed download is treated like a blank page: leave the '?' placeholders in place
	if ($contents === FALSE) continue;

	// some nhl.com game summary pages are glitched (e.g. http://www.nhl.com/scores/htmlreports/20082009/GS021077.HTM)
	// they are blank, so we have to check for them instead of crapping out

	if (preg_match("/Attendance ([0-9,]+)/", $contents, $matches) ||
	    preg_match("/Ass\.\/Att\. ([0-9,]+)/", $contents, $matches)) $game['attendance'] = $matches[1];

	// look for the goalie credited with the win, or with the (overtime) loss
	// NOTE(review): this pattern probably lost leading markup when the script was
	// published (other patterns in the script did) — confirm against a real report
	$lookfor = ($game['result'] == 2) ? 'W' : '(OT|L)';
	if (preg_match("/(.+?) \($lookfor\) <\/td>/", $contents, $matches)) {
		$game['decision'] = $matches[1];

		// report every goalie missing from $goalies before giving up
		if (!isset($goalies[$game['decision']])) {
			echo "unknown goalie \"{$game['decision']}\"\n";
			$goalie_error = TRUE;
		}
	}
}
unset($game); // drop the reference left behind by the by-reference foreach

if ($goalie_error) die;

// put the games into months and calculate the per-month records
// (date('F') gives the full English month name; strftime() is deprecated since PHP 8.1)
$this_month_name = date('F');

// slot of each result code inside a record triple:
// result 2 (win) -> [0], result 0 (regulation loss) -> [1], result 1 (overtime/shootout loss) -> [2]
$record_slot = array(2 => 0, 0 => 1, 1 => 2);

$months = array();
foreach ($games as $game) {
	$month_name = date('F', $game['date']);
	if ($preseason) $month_name = 'preseason'; // lump preseason games into one 'month'

	if (!isset($months[$month_name])) {
		$months[$month_name] = array();
		$months[$month_name]['games'] = array();
		$months[$month_name]['overall_record'] = array(0, 0, 0);
		$months[$month_name]['home_record'] = array(0, 0, 0);
		$months[$month_name]['away_record'] = array(0, 0, 0);
	}
	$months[$month_name]['games'][] = $game;

	// games without a result (still -1) are listed but not counted
	if (!isset($record_slot[$game['result']])) continue;
	$slot = $record_slot[$game['result']];

	$months[$month_name]['overall_record'][$slot]++;

	$split = ($game['homeaway'] == 'home') ? 'home_record' : 'away_record';
	$months[$month_name][$split][$slot]++;
}
unset($games); // won't be needing THIS anymore...

// row background colours for the three possible results
$wincolour = "#ccffcc";
$losecolour = "#ffcccc";
$otlcolour = "#ffffff";

// header/month colours of the selected team; teams whose entry has no colour
// keys get empty strings, which is what the missing keys produced before,
// minus the undefined-index notices
$t = $teams[$this_team];
$header_background = isset($t['hbg']) ? $t['hbg'] : '';
$header_text = isset($t['htx']) ? $t['htx'] : '';
$month_background = isset($t['mbg']) ? $t['mbg'] : '';
$month_text = isset($t['mtx']) ? $t['mtx'] : '';

// full legend above the log, unless the compact legend inside the table was requested
if (!$preseason && !$minilegend) {
	echo "* Green background indicates win (2 points).\n";
	echo "* Red background indicates regulation loss (0 points).\n";
	echo "* White background indicates overtime/shootout loss (1 point).\n";
	echo "\n";
}

$goalie_links = array(); // only the first appearance of each goalie is a link

/**
 * Print one collapsible wiki table holding the games of a single month.
 *
 * $title     heading shown in the table's title row
 * $month     array with 'games' plus the 'overall_record', 'home_record' and
 *            'away_record' triples (wins, regulation losses, overtime/shootout losses)
 * $collapsed TRUE to emit the table in its collapsed state
 *
 * Reads the globals listed below and appends to $goalie_links, so that each
 * goalie is only linked the first time he appears anywhere in the log.
 *
 * NOTE(review): the wiki-link brackets in three string literals were eaten
 * when this script was published on a wiki page, leaving syntax errors. The
 * team and goalie links are rebuilt from what survived; the recap URL prefix
 * had to be reconstructed — see the note next to it.
 */
function print_month($title, $month, $collapsed) {
	global $teams; global $goalies; global $goalie_links;
	global $wincolour, $losecolour, $otlcolour;
	global $month_background, $month_text;
	global $preseason;

	$class = $collapsed ? "toccolours collapsible collapsed" : "toccolours collapsible";
	if ($preseason) echo "{| class=\"$class\" width=90% style=\"clear:both; margin:1.5em auto; text-align:center\"\n";
	else echo "{| class=\"$class\" width=100%\n";

	$ov_rec = $month['overall_record'][0]. '–' . $month['overall_record'][1]. '–' . $month['overall_record'][2];
	$hm_rec = $month['home_record'][0]. '–' . $month['home_record'][1]. '–' . $month['home_record'][2];
	$aw_rec = $month['away_record'][0]. '–' . $month['away_record'][1]. '–' . $month['away_record'][2];

	echo "|-\n";
	echo "! colspan=11 style=\"background:$month_background; color:$month_text;\" | $title: $ov_rec (Home: $hm_rec ; Road: $aw_rec)\n";
	echo "|- align=\"center\" bgcolor=\"#dddddd\"\n";
	// the pre-season table has no points column
	if ($preseason) echo "| # || Date || Visitor || Score || Home || OT || Decision || Attendance || Record || Recap\n";
	else echo "| # || Date || Visitor || Score || Home || OT || Decision || Attendance || Record || Pts || Recap\n";

	foreach ($month['games'] as $game) {
		$number = $game['number'];

		// full month name and day without a leading zero, e.g. "March 9"
		// (date() replaces strftime(), which is deprecated since PHP 8.1)
		$date = date('F j', $game['date']);

		$home_team = find_team($game['home']);
		$home_team = $teams[$home_team];
		$home_city = ($home_team['city'] == 'St Louis') ? 'St. Louis' : $home_team['city'];
		$away_team = find_team($game['away']);
		$away_team = $teams[$away_team];
		$away_city = ($away_team['city'] == 'St Louis') ? 'St. Louis' : $away_team['city'];

		// the team this log belongs to is printed as plain text, the opponent as a
		// piped link to the article named in its 'wiki' entry
		$home = ($game['homeaway'] == 'home') ? $home_city : ('[[' . $home_team['wiki'] . "|$home_city]]");
		$away = ($game['homeaway'] == 'away') ? $away_city : ('[[' . $away_team['wiki'] . "|$away_city]]");

		if ($game['final']) {
			$goodguy_score = ($game['homeaway'] == 'away') ? $game['awayscore'] : $game['homescore'];
			$badguy_score = ($game['homeaway'] == 'away') ? $game['homescore'] : $game['awayscore'];

			if ($goodguy_score > $badguy_score) $bgcolour = $wincolour;
			else if ($game['overtime']) $bgcolour = $otlcolour;
			else $bgcolour = $losecolour;
			$bgcolour = " bgcolor=\"$bgcolour\"";

			$score = $game['awayscore']. '–' . $game['homescore'];

			$ot = $game['overtime'] ? $game['overtime'] : '';

			if ($game['decision'] != '?') {
				if (in_array($game['decision'], $goalie_links)) $decision = $goalies[$game['decision']][0];
				else {
					// first appearance: link the article title, display the short name
					$decision = '[[' . $goalies[$game['decision']][1]. '|' . $goalies[$game['decision']][0]. ']]';
					$goalie_links[] = $game['decision'];
				}
			}
			else $decision = '?';

			$attendance = $game['attendance'];

			$record = $game['record'];
			$pts = $game['pts'];
			// NOTE(review): only "[", the game id and "]" survived of this external link;
			// the URL prefix below is a reconstruction and must be checked against nhl.com
			$recap = $game['id'] ? ('[http://www.nhl.com/ice/recap.htm?id=' . $game['id'] . ']') : '';
		}
		else {
			// game not played yet: only number, date and teams are shown
			$bgcolour = '';

			$score = '';
			$ot = '';
			$decision = '';
			$attendance = '';
			$record = '';
			$pts = '';
			$recap = '';
		}

		echo "|- align=\"center\"$bgcolour\n";
		if ($preseason) echo "| $number || $date || $away || $score || $home || $ot || $decision || $attendance || $record || $recap\n";
		else echo "| $number || $date || $away || $score || $home || $ot || $decision || $attendance || $record || $pts || $recap\n";
	}

	echo "|}\n";
}

// Emit the finished game log: either the single pre-season table, or an outer
// table that wraps one collapsible table per month.
if (!$preseason) {
	echo "{| class=\"toccolours\" width=90% style=\"clear:both; margin:1.5em auto; text-align:center;\"\n"
		. "|-\n"
		. "! colspan=11 style=\"background:$header_background; color:$header_text;\" | 2008–09 Game Log\n";

	foreach ($months as $label => $bucket) {
		echo "|-\n" . "| colspan=11 |\n";

		// every month except the current one starts out collapsed
		$start_collapsed = ($label != $this_month_name);
		print_month($label, $bucket, $start_collapsed);
	}

	if ($minilegend) {
		// NOTE(review): the three blank lines after "Legend:" may be what is left of
		// stripped markup — confirm against the original script
		echo "|-\n"
			. "| colspan=11 align=\"center\"|\n"
			. "Legend:\n"
			. "\n"
			. "\n"
			. "\n";
	}

	echo "|}\n";
}
else {
	print_month("2008 Pre-season Game Log", $months['preseason'], TRUE);
}

?>