-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathScore_Scraper.php
More file actions
248 lines (222 loc) · 8.01 KB
/
Score_Scraper.php
File metadata and controls
248 lines (222 loc) · 8.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
<?php
/* Class ScoreScraper
* Version 1.0.0
* Copyright 2006 dyodyo
*
* Description
*
* The class scrapes score data for the major US sports leagues:
* NFL, MLB, NHL and NBA
* and the two major US College leagues: NCAA Men's Basketball and NCAA Men's Football
* from the ESPN mobile website.
*
* @param Values
* the Class is used via the main function 'fetchScore' (eg ScoreScraper->fetchScore)
* The function takes the following parameters:
* 'league' (string) !required : this sets the league where the scores will be from
* valid values for the 'league' parameter are:
* 'nba', 'nfl', 'mlb', 'nhl' : self explanatory
* 'ncb' = NCAA Men's basketball
* 'ncf' = NCAA Men's football
* Specifying a league outside of this values would return an error
*
* 'date' (string) optional: this is the date of when the scores would be taken
* This would default to the current date if none is specified
* this parameter is parsed using the 'strtotime' PHP function
*
* function return values:
* true : sports scores fetched; false otherwise
* if this function returns false the 'ScoreScraper->error' variable contains the error message
*
* NOTE:
* In case there are no games on the date specified (whether today or a specified future date),
* the function would still return TRUE but there the scores array would be null or zero-length
* However, an error message would still be returned in the 'error' string variable
*
* The class returns the scraped scores as a PHP array (ScoreScraper->scores) that follows this structure
* Array (0.. number of games) {
* gameDate (date): the Game Date
* isStarted (boolean): if the game has already started or not
*
* homeTeam (string) : the name of the host team
* homeTeamScore (integer) : the score of the host team (always zero if 'isStarted' == false)
*
* awayTeam (string) : the name of the visiting team
* awayTeamScore (integer) : the score of the visiting team (always zero if 'isStarted' == false)
*
* if 'isStarted' is True:
* period (byte) : the period number (either the number of
* the current Quarter, Period or Inning
* based on the league type specified
* periodTime (string): time remaining in the current period
* can be (min:sec) or either "top/bottom/middle" for baseball
*
* if 'isStarted' is False:
* gameTime (string) : the game time (eg '9:00 PM ET')
*
*
*
* Please see the 'test_score.php' file for an example of how to use the class
*
*
*
*
* LICENSE
*
* This script is free software; you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option) any later
* version.
*
* This script is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with this script; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
require_once "class_HTTPRetriever.php";
DEFINE ("BASE_URL", "http://scores.espn.go.com/{LEAGUE}/scoreboard");
class ScoreScraper {
var $http;
var $valid_leagues;
var $error;
var $scores;
var $fetchedDate;
function ScoreScraper (){
$this->http = &new HTTPRetriever();
$this->valid_leagues = array("nba","nfl","mlb","nhl","ncf","ncb");
$this->error = "";
$this->scores = array();
}
function fetchScore($league,$date=""){
$league = strtolower($league);
if (!in_array($league,$this->valid_leagues)) {
$this->error = "Invalid League Specified!";
return false;
}
if ($date!="") {
if (!$date=strtotime($date)) {
$this->error = "Invalid Date Specified!";
return false;
}
$this->fetchedDate = $date;
$date = "?date=".date("Ymd",$date);
}
$url = str_replace("{LEAGUE}",$league,BASE_URL).$date;
if (!$this->http->get($url)) {
$this->error= "HTTP request error: #{$this->http->result_code}: {$this->http->result_text}";
return false;
}
$html = $this->http->response;
if ($date=="") {
//no date specified in function call == "current scores"
//retrieve the date from within the fetched html page
preg_match("/<hr><b>(.*?)<\\/b>/",$html,$d);
if ((is_array($d)) && (sizeof($d)>0) ){
$this->fetchedDate=strtotime($d[1]);
} else {
//no date specified within page
//tag error but still continue;
$this->error = "No date found within fetched html.";
}
}
if (in_array($league, array("nfl","ncf") )) {
return $this->processFootballScore($html);
}
return $this->processScore($html);
}
function getScores($html,$gd=""){
preg_match_all("/<br><font size=\"?1\"?>(.*?)<\\/a>/", $html, $items, PREG_SET_ORDER);
if (!is_array($this->scores)) {$this->scores=array();}
if ($gd=="") $gd=$this->fetchedDate;
while (sizeof($items)>0){
$item=array_shift($items);
$item=$item[1];
if (strpos($item,"scoreboard")===false) {
preg_match("/^(.*?)<\\/font>/", $item, $s);
preg_match("/<a[^>]*>(.*?)$/", $item, $t);
preg_match("/<a\ href=\"(.*?)\">/", $item, $l);
if(sizeof($s)){ $s = trim(strip_tags($s[1])); };
if(sizeof($t)){ $t = trim(strip_tags($t[1])); };
if(sizeof($l)){ $l = trim(strip_tags($l[1])); };
if (strpos($s," ET")===false){
//game started
$sa = explode(" ",$s); $t1 = $sa[0];$t2 = $sa[1];$t3="";
$ta = explode(" ",$t);
if (count($ta)>4) {
$ht='';$at='';
$idx=array();foreach ($ta as $k=>$tt){if (is_numeric($tt)) {$idx[]=$k;}}
for ($n=0;$n<$idx[0];$n++) {$ht .=$ta[$n];}
for ($n=$idx[0]+1;$n<$idx[1];$n++) {$at .=$ta[$n];}
$hs=$ta[$idx[0]];$as=$ta[$idx[1]];
} else { $ht = $ta[0];$hs = $ta[1];$at = $ta[2];$as = $ta[3]; }
$isStarted = true;
} else {
$t1="";$t2="";$t3 = $s;
$ta = explode(" at ",$t);$ht = $ta[0];$at = $ta[1];$hs = 0;$as = 0;
$isStarted = false;
}
$this->scores[] = array( "isStarted" => $isStarted,
"gameDate"=> $gd,
"gameTime" => $t3,
"period" => $t2,
"periodRemain" => $t1,
"awayTeam" => $at,
"awayTeamScore" => $as,
"homeTeam" => $ht,
"homeTeamScore" => $hs,
);
}
}
return true;
}
function processScore ($html){
if ($html=="") {
$this->error="Invalid page returned!";
return false;
}
if ((strpos($html,'No games scheduled')!==false) || (strpos($html,'href="scoreboard?')===false) ) {
$this->error = "No Games Scheduled Today!";
$this->scores = null;
return true;
}
return $this->getScores($html);
}
function processFootballScore ($html){
if ($html=="") {
$this->error="Invalid page returned!";
return false;
}
if ((strpos($html,'No games scheduled')!==false) || (strpos($html,'href="scoreboard?')===false) ) {
$this->error = "No Games Scheduled Today!";
$this->scores = null;
return true;
}
preg_match("/<hr>(.*?) /",$html,$sc);
if (sizeof($sc)<0) {
$this->error = "Invalid page returned by site.";
return false;
}
$sc=$sc[1];
$go=true;
while ($go) {
$p1=strpos($sc,"<b>")+3;
$p2=strpos($sc,"<b>",$p1);
if (!$p2) {
$go=false;
$p2 = strlen($sc);
}
$test = substr($sc,$p1,strpos($sc,"</b>")-$p1).date(" Y",$this->fetchedDate);
$thisdate = strtotime($test);
if (!$this->getScores(substr($sc,$p1,$p2-$p1),$thisdate)) {
return false;
}
$sc = substr($sc,$p2,strlen($sc));
}
return true;
}
}
?>