From 7ad2fe539c9f42a340c9a47c2444e741101c691d Mon Sep 17 00:00:00 2001
From: Incind
Date: Mon, 2 Jan 2017 19:40:54 -0700
Subject: [PATCH 1/5] Added the other types methods to get player stats. Added an annual stats merge method.

---
Review notes (ignored by git-am, not part of the commit message):
  * all_annual_stats filters rows with .loc instead of .ix (.ix was
    deprecated in pandas 0.20 and removed in 1.0; .loc with a boolean
    mask selects the same rows).
  * DataFrame.merge receives how=/on= by keyword instead of by position.

 sportsref/nfl/players.py | 105 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)

diff --git a/sportsref/nfl/players.py b/sportsref/nfl/players.py
index 9782261..b9799af 100644
--- a/sportsref/nfl/players.py
+++ b/sportsref/nfl/players.py
@@ -233,6 +233,111 @@ def rushing_and_receiving(self, kind='R'):
         df = sportsref.utils.parse_table(table)
         return df
 
+    @sportsref.decorators.memoize
+    @sportsref.decorators.kind_rpb(include_type=True)
+    def defense_and_fumbles(self, kind='R'):
+        """Gets yearly defense/fumble stats for the player.
+
+        :kind: One of 'R', 'P', or 'B'. Case-insensitive; defaults to 'R'.
+        :returns: Pandas DataFrame with defense/fumble stats.
+        """
+        doc = self.get_doc()
+        table = (doc('#all_defense') if kind == 'R'
+                 else doc('#all_defense_playoffs'))
+        df = sportsref.utils.parse_table(table)
+        return df
+
+    @sportsref.decorators.memoize
+    @sportsref.decorators.kind_rpb(include_type=True)
+    def kick_and_punt_returns(self, kind='R'):
+        """Gets yearly kick/punt return stats for the player.
+
+        :kind: One of 'R', 'P', or 'B'. Case-insensitive; defaults to 'R'.
+        :returns: Pandas DataFrame with kick/punt return stats.
+        """
+        doc = self.get_doc()
+        table = (doc('#all_returns') if kind == 'R'
+                 else doc('#all_returns_playoffs'))
+        df = sportsref.utils.parse_table(table)
+        return df
+
+    @sportsref.decorators.memoize
+    @sportsref.decorators.kind_rpb(include_type=True)
+    def games(self, kind='R'):
+        """Gets yearly games stats for the player.
+
+        :kind: One of 'R', 'P', or 'B'. Case-insensitive; defaults to 'R'.
+        :returns: Pandas DataFrame with games stats.
+        """
+        doc = self.get_doc()
+        table = (doc('#all_games_played') if kind == 'R'
+                 else doc('#all_games_played_playoffs'))
+        df = sportsref.utils.parse_table(table)
+        return df
+
+    @sportsref.decorators.memoize
+    @sportsref.decorators.kind_rpb(include_type=True)
+    def kicking_and_punting(self, kind='R'):
+        """Gets yearly kicking/punting stats for the player.
+
+        :kind: One of 'R', 'P', or 'B'. Case-insensitive; defaults to 'R'.
+        :returns: Pandas DataFrame with kicking/punting stats.
+        """
+        doc = self.get_doc()
+        table = (doc('#all_kicking') if kind == 'R'
+                 else doc('#all_kicking_playoffs'))
+        df = sportsref.utils.parse_table(table)
+        return df
+
+    @sportsref.decorators.memoize
+    @sportsref.decorators.kind_rpb(include_type=True)
+    def all_scoring(self, kind='R'):
+        """Gets yearly all scoring stats for the player.
+
+        :kind: One of 'R', 'P', or 'B'. Case-insensitive; defaults to 'R'.
+        :returns: Pandas DataFrame with all scoring stats.
+        """
+        doc = self.get_doc()
+        table = (doc('#all_scoring') if kind == 'R'
+                 else doc('#all_scoring_playoffs'))
+        df = sportsref.utils.parse_table(table)
+        return df
+
+
+    @sportsref.decorators.memoize
+    @sportsref.decorators.kind_rpb(include_type=True)
+    def all_annual_stats(self, kind='R'):
+        """Gets yearly all annual stats for the player by grabbing
+        each individual dataset and then merging them for full years.
+
+        :kind: One of 'R', 'P', or 'B'. Case-insensitive; defaults to 'R'.
+        :returns: Pandas DataFrame with all annual stats.
+        """
+        dfPassing = self.passing(kind)
+        dfPassing = dfPassing.loc[dfPassing["has_class_full_table"]]
+        dfRushRec = self.rushing_and_receiving(kind)
+        dfRushRec = dfRushRec.loc[dfRushRec["has_class_full_table"]]
+        dfDefense = self.defense_and_fumbles(kind)
+        dfDefense = dfDefense.loc[dfDefense["has_class_full_table"]]
+        dfReturns = self.kick_and_punt_returns(kind)
+        dfReturns = dfReturns.loc[dfReturns["has_class_full_table"]]
+        dfGames = self.games(kind)
+        dfGames = dfGames.loc[dfGames["has_class_full_table"]]
+        dfKicking = self.kicking_and_punting(kind)
+        dfKicking = dfKicking.loc[dfKicking["has_class_full_table"]]
+        dfScoring = self.all_scoring(kind)
+        dfScoring = dfScoring.loc[dfScoring["has_class_full_table"]]
+        # the mergeList declares the common fields to merge on
+        mergeList = ['year', 'age', 'team', 'pos', 'uniform_number', 'g', 'gs']
+        dfAll = dfPassing.merge(dfRushRec, how='outer', on=mergeList)
+        dfAll = dfAll.merge(dfDefense, how='outer', on=mergeList)
+        dfAll = dfAll.merge(dfReturns, how='outer', on=mergeList)
+        dfAll = dfAll.merge(dfGames, how='outer', on=mergeList)
+        dfAll = dfAll.merge(dfKicking, how='outer', on=mergeList)
+        dfAll = dfAll.merge(dfScoring, how='outer', on=mergeList)
+
+        return dfAll
+
     def _plays(self, year, play_type):
         """Returns a DataFrame of plays for a given year for a given play type
         (like rushing, receiving, or passing).

From 9a8e63aafaee8d250fc409c728523705404bcb5c Mon Sep 17 00:00:00 2001
From: Incind
Date: Sun, 15 Jan 2017 22:54:23 -0700
Subject: [PATCH 2/5] Added method to get collegeid. This was in another branch, but I consolidated for ease.

---
Review notes (ignored by git-am, not part of the commit message):
  * The decorator re-added above high_school is written without the
    stray space after '@', matching every other decorator in the file.

 sportsref/nfl/players.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/sportsref/nfl/players.py b/sportsref/nfl/players.py
index b9799af..cbddf11 100644
--- a/sportsref/nfl/players.py
+++ b/sportsref/nfl/players.py
@@ -177,6 +177,15 @@ def college(self):
         return college
 
     @sportsref.decorators.memoize
+    def collegeid(self):
+        doc = self.get_doc()
+        rawText = (doc('div#meta p').filter(lambda i, e:
+                                            'College' in e.text_content()))
+        cleanedText = sportsref.utils.flatten_links(rawText)
+        collegeid = re.search(r'\((.*?)\)', cleanedText).group(1)
+        return collegeid
+
+    @sportsref.decorators.memoize
     def high_school(self):
         doc = self.get_doc()
         rawText = (doc('div#meta p')

From 0333b89f208ac4fe3b47c730ee35132aa92bd8da Mon Sep 17 00:00:00 2001
From: Incind
Date: Sun, 15 Jan 2017 23:12:14 -0700
Subject: [PATCH 3/5] added injuries and snap counts to nfl teams.

---
Review notes (ignored by git-am, not part of the commit message):
  * injuries: the "dnp" membership test falls back to '' when a cell has
    no class attribute (pyquery's .attr() returns None in that case, and
    `"dnp" not in None` raises TypeError).

 sportsref/nfl/teams.py | 43 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/sportsref/nfl/teams.py b/sportsref/nfl/teams.py
index c8d4a17..9c36c0d 100644
--- a/sportsref/nfl/teams.py
+++ b/sportsref/nfl/teams.py
@@ -239,6 +239,49 @@ def team_stats(self, year):
         df = sportsref.utils.parse_table(table)
         return df.ix[df.player_id == 'Team Stats'].iloc[0]
 
+    @sportsref.decorators.memoize
+    def injuries(self, year):
+        """ This gets the injuries from pro-football-reference for each team
+
+        :param year: year to query
+        :return: the pfrid and injury status per week for the year
+        """
+        # set link and table_name and then get the pyquery table
+        link = sportsref.nfl.BASE_URL + \
+            '/teams/{}/{}_injuries.htm'.format(self.teamID, str(year))
+        doc = pq(sportsref.utils.get_html(link))
+        table = doc('#team_injuries')
+
+        # check if valid return
+        if not len(table):
+            return pd.DataFrame()
+        else:
+            # identify columns, rows, data, and make dataframe
+            columns = [c.text() for c in table('thead tr th').items()]
+            rows = list(table('tbody tr').items())
+            data = [[(td.attr('data-append-csv') if td.attr('class') == "left "
+                      else (
+                td.attr('data-tip') if "dnp" not in (td.attr('class') or '')
+                else "dnp;" + td.attr('data-tip')))
+                     for td in row.items('th,td')] for row in rows]
+            injuries_one_team = pd.DataFrame(data, columns=columns)
+            return injuries_one_team
+
+    @sportsref.decorators.memoize
+    def snap_count(self, year):
+        """ get snaps per team per year
+
+        :param year: year of query
+        :return: dataframe with pfrID, snap count (sum of just off and defense)
+        """
+        # set link and table_name and then get the pyquery table
+        link = sportsref.nfl.BASE_URL + \
+            '/teams/{}/{}-snap-counts.htm'.format(self.teamID, str(year))
+        doc = pq(sportsref.utils.get_html(link))
+        table = doc('#all_snap_counts')
+        snap_counts = sportsref.utils.parse_table(table)
+        return snap_counts
+
     @sportsref.decorators.memoize
     def opp_stats(self, year):
         """Returns a Series (dict-like) of the team's opponent's stats from the

From 9595fc5c86bb05481992f616a1c3d914b7287ee8 Mon Sep 17 00:00:00 2001
From: Incind
Date: Mon, 23 Jan 2017 13:55:12 -0800
Subject: [PATCH 4/5] changed #all_[table_name] to table#[table_name] for all functions in players

---
Review notes (ignored by git-am, not part of the commit message):
  * passing: the two-line selector uses a parenthesised conditional like
    the sibling methods instead of a backslash continuation.

 sportsref/nfl/players.py | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/sportsref/nfl/players.py b/sportsref/nfl/players.py
index cbddf11..ec01588 100644
--- a/sportsref/nfl/players.py
+++ b/sportsref/nfl/players.py
@@ -221,7 +221,8 @@ def passing(self, kind='R'):
         :returns: Pandas DataFrame with passing stats.
         """
         doc = self.get_doc()
-        table = doc('#passing') if kind == 'R' else doc('#passing_playoffs')
+        table = (doc('table#passing') if kind == 'R'
+                 else doc('table#passing_playoffs'))
         df = sportsref.utils.parse_table(table)
         return df
 
@@ -234,11 +235,11 @@ def rushing_and_receiving(self, kind='R'):
         :returns: Pandas DataFrame with rushing/receiving stats.
         """
         doc = self.get_doc()
-        table = (doc('#rushing_and_receiving') if kind == 'R'
-                 else doc('#rushing_and_receiving_playoffs'))
+        table = (doc('table#rushing_and_receiving') if kind == 'R'
+                 else doc('table#rushing_and_receiving_playoffs'))
         if not table:
-            table = (doc('#receiving_and_rushing') if kind == 'R'
-                     else doc('#receiving_and_rushing_playoffs'))
+            table = (doc('table#receiving_and_rushing') if kind == 'R'
+                     else doc('table#receiving_and_rushing_playoffs'))
         df = sportsref.utils.parse_table(table)
         return df
 
@@ -251,8 +252,8 @@ def defense_and_fumbles(self, kind='R'):
         :returns: Pandas DataFrame with defense/fumble stats.
         """
         doc = self.get_doc()
-        table = (doc('#all_defense') if kind == 'R'
-                 else doc('#all_defense_playoffs'))
+        table = (doc('table#defense') if kind == 'R'
+                 else doc('table#defense_playoffs'))
         df = sportsref.utils.parse_table(table)
         return df
 
@@ -265,8 +266,8 @@ def kick_and_punt_returns(self, kind='R'):
         :returns: Pandas DataFrame with kick/punt return stats.
         """
         doc = self.get_doc()
-        table = (doc('#all_returns') if kind == 'R'
-                 else doc('#all_returns_playoffs'))
+        table = (doc('table#returns') if kind == 'R'
+                 else doc('table#returns_playoffs'))
         df = sportsref.utils.parse_table(table)
         return df
 
@@ -279,8 +280,8 @@ def games(self, kind='R'):
         :returns: Pandas DataFrame with games stats.
         """
         doc = self.get_doc()
-        table = (doc('#all_games_played') if kind == 'R'
-                 else doc('#all_games_played_playoffs'))
+        table = (doc('table#games_played') if kind == 'R'
+                 else doc('table#games_played_playoffs'))
         df = sportsref.utils.parse_table(table)
         return df
 
@@ -293,8 +294,8 @@ def kicking_and_punting(self, kind='R'):
         :returns: Pandas DataFrame with kicking/punting stats.
         """
         doc = self.get_doc()
-        table = (doc('#all_kicking') if kind == 'R'
-                 else doc('#all_kicking_playoffs'))
+        table = (doc('table#kicking') if kind == 'R'
+                 else doc('table#kicking_playoffs'))
         df = sportsref.utils.parse_table(table)
         return df
 
@@ -307,8 +308,8 @@ def all_scoring(self, kind='R'):
         :returns: Pandas DataFrame with all scoring stats.
         """
         doc = self.get_doc()
-        table = (doc('#all_scoring') if kind == 'R'
-                 else doc('#all_scoring_playoffs'))
+        table = (doc('table#scoring') if kind == 'R'
+                 else doc('table#scoring_playoffs'))
         df = sportsref.utils.parse_table(table)
         return df
 

From 68770b4bbe2f2b2b77dd9cd477f4c86f2f72e660 Mon Sep 17 00:00:00 2001
From: Incind
Date: Wed, 25 Jan 2017 00:52:00 -0700
Subject: [PATCH 5/5] refactored name to ncaaf_player_id. changed some link defs to use get_year_doc.

---
Review notes (ignored by git-am, not part of the commit message):
  * snap_count: the "set link and table_name" comment is replaced, since
    this patch removes the link-building code it described (mirrors the
    comment change already made in injuries).

 sportsref/nfl/players.py |  2 +-
 sportsref/nfl/teams.py   | 18 +++++++-----------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/sportsref/nfl/players.py b/sportsref/nfl/players.py
index ec01588..b6efe2e 100644
--- a/sportsref/nfl/players.py
+++ b/sportsref/nfl/players.py
@@ -177,7 +177,7 @@ def college(self):
         return college
 
     @sportsref.decorators.memoize
-    def collegeid(self):
+    def ncaaf_player_id(self):
         doc = self.get_doc()
         rawText = (doc('div#meta p').filter(lambda i, e:
                                             'College' in e.text_content()))
diff --git a/sportsref/nfl/teams.py b/sportsref/nfl/teams.py
index 9c36c0d..4ee0d2b 100644
--- a/sportsref/nfl/teams.py
+++ b/sportsref/nfl/teams.py
@@ -246,11 +246,9 @@ def injuries(self, year):
         :param year: year to query
         :return: the pfrid and injury status per week for the year
         """
-        # set link and table_name and then get the pyquery table
-        link = sportsref.nfl.BASE_URL + \
-            '/teams/{}/{}_injuries.htm'.format(self.teamID, str(year))
-        doc = pq(sportsref.utils.get_html(link))
-        table = doc('#team_injuries')
+        # get doc and table for the injuries
+        doc = self.get_year_doc('{}_injuries'.format(str(year)))
+        table = doc('table#team_injuries')
 
         # check if valid return
         if not len(table):
@@ -275,10 +273,8 @@ def snap_count(self, year):
         :return: dataframe with pfrID, snap count (sum of just off and defense)
         """
-        # set link and table_name and then get the pyquery table
-        link = sportsref.nfl.BASE_URL + \
-            '/teams/{}/{}-snap-counts.htm'.format(self.teamID, str(year))
-        doc = pq(sportsref.utils.get_html(link))
-        table = doc('#all_snap_counts')
+        # get doc and table for the snap counts
+        doc = self.get_year_doc('{}-snap-counts'.format(str(year)))
+        table = doc('table#snap_counts')
         snap_counts = sportsref.utils.parse_table(table)
         return snap_counts
 
@@ -305,6 +301,6 @@ def passing(self, year):
     @sportsref.decorators.memoize
     def rushing_and_receiving(self, year):
         doc = self.get_year_doc(year)
-        table = doc('#rushing_and_receiving')
+        table = doc('table#rushing_and_receiving')
         df = sportsref.utils.parse_table(table)
         return df