From 2e45e19409b71335d4f0069ab6dc3cc12472a2cd Mon Sep 17 00:00:00 2001 From: Adam Scharf Date: Sun, 8 Sep 2024 15:38:48 -0500 Subject: [PATCH 1/2] Issue #452: BR URLs updated to https from http to avoid 503 results --- pybaseball/league_batting_stats.py | 6 ++++-- pybaseball/league_pitching_stats.py | 4 ++-- pybaseball/standings.py | 2 +- pybaseball/team_results.py | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pybaseball/league_batting_stats.py b/pybaseball/league_batting_stats.py index b13fda43..38dc7cf6 100644 --- a/pybaseball/league_batting_stats.py +++ b/pybaseball/league_batting_stats.py @@ -17,7 +17,7 @@ def get_soup(start_dt: date, end_dt: date) -> BeautifulSoup: # if((start_dt is None) or (end_dt is None)): # print('Error: a date range needs to be specified') # return None - url = "http://www.baseball-reference.com/leagues/daily.cgi?user_team=&bust_cache=&type=b&lastndays=7&dates=fromandto&fromandto={}.{}&level=mlb&franch=&stat=&stat_value=0".format(start_dt, end_dt) + url = "https://www.baseball-reference.com/leagues/daily.cgi?user_team=&bust_cache=&type=b&lastndays=7&dates=fromandto&fromandto={}.{}&level=mlb&franch=&stat=&stat_value=0".format(start_dt, end_dt) s = session.get(url).content # a workaround to avoid beautiful soup applying the wrong encoding s = s.decode('utf-8') @@ -92,7 +92,7 @@ def bwar_bat(return_all: bool = False) -> pd.DataFrame: Get data from war_daily_bat table. Returns WAR, its components, and a few other useful stats. To get all fields from this table, supply argument return_all=True. """ - url = "http://www.baseball-reference.com/data/war_daily_bat.txt" + url = "https://www.baseball-reference.com/data/war_daily_bat.txt" s = session.get(url).content c=pd.read_csv(io.StringIO(s.decode('utf-8'))) if return_all: @@ -102,3 +102,5 @@ def bwar_bat(return_all: bool = False) -> pd.DataFrame: 'pitcher','G', 'PA', 'salary', 'runs_above_avg', 'runs_above_avg_off','runs_above_avg_def', 'WAR_rep','WAA','WAR'] return c[cols_to_keep] + +print(batting_stats_range()) diff --git a/pybaseball/league_pitching_stats.py b/pybaseball/league_pitching_stats.py index b140cb62..04c94533 100644 --- a/pybaseball/league_pitching_stats.py +++ b/pybaseball/league_pitching_stats.py @@ -18,7 +18,7 @@ def get_soup(start_dt: Optional[Union[date, str]], end_dt: Optional[Union[date, if((start_dt is None) or (end_dt is None)): print('Error: a date range needs to be specified') return None - url = "http://www.baseball-reference.com/leagues/daily.cgi?user_team=&bust_cache=&type=p&lastndays=7&dates=fromandto&fromandto={}.{}&level=mlb&franch=&stat=&stat_value=0".format(start_dt, end_dt) + url = "https://www.baseball-reference.com/leagues/daily.cgi?user_team=&bust_cache=&type=p&lastndays=7&dates=fromandto&fromandto={}.{}&level=mlb&franch=&stat=&stat_value=0".format(start_dt, end_dt) s = session.get(url).content # a workaround to avoid beautiful soup applying the wrong encoding s = s.decode('utf-8') @@ -96,7 +96,7 @@ def bwar_pitch(return_all: bool=False) -> pd.DataFrame: Get data from war_daily_pitch table. Returns WAR, its components, and a few other useful stats. To get all fields from this table, supply argument return_all=True. """ - url = "http://www.baseball-reference.com/data/war_daily_pitch.txt" + url = "https://www.baseball-reference.com/data/war_daily_pitch.txt" s = session.get(url).content c = pd.read_csv(io.StringIO(s.decode('utf-8'))) if return_all: diff --git a/pybaseball/standings.py b/pybaseball/standings.py index e6c13127..5e4296b2 100644 --- a/pybaseball/standings.py +++ b/pybaseball/standings.py @@ -10,7 +10,7 @@ session = BRefSession() def get_soup(year: int) -> BeautifulSoup: - url = f'http://www.baseball-reference.com/leagues/MLB/{year}-standings.shtml' + url = f'https://www.baseball-reference.com/leagues/MLB/{year}-standings.shtml' s = session.get(url).content return BeautifulSoup(s, "lxml") diff --git a/pybaseball/team_results.py b/pybaseball/team_results.py index 09b7eea8..6e7c44ce 100644 --- a/pybaseball/team_results.py +++ b/pybaseball/team_results.py @@ -18,7 +18,7 @@ def get_soup(season: Optional[int], team: str) -> BeautifulSoup: # get most recent year's schedule if year not specified if season is None: season = most_recent_season() - url = "http://www.baseball-reference.com/teams/{}/{}-schedule-scores.shtml".format(team, season) + url = "https://www.baseball-reference.com/teams/{}/{}-schedule-scores.shtml".format(team, season) print(url) s = session.get(url).content return BeautifulSoup(s, "lxml") From 1a70af841708285f7157b8dda30666313fb142ca Mon Sep 17 00:00:00 2001 From: Adam Scharf Date: Sun, 8 Sep 2024 19:21:00 -0500 Subject: [PATCH 2/2] Remove print output from league_batting_stats --- pybaseball/league_batting_stats.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pybaseball/league_batting_stats.py b/pybaseball/league_batting_stats.py index 38dc7cf6..564dc1b1 100644 --- a/pybaseball/league_batting_stats.py +++ b/pybaseball/league_batting_stats.py @@ -101,6 +101,4 @@ def bwar_bat(return_all: bool = False) -> pd.DataFrame: cols_to_keep = ['name_common', 'mlb_ID', 'player_ID', 'year_ID', 'team_ID', 'stint_ID', 'lg_ID', 'pitcher','G', 'PA', 'salary', 'runs_above_avg', 'runs_above_avg_off','runs_above_avg_def', 'WAR_rep','WAA','WAR'] - return c[cols_to_keep] - -print(batting_stats_range()) + return c[cols_to_keep] \ No newline at end of file