What Matters in Winning College Basketball Games?

bphilton5
Dec 10, 2020
2 min read

Data Source

All data is scraped with Python from sports-reference.com; the code is below.

Filters

All college basketball teams from 2000 to 2019.

Key Points

No single statistic matters on its own.
FG percentage matters the most, but not all that much.
I was surprised that the R^2 of steals per game isn't negative.
I think digging deeper would give a better sense of how college basketball teams are winning.

Python Script

#pip install beautifulsoup4

#pip install lxml

#pip install requests

import pandas as pd

import requests

import bs4

from bs4 import BeautifulSoup

# Seasons to scrape: 2000 through 2019 (range() excludes the stop value).
n = range(2000, 2020)

# A season's page URL is built as url2 + <year> + url3.
url2 = 'https://www.sports-reference.com/cbb/seasons/'
url3 = '-school-stats.html'

# One accumulator list per output column. Each name is bound to its own fresh
# list, so appending to one can never affect another. Explicit assignments are
# used instead of a backslash-continued 12-way unpack, which breaks as soon as
# anything (a blank line, trailing whitespace) follows a backslash.
pl_team_list = []
pl_wins_list = []
pl_loses_list = []
pl_total_points_list = []
pl_total_points_againest_list = []
pl_FG_percentage_list = []
pl_3_percentage_list = []
pl_FT_percentage_list = []
pl_total_rebounds_list = []
pl_assist_list = []
pl_steals_list = []
pl_turnovers_list = []

# Download each season's school-stats page and append one entry per team row
# to the module-level pl_*_list accumulators.
#
# Each pair below is (accumulator list, position of the stat among the row's
# right-aligned <td> cells). The positions depend on the page's column layout.
# NOTE(review): confirm the indices against the live table if the site changes.
stat_columns = (
    (pl_wins_list, 1),
    (pl_loses_list, 2),
    (pl_total_points_list, 16),
    (pl_total_points_againest_list, 17),
    (pl_FG_percentage_list, 22),
    (pl_3_percentage_list, 25),
    (pl_FT_percentage_list, 28),
    (pl_total_rebounds_list, 30),
    (pl_assist_list, 31),
    (pl_steals_list, 32),
    (pl_turnovers_list, 33),
)
# A row must have at least this many right-aligned cells to be usable.
min_cells = max(index for _, index in stat_columns) + 1

# Iterate with a separate loop variable so the season range `n` is not
# overwritten by its own elements.
for year in n:
    season_url = url2 + str(year) + url3

    # A timeout keeps a stalled connection from hanging the scrape forever.
    response = requests.get(season_url, timeout=30)
    # Fail with a clear HTTP error instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    league_table = soup.find('table', class_='per_match_toggle sortable stats_table')
    if league_table is None:
        raise RuntimeError('stats table not found at ' + season_url)

    for team in league_table.find_all('tbody'):
        for row in team.find_all('tr'):
            pl_team = row.find('td', class_='left')
            if pl_team is None:
                # No left-aligned cell means no team name in this row; skip it.
                continue

            # Query the right-aligned cells once per row rather than once per
            # statistic.
            cells = row.find_all('td', class_='right')
            if len(cells) < min_cells:
                # Indexing a short row would raise IndexError; skip it instead.
                continue

            pl_team_list.append(pl_team.text)
            for target_list, index in stat_columns:
                # Use .text for every column. On a BeautifulSoup tag, `.int`
                # looks up a child <int> element and yields None, which is
                # what previously filled the percentage columns.
                target_list.append(cells[index].text)

# Assemble the scraped columns into a table. Each header is paired, in order,
# with the list that was filled for it during the scrape.
column_headers = (
    'Team',
    'Wins',
    'Loses',
    'Total Points',
    'Total Points Against',
    'FG Percentage',
    '3 Percentage',
    'FT Percentage',
    'Total Rebounds',
    'Assist',
    'Steals',
    'Turnovers',
)
column_values = (
    pl_team_list,
    pl_wins_list,
    pl_loses_list,
    pl_total_points_list,
    pl_total_points_againest_list,
    pl_FG_percentage_list,
    pl_3_percentage_list,
    pl_FT_percentage_list,
    pl_total_rebounds_list,
    pl_assist_list,
    pl_steals_list,
    pl_turnovers_list,
)
df = pd.DataFrame(dict(zip(column_headers, column_values)))

# Write the table to an Excel workbook at a fixed location.
output_path = 'C://Users/bphil/OneDrive/Desktop/ncaa_basketball_teams.xlsx'
df.to_excel(output_path)

What Matters in Winning College Basketball Games?

Recent Posts

Comments

SIGN UP AND STAY UPDATED!