#Scrape the data

import requests 
from bs4 import BeautifulSoup as b
import html
import pandas as pandas
import numpy as np


def htoint(t):
    """Convert roster heights into total inches.

    Each entry is read as a single feet digit, one separator character and
    then the inches, e.g. ``'6-10'``.  Everything after the separator is
    parsed as the inches, so two-digit values such as ``'6-10'`` and
    ``'6-11'`` are handled (reading only one character there would turn
    both into 6 ft 1 in).

    Args:
        t: Iterable of height strings (a list or a pandas Series).

    Returns:
        list[float]: One height in inches per entry, in input order.

    Raises:
        ValueError: If the feet or inches part of an entry is not numeric.
    """
    inches = []
    # Iterate over the values directly instead of indexing by position so a
    # Series with a non-default index works as well.
    for height in t:
        inches.append(float(height[0]) * 12 + float(height[2:]))
    return inches

# Roster widget endpoint: {team} is the site's team code and {season} is the
# year that appears in the team page name (2020 -> 2019-20 season).
ROSTER_URL = ('https://widgets.sports-reference.com/wg.fcgi?css=1&site=bbr'
              '&url=%2Fteams%2F{team}%2F{season}.html&div=div_roster')
headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.2 Safari/605.1.15'}

# 2019-2020 season: (team code, wins, games played).
SEASON_2020 = [
    # Eastern Conference
    ('MIL', 56, 73),  # Bucks
    ('TOR', 53, 72),  # Raptors
    ('BOS', 48, 72),  # Celtics
    ('IND', 45, 72),  # Pacers  NOTE(review): 45-28 would be 73 games - confirm
    ('MIA', 44, 73),  # Heat
    ('PHI', 43, 73),  # 76ers
    ('BRK', 35, 72),  # Nets
    ('ORL', 33, 73),  # Magic
    ('CHO', 23, 65),  # Hornets
    ('WAS', 25, 72),  # Wizards (previously fetched the Magic roster again)
    ('CHI', 22, 65),  # Bulls
    ('NYK', 21, 66),  # Knicks
    ('DET', 20, 66),  # Pistons
    ('ATL', 20, 67),  # Hawks
    ('CLE', 19, 65),  # Cavs
    # Western Conference
    ('LAL', 52, 71),  # Lakers
    ('LAC', 49, 72),  # Clippers
    ('DEN', 46, 73),  # Nuggets
    ('HOU', 44, 72),  # Rockets
    ('OKC', 44, 72),  # OKC
    ('UTA', 44, 72),  # Jazz
    ('DAL', 43, 75),  # Mavs
    ('POR', 35, 74),  # Blazers
    ('MEM', 34, 73),  # Grizzlies
    ('PHO', 34, 73),  # Suns
    ('SAS', 32, 71),  # Spurs
    ('SAC', 31, 72),  # Kings
    ('NOP', 30, 72),  # Pelicans
    ('MIN', 19, 64),  # Timberwolves
    ('GSW', 15, 65),  # Warriors
]

# 1979-1980 season: (team code, win fraction).
SEASON_1980 = [
    ('BOS', .774),  # Celtics  NOTE(review): a 61-21 record would be .744 - confirm
    ('PHI', .720),  # 76ers
    ('WSB', .476),  # Bullets
    ('NYK', .476),  # Knicks
    ('NJN', .415),  # Nets
    ('ATL', .610),  # Hawks
    ('HOU', .500),  # Rockets
    ('SAS', .500),  # Spurs
    ('IND', .451),  # Pacers
    ('CLE', .451),  # Cavs
    ('DET', .195),  # Pistons
    ('MIL', .598),  # Bucks
    ('KCK', .573),  # Kings
    ('DEN', .366),  # Nuggets
    ('CHI', .366),  # Bulls
    ('UTA', .293),  # Jazz
    ('LAL', .732),  # Lakers
    ('SEA', .683),  # Sonics
    ('PHO', .671),  # Suns
    ('POR', .463),  # Blazers
    ('SDC', .427),  # Clippers
    ('GSW', .293),  # Warriors
]


def _roster_heights(team, season):
    """Download one team's roster table and return its player heights.

    Args:
        team: Team code used in the roster URL, e.g. ``'MIL'``.
        season: Season year used in the roster URL, e.g. ``2020``.

    Returns:
        list[float]: The ``Ht`` column converted to inches by ``htoint``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the response contains no ``<table>`` element.
    """
    # Local import keeps this block self-contained; newer pandas versions
    # deprecate passing literal HTML text straight to read_html.
    from io import StringIO

    url = ROSTER_URL.format(team=team, season=season)
    req = requests.get(url, headers=headers, timeout=30)  # the html data
    req.raise_for_status()
    roster = b(req.content, 'html').find("table")
    if roster is None:
        raise ValueError("no roster table found at " + url)
    table = pandas.read_html(StringIO(roster.prettify()))[0]
    return htoint(table['Ht'])


#How has height affected NBA win percentage in 2019-2020?
ht_X = [] #the mean height
multi_ht_X = [] #all the players heights for multiple linear regression
wp_Y = []
for team, wins, games in SEASON_2020:
    team_heights = _roster_heights(team, 2020)
    ht_X.append(np.mean(team_heights))
    wp_Y.append(wins / games * 100)
    multi_ht_X.append(team_heights)

height2020 = ht_X
winpct2020 = wp_Y


#How has height affected NBA win percentage in 1979-1980?
ht_X = []
wp_Y = []
for team, win_fraction in SEASON_1980:
    ht_X.append(np.mean(_roster_heights(team, 1980)))
    wp_Y.append(win_fraction)

height1980 = ht_X
# The 1980 values are stored as fractions; scale them to percentages so both
# seasons use the same units.
winpct1980 = [fraction * 100 for fraction in wp_Y]

Intercept: [-95.54570895]
Slope: [1.89321239]

Intercept: [294.85120304]
Slope: [-3.18168388]


#Visually Exploring the Data 
# pyplot is imported here because this section uses `plt` before the import
# that appears further down the file.
import matplotlib.pyplot as plt
import seaborn as sns

# One season label per team so both seasons can share a single plot axis.
years = [1980] * len(height1980) + [2020] * len(height2020)
heights = height1980 + height2020
winpcts = winpct1980 + winpct2020

# Distribution of mean team height, split by season.
plt.figure(1)
sns.violinplot(x=years, y=heights)
plt.title('Height Distrubution in 1980 and 2020')

# Distribution of team win percentage, split by season.
plt.figure(2)
sns.violinplot(x=years, y=winpcts)
plt.title('Win % Distrubution in 1980 and 2020')

plt.show()

Intercept: [-95.54570895]
Slope: [1.89321239]

Intercept: [294.85120304]
Slope: [-3.18168388]


#Linear Regression 
import matplotlib.pyplot as plt

# Ordinary least squares for win% = a + b * height, one fit per season.
# The coefficients come from solving the normal equations (X^T X) w = X^T y,
# where X is the height column with a leading column of ones.
for _figure, _title, _season_heights, _season_winpcts in (
    (3, 'Win% vs Height in Inches in 1980', height1980, winpct1980),
    (4, 'Win% vs Height in Inches in 2020', height2020, winpct2020),
):
    x = np.array(_season_heights).reshape(-1, 1)
    y = np.array(_season_winpcts).reshape(-1, 1)
    ones = np.ones([x.shape[0], 1])
    X = np.hstack([ones, x])  # design matrix: [1, height]
    a, b = np.linalg.solve(X.T @ X, X.T @ y)
    print("Intercept:", a)
    print("Slope:", b)

    # Scatter of the teams with the fitted line drawn over it.
    plt.figure(_figure)
    plt.plot(x, y, 'o')
    plt.plot(x, a + b * x)
    plt.title(_title)
    plt.xlabel('Height')
    plt.ylabel('Win%')
    plt.show()

Intercept: [-95.54570895]
Slope: [1.89321239]

Intercept: [294.85120304]
Slope: [-3.18168388]


#Manipulation and Exploration to prepare for Logistic Regression

# A team is labelled 1 when its win % is above this cutoff and 0 otherwise.
WIN_PCT_CUTOFF = 45

# Both seasons compare the untruncated percentage against the cutoff.
# Truncating with int() first would label a 45.x% team as 0 in one season
# while the same percentage is labelled 1 in the other.
win1980bins = [1 if float(pct) > WIN_PCT_CUTOFF else 0 for pct in winpct1980]
win2020bins = [1 if float(pct) > WIN_PCT_CUTOFF else 0 for pct in winpct2020]


# Binary labels against mean team height, one figure per season.
plt.figure(5)
plt.plot(height1980, win1980bins, 'o')
plt.title("Logistic Plot of Win % vs Height 1980")
plt.show()

plt.figure(6)
plt.plot(height2020, win2020bins, 'o')
plt.title("Logistic Plot of Win % vs Height 2020")
plt.show()


#Logistic Regression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score


# Feature matrices: one row per team, a single column of mean height.
X = np.array(height1980)[:, np.newaxis]
X1 = np.array(height2020)[:, np.newaxis]

# Fit a logistic regression and an LDA classifier on each season.
model1980 = LogisticRegression(solver='newton-cg', random_state=0)
model1980.fit(X, win1980bins)
ldamodel1980 = LinearDiscriminantAnalysis(solver='lsqr')
ldamodel1980.fit(X, win1980bins)

model2020 = LogisticRegression(solver='newton-cg', random_state=0)
model2020.fit(X1, win2020bins)
ldamodel2020 = LinearDiscriminantAnalysis(solver='lsqr')
ldamodel2020.fit(X1, win2020bins)

# Shuffled 7-fold split with a fixed seed, shared by every evaluation below.
cv = KFold(n_splits=7, random_state=1, shuffle=True)


def _cv_accuracy(estimator, features, labels):
    """Return the per-fold accuracy of *estimator* under the shared split."""
    return cross_val_score(estimator, features, labels, scoring='accuracy',
                           cv=cv, n_jobs=-1, error_score='raise')


model1980CV = _cv_accuracy(model1980, X, win1980bins)
model2020CV = _cv_accuracy(model2020, X1, win2020bins)
cvmean1980 = model1980CV.mean()
cvmean2020 = model2020CV.mean()

ldamodel1980CV = _cv_accuracy(ldamodel1980, X, win1980bins)
ldamodel2020CV = _cv_accuracy(ldamodel2020, X1, win2020bins)
cvLmean1980 = ldamodel1980CV.mean()
cvLmean2020 = ldamodel2020CV.mean()

# `end` supplies the blank lines that separate the report entries.
print("1980 Logsitic Regression Cross Val Score: ", cvmean1980, end="\n\n")
print("2020 Logsitic Regression Cross Val Score: ", cvmean2020, end="\n\n\n")
print("1980 LDA Cross Val Score: ", cvLmean1980, end="\n\n")
print("2020 LDA Cross Val Score: ", cvLmean2020, end="\n\n\n")



# Two hypothetical leagues of 30 teams each: every team averaging 75 in
# ("small") or 78 in ("big").
small = [75] * 30
big = [78] * 30

# Column vectors, matching the single-feature layout the models were fit on.
s = np.array(small)[:, np.newaxis]
b = np.array(big)[:, np.newaxis]

_prediction_reports = (
    (model1980,
     "1980 Logistic Small Teams Predictions:",
     "1980 Logistic Big Teams Predictions:"),
    (model2020,
     "2020 Logistic Small Teams Predictions:",
     "2020 Logistic Big Teams Predictions:"),
    (ldamodel1980,
     "1980 LDA Small Teams Predictions:",
     "1980 LDA Big Teams Predictions:"),
    (ldamodel2020,
     "2020 LDA Small Teams Predictions:",
     "2020 LDA Big Teams Predictions:"),
)
for _position, (_model, _small_label, _big_label) in enumerate(_prediction_reports):
    if _position:
        # Two blank lines between consecutive model reports.
        print()
        print()
    print(_small_label, _model.predict(s))
    print()
    print(_big_label, _model.predict(b))

1980 Logsitic Regression Cross Val Score:  0.6785714285714286

2020 Logsitic Regression Cross Val Score:  0.5285714285714286


1980 LDA Cross Val Score:  0.6309523809523808

2020 LDA Cross Val Score:  0.5285714285714286


1980 Logistic Small Teams Predictions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

1980 Logistic Big Teams Predictions: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


2020 Logistic Small Teams Predictions: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]

2020 Logistic Big Teams Predictions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


1980 LDA Small Teams Predictions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

1980 LDA Big Teams Predictions: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


2020 LDA Small Teams Predictions: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]

2020 LDA Big Teams Predictions: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

How Height Has Affected Win % in 1980 and 2020

by Vivek Mantha

Introduction

Scraping and Processing the Data

Exploring The Data

Hypotheses

1. Height is positively correlated with win % in 1980

2. Height is negatively correlated with win % in 2020

3. The correlation between height and win % is stronger in 1980 than in 2020

Linear Regression

Logistic Regression Preparation

Logistic Regression Analysis

Conclusion