import numpy as np
import pandas as pd
import geopandas as gp
import time
from selenium import webdriver
import hvplot.pandas
from bokeh.models import HoverTool
from holoviews import opts


# Root of the Politico 2020 results pages; president_url and senate_url append a state slug to it.
base_url = 'https://www.politico.com/2020-election/results/'
# Path of the ChromeDriver executable passed to webdriver.Chrome further down.
driver_path = 'C:\\chromedriver_win32\\chromedriver.exe'

def president_url(state):
    """Return the presidential results URL for `state`.

    The state name is turned into a slug (spaces become hyphens, then
    lower-cased) and appended to ``base_url``.
    """
    slug = state.replace(' ', '-').lower()
    return base_url + slug

def senate_url(state):
    """Return the Senate results URL for `state`.

    Arizona is sent to its ``/senate-special/`` page; every other state
    uses ``/senate``. The state slug is built the same way as for the
    presidential URL (hyphens for spaces, lower case).
    """
    slug = state.replace(' ', '-').lower()
    suffix = '/senate-special/' if state == 'Arizona' else '/senate'
    return base_url + slug + suffix


def get_data(driver):
    """Read the county results table from the page currently loaded in `driver`.

    Locates the first ``div`` whose class contains ``county-results``,
    clicks the first button inside it when one can be found and clicked,
    then parses the first ``table`` in that container with
    ``pandas.read_html`` and returns the resulting DataFrame.

    Errors raised while locating the container or its table propagate to
    the caller; only the optional button step is best-effort.
    """
    time.sleep(1)
    elem = driver.find_element_by_xpath("//div[contains(@class, 'county-results')]")
    time.sleep(2)
    try:
        # presumably this button expands a shortened county list -- TODO confirm
        button = elem.find_element_by_tag_name('button')
        time.sleep(1)
        button.click()
        time.sleep(4)
    except Exception:
        # A missing or unclickable button is tolerated. Catching Exception
        # rather than using a bare ``except:`` lets KeyboardInterrupt and
        # SystemExit raised during the sleeps above reach the caller.
        pass
    html_table = elem.find_element_by_tag_name('table')
    time.sleep(1)
    table = pd.read_html(html_table.get_attribute('outerHTML'))[0]
    return table

def get_DC_data(driver):
    """Read the results table from the page currently loaded in `driver`.

    Used for the Washington DC page: the table is looked up inside the
    first ``div`` whose class contains ``results-table`` and parsed with
    ``pandas.read_html``; the first parsed table is returned.
    """
    time.sleep(1)
    container = driver.find_element_by_xpath("//div[contains(@class, 'results-table')]")
    time.sleep(1)
    table_element = container.find_element_by_tag_name('table')
    time.sleep(1)
    markup = table_element.get_attribute('outerHTML')
    return pd.read_html(markup)[0]

#Labels the Senate result columns by party, based on which party the winning candidate is in
def find_parties(driver, df):
    """Rename the columns of `df` in place and return `df`.

    The inner HTML of the page's first ``h1`` is read through `driver`.
    When it contains ``(D)`` the first votes/pct column pair is labelled
    democrat and the second republican; otherwise the labels are swapped.
    `df` must have exactly five columns for the assignment to succeed.
    """
    headline = driver.find_element_by_tag_name('h1').get_attribute('innerHTML')
    parties = ['democrat', 'republican']
    if '(D)' not in headline:
        parties.reverse()
    first, second = parties
    df.columns = ['County', first + ' votes', first + ' pct', second + ' votes', second + ' pct']
    return df


#Accumulates the county-level 2020 presidential results, one row per county
pres_data_2020 = pd.DataFrame(columns=['County', 'Biden votes', 'Biden pct', 'Trump votes', 'Trump pct', 'State'])

#States scraped for county-level results; Washington DC is handled separately below
states = ['Alabama', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia',
 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts',
 'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey',
 'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island',
 'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington', 'West Virginia',
 'Wisconsin', 'Wyoming']


driver = webdriver.Chrome(executable_path=driver_path)
#try/finally guarantees the browser is closed even if the scrape is interrupted
try:
    time.sleep(5)

    #Takes a few minutes to run
    for state in states:
        try:
            time.sleep(1)
            driver.get(president_url(state))
            table = get_data(driver)
            table['State'] = state
            pres_data_2020 = pd.concat([pres_data_2020, table])
        except Exception:
            #A state whose page cannot be scraped is reported and skipped;
            #KeyboardInterrupt is no longer swallowed here
            print('No county data for ', state)

    #Gets the DC data
    try:
        time.sleep(1)
        driver.get(president_url('Washington DC'))
        DC_table = get_DC_data(driver)
        #Row 0 of the DC table is used for Biden and row 1 for Trump
        DC_row = pd.DataFrame([{'County':'Washington', 'Biden votes':DC_table.iloc[0, 1],
                                'Biden pct':DC_table.iloc[0, 2], 'Trump votes':DC_table.iloc[1, 1],
                                'Trump pct':DC_table.iloc[1, 2], 'State':'District of Columbia'}])
        #pd.concat replaces DataFrame.append, which was removed in pandas 2.0
        pres_data_2020 = pd.concat([pres_data_2020, DC_row], ignore_index=True)
    except Exception:
        print('No data for Washington DC')
finally:
    driver.quit()


#County-level presidential returns read from a local CSV (2000-2016 according to the file name)
pres_data_historical = pd.read_csv('countypres_2000-2016.csv')


#Accumulates the county-level 2020 Senate results
senate_data_2020 = pd.DataFrame()

#States whose Senate pages are scraped
senate_states = ['Alabama', 'Arizona', 'Arkansas', 'Colorado', 'Delaware', 'Georgia', 'Idaho',
 'Illinois', 'Iowa', 'Kansas', 'Kentucky', 'Maine', 'Massachusetts', 'Michigan',  'Minnesota',
 'Mississippi', 'Montana', 'Nebraska', 'New Hampshire', 'New Jersey', 'New Mexico', 'North Carolina',
 'Oklahoma', 'Oregon', 'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas',
 'Virginia', 'West Virginia', 'Wyoming']

driver = webdriver.Chrome(executable_path=driver_path)
#try/finally guarantees the browser is closed even if the scrape is interrupted
try:
    time.sleep(5)

    #Takes a few minutes to run
    for state in senate_states:
        try:
            time.sleep(1)
            driver.get(senate_url(state))
            table = get_data(driver)
            #Columns are labelled by party before the state is attached
            table = find_parties(driver, table)
            table['State'] = state
            senate_data_2020 = pd.concat([senate_data_2020, table])
        except Exception:
            #A state whose page cannot be scraped is reported and skipped;
            #KeyboardInterrupt is no longer swallowed here
            print('No senate data for ', state)
finally:
    driver.quit()


#County boundary geometries read with geopandas
county_geog = gp.read_file('cb_2018_us_county_500k')
#Citizen voting-age population (CVAP) tables, one file per vintage
cvap_2012 = pd.read_csv('County_CVAP_2012.csv')
cvap_2016 = pd.read_csv('County_CVAP_2016.csv')
#The 2019 table is a SAS file rather than a CSV and is decoded as latin-1
cvap_2019 = pd.read_sas('County_CVAP_2019.sas7bdat', format = 'sas7bdat',  encoding="latin-1")


def fix_pct(pct):
    """Convert a percentage such as ``'45.3%'`` to a float (``45.3``).

    Strings have their ``%`` sign removed before conversion. Values that
    are not strings (for example a cell that was already parsed as a
    number, or a missing value represented as NaN) are passed straight to
    ``float`` instead of failing on the missing ``replace`` method.
    """
    if isinstance(pct, str):
        pct = pct.replace('%', '')
    return float(pct)

#Determines the total votes based on the vote totals and percents for each party
def sum_votes(row):
    """Estimate the total votes cast from the two-party votes and percentages.

    `row` must provide ``'democrat votes'``, ``'republican votes'``,
    ``'democrat pct'`` and ``'republican pct'``. The estimate is
    ``100 * two_party_votes / two_party_pct`` truncated to an int.

    When the two percentages add up to zero the division is impossible;
    the two-party vote count itself is returned (0 for a county with no
    votes reported, otherwise a lower bound on the total).
    """
    two_party_votes = row['democrat votes'] + row['republican votes']
    two_party_pct = row['democrat pct'] + row['republican pct']
    if two_party_pct == 0:
        return int(two_party_votes)
    #Percents given are not exact, so this number will not be exact
    return int(100*two_party_votes/two_party_pct)


#Virginia independent cities, written without the ' city' suffix; is_city checks names against this list
virginia_cities = ['Alexandria', 'Bristol', 'Buena Vista', 'Charlottesville', 'Chesapeake', 'Colonial Heights', 'Covington', 
                   'Danville', 'Emporia', 'Fairfax', 'Falls Church', 'Franklin', 'Fredericksburg', 'Galax', 'Hampton', 
                   'Harrisonburg', 'Hopewell', 'Lexington', 'Lynchburg', 'Manassas', 'Manassas Park', 'Martinsville', 
                   'Newport News', 'Norfolk', 'Norton', 'Petersburg', 'Poquoson', 'Portsmouth', 'Radford', 'Richmond', 
                   'Roanoke', 'Salem', 'Staunton', 'Suffolk', 'Virginia Beach', 'Waynesboro', 'Williamsburg', 'Winchester']

#List of independent cities that share a name with a county in Virginia
#(is_city only treats these as cities when the name explicitly contains ' city')
virginia_double = ['Bedford', 'Fairfax', 'Franklin', 'Richmond', 'Roanoke']

#Spelling substitutions applied by fix_county after the county-type word has been stripped
#(presumably so that names agree between the scraped and historical datasets -- TODO confirm)
county_replacements = {
    'Desoto': 'DeSoto',
    'LaSalle': 'La Salle',
    'Lac Qui Parle': 'Lac qui Parle',
    'Dona Ana': 'Doña Ana',
    'Dewitt': 'DeWitt',
    'Saint Louis': 'St. Louis',
    'District of Columbia': 'Washington'
}

def is_city(county, state):
    """Return True when `county` in `state` is an independent city.

    Maryland, Missouri and Nevada each have a single recognised city
    name, compared case-insensitively. In Virginia the name, with
    ``' city'`` removed, is looked up in ``virginia_cities``; names that
    also belong to a county (``virginia_double``) only count as cities
    when ``' city'`` is actually present. Every other state yields False.
    """
    single_city = {
        'Maryland': 'baltimore city',
        'Missouri': 'st. louis city',
        'Nevada': 'carson city',
    }
    if state in single_city:
        return county.lower() == single_city[state]
    if state != 'Virginia':
        return False
    bare_name = county.replace(' city', '')
    if bare_name in virginia_double:
        # Ambiguous with a county of the same name: require the explicit suffix.
        return ' city' in county
    return bare_name in virginia_cities

def find_county_type(county, state):
    """Return the county-type word for `county`: 'District', 'Parish', 'City' or 'County'.

    The state decides first ('DC' / 'District of Columbia' and Louisiana);
    only for the remaining states is ``is_city`` consulted.
    """
    if state in ('DC', 'District of Columbia'):
        return 'District'
    if state == 'Louisiana':
        return 'Parish'
    return 'City' if is_city(county, state) else 'County'

#Strips the county-type words (' County', ' Parish', ' city', ' City') out of county names
def remove_county(county):
    """Return `county` with its county-type words removed.

    Names containing 'Charles City' or 'James City' only lose ' County',
    so that the 'City' belonging to the name itself is preserved. The
    words are removed wherever they occur in the string.
    """
    keeps_city = 'Charles City' in county or 'James City' in county
    if keeps_city:
        removable = (' County',)
    else:
        removable = (' County', ' Parish', ' city', ' City')
    for word in removable:
        county = county.replace(word, '')
    return county

def fix_county(county, state):
    """Return the normalised name for `county` in `state`.

    The county type is determined from the original name, the type words
    are stripped, known spelling substitutions from
    ``county_replacements`` are applied, and the type is appended again
    ('<name> <type>'). Districts are returned without a type word.
    """
    county_type = find_county_type(county, state)
    bare_name = remove_county(county)
    bare_name = county_replacements.get(bare_name, bare_name)
    if county_type == 'District':
        return bare_name
    return ' '.join([bare_name, county_type])

#Matches a row of the 2020 dataset to the FIPS of the same county in the historical dataset
def find_FIPS(county, state):
    """Return the FIPS code of `county` in `state` from ``pres_data_historical``.

    The first historical row whose 'state' and 'county' both match is
    used and its 'FIPS' value is returned as an int. When nothing matches,
    a message is printed and NaN is returned.
    """
    lines = pres_data_historical.loc[pres_data_historical['state'] == state]
    lines = lines.loc[lines['county'] == county]
    if lines.empty:
        print("cannot find", county, state)
        # np.nan instead of np.NaN: the upper-case alias was removed in NumPy 2.0
        return np.nan
    line = lines.iloc[0]
    return int(line['FIPS'])

#Turns a CVAP GEOID into a FIPS code by dropping its 7-character prefix ('05000US')
def fix_FIPS(GEOID):
    """Return the integer formed by everything after the first seven characters of `GEOID`."""
    prefix_length = len('05000US')
    fips_digits = GEOID[prefix_length:]
    return int(fips_digits)


#Renames the scraped columns to the lower-case, party-based names used by the rest of the analysis
pres_data_2020 = pres_data_2020.rename(columns={"County": "county", "State": "state",
                                               'Biden votes': 'democrat votes', 'Trump votes': 'republican votes',
                                               'Biden pct': 'democrat pct', 'Trump pct': 'republican pct'})

pres_data_2020['year'] = 2020
#The percent columns are converted with fix_pct before sum_votes runs, because sum_votes adds them together
pres_data_2020['democrat pct'] = pres_data_2020['democrat pct'].apply(fix_pct)
pres_data_2020['republican pct'] = pres_data_2020['republican pct'].apply(fix_pct)
#Total votes are estimated from the two-party votes and percents
pres_data_2020['total votes'] = pres_data_2020.apply(sum_votes, axis=1)
#Normalises each county name with fix_county
pres_data_2020['county'] = pres_data_2020.apply(lambda x: fix_county(x.county, x.state), axis=1)
pres_data_2020['democrat votes'] = pres_data_2020['democrat votes'].apply(int)
pres_data_2020['republican votes'] = pres_data_2020['republican votes'].apply(int)


#Keeps only the republican and democrat rows and drops columns the analysis does not use
pres_data_historical = pres_data_historical.query("party == 'republican' or party == 'democrat'")
pres_data_historical = pres_data_historical.drop(columns=['state_po', 'office', 'candidate', 'version'])

#Combines republican votes and democrat votes into one row
#(one row per year/state/county/totalvotes/FIPS, one 'candidatevotes' column per party)
index = ['year', 'state', 'county', 'totalvotes', 'FIPS']
pres_data_historical = pd.pivot_table(pres_data_historical, index=index, columns='party', values='candidatevotes')
pres_data_historical = pres_data_historical.reset_index()
pres_data_historical.columns.names = [''] #This is needed to finish resetting the index

#Uses the same column names as the 2020 data so the two frames can be concatenated later
pres_data_historical = pres_data_historical.rename(columns={'totalvotes': 'total votes', 
                                                            'democrat': 'democrat votes', 'republican': 'republican votes'})

#Percents here are shares of all votes cast, computed directly from the counts
pres_data_historical['democrat pct'] = 100*pres_data_historical['democrat votes']/pres_data_historical['total votes']
pres_data_historical['republican pct'] = 100*pres_data_historical['republican votes']/pres_data_historical['total votes']
#Normalises county names with fix_county, the same function applied to the 2020 data
pres_data_historical['county'] = pres_data_historical.apply(lambda x: fix_county(x.county, x.state), axis=1)


#The party columns were already named by find_parties; only county/state need lower-casing
senate_data_2020 = senate_data_2020.rename(columns={"County": "county", "State": "state"})

#The percent columns are converted with fix_pct before sum_votes runs, because sum_votes adds them together
senate_data_2020['democrat pct'] = senate_data_2020['democrat pct'].apply(fix_pct)
senate_data_2020['republican pct'] = senate_data_2020['republican pct'].apply(fix_pct)
senate_data_2020['total votes'] = senate_data_2020.apply(sum_votes, axis=1)
senate_data_2020['county'] = senate_data_2020.apply(lambda x: fix_county(x.county, x.state), axis=1)
senate_data_2020['democrat votes'] = senate_data_2020['democrat votes'].apply(int)
senate_data_2020['republican votes'] = senate_data_2020['republican votes'].apply(int)
#Looks up each county's FIPS code in the (already cleaned) historical data; NaN when there is no match
senate_data_2020['FIPS'] = senate_data_2020.apply(lambda x: find_FIPS(x.county, x.state), axis=1)


#State codes are converted to int so they can be compared with numbers below
county_geog['STATEFP'] = county_geog['STATEFP'].apply(int)

#Moves Hawaii closer to the continental US on the map
#.copy() makes move_HI an independent frame, so assigning its geometry no longer
#triggers pandas' SettingWithCopyWarning
move_HI = county_geog.loc[county_geog['STATEFP'] == 15].copy()
move_HI['geometry'] =  move_HI['geometry'].translate(45, 5)
county_geog = county_geog.loc[county_geog['STATEFP'] != 15]
#pd.concat replaces DataFrame.append, which was removed in pandas 2.0
county_geog = pd.concat([county_geog, move_HI])

#Removes Alaska and US territories
county_geog = county_geog.loc[county_geog['STATEFP'] != 2]
county_geog = county_geog.loc[county_geog['STATEFP'] < 60]

county_geog = county_geog.drop(columns=['STATEFP', 'COUNTYFP', 'NAME', 'LSAD', 'AFFGEOID', 'COUNTYNS', 'AWATER'])
county_geog = county_geog.rename(columns={'GEOID':'FIPS', 'ALAND':'area'})
#FIPS becomes an int so it can be merged with the election and CVAP tables
county_geog['FIPS'] = county_geog['FIPS'].apply(int)

#Converts area from square meters to square miles
county_geog['area'] = 0.00000038610*county_geog['area']

# Output captured from the notebook when the preceding cell was run:
# <ipython-input-10-ae5dd1e729f0>:5: SettingWithCopyWarning:
# A value is trying to be set on a copy of a slice from a DataFrame.
# Try using .loc[row_indexer,col_indexer] = value instead
#
# See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
#   move_HI['geometry'] =  move_HI['geometry'].translate(45, 5)


#Gives the 2019 table the 2012 column names so the three tables line up when concatenated
#(assumes the SAS file holds the same columns in the same order -- TODO confirm)
cvap_2019.columns = cvap_2012.columns
cvap_2012['year'] = 2012
cvap_2016['year'] = 2016
cvap_2019['year'] = 2019
cvap = pd.concat([cvap_2012, cvap_2016, cvap_2019])
cvap['GEOID'] = cvap['GEOID'].apply(fix_FIPS)
cvap = cvap.loc[cvap['LNNUMBER'] == 1] #Selects overall population
cvap = cvap.drop(columns=['GEONAME', 'LNTITLE', 'LNNUMBER', 'TOT_MOE', 'ADU_EST', 'ADU_MOE', 'CIT_EST', 'CIT_MOE', 'CVAP_MOE'])
cvap = cvap.rename(columns={'GEOID': 'FIPS', 'TOT_EST': 'population', 'CVAP_EST': 'CVAP'})

#Combines CVAP data from 2012, 2016, and 2019 into one row
cvap = pd.pivot_table(cvap, index='FIPS', columns='year', values=['population', 'CVAP'])
cvap = cvap.reset_index()

#Fixes the names of columns
#The pivot leaves two-level column labels (value name, year); they are joined as '<value> <year>',
#and the strip removes the trailing space left on 'FIPS', whose second level is empty
cvap.columns = cvap.columns.droplevel(1)  + ' ' + cvap.columns.droplevel(0).map(lambda x: str(x))
cvap.columns = cvap.columns.map(lambda x: x.strip())


#Attaches the population/CVAP columns to each county shape (merge defaults to an inner join on FIPS)
county_geog = county_geog.merge(cvap, on='FIPS')
#Drops counties with any missing value
county_geog = county_geog.dropna()

#Builds state shapes: the 2016 results supply the 'state' name for each county,
#then counties are dissolved by state and the remaining columns are summed
state_geog = county_geog.merge(pres_data_historical.loc[pres_data_historical['year'] == 2016], on='FIPS')
state_geog = state_geog.dissolve(by='state', aggfunc='sum')
state_geog = state_geog.reset_index()
#Keeps only the geometry and the columns for which a state-level sum is used later
state_geog = state_geog.loc[:, ['state', 'geometry', 'population 2012', 'population 2016', 'population 2019', 
                                'CVAP 2012', 'CVAP 2016', 'CVAP 2019', 'area']]

#Reduces the resolution of the county and state borders so maps load faster
county_geog['geometry'] = county_geog['geometry'].simplify(.005)
state_geog['geometry'] = state_geog['geometry'].simplify(.005)


#Looks up each 2020 county's FIPS code in the historical data (NaN when there is no match),
#then stacks the historical and 2020 county rows into one frame
pres_data_2020['FIPS'] = pres_data_2020.apply(lambda x: find_FIPS(x.county, x.state), axis=1)
pres_counties = pd.concat([pres_data_historical, pres_data_2020])

#margin is republican pct minus democrat pct, so positive values favour the Republican
pres_counties['margin'] = pres_counties['republican pct'] - pres_counties['democrat pct']
#Anything that is not strictly positive (an exact tie or NaN included) is labelled 'D'
pres_counties['margin direction'] = pres_counties['margin'].apply(lambda x: 'R' if x > 0 else 'D')
pres_counties['absolute margin'] = pres_counties['margin'].apply(abs)


#Aggregates the county rows to one row per state and year
#NOTE(review): .sum() also adds up the pct, margin and FIPS columns, which is not meaningful;
#the pct and margin columns are recomputed from the summed vote counts below
pres_states = pres_counties.groupby(['state', 'year']).sum()
pres_states = pres_states.reset_index()
pres_states.columns.names = ['']
pres_states = pres_states.dropna()

#Recomputes state-level percents and margins from the summed votes
pres_states['democrat pct'] = 100*pres_states['democrat votes']/pres_states['total votes']
pres_states['republican pct'] = 100*pres_states['republican votes']/pres_states['total votes']
pres_states['margin'] = pres_states['republican pct'] - pres_states['democrat pct']
pres_states['margin direction'] = pres_states['margin'].apply(lambda x: 'R' if x > 0 else 'D')
pres_states['absolute margin'] = pres_states['margin'].apply(abs)


#Tags each 2020 row with its race ('pres' or 'sen') and stacks both into one frame
pres_data_2020['race'] = 'pres'
senate_data_2020['race'] = 'sen'
counties_2020 = pd.concat([pres_data_2020, senate_data_2020])
#'year' only exists on the presidential rows and is always 2020 there
counties_2020 = counties_2020.drop(columns=['year'])

#margin is republican pct minus democrat pct; non-positive or NaN margins are labelled 'D'
counties_2020['margin'] = counties_2020['republican pct'] - counties_2020['democrat pct']
counties_2020['margin direction'] = counties_2020['margin'].apply(lambda x: 'R' if x > 0 else 'D')
counties_2020['absolute margin'] = counties_2020['margin'].apply(abs)


#Aggregates the 2020 county rows to one row per state and race
states_2020 = counties_2020.groupby(['state', 'race']).sum()
states_2020 = states_2020.reset_index()
states_2020.columns.names = ['']
states_2020 = states_2020.dropna()

#Recomputes state-level percents and margins from the summed votes
#(the summed county pct/margin values are overwritten here)
states_2020['democrat pct'] = 100*states_2020['democrat votes']/states_2020['total votes']
states_2020['republican pct'] = 100*states_2020['republican votes']/states_2020['total votes']
states_2020['margin'] = states_2020['republican pct'] - states_2020['democrat pct']
states_2020['margin direction'] = states_2020['margin'].apply(lambda x: 'R' if x > 0 else 'D')
states_2020['absolute margin'] = states_2020['margin'].apply(abs)


#Restricts both races to the states in senate_states, then totals each race
national_2020 = states_2020.loc[states_2020['state'].isin(senate_states)]
national_2020 = national_2020.groupby('race').sum().reset_index()
#Turnout in millions of votes
national_2020['turnout'] = national_2020['total votes']/1000000

#The margin is first a vote difference, then converted to a percent of total votes
national_2020['margin'] = national_2020['republican votes'] - national_2020['democrat votes']
national_2020['margin'] = 100*national_2020['margin']/national_2020['total votes']
national_2020['margin direction'] = national_2020['margin'].apply(lambda x: 'R' if x > 0 else 'D')
national_2020['absolute margin'] = national_2020['margin'].apply(abs)


#Gets turnout data from the overall dataset
#(year > 2015 keeps the 2016 and 2020 rows; the pivot puts one total-votes column per year)
county_turnout = pres_counties.loc[pres_counties['year'] > 2015]
county_turnout = pd.pivot_table(county_turnout, index=['state', 'county', 'FIPS'], columns='year', values='total votes')
county_turnout = county_turnout.reset_index()
county_turnout = county_turnout.rename(columns = {2016:'total votes 2016', 2020:'total votes 2020'})
#Attaches the turnout columns to the county shapes
county_turnout = county_geog.merge(county_turnout, on='FIPS')

#Calculates increase in turnout
county_turnout['total increase'] = county_turnout['total votes 2020'] - county_turnout['total votes 2016']
county_turnout['pct increase'] = 100*county_turnout['total increase']/county_turnout['total votes 2016']

#Calculates turnout as a percent of citizen voting age population
#(2020 turnout is divided by the 2019 CVAP, the latest vintage loaded)
county_turnout['pct turnout 2016'] = 100*county_turnout['total votes 2016']/county_turnout['CVAP 2016']
county_turnout['pct turnout 2020'] = 100*county_turnout['total votes 2020']/county_turnout['CVAP 2019']

#Gets state data
#(year > 2015 keeps the 2016 and 2020 rows; the pivot puts one total-votes column per year)
state_turnout = pres_states.loc[pres_states['year'] > 2015]
state_turnout = pd.pivot_table(state_turnout, index=['state'], columns='year', values='total votes')
state_turnout = state_turnout.reset_index()
state_turnout = state_turnout.rename(columns = {2016:'total votes 2016', 2020:'total votes 2020'})

#Attaches the turnout columns to the state shapes
state_turnout = state_geog.merge(state_turnout, on='state')

#Calculates increase in turnout
state_turnout['total increase'] = state_turnout['total votes 2020'] - state_turnout['total votes 2016']
state_turnout['pct increase'] = 100*state_turnout['total increase']/state_turnout['total votes 2016']

#Calculates turnout as a percent of citizen voting age population
#(2020 turnout is divided by the 2019 CVAP, the latest vintage loaded)
state_turnout['pct turnout 2016'] = 100*state_turnout['total votes 2016']/state_turnout['CVAP 2016']
state_turnout['pct turnout 2020'] = 100*state_turnout['total votes 2020']/state_turnout['CVAP 2019']

#Calculates national turnout by summing every county row for each year
national_turnout = pres_counties.groupby('year').sum().reset_index()

#Calculates turnout in millions of votes
national_turnout['turnout'] = national_turnout['total votes']/1000000

#Sets default options for maps
opts.defaults(
    opts.Polygons(xaxis='bare', yaxis='bare', line_width=0.5, hover_line_color='darkgoldenrod'))

#Creates state borders map to emphasize state borders in national maps of counties
#(only the geometry column is kept; the result is overlaid on county maps with `* borders`)
state_borders = state_geog.loc[:, ['geometry']]
borders = state_borders.hvplot(c='clear', geo=True, line_width=.75, hover_line_color=None)


#Bar chart of national turnout (millions of votes) by year
hover = HoverTool(tooltips=[('', 'Turnout: @turnout million')])

national_turnout.hvplot.bar(x='year', y='turnout', xlabel='Year', ylabel='Turnout (millions)', 
                            title= 'National Turnout from 2000 to 2020', hover_cols='turnout', tools=[hover])


#State map of the percent increase in turnout; the tooltip refers to the
#'pct increase' column as @pct_increase
hover = HoverTool(tooltips=[('', '@state'),
                            ('', 'Increase in Turnout: @pct_increase%')])
hover_cols=['state', 'pct increase']

state_turnout.hvplot(c='pct increase', geo=True, title='Increase in Turnout from 2016 to 2020 by State',
                     tools=[hover], hover_cols=hover_cols, cmap='greens', clim=(0, 32))


#County tooltip shared by the Wisconsin and Ohio maps below
hover = HoverTool(tooltips=[('', '@county, @state'),
                            ('', 'Increase in Turnout: @pct_increase%')])
hover_cols=['state', 'county', 'pct increase']

#County map of the percent increase in turnout for Wisconsin
#(title typo 'fron' corrected to 'from')
wisconsin_turnout = county_turnout.loc[county_turnout['state'] == 'Wisconsin']
wisconsin_turnout.hvplot(c='pct increase', geo=True, title='Wisconsin Increase in Turnout from 2016 to 2020',
                         tools=[hover], hover_cols=hover_cols, cmap='PiYG', clim=(-25, 25))


#Same map for Ohio, reusing the hover tool and hover columns defined above
ohio_turnout = county_turnout.loc[county_turnout['state'] == 'Ohio']
ohio_turnout.hvplot(c='pct increase', geo=True, title='Ohio Increase in Turnout from 2016 to 2020',
                    tools=[hover], hover_cols=hover_cols, cmap='PiYG', clim=(-25, 25))


#National county map of the percent increase in turnout, with state borders overlaid
hover = HoverTool(tooltips=[('', '@county, @state'),
                            ('', 'Increase in Turnout: @pct_increase%')])
hover_cols=['state', 'county', 'pct increase']

county_turnout.hvplot(c='pct increase', geo=True, title='Increase in Turnout from 2016 to 2020 by County', 
                      tools=[hover], hover_cols=hover_cols, cmap='PiYG', clim=(-25, 25)) * borders


#State map coloured by 2020 turnout as a percent of CVAP; the tooltip shows 2016 and 2020
hover = HoverTool(tooltips=[('', '@state'),
                            ('', '2016 Turnout: @pct_turnout_2016%'),
                            ('', '2020 Turnout: @pct_turnout_2020%')])
hover_cols=['state', 'pct turnout 2016', 'pct turnout 2020']

state_turnout.hvplot(c='pct turnout 2020', geo=True, title='2020 Voting Age Citizen Turnout by State', 
                          hover_cols=hover_cols, tools=[hover], cmap='greens', clim=(45, 85))


#County version of the same map, with state borders overlaid
hover = HoverTool(tooltips=[('', '@county, @state'),
                            ('', '2016 Turnout: @pct_turnout_2016%'),
                            ('', '2020 Turnout: @pct_turnout_2020%')])
hover_cols=['state', 'county', 'pct turnout 2016', 'pct turnout 2020']

county_turnout.hvplot(c='pct turnout 2020', geo=True, title='2020 Voting Age Citizen Turnout by County', 
                      tools=[hover], hover_cols=hover_cols, cmap='greens', clim=(45, 85)) * borders


#Gets margin data from the overall dataset
#(only the 2012 and 2020 rows are kept; the pivot puts one set of margin columns per year)
county_margin = pres_counties.query("year == 2012 or year == 2020")
index = ['state', 'county', 'FIPS']
values = ['margin', 'margin direction', 'absolute margin']
#aggfunc=lambda x:x passes values through unchanged, which also lets the text column 'margin direction' be pivoted
county_margin = pd.pivot_table(county_margin, index=index, columns='year', values=values, aggfunc=lambda x:x)
county_margin = county_margin.reset_index()
#Joins the two column levels as '<value> <year>' and strips the trailing space left on the index columns
county_margin.columns = county_margin.columns.droplevel(1) + ' ' + county_margin.columns.droplevel(0).map(lambda x: str(x))
county_margin.columns = county_margin.columns.map(lambda x: x.strip())

#Attaches the margin columns to the county shapes
county_margin = county_geog.merge(county_margin, on='FIPS')

#Calculates change in margin from 2012 to 2020
#(positive shift = movement toward the Republican; non-positive or NaN shifts are labelled 'D')
county_margin['margin shift'] = county_margin['margin 2020'] - county_margin['margin 2012']
county_margin['shift direction'] = county_margin['margin shift'].apply(lambda x: 'R' if x > 0 else 'D')
county_margin['absolute shift'] = county_margin['margin shift'].apply(abs)


#Gets state data
#(same pivot as for counties; reuses the `values` list defined in the county margin cell)
state_margin = pres_states.query("year == 2012 or year == 2020")
state_margin = pd.pivot_table(state_margin, index=['state'], columns='year', values=values, aggfunc=lambda x:x)
state_margin = state_margin.reset_index()
#Joins the two column levels as '<value> <year>' and strips the trailing space left on 'state'
state_margin.columns = state_margin.columns.droplevel(1) + ' ' + state_margin.columns.droplevel(0).map(str)
state_margin.columns = state_margin.columns.map(lambda x: x.strip())

#Attaches the margin columns to the state shapes
state_margin = state_geog.merge(state_margin, on='state')

#Calculates change in margin from 2012 to 2020
state_margin['margin shift'] = state_margin['margin 2020'] - state_margin['margin 2012']
state_margin['shift direction'] = state_margin['margin shift'].apply(lambda x: 'R' if x > 0 else 'D')
state_margin['absolute shift'] = state_margin['margin shift'].apply(abs)


#Calculates the national popular vote margin for each year
national_margin = pres_counties.groupby('year').sum().reset_index()
#The margin is first a vote difference, then converted to a percent of total votes
national_margin['margin'] = national_margin['republican votes'] - national_margin['democrat votes']
national_margin['margin'] = 100*national_margin['margin']/national_margin['total votes'] 
national_margin['margin direction'] = national_margin['margin'].apply(lambda x: 'R' if x > 0 else 'D')
national_margin['absolute margin'] = national_margin['margin'].apply(abs)


#Bar chart of the national popular vote margin by year
hover = HoverTool(tooltips=[('', ' Popular Vote Margin: +@absolute_margin% @margin_direction')])
hover_cols= ['absolute margin', 'margin direction']

#Bars show the absolute margin and are coloured red for 'R' years and blue otherwise
national_margin['color'] = national_margin['margin direction'].apply(lambda x: 'red' if x=='R' else 'blue')
national_margin.hvplot.bar(x='year', y='absolute margin', xlabel='Year', ylabel='Popular Vote Margin (%)', c='color',
                        title= 'National Popular Vote Margin from 2000 to 2020', tools=[hover], hover_cols=hover_cols)


#County tooltip (2012 margin, 2020 margin and the shift) shared by the two maps below
hover = HoverTool(tooltips=[('', '@county, @state'),
                            ('', '2012 Margin: +@absolute_margin_2012% @margin_direction_2012'),
                            ('', '2020 Margin: +@absolute_margin_2020% @margin_direction_2020'),
                            ('', 'Shift: +@absolute_shift% @shift_direction')])
hover_cols=['state', 'county', 'absolute margin 2012', 'margin direction 2012', 
            'absolute margin 2020', 'margin direction 2020', 'absolute shift', 'shift direction']

#County map of the 2012-to-2020 margin shift in Pennsylvania
pennsylvania_margin = county_margin.loc[county_margin['state'] == 'Pennsylvania']
pennsylvania_margin.hvplot(c='margin shift', geo=True, title='Pennsylvania Change in Margin from 2012 to 2020', 
                           tools=[hover], hover_cols = hover_cols, cmap='bwr', clim=(-40, 40))


#Same map for Pennsylvania together with the listed neighbouring states
Pennsylvania_neighbors = ['Pennsylvania', 'Ohio', 'New York', 'New Jersey', 'Delaware', 'Maryland', 'West Virginia']
neighbor_margin = county_margin.loc[county_margin['state'].isin(Pennsylvania_neighbors)]
neighbor_margin.hvplot(c='margin shift', geo=True, title='Change in Margin from 2012 to 2020 Surrounding Pennsylvania', 
                       tools=[hover], hover_cols=hover_cols, cmap='bwr', clim=(-40, 40))


#National county map of the 2012-to-2020 margin shift
hover = HoverTool(tooltips=[('', '@county, @state'),
                            ('', '2012 Margin: +@absolute_margin_2012% @margin_direction_2012'),
                            ('', '2020 Margin: +@absolute_margin_2020% @margin_direction_2020'),
                            ('', 'Shift: +@absolute_shift% @shift_direction')])
#NOTE(review): the tooltips above still reference the 2020 margin and shift columns,
#but those are commented out of hover_cols below, so they are not passed to the plot.
#This looks like a leftover debugging workaround -- confirm before restoring the full list.
hover_cols=['state', 'county', 'absolute margin 2012', 'margin direction 2012'] 
            #'absolute margin 2020', 'margin direction 2020']#, 'absolute shift', 'shift direction']
#test_test = county_margin.dropna()

county_margin.hvplot(c='margin shift', geo=True, title='Change in Margin from 2012 to 2020', 
                     tools=[hover], hover_cols=hover_cols, cmap='bwr', clim=(-40, 40))


#State map of the 2012-to-2020 margin shift
hover = HoverTool(tooltips=[('', '@state'),
                            ('', '2012 Margin: +@absolute_margin_2012% @margin_direction_2012'),
                            ('', '2020 Margin: +@absolute_margin_2020% @margin_direction_2020'),
                            ('', 'Shift: +@absolute_shift% @shift_direction')])
hover_cols=['state', 'absolute margin 2012', 'margin direction 2012', 
            'absolute margin 2020', 'margin direction 2020', 'absolute shift', 'shift direction']

state_margin.hvplot(c='margin shift', geo=True, title='Change in Margin from 2012 to 2020', 
                    tools=[hover], hover_cols=hover_cols, cmap='bwr', clim=(-30, 30))


#Population density = 2019 population divided by land area (converted to square miles earlier)
county_margin['population density'] = county_margin['population 2019']/county_margin['area']
hover = HoverTool(tooltips=[('', '@county, @state'),
                            ('', 'Shift: +@absolute_shift% @shift_direction')])
hover_cols=['state', 'county',  'absolute shift', 'shift direction']
#Only the plain columns needed for the scatter plot are selected (no geometry)
graph_data = county_margin.loc[:, ['population density', 'state', 'county',
                                   'margin shift', 'absolute shift', 'shift direction']]
#Scatter of margin shift against population density on a logarithmic x axis
graph_data.hvplot.scatter(x='population density', y='margin shift',  xlabel=' Log Population Density (people per square mile)', 
                          ylabel='Change in Margin (%)', logx=True, s=10, tools=[hover], hover_cols=hover_cols, 
                         title='Population Density vs Change in Presidential Margin from 2012 to 2020')


#Gets presidential and senate data from the overall dataset
#(the pivot puts one set of columns per race, named '<value> pres' / '<value> sen')
index = ['state', 'county', 'FIPS']
values = ['total votes', 'margin', 'margin direction', 'absolute margin']
counties_2020 = pd.pivot_table(counties_2020, index=index, columns='race', values=values, aggfunc=lambda x:x)
counties_2020 = counties_2020.reset_index()
counties_2020.columns = counties_2020.columns.droplevel(1) + ' ' + counties_2020.columns.droplevel(0).map(lambda x: str(x))
counties_2020.columns = counties_2020.columns.map(lambda x: x.strip())

#Calculates difference in turnout
counties_2020['turnout diff'] = counties_2020['total votes pres'] - counties_2020['total votes sen']
counties_2020['turnout pct'] = 100*counties_2020['turnout diff']/counties_2020['total votes sen']
#Differences above 100% are blanked out; np.nan is used because the np.NaN alias was removed in NumPy 2.0
counties_2020['turnout pct'] = counties_2020['turnout pct'].apply(lambda x: np.nan if x > 100 else x)
#Missing senate totals are only zero-filled after the percentages above have been computed
counties_2020['total votes sen'] = counties_2020['total votes sen'].fillna(0)

#Calculates difference in margin
#(positive = presidential margin more Republican than the senate margin; NaN differences give 'N/A')
counties_2020['margin diff'] = counties_2020['margin pres'] - counties_2020['margin sen']
counties_2020['candidate'] = counties_2020['margin diff'].apply(lambda x: 'Trump' if x > 0 else 'Biden' if x <= 0 else 'N/A')
counties_2020['absolute diff'] = counties_2020['margin diff'].apply(abs)
counties_2020['absolute diff'] = counties_2020['absolute diff'].fillna(0)
counties_2020['absolute margin sen'] = counties_2020['absolute margin sen'].fillna(0)
counties_2020['margin direction sen'] = counties_2020['margin direction sen'].fillna('N/A')

#Attaches the president/senate columns to the county shapes
counties_2020 = county_geog.merge(counties_2020, on='FIPS')

#Gets state level presidential and senate data
#(reuses the `values` list from the county cell; columns become '<value> pres' / '<value> sen')
states_2020 = pd.pivot_table(states_2020, index=['state'], columns='race', values=values, aggfunc=lambda x:x)
states_2020 = states_2020.reset_index()
states_2020.columns = states_2020.columns.droplevel(1) + ' ' + states_2020.columns.droplevel(0).map(str)
states_2020.columns = states_2020.columns.map(lambda x: x.strip())

#Calculates difference in turnout
states_2020['turnout diff'] = states_2020['total votes pres'] - states_2020['total votes sen']
states_2020['turnout pct'] = 100*states_2020['turnout diff']/states_2020['total votes sen']
#Differences above 100% are blanked out; np.nan is used because the np.NaN alias was removed in NumPy 2.0
states_2020['turnout pct'] = states_2020['turnout pct'].apply(lambda x: np.nan if x > 100 else x)
#Missing senate totals are only zero-filled after the percentages above have been computed
states_2020['total votes sen'] = states_2020['total votes sen'].fillna(0)

#Calculates difference in margin
#(positive = presidential margin more Republican than the senate margin; NaN differences give 'N/A')
states_2020['margin diff'] = states_2020['margin pres'] - states_2020['margin sen']
states_2020['candidate'] = states_2020['margin diff'].apply(lambda x: 'Trump' if x > 0 else 'Biden' if x <= 0 else 'N/A')
states_2020['absolute diff'] = states_2020['margin diff'].apply(abs)
states_2020['absolute diff'] = states_2020['absolute diff'].fillna(0)
states_2020['absolute margin sen'] = states_2020['absolute margin sen'].fillna(0)
states_2020['margin direction sen'] = states_2020['margin direction sen'].fillna('N/A')

#Attaches the president/senate columns to the state shapes
states_2020 = state_geog.merge(states_2020, on='state')


#Bar chart comparing presidential and senate turnout (millions of votes)
hover = HoverTool(tooltips=[('', ' Turnout: @turnout million')])

#Readable label for each race: 'pres' becomes 'President', anything else 'Senate'
national_2020['election'] = national_2020['race'].apply(lambda x: 'President' if x=='pres' else 'Senate')
national_2020.hvplot.bar(x='election', y='turnout', title= 'Turnout in States with both Presidential and Senate Elections', 
                           xlabel='Election', ylabel='Turnout (millions)', tools=[hover], hover_cols='turnout')


#Bar chart comparing the combined presidential and senate margins
hover = HoverTool(tooltips=[('', 'Election Margin: +@absolute_margin% @margin_direction')])
hover_cols = ['absolute margin', 'margin direction']

#The y axis plots 'margin' (republican minus democrat, in percent of total votes), so it is
#labelled as a margin; the previous label 'Turnout (millions)' was copied from the turnout chart
#NOTE(review): relies on the 'election' column added to national_2020 in the turnout chart cell
national_2020.hvplot.bar(x='election', y='margin', xlabel='Election', ylabel='Margin (%)',
                         title= 'Combined Margin in States with both Presidential and Senate Elections',
                         tools=[hover], hover_cols=hover_cols, c='red')


#Georgia county map of the presidential margin minus the senate margin
hover = HoverTool(tooltips=[('', '@county, @state'),
                            ('', 'President Margin: +@absolute_margin_pres% @margin_direction_pres'),
                            ('', 'Senate Margin: +@absolute_margin_sen% @margin_direction_sen'),
                            ('', 'Difference: +@absolute_diff% @candidate')])
hover_cols=['state', 'county', 'absolute margin pres', 'margin direction pres', 'absolute margin sen', 'margin direction sen',
            'absolute diff', 'candidate']

georgia_2020 = counties_2020.loc[counties_2020['state'] == 'Georgia']
georgia_2020.hvplot(c='margin diff', geo=True, title='Georgia Presidential result relative to Senate', 
                    tools=[hover], hover_cols=hover_cols, cmap='bwr', clim=(-10, 10))


#State map of the same difference
hover = HoverTool(tooltips=[('', '@state'),
                            ('', 'President Margin: +@absolute_margin_pres% @margin_direction_pres'),
                            ('', 'Senate Margin: +@absolute_margin_sen% @margin_direction_sen'),
                            ('', 'Difference: +@absolute_diff% @candidate')])
hover_cols=['state', 'absolute margin pres', 'margin direction pres', 'absolute margin sen', 'margin direction sen', 
            'absolute diff', 'candidate']

states_2020.hvplot(c='margin diff', geo=True, title='Presidential result relative to Senate by State', 
                   tools=[hover], hover_cols=hover_cols, cmap='bwr', clim=(-25, 25))


#National county map of the same difference, with state borders overlaid
hover = HoverTool(tooltips=[('', '@county, @state'),
                            ('', 'President Margin: +@absolute_margin_pres% @margin_direction_pres'),
                            ('', 'Senate Margin: +@absolute_margin_sen% @margin_direction_sen'),
                            ('', 'Difference: +@absolute_diff% @candidate')])
hover_cols=['state', 'county', 'absolute margin pres', 'margin direction pres', 'absolute margin sen', 'margin direction sen', 
            'absolute diff', 'candidate']

counties_2020.hvplot(c='margin diff', geo=True, title='Presidential result relative to Senate by County',   
                     tools=[hover], hover_cols=hover_cols, cmap='bwr', clim=(-25, 25)) * borders


#Georgia county map of presidential turnout relative to senate turnout.
#The map is coloured by 'turnout pct', so it gets its own turnout tooltip and hover columns
#instead of reusing the margin tooltip left in `hover`/`hover_cols` by the preceding cell.
hover = HoverTool(tooltips=[('', '@county, @state'),
                            ('', 'President Turnout: @total_votes_pres'),
                            ('', 'Senate Turnout: @total_votes_sen'),
                            ('', 'Difference: +@turnout_pct% President')])
hover_cols=['state', 'county', 'total votes pres', 'total votes sen', 'turnout pct']

georgia_2020 = counties_2020.loc[counties_2020['state'] == 'Georgia']
georgia_2020.hvplot(c='turnout pct', geo=True, title='Georgia Presidential turnout relative to Senate',
                    tools=[hover], hover_cols=hover_cols, cmap='greens', clim=(0, 10))


#State map of presidential turnout relative to senate turnout
hover = HoverTool(tooltips=[('', '@state'),
                            ('', 'President Turnout: @total_votes_pres'),
                            ('', 'Senate Turnout: @total_votes_sen'),
                            ('', 'Difference: +@turnout_pct% President')])
hover_cols=['state', 'total votes pres', 'total votes sen', 'turnout pct']

states_2020.hvplot(c='turnout pct', geo=True, title='Presidential turnout relative to Senate by State',
                   tools=[hover], hover_cols=hover_cols, cmap='greens', clim=(0, 10))


#County version of the same map, with state borders overlaid
hover = HoverTool(tooltips=[('', '@county, @state'),
                            ('', 'President Turnout: @total_votes_pres'),
                            ('', 'Senate Turnout: @total_votes_sen'),
                            ('', 'Difference: +@turnout_pct% President')])
hover_cols=['state', 'county', 'total votes pres', 'total votes sen', 'turnout pct']

counties_2020.hvplot(c='turnout pct', geo=True, title='Presidential turnout relative to Senate by County',
                     tools=[hover], hover_cols=hover_cols, cmap='greens', clim=(0, 10)) * borders

# An Investigation into "The Statistical Case Against Biden's Win" by Steve Cortes

# Steps

# Data Collection

# 2020 Presidential Results

# Historical Presidential Results

# 2020 Senate Results

# Geographic Data

# Data Cleaning

# 2020 Presidential Results

# Historical Presidential Results

# 2020 Senate Results

# Geographic Data

# Combined Geographic Data

# Combined Presidential Data

# Combined 2020 President and Senate Data

# Data Analysis

# The Four Claims Made

# Claim 1: Turnout

# Calculating Turnout Data

# Visualizations of Turnout

# Claim 2: Overperformance vs Obama

# Calculating Popular Vote Margins

# Visualizations of Margins

# Claim 3: Biden Only Ballots

# Calculating Turnout and Margin for President and Senate Data

# President vs Senate Visualization

# Claim 4: Absence of Mail-In Vote Vetting

# Conclusion

# Footnotes