• Author: putnik
  • License: CC0
In [1]:
import pandas as pd
df = pd.read_csv('https://raw.githubusercontent.com/grwlf/COVID-19_plus_Russia/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv')
df.head()
Out[1]:
UID iso2 iso3 code3 FIPS Admin2 Province_State Country_Region Lat Long_ ... 10/1/20 10/2/20 10/3/20 10/4/20 10/5/20 10/6/20 10/7/20 10/8/20 10/9/20 10/10/20
0 84001001 US USA 840 1001.0 Autauga Alabama US 32.539527 -86.644082 ... 1798 1805 1818 1828 1831 1839 1852 1863 1882 1898
1 84001003 US USA 840 1003.0 Baldwin Alabama US 30.727750 -87.722071 ... 5997 6024 6048 6073 6085 6116 6134 6141 6172 6190
2 84001005 US USA 840 1005.0 Barbour Alabama US 31.868263 -85.387129 ... 898 902 921 921 921 923 927 927 939 942
3 84001007 US USA 840 1007.0 Bibb Alabama US 32.996421 -87.125115 ... 672 675 678 686 687 691 703 708 719 726
4 84001009 US USA 840 1009.0 Blount Alabama US 33.982109 -86.567906 ... 1634 1642 1655 1656 1662 1665 1673 1681 1689 1704

5 rows × 274 columns

In [2]:
import numpy as np
new_df = df.iloc[:, np.r_[6, 49:len(df.columns)]].groupby(['Province_State']).sum().T
new_df
Out[2]:
Province_State Alabama Alaska American Samoa Arizona Arkansas California Colorado Connecticut Delaware Diamond Princess ... Tennessee Texas Utah Vermont Virgin Islands Virginia Washington West Virginia Wisconsin Wyoming
2/29/20 0 0 0 1 0 12 0 0 0 0 ... 0 0 0 0 0 0 7 0 0 0
3/1/20 0 0 0 1 0 12 0 0 0 0 ... 0 0 0 0 0 0 11 0 0 0
3/2/20 0 0 0 1 0 21 0 0 0 0 ... 0 0 0 0 0 0 18 0 0 0
3/3/20 0 0 0 1 0 25 0 0 0 0 ... 0 0 0 0 0 0 27 0 0 0
3/4/20 0 0 0 1 0 35 0 0 0 0 ... 0 0 0 0 0 0 39 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
10/6/20 160477 8752 0 221934 88071 838202 74169 59241 21466 49 ... 205375 798569 79439 1821 1321 152945 90663 16942 136379 6770
10/7/20 161418 8878 0 222538 88880 841928 74899 59364 21550 49 ... 207455 803690 80446 1827 1322 153451 91208 17150 138698 6899
10/8/20 161975 9005 0 223401 90145 845270 75761 59748 21682 49 ... 209447 807754 81947 1838 1324 155270 91918 17333 141830 7092
10/9/20 163465 9182 0 224084 91312 849076 76595 60038 21827 49 ... 211003 812191 83290 1846 1324 156372 92560 17707 144818 7335
10/10/20 164526 9431 0 224978 92220 852410 77618 60038 21998 49 ... 212649 815915 84644 1857 1325 157609 93035 17927 147560 7455

225 rows × 58 columns

In [3]:
elect_df = pd.read_csv('usa-election-by-county.csv')
elect_df.head()
Out[3]:
State ST Fips County Precincts Votes Democrats 08 (Votes) Democrats 12 (Votes) Republicans 08 (Votes) Republicans 12 (Votes) ... total08 total12 other08 other12 Other12 Frac Other08 Frac Rep12 Frac2 Rep08 Frac2 Dem12 Frac2 Dem08 Frac2
0 Georgia GA 13089 DeKalb County, Georgia 192.0 295871.0 254594.0 238224.0 65581.0 64392.0 ... 322304.0 306265.0 2129.0 3649.0 0.011915 0.006606 0.212785 0.204829 0.787215 0.795171
1 Texas TX 48487 Wilbarger County, Texas 7.0 4101.0 1196.0 971.0 3283.0 2956.0 ... 4509.0 3980.0 30.0 53.0 0.013317 0.006653 0.752737 0.732976 0.247263 0.267024
2 Virginia VA 51111 Lunenburg County, Virginia 12.0 5577.0 2703.0 2684.0 2900.0 2969.0 ... 5650.0 5734.0 47.0 81.0 0.014126 0.008319 0.525208 0.517580 0.474792 0.482420
3 Georgia GA 13297 Walton County, Georgia 21.0 40425.0 8469.0 8148.0 27253.0 29036.0 ... 36026.0 37612.0 304.0 428.0 0.011379 0.008438 0.780873 0.762919 0.219127 0.237081
4 North Carolina NC 37011 Avery County, North Carolina 19.0 8060.0 2178.0 1882.0 5681.0 5766.0 ... 7943.0 7759.0 84.0 111.0 0.014306 0.010575 0.753923 0.722866 0.246077 0.277134

5 rows × 158 columns

In [4]:
state_elect_df = elect_df.copy()
state_elect_df['dem_votes'] = state_elect_df['Votes'] * state_elect_df['Democrats 2016']
state_elect_df['rep_votes'] = state_elect_df['Votes'] * state_elect_df['Republicans 2016']
state_elect_df = state_elect_df.groupby(['State']).sum()
state_elect_df = state_elect_df.loc[:,['dem_votes', 'rep_votes']]
state_elect_df.head()
Out[4]:
dem_votes rep_votes
State
Alabama 71808400.0 130692500.0
Alaska 0.0 0.0
Arizona 93625000.0 102115400.0
Arkansas 37872900.0 67790400.0
California 736249000.0 391620900.0
In [5]:
dem_states = state_elect_df[state_elect_df['dem_votes'] > state_elect_df['rep_votes']].index.values
rep_states = state_elect_df[state_elect_df['dem_votes'] < state_elect_df['rep_votes']].index.values
rep_states
Out[5]:
array(['Alabama', 'Arizona', 'Arkansas', 'Florida', 'Georgia', 'Idaho',
       'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Michigan',
       'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'North Carolina',
       'North Dakota', 'Ohio', 'Oklahoma', 'Pennsylvania',
       'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah',
       'West Virginia', 'Wisconsin', 'Wyoming'], dtype=object)
In [6]:
sum_df = new_df.copy()
sum_df['all'] = sum_df.sum(axis = 1)
sum_df['dem'] = new_df.loc[:,dem_states].sum(axis=1)
sum_df['rep'] = new_df.loc[:,rep_states].sum(axis=1)

sum_df = sum_df.loc[:,['all', 'dem', 'rep']]
sum_df
Out[6]:
Province_State all dem rep
2/29/20 25 24 1
3/1/20 32 31 1
3/2/20 55 52 3
3/3/20 74 68 6
3/4/20 107 101 6
... ... ... ...
10/6/20 7499341 2810046 4624440
10/7/20 7549682 2823647 4660972
10/8/20 7605873 2841622 4698467
10/9/20 7663293 2859173 4737506
10/10/20 7717932 2875414 4775180

225 rows × 3 columns

In [7]:
diff_df = sum_df.diff().fillna(0).astype(int)
diff_df
Out[7]:
Province_State all dem rep
2/29/20 0 0 0
3/1/20 7 7 0
3/2/20 23 21 2
3/3/20 19 16 3
3/4/20 33 33 0
... ... ... ...
10/6/20 41939 11631 29673
10/7/20 50341 13601 36532
10/8/20 56191 17975 37495
10/9/20 57420 17551 39039
10/10/20 54639 16241 37674

225 rows × 3 columns

In [8]:
perc_df = diff_df.copy()
perc_df['dem%'] = perc_df['dem'] / perc_df['all'] * 100
perc_df['rep%'] = perc_df['rep'] / perc_df['all'] * 100
perc_df.fillna(0, inplace=True)
perc_df
Out[8]:
Province_State all dem rep dem% rep%
2/29/20 0 0 0 0.000000 0.000000
3/1/20 7 7 0 100.000000 0.000000
3/2/20 23 21 2 91.304348 8.695652
3/3/20 19 16 3 84.210526 15.789474
3/4/20 33 33 0 100.000000 0.000000
... ... ... ... ... ...
10/6/20 41939 11631 29673 27.733136 70.752760
10/7/20 50341 13601 36532 27.017739 72.569079
10/8/20 56191 17975 37495 31.989109 66.727768
10/9/20 57420 17551 39039 30.566005 67.988506
10/10/20 54639 16241 37674 29.724190 68.950749

225 rows × 5 columns

In [9]:
#!pip install plotly
import plotly.express as px
In [10]:
fig = px.line(diff_df.loc[:,['dem', 'rep']].iloc[1:,:])
fig.update_layout(
    autosize=False,
    width=1000,
    height=500,
)
fig.show()