import pandas as pd
df = pd.read_csv('https://raw.githubusercontent.com/grwlf/COVID-19_plus_Russia/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv')
df.head()
UID | iso2 | iso3 | code3 | FIPS | Admin2 | Province_State | Country_Region | Lat | Long_ | ... | 10/1/20 | 10/2/20 | 10/3/20 | 10/4/20 | 10/5/20 | 10/6/20 | 10/7/20 | 10/8/20 | 10/9/20 | 10/10/20 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 84001001 | US | USA | 840 | 1001.0 | Autauga | Alabama | US | 32.539527 | -86.644082 | ... | 1798 | 1805 | 1818 | 1828 | 1831 | 1839 | 1852 | 1863 | 1882 | 1898 |
1 | 84001003 | US | USA | 840 | 1003.0 | Baldwin | Alabama | US | 30.727750 | -87.722071 | ... | 5997 | 6024 | 6048 | 6073 | 6085 | 6116 | 6134 | 6141 | 6172 | 6190 |
2 | 84001005 | US | USA | 840 | 1005.0 | Barbour | Alabama | US | 31.868263 | -85.387129 | ... | 898 | 902 | 921 | 921 | 921 | 923 | 927 | 927 | 939 | 942 |
3 | 84001007 | US | USA | 840 | 1007.0 | Bibb | Alabama | US | 32.996421 | -87.125115 | ... | 672 | 675 | 678 | 686 | 687 | 691 | 703 | 708 | 719 | 726 |
4 | 84001009 | US | USA | 840 | 1009.0 | Blount | Alabama | US | 33.982109 | -86.567906 | ... | 1634 | 1642 | 1655 | 1656 | 1662 | 1665 | 1673 | 1681 | 1689 | 1704 |
5 rows × 274 columns
import numpy as np
new_df = df.iloc[:, np.r_[6, 49:len(df.columns)]].groupby(['Province_State']).sum().T
new_df
Province_State | Alabama | Alaska | American Samoa | Arizona | Arkansas | California | Colorado | Connecticut | Delaware | Diamond Princess | ... | Tennessee | Texas | Utah | Vermont | Virgin Islands | Virginia | Washington | West Virginia | Wisconsin | Wyoming |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2/29/20 | 0 | 0 | 0 | 1 | 0 | 12 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 0 | 0 | 0 |
3/1/20 | 0 | 0 | 0 | 1 | 0 | 12 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 11 | 0 | 0 | 0 |
3/2/20 | 0 | 0 | 0 | 1 | 0 | 21 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 18 | 0 | 0 | 0 |
3/3/20 | 0 | 0 | 0 | 1 | 0 | 25 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 27 | 0 | 0 | 0 |
3/4/20 | 0 | 0 | 0 | 1 | 0 | 35 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 39 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
10/6/20 | 160477 | 8752 | 0 | 221934 | 88071 | 838202 | 74169 | 59241 | 21466 | 49 | ... | 205375 | 798569 | 79439 | 1821 | 1321 | 152945 | 90663 | 16942 | 136379 | 6770 |
10/7/20 | 161418 | 8878 | 0 | 222538 | 88880 | 841928 | 74899 | 59364 | 21550 | 49 | ... | 207455 | 803690 | 80446 | 1827 | 1322 | 153451 | 91208 | 17150 | 138698 | 6899 |
10/8/20 | 161975 | 9005 | 0 | 223401 | 90145 | 845270 | 75761 | 59748 | 21682 | 49 | ... | 209447 | 807754 | 81947 | 1838 | 1324 | 155270 | 91918 | 17333 | 141830 | 7092 |
10/9/20 | 163465 | 9182 | 0 | 224084 | 91312 | 849076 | 76595 | 60038 | 21827 | 49 | ... | 211003 | 812191 | 83290 | 1846 | 1324 | 156372 | 92560 | 17707 | 144818 | 7335 |
10/10/20 | 164526 | 9431 | 0 | 224978 | 92220 | 852410 | 77618 | 60038 | 21998 | 49 | ... | 212649 | 815915 | 84644 | 1857 | 1325 | 157609 | 93035 | 17927 | 147560 | 7455 |
225 rows × 58 columns
elect_df = pd.read_csv('usa-election-by-county.csv')
elect_df.head()
State | ST | Fips | County | Precincts | Votes | Democrats 08 (Votes) | Democrats 12 (Votes) | Republicans 08 (Votes) | Republicans 12 (Votes) | ... | total08 | total12 | other08 | other12 | Other12 Frac | Other08 Frac | Rep12 Frac2 | Rep08 Frac2 | Dem12 Frac2 | Dem08 Frac2 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Georgia | GA | 13089 | DeKalb County, Georgia | 192.0 | 295871.0 | 254594.0 | 238224.0 | 65581.0 | 64392.0 | ... | 322304.0 | 306265.0 | 2129.0 | 3649.0 | 0.011915 | 0.006606 | 0.212785 | 0.204829 | 0.787215 | 0.795171 |
1 | Texas | TX | 48487 | Wilbarger County, Texas | 7.0 | 4101.0 | 1196.0 | 971.0 | 3283.0 | 2956.0 | ... | 4509.0 | 3980.0 | 30.0 | 53.0 | 0.013317 | 0.006653 | 0.752737 | 0.732976 | 0.247263 | 0.267024 |
2 | Virginia | VA | 51111 | Lunenburg County, Virginia | 12.0 | 5577.0 | 2703.0 | 2684.0 | 2900.0 | 2969.0 | ... | 5650.0 | 5734.0 | 47.0 | 81.0 | 0.014126 | 0.008319 | 0.525208 | 0.517580 | 0.474792 | 0.482420 |
3 | Georgia | GA | 13297 | Walton County, Georgia | 21.0 | 40425.0 | 8469.0 | 8148.0 | 27253.0 | 29036.0 | ... | 36026.0 | 37612.0 | 304.0 | 428.0 | 0.011379 | 0.008438 | 0.780873 | 0.762919 | 0.219127 | 0.237081 |
4 | North Carolina | NC | 37011 | Avery County, North Carolina | 19.0 | 8060.0 | 2178.0 | 1882.0 | 5681.0 | 5766.0 | ... | 7943.0 | 7759.0 | 84.0 | 111.0 | 0.014306 | 0.010575 | 0.753923 | 0.722866 | 0.246077 | 0.277134 |
5 rows × 158 columns
state_elect_df = elect_df.copy()
state_elect_df['dem_votes'] = state_elect_df['Votes'] * state_elect_df['Democrats 2016']
state_elect_df['rep_votes'] = state_elect_df['Votes'] * state_elect_df['Republicans 2016']
state_elect_df = state_elect_df.groupby(['State']).sum()
state_elect_df = state_elect_df.loc[:,['dem_votes', 'rep_votes']]
state_elect_df.head()
dem_votes | rep_votes | |
---|---|---|
State | ||
Alabama | 71808400.0 | 130692500.0 |
Alaska | 0.0 | 0.0 |
Arizona | 93625000.0 | 102115400.0 |
Arkansas | 37872900.0 | 67790400.0 |
California | 736249000.0 | 391620900.0 |
dem_states = state_elect_df[state_elect_df['dem_votes'] > state_elect_df['rep_votes']].index.values
rep_states = state_elect_df[state_elect_df['dem_votes'] < state_elect_df['rep_votes']].index.values
rep_states
array(['Alabama', 'Arizona', 'Arkansas', 'Florida', 'Georgia', 'Idaho', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Michigan', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Pennsylvania', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 'West Virginia', 'Wisconsin', 'Wyoming'], dtype=object)
sum_df = new_df.copy()
sum_df['all'] = sum_df.sum(axis = 1)
sum_df['dem'] = new_df.loc[:,dem_states].sum(axis=1)
sum_df['rep'] = new_df.loc[:,rep_states].sum(axis=1)
sum_df = sum_df.loc[:,['all', 'dem', 'rep']]
sum_df
Province_State | all | dem | rep |
---|---|---|---|
2/29/20 | 25 | 24 | 1 |
3/1/20 | 32 | 31 | 1 |
3/2/20 | 55 | 52 | 3 |
3/3/20 | 74 | 68 | 6 |
3/4/20 | 107 | 101 | 6 |
... | ... | ... | ... |
10/6/20 | 7499341 | 2810046 | 4624440 |
10/7/20 | 7549682 | 2823647 | 4660972 |
10/8/20 | 7605873 | 2841622 | 4698467 |
10/9/20 | 7663293 | 2859173 | 4737506 |
10/10/20 | 7717932 | 2875414 | 4775180 |
225 rows × 3 columns
diff_df = sum_df.diff().fillna(0).astype(int)
diff_df
Province_State | all | dem | rep |
---|---|---|---|
2/29/20 | 0 | 0 | 0 |
3/1/20 | 7 | 7 | 0 |
3/2/20 | 23 | 21 | 2 |
3/3/20 | 19 | 16 | 3 |
3/4/20 | 33 | 33 | 0 |
... | ... | ... | ... |
10/6/20 | 41939 | 11631 | 29673 |
10/7/20 | 50341 | 13601 | 36532 |
10/8/20 | 56191 | 17975 | 37495 |
10/9/20 | 57420 | 17551 | 39039 |
10/10/20 | 54639 | 16241 | 37674 |
225 rows × 3 columns
perc_df = diff_df.copy()
perc_df['dem%'] = perc_df['dem'] / perc_df['all'] * 100
perc_df['rep%'] = perc_df['rep'] / perc_df['all'] * 100
perc_df.fillna(0, inplace=True)
perc_df
Province_State | all | dem | rep | dem% | rep% |
---|---|---|---|---|---|
2/29/20 | 0 | 0 | 0 | 0.000000 | 0.000000 |
3/1/20 | 7 | 7 | 0 | 100.000000 | 0.000000 |
3/2/20 | 23 | 21 | 2 | 91.304348 | 8.695652 |
3/3/20 | 19 | 16 | 3 | 84.210526 | 15.789474 |
3/4/20 | 33 | 33 | 0 | 100.000000 | 0.000000 |
... | ... | ... | ... | ... | ... |
10/6/20 | 41939 | 11631 | 29673 | 27.733136 | 70.752760 |
10/7/20 | 50341 | 13601 | 36532 | 27.017739 | 72.569079 |
10/8/20 | 56191 | 17975 | 37495 | 31.989109 | 66.727768 |
10/9/20 | 57420 | 17551 | 39039 | 30.566005 | 67.988506 |
10/10/20 | 54639 | 16241 | 37674 | 29.724190 | 68.950749 |
225 rows × 5 columns
#!pip install plotly
import plotly.express as px
fig = px.line(diff_df.loc[:,['dem', 'rep']].iloc[1:,:])
fig.update_layout(
autosize=False,
width=1000,
height=500,
)
fig.show()