import pandas as pd
# Download the per-region confirmed-case time series for Russia directly from
# GitHub. Each row is one region; the date-named columns hold the case counts.
df = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/grwlf/COVID-19_plus_Russia/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_RU.csv'
)
# Preview the first five rows (rendered as the notebook cell output).
df.head(n=5)
UID | iso2 | iso3 | code3 | FIPS | Admin2 | Province_State | Country_Region | Lat | Long_ | ... | 10/01/20 | 10/02/20 | 10/03/20 | 10/04/20 | 10/05/20 | 10/06/20 | 10/07/20 | 10/08/20 | 10/09/20 | 10/10/20 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | NaN | NaN | NaN | NaN | NaN | NaN | Adygea Republic | Russia | 44.693901 | 40.152042 | ... | 4114 | 4149 | 4186 | 4227 | 4266 | 4308 | 4348 | 4392 | 4434 | 4475 |
1 | NaN | NaN | NaN | NaN | NaN | NaN | Altai Krai | Russia | 52.693224 | 82.693142 | ... | 13921 | 14022 | 14125 | 14230 | 14334 | 14448 | 14577 | 14711 | 14853 | 15007 |
2 | NaN | NaN | NaN | NaN | NaN | NaN | Altai Republic | Russia | 50.711410 | 86.857219 | ... | 3598 | 3682 | 3769 | 3854 | 3940 | 4007 | 4095 | 4193 | 4296 | 4407 |
3 | NaN | NaN | NaN | NaN | NaN | NaN | Amur Oblast | Russia | 52.803237 | 128.437295 | ... | 4326 | 4362 | 4401 | 4437 | 4477 | 4519 | 4562 | 4611 | 4664 | 4721 |
4 | NaN | NaN | NaN | NaN | NaN | NaN | Arkhangelsk Oblast | Russia | 63.558969 | 43.122165 | ... | 13589 | 13736 | 13896 | 14072 | 14251 | 14438 | 14617 | 14799 | 14982 | 15181 |
5 rows × 274 columns
#!pip install plotly
import plotly.express as px
import numpy as np
# Reshape the wide table (one row per region, one column per date) into a
# long one: one row per date, one column per region.
#
# Column 6 is the region name (Province_State in the preview of `df`), and
# the date columns from position 69 onwards are the ones kept for analysis.
col = df.iloc[:, 6].values
row = df.columns[69:].values

# Transpose the numeric part so that dates run down the rows.
np_tmp = np.array(df.iloc[:, 69:]).transpose()

new_df = pd.DataFrame(data=np_tmp, index=row, columns=col)
# Missing counts are treated as zero cases.
new_df = new_df.fillna(0)
new_df
Adygea Republic | Altai Krai | Altai Republic | Amur Oblast | Arkhangelsk Oblast | Astrakhan Oblast | Bashkortostan Republic | Belgorod Oblast | Bryansk Oblast | Buryatia Republic | ... | Ulyanovsk Oblast | Vladimir Oblast | Volgograd Oblast | Vologda Oblast | Voronezh Oblast | Yamalo-Nenets Autonomous Okrug | Yaroslavl Oblast | Zabaykalsky Krai | Republic of Crimea | Sevastopol | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
03/20/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
03/21/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
03/22/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
03/23/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
03/24/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
10/06/20 | 4308 | 14448 | 4007 | 4519 | 14438 | 7401 | 9051 | 9267 | 9829 | 7408 | ... | 16304 | 7600 | 15876 | 4949 | 19074 | 16238 | 8932 | 6783 | 5063 | 1164 |
10/07/20 | 4348 | 14577 | 4095 | 4562 | 14617 | 7454 | 9098 | 9336 | 9913 | 7503 | ... | 16452 | 7657 | 16021 | 5009 | 19265 | 16386 | 8997 | 6881 | 5170 | 1191 |
10/08/20 | 4392 | 14711 | 4193 | 4611 | 14799 | 7509 | 9144 | 9410 | 10000 | 7598 | ... | 16600 | 7716 | 16164 | 5071 | 19459 | 16534 | 9064 | 6986 | 5288 | 1221 |
10/09/20 | 4434 | 14853 | 4296 | 4664 | 14982 | 7566 | 9191 | 9488 | 10092 | 7694 | ... | 16750 | 7772 | 16315 | 5136 | 19654 | 16683 | 9137 | 7115 | 5413 | 1253 |
10/10/20 | 4475 | 15007 | 4407 | 4721 | 15181 | 7624 | 9240 | 9564 | 10183 | 7792 | ... | 16902 | 7830 | 16470 | 5205 | 19855 | 16834 | 9214 | 7255 | 5553 | 1286 |
205 rows × 85 columns
# Aggregate the per-region counts into a few headline series per date:
#   all     - every region summed together
#   msk     - Moscow city plus Moscow Oblast
#   non-msk - everything except msk
#   spb     - Saint Petersburg plus Leningrad Oblast
#   other   - everything except msk and spb
sum_df = pd.DataFrame({
    'all': new_df.sum(axis=1),
    'msk': new_df['Moscow'] + new_df['Moscow Oblast'],
    'spb': new_df['Saint Petersburg'] + new_df['Leningrad Oblast'],
})
sum_df['non-msk'] = sum_df['all'] - sum_df['msk']
# Same as all - msk - spb, evaluated in the same left-to-right order.
sum_df['other'] = sum_df['non-msk'] - sum_df['spb']

# Fix the column order used by the rest of the notebook.
sum_df = sum_df[['all', 'msk', 'non-msk', 'spb', 'other']]
sum_df
all | msk | non-msk | spb | other | |
---|---|---|---|---|---|
03/20/20 | 148 | 132 | 16 | 16 | 0 |
03/21/20 | 154 | 138 | 16 | 16 | 0 |
03/22/20 | 207 | 191 | 16 | 16 | 0 |
03/23/20 | 278 | 262 | 16 | 16 | 0 |
03/24/20 | 311 | 290 | 21 | 21 | 0 |
... | ... | ... | ... | ... | ... |
10/06/20 | 1237504 | 386876 | 850628 | 53828 | 796800 |
10/07/20 | 1248619 | 390500 | 858119 | 54376 | 803743 |
10/08/20 | 1260112 | 394256 | 865856 | 54928 | 810928 |
10/09/20 | 1272238 | 398368 | 873870 | 55494 | 818376 |
10/10/20 | 1285084 | 402889 | 882195 | 56091 | 826104 |
205 rows × 5 columns
# Turn the running totals into day-over-day increments.
diff_df = sum_df.diff(periods=1)
# The first row has no predecessor, so its difference is NaN; report it as 0.
diff_df = diff_df.fillna(value=0)
# Case counts are whole numbers, so drop the float dtype introduced by NaN.
diff_df = diff_df.astype(int)
diff_df
Adygea Republic | Altai Krai | Altai Republic | Amur Oblast | Arkhangelsk Oblast | Astrakhan Oblast | Bashkortostan Republic | Belgorod Oblast | Bryansk Oblast | Buryatia Republic | ... | Yamalo-Nenets Autonomous Okrug | Yaroslavl Oblast | Zabaykalsky Krai | Republic of Crimea | Sevastopol | all | msk | non-msk | spb | other | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
03/20/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
03/21/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 6 | 6 | 0 | 0 | 0 |
03/22/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 53 | 53 | 0 | 0 | 0 |
03/23/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 71 | 71 | 0 | 0 | 0 |
03/24/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 33 | 28 | 5 | 5 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
10/06/20 | 42 | 114 | 67 | 42 | 187 | 49 | 44 | 67 | 82 | 95 | ... | 149 | 58 | 91 | 105 | 29 | 11615 | 4429 | 7186 | 498 | 6688 |
10/07/20 | 40 | 129 | 88 | 43 | 179 | 53 | 47 | 69 | 84 | 95 | ... | 148 | 65 | 98 | 107 | 27 | 11115 | 3624 | 7491 | 548 | 6943 |
10/08/20 | 44 | 134 | 98 | 49 | 182 | 55 | 46 | 74 | 87 | 95 | ... | 148 | 67 | 105 | 118 | 30 | 11493 | 3756 | 7737 | 552 | 7185 |
10/09/20 | 42 | 142 | 103 | 53 | 183 | 57 | 47 | 78 | 92 | 96 | ... | 149 | 73 | 129 | 125 | 32 | 12126 | 4112 | 8014 | 566 | 7448 |
10/10/20 | 41 | 154 | 111 | 57 | 199 | 58 | 49 | 76 | 91 | 98 | ... | 151 | 77 | 140 | 140 | 33 | 12846 | 4521 | 8325 | 597 | 7728 |
205 rows × 90 columns
# Express the daily Moscow and non-Moscow increments as a percentage of the
# daily total. Rows where the total is 0 come out as NaN (0 / 0).
perc_df = diff_df.assign(**{
    'msk%': diff_df['msk'].div(diff_df['all']).mul(100),
    'non-msk%': diff_df['non-msk'].div(diff_df['all']).mul(100),
})
perc_df
Adygea Republic | Altai Krai | Altai Republic | Amur Oblast | Arkhangelsk Oblast | Astrakhan Oblast | Bashkortostan Republic | Belgorod Oblast | Bryansk Oblast | Buryatia Republic | ... | Zabaykalsky Krai | Republic of Crimea | Sevastopol | all | msk | non-msk | spb | other | msk% | non-msk% | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
03/20/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NaN | NaN |
03/21/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 6 | 6 | 0 | 0 | 0 | 100.000000 | 0.000000 |
03/22/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 53 | 53 | 0 | 0 | 0 | 100.000000 | 0.000000 |
03/23/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 71 | 71 | 0 | 0 | 0 | 100.000000 | 0.000000 |
03/24/20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 33 | 28 | 5 | 5 | 0 | 84.848485 | 15.151515 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
10/06/20 | 42 | 114 | 67 | 42 | 187 | 49 | 44 | 67 | 82 | 95 | ... | 91 | 105 | 29 | 11615 | 4429 | 7186 | 498 | 6688 | 38.131726 | 61.868274 |
10/07/20 | 40 | 129 | 88 | 43 | 179 | 53 | 47 | 69 | 84 | 95 | ... | 98 | 107 | 27 | 11115 | 3624 | 7491 | 548 | 6943 | 32.604588 | 67.395412 |
10/08/20 | 44 | 134 | 98 | 49 | 182 | 55 | 46 | 74 | 87 | 95 | ... | 105 | 118 | 30 | 11493 | 3756 | 7737 | 552 | 7185 | 32.680762 | 67.319238 |
10/09/20 | 42 | 142 | 103 | 53 | 183 | 57 | 47 | 78 | 92 | 96 | ... | 129 | 125 | 32 | 12126 | 4112 | 8014 | 566 | 7448 | 33.910605 | 66.089395 |
10/10/20 | 41 | 154 | 111 | 57 | 199 | 58 | 49 | 76 | 91 | 98 | ... | 140 | 140 | 33 | 12846 | 4521 | 8325 | 597 | 7728 | 35.193835 | 64.806165 |
205 rows × 92 columns
# Plot the daily increments for Moscow against the rest of the country on a
# fixed-size 1000x500 canvas (autosizing is switched off so the size sticks).
fig = px.line(diff_df[['msk', 'non-msk']]).update_layout(
    autosize=False, width=1000, height=500
)
fig.show()