from sqlalchemy import create_engine
import sys, os
import pandas as pd
constr = 'mysql+pymysql://{user}:{pwd}@{host}'.format(user=os.environ['MYSQL_USERNAME'],
pwd=os.environ['MYSQL_PASSWORD'],
host=os.environ['MYSQL_HOST'])
con = create_engine(constr)
df = pd.read_sql('select ukuser.user_id as uk_id, enuser.user_id as en_id, ukuser.user_name as username from ukwiki_p.user ukuser join enwiki_p.user enuser ON ukuser.user_name = enuser.user_name', con)
df['username'] = df['username'].map(lambda x: x.decode('utf-8'))
df
uk_id | en_id | username | |
---|---|---|---|
0 | 291649 | 26041642 | ! Bikkit ! |
1 | 345395 | 30599575 | ! Thunderer ! |
2 | 312288 | 27961600 | !!vdtfo8d!! |
3 | 222817 | 21874045 | !!zaq |
4 | 89772 | 13732777 | !-ArtMaster |
5 | 277658 | 25339310 | !-ArtMaster 95 |
6 | 344748 | 30205970 | !Alla17 |
7 | 210126 | 20086955 | !KrzysiekBu! |
8 | 122690 | 6752693 | !Silent |
9 | 133462 | 16773129 | !Venerable! |
10 | 18927 | 8016863 | !XzibiT! |
11 | 221820 | 19169987 | !aoniug |
12 | 61554 | 3744755 | !dea4u |
13 | 165897 | 18285889 | !palexey! |
14 | 327207 | 28963803 | !rynaH |
15 | 290687 | 26510745 | !болит |
16 | 135642 | 16403295 | "Coreytaylor" |
17 | 93804 | 13787084 | "Inkhaw" |
18 | 283850 | 26052218 | "Transylvania" band. |
19 | 70488 | 12434766 | "Wrong is Right" band |
20 | 341789 | 29938768 | "olha" |
21 | 60987 | 11858185 | "Відлуння віків" |
22 | 180949 | 19144748 | "Гуцул" |
23 | 286051 | 27367253 | "Клуб сиру" |
24 | 153149 | 17881633 | "МОЛОДИЙ ПІДПРЄМЕЦЬ РОКУ" |
25 | 20688 | 8194264 | "Милый ангел" |
26 | 233311 | 22956160 | "Обійми" |
27 | 138022 | 10412933 | "Ы" с ногами и руками |
28 | 160583 | 18179801 | $ Zahar Klym $ |
29 | 387874 | 32793058 | $!!remantee |
... | ... | ... | ... |
258171 | 96138 | 14127062 | 한국만세 |
258172 | 154710 | 17201862 | 한민족 |
258173 | 257390 | 21476482 | 한영태 |
258174 | 324301 | 19241221 | 햄방이 |
258175 | 200203 | 19241191 | 햄빵이 |
258176 | 66060 | 10963932 | 행유 |
258177 | 38621 | 8795260 | 현철환 |
258178 | 334336 | 29155797 | 호로조 |
258179 | 225576 | 19940536 | 홍문초 |
258180 | 324264 | 27424067 | 화목한 |
258181 | 99439 | 12952009 | 환골탈태 |
258182 | 345452 | 30175256 | 황국환 |
258183 | 390675 | 33103445 | 황금 막내 |
258184 | 195920 | 19644767 | 황제펭귄 |
258185 | 14382 | 6990211 | 휘뚜루마뚜루 |
258186 | 395796 | 33124715 | 힘이 |
258187 | 18324 | 7947453 | fi |
258188 | 36982 | 7604294 | (あ) |
258189 | 353268 | 27435954 | (正)无言 |
258190 | 45650 | 10490989 | 4行DA |
258191 | 296403 | 26889308 | Hsjs |
258192 | 306245 | 24799052 | K-SYAN |
258193 | 198153 | 24696986 | KoZ |
258194 | 307517 | 26595770 | RIEAKI |
258195 | 336939 | 29647142 | Tom |
258196 | 328649 | 29076045 | 𐨀𐨨𐨪 |
258197 | 321540 | 28526886 | 𝔊 |
258198 | 317140 | 28277776 | 𝕀𝓓𝒆𝓳𝓪𝓿𝓊𝕀 |
258199 | 14401 | 169932 | 😂 |
258200 | 15810 | 7630487 | 𥐣 |
258201 rows × 3 columns
translations = pd.read_sql("""
SELECT ips_item_id as wikidataId, ips_site_page AS uktitle, english.ips_site_page as entitle, enwiki_p.revision.rev_user, enwiki_p.page.page_id
FROM wikidatawiki_p.wb_items_per_site
INNER JOIN wikidatawiki_p.wb_items_per_site AS english
ON english.ips_site_id = 'enwiki' ips_item_id = english.ips_item_id
INNER JOIN enwiki_p.page ON enwiki_p.page.page_title = english.entitle
INNER JOIN enwiki_p.revision ON enwiki_p.page.page_id = enwiki_p.revision.rev_page
WHERE ips_site_id= 'ukwiki' AND
enwiki_p.revision.rev_user in (
select enuser.user_id from ukwiki_p.user ukuser join enwiki_p.user enuser ON ukuser.user_name = enuser.user_name
)
GROUP BY enwiki_p.revision.rev_user, enwiki_p.page.page_id""", con)
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <ipython-input-9-5cadcfa98c5b> in <module>() 15 ) 16 ---> 17 GROUP BY enwiki_p.revision.rev_user, enwiki_p.page.page_id""", con) /srv/paws/lib/python3.6/site-packages/pandas/io/sql.py in read_sql(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize) 395 sql, index_col=index_col, params=params, 396 coerce_float=coerce_float, parse_dates=parse_dates, --> 397 chunksize=chunksize) 398 399 /srv/paws/lib/python3.6/site-packages/pandas/io/sql.py in read_query(self, sql, index_col, coerce_float, parse_dates, params, chunksize) 1061 args = _convert_params(sql, params) 1062 -> 1063 result = self.execute(*args) 1064 columns = result.keys() 1065 /srv/paws/lib/python3.6/site-packages/pandas/io/sql.py in execute(self, *args, **kwargs) 952 def execute(self, *args, **kwargs): 953 """Simple passthrough to SQLAlchemy connectable""" --> 954 return self.connectable.execute(*args, **kwargs) 955 956 def read_table(self, table_name, index_col=None, coerce_float=True, /srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in execute(self, statement, *multiparams, **params) 2073 2074 connection = self.contextual_connect(close_with_result=True) -> 2075 return connection.execute(statement, *multiparams, **params) 2076 2077 def scalar(self, statement, *multiparams, **params): /srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in execute(self, object, *multiparams, **params) 940 """ 941 if isinstance(object, util.string_types[0]): --> 942 return self._execute_text(object, multiparams, params) 943 try: 944 meth = object._execute_on_connection /srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_text(self, statement, multiparams, params) 1102 statement, 1103 parameters, -> 1104 statement, parameters 1105 ) 1106 if self._has_events or self.engine._has_events: /srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args) 1198 parameters, 1199 cursor, -> 1200 context) 1201 1202 if self._has_events or self.engine._has_events: /srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context) 1414 ) 1415 else: -> 1416 util.reraise(*exc_info) 1417 1418 finally: /srv/paws/lib/python3.6/site-packages/sqlalchemy/util/compat.py in reraise(tp, value, tb, cause) 185 if value.__traceback__ is not tb: 186 raise value.with_traceback(tb) --> 187 raise value 188 189 else: /srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args) 1191 statement, 1192 parameters, -> 1193 context) 1194 except BaseException as e: 1195 self._handle_dbapi_exception( /srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/default.py in do_execute(self, cursor, statement, parameters, context) 506 507 def do_execute(self, cursor, statement, parameters, context=None): --> 508 cursor.execute(statement, parameters) 509 510 def do_execute_no_params(self, cursor, statement, context=None): /srv/paws/lib/python3.6/site-packages/pymysql/cursors.py in execute(self, query, args) 168 query = self.mogrify(query, args) 169 --> 170 result = self._query(query) 171 self._executed = query 172 return result /srv/paws/lib/python3.6/site-packages/pymysql/cursors.py in _query(self, q) 326 self._last_executed = q 327 self._clear_result() --> 328 conn.query(q) 329 self._do_get_result() 330 return self.rowcount /srv/paws/lib/python3.6/site-packages/pymysql/connections.py in query(self, sql, unbuffered) 891 sql = sql.encode(self.encoding, 'surrogateescape') 892 self._execute_command(COMMAND.COM_QUERY, sql) --> 893 self._affected_rows = self._read_query_result(unbuffered=unbuffered) 894 return self._affected_rows 895 /srv/paws/lib/python3.6/site-packages/pymysql/connections.py in _read_query_result(self, unbuffered) 1101 else: 1102 result = MySQLResult(self) -> 1103 result.read() 1104 self._result = result 1105 if result.server_status is not None: /srv/paws/lib/python3.6/site-packages/pymysql/connections.py in read(self) 1394 def read(self): 1395 try: -> 1396 first_packet = self.connection._read_packet() 1397 1398 if first_packet.is_ok_packet(): /srv/paws/lib/python3.6/site-packages/pymysql/connections.py in _read_packet(self, packet_type) 1030 buff = b'' 1031 while True: -> 1032 packet_header = self._read_bytes(4) 1033 #if DEBUG: dump_packet(packet_header) 1034 /srv/paws/lib/python3.6/site-packages/pymysql/connections.py in _read_bytes(self, num_bytes) 1064 while True: 1065 try: -> 1066 data = self._rfile.read(num_bytes) 1067 break 1068 except (IOError, OSError) as e: /usr/lib/python3.6/socket.py in readinto(self, b) 584 while True: 585 try: --> 586 return self._sock.recv_into(b) 587 except timeout: 588 self._timeout_occurred = True KeyboardInterrupt: