This is a importable notebook that provides simple helpers for performing queries on the labsdb replica databases from PAWS. It is stateful and designed to be easy to use in an interactive setup.
As soon as you import this notebook, it sets up a simple mysql connection pool to the replica. No actual connections are made until you try to use any of the functions.
It provides the following functions:
sql(query, *args, dbname=None)
¶This runs the given query (with passed in parameters) on the currently selected db. It returns an iterator
import os
from sqlalchemy import create_engine
from sqlalchemy.sql import text
host = os.environ['MYSQL_HOST']
user = os.environ['MYSQL_USERNAME']
password = os.environ['MYSQL_PASSWORD']
connection_string = 'mysql+pymysql://{user}:{password}@{host}/?charset=utf8mb4'.format(
user=user,
password=password,
host=host
)
engine = create_engine(connection_string, pool_recycle=300)
cur_db = 'metawiki_p'
def use_db(db):
global cur_db
if not db.endswith('_p'):
db = db + '_p'
cur_db = db
def pre_process_param(param):
if type(param) == list:
return tuple(param)
return param
def sql(query_str, params={}, db=None):
"""
Run the given SQL query with given
"""
def encode_if_necessary(b):
if type(b) is bytes:
return b.decode('utf8')
return b
if db is None:
db = cur_db
if type(db) != list:
db = [db]
query = text(query_str)
con = engine.connect()
for d in db:
con.execute('use %s' % d)
for k in params:
params[k] = pre_process_param(params[k])
for r in con.execute(query, **params):
d = dict(r)
# This is unfortunately necessary since I can not seem to get sqlalchemy
# nor pymysql to co-erce all strings to unicode values. This might be
# because our dbs don't have proper encoding set.
unicode_d = {k: encode_if_necessary(v) for k, v in d.items()}
yield unicode_d
def find_pages(title, namespace=10, db='metawiki_p'):
query = """
SELECT page_namespace, page_title
FROM templatelinks INNER JOIN page ON page_id = tl_from
WHERE
tl_namespace = :namespace AND tl_title = :title AND
page_id NOT IN (SELECT rt_page FROM revtag)
"""
result = sql(query, {'namespace': namespace, 'title': title}, db)
return [(r['page_namespace'], r['page_title']) for r in result]
find_pages('Affiliations_Committee/layout')
--------------------------------------------------------------------------- ProgrammingError Traceback (most recent call last) /srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args) 1277 self.dialect.do_execute( -> 1278 cursor, statement, parameters, context 1279 ) /srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/default.py in do_execute(self, cursor, statement, parameters, context) 592 def do_execute(self, cursor, statement, parameters, context=None): --> 593 cursor.execute(statement, parameters) 594 /srv/paws/lib/python3.6/site-packages/pymysql/cursors.py in execute(self, query, args) 162 --> 163 result = self._query(query) 164 self._executed = query /srv/paws/lib/python3.6/site-packages/pymysql/cursors.py in _query(self, q) 320 self._clear_result() --> 321 conn.query(q) 322 self._do_get_result() /srv/paws/lib/python3.6/site-packages/pymysql/connections.py in query(self, sql, unbuffered) 504 self._execute_command(COMMAND.COM_QUERY, sql) --> 505 self._affected_rows = self._read_query_result(unbuffered=unbuffered) 506 return self._affected_rows /srv/paws/lib/python3.6/site-packages/pymysql/connections.py in _read_query_result(self, unbuffered) 723 result = MySQLResult(self) --> 724 result.read() 725 self._result = result /srv/paws/lib/python3.6/site-packages/pymysql/connections.py in read(self) 1068 try: -> 1069 first_packet = self.connection._read_packet() 1070 /srv/paws/lib/python3.6/site-packages/pymysql/connections.py in _read_packet(self, packet_type) 675 self._result.unbuffered_active = False --> 676 packet.raise_for_error() 677 return packet /srv/paws/lib/python3.6/site-packages/pymysql/protocol.py in raise_for_error(self) 222 if DEBUG: print("errno =", errno) --> 223 err.raise_mysql_exception(self._data) 224 /srv/paws/lib/python3.6/site-packages/pymysql/err.py in raise_mysql_exception(data) 106 errorclass = InternalError if errno < 1000 else OperationalError --> 107 raise errorclass(errno, errval) ProgrammingError: (1146, "Table 'metawiki_p.revtag' doesn't exist") The above exception was the direct cause of the following exception: ProgrammingError Traceback (most recent call last) <ipython-input-9-c3069a975957> in <module> ----> 1 find_pages('Affiliations_Committee/layout') <ipython-input-8-085da29070d3> in find_pages(title, namespace, db) 8 """ 9 result = sql(query, {'namespace': namespace, 'title': title}, db) ---> 10 return [(r['page_namespace'], r['page_title']) for r in result] <ipython-input-8-085da29070d3> in <listcomp>(.0) 8 """ 9 result = sql(query, {'namespace': namespace, 'title': title}, db) ---> 10 return [(r['page_namespace'], r['page_title']) for r in result] <ipython-input-7-ea20bb484303> in sql(query_str, params, db) 18 for k in params: 19 params[k] = pre_process_param(params[k]) ---> 20 for r in con.execute(query, **params): 21 d = dict(r) 22 # This is unfortunately necessary since I can not seem to get sqlalchemy /srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in execute(self, object_, *multiparams, **params) 1012 ) 1013 else: -> 1014 return meth(self, multiparams, params) 1015 1016 def _execute_function(self, func, multiparams, params): /srv/paws/lib/python3.6/site-packages/sqlalchemy/sql/elements.py in _execute_on_connection(self, connection, multiparams, params) 296 def _execute_on_connection(self, connection, multiparams, params): 297 if self.supports_execution: --> 298 return connection._execute_clauseelement(self, multiparams, params) 299 else: 300 raise exc.ObjectNotExecutableError(self) /srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_clauseelement(self, elem, multiparams, params) 1131 distilled_params, 1132 compiled_sql, -> 1133 distilled_params, 1134 ) 1135 if self._has_events or self.engine._has_events: /srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args) 1316 except BaseException as e: 1317 self._handle_dbapi_exception( -> 1318 e, statement, parameters, cursor, context 1319 ) 1320 /srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _handle_dbapi_exception(self, e, statement, parameters, cursor, context) 1510 elif should_wrap: 1511 util.raise_( -> 1512 sqlalchemy_exception, with_traceback=exc_info[2], from_=e 1513 ) 1514 else: /srv/paws/lib/python3.6/site-packages/sqlalchemy/util/compat.py in raise_(***failed resolving arguments***) 176 177 try: --> 178 raise exception 179 finally: 180 # credit to /srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/base.py in _execute_context(self, dialect, constructor, statement, parameters, *args) 1276 if not evt_handled: 1277 self.dialect.do_execute( -> 1278 cursor, statement, parameters, context 1279 ) 1280 /srv/paws/lib/python3.6/site-packages/sqlalchemy/engine/default.py in do_execute(self, cursor, statement, parameters, context) 591 592 def do_execute(self, cursor, statement, parameters, context=None): --> 593 cursor.execute(statement, parameters) 594 595 def do_execute_no_params(self, cursor, statement, context=None): /srv/paws/lib/python3.6/site-packages/pymysql/cursors.py in execute(self, query, args) 161 query = self.mogrify(query, args) 162 --> 163 result = self._query(query) 164 self._executed = query 165 return result /srv/paws/lib/python3.6/site-packages/pymysql/cursors.py in _query(self, q) 319 self._last_executed = q 320 self._clear_result() --> 321 conn.query(q) 322 self._do_get_result() 323 return self.rowcount /srv/paws/lib/python3.6/site-packages/pymysql/connections.py in query(self, sql, unbuffered) 503 sql = sql.encode(self.encoding, 'surrogateescape') 504 self._execute_command(COMMAND.COM_QUERY, sql) --> 505 self._affected_rows = self._read_query_result(unbuffered=unbuffered) 506 return self._affected_rows 507 /srv/paws/lib/python3.6/site-packages/pymysql/connections.py in _read_query_result(self, unbuffered) 722 else: 723 result = MySQLResult(self) --> 724 result.read() 725 self._result = result 726 if result.server_status is not None: /srv/paws/lib/python3.6/site-packages/pymysql/connections.py in read(self) 1067 def read(self): 1068 try: -> 1069 first_packet = self.connection._read_packet() 1070 1071 if first_packet.is_ok_packet(): /srv/paws/lib/python3.6/site-packages/pymysql/connections.py in _read_packet(self, packet_type) 674 if self._result is not None and self._result.unbuffered_active is True: 675 self._result.unbuffered_active = False --> 676 packet.raise_for_error() 677 return packet 678 /srv/paws/lib/python3.6/site-packages/pymysql/protocol.py in raise_for_error(self) 221 errno = self.read_uint16() 222 if DEBUG: print("errno =", errno) --> 223 err.raise_mysql_exception(self._data) 224 225 def dump(self): /srv/paws/lib/python3.6/site-packages/pymysql/err.py in raise_mysql_exception(data) 105 if errorclass is None: 106 errorclass = InternalError if errno < 1000 else OperationalError --> 107 raise errorclass(errno, errval) ProgrammingError: (pymysql.err.ProgrammingError) (1146, "Table 'metawiki_p.revtag' doesn't exist") [SQL: SELECT page_namespace, page_title FROM templatelinks INNER JOIN page ON page_id = tl_from WHERE tl_namespace = %(namespace)s AND tl_title = %(title)s AND page_id NOT IN (SELECT rt_page FROM revtag) ] [parameters: {'namespace': 10, 'title': 'Affiliations_Committee/layout'}] (Background on this error at: http://sqlalche.me/e/13/f405)