import pywikibot
from bs4 import BeautifulSoup
import urllib.request
import re
# Connect to enwiki (module-level site handle for the English Wikipedia).
enwiki = pywikibot.Site('en', 'wikipedia')
# and then to wikidata: the data repository associated with the enwiki site.
# NOTE(review): neither name is referenced by the functions visible below,
# each of which opens its own Wikidata site object.
enwiki_repo = enwiki.data_repository()
#%%
def _english_labels(name_claims):
    """Return the English labels of the items targeted by *name_claims*.

    Claims without an item target and items without an English label are
    skipped, so the result may be shorter than the input.
    """
    labels = []
    for name_claim in name_claims:
        name_item = name_claim.getTarget()
        if name_item is None:
            continue
        label = name_item.get()['labels'].get('en')
        if label is not None:
            labels.append(label)
    return labels


def print_authorname(list_item):
    """Build "given, family" name strings for the ordered P50 authors.

    For every item ID in *list_item*, each P50 (author) statement that
    carries a P1545 qualifier is followed to the author item, whose P735
    (given name) and P734 (family name) statements are resolved to English
    labels. One string is produced per given-name/family-name combination.

    Args:
        list_item: iterable of Wikidata item IDs (e.g. ``['Q56603084']``).

    Returns:
        list of ``"<given>, <family>"`` strings in statement order. Items
        without P50 statements contribute nothing.
    """
    site = pywikibot.Site("wikidata", "wikidata")
    repo = site.data_repository()
    i_list = []
    for data_item in list_item:
        item = pywikibot.ItemPage(repo, data_item)
        # .get() with a default: an item without authors is not an error.
        for source in item.claims.get('P50', []):
            if 'P1545' not in source.qualifiers:
                continue
            author = source.getTarget()
            if author is None:
                # Statement has no item target, so there is nothing to look up.
                continue
            author_claims = author.get()['claims']
            given_names = _english_labels(author_claims.get('P735', []))
            if not given_names:
                print('No Given name')
                continue
            # Resolved once per author rather than once per given name.
            family_names = _english_labels(author_claims.get('P734', []))
            if not family_names:
                print('No Family name')
                continue
            i_list.extend(
                given + ', ' + family
                for given in given_names
                for family in family_names
            )
    return i_list
# Example invocation, executed whenever this file runs (queries Wikidata).
# The list literal on the following line appears to be captured output.
print_authorname(['Q56603084'])
['Carole, Tucker', 'Keith, Vanderlinde', 'Hsin, Chiang', 'Valentyn, Novosad', 'P., Ade', 'Peter, Ade']
#%%
# Function to get the ordinal numbers (P1545 qualifier) of the author name strings (P2093)
def print_authorstring_info(list_item):
    """Collect the P1545 qualifier values of the P2093 statements.

    Args:
        list_item: iterable of Wikidata item IDs.

    Returns:
        list with the target of the first P1545 qualifier of every P2093
        (author name string) statement that has one, in statement order.
        Items without P2093 statements contribute nothing.
    """
    site = pywikibot.Site("wikidata", "wikidata")
    repo = site.data_repository()
    o_list = []
    for data_item in list_item:
        item = pywikibot.ItemPage(repo, data_item)
        # A missing P2093 is treated as "no statements" instead of raising
        # KeyError, matching how print_author_info handles a missing P50.
        for sourc in item.claims.get('P2093', []):
            ordinals = sourc.qualifiers.get('P1545')
            if ordinals:
                o_list.append(ordinals[0].getTarget())
    return o_list
# Example invocation, executed whenever this file runs (queries Wikidata).
# The list literal on the following line appears to be captured output.
print_authorstring_info(['Q56603084'])
['1', '2', '3', '5', '6', '7', '8', '9', '10', '11', '12', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '51', '52', '53', '54', '55', '56', '57', '58', '59', '62', '63', '64', '65', '66', '67']
#%%
# Function to get the ordinal numbers (P1545 qualifier) of the authors (P50)
def print_author_info(list_item):
    """Collect the P1545 qualifier values of the P50 statements.

    Args:
        list_item: iterable of Wikidata item IDs.

    Returns:
        list with the target of the first P1545 qualifier of every P50
        (author) statement that has one, in statement order. Items without
        P50 statements contribute nothing.
    """
    wikidata = pywikibot.Site("wikidata", "wikidata")
    repo = wikidata.data_repository()
    ordinals = []
    for qid in list_item:
        page = pywikibot.ItemPage(repo, qid)
        loaded = page.get()
        # Direct membership test instead of scanning every property key.
        if 'P50' not in loaded['claims']:
            continue
        for author_claim in page.claims['P50']:
            for prop, quals in author_claim.qualifiers.items():
                if prop != 'P1545':
                    continue
                ordinals.append(quals[0].getTarget())
    return ordinals
# Example invocation, executed whenever this file runs (queries Wikidata).
# The list literal on the following line appears to be captured output.
print_author_info(['Q56603084'])
['60', '61', '13', '50', '4']
#%%
# Function to concatenate the lists of ordinal numbers of the author strings and the authors
def all_authors(list_item=None):
    """Return the P2093 ordinals followed by the P50 ordinals.

    Args:
        list_item: iterable of Wikidata item IDs to inspect. Defaults to
            ``['Q56603084']``, the item this function previously hard-coded,
            so existing zero-argument calls behave as before.

    Returns:
        list: the result of ``print_authorstring_info`` followed by the
        result of ``print_author_info`` for the same items.
    """
    if list_item is None:
        list_item = ['Q56603084']
    f = print_author_info(list_item)
    j = print_authorstring_info(list_item)
    return j + f
#%%
# This function extracts author names from the 'article:author' tags of a citation web page
def author_citation(url):
    """Fetch *url* and return the author names advertised in its markup.

    The page is parsed as HTML and every tag whose ``property`` attribute
    matches ``article:author`` is inspected; the value of its ``content``
    attribute is collected.

    Args:
        url: address of the page to download.

    Returns:
        list of ``content`` attribute values in document order. Matching
        tags that lack a ``content`` attribute are skipped.

    Raises:
        urllib.error.URLError: if the page cannot be retrieved.
    """
    # Read the body inside the context manager, then parse after the
    # connection has been released.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")
    author_tags = soup.find_all(attrs={'property': re.compile('article:author')})
    # The former debug dump of the whole parsed page has been dropped.
    return [tag['content'] for tag in author_tags if tag.has_attr('content')]
# Example invocation, executed whenever this file runs; performs an HTTP
# request to the URL below and discards the returned list.
author_citation('https://www.researchgate.net/publication/221940970_Galaxy_Clusters_Discovered_via_the_Sunyaev-Zel%27dovich_Effect_in_the_First_720_Square_Degrees_of_the_South_Pole_Telescope_Survey')
#%%
# This function matches the names of the authors in the citation to their ordinal numbers
def author_match():
    """Return the citation author names ordered by the Wikidata ordinals.

    The ordinals come from ``all_authors()`` and the names from
    ``author_citation`` for a fixed ADS record. Ordinal ``k`` selects the
    ``k``-th citation name (1-based).

    Returns:
        list of citation names, one per ordinal, in the order the ordinals
        were returned. Empty when there are no ordinals.

    Raises:
        ValueError: if an ordinal is not an integer string.
        IndexError: if an ordinal exceeds the number of citation names.
    """
    stated = all_authors()
    authorname = author_citation('https://ui.adsabs.harvard.edu/abs/2018ApJ...852...97H/exportcitation')
    # Built exactly once; the earlier surrounding loop only produced an
    # unused value and could leave the result undefined for empty input.
    return [authorname[int(i) - 1] for i in stated]
# Example invocation, executed whenever this file runs (network access via
# the functions it calls); the returned list is discarded.
author_match()
#%%
#This function adds the author string name to a list
def print_authorstring(list_item):
    """Collect the targets of the P2093 statements of the given items.

    Args:
        list_item: iterable of Wikidata item IDs.

    Returns:
        list of the ``target`` values of every P2093 (author name string)
        statement, in statement order. Items without P2093 statements
        contribute nothing.
    """
    ti = []
    site = pywikibot.Site("wikidata", "wikidata")
    repo = site.data_repository()
    for data_item in list_item:
        item = pywikibot.ItemPage(repo, data_item)
        item_dict = item.get()
        # Exact property lookup; a substring test on the property ID would
        # also accept any other ID that merely contains "P2093".
        ti.extend(source.target for source in item_dict['claims'].get('P2093', []))
    return ti
# Example invocation, executed whenever this file runs (queries Wikidata).
# The list literal on the following line appears to be captured output.
print_authorstring(['Q56603084'])
['J. W. Henning', 'J. T. Sayre', 'C. L. Reichardt', 'A. J. Anderson', 'J. E. Austermann', 'J. A. Beall', 'A. N. Bender', 'B. A. Benson', 'L. E. Bleem', 'J. E. Carlstrom', 'C. L. Chang', 'H-M. Cho', 'R. Citron', 'C. Corbett Moran', 'T. M. Crawford', 'A. T. Crites', 'T. de Haan', 'M. A. Dobbs', 'W. Everett', 'J. Gallicchio', 'E. M. George', 'A. Gilbert', 'N. W. Halverson', 'N. Harrington', 'G. C. Hilton', 'G. P. Holder', 'W. L. Holzapfel', 'S. Hoover', 'Z. Hou', 'J. D. Hrubes', 'N. Huang', 'J. Hubmayr', 'K. D. Irwin', 'R. Keisler', 'L. Knox', 'A. T. Lee', 'E. M. Leitch', 'D. Li', 'A. Lowitz', 'A. Manzotti', 'J. J. McMahon', 'S. S. Meyer', 'L. Mocanu', 'J. Montgomery', 'A. Nadolski', 'T. Natoli', 'J. P. Nibarger', 'S. Padin', 'C. Pryke', 'J. E. Ruhl', 'B. R. Saliwanchik', 'K. K. Schaffer', 'C. Sievers', 'G. Smecher', 'A. A. Stark', 'K. T. Story', 'T. Veach', 'J. D. Vieira', 'G. Wang', 'N. Whitehorn', 'W. L. K. Wu', 'V. Yefremenko']
#%%
#This function joins the authorstring name and the concatenated given and family name
def joined(list_item=None):
    """Return the author name strings followed by the "given, family" names.

    Args:
        list_item: iterable of Wikidata item IDs to inspect. Defaults to
            ``['Q56603084']``, the item this function previously hard-coded,
            so existing zero-argument calls behave as before.

    Returns:
        list: the result of ``print_authorstring`` followed by the result
        of ``print_authorname`` for the same items.
    """
    if list_item is None:
        list_item = ['Q56603084']
    first = print_authorstring(list_item)
    second = print_authorname(list_item)
    return first + second
# Example invocation, executed whenever this file runs (queries Wikidata);
# the returned list is discarded.
joined()
#%%
# This function prints the citation names next to the Wikidata names at the same position
def match_alt():
    """Print citation names side by side with the Wikidata names.

    The two lists come from ``author_match()`` and ``joined()`` and are
    paired by position; pairing stops at the end of the shorter list.
    Nothing is returned.
    """
    left = author_match()
    right = joined()
    print('citation names' + ' - ' + 'wikidata names')
    print('\n')
    # zip() already pairs by position. An extra first-occurrence index
    # comparison would skip any pair containing a repeated name and cost
    # quadratic time.
    for left1, right1 in zip(left, right):
        print(left1 + ' - ' + right1)
# Example invocation, executed whenever this file runs (network access via
# the functions it calls). The text on the following line appears to be the
# output captured from one such run.
match_alt()
# Output: citation names - wikidata names High, F. W. - J. W. Henning Stalder, B. - J. T. Sayre Song, J. - C. L. Reichardt Aird, K. A. - A. J. Anderson Allam, S. S. - J. E. Austermann Armstrong, R. - J. A. Beall Barkhouse, W. A. - A. N. Bender Benson, B. A. - B. A. Benson Bertin, E. - L. E. Bleem Bhattacharya, S. - J. E. Carlstrom Bleem, L. E. - C. L. Chang Buckley-Geer, E. J. - H-M. Cho Carlstrom, J. E. - R. Citron Challis, P. - C. Corbett Moran Chang, C. L. - T. M. Crawford Crawford, T. M. - A. T. Crites Crites, A. T. - T. de Haan de Haan, T. - M. A. Dobbs Desai, S. - W. Everett Dobbs, M. A. - J. Gallicchio Dudley, J. P. - E. M. George Foley, R. J. - A. Gilbert George, E. M. - N. W. Halverson Gladders, M. - N. Harrington Halverson, N. W. - G. C. Hilton Hamuy, M. - G. P. Holder Hansen, S. M. - W. L. Holzapfel Holder, G. P. - S. Hoover Holzapfel, W. L. - Z. Hou Hrubes, J. D. - J. D. Hrubes Joy, M. - N. Huang Keisler, R. - J. Hubmayr Lee, A. T. - K. D. Irwin Leitch, E. M. - R. Keisler Lin, H. - L. Knox Lin, Y. -T. - A. T. Lee Loehr, A. - E. M. Leitch Lueker, M. - D. Li Marrone, D. - A. Lowitz McMahon, J. J. - A. Manzotti Mehl, J. - J. J. McMahon Meyer, S. S. - S. S. Meyer Mohr, J. J. - L. Mocanu Montroy, T. E. - J. Montgomery Morell, N. - A. Nadolski Ngeow, C. -C. - T. Natoli Padin, S. - J. P. Nibarger Pryke, C. - S. Padin Reichardt, C. L. - C. Pryke Rest, A. - J. E. Ruhl Ruel, J. - B. R. Saliwanchik Ruhl, J. E. - K. K. Schaffer Schaffer, K. K. - C. Sievers Shaw, L. - G. Smecher Shirokoff, E. - A. A. Stark Smith, R. C. - K. T. Story Stark, A. A. - T. Veach Stubbs, C. W. - J. D. Vieira Tucker, D. L. - G. Wang Vanderlinde, K. - N. Whitehorn Vieira, J. D. - W. L. K. Wu Williamson, R. - V. Yefremenko Spieler, H. G. - Carole, Tucker Staniszewski, Z. - Keith, Vanderlinde Brodwin, M. - Hsin, Chiang Plagge, T. - Valentyn, Novosad Ade, P. A. R. - P., Ade
#%%
# This function separates each author name in the citation into its comma-delimited parts
def authorname_seperated():
    """Split every citation author name on ``", "``.

    The names come from ``author_match()``.

    Returns:
        list of lists: one list of parts per name, e.g.
        ``"Henning, J. W."`` becomes ``['Henning', 'J. W.']``. A name
        without ``", "`` yields a single-element list.
    """
    # No Wikidata lookup is needed to split strings; an item fetch whose
    # result was never used has been dropped.
    return [ID.split(", ") for ID in author_match()]
# Example invocation, executed whenever this file runs (network access via
# the functions it calls). The list literal on the following line appears
# to be captured output.
authorname_seperated()
[['Seljak', 'Uroš'], ['Zaldarriaga', 'Matias']]
#%%
# This function collects the P2093 and P50 claims into one list, which is useful for adding qualifiers
def join_names(data_item='Q56603082'):
    """Return the P2093 claims followed by the ordered P50 claims of an item.

    Args:
        data_item: Wikidata item ID to read. Defaults to ``'Q56603082'``,
            the item this function previously hard-coded, so existing
            zero-argument calls behave as before.

    Returns:
        list of claim objects: every P2093 (author name string) claim,
        then every P50 (author) claim that carries a P1545 qualifier.
        Empty when the item has neither property.
    """
    site = pywikibot.Site("wikidata", "wikidata")
    repo = site.data_repository()
    item = pywikibot.ItemPage(repo, data_item)
    claims = item.get()['claims']
    # Only authors that carry the ordinal qualifier are kept.
    ordered_authors = [
        author for author in claims.get('P50', [])
        if 'P1545' in author.qualifiers
    ]
    name_strings = list(claims.get('P2093', []))
    return name_strings + ordered_authors
# Example invocation, executed whenever this file runs (queries Wikidata);
# the returned list is discarded.
join_names()
#%%
#This function adds the P9688 and P9687 qualifier if not present
def add_namesqualifier(data_item):
    """Add P9688 and P9687 qualifiers to author claims that lack them.

    Claims from ``join_names()`` are paired by position with the split
    citation names from ``authorname_seperated()``. For each pair, the first
    name part is written as P9688 and the second as P9687 (presumably last
    name and given names; confirm against the property definitions). Each
    qualifier is checked independently, so a claim that already has one of
    them can still receive the other. This function saves edits to Wikidata.

    Args:
        data_item: Wikidata item ID. NOTE(review): join_names() is called
            without arguments, so the claims edited here are the ones that
            function selects on its own; data_item is not consulted.
    """
    site = pywikibot.Site("wikidata", "wikidata")
    repo = site.data_repository()
    listb = join_names()
    dateCre = authorname_seperated()
    # (property, index of the name part stored under it)
    parts = (('P9688', 0), ('P9687', 1))
    for te, tea in zip(listb, dateCre):
        for prop, position in parts:
            if prop in te.qualifiers:
                continue
            if position >= len(tea):
                # The name had no ", ", so this part does not exist.
                continue
            qualifier = pywikibot.Claim(repo, prop)
            qualifier.setTarget(tea[position])
            te.addQualifier(qualifier, summary=u'Adding a qualifier.')
            print('New Qualifier for ' + prop + '!')
# WARNING: executed whenever this file runs; the function calls
# addQualifier(), i.e. it saves edits to Wikidata.
add_namesqualifier('Q56603077')
#%%
# This function adds the P1932 qualifier (the author name as stated in the citation) if not present
def add_statedqualifier(list_item):
    """Add a P1932 qualifier with the citation name to P50 claims lacking it.

    The P50 claims of every item in *list_item* are paired by position with
    citation names selected through the P50 ordinals. This function saves
    edits to Wikidata.

    Args:
        list_item: iterable of Wikidata item IDs whose P50 claims are
            edited. Items without P50 claims contribute nothing.

    Raises:
        ValueError: if an ordinal is not an integer string.
        IndexError: if an ordinal exceeds the number of citation names.
    """
    site = pywikibot.Site("wikidata", "wikidata")
    repo = site.data_repository()
    # NOTE(review): the ordinals are read from a fixed item (Q27450617) and
    # the names from a fixed ADS record, while the claims come from
    # list_item. Confirm these are meant to describe the same paper.
    neu = print_author_info(['Q27450617'])
    names = author_citation('https://ui.adsabs.harvard.edu/abs/2010ApJ...723.1736H/exportcitation')
    claim_list = []
    for data_item in list_item:
        item = pywikibot.ItemPage(repo, data_item)
        item_dict = item.get()
        claim_list.extend(item_dict['claims'].get('P50', []))
    teb = [names[int(i) - 1] for i in neu]
    # zip() pairs by position; no first-occurrence index comparison, which
    # would skip pairs whenever a name or claim repeats.
    for u, p in zip(teb, claim_list):
        if 'P1932' in p.qualifiers:
            continue
        qualifier = pywikibot.Claim(repo, u'P1932')
        qualifier.setTarget(u)
        p.addQualifier(qualifier, summary=u'Adding a qualifier.')
        print('New Qualifier for P1932!')
# WARNING: executed whenever this file runs; the function calls
# addQualifier(), i.e. it saves edits to Wikidata.
add_statedqualifier(['Q56603077'])