In [5]:
import pywikibot
from bs4 import BeautifulSoup
import urllib.request
import re
# Connect to enwiki (the English-language Wikipedia site object)
enwiki = pywikibot.Site('en', 'wikipedia')
# and then to wikidata, via the data repository attached to that site.
# NOTE(review): none of the functions visible in this notebook reference
# `enwiki` or `enwiki_repo`; each one builds its own Wikidata site object.
# Confirm whether this setup is still needed.
enwiki_repo = enwiki.data_repository()
In [10]:
#%%

def _english_name_labels(repo, claims, prop):
    """Return the English labels of the items linked through ``prop`` claims.

    Claims without an item target and linked items without an English label
    are skipped; a missing property yields an empty list.
    """
    labels = []
    for claim in claims.get(prop, []):
        target = claim.target
        if target is None:
            continue
        label = pywikibot.ItemPage(repo, target.getID()).get()['labels'].get('en')
        if label is not None:
            labels.append(label)
    return labels


def print_authorname(list_item):
    """Build "given, family" name strings for the item-linked authors (P50).

    Only P50 statements that carry a P1545 qualifier are considered. For each
    such author item, every English given-name label (P735) is combined with
    every English family-name label (P734).

    Parameters
    ----------
    list_item : iterable of str
        Wikidata item ids (for example ``['Q56603084']``).

    Returns
    -------
    list of str
        Strings of the form ``'<given name>, <family name>'``, in statement
        order. Items without P50 statements contribute nothing.

    Notes
    -----
    Prints ``'No Given name'`` / ``'No Family name'`` when an author item has
    no usable label for the respective property; that author is then skipped.
    Errors raised while fetching pages are no longer swallowed.
    """
    site = pywikibot.Site("wikidata", "wikidata")
    repo = site.data_repository()

    names = []
    for data_item in list_item:
        item = pywikibot.ItemPage(repo, data_item)

        for author_claim in item.claims.get('P50', []):
            # Only authors that carry a P1545 qualifier are resolved.
            if 'P1545' not in author_claim.qualifiers:
                continue
            if author_claim.target is None:
                # Statement without an item value: nothing to look up.
                continue

            author = pywikibot.ItemPage(repo, author_claim.target.getID())
            author_claims = author.get()['claims']

            given_names = _english_name_labels(repo, author_claims, 'P735')
            if not given_names:
                print('No Given name')
                continue

            # Looked up once per author rather than once per given name.
            family_names = _english_name_labels(repo, author_claims, 'P734')
            if not family_names:
                print('No Family name')
                continue

            names.extend(
                given + ', ' + family
                for given in given_names
                for family in family_names
            )

    return names

# Example call for item Q56603084; the returned list is this cell's output.
print_authorname(['Q56603084'])
Out[10]:
['Carole, Tucker',
 'Keith, Vanderlinde',
 'Hsin, Chiang',
 'Valentyn, Novosad',
 'P., Ade',
 'Peter, Ade']
In [15]:
#%%
#function to get the series ordinal (P1545 qualifier) of each author name string (P2093)

def print_authorstring_info(list_item):
    """Collect the P1545 qualifier values of the author name strings (P2093).

    Parameters
    ----------
    list_item : iterable of str
        Wikidata item ids.

    Returns
    -------
    list
        The target of the first P1545 qualifier of every P2093 statement that
        has one, in statement order. Items without P2093 statements contribute
        nothing (previously they raised ``KeyError``).
    """
    site = pywikibot.Site("wikidata", "wikidata")
    repo = site.data_repository()

    ordinals = []
    for data_item in list_item:
        item = pywikibot.ItemPage(repo, data_item)

        for name_claim in item.claims.get('P2093', []):
            ordinal_qualifiers = name_claim.qualifiers.get('P1545')
            if ordinal_qualifiers:
                ordinals.append(ordinal_qualifiers[0].getTarget())

    return ordinals
                    
# Example call: P1545 values of the P2093 statements on Q56603084.
print_authorstring_info(['Q56603084'])               
Out[15]:
['1',
 '2',
 '3',
 '5',
 '6',
 '7',
 '8',
 '9',
 '10',
 '11',
 '12',
 '14',
 '15',
 '16',
 '17',
 '18',
 '19',
 '20',
 '21',
 '22',
 '23',
 '24',
 '25',
 '26',
 '27',
 '28',
 '29',
 '30',
 '31',
 '32',
 '33',
 '34',
 '35',
 '36',
 '37',
 '38',
 '39',
 '40',
 '41',
 '42',
 '43',
 '44',
 '45',
 '46',
 '47',
 '48',
 '49',
 '51',
 '52',
 '53',
 '54',
 '55',
 '56',
 '57',
 '58',
 '59',
 '62',
 '63',
 '64',
 '65',
 '66',
 '67']
In [107]:
#%%

#function to get the series ordinal (P1545 qualifier) of each author (P50)

def print_author_info(list_item):
    """Return the P1545 qualifier values attached to P50 statements.

    Each id in ``list_item`` is loaded from Wikidata. For every P50 statement
    of the item, the target of the first P1545 qualifier (when present) is
    appended to the result, in statement order.
    """
    repo = pywikibot.Site("wikidata", "wikidata").data_repository()

    ordinals = []
    for qid in list_item:
        page = pywikibot.ItemPage(repo, qid)
        loaded = page.get()

        # Nothing to collect when the item has no P50 statements.
        if 'P50' not in loaded['claims']:
            continue

        for author_claim in page.claims['P50']:
            ordinals.extend(
                values[0].getTarget()
                for prop, values in author_claim.qualifiers.items()
                if prop == 'P1545'
            )

    return ordinals

# Example call: P1545 values of the P50 statements on Q56603084.
print_author_info(['Q56603084'])
Out[107]:
['60', '61', '13', '50', '4']
In [108]:
#%%

#function to concatenate the series ordinals of the author name strings with those of the authors

def all_authors(list_item=None):
    """Return the series ordinals of author strings followed by those of authors.

    Parameters
    ----------
    list_item : list of str, optional
        Wikidata item ids to inspect. Defaults to ``['Q56603084']``, the item
        this function was previously hard-wired to, so existing zero-argument
        calls behave as before.

    Returns
    -------
    list
        ``print_authorstring_info(list_item) + print_author_info(list_item)``.
    """
    if list_item is None:
        list_item = ['Q56603084']

    author_ordinals = print_author_info(list_item)
    string_ordinals = print_authorstring_info(list_item)
    return string_ordinals + author_ordinals
In [ ]:
#%%
#This function extracts author names from the article:author meta tags of a web page

def author_citation(url):
    """Fetch ``url`` and return the author names declared in its markup.

    The page is parsed with BeautifulSoup's ``html.parser``; every element
    whose ``property`` attribute matches ``article:author`` contributes the
    value of its ``content`` attribute.

    Parameters
    ----------
    url : str
        Address of the page to download.

    Returns
    -------
    list of str
        ``content`` values in document order. Matching elements without a
        ``content`` attribute are skipped.

    Raises
    ------
    urllib.error.URLError
        Propagated from ``urllib.request.urlopen`` when the request fails.
    """
    with urllib.request.urlopen(url) as response:
        html = response.read()

    soup = BeautifulSoup(html, "html.parser")
    # The previous debugging dump of the whole parsed page was removed:
    # it flooded the cell output and is not part of the result.
    author_tags = soup.find_all(attrs={'property': re.compile('article:author')})

    contents = (tag.get('content') for tag in author_tags)
    return [content for content in contents if content is not None]

# Example call against a ResearchGate page.
# NOTE(review): author_match() and add_statedqualifier() pass ADS
# "exportcitation" URLs instead - confirm which source is intended.
author_citation('https://www.researchgate.net/publication/221940970_Galaxy_Clusters_Discovered_via_the_Sunyaev-Zel%27dovich_Effect_in_the_First_720_Square_Degrees_of_the_South_Pole_Telescope_Survey')
In [ ]:
#%%
#This function maps each series ordinal to the author name at that position in the citation

def author_match():
    """Return the citation author name for every series ordinal on the item.

    The ordinals come from ``all_authors()`` and are 1-based positions into
    the author list returned by ``author_citation`` for the ADS export page
    of 2018ApJ...852...97H.

    Returns
    -------
    list of str
        One citation name per ordinal, in the order of ``all_authors()``.
        An empty list when there are no ordinals (previously ``None``).

    Raises
    ------
    ValueError
        If an ordinal is not an integer string.
    IndexError
        If an ordinal is larger than the number of citation authors.
    """
    stated = all_authors()
    authorname = author_citation('https://ui.adsabs.harvard.edu/abs/2018ApJ...852...97H/exportcitation')
    # Ordinals are 1-based, list positions are 0-based.
    return [authorname[int(ordinal) - 1] for ordinal in stated]
    
# Example call; depends on all_authors() and author_citation() from the cells above.
author_match()
In [16]:
#%%
#This function adds the author string name to a list


def print_authorstring(list_item):
    """Return the values of the author name string statements (P2093).

    Parameters
    ----------
    list_item : iterable of str
        Wikidata item ids.

    Returns
    -------
    list
        The ``target`` of every P2093 statement, in statement order. Items
        without P2093 statements contribute nothing.
    """
    site = pywikibot.Site("wikidata", "wikidata")
    repo = site.data_repository()

    name_strings = []
    for data_item in list_item:
        item = pywikibot.ItemPage(repo, data_item)
        claims = item.get()['claims']

        # Exact property lookup: the former substring test ('P2093' in key)
        # would also have matched longer ids such as 'P20930'.
        for name_claim in claims.get('P2093', []):
            name_strings.append(name_claim.target)

    return name_strings

# Example call: P2093 values stored on Q56603084.
print_authorstring(['Q56603084'])
Out[16]:
['J. W. Henning',
 'J. T. Sayre',
 'C. L. Reichardt',
 'A. J. Anderson',
 'J. E. Austermann',
 'J. A. Beall',
 'A. N. Bender',
 'B. A. Benson',
 'L. E. Bleem',
 'J. E. Carlstrom',
 'C. L. Chang',
 'H-M. Cho',
 'R. Citron',
 'C. Corbett Moran',
 'T. M. Crawford',
 'A. T. Crites',
 'T. de Haan',
 'M. A. Dobbs',
 'W. Everett',
 'J. Gallicchio',
 'E. M. George',
 'A. Gilbert',
 'N. W. Halverson',
 'N. Harrington',
 'G. C. Hilton',
 'G. P. Holder',
 'W. L. Holzapfel',
 'S. Hoover',
 'Z. Hou',
 'J. D. Hrubes',
 'N. Huang',
 'J. Hubmayr',
 'K. D. Irwin',
 'R. Keisler',
 'L. Knox',
 'A. T. Lee',
 'E. M. Leitch',
 'D. Li',
 'A. Lowitz',
 'A. Manzotti',
 'J. J. McMahon',
 'S. S. Meyer',
 'L. Mocanu',
 'J. Montgomery',
 'A. Nadolski',
 'T. Natoli',
 'J. P. Nibarger',
 'S. Padin',
 'C. Pryke',
 'J. E. Ruhl',
 'B. R. Saliwanchik',
 'K. K. Schaffer',
 'C. Sievers',
 'G. Smecher',
 'A. A. Stark',
 'K. T. Story',
 'T. Veach',
 'J. D. Vieira',
 'G. Wang',
 'N. Whitehorn',
 'W. L. K. Wu',
 'V. Yefremenko']
In [ ]:
#%%
#This function joins the authorstring name and the concatenated given and family name 

def joined(list_item=None):
    """Return author name strings followed by the "given, family" names.

    Parameters
    ----------
    list_item : list of str, optional
        Wikidata item ids to inspect. Defaults to ``['Q56603084']``, the item
        this function was previously hard-wired to, so existing zero-argument
        calls behave as before.

    Returns
    -------
    list
        ``print_authorstring(list_item) + print_authorname(list_item)``.
    """
    if list_item is None:
        list_item = ['Q56603084']

    name_strings = print_authorstring(list_item)
    linked_names = print_authorname(list_item)
    return name_strings + linked_names
# Example call; depends on print_authorstring() and print_authorname() from the cells above.
joined()
In [104]:
#%%
#This function prints each citation name next to the Wikidata name at the same position

def match_alt():
    """Print citation names next to Wikidata names, paired by position.

    The left column comes from ``author_match()``, the right one from
    ``joined()``. Pairing stops at the shorter of the two lists. Nothing is
    returned.
    """
    # `or []` keeps the header printable when a source yields nothing.
    left = author_match() or []
    right = joined() or []

    print('citation names' + ' - ' + 'wikidata names')
    print('\n')
    # zip() already pairs equal positions. The former list.index() comparison
    # dropped every row whose name had appeared earlier in either list.
    for citation_name, wikidata_name in zip(left, right):
        print(citation_name + ' - ' + wikidata_name)
# Print the side-by-side comparison; the lines that follow are this cell's recorded output.
match_alt()
citation names - wikidata names


High, F. W. - J. W. Henning
Stalder, B. - J. T. Sayre
Song, J. - C. L. Reichardt
Aird, K. A. - A. J. Anderson
Allam, S. S. - J. E. Austermann
Armstrong, R. - J. A. Beall
Barkhouse, W. A. - A. N. Bender
Benson, B. A. - B. A. Benson
Bertin, E. - L. E. Bleem
Bhattacharya, S. - J. E. Carlstrom
Bleem, L. E. - C. L. Chang
Buckley-Geer, E. J. - H-M. Cho
Carlstrom, J. E. - R. Citron
Challis, P. - C. Corbett Moran
Chang, C. L. - T. M. Crawford
Crawford, T. M. - A. T. Crites
Crites, A. T. - T. de Haan
de Haan, T. - M. A. Dobbs
Desai, S. - W. Everett
Dobbs, M. A. - J. Gallicchio
Dudley, J. P. - E. M. George
Foley, R. J. - A. Gilbert
George, E. M. - N. W. Halverson
Gladders, M. - N. Harrington
Halverson, N. W. - G. C. Hilton
Hamuy, M. - G. P. Holder
Hansen, S. M. - W. L. Holzapfel
Holder, G. P. - S. Hoover
Holzapfel, W. L. - Z. Hou
Hrubes, J. D. - J. D. Hrubes
Joy, M. - N. Huang
Keisler, R. - J. Hubmayr
Lee, A. T. - K. D. Irwin
Leitch, E. M. - R. Keisler
Lin, H. - L. Knox
Lin, Y. -T. - A. T. Lee
Loehr, A. - E. M. Leitch
Lueker, M. - D. Li
Marrone, D. - A. Lowitz
McMahon, J. J. - A. Manzotti
Mehl, J. - J. J. McMahon
Meyer, S. S. - S. S. Meyer
Mohr, J. J. - L. Mocanu
Montroy, T. E. - J. Montgomery
Morell, N. - A. Nadolski
Ngeow, C. -C. - T. Natoli
Padin, S. - J. P. Nibarger
Pryke, C. - S. Padin
Reichardt, C. L. - C. Pryke
Rest, A. - J. E. Ruhl
Ruel, J. - B. R. Saliwanchik
Ruhl, J. E. - K. K. Schaffer
Schaffer, K. K. - C. Sievers
Shaw, L. - G. Smecher
Shirokoff, E. - A. A. Stark
Smith, R. C. - K. T. Story
Stark, A. A. - T. Veach
Stubbs, C. W. - J. D. Vieira
Tucker, D. L. - G. Wang
Vanderlinde, K. - N. Whitehorn
Vieira, J. D. - W. L. K. Wu
Williamson, R. - V. Yefremenko
Spieler, H. G. - Carole, Tucker
Staniszewski, Z. - Keith, Vanderlinde
Brodwin, M. - Hsin, Chiang
Plagge, T. - Valentyn, Novosad
Ade, P. A. R. - P., Ade
In [24]:
#%%
#This function splits each citation author name at the ", " separator

def authorname_seperated():
    """Split every citation author name at the ``", "`` separator.

    The names come from ``author_match()``.

    Returns
    -------
    list of list of str
        One list of parts per name, for example
        ``'Seljak, Uros'`` becomes ``['Seljak', 'Uros']``. A name without the
        separator yields a single-element list.

    Notes
    -----
    The former version also downloaded item Q56603077 without using it; that
    request was dropped.
    """
    citation_names = author_match() or []
    return [citation_name.split(", ") for citation_name in citation_names]
  
# Example call; depends on author_match() from the cells above.
authorname_seperated()
Out[24]:
[['Seljak', 'Uroš'], ['Zaldarriaga', 'Matias']]
In [ ]:
#%%
#This function collects the P2093 claims and the P50 claims that have a series ordinal - useful for adding qualifiers

def join_names(data_item='Q56603082'):
    """Collect the author statements of an item that can receive qualifiers.

    Parameters
    ----------
    data_item : str, optional
        Wikidata item id. Defaults to ``'Q56603082'``, the id this function
        was previously hard-wired to, so zero-argument calls behave as before.

    Returns
    -------
    list
        All P2093 claims, followed by the P50 claims that carry a P1545
        qualifier, each group in statement order. Empty when the item has
        neither property.
    """
    site = pywikibot.Site("wikidata", "wikidata")
    repo = site.data_repository()
    item = pywikibot.ItemPage(repo, data_item)
    claims = item.get()['claims']

    # P50 claims without a P1545 qualifier are left out.
    author_claims = [
        claim for claim in claims.get('P50', [])
        if 'P1545' in claim.qualifiers
    ]
    name_string_claims = list(claims.get('P2093', []))

    return name_string_claims + author_claims

# Example call using the function's built-in item id (Q56603082).
join_names()
In [ ]:
#%%
#This function adds the P9688 and P9687 qualifier if not present

def add_namesqualifier(data_item):
    """Add missing P9688 / P9687 qualifiers to the author statements of an item.

    The statements are the P2093 claims of ``data_item`` followed by its P50
    claims that carry a P1545 qualifier (the same ordering ``join_names()``
    produces). They are paired by position with the split names returned by
    ``authorname_seperated()``: the part before the comma becomes the P9688
    value, the part after it the P9687 value.

    Parameters
    ----------
    data_item : str
        Wikidata item id whose statements are updated. The former version
        fetched this item but then edited the claims of the id hard-coded in
        ``join_names()``.

    Notes
    -----
    * This function writes to Wikidata through ``Claim.addQualifier``.
    * Each qualifier is handled independently: an existing P9688 no longer
      prevents a missing P9687 from being added.
    * A name without a comma has no second part; P9687 is skipped for it.
    * NOTE(review): the names still come from ``authorname_seperated()``,
      which reads its own fixed sources - confirm they describe ``data_item``
      before running.
    """
    site = pywikibot.Site("wikidata", "wikidata")
    repo = site.data_repository()
    item = pywikibot.ItemPage(repo, data_item)
    claims = item.get()['claims']

    target_claims = list(claims.get('P2093', []))
    target_claims += [
        claim for claim in claims.get('P50', [])
        if 'P1545' in claim.qualifiers
    ]
    name_parts = authorname_seperated()

    # (property id, index into the split name, progress message)
    qualifier_plan = (
        (u'P9688', 0, 'New Qualifier for P9688!'),
        (u'P9687', 1, 'New Qualifier for P9687!'),
    )

    # zip() pairs by position. The former list.index() comparison skipped any
    # claim or name that compared equal to an earlier entry.
    for claim, parts in zip(target_claims, name_parts):
        for prop, part_index, message in qualifier_plan:
            if prop in claim.qualifiers:
                continue
            if part_index >= len(parts):
                continue
            qualifier = pywikibot.Claim(repo, prop)
            qualifier.setTarget(parts[part_index])
            claim.addQualifier(qualifier, summary=u'Adding a qualifier.')
            print(message)
            
# NOTE: not a read-only example - running this cell issues addQualifier()
# calls for every paired claim that lacks the qualifiers.
add_namesqualifier('Q56603077')

#%%
#This function adds the P1932 qualifier (the author name as written in the citation) if not present

def add_statedqualifier(list_item, citation_url='https://ui.adsabs.harvard.edu/abs/2010ApJ...723.1736H/exportcitation'):
    """Add a missing P1932 qualifier to the P50 statements of the given items.

    For every P50 claim that carries a P1545 qualifier, that value is read as
    a 1-based position into the author list returned by
    ``author_citation(citation_url)``; the name found there becomes the P1932
    value.

    Parameters
    ----------
    list_item : iterable of str
        Wikidata item ids whose P50 statements are updated.
    citation_url : str, optional
        Page the citation author list is read from. Defaults to the ADS export
        URL that was previously hard-coded in the body.

    Raises
    ------
    ValueError
        If a P1545 value is not an integer string.
    IndexError
        If a P1545 value exceeds the number of citation authors. All pairs are
        resolved before the first write, so nothing is edited in that case.

    Notes
    -----
    * This function writes to Wikidata through ``Claim.addQualifier``.
    * The former version took the ordinals from item Q27450617 regardless of
      ``list_item`` and zipped them positionally with every P50 claim, so a
      claim without an ordinal shifted all later names. Each claim now uses
      its own P1545 value.
    * Items without P50 statements and claims that already have P1932 are
      skipped.
    """
    site = pywikibot.Site("wikidata", "wikidata")
    repo = site.data_repository()
    names = author_citation(citation_url)

    # Resolve every (claim, name) pair first so a bad ordinal aborts the run
    # before any edit is made.
    pairs = []
    for data_item in list_item:
        item = pywikibot.ItemPage(repo, data_item)
        for claim in item.get()['claims'].get('P50', []):
            ordinal_qualifiers = claim.qualifiers.get('P1545')
            if not ordinal_qualifiers:
                continue
            ordinal = int(ordinal_qualifiers[0].getTarget())
            pairs.append((claim, names[ordinal - 1]))

    for claim, stated_name in pairs:
        if 'P1932' in claim.qualifiers:
            continue
        qualifier = pywikibot.Claim(repo, u'P1932')
        qualifier.setTarget(stated_name)
        claim.addQualifier(qualifier, summary=u'Adding a qualifier.')
        print('New Qualifier for P1932!')
    
# NOTE: not a read-only example - running this cell issues addQualifier()
# calls for P50 claims of Q56603077 that lack P1932.
add_statedqualifier(['Q56603077'])
In [ ]: