In [3]:
 
WARNING: API error badtoken: Invalid CSRF token.
Sleeping for 9.9 seconds, 2020-01-02 22:59:36
Page [[Allen West (musician)]] saved
Sleeping for 4.9 seconds, 2020-01-02 22:59:51
Page [[Andrew Cyrille]] saved
Sleeping for 7.0 seconds, 2020-01-02 22:59:59
Page [[Andy Chambers]] saved
Sleeping for 8.5 seconds, 2020-01-02 23:00:08
Page [[Andy Hurley]] saved
Page [[Brad Childress]] saved
Page [[Chris Brokaw]] saved
Sleeping for 6.0 seconds, 2020-01-02 23:01:05
Page [[Christie Pearce]] saved
Sleeping for 6.7 seconds, 2020-01-02 23:01:14
Page [[Chuck Panozzo]] saved
Page [[Dana Gioia]] saved
Sleeping for 6.1 seconds, 2020-01-02 23:01:37
Page [[Danny Weis]] saved
Sleeping for 7.2 seconds, 2020-01-02 23:01:46
Page [[Dave Bailey (musician)]] saved
Sleeping for 5.3 seconds, 2020-01-02 23:01:57
Page [[David Gossett]] saved
Page [[Eavan Boland]] saved
Page [[Frank Funaro]] saved
Page [[Jody Rosen]] saved
Page [[Keiwan Ratliff]] saved
Page [[Mark Duper]] saved
Page [[Mike Mangini]] saved
Sleeping for 6.8 seconds, 2020-01-02 23:05:14
Page [[Mike Wengren]] saved
Page [[Patty Griffin]] saved
Page [[Robert Black (Canadian senator)]] saved
Page [[Sabina Cvilak]] saved
Page [[Shafal Mosed]] saved
Page [[Ted Tally]] saved
Page [[Tom Dumont]] saved
Sleeping for 3.3 seconds, 2020-01-02 23:07:27
Page [[Tony Yeboah]] saved
Page [[Vincent Curatola]] saved
Page [[Anika Wells]] saved
Page [[Ayyan (model)]] saved
Sleeping for 7.2 seconds, 2020-01-02 23:08:37
Page [[Barbara Dane]] saved
Page [[Christopher Chaplin]] saved
Page [[Dave Jones (politician)]] saved
Page [[Jason Jones (musician)]] saved
Page [[John Tracy (director)]] saved
Page [[Juan Cavallaro]] saved
Page [[Kieran Mulroney]] saved
Sleeping for 8.3 seconds, 2020-01-02 23:11:50
Page [[Kim Matula]] saved
Page [[Matt Winston]] saved
Page [[Rob Arnold]] saved
Page [[Sarai (rapper)]] saved
In [ ]:
import re

import pywikibot
from pywikibot import pagegenerators

site = pywikibot.Site('en', 'wikipedia')
def extractfirst(text):
    """Reduce an article's wikitext to its first plain-text sentence."""
    result = text
    # Strip category, file and image links, which carry no prose.
    result = re.sub(r"\[\[[Cc]ategory:[^\]]*]]", "", result)
    result = re.sub(r"\[\[[Ff]ile:[^\]]*]]", "", result)
    result = re.sub(r"\[\[[Ii]mage:[^\]]*]]", "", result)
    result = re.sub(r"\n", " ", result)
    # Drop everything from the first section heading onwards.
    result = re.sub(r"==.*", "", result)
    # Remove bold/italic markup.
    result = re.sub(r"''+", "", result)
    # Replace wikilinks with their display text.
    result = re.sub(r"\[\[([^\|\]\[]*\|)?([^\|\]\[]*)]]", r"\2", result)
    # Five passes strip templates, parentheticals, refs and HTML comments
    # nested up to five levels deep.
    for _ in range(5):
        result = re.sub(r"{{[^{}]*}}", "", result)
        result = re.sub(r"\([^\(\)]*\)", "", result)
        result = re.sub(r"<ref[^<>]*>[^<>]*</ref>", "", result)
        result = re.sub(r"<ref[^<>]*/>", "", result)
        result = re.sub(r"<!--[^<>]*-->", "", result)
    result = re.sub(r"  *", " ", result)
    # Keep only the first sentence: text up to a sentence-ending mark
    # followed by a capitalised word or the end of the text.
    result = re.sub(r"(^.*?[.!?](?=\s[A-Z]|$)).*", r"\1", result)
    # Tidy stray whitespace.
    result = re.sub(r"^\s*", "", result)
    result = re.sub(r"\s*(?=,|\.)", "", result)
    result = re.sub(r"\s*$", "", result)
    return result
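# Hypothetical sanity check (made-up wikitext, not from the bot's run):
assert extractfirst(
    "'''Jane Doe''' (born 1970) is an [[United States|American]] "
    "jazz singer.<ref>a ref</ref>\n\n== Career ==\n..."
) == "Jane Doe is an American jazz singer."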
def extractdescription(text):
    """Turn a first sentence into a short description, or return False."""
    result = text
    # Skip sentences containing likely abbreviations ("U.S.", "Dr." etc.),
    # which break the sentence-boundary heuristic in extractfirst.
    if re.search(r"\.[^\s]\.", result, re.IGNORECASE) or re.search(r"(br|chan|chapln|dr|fr|gov|miss|mr|mrs|ms|mme|m|msgr|pres|prof|rep|rev|revs|sen|sr|sra|srta|hon|esq|jr|ret|lt|col|sgt|gen|cpl|capt|bg|adm|cwo|ens|maj|msgt|st)\.", result, re.IGNORECASE):
        return False
    # Skip sentences that were evidently truncated mid-clause.
    if re.search(r"\b(and|or)$", result):
        return False
    if re.match(r'^(("?[A-Z][a-z]*"?|(Jr\.)),? )*?(is|was) (a|an) ', result):
        # Keep only what follows "<Name> is a/an ".
        result = re.sub(r'^(("?[A-Z][a-z]*"?|(Jr\.)),? )*?(is|was) (a|an) (.*)', r"\6", result)
        # Cut trailing clauses that add detail beyond the core description.
        for pattern in [r',? who.*',
                        r',? and currently.*',
                        r',? currently.*',
                        r',? as well.*',
                        r',? (better|best|also|most|mostly|generally|especially|well|particularly|primarily) known.*',
                        r',? known for.*',
                        r',? riding for( the)?.*',
                        r',? active in.*',
                        r',? born in.*']:
            result = re.sub(pattern, "", result)
        result = re.sub(r"\.$", "", result)
        result = re.sub(r"\bformer ", "", result)
        result = re.sub(r'[,;]? (he|she|they) (is|are|were|was).*', "", result)
        # Capitalise the first letter.
        result = re.sub(r'[a-zA-Z]', lambda m: m.group(0).upper(), result, count=1)
        result = re.sub(r"[\.\,\;]$", "", result)
        # Drop dangling conjunctions/copulas (twice, in case two stack up).
        for _ in range(2):
            result = re.sub(r"\b(and|is|that|was|were|are|for)\s*$", "", result)
        if len(result) <= 40:
            if re.match("(Afghan|Albanian|Algerian|Andorran|Angolan|Barbuda|Antiguan|Barbudan|Argentine|Armenian|Australian|Austrian|Azerbaijani|Azeri|Bahamas|Bahamian|Bahraini|Bengali|Barbadian|Belarusian|Belgian|Belizean|Beninese|Beninois|Bhutanese|Bolivian|Bosnian|Herzegovinian|Motswana|Botswanan|Brazilian|Bruneian|Bulgarian|Faso|Burkinabé|Burmese|Burundian|Verde|Cabo|Verdean|Cambodian|Cameroonian|Canadian|African|Chadian|Chilean|Chinese|Colombian|Comoran|Comorian|Congolese|Rican|Ivorian|Croatian|Cuban|Cypriot|Republic|Czech|Danish|Djiboutian|Dominican|Republic|Dominican|Timor|Timorese|Ecuadorian|Egyptian|Salvador|Salvadoran|Guinea|Equatorial|Guinean|Equatoguinean|Eritrean|Estonian|Ethiopian|Fijian|Finnish|French|Gabonese|Gambian|Georgian|German|Ghanaian|Gibraltar|Greek|Hellenic|Grenadian|Guatemalan|Guinean|Bissau|Guinean|Guyanese|Haitian|Honduran|Hungarian|Magyar|Icelandic|Indian|Indonesian|Iranian|Persian|Iraqi|Irish|Israeli|Italian|Coast|Ivorian|Jamaican|Japanese|Jordanian|Kazakhstani|Kazakh|Kenyan|Kiribati|Korea|North|Korean|Korea|South|Korean|Kuwaiti|Kyrgyzstani|Kyrgyz|Kirgiz|Kirghiz|Lao|Laotian|Latvian|Lettish|Lebanese|Basotho|Liberian|Libyan|Liechtensteiner|Lithuanian|Luxembourg|Luxembourgish|Macedonian|Malagasy|Malawian|Malaysian|Maldivian|Malian|Malinese|Maltese|Islands|Marshallese|Martiniquais|Martinican|Mauritanian|Mauritian|Mexican|Micronesian|Moldovan|Monégasque|Monacan|Mongolian|Montenegrin|Moroccan|Mozambican|Namibian|Nauruan|Nepali|Nepalese|Dutch|Netherlandic|Zealand|Zealand|Zelanian|Nicaraguan|Nigerien|Nigerian|Marianan|Norwegian|Omani|Pakistani|Palauan|Palestinian|Panamanian|Guinea|Papua|Guinean|Papuan|Paraguayan|Peruvian|Filipino|Philippine|Polish|Portuguese|Rico|Puerto|Rican|Qatari|Romanian|Russian|Rwandan|Kitts|and|Nevis|Kittitian|Nevisian|Saint|Lucian|Saint|Vincentian|Vincentian|Samoan|Marino|Sammarinese|Tomé|Príncipe|São|Toméan|Arabia|Saudi|Arabian|Senegalese|Serbian|Seychellois|Leone|Sierra|Leonean|Singapore|Singaporean|Slovak|Slovenian|Slovene|Islands|Solomon|Island|Somali|African?|South|African|Sudan|South|Sudanese|Spanish|Lanka|Sri|Lankan|Sudanese|Surinamese|Swazi|Swedish|Swiss|Syrian|Tajikistani|Tanzanian|Thai|Leste|Timorese|Togolese|Tokelauan|Tongan|Tobago|Trinidadian|Tobagonian|Tunisian|Turkish|Turkmen|Tuvaluan|Ugandan|Ukrainian|Arab|Emirates|Emirati|Emirian|Emiri|Kingdom|Great|Britain|Northern|Ireland|UK|British|America|United|States|U.S.|American|Uruguayan|Uzbekistani|Uzbek|Vanuatu|Vanuatuan|Vatican|Venezuelan|Vietnamese|Yemeni|Zambian|Zimbabwean)",result):
                return result
    return False
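# Hypothetical sanity checks (made-up sentences, not from the bot's run):
# a matching "<Name> is a/an ..." sentence yields a description, anything
# else yields False.
assert extractdescription("Jane Doe is an American jazz singer.") == "American jazz singer"
assert extractdescription("The 2019 season was difficult.") is False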
def category_filter(generator, category):
    """
    Filter members of the specified category out of the generator.

    @param generator: Generator to filter
    @type generator: iterator
    @param category: Category to filter out
    @type category: L{pywikibot.page.Category}
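
    Note: this runs upstream of PreloadingGenerator below, so each
    page.categories() check may issue its own API request.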
    """
    for page in generator:
        if category not in page.categories():
            yield page

living_people_cat = pywikibot.Category(site, 'Living people')
sd_article_cat = pywikibot.Category(site, 'Articles with short description')

# Walk living people alphabetically from 'Benjamin', drop pages that
# already carry a short description, and preload page text in batches.
gen = pagegenerators.CategorizedPageGenerator(living_people_cat, start='Benjamin')
gen = category_filter(gen, sd_article_cat)
gen = pagegenerators.PreloadingGenerator(gen)

savecounter = 0
for page in gen:
    sd = extractdescription(extractfirst(page.text))
    if not sd or 'short description' in page.text: # Is the second condition necessary?
        continue
    description = "{{short description|" + sd + "|bot=PearBOT 5}}\n"
    page.text = description + page.text
    savecounter += 1
    print(description)
    # Emergency shutoff: only save while the stop page is empty.
    if pywikibot.Page(site, "User:PearBOT/Biography short descriptions/stop page").text == "":
        page.save('Adding automatically generated short description. For more information see [[Wikipedia:Bots/Requests for approval/PearBOT 5]]. Feedback appreciated at [[User talk:Trialpears]]')
    else:
        break
In [ ]:
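# Hypothetical spot check (no save): run the extraction pipeline on a
# single article from the log above and inspect the result.
page = pywikibot.Page(site, 'Andrew Cyrille')
print(extractdescription(extractfirst(page.text)))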