# -*- coding: utf-8 -*-
# fullstopschecker.py
# Distributed under the terms of the GNU General Public License v3.0
'''
Script to remove all the full stops/periods of the descriptions in Wikidata
items.

The script runs a SPARQL query in the Wikidata Query Service (WDQS) and then
checks whether each description meets the requirements: it ends with ".", it is
in one of the languages the bot has been configured for, and the full stop is
not part of an exception (e.g. abbreviations such as "w." in Polish or "a. C."
in Italian or Spanish).

When the description meets the requirements there are five options:

    1. Remove the full stop.
    2. Add description to checklist. It means to add the description to
       another CSV file. This checklist serves to configure the exceptions
       list to avoid some of the descriptions in the next run.
    3. Edit description. It serves when the description has been vandalized
       or there is another error easy to solve.
    4. Skip.
    5. Quit.

Each action is checked and registered in a CSV log in /logs and, at the end,
it generates a HTML file that works as CSV viewer (the same if a CSV checklist
has been created).
'''

import colorama as c
import datetime
import inquirer
import logging
import pywikibot
import re
import sys

# Pywikibot
from pywikibot import pagegenerators as pg
from pywikibot.editor import TextEditor

# Local modules
import bot
import log

# Configuration
site = pywikibot.Site("wikidata", "wikidata")
scriptName = "cebvillagesimage"
now = datetime.datetime.now()
timestamp = str(now.strftime("%Y-%m-%d %H:%M"))

# Colorama reset
cR = c.Style.RESET_ALL

# Language codes whose descriptions are checked by addImage().
_LANGUAGES = ("es", "en", "pl", "it")

# A trailing full stop, optionally preceded by one whitespace character.
_FULL_STOP = re.compile(r"\s?\.$")

'''
TODO:
If you want the bot to change descriptions in other languages, you can open an
issue in the GitHub repository. Or, you can read the next instructions to add
the code yourself.

Instructions to add another language in the code.
    1. In "lang" dictionary you have to:
        1. Write a comma at the end of the last key-value.
        2. In another line configure the language with the format:
            "key": c.Back.COLOR + c.Fore.COLOR + c.Style.BRIGHT + "key-desc" + cR
            The colors available are:
                - Fore: BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE, RESET.
                - Back: BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE, RESET.
                - Style: DIM, NORMAL, BRIGHT, RESET_ALL
    3. Then, in "count" dictionary, you have to write the comma at the end of
       the last key-value and in another line:
            "key": 0
    4. Then, open a pull request with the change in the GitHub repository.
    5. This is enough for me to configure the rest of the bot to work in the
       language you have set, but you can also add the language code to the
       "_LANGUAGES" tuple, which is what the first check of the "try"
       statement tests against.
'''

# NOTE(review): "lang", "count", "exceptions" and "editDesc" are used by
# addImage() below but are not defined anywhere in the visible source of this
# file -- confirm where they are supposed to come from.

# Some colored useful words or symbols.
misc = {
    "replace": c.Style.BRIGHT + "Replacement" + cR,
    "-": c.Fore.RED + "-" + cR,
    "+": c.Fore.GREEN + "+" + cR
}


def _itemId(item):
    """Return the text of ``str(item)`` without the "[[wikidata:" / "]]" wrapper.

    The exact prefix and suffix are removed, instead of using
    ``lstrip``/``rstrip``, which treat their argument as a *set of characters*
    and would also eat leading/trailing characters of the identifier itself.
    """
    text = str(item)
    prefix, suffix = "[[wikidata:", "]]"
    if text.startswith(prefix):
        text = text[len(prefix):]
    if text.endswith(suffix):
        text = text[:-len(suffix)]
    return text


def setLogName():
    """Simple function to set the name of the log according to the editing mode.

    Reads the module-level ``editMode`` global, which is assigned in the
    ``__main__`` block before addImage() is called.

    Returns
    -------
    string
        log name with the form of "scriptName" if the script is going to edit,
        or "scriptName-test" if the script is running in test mode.
    """
    if editMode is True:
        return scriptName
    return scriptName + "-test"


def sparqlQuery(query, site):
    """Run a SPARQL Query in WDQS.

    Parameters
    ----------
    query : string
        query to run
    site : pywikibot site object
        the site to perform the query (this script passes the module-level
        ``site`` created with ``pywikibot.Site``)

    Yields
    ------
    page
        each existing item returned by the query, after its data is loaded
    """
    generator = pg.WikidataSPARQLPageGenerator(query, site=site)

    # For each item (wd) in the SPARQL query (generator)
    for wd in generator:
        if wd.exists():
            wd.get(get_redirect=True)
            yield wd


def addImage(query, editMode):
    """Check if the descriptions meet the requirements and show them to the operator.

    For every item of the query and every description in one of the configured
    languages, a non-empty description that ends with a full stop and matches
    no exception is printed together with its proposed replacement, and the
    choice is delegated to ``editDesc``. A final entry is written to the logs
    and the process exits.

    TODO: the name of this function does not describe what it does.

    Parameters
    ----------
    query : string
        query to run
    editMode : boolean
        editing mode, False for test edits or True to enable the editing.
        It is forwarded to ``editDesc``.

    Raises
    ------
    SystemExit
        always, through ``sys.exit()``, once the final log entries are written.
    """
    # Ask about the necessity to create an EditGroup for this task.
    # "u" is the module-level object created in the __main__ block.
    editGroup = u.editGroups()

    # When "edit" is True, the script will be running until something or the
    # operator stops it. If the operator chooses "Quit", "edit" will be
    # returned as "False"
    edit = True

    # To set the log name depending of the editing mode
    logName = setLogName()

    # Defaults so the final log entry can still be built when the query
    # returns no item (or the items have no descriptions).
    itemId = ""
    key = ""

    for item in sparqlQuery(query, site):
        # Kept in its own name: the item object itself must stay usable for
        # the remaining languages, even after an error has been logged.
        itemId = _itemId(item)

        # Check the key of the languages in the descriptions of the item
        for key in item.descriptions:
            try:
                # Only the configured languages are checked...
                if key not in _LANGUAGES:
                    continue

                description = item.descriptions[key]

                # ... and only non-empty descriptions ending with a dot...
                if not description or not description.endswith("."):
                    continue

                # ... that don't match any exception.
                if any(exception.search(description) for exception in exceptions):
                    continue

                newDescription = _FULL_STOP.sub("", description)

                # Show the full stop in red. This is a display-only copy: the
                # cached description of the item is left untouched.
                redFullStop = c.Fore.RED + c.Style.BRIGHT + "." + cR
                highlighted = _FULL_STOP.sub(redFullStop, description)

                print("\n {}{}{}{}".format(
                    c.Fore.WHITE, c.Style.BRIGHT, itemId, cR)
                )
                print(" {} {}:\t{}".format(misc["-"], lang[key], highlighted))
                print(" {} {}:\t{}\n".format(misc["+"], misc["replace"], newDescription))

                # Make the choice
                edit = editDesc(item, key, description, newDescription,
                                count, editMode, editGroup, logName)

            except KeyError as e:
                info = "{}\t{}-desc\tKeyError: {}".format(itemId, key, e)
                print(info)

                info = {
                    "item": itemId,
                    "key": key + "-desc",
                    "msg": e
                }

                log.check(info, logName, mode="csv")

            # The operator chose "Quit": don't offer the remaining languages.
            if edit is False:
                break

        # ... and don't fetch any further item either.
        if edit is False:
            break

    # Check if the bot is stopped by the operator...
    if edit is False:
        print("Interruption of the script by the operator")

        info = {
            "time": timestamp,
            "item": itemId,
            "key": key + "-desc",
            "msg": "Interruption of the script by the operator. "
        }
    # Or if the query end...
    else:
        print("Task completed!")

        info = {
            "time": timestamp,
            "item": itemId,
            "key": key + "-desc",
            "msg": "Task completed!\n"
        }

    # Generate CSV viewer for the logfile
    log.check(info, logName, mode="csv", generateHTML=True)

    # Generate the CSV viewer for the checklist
    log.check(info, "descriptionCheckList", mode="csv", generateHTML=True)

    # The end!
    sys.exit()


if __name__ == "__main__":
    # Query file
    # NOTE(review): "vaillages" looks like a typo, but it has to match the
    # name of the file on disk -- confirm before renaming.
    rqFile = "ceb-vaillages-query.rq"

    # Reading the query
    with open("queries/" + rqFile, "r", encoding="utf-8") as queryFile:
        query = queryFile.read()

    question = [
        inquirer.Confirm("confirmation",
                         message="Do you want to run the script?")
    ]

    # prompt() gives back None instead of a dict when the operator cancels it
    # (e.g. Ctrl+C), which is treated like answering "no".
    answer = inquirer.prompt(question)

    if answer and answer["confirmation"] is True:
        print("Starting script...\n")

        # "u" and "editMode" are module-level names on purpose: addImage()
        # and setLogName() read them as globals.
        u = bot.Utilities()
        editMode = u.editMode()

        addImage(query, editMode)

    elif answer is None or answer["confirmation"] is False:
        print("Stopping script...")
        sys.exit()