#-*- coding: utf-8 -*-
"""Add a ``{{rh}}`` running-header footer to Tamil Wikisource pages.

Page titles are read from the first ``~``-separated field of each row of
``00000.csv``.  For every title the script fetches the page, replaces the
second match of ``NOINCLUDE_PATTERN`` in the page text with an ``{{rh}}``
template carrying the book page number, and saves the page.

Each row is removed from the CSV file (via ``sed -i 1d``) as soon as it is
read, so the file always holds only the rows that have not been started yet.
"""
import csv
import re
import subprocess
import time

import pywikibot

# Seconds to pause after fetching a page and again after saving it.
WAIT_TIME = 60

# Work list: one page title per row, '~' as the field delimiter.
CSV_PATH = '00000.csv'

# The book page number is the wiki page number (the part of the title after
# '.pdf/') minus this offset.
PAGE_OFFSET = 2

# Edit summary passed to page.save().
EDIT_SUMMARY = '+ மேலடி வார்ப்புருவுடன், எண்ணிடல்'

# NOTE(review): as written this matches only a closing </noinclude> tag,
# optionally preceded by literal dots.  It looks like an opening
# <noinclude> may have been lost from the pattern (and from the footer text
# built in _build_footer) -- confirm against real page markup before running.
NOINCLUDE_PATTERN = re.compile(r"\.*?\<\/noinclude>", re.DOTALL)


def _book_page_number(title):
    """Return the book page number for *title*, or None if it has none.

    The wiki page number is whatever follows the last '.pdf/' in the title;
    None is returned when that part is not an integer.
    """
    suffix = title.split('.pdf/')[-1]
    try:
        return int(suffix) - PAGE_OFFSET
    except ValueError:
        return None


def _build_footer(page_number):
    """Return the footer text holding the ``{{rh}}`` template."""
    return ' {{rh| |' + str(page_number) + '|}} '


def _replace_footer(text, footer):
    """Return *text* with its second pattern match replaced by *footer*.

    The first match is treated as the page header and the second as the
    footer.  Only the span of the second match is replaced, so a header
    whose text happens to equal the footer's is left untouched.  Returns
    None when *text* contains fewer than two matches.
    """
    matches = list(NOINCLUDE_PATTERN.finditer(text))
    if len(matches) < 2:
        return None
    start, end = matches[1].span()
    return text[:start] + footer + text[end:]


def main():
    """Process every page title listed in CSV_PATH."""
    site = pywikibot.Site('ta', 'wikisource')
    # newline='' is what the csv module expects; the titles are Tamil text,
    # so the encoding is stated instead of relying on the locale default.
    with open(CSV_PATH, 'r', encoding='utf-8', newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter="~"):
            # Drop the row just read from the file on disk.  The open handle
            # keeps reading the original contents (sed -i writes a new file).
            subprocess.call(["sed", "-i", "1d", CSV_PATH])
            if not row:
                continue

            title = row[0]
            print(title)

            page_number = _book_page_number(title)
            if page_number is None:
                print('Skipped, no numeric page number in title: ' + title)
                continue

            page = pywikibot.Page(site, title)
            updated_text = _replace_footer(page.text, _build_footer(page_number))
            time.sleep(WAIT_TIME)
            if updated_text is None:
                print('Skipped, header/footer tags not found: ' + title)
                continue

            page.text = updated_text
            page.save(EDIT_SUMMARY)
            time.sleep(WAIT_TIME)


if __name__ == "__main__":
    main()