#!/usr/bin/python # -*- coding: utf-8 -*- # __version__ = '$Id: NULL $' # import re import wikipedia as pywikibot import pagegenerators from pywikibot import i18n import catlib import sys import urllib from datetime import datetime # This is required for the text that is shown when you run this script # with the parameter -help. docuReplacements = { '¶ms;': pagegenerators.parameterHelp } class WiSizeBot: # Edit summary message that should be used is placed on /i18n subdirectory. # The file containing these messages should have the same name as the caller # script (i.e. wikiindex-size.py in this case) def __init__(self, generator, summary, debug=False): """ Constructor. Parameters: @param generator: The page generator that determines on which pages to work. @type generator: generator. @param summary: Set the summary message text for the edit. @type summary: (unicode) string. """ self.generator = generator # init constants self.site = pywikibot.getSite("en", "wikiindex") # Set the edit summary message if summary: self.summary = summary else: self.summary = i18n.twtranslate(self.site, 'basic-changing') self.debug = debug def run(self): for page in self.generator: self.treat(page) def treat(self, page): """ Loads the given page, does some changes, and saves it. """ text = self.load(page) if not text: return newtext = text size_r = None size_r = re.compile(ur"""(?P<all>\{\{\s*[Ss]ize\s*((\|\s*(?P<pages>pages|wiki[ _]pages)\s*=\s*(?P<pages_value>\d*)\s*[^\|\}]*\s*)|(\s*\|\s*(?P<pagesurl>statistics[ _]URL|wiki[ _]statistics[ _]URL)\s*=\s*(?P<pagesurl_value>https?://[^ \|\}\<]*)\s*[^\|\}]*\s*)|(\s*\|\s*(?P<wikifactor>wikiFactor)\s*=\s*(?P<wikifactor_value>\d*)\s*[^\|\}]*\s*)|(\s*\|\s*(?P<wikifactorurl>wikiFactor[ _]URL)\s*=\s*(?P<wikifactorurl_value>http://[^ \|\}\<]*)\s*[^\|\}]*\s*))+\s*\|?\s*\}\}(\s*\([Aa]s\s*of:?\s*(?P<day>\d+)\s*(?P<month>[A-Z][a-z]+)\s*(?P<year>\d+)\s*\)\s*(\<!--[ A-Za-z0-9/]+--\>)?)?)""") wtext = page.get() m = size_r.finditer(wtext) all = "" newvalues = "" for i in m: all = i.group('all') and i.group('all').strip() or '' pages = i.group('pages') and i.group('pages').strip() or '' pagesurl = i.group('pagesurl') and i.group('pagesurl').strip() or '' wikifactor = i.group('wikifactor') and i.group('wikifactor').strip() or '' wikifactorurl = i.group('wikifactorurl') and i.group('wikifactorurl').strip() or '' pages_value = i.group('pages_value') and i.group('pages_value').strip() or '0' pagesurl_value = i.group('pagesurl_value') and i.group('pagesurl_value').strip() or '' wikifactor_value = i.group('wikifactor_value') and i.group('wikifactor_value').strip() or '' wikifactorurl_value = i.group('wikifactorurl_value') and i.group('wikifactorurl_value').strip() or '' day = i.group('day') and i.group('day').strip() or '' month = i.group('month') and i.group('month').strip() or '' year = i.group('year') and i.group('year').strip() or '' if self.debug: pywikibot.output(u"text = " + text) pywikibot.output(u"all = " + all) pywikibot.output(u"pages = " + pages) pywikibot.output(u"pagesurl = " + pagesurl) pywikibot.output(u"wikifactor = " + wikifactor) pywikibot.output(u"wikifactorurl = " + wikifactorurl) pywikibot.output(u"pages_value = " + pages_value) pywikibot.output(u"pagesurl_value = " + pagesurl_value) pywikibot.output(u"wikifactor_value = " + wikifactor_value) pywikibot.output(u"wikifactorurl_value = " + wikifactorurl_value) pywikibot.output(u"day = " + day) pywikibot.output(u"month = " + month) pywikibot.output(u"year = " + year) #get new values n = re.findall(ur"(https?://[^\|\}\]]+\?action=raw|https?://[^\|\}\]]+:Statistics)", pagesurl_value) if n: raw = '' try: url = n[0] if url.endswith(":Statistics"): url += '?action=raw' f = urllib.urlopen(url) raw = unicode(f.read(), 'utf-8') f.close() except: break o = re.findall(ur"total=\d+;good=(\d+);", raw) if o: if o[0] and int(pages_value) != int(o[0]): self.summary = u"Robot: Updating size: %s -> %s" % (pages_value, o[0]) pages_value = o[0] newtime = True else: break else: break else: break #end get #recalculate wikifactor pass #TODO, leave AS IS meanwhile #end recalculate """print pages, pages_value print pagesurl, pagesurl_value print wikifactor, wikifactor_value print wikifactorurl, wikifactorurl_value""" if newtime: dt = datetime.date(datetime.utcnow()) day = dt.strftime('%d') month = dt.strftime('%B') year = dt.strftime('%Y') newvalues = u"""{{Size <!--see Template:Size for full detail--> | %s = %s <!--type the plain number of pages - NO thousands separators--> | %s = %s <!--page count source (often a 'Statistics' page); if unknown type 'No'--> | %s = %s <!--preferred; if unknown leave void; see: Category:wikiFactor for help--> | %s = %s <!--wF source (often 'PopularPages', 'Mostvisitedpages' or 'PageHits'); if unknown leave void--> }}(As of: %s %s %s)<!--manually add/amend date when stats are verified and/or updated-->""" % ( pages or 'pages', pages_value or '', pagesurl or 'statistics URL', pagesurl_value or '', wikifactor or 'wikiFactor', wikifactor_value or '', wikifactorurl or 'wikiFactor URL', wikifactorurl_value or '', day or '', month or '', year or '') newtext = text.replace(all, newvalues) if not self.save(newtext, page, self.summary): pywikibot.output(u'Page %s not saved.' % page.title(asLink=True)) def load(self, page): """ Loads the given page, does some changes, and saves it. """ try: # Load the page text = page.get() except pywikibot.NoPage: pywikibot.output(u"Page %s does not exist; skipping." % page.title(asLink=True)) except pywikibot.IsRedirectPage: pywikibot.output(u"Page %s is a redirect; skipping." % page.title(asLink=True)) else: return text return None def save(self, text, page, comment=None, **kwargs): # only save if something was changed if text != page.get(): # Show the title of the page we're working on. # Highlight the title in purple. pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title()) # show what was changed pywikibot.showDiff(page.get(), text) pywikibot.output(u'Comment: %s' % comment) #choice = pywikibot.inputChoice( # u'Do you want to accept these changes?', # ['Yes', 'No'], ['y', 'N'], 'N') if True: try: # Save the page page.put(text, comment=comment or self.comment, **kwargs) except pywikibot.LockedPage: pywikibot.output(u"Page %s is locked; skipping." % page.title(asLink=True)) except pywikibot.EditConflict: pywikibot.output( u'Skipping %s because of edit conflict' % (page.title())) except pywikibot.SpamfilterError, error: pywikibot.output( u'Cannot change %s because of spam blacklist entry %s' % (page.title(), error.url)) else: return True return False def main(): # This factory is responsible for processing command line arguments # that are also used by other scripts and that determine on which pages # to work on. genFactory = pagegenerators.GeneratorFactory() # The generator gives the pages that should be worked upon. gen = None # This temporary array is used to read the page title if one single # page to work on is specified by the arguments. pageTitleParts = [] # summary message editSummary = '' debug = False start = "!" # Parse command line arguments for arg in pywikibot.handleArgs(): if arg.startswith('-summary:'): editSummary = arg[9:] elif arg.startswith('-start:'): start = arg[7:] elif arg == '-debug': debug = True else: pywikibot.output(u'Unknown argument: %s' % arg) cat = catlib.Category(pywikibot.getSite("en", "wikiindex"), 'Category:MediaWiki') gen = pagegenerators.CategorizedPageGenerator(cat, start=start) #if not gen: # gen = genFactory.getCombinedGenerator() if gen: # The preloading generator is responsible for downloading multiple # pages from the wiki simultaneously. gen = pagegenerators.PreloadingGenerator(gen) bot = WiSizeBot(gen, editSummary, debug) bot.run() else: pywikibot.showHelp() if __name__ == "__main__": try: main() finally: pywikibot.stopme()