#!/usr/bin/env python
# -*- coding: utf-8 -*-
import wikipedia
import pagegenerators
import re
import warnings
from time import sleep
from contextlib import closing
from sys import stdout
from json import dump, load
from itertools import ifilter
# This is required for the text that is shown when you run this script
# with the parameter -help.
# Substitution map for the module's -help text; the key is expanded to the
# standard page-generator parameter documentation by pywikipedia.
docuReplacements = {
'¶ms;': pagegenerators.parameterHelp
}
# The wiki site this script operates on (from the user's pywikipedia config).
SITE = wikipedia.getSite()
def pagesUsingTemplate(templateName):
    """Return a generator over pages that transclude Template:<templateName>."""
    title = unicode(SITE.namespace(10)) + u":" + templateName
    template_page = wikipedia.Page(SITE, title)
    return pagegenerators.ReferringPageGenerator(template_page, onlyTemplateInclusion=True)
def has_disambiguator(page):
    """Return True when the page title contains a parenthesized disambiguator."""
    return page.title().find(u'(') != -1
def list_redirects_to(page):
    """Return the pages that redirect directly to *page* (no redirect chasing)."""
    direct_redirects = page.getReferences(follow_redirects=False, redirectsOnly=True)
    return direct_redirects
def wordsRegex(words):
    """Build a non-capturing alternation pattern matching any of *words*."""
    alternatives = "|".join("(?:%s)" % w for w in words)
    return "(?:%s)" % alternatives
class CobraBot(object):
EDIT_SUMMARY = u'Superfluous disambiguation removed per [[WP:NAMB]] ([[Wikipedia:BOTPOL#Assisted_editing_guidelines|assisted editing]] using [[User:CobraBot|CobraBot]]; [[User talk:Cybercobra]])'
PERSON_SUMMARY = u'Person disambiguation tweaked ([[Wikipedia:BOTPOL#Assisted_editing_guidelines|assisted editing]] using [[User:CobraBot|CobraBot]]; [[User talk:Cybercobra]])'
DABLINK = u"Dablink"
DISAMBIGUATION = re.compile(u"\\{\\{[ \t]*" + wordsRegex("about dablink otheruses for the redirect this twootheruses".split() + ["other uses", "two other uses"]) +"[^}]*\\}\\}(\n?)", re.IGNORECASE)
DB_MOVE = "{{db-move|%s|Evidently not ambiguous}}\n"
OFFSET_FILE = 'N.json'
def __init__(self, debug):
"""
Constructor. Parameters:
* generator - The page generator that determines on which pages
to work on.
* debug - If True, doesn't do any real changes, but only shows
what would have been changed.
"""
self.generator = ifilter(has_disambiguator, pagesUsingTemplate(self.DABLINK))
self.debug = debug
self.editCount = 0
self.log = file("skipped.log", 'a')
self.log.write("BEGIN NEW SESSION\n")
wikipedia.setAction(self.EDIT_SUMMARY)
def run(self):
with closing(file(self.OFFSET_FILE, 'r')) as f:
N = load(f)
# Set the edit summary message
print "Advancing by %s..." % N
stdout.flush()
for i in xrange(N):
next(self.generator)
print "Done advancing!"
stdout.flush()
try:
for pageIndex, page in enumerate(self.generator):
wikipedia.setAction(self.EDIT_SUMMARY)
self.treat(page, pageIndex)
finally:
self.log.close()
with closing(file(self.OFFSET_FILE, 'w')) as f:
dump(N+pageIndex-5, f)
#########
def treat(self, page, pageIndex):
"""
Loads the given page, does some changes, and saves it.
"""
print "=================================================================="
print "PAGE TITLE:", page.title()
print "PAGE#:", pageIndex+1
print "EDIT COUNT:", self.editCount
if page.namespace() != 0:
wikipedia.output(u"SKIPPING: Non-article namespace!")
return
try:
# Load the page
text = page.get()
except wikipedia.NoPage:
wikipedia.output(u"Page %s does not exist; skipping." % page.aslink())
return
except wikipedia.IsRedirectPage:
wikipedia.output(u"Page %s is a redirect; skipping." % page.aslink())
return
disams = list(re.finditer(self.DISAMBIGUATION, text))
if not disams:
self.log.write("FALSE POSITIVE: "+page.title().encode('utf8')+"\n")
print "FALSE POSITIVE:", page.title().encode('utf8')
return
print "REDIRECTS:"
redirects = list(list_redirects_to(page))
print " ", "\n ".join([redirect.title() for redirect in redirects])
norm_with_caps = page.title().split(u"(")[0].strip()
normalized_title = norm_with_caps.lower()
if any(redir.title().lower() == normalized_title for redir in redirects):
print "***PRIMARY TOPIC REDIRECTS HERE***"
person = False
dbmove = False
while True:
print "Choose option:"
print "[0] Skip page"
for i, disamb in enumerate(disams):
lineno = text[:disamb.start()].count("\n")
print "[%s] (line %s): %s" % (i+1, lineno, disamb.group().strip())
try:
input = raw_input("Enter number of your choice: ")
choice = int(input)
except ValueError:
if input == "person":
person = True
choice = 1
break
if input == "dbmove":
dbmove = True
break
print "Invalid input; try again."
else:
if choice <= len(disams):
break
else:
print "Invalid input; try again."
if dbmove:
target = wikipedia.Page(SITE, norm_with_caps)
text = self.DB_MOVE % page.title() + target.get()
page = target
elif choice == 0:
print "SKIPPED"
return
else:
redo = choice < 0
if choice < 0: choice = -choice
choice -= 1
redact = disams[choice]
if person:
wikipedia.setAction(self.PERSON_SUMMARY)
text = text[:redact.start()] + "{{otherpeople|%s}}\n" % norm_with_caps + text[redact.end():]
else:
text = text[:redact.start()] + text[redact.end():]
# only save if something was changed
if text != page.get():
# Show the title of the page we're working on.
# Highlight the title in purple.
wikipedia.output(u"\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
# show what was changed
wikipedia.showDiff(page.get(), text)
# raw_input("Continue?")
# sleep(3)
if dbmove or self.debug:
choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No'], ['y', 'N'], 'N')
if choice == 'n':
return
try:
# Save the page
page.put(text)
except wikipedia.LockedPage:
wikipedia.output(u"Page %s is locked; skipping." % page.aslink())
except wikipedia.EditConflict:
wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
except wikipedia.SpamfilterError, error:
wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (page.title(), error.url))
else:
self.editCount += 1
if redo:
self.treat(wikipedia.Page(SITE, page.title()), pageIndex)
def main():
    """Entry point: construct the bot and run it with warnings suppressed."""
    debug_mode = False
    bot = CobraBot(debug_mode)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        bot.run()
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always run pywikipedia's shutdown hook, even on error/interrupt.
        wikipedia.stopme()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import wikipedia
import pagegenerators
import re
import warnings
from time import sleep
from contextlib import closing
from sys import stdout
from json import dump, load
from itertools import ifilter
# This is required for the text that is shown when you run this script
# with the parameter -help.
# Substitution map for the module's -help text; the key is expanded to the
# standard page-generator parameter documentation by pywikipedia.
docuReplacements = {
'¶ms;': pagegenerators.parameterHelp
}
# The wiki site this script operates on (from the user's pywikipedia config).
SITE = wikipedia.getSite()
def pagesUsingTemplate(templateName):
    """Return a generator over pages that transclude Template:<templateName>."""
    title = unicode(SITE.namespace(10)) + u":" + templateName
    template_page = wikipedia.Page(SITE, title)
    return pagegenerators.ReferringPageGenerator(template_page, onlyTemplateInclusion=True)
def has_disambiguator(page):
    """Return True when the page title contains a parenthesized disambiguator."""
    return page.title().find(u'(') != -1
def list_redirects_to(page):
    """Return the pages that redirect directly to *page* (no redirect chasing)."""
    direct_redirects = page.getReferences(follow_redirects=False, redirectsOnly=True)
    return direct_redirects
def wordsRegex(words):
    """Build a non-capturing alternation pattern matching any of *words*."""
    alternatives = "|".join("(?:%s)" % w for w in words)
    return "(?:%s)" % alternatives
class CobraBot(object):
EDIT_SUMMARY = u'Superfluous disambiguation removed per [[WP:NAMB]] ([[Wikipedia:BOTPOL#Assisted_editing_guidelines|assisted editing]] using [[User:CobraBot|CobraBot]]; [[User talk:Cybercobra]])'
PERSON_SUMMARY = u'Person disambiguation tweaked ([[Wikipedia:BOTPOL#Assisted_editing_guidelines|assisted editing]] using [[User:CobraBot|CobraBot]]; [[User talk:Cybercobra]])'
DABLINK = u"Dablink"
DISAMBIGUATION = re.compile(u"\\{\\{[ \t]*" + wordsRegex("about dablink otheruses for the redirect this twootheruses".split() + ["other uses", "two other uses"]) +"[^}]*\\}\\}(\n?)", re.IGNORECASE)
DB_MOVE = "{{db-move|%s|Evidently not ambiguous}}\n"
OFFSET_FILE = 'N.json'
def __init__(self, debug):
"""
Constructor. Parameters:
* generator - The page generator that determines on which pages
to work on.
* debug - If True, doesn't do any real changes, but only shows
what would have been changed.
"""
self.generator = ifilter(has_disambiguator, pagesUsingTemplate(self.DABLINK))
self.debug = debug
self.editCount = 0
self.log = file("skipped.log", 'a')
self.log.write("BEGIN NEW SESSION\n")
wikipedia.setAction(self.EDIT_SUMMARY)
def run(self):
with closing(file(self.OFFSET_FILE, 'r')) as f:
N = load(f)
# Set the edit summary message
print "Advancing by %s..." % N
stdout.flush()
for i in xrange(N):
next(self.generator)
print "Done advancing!"
stdout.flush()
try:
for pageIndex, page in enumerate(self.generator):
wikipedia.setAction(self.EDIT_SUMMARY)
self.treat(page, pageIndex)
finally:
self.log.close()
with closing(file(self.OFFSET_FILE, 'w')) as f:
dump(N+pageIndex-5, f)
#########
def treat(self, page, pageIndex):
"""
Loads the given page, does some changes, and saves it.
"""
print "=================================================================="
print "PAGE TITLE:", page.title()
print "PAGE#:", pageIndex+1
print "EDIT COUNT:", self.editCount
if page.namespace() != 0:
wikipedia.output(u"SKIPPING: Non-article namespace!")
return
try:
# Load the page
text = page.get()
except wikipedia.NoPage:
wikipedia.output(u"Page %s does not exist; skipping." % page.aslink())
return
except wikipedia.IsRedirectPage:
wikipedia.output(u"Page %s is a redirect; skipping." % page.aslink())
return
disams = list(re.finditer(self.DISAMBIGUATION, text))
if not disams:
self.log.write("FALSE POSITIVE: "+page.title().encode('utf8')+"\n")
print "FALSE POSITIVE:", page.title().encode('utf8')
return
print "REDIRECTS:"
redirects = list(list_redirects_to(page))
print " ", "\n ".join([redirect.title() for redirect in redirects])
norm_with_caps = page.title().split(u"(")[0].strip()
normalized_title = norm_with_caps.lower()
if any(redir.title().lower() == normalized_title for redir in redirects):
print "***PRIMARY TOPIC REDIRECTS HERE***"
person = False
dbmove = False
while True:
print "Choose option:"
print "[0] Skip page"
for i, disamb in enumerate(disams):
lineno = text[:disamb.start()].count("\n")
print "[%s] (line %s): %s" % (i+1, lineno, disamb.group().strip())
try:
input = raw_input("Enter number of your choice: ")
choice = int(input)
except ValueError:
if input == "person":
person = True
choice = 1
break
if input == "dbmove":
dbmove = True
break
print "Invalid input; try again."
else:
if choice <= len(disams):
break
else:
print "Invalid input; try again."
if dbmove:
target = wikipedia.Page(SITE, norm_with_caps)
text = self.DB_MOVE % page.title() + target.get()
page = target
elif choice == 0:
print "SKIPPED"
return
else:
redo = choice < 0
if choice < 0: choice = -choice
choice -= 1
redact = disams[choice]
if person:
wikipedia.setAction(self.PERSON_SUMMARY)
text = text[:redact.start()] + "{{otherpeople|%s}}\n" % norm_with_caps + text[redact.end():]
else:
text = text[:redact.start()] + text[redact.end():]
# only save if something was changed
if text != page.get():
# Show the title of the page we're working on.
# Highlight the title in purple.
wikipedia.output(u"\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
# show what was changed
wikipedia.showDiff(page.get(), text)
# raw_input("Continue?")
# sleep(3)
if dbmove or self.debug:
choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No'], ['y', 'N'], 'N')
if choice == 'n':
return
try:
# Save the page
page.put(text)
except wikipedia.LockedPage:
wikipedia.output(u"Page %s is locked; skipping." % page.aslink())
except wikipedia.EditConflict:
wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
except wikipedia.SpamfilterError, error:
wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (page.title(), error.url))
else:
self.editCount += 1
if redo:
self.treat(wikipedia.Page(SITE, page.title()), pageIndex)
def main():
    """Entry point: construct the bot and run it with warnings suppressed."""
    debug_mode = False
    bot = CobraBot(debug_mode)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        bot.run()
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always run pywikipedia's shutdown hook, even on error/interrupt.
        wikipedia.stopme()