#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Published by zhuyifei1999 (https://wikitech.wikimedia.org/wiki/User:Zhuyifei1999)
# under the terms of Creative Commons Attribution-ShareAlike 3.0 Unported (CC BY-SA 3.0)
# https://creativecommons.org/licenses/by-sa/3.0/
"""Remove interlanguage links from wiki pages once Wikidata provides them.

Candidate pages are found by scanning an XML dump of the site; each candidate
is then re-read live, and every in-text interlanguage link whose target
matches the corresponding sitelink of the page's Wikidata item is removed.
"""

import os
import re
import traceback

import pywikibot
from pywikibot import xmlreader

import commonfunc

# Argh global bots gets mwoauth-invalid-authorization-invalid-user
# with open(__import__('os').path.expanduser('~/.oauth-token.json'), 'r') as _f:
#     pywikibot.config.authenticate['*'] = __import__('json').load(_f)

# Created lazily by Robot.__init__ instead of at import time.
# To avoid "UserWarning: Site objects have been created before arguments were handled"
config = None


class Config(object):
    """Per-site approval settings consulted by :class:`Robot`."""

    def __init__(self):
        # Sites where the bot performs real edits.
        self.approved = [
            pywikibot.Site("de", "wikipedia"),
            pywikibot.Site("en", "wikipedia"),
            pywikibot.Site("fr", "wikipedia"),
            pywikibot.Site("it", "wikipedia"),
            pywikibot.Site("ur", "wikipedia"),
            pywikibot.Site("zh", "wikipedia"),
            pywikibot.Site("zh", "wikivoyage"),
            pywikibot.Site("commons", "commons"),
            pywikibot.Site("species", "species"),
        ]
        # When true, a Robot created with globalrun=True edits any site that
        # has not opted out.
        self.globalapproved = True
        # Sites that are always run in simulation mode.
        self.optout = [
            pywikibot.Site("en", "wikinews"),
            pywikibot.Site("en", "wikibooks"),
            pywikibot.Site("pl", "wikisource"),
            # Madness, one of the slowest to process (except commons)
            pywikibot.Site("ceb", "wikipedia"),
            # Something weird going on, see [[m:Help:User page]]
            pywikibot.Site("meta", "meta"),
        ]
        # NOTE(review): optoutfamilies and usecrontab are not read anywhere in
        # this module; presumably other scripts consume them - confirm.
        self.optoutfamilies = [
            "wikibooks",
            "wikinews",
            "wikisource",
            "wiktionary",
        ]
        self.usecrontab = [
            pywikibot.Site("commons", "commons"),
        ]


class Robot(object):
    """Bot that strips interlanguage links already covered by Wikidata."""

    def __init__(self, site=None, globalrun=False, simulate=False):
        """Log in to *site* and work out the summary, edit limit and mode.

        :param site: site to work on; defaults to ``pywikibot.Site()``.
        :param globalrun: if true, ``Config.globalapproved`` is enough to
            allow real edits on a site that is not individually approved.
        :param simulate: if true, only show diffs and never save.
        """
        global config
        if not config:
            config = Config()

        self.site = site or pywikibot.Site()
        self.site.login(autocreate=True)

        # If the family forwards its interwiki handling to another family,
        # use that family's language codes and name instead of its own.
        family = self.site.family
        self.famlangs = (
            (family.interwiki_forward and family.load(family.interwiki_forward))
            or family
        ).langs
        self.famname = family.interwiki_forward or family.name

        # Edit summaries per language.  A value is either a %-format string
        # taking "id" and "counter", or a callable (id, removed, remaining).
        # @source https://github.com/legoktm/wikidata/blob/master/enwiki_removal.py
        summaries = {
            "af": "Verplasing van %(counter)d interwikiskakels wat op [[d:|Wikidata]] beskikbaar is op [[d:%(id)s]]",
            "ar": "بوت: ترحيل %(counter)d وصلة إنترويكي, موجودة الآن في [[d:|ويكي بيانات]] على [[d:%(id)s]]",
            "am": "ሎሌ፦ መያያዣዎች ወደ %(counter)d ልሳናት አሁን በ[[d:|Wikidata]] ገጽ [[d:%(id)s]] ስላሉ ተዛውረዋል።",
            "ca": "Bot: Traient %(counter)d enllaços interwiki, ara proporcionats per [[d:|Wikidata]] a [[d:%(id)s]]",
            "ckb": "بۆت: گواستنەوەی %(counter)d بەستەری نێوانویکی، ئێستا دابین کراوە لەسەر [[d:| ویکیدراوە]] لە [[d:%(id)s]]",
            "cs": "Bot: Odstranění %(counter)d [[w:Wikipedie:Wikidata#Mezijazykové odkazy|odkazů interwiki]], které jsou nyní dostupné na [[d:|Wikidatech]] ([[d:%(id)s]])",
            "bat-smg": "Perkeliamas %(counter)d tarpkalbėnės nūruodas, daba esontės [[d:|Wikidata]] poslapī [[d:%(id)s]].",
            "be": "Робат: перанёс %(counter)d міжмоўных спасылак да аб'екта [[d:%(id)s]] на [[d:|Wikidata]]",
            "be-tarask": "Робат: перанос %(counter)d міжмоўных спасылак у [[d:|Вікізьвесткі]] да аб’екта [[d:%(id)s]]",
            "be-x-old": "Робат: перанос %(counter)d міжмоўных спасылак у [[d:|Вікізьвесткі]] да аб’екта [[d:%(id)s]]",
            "bg": "Робот: Преместване на %(counter)d междуезикови препратки към [[d:|Уикиданни]], в [[d:%(id)s]].",
            "bn": "বট: %(counter)d টি আন্তঃউইকি সংযোগ সরিয়ে নেওয়া হয়েছে, যা এখন [[d:|উইকিউপাত্তের]] - [[d:%(id)s]] এ রয়েছে",
            "bs": "Bot: premještanje %(counter)d međuwiki linkova koji su sada dostupni na stranici [[d:%(id)s]] na [[d:|Wikidati]]",
            "da": "Bot: Migrerer %(counter)d interwikilinks, som nu leveres af [[d:|Wikidata]] på [[d:%(id)s]]",
            "de": (lambda id, rm, remain:
                   u"%(counter)d [[Hilfe:Internationalisierung|Interwiki-Link(s)]] nach [[d:|Wikidata]] ([[d:%(id)s]]) migriert" % {'id': id, 'counter': rm}
                   + ((u"; %d Interwiki-Link(s) verbleiben" % remain) if remain else "")),
            "el": "Ρομπότ: Μεταφέρω %(counter)d σύνδεσμους interwiki, που τώρα παρέχονται από τα [[d:|Wikidata]] στο [[d:%(id)s]]",
            "en": (lambda id, rm, remain:
                   u"Bot: Migrating %(counter)d langlinks, now provided by [[d:|Wikidata]] on [[d:%(id)s]]" % {'id': id, 'counter': rm}
                   + ((u"; %d langlinks remaining" % remain) if remain else "")),
            "eo": "Roboto: Forigo de %(counter)d interlingvaj ligiloj, kiuj nun disponeblas per [[d:|Vikidatumoj]] ([[d:%(id)s]])",
            "es": "Moviendo %(counter)d enlaces interlingúisticos, ahora proporcionado(s) por [[d:|Wikidata]] en la página [[d:%(id)s]].",
            "et": "Robot: muudetud %(counter)d intervikilinki, mis on nüüd andmekogus [[d:%(id)s|Wikidata]]",
            "eu": "Robota: hizkuntza arteko %(counter)d lotura lekualdatzen; aurrerantzean [[d:|Wikidata]] webgunean izango dira, [[d:%(id)s]] orrian",
            "fa": "ربات: انتقال %(counter)d پیوند میانویکی به [[d:%(id)s]] در [[d:|ویکیداده]]",
            "fi": "Botti poisti %(counter)d [[d:|Wikidatan]] sivulle [[d:%(id)s]] siirrettyä kielilinkkiä",
            # "fr":"Suis retirer %(counter)d liens entre les wikis, actuellement fournis par [[d:|Wikidata]] sur la page [[d:%(id)s]]",
            "fr": "Retrait de %(counter)d lien(s) interlangue(s), désormais fourni(s) par [[d:|Wikidata]] à la page [[d:%(id)s]]",
            "frr": "Bot: Fersküüw %(counter)d interwiki-links, diar nü uun [[d:|Wikidata]] üüb det sidj [[d:%(id)s]] paroot stun",
            "gl": "Bot: Retiro %(counter)d ligazóns interlingüísticas, proporcionadas agora polo [[d:|Wikidata]] en [[d:%(id)s]]",
            "he": "בוט: מעביר קישורי בינויקי ל[[d:|ויקינתונים]] - [[d:%(id)s]]",
            "hu": "Bot: %(counter)d interwiki link áthelyezve a [[d:|Wikidata]] [[d:%(id)s]] adatába",
            "ia": "Robot: Migration de %(counter)d ligamines interwiki, fornite ora per [[d:|Wikidatos]] in [[d:%(id)s]]",
            "id": "Bot: Migrasi %(counter)d pranala interwiki, karena telah disediakan oleh [[d:|Wikidata]] pada item [[d:%(id)s]]",
            "ilo": "Bot: Agiyal-alis kadagiti %(counter)d nga interwiki, a nait-iteden idiay [[d:|Wikidata]] iti [[d:%(id)s]]",
            "is": "Bot: Flyt %(counter)d tungumálatengla, sem eru núna sóttir frá [[d:|Wikidata]] á [[d:%(id)s]]",
            "it": "migrazione automatica di %(counter)d collegamenti interwiki a [[d:Wikidata:Pagina_principale|Wikidata]], [[d:%(id)s]]",
            "ja": "ボット: 言語間リンク %(counter)d 件を[[d:|ウィキデータ]]上の [[d:%(id)s]] に転記",
            "ka": "Bot: %(counter)d [[ვპ:ებ|ენათაშორისი ბმული]] გადატანილია [[d:|Wikidata]]-ზე, [[d:%(id)s]]",
            "ko": "봇: 인터위키 링크 %(counter)d 개가 [[d:|위키데이터]]의 [[d:%(id)s]] 항목으로 옮겨짐",
            "ku": "Bot: %(counter)d girêdanên înterwîkiyê ên ku niha li ser [[d:|Wikidata]]yê ne, jê bibe",
            "la": "Bot %(counter)d nexus intervici removet, quod nunc apud [[d:|Vicidata]] cum tessera [[d:%(id)s]] sunt",
            "lb": "Bot: Huet %(counter)d Interwikilinke geréckelt, déi elo op [[d:|Wikidata]] op [[d:%(id)s]] zur Verfügung gestallt ginn",
            "lt": "Perkeliamos %(counter)d tarpkalbinės nuorodos, dabar pasiekiamos [[d:|Wikidata]] puslapyje [[d:%(id)s]].",
            "mg": "Nanala rohy interwiki %(counter)d izay efa omen'i [[d:|Wikidata]] eo amin'i [[d:%(id)s]]",
            "min": "Bot: Migrasi %(counter)d pautan interwiki, dek lah disadioan jo [[d:|Wikidata]] pado [[d:%(id)s]]",
            "ml": "%(counter)d ഇന്റര്വിക്കി കണ്ണികളെ [[d:|വിക്കിഡാറ്റയിലെ]] [[d:%(id)s]] എന്ന താളിലേക്ക് മാറ്റിപ്പാര്പ്പിച്ചിരിക്കുന്നു. ",
            "mr": "सांगकाम्या: %(counter)d इतर भाषातील दुव्यांचे विलिनीकरण, आता [[d:|विकिडेटा]]वर उपलब्ध [[d:%(id)s]]",
            "ms": "Bot: Memindahkan %(counter)d pautan interwiki, kini disediakan oleh [[d:|Wikidata]] di [[d:%(id)s]]",
            "nl": "Robot: Verplaatsing van %(counter)d interwikilinks. Deze staan nu op [[d:|Wikidata]] onder [[d:%(id)s]]",
            "no": "bot: Fjerner %(counter)d interwikilenker som nå hentes fra [[d:%(id)s]] på [[d:|Wikidata]]",
            "nv": "wikidata bitsʼą́ą́dę́ę́ʼígíí chodaoʼį́ kʼad ([[d:%(id)s]]; %(counter)d wikidata bitsʼą́ą́dę́ę́ʼ)",
            "os": "Бот схафта %(counter)d æвзагы æрвитæны, кæцытæ [[d:|Викирардты]] нырид сты ацы фарсы: [[d:%(id)s]]",
            "pl": "Bot: Przenoszę linki interwiki (%(counter)d) do [[d:|Wikidata]], są teraz dostępne do edycji na [[d:%(id)s]]",
            "pt": "A migrar %(counter)d interwikis, agora providenciados por [[d:|Wikidata]] em [[d:%(id)s]]",
            "ro": "Migrare a %(counter)d legături interwiki, furnizate acum de [[d:|Wikidata]] la [[d:%(id)s]]",
            "ru": "Перемещение %(counter)d интервики на [[d:|Викиданные]], [[d:%(id)s]]",
            "sh": "Bot: migracija %(counter)d međuwiki veza sada dostupnih na stranici [[d:%(id)s]] na [[d:|Wikidati]]",
            "sk": "Bot: Odstránenie %(counter)d odkazov interwiki, ktoré sú teraz dostupné na [[d:|Wikiúdajoch]] ([[d:%(id)s]])",
            "sl": "Bot: Migracija %(counter)d interwikija/-ev, od zdaj gostuje(-jo) na [[d:|Wikipodatkih]], na [[d:%(id)s]]",
            "sr": "Бот: Селим %(counter)d међујезичких веза, које су сад на [[d:|Википодацима]] на [[d:%(id)s]]",
            "sv": "Bot överför %(counter)d interwikilänk(ar), som nu återfinns på sidan [[d:%(id)s]] på [[d:|Wikidata]]",
            "szl": "Bot przećepoł %(counter)d linkůw interwiki, terozki bydům ůune na [[d:|Wikidata]]",
            "ta": "தானியங்கி: %(counter)d விக்கியிடை இணைப்புகள் நகர்த்தப்படுகின்றன, தற்போது [[d:|விக்கிதரவில்]] இங்கு [[d:%(id)s]]",
            "tet": "Bot: Hasai %(counter)d ligasaun interwiki, ne'ebé agora mai husi [[d:%(id)s]] iha [[d:|Wikidata]]",
            "tt": "Бот: бу мәкаләнең [[Википедия:Интервики|интервики]] сылтамалары (%(counter)d) хәзер [[d:%(id)s|Wikidata-да]]",
            "ur": "روبالہ: منتقلی %(counter)d بین الویکی روابط، اب [[d:|ویکی ڈیٹا]] میں [[d:%(id)s]] پر موجود ہیں",
            "uz": "Bot: endilikda [[d:Wikidata:Ana_Sayfa|Wikidata]] [[d:%(id)s]] sahifasida saqlanadigan %(counter)d intervikini koʻchirdi",
            "vi": "Bot: Di chuyển %(counter)d liên kết ngôn ngữ đến [[d:|Wikidata]] tại [[d:%(id)s]]",
            "vec": "Bot: Migrasion de %(counter)d interwiki links so [[d:Wikidata:Pagina_principale|Wikidata]] - [[d:%(id)s]]",
            "zh": (lambda id, rm, remain:
                   u"机器人:移除%(counter)d个跨语言链接,现在由[[d:|维基数据]]的[[d:%(id)s]]提供" % {'id': id, 'counter': rm}
                   + ((u";剩余%d个链接。" % remain) if remain else u"。")),
        }
        self.summary = pywikibot.translate(self.site, summaries, fallback=True)

        # Optional per-wiki cap on the number of edits per run.  The default
        # of -1 never equals the running count, so it means "no limit".
        self.total = {
            # 'enwiki': 99,
            # 'zhwiki': 20,
            # 'commonswiki': 20,
            # 'urwiki': 20,
            # 'itwiki': 20,
            # 'frwiki': 20,
            # 'dewiki': 20,
        }.get(self.site.dbName(), -1)
        self.count = 0

        # Dry-run when asked to, when the site opted out, or when nothing
        # authorises real edits here (not approved, no global approval for
        # this run, and no positive edit limit configured).
        self.simulate = (
            simulate
            or (self.site in config.optout)
            or not (
                (self.site in config.approved)
                or (globalrun and config.globalapproved)
                or (self.total > 0)
            )
        )

    def _parse_langlink(self, wikilink):
        """Return the parsed Link for an interlanguage link, else ``None``.

        :param wikilink: a match object of ``pywikibot.link_regex``.
        :return: a ``pywikibot.Link`` pointing at another language edition of
            this site's (interwiki) family, or ``None`` when the match is not
            such a link.

        Shared by :meth:`gen` and :meth:`treat` so that the dump pre-filter
        and the live processing apply exactly the same rules.
        """
        title = wikilink.group("title")
        stripped = title.strip()
        # Empty targets and [[:xx:...]] (leading colon) are not langlinks.
        if not stripped or stripped[0] == u":":
            return None
        # Piped links and links with a section are ordinary inline links.
        if any(char in wikilink.group(0) for char in "|#"):
            return None
        lang = title.split(u":")[0].strip()
        if lang not in self.famlangs:
            return None
        try:
            link = pywikibot.Link(title, source=self.site)
            link.parse()
        # Previously limited to InvalidTitle / SiteDefinitionError; any
        # pywikibot error means the link cannot be handled here.
        except pywikibot.exceptions.Error:
            return None
        if link.site == self.site:
            return None
        if link.site.family.name != self.famname:
            return None
        # The prefix must be the target site's own code, not an alias.
        if link.site.code != lang:
            return None
        return link

    def gen(self):
        """Yield pages from the XML dump that contain an interlanguage link.

        Pages are skipped when they are redirects, live in an excluded
        namespace, or mention ``noexternallanglinks``.
        """
        path = commonfunc.getDumpName(self.site.dbName(), "pages-meta-current.xml.bz2")
        dump = xmlreader.XmlDump(path)
        for page in dump.parse():
            ns = int(page.ns)
            # Skip negative namespaces, namespaces 2, 6, 8 and 828, and every
            # odd-numbered namespace.
            # "Module" namespace is ignored due to the different syntax
            if ns < 0 or ns in (2, 6, 8, 828) or ns % 2 == 1 or page.isredirect:
                continue

            text = page.text
            if u"noexternallanglinks" in text.lower():
                continue

            # One qualifying link is enough; any() stops at the first hit.
            if not any(self._parse_langlink(wikilink) is not None
                       for wikilink in pywikibot.link_regex.finditer(text)):
                continue

            yield pywikibot.Page(self.site, page.title, ns=ns)

    def run(self):
        """Process every page produced by :meth:`gen`."""
        for page in self.gen():
            self.treat(page)

    def treat(self, page):
        """Remove from *page* the langlinks that its Wikidata item provides.

        A link is removed only when its target resolves (through redirects
        and page moves, see :meth:`get_target`) to the same page as the
        item's sitelink for that site.  Everything else is counted as
        remaining.  Nothing is saved when no link could be removed.
        """
        rm = []
        remain = []

        try:
            # text_o: original text, text_i: text before the current
            # replacement, text: working copy.
            text_i = text_o = text = page.get(get_redirect=False)
        except pywikibot.exceptions.Error:
            return

        if u"noexternallanglinks" in text.lower():
            return
        if not (page.has_permission('edit') and page.botMayEdit()):
            return

        try:
            item = page.data_item()
            item.get()
        except pywikibot.exceptions.NoPageError:
            return

        # Loop-invariant site checks.
        is_zh_voy = self.site == pywikibot.Site("zh", "wikivoyage")
        is_commons = self.site == pywikibot.Site("commons", "commons")

        for wikilink in pywikibot.link_regex.finditer(
                pywikibot.textlib.removeDisabledParts(text)):
            link = self._parse_langlink(wikilink)
            if link is None:
                continue

            try:
                sitelink = item.getSitelink(link.site)
            except pywikibot.exceptions.NoPageError:
                # The item has no sitelink for that wiki.
                remain.append(wikilink)
                continue

            if is_zh_voy and link.title.lower().startswith("voy:"):
                # Drop the 4-character "voy:" prefix from the parsed title.
                link._title = link._title[4:]

            page_l = pywikibot.Page(link)
            page_d = pywikibot.Page(link.site, sitelink)

            if page_l != page_d:
                # Maybe one of them is a redirect to, or was moved to, the other.
                try:
                    page_l = self.get_target(page_l)
                    page_d = self.get_target(page_d)
                except pywikibot.exceptions.Error:
                    traceback.print_exc()
                    remain.append(wikilink)
                    continue

            if (page_l != page_d) or not (page_l and page_d):
                remain.append(wikilink)
                continue

            # Commons does not allow the removal of cross-namespace links
            # unless the page has {{Wikidata Infobox}},
            # https://commons.wikimedia.org/wiki/Template_talk:Wikidata_Infobox#Removal_of_interwiki_links
            if (is_commons
                    and page_l.namespace() != page.namespace()
                    and not any(
                        pywikibot.Page(self.site, title=title) in page.templates()
                        for title in ['Module:Interwiki', 'Template:Wikidata Infobox'])):
                remain.append(wikilink)
                continue

            # Remove the link; when a newline precedes it, that newline is
            # removed too, but only if the link is also followed by one.
            text = pywikibot.textlib.replaceExcept(
                text,
                u"(\n)?" + re.escape(wikilink.group(0)) + u"(?(1)(?=\n))",
                "",
                ['comment', 'header', 'pre', 'source', 'ref', 'table', 'gallery'])

            if text != text_i:
                rm.append(wikilink)
                text_i = text
            else:
                # The link only occurs inside an excepted region.
                remain.append(wikilink)

        if not rm:
            pywikibot.output(u'No changes were needed on %s' % page.title(as_link=True))
            return

        if callable(self.summary):
            summary = self.summary(item.title().lower(), len(rm), len(remain))
        else:
            summary = self.summary % {"id": item.title().lower(), "counter": len(rm)}

        self.userPut(page, text_o, text, summary=summary, asynchronous=True)

    @staticmethod
    def get_target(page):
        """Follow redirects, category redirects and page moves from *page*.

        :return: the last page reached.  The walk stops on a loop, on
            leaving the starting site, on a section link, on a page without
            a page id, on an existing non-redirect page, on a circular
            redirect, on a missing page without a move log entry, or on an
            error while reading the move log.

        The steps taken are written to the pywikibot log.
        """
        to_log = []
        processed = []
        site = page.site

        while True:
            if page in processed:
                to_log.append("loop")
                break
            processed.append(page)
            to_log.append(page.title(as_link=True))

            if page.site != site:
                to_log.append("xsite")
                break
            if page.section():
                to_log.append("section")  # section is not accepted
                break

            page.site.loadpageinfo(page)
            if not hasattr(page, "_pageid"):
                to_log.append("bad page")
                break

            if page.exists():
                if page.isRedirectPage():
                    to_log.append("redirect")
                    try:
                        page = page.getRedirectTarget()
                    except pywikibot.exceptions.CircularRedirectError:
                        break
                    continue
                if page.isCategoryRedirect():
                    to_log.append("catredir")
                    page = page.getCategoryRedirectTarget()
                    continue
                to_log.append("all good")
                break

            # The page does not exist: look for a move log entry.
            try:
                logs = page.site.logevents(logtype="move", page=page, total=1)
            except Exception:
                to_log.append("error")
                pywikibot.exception()
                break

            log = next(iter(logs), None)
            if log is None:
                to_log.append("deleted")  # Page got deleted?
                break

            try:
                to_log.append("moved")
                page = log.target_page
            except Exception:
                to_log.append("error")
                pywikibot.exception()
                break

        pywikibot.log(u" => ".join(to_log))
        return page

    def userPut(self, page, oldtext, newtext, **kwargs):
        """Save *newtext* to *page*, or just show the diff when simulating.

        :param kwargs: passed on to ``page.save``; ``summary`` is also
            printed in simulation mode.
        :raises RuntimeError: when the configured edit limit has been reached.
        """
        if oldtext == newtext:
            pywikibot.output(u'No changes were needed on %s' % page.title(as_link=True))
            return
        if self.count == self.total:
            raise RuntimeError("Maxium edits reached!")

        # Simulated edits count towards the limit as well.
        self.count += 1

        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title(as_link=True))

        if self.simulate:
            pywikibot.showDiff(oldtext, newtext)
            if 'summary' in kwargs:
                pywikibot.output(u'Summary: %s' % kwargs['summary'])
            return

        page.text = newtext
        try:
            page.save(**kwargs)
        except pywikibot.exceptions.Error as e:
            pywikibot.output(u'Failed to save %s: %r: %s' % (page.title(as_link=True), e, e))
            # A failed save does not use up the edit budget.
            self.count -= 1


def main():
    """Handle pywikibot's command-line arguments and run the bot."""
    pywikibot.handle_args()
    bot = Robot()
    bot.run()


if __name__ == "__main__":
    try:
        main()
    finally:
        pywikibot.stopme()