User:AkkakkBot/code/04-brackets-de

From Wikidata
Jump to navigation Jump to search

Python code:

        # Run parameters and bookkeeping used for the final report.
        editmax = 2000                # stop once this many edits have been made
        log = ""                      # wikitext log, one bullet per changed item
        exit_reason = "end of data"   # replaced when the run stops early

        # Wikidata site and repository handles, plus the MySQL connection that
        # is used to look up candidate items.
        site = pywikibot.Site("wikidata", "wikidata")
        repo = site.data_repository()
        db = MySQLdb.connect(
            host="wikidatawiki.labsdb",
            db="wikidatawiki_p",
            read_default_file="~/replica.my.cnf",
        )

        # Get items: collect the page titles of items that have a German term
        # whose text still contains "[[...]]" wiki-link markup (at most 2500 rows).
        print("get items")
        cur = db.cursor()
        try:
            cur.execute('select page_title from wb_terms inner join wb_entity_per_page on term_entity_id=epp_entity_id inner join page on epp_page_id = page_id where term_language = "de" and term_entity_type = "item" and (term_text like "%[[%]]%") limit 2500;')
            rows = cur.fetchall()
        finally:
            # Release the cursor even when the query fails.
            cur.close()

        # Flatten the result rows into one list of titles.
        # NOTE(review): depending on the driver / Python version the title column
        # may come back as bytes rather than text; such values are decoded so
        # they can be concatenated with text strings later. Text values pass
        # through untouched.
        items = [
            cell.decode("utf-8") if isinstance(cell, bytes) else cell
            for row in rows
            for cell in row
        ]

        # Iterate items: remove wiki-link markup from each item's German
        # description, saving the change and recording it in `log`.
        print("iterate items")

        # Patterns are compiled once, as raw strings so the backslashes reach the
        # regex engine without relying on invalid string escapes.
        # "[[target|label]]" -> "label" (applied first)
        piped_link = re.compile(r"\[\[([^][|]*?\|)(.*?)\]\]")
        # "[[target]]" -> "target"
        plain_link = re.compile(r"\[\[([^][|]*?)\]\]")

        editcnt = 0
        try:
            for q in items:
                print("checking {}".format(q))
                item = pywikibot.ItemPage(repo, q)
                try:
                    content = item.get()
                    descriptions = content['descriptions']
                    if 'de' not in descriptions:
                        print("  no de description")
                        continue

                    dedescriptionold = descriptions['de']
                    dedescriptionnew = plain_link.sub(r"\1", piped_link.sub(r"\2", dedescriptionold))
                    if dedescriptionold == dedescriptionnew:
                        # No link markup in the description; nothing to save.
                        continue

                    descriptions["de"] = dedescriptionnew
                    item.editDescriptions(summary=u"fixing de description ("+dedescriptionnew+") (task 4)", descriptions=descriptions)
                    # Log only after the edit call returned, so a failed edit is
                    # not reported as a change.
                    log += "* [["+q+"]]: old:<nowiki>"+dedescriptionold+"</nowiki> new:"+dedescriptionnew+"\n"
                    editcnt += 1
                    if editcnt >= editmax:
                        print("maximum number of edits reached")
                        exit_reason = "maximum number of edits reached"
                        break
                except pywikibot.exceptions.NoPage:
                    print("  item does not exist")
                except pywikibot.data.api.APIError:
                    print("  api error. trying to continue.")
                except UnicodeEncodeError:
                    print("  unicodeEncodeError.")
        except Exception:
            # Top-level boundary: report the failure and fall through so that the
            # code after this loop still runs.
            print("exception")
            traceback.print_exc()
            exit_reason = "exception"
        
        # Publish the run log on the bot's log page, but only when at least one
        # entry was recorded.
        if log:
            log = log + "exit reason:" + exit_reason
            log_page = pywikibot.Page(site, u"User:AkkakkBot/log")
            log_page.put(
                log,
                u"log for task 4: remove links from de descriptions",
                minorEdit=False,
            )