User:AkkakkBot/code/02-terminator-descriptions

From Wikidata
Jump to navigation Jump to search

Python code:

        # Task 2: for every item ID listed in the input file, if the item has a
        # <lang>wiki sitelink and a <lang> label but no <lang> description, and
        # the sitelink title carries one of the known parenthesised terms
        # (e.g. "Foo (film)"), set the text inside the brackets as the
        # description.  A log of all edits is written to User:AkkakkBot/log.
        site = pywikibot.Site("wikidata", "wikidata")
        repo = site.data_repository()
        editmax = 1000  # upper bound on edit attempts per run
        lang = 'fr'
        log = u""
        exit_reason = "end of data"

        # get items from file (one item ID per line)
        print("get items from file")
        with open("bot-terminator-descriptions-list-"+lang) as list_file:
            # Blank lines are dropped: an empty string is not a usable item ID,
            # and an error raised by ItemPage(...) below is only caught by the
            # outer handler, which would abort the whole run.
            items = [line.strip() for line in list_file if line.strip()]

        # iterate items
        print("iterate items")
        editcnt = 0
        lang_wiki = lang+"wiki"
        try:
            # Pattern selection happens inside the try block so that an
            # unsupported language is reported through the same "exception"
            # path as any other fatal error, before any request is made.
            if lang == "en":
                regularexpression = r".* \((academic|actor|album|.* album|architect|artist|astronomer|athlete|attorney|author|band|bishop|blogger|book|.* book|boxer|broadcaster|.* broadcaster|businessman|cartoonist|chef|clergyman|criminal|coach|.* coach|comedian|composer|composition|computer program|congressman|constituency|.* constituency|councillor|crater|.* crater|cricketer|.* cricketer|curler|currency|detective|diplomat|director|dramatist|electoral district|editor|educator|electorate|.* electorate|entrepreneur|EP|equestrian|explorer|fencer|fighter|film|[12][90]\d\d film|film editor|filmmaker|footballer|.* footballer|football player|game|.* game|governor|hurler|infielder|inventor|journalist|.* journalist|judge|.* judge|lawyer|magazine|manager|mathematician|merchant|TV miniseries|minister|missionary|MP|municipality|musical|musician|.* musician|name|.* name|novel|.* novel|novelist|officer|.* officer|opera|organist|outfielder|philosopher|photographer|physician|physicist|pianist|piper|pitcher|play|.* play|poem|poet|political activist|politician|.* politician|preacher|priest|publisher|quarterback|rapper|referee|reporter|.* reporter|rower|rugby player|sailor|satellite|sculptor|sheriff|singer|sociologist|song|.* song|soundtrack|.* soundtrack|station|.* station|serial|.* serial|series|.* series|show|.* show|soldier|surname|swimmer|.* swimmer|theologian|trumpeter|operating system|unit|VC|video game|[12][90]\d\d video game|volcano|wide receiver|wrestler|writer|zoologist)\)"
            elif lang == "fr":
                # Accented letters are written as "." wildcards here
                # (e.g. "cin.aste", "s.rie t.l.vis.e"), so each one matches
                # any single character at that position.
                # NOTE(review): presumably done to sidestep an encoding
                # problem with non-ASCII pattern text -- confirm.
                regularexpression = r".* \((acteur|actrice|album|album .*|arbitre|architecte|artiste|astronome|avocat|bande originale|chanson|chanson .*|chanteur|chanteuse|cin.aste|circonscription|circonscription .*|com.dien|com.dienne|compositeur|cuisinier|dessinateur|diplomate|dramaturge|.crivain|.crivain .*|explorateur|femme politique|film|groupe|homme d'affaires|homme politique|humoriste|inventeur|jeu vid.o|journaliste|juge|logiciel|math.maticien|m.decin|musicien|nageur|officier|officier .*|peintre|philosophe|photographe|physicien|pianiste|po.me|po.te|politicien|politicien .*|r.alisateur|roman|satellite|sculpteur|s.rie t.l.vis.e|sociologue|th.ologien|volcan)\)"
            else:
                raise ValueError("no description pattern defined for language "+lang)

            # Compile once; the same pattern is applied to every sitelink.
            title_pattern = re.compile(regularexpression)

            for q in items:
                # Progress prefix without a newline so the per-item status
                # printed below ends up on the same line.
                sys.stdout.write("checking "+q+" ")
                sys.stdout.flush()
                item = pywikibot.ItemPage(repo, q)
                try:
                    content = item.get()

                    links = content['sitelinks']
                    if lang_wiki not in links:
                        print("  no "+lang+" link given")
                        continue
                    tarlink = links[lang_wiki]

                    labels = content['labels']
                    if lang not in labels:
                        print("  no "+lang+" label present")
                        continue
                    tarlabel = labels[lang]

                    descriptions = content['descriptions']
                    if lang in descriptions:
                        # Never overwrite an existing description.
                        print("  "+lang+" description is present")
                        continue

                    result = title_pattern.match(tarlink)
                    if not result:
                        print("  no match with re")
                        continue

                    # Group 1 is the text between the brackets.
                    tardescription = result.group(1)
                    if lang == 'en' and tardescription == "VC":
                        # Spell out the abbreviation instead of using it as-is.
                        tardescription = "recipient of the Victoria Cross"

                    # The complete descriptions mapping (existing languages
                    # plus the new entry) is what gets passed to the edit call.
                    descriptions[lang] = tardescription
                    # Counted before the edit is sent, so editmax bounds
                    # attempts rather than confirmed saves.
                    editcnt += 1
                    print("- edit {}".format(editcnt)+": set description")
                    log_append = u"* [["+q+"]]: link:[[:"+lang+":"+tarlink+"]] label:"+tarlabel+" description:'''"+tardescription+"'''"
                    item.editDescriptions(summary=u"set "+lang+" description to "+tardescription+" (task 2)", descriptions=descriptions)
                    # Only reached when the edit call did not raise.
                    log += log_append+"\n"

                    if editcnt >= editmax:
                        print("maximum number of edits reached")
                        exit_reason = "maximum number of edits reached"
                        break
                except pywikibot.exceptions.NoPage:
                    print("  item does not exist")
                except pywikibot.data.api.APIError:
                    print("  api error. trying to continue.")
                except UnicodeEncodeError:
                    print("  UnicodeEncodeError.why?")
                    traceback.print_exc()
                except pywikibot.data.api.TimeoutError:
                    print("  TimeoutError. trying to continue")
        except Exception:
            # Anything not handled per item ends the run; the log collected so
            # far is still published below.
            print("exception:")
            traceback.print_exc()
            exit_reason = "exception"

        # Publish the log only when at least one edit was recorded.
        if log != "":
            log += "exit reason:"+exit_reason
            pageobj = pywikibot.Page(site, u"User:AkkakkBot/log")
            pageobj.put(log, u"log for task 2: set "+lang+" description to the text that is in brackets in the "+lang+"wiki-link", minorEdit = False)