User:AkkakkBot/code/03-update-db-reports

From Wikidata
Jump to navigation Jump to search

python code:

        site = pywikibot.Site("wikidata", "wikidata")
        repo = site.data_repository()

        db = MySQLdb.connect(host="wikidatawiki.labsdb", db="wikidatawiki_p", read_default_file="~/replica.my.cnf")
        cur = db.cursor()
        
        sqlstatements = {
            "label with brackets" :    u'select * from s51880_akkakkbot.items where label like "% (%)" or label like "%, %" limit 60;',
            "label with brackets/en" : u'select * from s51880_akkakkbot.items where language = "en" and (label like "% (%)" or label like "%, %") limit 40,60;',
            "label with brackets/de" : u'select * from s51880_akkakkbot.items where language = "de" and (label like "% (%)" or label like "%, %") limit 10,60;',
            "term with link" :         u'select term_language, term_type, concat("[[Q", term_entity_id, "]]") as item, concat("<nowiki>", term_text, "</nowiki>") as text from wb_terms where term_entity_type = "item" and term_text like "%[[%]]%" limit 60;',
            "long terms" :             u'select term_language, term_type, char_length(term_text), concat("[[Q", term_entity_id, "]]") as item, term_text from wb_terms where term_entity_type = "item" and char_length(term_text) > 200 order by char_length(term_text) desc, term_text, term_entity_id limit 50;',
            "long terms/en" :          u'select term_language, term_type, char_length(term_text), concat("[[Q", term_entity_id, "]]") as item, term_text from wb_terms where term_language = "en" and term_entity_type = "item" and char_length(term_text) > 200 order by char_length(term_text) desc, term_text, term_entity_id limit 50;',
            "long terms/de" :          u'select term_language, term_type, char_length(term_text), concat("[[Q", term_entity_id, "]]") as item, term_text from wb_terms where term_language = "de" and term_entity_type = "item" and char_length(term_text) > 200 order by char_length(term_text) desc, term_text, term_entity_id limit 50;',
            "link without label/brackets" :    u'select concat("[[Q", ips_item_id, "]] ([[User:YMS/LC#Q", ips_item_id, "|LC]])") as item, ips_site_id, ips_site_page from wb_items_per_site join s51880_akkakkbot.languages on lang_db = ips_site_id and     (ips_site_page like "%, %" or ips_site_page like "% (%)") and not exists (select term_language from wb_terms where ips_item_id = term_entity_id and lang_wd = term_language and term_type = "label") limit 60;',
            "link without label/no-brackets" : u'select concat("[[Q", ips_item_id, "]] ([[User:YMS/LC#Q", ips_item_id, "|LC]])") as item, ips_site_id, ips_site_page from wb_items_per_site join s51880_akkakkbot.languages on lang_db = ips_site_id and not (ips_site_page like "%, %" or ips_site_page like "% (%)") and not exists (select term_language from wb_terms where ips_item_id = term_entity_id and lang_wd = term_language and term_type = "label") limit 60;',
            "link without label/en" :  u'select concat("[[Q", ips_item_id, "]] ([[User:YMS/LC#Q", ips_item_id, "|LC]])") as item, ips_site_id, ips_site_page from wb_items_per_site join s51880_akkakkbot.languages on lang_db = "enwiki" and lang_db = ips_site_id and (ips_site_page like "%, %" or ips_site_page like "% (%)") and not exists (select term_language from wb_terms where ips_item_id = term_entity_id and lang_wd = term_language and term_type = "label") limit 100;',
            "link without label/de" :  u'select concat("[[Q", ips_item_id, "]] ([[User:YMS/LC#Q", ips_item_id, "|LC]])") as item, ips_site_id, ips_site_page from wb_items_per_site join s51880_akkakkbot.languages on lang_db = "dewiki" and lang_db = ips_site_id and (ips_site_page like "%, %" or ips_site_page like "% (%)") and not exists (select term_language from wb_terms where ips_item_id = term_entity_id and lang_wd = term_language and term_type = "label") limit 100;',
            "link without label/es" :  u'select concat("[[Q", ips_item_id, "]] ([[User:YMS/LC#Q", ips_item_id, "|LC]])") as item, ips_site_id, ips_site_page from wb_items_per_site join s51880_akkakkbot.languages on lang_db = "eswiki" and lang_db = ips_site_id and (ips_site_page like "%, %" or ips_site_page like "% (%)") and not exists (select term_language from wb_terms where ips_item_id = term_entity_id and lang_wd = term_language and term_type = "label") limit 100;',
            "link without label/fr" :  u'select concat("[[Q", ips_item_id, "]] ([[User:YMS/LC#Q", ips_item_id, "|LC]])") as item, ips_site_id, ips_site_page from wb_items_per_site join s51880_akkakkbot.languages on lang_db = "frwiki" and lang_db = ips_site_id and (ips_site_page like "%, %" or ips_site_page like "% (%)") and not exists (select term_language from wb_terms where ips_item_id = term_entity_id and lang_wd = term_language and term_type = "label") limit 100;',
            "link without label/stat": u'select count(*), lang_wd, lang_db from wb_items_per_site join s51880_akkakkbot.languages on lang_db = ips_site_id and ips_site_page not like "%, %" and ips_site_page not like "% (%)" and not exists (select term_language from wb_terms where ips_item_id = term_entity_id and lang_wd = term_language and term_type = "label") group by lang_wd, lang_db order by count(*) desc;',
            "description ending with period" :    u'select language, item, label, description from s51880_akkakkbot.items where description like "%." limit 50;',
            "description ending with period/en" : u'select language, item, label, description from s51880_akkakkbot.items where language = "en" and description like "%." limit 50;',
            "description ending with period/de" : u'select language, item, label, description from s51880_akkakkbot.items where language = "de" and description like "%." limit 50,50;',
            "disambiguation conflicts" : u'select concat("[[Q", t1.term_entity_id, "]]") as item, t1.term_language as lang1, t1.term_text as en_label, t2.term_language as lang2, t2.term_text as de_label from wb_terms as t1 join wb_terms as t2 on t1.term_entity_id = t2.term_entity_id where t1.term_text != t2.term_text and t1.term_language = "en" and t1.term_type = "label" and t1.term_entity_type = "item" and t2.term_language != "en" and t2.term_type = "label" and t2.term_entity_type = "item" and exists (select * from wb_terms as t3 where t3.term_entity_id = t1.term_entity_id and t3.term_entity_type = "item" and t3.term_language = "en" and t3.term_type = "description" and t3.term_text = "Wikipedia disambiguation page") limit 25,50;',
#            "disambiguation conflicts" : u'select l1.item, l1.language, l1.label, l2.language, l2.label from s51880_akkakkbot.items as l1 join s51880_akkakkbot.items as l2 on l1.item = l2.item where l1.language = "en" and l2.language != "en" and l1.label != l2.label and (l1.description = "Wikimedia disambiguation page" or l1.description = "Wikipedia disambiguation page") limit 60;',
            "deprecated-languages/de-formal" :    u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "de"        and def.term_language = "de-formal"    and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
            "deprecated-languages/als" :          u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "gsw"       and def.term_language = "als"          and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
#            "deprecated-languages/crh" :          u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "crh-latn"  and def.term_language = "crh"          and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
            "deprecated-languages/crh":           u'select concat("[[Q", term_entity_id, "]]") as item, term_type, term_text from wb_terms where term_language = "crh" limit 100;',
            "deprecated-languages/no" :           u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "nb"        and def.term_language = "no"           and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
            "deprecated-languages/simple" :       u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "en"        and def.term_language = "simple"       and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
#            "deprecated-languages/bat-smg" :      u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "sgs"       and def.term_language = "bat-smg"      and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
            "deprecated-languages/bat-smg":           u'select concat("[[Q", term_entity_id, "]]") as item, term_type, term_text from wb_terms where term_language = "bat-smg" limit 100;',
            "deprecated-languages/be-x-old" :     u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "be-tarask" and def.term_language = "be-x-old"     and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
            "deprecated-languages/fiu-vro" :      u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "vro"       and def.term_language = "fiu-vro"      and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
            "deprecated-languages/roa-rup" :      u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "rup"       and def.term_language = "roa-rup"      and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
            "deprecated-languages/zh-classical" : u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "lzh"       and def.term_language = "zh-classical" and de.term_type != "alias" and de.term_text != def.term_text order by de.term_entity_id limit 100;',
            "deprecated-languages/zh-min-nan" :   u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "nan"       and def.term_language = "zh-min-nan"   and de.term_type != "alias" and de.term_text != def.term_text order by de.term_entity_id limit 100;',
            "deprecated-languages/zh-yue" :       u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "yue"       and def.term_language = "zh-yue"       and de.term_type != "alias" and de.term_text != def.term_text order by de.term_entity_id limit 100;',
            "deprecated-languages/stat" : u'select count(*), term_language, term_type from wb_terms where term_entity_type = "item" and (term_language = "als" or term_language = "de-formal" or term_language = "crh" or term_language = "no" or term_language = "simple" or term_language = "bat-smg" or term_language = "be-x-old" or term_language = "fiu-vro" or term_language = "roa-rup" or term_language = "zh-classical" or term_language = "zh-min-nan" or term_language = "zh-yue" or term_language = "nl-informal") group by term_language, term_type order by term_language, term_type;',
            "deprecated-languages/nl-informal":   u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "nl"        and def.term_language = "nl-informal"  and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
            "language-mapping" : u'select * from s51880_akkakkbot.languages where lang_db != concat(lang_wd, "wiki");',
            "alias-conflicts" : u'select concat("[[Q", l.term_entity_id, "]]") as item, l.term_language, l.term_text from wb_terms as l join wb_terms as a on l.term_entity_id = a.term_entity_id and l.term_entity_type = "item" and a.term_entity_type = "item" and l.term_language = a.term_language and l.term_type = "label" and a.term_type = "alias" and l.term_text = a.term_text limit 60;',
            "alias without label" : u'select concat("[[Q", a.term_entity_id, "]]"), a.term_language, a.term_text from wb_terms as a where a.term_type = "alias" and a.term_entity_type = "item" and not exists(select * from wb_terms as l where a.term_entity_id = l.term_entity_id and l.term_entity_type = "item" and a.term_language = l.term_language and l.term_type = "label") limit 60;'
        }
        print("iterate pages")
        for i, page in enumerate(sqlstatements):
            sqlstatement = sqlstatements[page];
            print("updating '"+page+"' ("+sqlstatement+")")
            sys.stdout.flush()
            cur.execute(sqlstatement)
            result_text = u"<nowiki>"+sqlstatement.replace("</nowiki>", "</nowiki></nowiki><nowiki>")+u"</nowiki>\n{| class=\"wikitable\"\n|-\n"
            lines = cur.fetchall()
            line_count = 1
            for line in lines:
                result_text += u"|| {} ".format(line_count)
                line_count += 1
                for cell in  line:
                    if(type(cell) == str):
                        try:
                            cell = cell.decode('utf8')
                        except UnicodeDecodeError:
                            cell = u"(encoding error)"
                    else:
                        cell = u"{}".format(cell)
                    result_text += u"|| "+cell+u" "
                result_text += u"\n|-\n"
            result_text += u"|}\n"
            pageobj = pywikibot.Page(site, u"User:Akkakk/issues/"+page)
            text = pageobj.get(get_redirect = True)
            if(text != result_text):
              print("- sending new text...")
              sys.stdout.flush()
              pageobj.put(result_text, u"update (task 3)", minorEdit = False)
            else:
              print("- text not changed")
        print("end of updates")
        sys.stdout.flush()