User:AkkakkBot/code/07-fix-language-mappings
< User:AkkakkBot | code
python code:
# Wikidata site handle and the data repository used to load and edit items.
site = pywikibot.Site("wikidata", "wikidata")
repo = site.data_repository()

# Database connection used further down to look up the items to process;
# connection options are read from the given MySQL option file.
db = MySQLdb.connect(
    host="wikidatawiki.labsdb",
    db="wikidatawiki_p",
    read_default_file="~/replica.my.cnf",
)

# Upper bound on the number of edits performed in one run.
editmax = 1000

# Language code to be replaced -> language code that replaces it.
replace_languages = {
    "als": "gsw",
    "de-formal": "de",
    "crh": "crh-latn",
    "no": "nb",
    "simple": "en",
    "bat-smg": "sgs",
    "be-x-old": "be-tarask",
    "fiu-vro": "vro",
    "roa-rup": "rup",
    "zh-classical": "lzh",
    "zh-min-nan": "nan",
    "zh-yue": "yue",
}

# Wikitext log accumulated during the run and the reason the run ended
# ("end of data" unless something else overrides it).
log = ""
exit_reason = "end of data"

# Edit summaries longer than this are replaced by a short generic summary.
summary_max_len = 100
# Collect the ids of all items that have at least one term (label,
# description or alias) in one of the language codes to be replaced.
# The ids come from the database connection `db`, not from a file.
print("get items")
sys.stdout.flush()
cur = db.cursor()
# Derive the language filter from replace_languages so the query cannot
# drift out of sync with the mapping table; values are passed as query
# parameters instead of being spliced into the SQL text.
source_languages = tuple(sorted(replace_languages))
placeholders = ", ".join(["%s"] * len(source_languages))
# DISTINCT: an item can match through several term rows, and every duplicate
# id would otherwise be fetched and checked again in the main loop.
# NOTE(review): the query does not filter on the entity type; if wb_terms
# also holds non-item entities, their numeric ids would be turned into
# Q-ids below -- confirm against the table schema.
cur.execute(
    "select distinct term_entity_id from wb_terms "
    "where term_language in (" + placeholders + ") limit 50000;",
    source_languages
)
# Each result row has exactly one column: the numeric entity id.
items = ["Q{}".format(row[0]) for row in cur.fetchall()]
cur.close()
#iterate items
print("iterate items")
editcnt = 0
try:
for i, q in enumerate(items):
if(os.path.isfile("bot-07-fix-language-mappings.stop")):
print("stop file")
exit_reason = "stop file"
break
item = pywikibot.ItemPage(repo, q)
try:
content = item.get()
descriptions = content['descriptions']
labels = content['labels']
aliases = content['aliases']
#labels
print("checking {} labels:".format(q)),
sys.stdout.flush()
changed = False
summary_text = ""
for lang_source in replace_languages:
lang_target = replace_languages[lang_source]
if lang_source in labels:
if lang_target in labels:
if labels[lang_source] == labels[lang_target]:
print(" remove redundant "+lang_source+" label"),
summary_text += "- remove redundant "+lang_source+" label"
labels[lang_source] = ""
if(not changed):
changed = True
log += "* [["+q+"]]: "
log += "- remove redundant "+lang_source+" label "
else:
print("- "+lang_source+"/"+lang_target+"-labels differ!"),
else:
print("- "+lang_source+" lab to "+lang_target),
labels[lang_target] = labels[lang_source]
labels[lang_source] = ""
if(not changed):
changed = True
log += "* [["+q+"]]: "
log += "- rename "+lang_source+" label to "+lang_target+" "
summary_text += "- rename "+lang_source+" label to "+lang_target
if(changed):
editcnt += 1
print("- edit {}...".format(editcnt))
log += "\n"
summary_text += " (task 7)"
if(len(summary_text) > summary_max_len):
summary_text = "fix language mappings in labels (task 7)"
item.editLabels(summary=summary_text, labels=labels)
if(editcnt >= editmax):
print("maximum number of edits reached")
exit_reason = "maximum number of edits reached"
break
else:
print("")
#descriptions
print("checking {} descriptions:".format(q)),
sys.stdout.flush()
changed = False
summary_text = ""
for lang_source in replace_languages:
lang_target = replace_languages[lang_source]
if lang_source in descriptions:
if lang_target in descriptions:
if descriptions[lang_source] == descriptions[lang_target]:
print(" remove redundant "+lang_source+" description"),
summary_text += "- remove redundant "+lang_source+" description"
descriptions[lang_source] = ""
if(not changed):
changed = True
log += "* [["+q+"]]: "
log += "- remove redundant "+lang_source+" description "
else:
print("- "+lang_source+"/"+lang_target+"-descriptions differ!"),
else:
print("- "+lang_source+" desc to "+lang_target),
descriptions[lang_target] = descriptions[lang_source]
descriptions[lang_source] = ""
if(not changed):
changed = True
log += "* [["+q+"]]: "
log += "- rename "+lang_source+" description to "+lang_target+" "
summary_text += "- rename "+lang_source+" description to "+lang_target
if(changed):
editcnt += 1
print("- edit {}...".format(editcnt))
log += "\n"
summary_text += " (task 7)"
if(len(summary_text) > summary_max_len):
summary_text = "fix language mappings in descriptions (task 7)"
item.editDescriptions(summary=summary_text, descriptions=descriptions)
if(editcnt >= editmax):
print("maximum number of edits reached")
exit_reason = "maximum number of edits reached"
break
else:
print("")
#aliases
print("checking {} aliases:".format(q)),
sys.stdout.flush()
changed = False
summary_text = ""
for lang_source in replace_languages:
lang_target = replace_languages[lang_source]
if lang_source in aliases:
if lang_target in aliases:
if aliases_equals(aliases[lang_source], aliases[lang_target]):
print("- remove redundant "+lang_source+" aliases"),
aliases[lang_source] = [""]
if(not changed):
changed = True
log += "* [["+q+"]]: "
log += "- remove redundant "+lang_source+" aliases"
summary_text += "- remove redundant "+lang_source+" aliases"
else:
print("- "+lang_source+"/"+lang_target+"-aliases differ!"),
if(not changed):
changed = True
log += "* [["+q+"]]: "
for alias in aliases[lang_source]:
if not alias in aliases[lang_target]:
print("- adding alias "),
log += "- adding "+lang_target+" alias "
aliases[lang_target].append(alias)
summary_text += "- fixing "+lang_source+"/"+lang_target+" aliases"
log += "- removing "+lang_source+" alias(es)"
aliases[lang_source] = [""]
else:
print("- rename "+lang_source+" aliases to "+lang_target),
aliases[lang_target] = aliases[lang_source]
aliases[lang_source] = [""]
if(not changed):
changed = True
log += "* [["+q+"]]: "
#log += "- copy "+lang_source+" aliases to "+lang_target+" "
#summary_text += "- copy "+lang_source+" aliases to "+lang_target
log += "- rename "+lang_source+" aliases to "+lang_target+" "
summary_text += "- rename "+lang_source+" aliases to "+lang_target
if(changed):
editcnt += 1
print("- edit {}...".format(editcnt))
log += "\n"
summary_text += " (task 7)"
if(len(summary_text) > summary_max_len):
summary_text = "fix language mappings in descriptions (task 7)"
item.editAliases(summary=summary_text, aliases=aliases)
if(editcnt >= editmax):
print("maximum number of edits reached")
exit_reason = "maximum number of edits reached"
break
else:
print("")
except pywikibot.exceptions.NoPage:
print(" item does not exist")
except pywikibot.data.api.APIError:
print(" api error. trying to continue.")
except UnicodeEncodeError:
print(" UnicodeEncodeError.why?")
except TypeError:
print(" TypeError")
except ValueError:
print(" ValueError")
except Exception as exc:
print("exception")
traceback.print_exc()
exit_reason = "exception"
if log != "":
log += "exit reason:"+exit_reason
pageobj = pywikibot.Page(site, u"User:AkkakkBot/log")
pageobj.put(log, u"log for task 7: fix language mappings", minorEdit = False)
print("end of script")
return exit_reason
def aliases_equals(old, new):
    """Return True if both alias lists hold the same aliases, ignoring order.

    Duplicates are significant: ["a", "a"] and ["a"] are not equal.

    The comparison works on sorted copies, so neither argument is modified.
    (The previous version referenced ``old.sort`` / ``new.sort`` without
    calling them, so nothing was sorted and the comparison was accidentally
    order-sensitive.)

    :param old: first list of aliases
    :param new: second list of aliases
    :return: True if the lists are equal up to ordering, False otherwise
    """
    return sorted(old) == sorted(new)