# User:Edoderoobot/template2wikidata
# Jump to navigation
# Jump to search
import pywikibot
from pywikibot import pagegenerators
import mwparserfromhell
from mwparserfromhell.nodes import Template
# Language code handed to pywikibot.Site() to pick the wiki to read from.
language = 'en'

# Name of the template to look for (without the "template:" prefix).
findtemplate = 'FIS'

# Template parameter whose value is copied into the new claim.
findparameter = 'ID'

# Only pages whose namespace id is listed here are processed.
allowed_namespaces = [0]

# Properties looked up on every item; one report column per entry.
findclaims = [
    'P2772',
    'P2773',
    'P2774',
    'P2775',
    'P2776',
    'P2777',
]
def genPagesWithTemplate(site, template):
    """Yield every page that refers to the page ``template:<template>``.

    The template page is resolved on *site* through a ``pywikibot.Link``;
    the referring pages come from
    ``pagegenerators.ReferringPageGenerator`` and are passed on one by one.
    """
    template_link = pywikibot.Link('template:' + template, site)
    template_page = pywikibot.Page(template_link)
    for referring_page in pagegenerators.ReferringPageGenerator(template_page):
        yield referring_page
def getTemplate(page, template):
    """Return the first template on *page* whose name matches *template*.

    ``page.text`` is parsed with mwparserfromhell. When no template on the
    page matches, an empty ``Template('')`` is returned rather than
    ``None``, so callers always get a template object back.
    """
    parsed = mwparserfromhell.parse(page.text)
    for candidate in parsed.filter_templates():
        if candidate.name.matches(template):
            return candidate
    # Nothing matched: hand back an empty template.
    return Template('')
def getParamFromTemplate(template, whichparam):
    """Return the value of parameter *whichparam* of *template*, or None.

    Every entry of ``template.params`` is read as ``name=value`` text and
    split at the first ``=``. The name is compared case-insensitively and
    without surrounding whitespace, so ``| id = 123`` is found just like
    ``|ID=123``. The value is returned with surrounding whitespace
    (including newlines) removed.

    Entries that contain no ``=`` never match. ``None`` is returned when
    no entry carries the requested name.
    """
    wanted = whichparam.strip().upper()
    for param in template.params:
        # partition() keeps any further '=' inside the value untouched.
        name, separator, value = str(param).partition('=')
        if separator and name.strip().upper() == wanted:
            return value.strip()
    return None
# Language code -> item id returned by getSourcewiki(); built once at import
# time instead of on every call.
_SOURCE_WIKI_ITEMS = {
    'en': 'Q328',
    'fr': 'Q8447',
    'es': 'Q8449',
    'nl': 'Q10000',
    'it': 'Q11920',
    'pt': 'Q11921',
    'de': 'Q48183',
    'fa': 'Q48952',
    'tr': 'Q58255',
    'sv': 'Q169514',
    'cs': 'Q191168',
    'no': 'Q191769',
    'ca': 'Q199693',
    'uk': 'Q199698',
    'ar': 'Q199700',
    'vi': 'Q200180',
    'ru': 'Q206855',
    'be': 'Q877583',
    'pl': 'Q1551807',
}


def getSourcewiki(lang):
    """Return the item id registered for language code *lang*.

    Returns ``None`` when *lang* is not in the table.
    """
    return _SOURCE_WIKI_ITEMS.get(lang)
def addFIScode(repo, wd, claim2add, target, mysummary, lng):
    """Add the claim ``claim2add = target`` to item *wd* and reference it.

    Parameters:
        repo: data repository used to build the claims and the source item.
        wd: item page that receives the new claim.
        claim2add: property id of the claim to add.
        target: value stored in the new claim.
        mysummary: edit summary used when the claim is added to the item.
        lng: language code passed to ``getSourcewiki`` to select the item
            used as target of the P143 reference.

    When ``getSourcewiki(lng)`` returns ``None`` the claim is still added,
    but no reference is attached and a notice is printed.
    """
    FISclaim = pywikibot.Claim(repo, claim2add)
    FISclaim.setTarget(target)
    wd.addClaim(FISclaim, summary=mysummary)  # add claim to item

    source_qid = getSourcewiki(lng)
    if source_qid is None:
        # Without a known item id there is nothing valid to point the
        # reference at; ItemPage(repo, None) would not name a real item.
        print('No source wiki item known for language %r; '
              '%s added without reference' % (lng, claim2add))
        return

    # This item is the source wiki for the claim.
    sourceclaimtarget = pywikibot.ItemPage(repo, source_qid)
    # P143 = "imported from". The reference-flag keyword is left at its
    # default (False), exactly what the explicit argument used to pass.
    source_claim = pywikibot.Claim(repo, 'P143')
    source_claim.setTarget(sourceclaimtarget)
    FISclaim.addSources([source_claim])
# Value of the template's S parameter -> property that stores that code.
_FIS_TYPE_TO_PROPERTY = {
    'AL': 'P2772',
    'CC': 'P2773',
    'FS': 'P2774',
    'JP': 'P2775',
    'NK': 'P2776',
    'SB': 'P2777',
}


def findFIScodes():
    """Copy FIS codes from template calls on the wiki to the linked items.

    For every page that refers to ``findtemplate`` and lives in one of
    ``allowed_namespaces``:

    * the template's ``S`` parameter selects the property to write and
      ``findparameter`` supplies the value;
    * pages with an unknown/missing ``S`` value are reported and skipped,
      pages without a value are skipped silently;
    * pages without a linked item are reported as ``New: <title>``;
    * if the item carries none of the properties in ``findclaims``, the
      value is added through ``addFIScode``;
    * one report row is printed per page that has an item.
    """
    site = pywikibot.Site(language)
    repo = site.data_repository()

    # Text shown in the report column of a property the item does not have.
    placeholders = {
        prop: ' %s ' % fis_type.lower()
        for fis_type, prop in _FIS_TYPE_TO_PROPERTY.items()
    }
    summary = 'add FIS-code from %s-wiki' % language

    counter = 0  # number of report rows printed so far
    for mypage in genPagesWithTemplate(site, findtemplate):
        if mypage.namespace().id not in allowed_namespaces:
            continue

        mytemplate = getTemplate(mypage, findtemplate)
        # Either parameter may be absent (None); normalise before use.
        mytype = (getParamFromTemplate(mytemplate, 'S') or '').strip().upper()
        myvalue = getParamFromTemplate(mytemplate, findparameter)

        claim2add = _FIS_TYPE_TO_PROPERTY.get(mytype)
        if claim2add is None:
            print('%s has wrong type %s' % (mypage.title(), mytype))
            continue

        if myvalue is None:
            continue
        myvalue = myvalue.strip()
        if not myvalue:
            continue  # an empty parameter holds nothing to copy

        try:
            wd = mypage.data_item()  # read wikidata item
        except pywikibot.exceptions.NoPageError:
            # data_item() reports a missing item by raising, not by
            # returning None. NOTE(review): the exception carries this name
            # in current pywikibot releases; confirm for the version in use.
            print('New: %s' % mypage.title())  # item has no wikidata yet
            continue
        wd.get()

        result = {}
        claimfound = False  # no claim found initially
        for findclaim in findclaims:
            if findclaim in wd.claims:
                # str() keeps the column printable when the claim has no
                # concrete value (getTarget() returns None).
                existing = wd.claims[findclaim][0].getTarget()
                result[findclaim] = '{:6}'.format(str(existing))
                claimfound = True
            else:
                # Claim is not present: use a placeholder for printing.
                result[findclaim] = '{:6}'.format(
                    placeholders.get(findclaim, ''))

        counter += 1
        if not claimfound:  # not found? then add!
            addFIScode(repo, wd, claim2add, myvalue, summary, language)

        print('%s: %s-%s-[%s]: %s' % (
            '{:5}'.format(counter),
            '{:30}'.format(mypage.title()),
            mytype,
            '{:6}'.format(myvalue),
            '-'.join(result[findclaim] for findclaim in findclaims),
        ))
# Start the run: this call sits at module level, so it executes as soon as
# the file is run (or imported).
findFIScodes()