# User:Edoderoobot/template2wikidata
#
# From Wikidata
# Jump to navigation Jump to search
import pywikibot
from pywikibot import pagegenerators
import mwparserfromhell
from mwparserfromhell.nodes import Template

# Wiki language code: selects the site that is read and, via getSourcewiki(),
# the item used as the reference for added claims.
language = 'en'

# Name of the template to look for on pages (without the "template:" prefix).
findtemplate = 'FIS'

# Template parameter that holds the value to copy into Wikidata.
findparameter = 'ID'

# Only pages whose namespace id is listed here are processed.
allowed_namespaces = [0]

# Properties looked up on each item; a new claim is only added when none of
# them is present yet.
findclaims = [
    'P2772',
    'P2773',
    'P2774',
    'P2775',
    'P2776',
    'P2777',
]

def genPagesWithTemplate(site, template):
    """Yield the pages that refer to a template on *site*.

    site     -- pywikibot site on which the template lives
    template -- template name without the ``template:`` prefix

    The template page is built from the title ``'template:' + template`` and
    handed to ``pagegenerators.ReferringPageGenerator``; every page that
    generator produces is yielded unchanged.
    """
    template_link = pywikibot.Link('template:' + template, site)
    template_page = pywikibot.Page(template_link)
    for referring_page in pagegenerators.ReferringPageGenerator(template_page):
        yield referring_page

def getTemplate(page, template):
    """Return the first use of *template* found in the text of *page*.

    page     -- object whose ``text`` attribute holds the wikitext to parse
    template -- template name to look for (compared with ``name.matches``)

    When the page does not contain the template, an empty ``Template('')``
    is returned instead, so callers always receive a Template object.
    """
    parsed = mwparserfromhell.parse(page.text)
    for candidate in parsed.filter_templates():
        if candidate.name.matches(template):
            return candidate       # first match wins
    return Template('')            # template not found on page

def getParamFromTemplate(template, whichparam):
    """Return the value of parameter *whichparam* of *template*, or None.

    template   -- object with a ``params`` sequence; each entry must expose
                  ``name`` and ``value`` (as mwparserfromhell parameters do)
    whichparam -- parameter name to look for; compared case-insensitively

    The comparison is made on the parameter's parsed name with surrounding
    whitespace ignored, so ``|ID=1``, ``| ID = 1`` and ``|id=1`` are all
    found. The returned value is a string with leading/trailing whitespace
    (including the newline of multi-line template calls) removed.
    Returns None when the template has no such parameter.
    """
    wanted = whichparam.strip().upper()
    for param in template.params:
        # Compare names only; slicing the raw "name=value" text would miss
        # parameters written with spaces around the name or the "=".
        if param.name.strip().upper() == wanted:
            return param.value.strip()
    return None

def getSourcewiki(lang):
    """Return the item id of the source wiki for language code *lang*.

    The id is what addFIScode() uses as the target of the reference it
    attaches to a new claim. Returns None for a language that is not in
    the table.
    """
    source_items = {
        'en': 'Q328',
        'fr': 'Q8447',
        'es': 'Q8449',
        'nl': 'Q10000',
        'it': 'Q11920',
        'pt': 'Q11921',
        'de': 'Q48183',
        'fa': 'Q48952',
        'tr': 'Q58255',
        'sv': 'Q169514',
        'cs': 'Q191168',
        'no': 'Q191769',
        'ca': 'Q199693',
        'uk': 'Q199698',
        'ar': 'Q199700',
        'vi': 'Q200180',
        'ru': 'Q206855',
        'be': 'Q877583',
        'pl': 'Q1551807',
    }
    # dict.get already yields None for unknown keys.
    return source_items.get(lang)

def addFIScode(repo, wd, claim2add, target, mysummary, lng):
    """Add a claim to a Wikidata item and reference the wiki it came from.

    repo      -- data repository the claims are created for
    wd        -- item the claim is added to (its ``addClaim`` is called)
    claim2add -- property id of the claim, e.g. ``'P2772'``
    target    -- value to store in the claim
    mysummary -- edit summary used when the claim is added
    lng       -- language code of the source wiki, resolved with
                 getSourcewiki()

    The claim is saved first; afterwards a P143 ("imported from") reference
    pointing at the source wiki item is attached. When getSourcewiki() does
    not know *lng*, the claim is left without a reference and a message is
    printed.
    """
    FISclaim = pywikibot.Claim(repo, claim2add)
    FISclaim.setTarget(target)
    wd.addClaim(FISclaim, summary=mysummary)       # add claim to item

    source_qid = getSourcewiki(lng)
    if source_qid is None:
        # Without this guard None would be passed to ItemPage as the item
        # id, which cannot name a real source item.
        print('%s: no source wiki known for language %s, claim added without reference'
              % (claim2add, lng))
        return

    sourceclaimtarget = pywikibot.ItemPage(repo, source_qid)  # source wiki of this claim
    # The original passed isReference=False, which only restated the default;
    # it is left out so the call does not depend on that keyword's spelling.
    source_claim = pywikibot.Claim(repo, 'P143')              # P143: imported from
    source_claim.setTarget(sourceclaimtarget)
    FISclaim.addSources([source_claim])
      
  
def findFIScodes():
    """Copy FIS ids from template calls on the wiki to Wikidata.

    For every page returned by genPagesWithTemplate() for ``findtemplate``
    that lies in one of ``allowed_namespaces``:

    * the template's ``S`` parameter selects the property to use and the
      ``findparameter`` parameter supplies the value;
    * pages with a missing/unknown ``S`` value are reported and skipped,
      pages without a value are skipped;
    * if the connected item has none of the properties in ``findclaims``,
      the claim is added through addFIScode() and a line summarising the
      page is printed.

    Takes no arguments and returns nothing; configuration comes from the
    module-level settings.
    """
    site = pywikibot.Site(language)
    repo = site.data_repository()

    # Value of the template's "S" parameter -> property receiving the id.
    type2claim = {
        'AL': 'P2772',
        'CC': 'P2773',
        'FS': 'P2774',
        'JP': 'P2775',
        'NK': 'P2776',
        'SB': 'P2777',
    }
    # Placeholder printed for a property the item does not have yet.
    default = {
        'P2772': '  al  ',
        'P2773': '  cc  ',
        'P2774': '  fs  ',
        'P2775': '  jp  ',
        'P2776': '  nk  ',
        'P2777': '  sb  ',
    }
    counter = 0

    for mypage in genPagesWithTemplate(site, findtemplate):
        if mypage.namespace().id not in allowed_namespaces:
            continue

        mytemplate = getTemplate(mypage, findtemplate)
        mytype = getParamFromTemplate(mytemplate, 'S')
        myvalue = getParamFromTemplate(mytemplate, findparameter)

        # The parameter may be absent (None), e.g. when the page only links
        # to the template; normalise before looking it up.
        if mytype is not None:
            mytype = mytype.strip().upper()
        claim2add = type2claim.get(mytype)
        if claim2add is None:
            print('%s has wrong type %s' % (mypage.title(), mytype))
            continue

        if myvalue is None:
            continue
        myvalue = myvalue.strip()
        if not myvalue:
            continue                     # empty id: nothing to store

        # NOTE(review): depending on the pywikibot version, data_item() may
        # raise for a page without an item instead of returning None - confirm.
        wd = mypage.data_item()
        if wd is None:
            print('New: %s' % mypage.title())   # page has no wikidata item yet
            continue

        wd.get()                         # read wikidata item
        result = {}
        claimfound = False               # no claim found initially
        for findclaim in findclaims:
            if findclaim in wd.claims:
                # str() so that targets which are not plain strings can
                # still be padded by the format spec.
                result[findclaim] = '{:6}'.format(
                    str(wd.claims[findclaim][0].getTarget()))
                claimfound = True
            else:
                result[findclaim] = '{:6}'.format(default.get(findclaim, ''))
        counter += 1

        if not claimfound:               # not found? then add!
            # Use the property selected from the "S" parameter; the original
            # passed the literal placeholder 'Pxxxx' here.
            addFIScode(repo, wd, claim2add, myvalue,
                       u'add FIS-code from en-wiki', language)
            print('%s: %s-%s-[%s]: %s' % (
                '{:5}'.format(counter),
                '{:30}'.format(mypage.title()),
                mytype,
                '{:6}'.format(myvalue),
                '-'.join(result[claim] for claim in findclaims),
            ))
        
    

# Start the harvest; as a top-level call this runs whenever the file is
# executed or imported.
findFIScodes()