# User:Edoderoobot/scientific article-title.py
#
# From Wikidata (copied from the wiki page; navigation text removed).
import html
import urllib
import urllib.parse
import urllib.request

import pywikibot
from pywikibot import pagegenerators as pg

# SPARQL query selecting every item that has both a P31 statement pointing at
# Q13442814 (presumably "scholarly article" -- confirm on Wikidata) and a
# P698 value, which the script below treats as the PubMed ID.
query = (
    'SELECT ?item ?pubmedid WHERE { '
    '?item wdt:P31 wd:Q13442814. '
    '?item wdt:P698 ?pubmedid}'
)

def wd_sparql_query(spq):
    """Yield the Wikidata pages returned by the SPARQL query *spq*.

    Each page produced by ``WikidataSPARQLPageGenerator`` has ``get()`` called
    on it (with ``get_redirect=True``) before it is yielded. Pages for which
    that call fails are skipped, so one bad item does not end the run.

    Args:
        spq: SPARQL query text handed to the page generator.

    Yields:
        The page objects whose ``get()`` call succeeded.
    """
    wikidatasite = pywikibot.Site('wikidata', 'wikidata')
    generator = pg.WikidataSPARQLPageGenerator(spq, site=wikidatasite)
    for wd in generator:
        try:
            wd.get(get_redirect=True)
        except Exception:
            # Best effort: an item that cannot be loaded is skipped.
            # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt
            # and SystemExit stop the bot.
            continue
        # The yield stays outside the try block so that closing the generator
        # (GeneratorExit raised at this point) is never swallowed.
        yield wd
      
def _extract_article_title(xml):
    """Return the text of the first ``ArticleTitle`` element in *xml*.

    Accepts raw markup (``<ArticleTitle>``) as well as the same markup with
    its angle brackets HTML-escaped (``&lt;ArticleTitle&gt;``). Returns an
    empty string when no complete element is present.
    """
    open_tag, close_tag = '<ArticleTitle>', '</ArticleTitle>'
    if open_tag not in xml:
        # Undo one level of HTML escaping so an escaped record can be
        # searched with the same raw markers.
        xml = html.unescape(xml)
    start = xml.find(open_tag)
    if start == -1:
        return ''
    start += len(open_tag)
    # Search for the closing tag only after the opening one.
    end = xml.find(close_tag, start)
    if end == -1:
        return ''
    # Decode character entities (e.g. ``&amp;``) left in the title text.
    return html.unescape(xml[start:end]).strip()


def get_pubmed_xml(ID):
    """Download the PubMed record for *ID* and return its article title.

    Despite the name, the return value is the title text, not the XML.

    NOTE(review): this is the legacy PubMed web URL; confirm it still serves
    the XML report. NCBI's E-utilities ``efetch`` service is the documented
    programmatic interface.

    Args:
        ID: PubMed identifier; it is converted to ``str`` and URL-quoted.

    Returns:
        The article title, or ``''`` if the response contains no
        ``ArticleTitle`` element.

    Raises:
        urllib.error.URLError: if the request fails.
        UnicodeDecodeError: if the response is not valid UTF-8.
    """
    url = ('https://www.ncbi.nlm.nih.gov/pubmed/?term=%s&report=xml&format=text'
           % urllib.parse.quote(str(ID), safe=''))
    # The context manager closes the HTTP response even when reading fails.
    with urllib.request.urlopen(url) as response:
        xml = response.read().decode('UTF-8')
    return _extract_article_title(xml)
   
print('Start')
for item in wd_sparql_query(query):
    # Choose the Dutch (nl) label: the English label when the item has one,
    # otherwise the article title looked up through the item's P698 value.
    if 'en' in item.labels:
        new_label = item.labels['en']  # copy en-label to nl
    elif 'P698' in item.claims:
        pubmedID = item.claims['P698'][0].getTarget()
        try:
            new_label = get_pubmed_xml(pubmedID)
        except Exception as err:
            # A failed lookup for one article must not abort the whole run;
            # report it and move on to the next item.
            print('%s: no title for PubMed ID %s (%s)'
                  % (item.title(), pubmedID, err))
            continue
        print('%s-%s' % (item.title(), new_label))
    else:
        continue

    if not new_label:
        # Nothing usable was found; never write an empty label.
        continue
    if 'nl' in item.labels and item.labels['nl'] == new_label:
        # The nl label already has this value; skip the no-op edit.
        continue

    data = {'labels': {'nl': new_label}}
    try:
        item.editEntity(data, summary='Publication title set from URL of pubmedID')
    except Exception as err:
        # Keep the run going, but make the failure visible rather than
        # silently dropping it.
        print('%s: edit failed (%s)' % (item.title(), err))