# Source: wiki page "User:Edoderoobot/scientific article-title.py"
import pywikibot
from pywikibot import pagegenerators as pg
import urllib
# SPARQL query handed to wd_sparql_query(): selects every ?item that has a
# P31 statement with value Q13442814 and also a P698 statement, returning the
# item together with its P698 value bound to ?pubmedid.
# NOTE(review): P31 / Q13442814 presumably mean "instance of" / "scholarly
# article" and P698 the PubMed ID (the variable name suggests so) -- confirm
# against Wikidata.
query='SELECT ?item ?pubmedid WHERE { ?item wdt:P31 wd:Q13442814. ?item wdt:P698 ?pubmedid}'
def wd_sparql_query(spq):
    """Yield every Wikidata item matched by the SPARQL query *spq*.

    Each item's data is loaded with ``get(get_redirect=True)`` before it is
    yielded. Items whose data cannot be loaded are reported on stdout and
    skipped (best effort), so one bad item does not stop the run.

    Args:
        spq: SPARQL query string passed to ``WikidataSPARQLPageGenerator``.

    Yields:
        The page objects produced by the generator, after a successful
        ``get()``.
    """
    wikidatasite = pywikibot.Site('wikidata', 'wikidata')
    generator = pg.WikidataSPARQLPageGenerator(spq, site=wikidatasite)
    for wd in generator:
        try:
            wd.get(get_redirect=True)
        except Exception as err:
            # Only ordinary errors are skipped; KeyboardInterrupt/SystemExit
            # still propagate so the bot can be stopped.
            print('Skipping %s: %s' % (wd.title(), err))
            continue
        # The yield is deliberately outside the try block: a handler wrapped
        # around it would also catch the GeneratorExit raised when the
        # consumer stops iterating, and the generator would then fail with
        # "RuntimeError: generator ignored GeneratorExit".
        yield wd
def get_pubmed_xml(ID, *, opener=None):
    """Return the article title PubMed reports for the PubMed id *ID*.

    Despite its name, the function returns only the text found between the
    first ``<ArticleTitle>`` and ``</ArticleTitle>`` tags of the downloaded
    record, not the XML itself.

    Args:
        ID: PubMed identifier; converted with ``str()`` and URL-quoted.
        opener: optional callable ``opener(url)`` returning a context manager
            whose ``read()`` yields the response bytes. Defaults to
            ``urllib.request.urlopen``; pass e.g. the ``open`` method of a
            custom ``OpenerDirector`` to add headers or proxies.

    Returns:
        The title with entities decoded and surrounding whitespace removed,
        or ``''`` when the response contains no complete ArticleTitle element.

    Raises:
        Whatever *opener* raises on network failure, and
        ``UnicodeDecodeError`` if the response is not valid UTF-8.
    """
    # Imported here because the file-level ``import urllib`` does not by
    # itself guarantee that the ``urllib.request`` submodule is loaded.
    import html
    import urllib.parse
    import urllib.request

    open_tag = '<ArticleTitle>'
    close_tag = '</ArticleTitle>'

    if opener is None:
        opener = urllib.request.urlopen

    # NOTE(review): this is the legacy PubMed web URL; confirm it still
    # serves XML. NCBI's documented programmatic interface is E-utilities.
    url = ('https://www.ncbi.nlm.nih.gov/pubmed/?term=%s&report=xml&format=text'
           % urllib.parse.quote(str(ID), safe=''))
    with opener(url) as response:  # closes the connection when done
        xml = response.read().decode('UTF-8')

    # The original code skipped 20 characters after the match, which is the
    # length of '&lt;ArticleTitle&gt;' rather than of '<ArticleTitle>' (14).
    # Presumably the endpoint wraps HTML-escaped XML in a page -- TODO
    # confirm. Undo that escaping layer when the raw tag is absent so both
    # payload shapes are handled.
    if open_tag not in xml:
        xml = html.unescape(xml)

    start = xml.find(open_tag)
    if start == -1:
        return ''
    start += len(open_tag)
    end = xml.find(close_tag, start)
    if end == -1:
        return ''
    # Decode XML entities such as &amp; that remain inside the title text.
    return html.unescape(xml[start:end]).strip()
# Main bot loop: give every item returned by the query a Dutch ('nl') label.
# Items with an English label get that label copied; items without one get
# the title fetched from PubMed through their P698 claim.
print('Start')
for item in wd_sparql_query(query):
    if 'en' in item.labels:
        # Copy the en-label to nl.
        newlabel = item.labels['en']
        summary = 'Copy en-label to nl-label'
    elif 'P698' in item.claims:
        pubmedID = item.claims['P698'][0].getTarget()
        if not pubmedID:
            # The claim carries no usable value, so there is nothing to look up.
            continue
        newlabel = get_pubmed_xml(pubmedID)
        print('%s-%s' % (item.title(), newlabel))
        if not newlabel:
            # No title could be extracted; never write an empty label.
            continue
        summary = 'Publication title set from URL of pubmedID'
    else:
        continue

    # NOTE(review): as in the original script, an existing nl-label that
    # differs from the new value is overwritten -- confirm this is intended.
    if item.labels.get('nl') == newlabel:
        # The label is already correct; skip the no-op edit.
        continue

    try:
        item.editEntity({'labels': {'nl': newlabel}}, summary=summary)
    except Exception as err:
        # Best effort: report the failure and carry on with the next item.
        # KeyboardInterrupt is not caught, so Ctrl-C still stops the bot.
        print('%s: edit failed: %s' % (item.title(), err))