# User:Edoderoobot/scientific article.py
#
# From Wikidata
import pywikibot
#from pywikibot import pagegenerators
from pywikibot import pagegenerators as pg
#import pywikibot.data.wikidataquery as wdquery
from pywikibot.data import api
import codecs
import sys
import datetime
from datetime import datetime, date, time


# SPARQL queries selecting the items to describe: instances (P31) of
# Q13442814 that have a publication date (P577). The work is split into
# date windows so that a single query does not time out.
#
# Each window is half-open (lower bound inclusive, upper bound exclusive) so
# that consecutive windows neither overlap nor leave a gap. With strict
# comparisons on both sides, an article dated exactly
# 2000-01-01T00:00:00Z would match neither query.
# NOTE(review): year-precision dates are, as far as I know, exposed as
# January 1st at midnight, which would make that boundary instant common.
querylist=[
           'SELECT ?item  WHERE { 	?item wdt:P31 wd:Q13442814 .     ?item wdt:P577 ?published .     filter ((?published >= "1800-01-01T00:00:00Z"^^xsd:dateTime) && (?published < "2000-01-01T00:00:00Z"^^xsd:dateTime)) }',
           'SELECT ?item  WHERE { 	?item wdt:P31 wd:Q13442814 .     ?item wdt:P577 ?published .     filter ((?published >= "2000-01-01T00:00:00Z"^^xsd:dateTime) && (?published < "2020-01-01T00:00:00Z"^^xsd:dateTime)) }',
           ]
 

# Language code of the description this run writes; it is used as the key
# into both the sca and pubtxt tables.
lng = "cs"
# Base description text per language code. An existing description equal to
# this bare text is treated by main() as replaceable.
sca = {
    "de": "wissenschaftlicher Artikel",
    "da": "videnskabelig artikel",
    "en": "scientific article",
    "fr": "article scientifique",
    "it": "articolo scientifico",
    "nl": "wetenschappelijk artikel",
    "pt": "artigo científico",
    "sr": "научни чланак",
    "sv": "vetenskaplig artikel",
    "tr": "bilimsel makale",
    "ca": "article científic",
    "cs": "vědecký článek",
    "sk": "vedecký článok",
}

# Per-language wording placed in front of the publication date. The
# formatting code adds the separating space itself, so no entry may carry
# leading or trailing whitespace (the Danish entry used to end in a space,
# which produced a double space in the generated description).
pubtxt = {
    'de': 'veröffentlicht',
    'da': 'udgivet',
    'en': 'published on',
    'fr': 'publié',
    'it': 'pubblicato il',
    'nl': 'gepubliceerd op',
    'pt': 'publicado na',
    'sr': 'објављен',
    'sv': 'publicerad på',
    'tr': 'günü yayınlanan',
    'ca': 'publicat el',
    'cs': 'publikovaný v roce',
    'sk': 'publikovaný',
}
     
def wd_sparql_query(spq):
    """Yield the pages produced by running the SPARQL query *spq* on Wikidata.

    Each page has ``get(get_redirect=True)`` called on it before it is
    yielded, presumably so that callers can read its data without a further
    fetch and so that redirects do not raise.
    """
    wikidata = pywikibot.Site('wikidata', 'wikidata')
    for page in pg.WikidataSPARQLPageGenerator(spq, site=wikidata):
        page.get(get_redirect=True)
        yield page

def sparql_nodescription(sparql):
    """Wrap *sparql* in an outer query filtered on an unbound ?itemDescription.

    The given query text is inserted as a braced group inside
    ``select distinct ?item where { ... }`` followed by
    ``filter (!bound(?itemDescription))``.

    NOTE(review): nothing in the visible part of this file calls this helper,
    and the visible queries never bind ?itemDescription - confirm the filter
    does what is intended before using it.
    """
    wrapper = (
        'select distinct ?item where {{%s}'
        'filter (!bound(?itemDescription))}'
    )
    return wrapper % sparql
  
def _publication_text(publicationdate):
    """Return the description suffix for a publication date, or '' if unusable.

    *publicationdate* is the target of an item's first P577 claim. It may be
    None (getTarget() returns None when the claim has no concrete value);
    that case, like a year of 0, yields an empty suffix.
    """
    if publicationdate is None or publicationdate.year == 0:
        return ''
    if lng == 'cs':
        # The Czech wording is followed by the year only.
        return '%s %s' % (pubtxt[lng], publicationdate.year)
    # A month or day of 0 is treated as "not known": print only the parts
    # that are present.
    if publicationdate.month == 0:
        return '(%s %04d)' % (pubtxt[lng], publicationdate.year)
    if publicationdate.day == 0:
        return '(%s %04d-%02d)' % (pubtxt[lng], publicationdate.year,
                                   publicationdate.month)
    return '(%s %02d.%02d.%04d)' % (pubtxt[lng], publicationdate.day,
                                    publicationdate.month,
                                    publicationdate.year)


def main(dry_run=False):
    """Add or refresh the ``lng`` description of the queried items.

    For every item returned by the queries in ``querylist``, the description
    in language ``lng`` is written when it is missing or when it equals the
    bare ``sca[lng]`` text. The new description is ``sca[lng]`` followed, if
    the item has a usable P577 value, by the publication date wording. Items
    with any other existing description are only reported.

    Args:
        dry_run: when True, print the data that would be written instead of
            editing the item. Defaults to False (edit for real).

    Returns:
        The number of items actually edited.
    """
    written = 0

    # The work is split over several queries to avoid time outs.
    for sparql_query in querylist:
        for item in wd_sparql_query(sparql_query):
            has_description = lng in item.descriptions
            if has_description and item.descriptions[lng] != sca[lng]:
                # A different description already exists: leave it alone.
                print('%s is: %s' % (item.title(), item.descriptions[lng]))
                continue

            publicationtxt = ''
            if 'P577' in item.claims:
                publicationtxt = _publication_text(
                    item.claims['P577'][0].getTarget())

            # strip() drops the trailing space left when there is no date.
            description = ('%s %s' % (sca[lng], publicationtxt)).strip()
            if has_description and item.descriptions[lng] == description:
                # Nothing would change; do not save an empty edit.
                continue

            data = {'descriptions': {lng: description}}
            if dry_run:
                print('Would add: %s' % (data))
            else:
                item.editEntity(
                    data, summary=u'%s description scientific article' % (lng))
                written += 1

    return written
       
       
# Start the bot only when this file is executed as a script, so that merely
# importing the module does not begin editing items.
if __name__ == '__main__':
    main()