# User:Edoderoobot/count labels from Sparql-Query
#
# Source: Wikidata user page (wiki navigation text removed).
# -*- coding: latin-1 -*-


import codecs
import json
import pywikibot
from pywikibot import pagegenerators as pg
import urllib
import re
#import pywikibot.data.wikidataquery as wdquery
#from pywikibot.data import api

# Legacy WikidataQuery expression; not used by the visible code in this file.
# A further restriction was left disabled: and link[nlwiki] and claim[910]
WD_undefined_lists_query = u'claim[31:13406463] and noclaim[360]'

# SPARQL query whose result items have their labels counted.
# NOTE(review): presumably selects humans (P31=Q5) with one occupation (P106)
# and one citizenship (P27) -- verify the Q-ids on Wikidata.
SparqlQuery = 'select ?item where {?item wdt:P31 wd:Q5 . ?item wdt:P106 wd:Q1028181 . ?item wdt:P27 wd:Q29999}'

# Parallel lists: taalteller[i] is the number of labels counted for the
# language code taalcodes[i].  Both start empty; taalteller used to start as
# the integer 0, which made len(taalteller) fail when nothing was processed.
taalcodes = []
taalteller = []


def grablabel(langcode, wdlabels):
    """Return the label stored under *langcode*, or '' when there is none.

    Args:
        langcode: key to look up (a language code such as 'en').
        wdlabels: container of labels indexed by language code.

    Returns:
        ``wdlabels[langcode]`` if *langcode* is in *wdlabels*, otherwise the
        empty string.
    """
    return wdlabels[langcode] if langcode in wdlabels else ''

def action_one_item(wikidataitem):
    """Add one item's labels to the global per-language label counters.

    ``taalcodes`` (language codes) and ``taalteller`` (counts) are parallel
    module-level lists: ``taalteller[i]`` is the number of labels counted so
    far for language ``taalcodes[i]``.

    The counters are seeded with a fixed set of languages on the first call
    only, so totals accumulate over successive calls.  (They used to be reset
    on every call, which left only the last item's labels counted.)

    Args:
        wikidataitem: object providing ``exists()``, ``get()`` and a
            ``labels`` container keyed by language code (presumably a
            pywikibot ItemPage -- confirm against the caller).

    Returns:
        None.  Items for which ``exists()`` is false are ignored.
    """
    global taalcodes
    global taalteller

    # Seed once: an empty code list (or a counter that is not a list yet)
    # means no item has been processed so far.
    if not taalcodes or not isinstance(taalteller, list):
        taalcodes = ['en', 'nl', 'ru', 'de', 'fr', 'sk', 'pt', 'ro', 'fi', 'ja', 'fa', 'zh', 'it']
        taalteller = [0] * len(taalcodes)

    if not wikidataitem.exists():
        return

    wikidataitem.get()
    for thislabel in wikidataitem.labels:
        if thislabel in taalcodes:
            taalteller[taalcodes.index(thislabel)] += 1
        else:
            # First label seen in this language: start a new counter.
            taalcodes.append(thislabel)
            taalteller.append(1)

def last_step_when_finished():
    """Sort the global label counters ascending by count and print them.

    ``taalcodes`` and ``taalteller`` are reordered in place and kept in step,
    so ``taalteller[i]`` still belongs to ``taalcodes[i]`` afterwards.  The
    sort is stable: languages with equal counts keep their relative order.

    Output is "now sorting!", a blank line, then one line per language in the
    form ``<index>: <code> <count>``, lowest count first.  When nothing has
    been counted only "now sorting!" is printed.

    Returns:
        None.
    """
    global taalcodes
    global taalteller

    print("now sorting!")

    # Nothing counted (or the counter was never turned into a list): there is
    # nothing to sort or list, and indexing would fail.
    if not isinstance(taalteller, list) or not taalteller:
        return

    ranked = sorted(zip(taalteller, taalcodes), key=lambda pair: pair[0])
    # Slice-assign so existing references to the two lists see the new order.
    taalteller[:] = [count for count, _ in ranked]
    taalcodes[:] = [code for _, code in ranked]

    # The original bare `print` was a no-op on Python 3; emit the blank line.
    print("")
    for linenr, thislabel in enumerate(taalcodes):
        print("%i: %s %s" % (linenr, thislabel, taalteller[linenr]))
        
def wd_sparql_generator(query):
    """Yield each item produced by running *query* against Wikidata.

    Every item has ``get(get_redirect=True)`` called on it before it is
    yielded.

    Args:
        query: SPARQL query string handed to WikidataSPARQLPageGenerator.

    Yields:
        The items produced by the page generator, one at a time.
    """
    repo_site = pywikibot.Site('wikidata', 'wikidata')
    for item in pg.WikidataSPARQLPageGenerator(query, site=repo_site):
        item.get(get_redirect=True)
        yield item



def main():
    """Run the label count over every item returned by SparqlQuery.

    Prints 'Started', passes each item from wd_sparql_generator(SparqlQuery)
    to action_one_item(), then calls last_step_when_finished().
    """
    print('Started')
    # The site and its data repository are obtained here as before, although
    # the loop below does not use them directly.
    wikidata = pywikibot.Site('wikidata', 'wikidata')
    wikidata.data_repository()
    for item in wd_sparql_generator(SparqlQuery):
        action_one_item(item)
    last_step_when_finished()


# Run only when executed as a script, so that importing this module does not
# start querying Wikidata as a side effect.
if __name__ == "__main__":
    main()