# User:Edoderoobot/WD-stat-counter
#
# From Wikidata
import pywikibot
from pywikibot import pagegenerators
import pywikibot.data.wikidataquery as wdquery
import codecs #used in logfiles, unicoded strings
import datetime

# Simple counter: how many items have a description, and how many are missing one.
# Along the way, also count the number of items with 1, 2, 3, 4, 5, etc. claims.

# Debug switch: when True the script only prints "debug is on" and skips main().
debugedo = False

default_query = 'link[svwiki]'
default_language = 'sv'

# Items with maxclaims claims or more all land in the last histogram bucket.
maxclaims = 50
# Histogram of claim counts: claimlst[n + 1] is the number of items with n
# claims. Slot 0 is never written, and the final slot (maxclaims + 1) collects
# every item at or above maxclaims, hence maxclaims + 2 entries. Deriving the
# length from maxclaims keeps the two from drifting apart.
claimlst = [0] * (maxclaims + 2)

# Running totals of items with / without a description in the chosen language.
withdesc = 0
withoutdesc = 0

def log_premature(itemno):
    """Record which item is being processed, so a crash can be traced to it.

    Arguments:
        * itemno - value appended, followed by a newline, to
                   "WD-stat-counter.prelog.csv" (append mode, UTF-8).
    """
    # The with-statement closes the file on exit; the former trailing
    # `logfile.close` (without parentheses) never called anything.
    with codecs.open("WD-stat-counter.prelog.csv", "a", encoding="utf-8") as logfile:
        logfile.write('%s\n' % (itemno))
  
  
def logme(verbose, formatstring, *parameters):
    """Append a formatted line to the log file and optionally echo it.

    Arguments:
        * verbose      - when true, the formatted message is also printed.
        * formatstring - %-style format string for the message.
        * parameters   - values substituted into formatstring.

    A failure while writing is reported on stdout and forces the message to
    be printed. If the failure was a format/parameter mismatch, that print
    raises the same formatting error again.
    """
    with codecs.open("cyrillic-description.log.csv", "a", encoding="utf-8") as logfile:
        try:
            # codecs.open already encodes to UTF-8 on write. Encoding the
            # format string by hand produced bytes, which cannot be joined
            # with '\n' on Python 3.
            logfile.write((formatstring + '\n') % parameters)
        except Exception as error:
            # Best-effort logging: report the problem instead of aborting.
            print("1) Error writing to logfile on: [%s] [%s]" % (type(error), error))
            verbose = True    # now I want to see what!
    if verbose:
        print(formatstring % parameters)


class WDBot():
    """
    A bot that walks Wikidata items and feeds each one to action_one_item,
    which tallies claim counts and description presence.
    """
    # Limit left over from testing; processing stops once more items than
    # this have been handled.
    DEFAULT_MAX_ITEMS = 9999999930

    def __init__(self, generator):
        """
        Arguments:
            * generator    - A generator that yields itempage objects.
        """
        self.generator = generator
        self.repo = pywikibot.Site().data_repository()

    def run(self, lng, max_items=DEFAULT_MAX_ITEMS):
        """
        Starts the robot.

        Arguments:
            * lng          - language code passed on to action_one_item.
            * max_items    - stop once more than this many items have been
                             processed (handy while testing).
        """
        # pywikibot.Site is the supported factory; getSite is its deprecated
        # alias, and __init__ already uses Site.
        site = pywikibot.Site('sv')
        repo = site.data_repository()

        items_found = 0
        for WDIquery in self.generator:
            if items_found > max_items:
                break
            if WDIquery.exists():
                # Load the item's content before it is inspected.
                WDIquery.get(get_redirect=True)
                items_found += action_one_item(repo, WDIquery, lng)
        

def WikidataQueryItemPageGenerator(query, site=None):
    """Yield an ItemPage for each item id returned by a WikidataQuery.

    As a side effect, the global items2do is set to the item count reported
    in the query status, and that count is written via pywikibot.output.

    @param query: the WikidataQuery query string.
    @param site: site whose data repository the ItemPages are created on;
        pywikibot.Site() is used when omitted.
    """
    global items2do

    target_site = pywikibot.Site() if site is None else site
    repo = target_site.data_repository()

    queryset = wdquery.QuerySet(query)
    # cacheMaxAge=0 presumably bypasses the query cache -- TODO confirm.
    result = wdquery.WikidataQuery(cacheMaxAge=0).query(queryset)

    items2do = result[u'status'][u'items']
    pywikibot.output(u'retrieved %d items' % items2do)

    for item_id in result[u'items']:
        yield pywikibot.ItemPage(repo, u'Q' + str(item_id))


def action_one_item(repo, wditem, lng):
    """Tally one item in the claim histogram and the description counters.

    Arguments:
        * repo   - not used by this function.
        * wditem - item exposing `claims` and `descriptions`.
        * lng    - language code looked up in wditem.descriptions.

    Returns 1, which the caller adds to its count of processed items.
    """
    global withdesc, withoutdesc

    # Bucket n + 1 holds items with n claims; anything at or above maxclaims
    # shares the last bucket.
    bucket = min(len(wditem.claims), maxclaims) + 1
    claimlst[bucket] += 1

    if lng not in wditem.descriptions:
        withoutdesc += 1
    else:
        withdesc += 1

    return 1

def my_print(format, *str):
    """Format a line, print it, and return it with a trailing newline.

    Arguments:
        * format - %-style format string.
        * str    - values substituted into format.

    The returned text ends in '\n'; the printed copy therefore shows an
    extra blank line, because print appends its own newline.
    """
    line = format % str + '\n'
    print(line)
    return line
  
def print_wikitable(lng):
    """Render the collected statistics as a wikitable and save it on-wiki.

    Each line is echoed through my_print while the text is built from the
    module-level claimlst, withdesc and withoutdesc. The result is saved to
    the page "user:Edoderoo/stats/<lng>" on the wikidata site.

    Arguments:
        * lng - language code; used as the last part of the page title.
    """
    bar_width = 80  # length of the bar drawn for the fullest bucket

    lines = [
        my_print("\n"),
        my_print("%s", "{| class=\"wikitable\""),
        my_print("|+%s", datetime.date.today().strftime('%d-%b-%Y')),
        my_print("%s", "! # of properties !! # of items !!"),
    ]

    # Slot 0 of claimlst is never filled; slot x holds items with x - 1 claims.
    # The leading 0 keeps max() valid and yields 0 when nothing was counted.
    peak = max([0] + claimlst[1:])
    for x in range(1, len(claimlst)):
        count = claimlst[x]
        # Scale against the fullest bucket. With no items counted (peak == 0)
        # draw no bars rather than dividing by zero.
        nrofstars = (bar_width * count) // peak if peak else 0
        lines.append(my_print("%s", "|-"))
        # Draw exactly nrofstars stars (the old slice started at index 1 and
        # so dropped one star from every bar).
        lines.append(my_print("|%s||%s||%s", x - 1, count, '*' * nrofstars))

    lines.append(my_print("%s", "|}"))
    lines.append(my_print("\n"))
    lines.append(my_print("%s items have a description, %s items have not!",
                          withdesc, withoutdesc))

    wikiString = u''.join(lines)

    # pywikibot.Site replaces the deprecated getSite alias.
    stats_site = pywikibot.Site('wikidata', 'wikidata')
    pywikibot.Page(stats_site, "user:Edoderoo/stats/" + lng).put(wikiString, comment='Update')  # Save page
    
def main():
    """Run the counter for the default query and language, then publish.

    Builds the item generator chain from default_query, lets WDBot process
    every item, and finally calls print_wikitable for default_language.
    """
    print("main")

    lng = default_language
    query = default_query  # later, I want to manage this with params

    queried_items = WikidataQueryItemPageGenerator(query)
    item_generator = pagegenerators.WikidataItemGenerator(queried_items)
    pigenerator = pagegenerators.PreloadingItemGenerator(item_generator)

    WDBot(pigenerator).run(lng)
    print_wikitable(lng)
    
# Script entry point: with the debug switch on, only announce it; otherwise
# print the start message and run the counter.
if __name__ == "__main__":
    if not debugedo:
        print("Klaar voor de start")
        main()
    else:
        print("debug is on")