User:ANU Outreachy/Outreachy 6

From Wikidata
Jump to navigation Jump to search
import pywikibot
from pywikibot import pagegenerators
import re

enwiki = pywikibot.Site('en', 'wikipedia')
enwiki_repo = enwiki.data_repository()

# The ID shows up in one of three template spellings. They are tried in this
# order, and the first spelling that matches anywhere in the text wins.
_UEFA_ID_PATTERNS = (
    re.compile(r'UEFA\splayer\|(\d+)'),
    re.compile(r'UEFA\|(\d+)'),
    re.compile(r'UEFA\splayer\|id\=(\d+)'),
)


def _find_uefa_id(text):
    """Return the first UEFA player ID found in *text*, or None if absent."""
    for pattern in _UEFA_ID_PATTERNS:
        match = pattern.search(text)
        if match:
            return match.group(1)
    return None


def addQid(page, itempage):
    """Extract the UEFA player ID from *page* and offer to save it as P2276.

    The ID is looked up in ``page.text``. When one is found, a P2276 claim
    is built and printed, and the operator is asked for confirmation; the
    claim is written to *itempage* only if the answer is ``y`` or ``Y``.

    Args:
        page: Object whose ``text`` attribute holds the page source.
        itempage: Item that receives the claim through ``addClaim``.

    Returns:
        None. Pages without a recognisable ID are reported and skipped.
    """
    player_id = _find_uefa_id(page.text)
    if player_id is None:
        # Indexing the empty result list used to raise IndexError here and
        # abort the whole run; skip the page instead.
        print('No UEFA player ID found on {}; skipping'.format(page))
        return

    claim = pywikibot.Claim(enwiki_repo, 'P2276')
    claim.setTarget(player_id)
    print(claim)
    text = input("Save? ")
    if text in ('y', 'Y'):
        itempage.addClaim(claim, summary='Adding UEFA player ID')
    
        

def checkqids(page, itempage):
    """Add the UEFA player ID to *itempage* unless it already has P2276.

    Args:
        page: Page handed on to ``addQid`` when the property is missing.
        itempage: Item whose claims are inspected; ``itempage.get()`` is
            expected to return a mapping with a ``'claims'`` entry.

    Returns:
        None.
    """
    qid = 'P2276'
    # Read from the item that was passed in. The earlier code used the
    # module-level name ``item`` and only worked because the calling loop
    # happened to bind that name.
    item_dict = itempage.get()
    try:
        item_dict['claims'][qid]
    except KeyError:
        # Only a missing key means "no claim yet". Any other failure should
        # surface instead of silently triggering an edit.
        addQid(page, itempage)

 
# Title of the English Wikipedia category whose member pages are processed.
targetcats = "Category:UEFA_player_ID_not_in_Wikidata"

cat = pywikibot.Category(enwiki, targetcats)
# Only direct members of the category are visited (no recursion into
# subcategories).
pages = pagegenerators.CategorizedPageGenerator(
    cat,
    recurse=False,
)

# For each member page, look up its linked item, print the page, and let
# checkqids decide whether an ID has to be added.
for page in pages:
    item = pywikibot.ItemPage.fromPage(page)
    print(page)
    checkqids(page, item)