User:Yamaha5/ConflictBot

From Wikidata
Jump to navigation Jump to search

Wrong Interwiki Namespace

[edit]
sql fawiki_p 'SELECT /*SLOW OK */ page_title,page_namespace ,ll_title  FROM page  JOIN langlinks ON page_id = ll_from WHERE page_namespace = 0 AND page_is_redirect = 0 AND ll_lang="en" AND ((ll_title LIKE "%Template:%") OR (ll_title LIKE "%Category:%") OR (ll_title LIKE "%Wikipedia:%") OR (ll_title LIKE "%Portal:%") OR (ll_title LIKE "%User:%") OR (ll_title LIKE "%talk:%") OR (ll_title LIKE "%Talk:%")) GROUP BY page_id;' >foo.txt

Templates Wrong interwiki

[edit]
sql fawiki_p 'SELECT page_title FROM page JOIN langlinks ON page_id = ll_from WHERE page_namespace = 10 AND page_is_redirect = 0 AND (page_title LIKE "%/doc%" OR page_title LIKE "%/Doc%") GROUP BY page_title ORDER BY COUNT(ll_from) DESC;' >foo2.txt

Bot code

[edit]

This code lists the pages which have a wrong interwiki link, for example:

  • fa:a > en:a > fa:b
  • fa:a > en:a > nopage

You can replace fa with any other local wiki code (de, it, ru, ...).

It will report the conflicts in the WrongInterList.txt file.

options

[edit]

By changing 'namespace' you can run the query for other namespaces.

code

[edit]
#!/usr/bin/python
# -*- coding: utf-8 -*-
# BY: رضا (User:reza1615 on fa.wikipedia)
# Distributed under the terms of the CC-BY-SA 3.0 .
import codecs,config,wikipedia
import MySQLdb as mysqldb
# --- shared module state -----------------------------------------------------
items_a = []    # first-wiki titles waiting to be matched by a backlink
items_b = []    # never read below; preserved for compatibility
items_c = {}    # second-wiki title (prefix stripped) -> first-wiki title
text_w = u'\n'  # report text accumulated for WrongInterList.txt
q_text = u'\n'  # raw query dump accumulated per wiki

# --------------------------------------------------need change----------------
first_wiki = 'fa'
second_wiki = 'en'
'''
Mediawiki Important Namespaces
0  =  Main
10 = Template
14 = Category
'''
namespace = "0"

#----------------------------------------------------------------------------------------
# Resolve the localized namespace prefix (e.g. u'Template:') on each wiki so
# titles from the SQL dumps can later be compared with prefixes stripped.
if int(namespace)>0:
    first_wiki_site = wikipedia.getSite(first_wiki)
    # namespace(..., all=True) returns every alias; [0] is the canonical local name.
    first_wiki_pre=first_wiki_site.namespace(int(namespace), all = True)[0]+u':'
    
    second_wiki_site=wikipedia.getSite(second_wiki)
    second_wiki_pre=second_wiki_site.namespace(int(namespace), all = True)[0]+u':'
else:
    # Main namespace (0): titles carry no prefix at all.
    first_wiki_pre,second_wiki_pre=u'',u''

# NOTE(review): dead code below — a manual override example for the Category
# namespace, left by the original author as a no-op string literal.
'''
first_wiki_pre,second_wiki_pre=u'رده:',u'Category:'
namespace="14"
'''

wikipedia.output('--------------First Query--------------------')
site  = wikipedia.getSite(first_wiki)
# One row per first-wiki page: its title plus the title it links to on the
# second wiki.  namespace/second_wiki are trusted module constants, but a
# parameterized query would still be safer than string concatenation.
querys = "SELECT page_title,ll_title  FROM page  JOIN langlinks ON page_id = ll_from WHERE page_is_redirect = 0 AND page_namespace="+namespace+" AND ll_lang='"+second_wiki+"' GROUP BY ll_from;"

conn = mysqldb.connect(first_wiki+"wiki.labsdb", db = site.dbName(),
                       user = config.db_username,
                       passwd = config.db_password)
cursor = conn.cursor()
wikipedia.output(u'Executing query:\n%s' % querys)
cursor.execute(querys.encode(site.encoding()))
results = cursor.fetchall()
# Build the whole dump with one join instead of quadratic unicode += in a loop.
# Format: "<local page_title>\t<second-wiki ll_title>" per line, leading '\n'
# kept for compatibility with the original output files.
q_text = u'\n' + u''.join(
    unicode(str(row[0]), 'UTF-8') + u'\t' + unicode(str(row[1]), 'UTF-8') + u'\n'
    for row in results)
# 'dump_file' rather than 'file': do not shadow the builtin.
with codecs.open( first_wiki+'.txt',mode = 'w',encoding = 'utf8' ) as dump_file:
    dump_file.write(q_text)
q_text = u'\n'  # reset the accumulator for the second query below
wikipedia.output(first_wiki+'.txt is built')
wikipedia.output('----------------Second Query------------------')
site  = wikipedia.getSite(second_wiki)
# Mirror of the first query, run on the second wiki with langlinks pointing
# back to the first wiki.
querys = "SELECT page_title,ll_title  FROM page  JOIN langlinks ON page_id = ll_from WHERE page_is_redirect = 0 AND page_namespace="+namespace+" AND ll_lang='"+first_wiki+"' GROUP BY ll_from;"

conn = mysqldb.connect(second_wiki+"wiki.labsdb", db = site.dbName(),
                       user = config.db_username,
                       passwd = config.db_password)
cursor = conn.cursor()
wikipedia.output(u'Executing query:\n%s' % querys)
cursor.execute(querys.encode(site.encoding()))
results = cursor.fetchall()
# One join instead of quadratic unicode += in a loop (q_text is u'\n' here).
q_text += u''.join(
    unicode(str(row[0]), 'UTF-8') + u'\t' + unicode(str(row[1]), 'UTF-8') + u'\n'
    for row in results)
# 'dump_file' rather than 'file': do not shadow the builtin.
with codecs.open( second_wiki+'.txt',mode = 'w',encoding = 'utf8' ) as dump_file:
    dump_file.write(q_text)
del q_text, results, cursor  # release the large result set before the compare phase
wikipedia.output(second_wiki+'.txt is built')

wikipedia.output('----------------------------------')
# Read both dumps back in; `with` guarantees the handles are closed (the
# original opened them via codecs.open and never closed them).
with codecs.open(first_wiki+'.txt', 'r', 'utf8') as dump_in:
    lines_first_wiki = dump_in.read().replace(u'\r', u'').strip().split(u'\n')

with codecs.open(second_wiki+'.txt', 'r', 'utf8') as dump_in:
    lines_second_wiki = dump_in.read().replace(u'\r', u'').strip().split(u'\n')

# Index the first wiki's dump.  `pending` is a multiset of first-wiki titles
# that still await a matching backlink; a dict lookup replaces the original
# O(n) `in items_a` / `items_a.remove(...)` per row (O(n^2) at wiki scale).
pending = {}
for line_F in lines_first_wiki:
    parts = line_F.split(u'\t')
    fa_title = parts[0].replace(u'_', u' ')
    pending[fa_title] = pending.get(fa_title, 0) + 1
    # Keyed on the second-wiki target title with its namespace prefix stripped.
    items_c[parts[1].replace(second_wiki_pre, u'')] = fa_title

for line_S in lines_second_wiki:
    # Split once per line (the original recomputed the split up to 5 times).
    parts = line_S.split(u'\t')
    en_title = parts[0]                 # second-wiki page_title (underscores)
    backlink = parts[1]                 # its langlink back to the first wiki
    case = backlink.replace(first_wiki_pre, u'').replace(u'_', u' ')
    if pending.get(case, 0):
        # Round trip fa:a > en:a > fa:a is consistent: consume one match.
        pending[case] -= 1
        continue
    wikipedia.output(line_S)
    try:
        item_c = first_wiki_pre + items_c[en_title.replace(u'_', u' ')]
    except KeyError:
        # No first-wiki page links to this second-wiki page: fa:a > en:a > nopage.
        text_w += (u'# [[:' + second_wiki + u':' + second_wiki_pre + en_title +
                   u']] > [[:' + backlink + u']]\n')
    else:
        if item_c != backlink:
            # fa:a > en:a > fa:b — the backlink disagrees with the forward link.
            text_w += (u'# [[:' + item_c + u']]  >  [[:' + second_wiki + u':' +
                       second_wiki_pre + en_title + u']] > [[:' + backlink + u']]\n')

# 'report' rather than 'file': do not shadow the builtin.
with codecs.open( u'WrongInterList.txt',mode = 'w',encoding = 'utf8' ) as report:
    report.write(text_w)