User:Yamaha5/ConflictBot
Jump to navigation
Jump to search
Wrong Interwiki Namespace
[edit]sql fawiki_p 'SELECT /*SLOW OK */ page_title,page_namespace ,ll_title FROM page JOIN langlinks ON page_id = ll_from WHERE page_namespace = 0 AND page_is_redirect = 0 AND ll_lang="en" AND ((ll_title LIKE "%Template:%") OR (ll_title LIKE "%Category:%") OR (ll_title LIKE "%Wikipedia:%") OR (ll_title LIKE "%Portal:%") OR (ll_title LIKE "%User:%") OR (ll_title LIKE "%talk:%") OR (ll_title LIKE "%Talk:%")) GROUP BY page_id;' >foo.txt
Templates Wrong interwiki
[edit]sql fawiki_p 'SELECT page_title FROM page JOIN langlinks ON page_id = ll_from WHERE page_namespace = 10 AND page_is_redirect = 0 AND (page_title LIKE "%/doc%" OR page_title LIKE "%/Doc%") GROUP BY page_title ORDER BY COUNT(ll_from) DESC;' >foo2.txt
Bot code
[edit]This code lists the pages which have wrong interwikis, for example:
- fa:a > en:a > fa:b
- fa:a > en:a > nopage
You can replace fa with any other local wiki code (de, it, ru, ...).
It will report the conflicts to the WrongInterList.txt file.
options
[edit]By changing 'namespace' you can run the query for other namespaces.
code
#!/usr/bin/python
# -*- coding: utf-8 -*-
# BY: رضا (User:reza1615 on fa.wikipedia)
# Distributed under the terms of the CC-BY-SA 3.0 .
"""Report interwiki conflicts between two wikis.

Dumps every (page_title, ll_title) langlink pair of both wikis from the
Labs database replicas into <wiki>.txt files, then compares them and
writes the pages whose links do not round-trip
(fa:a > en:a > fa:b, or fa:a > en:a > no page) to WrongInterList.txt.

Runs under Python 2 with the old pywikipedia framework.
"""
import codecs
import config          # pywikipedia config: supplies db_username / db_password
import wikipedia       # old pywikipedia framework
import MySQLdb as mysqldb

# --------------------------------- settings ---------------------------------
first_wiki = 'fa'      # local wiki whose interwikis are checked
second_wiki = 'en'     # remote wiki the langlinks point to
# MediaWiki important namespaces: 0 = Main, 10 = Template, 14 = Category
namespace = "0"
# -----------------------------------------------------------------------------

# Namespace prefixes for link formatting (e.g. u'رده:' / u'Category:' for
# namespace 14); the main namespace has no prefix.
if int(namespace) > 0:
    first_wiki_pre = wikipedia.getSite(first_wiki).namespace(
        int(namespace), all=True)[0] + u':'
    second_wiki_pre = wikipedia.getSite(second_wiki).namespace(
        int(namespace), all=True)[0] + u':'
else:
    first_wiki_pre, second_wiki_pre = u'', u''


def dump_langlinks(source_wiki, target_wiki):
    """Dump source_wiki's langlinks to target_wiki into <source_wiki>.txt.

    Each output line is "page_title<TAB>ll_title". The file starts with a
    blank line (kept for compatibility with the original dump format).
    """
    site = wikipedia.getSite(source_wiki)
    # Parameterized query — never build SQL by string concatenation.
    query = ("SELECT page_title,ll_title FROM page "
             "JOIN langlinks ON page_id = ll_from "
             "WHERE page_is_redirect = 0 AND page_namespace=%s "
             "AND ll_lang=%s GROUP BY ll_from;")
    conn = mysqldb.connect(source_wiki + "wiki.labsdb", db=site.dbName(),
                           user=config.db_username,
                           passwd=config.db_password)
    try:
        cursor = conn.cursor()
        wikipedia.output(u'Executing query:\n%s' % query)
        cursor.execute(query, (int(namespace), target_wiki))
        results = cursor.fetchall()
    finally:
        conn.close()  # the original leaked every connection
    # Build with join instead of quadratic u'' += concatenation.
    rows = [unicode(str(r[0]), 'UTF-8') + u'\t' + unicode(str(r[1]), 'UTF-8')
            for r in results]
    with codecs.open(source_wiki + '.txt', mode='w', encoding='utf8') as out:
        out.write(u'\n' + u'\n'.join(rows) + u'\n')
    wikipedia.output(source_wiki + '.txt is built')


wikipedia.output('--------------First Query--------------------')
dump_langlinks(first_wiki, second_wiki)
wikipedia.output('----------------Second Query------------------')
dump_langlinks(second_wiki, first_wiki)
wikipedia.output('----------------------------------')

with codecs.open(first_wiki + '.txt', 'r', 'utf8') as dump:
    lines_first_wiki = dump.read().replace(u'\r', u'').strip().split(u'\n')
with codecs.open(second_wiki + '.txt', 'r', 'utf8') as dump:
    lines_second_wiki = dump.read().replace(u'\r', u'').strip().split(u'\n')

# items_a: first-wiki titles whose backlink has not been matched yet.
# A set, not a list: O(1) membership/removal (the original was O(n**2)).
# Titles are unique per namespace, so set semantics match list.remove here.
items_a = set()
# items_c: second-wiki target title (prefix stripped) -> first-wiki source title.
items_c = {}
for line_F in lines_first_wiki:
    local_title = line_F.split(u'\t')[0].replace(u'_', u' ')
    items_a.add(local_title)
    items_c[line_F.split(u'\t')[1].replace(second_wiki_pre, u'')] = local_title

text_w = u'\n'
for line_S in lines_second_wiki:
    remote_title = line_S.split(u'\t')[0]
    back_target = line_S.split(u'\t')[1]
    case = back_target.replace(first_wiki_pre, u'').replace(u'_', u' ')
    if case in items_a:
        # Clean round-trip: fa:a > en:a > fa:a — nothing to report.
        items_a.discard(case)
    else:
        wikipedia.output(line_S)
        try:
            # fa:a > en:a > fa:b — the remote page links back to another page.
            item_c = first_wiki_pre + items_c[remote_title.replace(u'_', u' ')]
            if item_c != back_target:
                text_w += (u'# [[:' + item_c + u']] > [[:' + second_wiki +
                           u':' + second_wiki_pre + remote_title +
                           u']] > [[:' + back_target + u']]\n')
        except KeyError:  # was a bare except: only the dict lookup can fail
            # No first-wiki page links to this remote page at all.
            text_w += (u'# [[:' + second_wiki + u':' + second_wiki_pre +
                       remote_title + u']] > [[:' + back_target + u']]\n')

with codecs.open(u'WrongInterList.txt', mode='w', encoding='utf8') as report:
    report.write(text_w)