-
-
Save Vesihiisi/1f40a0fea06c2c4f4f89df36a7a974b9 to your computer and use it in GitHub Desktop.
Short script for reproducing WLM style reporting pages for Wikidata powered competitions (here WLE in Sweden)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# python check_WLE_id.py -live -dir:~/Projects/batchUploadTools/ | |
"""Script for updating unused imges/unknonw ids pages for WLE on sv.wp.""" | |
import pywikibot | |
import wikidataStuff.wdqsLookup as query | |
# Run configuration for the WLE (Wiki Loves Earth) reports on sv.wikipedia.
SETTING = {
    # Wikidata property holding the protected-area id (nvrid).
    'prop': 'P3613',
    # Turns an id into a link to the Naturvårdsverket database entry.
    'formatter_url': 'http://skyddadnatur.naturvardsverket.se/sknat/?nvrid={}',
    # Commons category whose members carry the id as their sortkey prefix.
    'cat': 'Category:Protected areas of Sweden with known IDs',
    # Target report pages (titles in Swedish: "Unused images" / "Invalid ids").
    'unused_page': 'Wikipedia:Projekt naturgeografi/WLE/Oanvända bilder',
    'unknown_page': 'Wikipedia:Projekt naturgeografi/WLE/Ogiltiga id',
    # Subpage transcluded at the top of each generated report page.
    'intro_subpage': 'intro',
    # Section heading used for files whose sortkey carries no id.
    'no_id_caption': '<no id>',
    # Wiki the reports are written to.
    'lang': 'sv',
    'project': 'wikipedia',
    # Edit summary (Swedish: "Automatic update of galleries").
    'edit_summary': 'Automatisk uppdatering av gallerier',
    # Lower-cased sortkeys to skip in the unknown-id report — presumably
    # Q-ids of biosphere reserves that lack a P3613 id by design;
    # TODO(review): confirm against the Commons category's sortkeys.
    'biospheres': ["q1525232", "q3373690",
                   "q22680883", "q22680881",
                   "q22806736", "q85493403", "q86189513"]
}
# helper for massaging the raw Wikidata query results
def process_query_data(raw_data):
    """
    Process the raw output of query.make_select_wdqs_query.

    @param raw_data: list of result dicts, each with 'id', 'item' (a full
        Wikidata entity URL) and optionally 'P18' (an image claim).
    @return: dict mapping id to {'image': bool, 'qid': str}
    """
    data = {}
    for entry in raw_data:
        idno = entry['id']
        # The id counts as illustrated as soon as the optional P18 is present.
        image = bool(entry.get('P18'))
        qid = entry['item'][len('http://www.wikidata.org/entity/'):]
        prior = data.get(idno)
        if prior:
            if prior['qid'] != qid:
                pywikibot.warning('duplicate idno for {}'.format(idno))
            # Bug fix: duplicates used to overwrite earlier rows outright,
            # so a later row without P18 could clobber image=True and make
            # the id show up as unused. Keep the flag if any row had P18.
            image = image or prior['image']
        data[idno] = {'image': image, 'qid': qid}
    return data
def quick_output(output_data):
    """Render the collected data as plain text for the command line."""
    lines = []
    for key, info in output_data.items():
        heading = key or SETTING.get('no_id_caption')
        qid = info.get('qid')
        if qid:
            heading = '{} ({})'.format(heading, qid)
        lines.append('{}\n'.format(heading))
        lines.append('\t{}\n'.format('\n\t'.join(info.get('images'))))
    return ''.join(lines)
def wikitext_output(output_data):
    """
    Render the collected data as wikitext suitable for volunteers.

    All non-gallery information (categories, instructions) should be placed
    in the intro subpage.
    """
    parts = ['{{/%s}}\n\n' % SETTING.get('intro_subpage')]
    # Sort sections by id so successive runs produce minimal page diffs.
    for idno in sorted(output_data.keys()):
        entry = output_data.get(idno)
        if idno:
            heading = '[{url} {id}]'.format(
                url=SETTING.get('formatter_url').format(idno),
                id=idno)
        else:
            heading = SETTING.get('no_id_caption')
        if entry.get('qid'):
            heading += ' - {{Q|%s}}' % entry.get('qid')
        parts.append('==={}===\n'.format(heading))
        parts.append('<gallery>\n')
        parts.append('\n'.join(entry.get('images')))
        parts.append('\n</gallery>\n\n')
    return ''.join(parts)
def load_and_process():
    """Load items and images and identify unused images and unknown ids."""
    # retrieve info from Wikidata
    wle_items = process_query_data(
        query.make_select_wdqs_query(
            '?item wdt:{} ?id'.format(SETTING.get('prop')),
            optional_props=['P18'], select_value='id', allow_multiple=True,
            raw=True))
    # retrieve info from Commons: group category members by their sortkey
    # prefix, which carries the protected-area id
    commons = pywikibot.Site('commons', 'commons')
    member_gen = pywikibot.data.api.QueryGenerator(
        site=commons, list='categorymembers',
        cmprop='title|sortkeyprefix', cmtitle=SETTING.get('cat'))
    commons_data = {}
    for member in member_gen:
        commons_data.setdefault(
            member['sortkeyprefix'], []).append(member['title'])
    # process data: split ids into unknown ones and ones whose item
    # carries no image
    biospheres = SETTING["biospheres"]
    unknown_ids = {}
    unused_images = {}
    for idno, images in commons_data.items():
        item = wle_items.get(idno)
        if item is None:
            if idno.lower() not in biospheres:
                unknown_ids[idno] = {
                    'images': images,
                    'qid': None
                }
        elif not item['image']:
            unused_images[idno] = {
                'images': images,
                'qid': item['qid']
            }
    return (unknown_ids, unused_images)
def update_pages(unknown_ids, unused_images):
    """Update both the unused images and unknown ids wikipages."""
    site = pywikibot.Site(SETTING.get('lang'), SETTING.get('project'))
    # unknown ids first, then unused images
    targets = (
        ('unknown_page', unknown_ids),
        ('unused_page', unused_images),
    )
    for setting_key, report_data in targets:
        update_single_page(
            pywikibot.Page(site, SETTING.get(setting_key)),
            wikitext_output(report_data))
def update_single_page(page, content):
    """
    Overwrite a single wikipage with new content.

    Save failures are logged as warnings rather than raised so that one
    bad page does not abort the remaining updates.

    @param page: pywikibot.Page to overwrite
    @param content: wikitext to store on the page
    """
    summary = SETTING.get('edit_summary')
    if not isinstance(page, pywikibot.Page):
        pywikibot.warning(
            'Could not save page {0} because it is not a Page '
            'instance.'.format(page))
        # Bug fix: previously execution fell through and called page.put()
        # on the non-Page object anyway, raising AttributeError.
        return
    try:
        page.put(newtext=content, summary=summary)
    except (pywikibot.exceptions.OtherPageSaveError,
            pywikibot.exceptions.PageSaveRelatedError) as e:
        pywikibot.warning(
            'Could not save page {0} ({1}): {2}'.format(page, summary, e))
def handle_args(args):
    """Handle any local arguments and pass on generic pywikibot arguments."""
    live = False
    # pywikibot.handle_args consumes the generic options and returns the rest
    for arg in pywikibot.handle_args(args):
        option = arg.partition(':')[0]
        if option == '-live':
            live = True
    return live
def main(*args):
    """Build the reports, then publish them (-live) or print them."""
    live = handle_args(args)
    unknown_ids, unused_images = load_and_process()
    if not live:
        # dry run: dump both reports to the console
        pywikibot.output(quick_output(unknown_ids))
        pywikibot.output('-------------------------------------')
        pywikibot.output(quick_output(unused_images))
    else:
        update_pages(unknown_ids, unused_images)
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment