User:MastCell/dermimages.py


# File: Dermimages.py
# By MastCell
# Released for any and all reuse and modification.
# Use at your own risk.
#
# This script does the following:
#  1. Loads all articles linked to the Dermatology Task Force,
#     using Category:Dermatology task force articles.
#  2. Checks each page for images, excluding those which are part of
#     common templates.
#  3. Outputs the results in a sortable wikitable which can be
#     cut-and-pasted onto Wikipedia.
#
# The goal is to assess how many dermatology-related articles currently
# lack images, and to assess the overall prevalence of images across
# derm-related articles.
#
# The script can be run from the command line. It will create a file
# called "dermimages_output.txt" in the current working directory (the
# script's own directory when it is run from there). Note that if such a
# file already exists, it will be overwritten. The output format is meant
# to be cut-and-pasted into Wikipedia as a wikitable.

import mwclient
# mwclient module for MediaWiki API calls

# Names of images that must not be counted as "content" images: icons pulled
# in by common templates, featured-article stars, stub markers and the like.
# Entries are bare file names (no namespace prefix), because they are compared
# against each image's page_title.  Modify as needed.
global_exclude_list = set([
    "Normal Epidermis and Dermis with Intradermal Nevus 10x.JPG",
    "LinkFA-star.png",
    "Featured article star.svg",
    "Symbol support vote.svg",
    "Rod of Asclepius2.svg",
    "Mergefrom.svg",
    "Gray944.png",
    "Question book-new.svg",
    "Ambox contradict.svg",
    "Mitotic spindle color micrograph.gif",
    "Ambox content.png",
    "Text document with red question mark.svg",
    "Edit-clear.svg",
    "UK-Medical-Bio-Stub.svg",
    "Flag of Germany.svg",
    "Commons-logo.svg",
    "Wiki letter w.svg",
    "Chromosome.svg",
    "DNA stub.png",
    "Merge-arrow.svg",
])
# Global set of image names to exclude
# (includes images from templates, featured article stars, etc.,
#  which should not be counted as "content" images).
# Modify as needed.

# Running tallies: the total number of pages examined, plus how many of them
# carry zero, one, two, three, or four-or-more content images.  The keys double
# as the labels printed in the totals section of the output.
global_imagenums = {
    "Total pages": 0,
    "Pages with zero images": 0,
    "Pages with one image": 0,
    "Pages with two images": 0,
    "Pages with three images": 0,
    "Pages with four or more images": 0,
}
# Global dictionary counting how many pages have zero images, one image, two images, ...

# Open the site and fetch the task-force category.  Iterating the category
# generally yields article *talk* pages, since that is where the Dermatology
# task force template is typically placed.
wpHandle = mwclient.Site('en.wikipedia.org')
dermTalkPages = wpHandle.Pages['Category:Dermatology task force articles']
# Open the site and collect pages from the category.
# (Note that these will generally be article talk pages, since that's
#  where the Derm task force template is typically placed.)

def main_program():
    """Main program loop: load and process each page in the category.

    Writes the table header, one table row per article, the table footer
    and finally the totals section to the module-level ``outputFile``.
    """
    setUpTable()
    for talkPage in dermTalkPages:
        # Make sure we're dealing with the article page, rather than the
        # talk page that actually carries the task-force template.
        article = wpHandle.Pages[talkPage.page_title]

        # Increment the total page count
        global_imagenums["Total pages"] += 1

        # Load the page's images and emit its table row
        processPage(article, article.images())
    closeTable()
    outputDictionary()


def _wikiSafe(title):
    """Return *title* as plain-ASCII text safe to write on any Python version.

    Every non-ASCII character is replaced by an XML numeric character
    reference (e.g. ``&#233;``).  Decoding the result back to text keeps the
    value a ``str`` on Python 3, where writing ``bytes`` to a text-mode file
    raises TypeError; on Python 2 the result is pure ASCII and therefore
    still writable to an ordinary file object.
    """
    return title.encode("ascii", "xmlcharrefreplace").decode("ascii")


def processPage(page, imageList):
    """Write one wikitable row for *page* and record its image count.

    The row holds the page name, the images that are not listed in
    ``global_exclude_list`` (one per line), and the number of such images.

    page      -- object exposing ``name`` (the page title to print).
    imageList -- iterable of objects exposing ``page_title`` (compared
                 against ``global_exclude_list``) and ``name`` (printed).

    Page and image names must be encoded before being written; otherwise
    the script eventually chokes with a Unicode encoding error.
    """
    outputFile.write("|-\n")
    outputFile.write("| " + _wikiSafe(page.name) + " ||")

    imageCount = 0
    for image in imageList:
        if image.page_title in global_exclude_list:
            continue
        if imageCount > 0:
            outputFile.write(" \n")
        imageCount += 1
        # NOTE(review): the literals that originally wrapped the image name
        # were lost from the source (it read `write( + name)` followed by an
        # argument-less `write()`, both of which raise TypeError).  They are
        # reconstructed here as a "[[:...]]" wikilink, which links to the
        # file instead of embedding it -- confirm against the intended output.
        outputFile.write("[[:" + _wikiSafe(image.name) + "]]")

    outputFile.write("\n" + ' || ' + str(imageCount) + "\n")
    incrementPageCounter(imageCount)

def incrementPageCounter(numImages):
    """Bump the ``global_imagenums`` tally matching *numImages*.

    Counts of 0, 1, 2 and 3 each have their own bucket; every other value
    is recorded under "Pages with four or more images".

    The bucket is selected by value.  The earlier ``numImages is 0`` style
    tested object identity, which only happens to work for small integers
    as a CPython implementation detail.
    """
    bucketByCount = {
        0: "Pages with zero images",
        1: "Pages with one image",
        2: "Pages with two images",
        3: "Pages with three images",
    }
    bucket = bucketByCount.get(numImages, "Pages with four or more images")
    global_imagenums[bucket] += 1
# Function to update the dictionary of page counts.
# I'm sure there's a more elegant way to do this, but...

def setUpTable():
    """Write the wikitable header boilerplate to the global ``outputFile``.

    Emits the table-opening line followed by the heading row with three
    columns: Page, Images (marked unsortable) and Number of images.
    """
    headerLines = (
        '{| class="wikitable sortable" border="1"',
        '! Page !! class="unsortable" | Images !! Number of images',
    )
    # One batched write; each header line is terminated by a newline.
    outputFile.write("".join(line + "\n" for line in headerLines))
# Output the table header boilerplate

def closeTable():
    """Write the wikitable footer (the closing ``|}``) to ``outputFile``."""
    outputFile.write('|}')
# Output the table footer boilerplate

def outputDictionary():
    """Write the totals section: one "label: count" line per tally.

    Emits a "Totals by number of images" wiki heading and then every entry
    of ``global_imagenums`` to the global ``outputFile``.
    """
    outputFile.write("\n== Totals by number of images ==\n")
    # .items() exists on both Python 2 and Python 3 dictionaries, and it is
    # actually called here; the bare `.iteritems` attribute is not iterable.
    for label, count in global_imagenums.items():
        outputFile.write(label + ": ")
        outputFile.write(str(count))
        outputFile.write("\n")


# Main program: open a handle to the output file (overwriting any existing
# file of that name), then run the main loop.  The handle is bound to the
# module-level name `outputFile`, which the output functions write to.
with open('dermimages_output.txt', 'w') as outputFile:
    main_program()
# Output the dictionary counts
#
# Main program
# Opens a handle to the output file, then runs the main loop