User:Gaming Euan


 * 1) !/usr/bin/env python

import re import yaml import urllib import urllib2

class WikipediaError(Exception): pass

class Wikipedia: url_article = 'http://%s.wikipedia.org/w/index.php?action=raw&title=%s' url_image = 'http://%s.wikipedia.org/w/index.php?title=Special:FilePath&file=%s' url_search = 'http://%s.wikipedia.org/w/api.php?action=query&list=search&srsearch=%s&sroffset=%d&srlimit=%d&format=yaml' def __init__(self, lang): self.lang = lang def __fetch(self, url): request = urllib2.Request(url) request.add_header('User-Agent', 'Mozilla/5.0') try: result = urllib2.urlopen(request) except urllib2.HTTPError, e:           raise WikipediaError(e.code) except urllib2.URLError, e:           raise WikipediaError(e.reason) return result def article(self, article): url = self.url_article % (self.lang, urllib.quote_plus(article)) content = self.__fetch(url).read if content.upper.startswith('#REDIRECT'): match = re.match('(?i)#REDIRECT \[\[([^\[\]]+)\]\]', content) if not match == None: return self.article(match.group(1)) raise WikipediaError('Can\'t found redirect article.') return content def image(self, image, thumb=None): url = self.url_image % (self.lang, image) result = self.__fetch(url) content = result.read if thumb: url = result.geturl + '/' + thumb + 'px-' + image url = url.replace('/commons/', '/commons/thumb/') url = url.replace('/' + self.lang + '/', '/' + self.lang + '/thumb/') return self.__fetch(url).read return content def search(self, query, page=1, limit=10): offset = (page - 1) * limit url = self.url_search % (self.lang, urllib.quote_plus(query), offset, limit) content = self.__fetch(url).read parsed = yaml.load(content) search = parsed['query']['search'] results = [] if search: for article in search: title = article['title'].strip snippet = article['snippet'] snippet = re.sub(r'(?m)<.*?>', '', snippet) snippet = re.sub(r'\s+', ' ', snippet) snippet = snippet.replace(' . ', '. ') snippet = snippet.replace(', ', ', ') snippet = snippet.strip wordcount = article['wordcount'] results.append({                   'title' : title,                    'snippet' : snippet,                    'wordcount' : wordcount                }) # yaml.dump(results, default_style='', default_flow_style=False,       #     allow_unicode=True) return results

if __name__ == '__main__': wiki = Wikipedia('simple') wiki.article('Uruguay') wiki.image('Bono_at_the_2009_Tribeca_Film_Festival.jpg', '640') wiki.search('Wikipedia') print 'OK'