# -*- coding: utf-8 -*-
# User:JeffGBot/config.py
#
# (C) Rob W.W. Hooft, 2003
#     parts by holger@trillke.net 2002/03/18
#     Purodha Blissenbach (Modifier), 2010
# (C) Pywikipedia bot team, 2007-2010
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id: config.py 8813 2010-12-29 22:27:03Z xqt $'

import os
import re
import sys as __sys
# IMPORTANT:
# Do not change any of the variables in this file. Instead, make
# a file user-config.py, and overwrite values in there.


# Note: all variables defined in this module are made available to bots as
# configuration settings, *except* variable names beginning with an
# underscore (example: _variable).  Be sure to use an underscore on any
# variables that are intended only for internal use and not to be exported
# to other modules.


############## ACCOUNT SETTINGS ##############

# The family of sites we are working on. wikipedia.py will import
# families/xxx_family.py so if you want to change this variable,
# you need to write such a file.
family = 'wikipedia'
# The language code of the site we're working on.
mylang = 'language'

# The dictionary usernames should contain a username for each site where you
# have a bot account. Please set your usernames by adding such lines to your
# user-config.py:
#
# usernames['wikipedia']['de'] = 'myGermanUsername'
# usernames['wiktionary']['en'] = 'myEnglishUsername'
#
# If you have a sysop account on some wikis, this will be used to delete pages
# or to edit locked pages if you add such lines to your
# user-config.py:
#
# sysopnames['wikipedia']['de'] = 'myGermanUsername'
# sysopnames['wiktionary']['en'] = 'myEnglishUsername'
usernames = {}
sysopnames = {}

# NOTE(review): the two comments below were recovered from a flattened dump;
# their attachment to the individual variables is a best guess - confirm.
# See section SOLVE_DISAMBIGUATION SETTINGS for details.
disambiguation_comment = {}
gdab_namespaces = {}
# This is currently not used anywhere:
account_global = False

# Solve captchas in the webbrowser. Setting this to False will result in the
# exception CaptchaError being thrown if a captcha is encountered.
# TODO: allow more flexibility, such as runtime choices, skipping, and postponing
solve_captcha = True

# Some sites will require password authentication to access the HTML pages at
# the site. If you have any such site, add lines to your user-config.py of
# the following form:
#
# authenticate['en.wikipedia.org'] = ('John','XXXXX')
#
# where John is your login name, and XXXXX your password.
# Note:
# 1. This is only for sites that use authentication in the form that gives
#    you a popup for name and password when you try to access any data, NOT
#    for, for example, wiki usernames
# 2. You must use the hostname of the site, not its family/language pair
authenticate = {}

# Secure Connection to all Wikimedia Projects
SSL_connection = False

# password_file = ".passwd"
# A password file with default passwords. For more information, please
# see LoginManager.readPassword in login.py.
# By default you are asked for a password on the terminal.
password_file = None

# Login using the API. This is less likely to break.
use_api_login = True

# Enable receiving data through the API wherever it is available.
use_api = True

# Display a warning message if your edits appear in recent changes page
notify_unflagged_bot = True

# Set to True to override the bot-template exclusion protocol
# (at your own risk!)
ignore_bot_templates = False


############## USER INTERFACE SETTINGS ##############

# The encoding that's used in the user's console, i.e. how strings are encoded
# when they are read by raw_input. On Windows systems' DOS box, this should
# be 'cp850' ('cp437' for older versions). Linux users might try 'iso-8859-1'
# or 'utf-8'.
# This default code should work fine, so you don't have to think about it.
# TODO: consider getting rid of this config variable.
try:
    console_encoding = __sys.stdout.encoding
except AttributeError:
    # When using pywikipedia inside a daemonized twisted application,
    # we get "StdioOnnaStick instance has no attribute 'encoding'"
    console_encoding = None

# The encoding in which textfiles are stored, which contain lists of page
# titles. The most used is: 'utf-8'. 'utf-8-sig' recognizes BOM but it is
# available on Python 2.5 or higher. For a complete list please see:
# http://docs.python.org/library/codecs.html#standard-encodings
textfile_encoding = 'utf-8'

# tkinter isn't yet ready
userinterface = 'terminal'

# Should we transliterate characters that do not exist in the console
# character set?
# True: whenever possible
# False: never - always replace them by question marks
# Currently only works if interface 'terminal' is set.
transliterate = True

# Should the system bell ring if the bot expects user input?
ring_bell = False

# Colorization can be used to markup important text parts of the output.
# On Linux/Unix terminals, ANSI escape codes are used for this. On Windows,
# it is done by a DLL call via ctypes. ctypes is only available since
# Python 2.5, so if you're using Python 2.4 or lower on Windows, you should
# upgrade.
# Set this to False if you're using Linux and your tty doesn't support
# ANSI colors.
try:
    # Don't print colorized when the output is, for example, piped to a file.
    # isatty must be *called*; the bare bound method is always truthy.
    colorized_output = __sys.stdout.isatty()
except Exception:
    # Best effort: a replaced stdout may lack isatty() or refuse the call.
    colorized_output = False

# An indication of the size of your screen, or rather the size of the screen
# to be shown, for flickrripper
tkhorsize = 1600
tkvertsize = 1000

############## EXTERNAL EDITOR SETTINGS ##############
# The command for the editor you want to use. If set to None, a simple Tkinter
# editor will be used.
# On Windows systems, this script tries to determine the default text editor.
if __sys.platform == 'win32':
    try:
        import _winreg
        # Raw strings keep the registry path backslashes literal.
        _key1 = _winreg.OpenKey(
            _winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Explorer\FileExts\.txt\OpenWithProgids')
        _progID = _winreg.EnumValue(_key1, 1)[0]
        _key2 = _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT,
                                r'%s\shell\open\command' % _progID)
        _cmd = _winreg.QueryValueEx(_key2, None)[0]
        editor = _cmd.replace('%1', '')
        # Notepad is even worse than our Tkinter editor.
        # Nobody has deserved to use it.
        if editor.lower().endswith('notepad.exe'):
            editor = None
    except Exception:
        # XXX what are we catching here?
        # Any failure of the registry lookup falls back to the built-in editor.
        editor = None
else:
    editor = None

# Warning: DO NOT use an editor which doesn't support Unicode to edit pages!
# You will BREAK non-ASCII symbols!
editor_encoding = 'utf-8'

# The temporary file name extension can be set in order to use syntax
# highlighting in your text editor.
editor_filename_extension = 'wiki'


############## LOGFILE SETTINGS ##############

# Defines for which scripts a logfile should be enabled. Logfiles will be
# saved in the 'logs' subdirectory.
# Example:
#     log = ['interwiki', 'weblinkchecker', 'table2wiki']
# It is also possible to enable logging for all scripts, using this line:
#     log = ['*']
# To disable all logging, use this:
#     log = []
# Per default, logging of interwiki.py is enabled because its logfiles can
# be used to generate so-called warnfiles.
# This setting can be overridden by the -log or -nolog command-line arguments.
log = ['interwiki']


############## INTERWIKI SETTINGS ##############

# Should interwiki.py report warnings for missing links between foreign
# languages?
interwiki_backlink = True

# Should interwiki.py display every new link it discovers?
interwiki_shownew = True

# Should interwiki.py output a graph PNG file on conflicts?
# You need pydot for this: http://dkbza.org/pydot.html
interwiki_graph = False

# Specifies that the robot should process that amount of subjects at a time,
# only starting to load new pages in the original language when the total
# falls below that number. Default is to process (at least) 100 subjects at
# once.
interwiki_min_subjects = 100

# If interwiki graphs are enabled, which format(s) should be used?
# Supported formats include png, jpg, ps, and svg. See:
# http://www.graphviz.org/doc/info/output.html
# If you want to also dump the dot files, you can use this in your
# user-config.py:
# interwiki_graph_formats = ['dot', 'png']
# If you need a PNG image with an HTML image map, use this:
# interwiki_graph_formats = ['png', 'cmap']
# If you only need SVG images, use:
# interwiki_graph_formats = ['svg']
interwiki_graph_formats = ['png']

# You can post the contents of your autonomous_problems.dat to the wiki,
# e.g. to http://de.wikipedia.org/wiki/Wikipedia:Interwiki-Konflikte.
# This allows others to assist you in resolving interwiki problems.
# To help these people, you can upload the interwiki graphs to your
# webspace somewhere. Set the base URL here, e.g.:
# 'http://www.example.org/~yourname/interwiki-graphs/'
interwiki_graph_url = None

# Save file with local articles without interwikis.
without_interwiki = False

# Experimental feature:
# Store the page contents on disk (/cache/ directory) instead of loading
# them in RAM.
interwiki_contents_on_disk = False


############## SOLVE_DISAMBIGUATION SETTINGS ############
#
# Set disambiguation_comment[FAMILY][LANG] to a non-empty string to override
# the default edit comment for the solve_disambiguation bot.
# Use %s to represent the name of the disambiguation page being treated.
# Example:
#
# disambiguation_comment['wikipedia']['en'] = \
#    "Robot-assisted disambiguation (you can help!): %s"

sort_ignore_case = False

############## IMAGE RELATED SETTINGS ##############
# If you set this to True, images will be uploaded to Wikimedia
# Commons by default.
upload_to_commons = False


############## SETTINGS TO AVOID SERVER OVERLOAD ##############

# Slow down the robot such that it never requests a second page within
# 'minthrottle' seconds. This can be lengthened if the server is slow,
# but never more than 'maxthrottle' seconds. However - if you are running
# more than one bot in parallel the times are lengthened.
minthrottle = 1
maxthrottle = 60

# Slow down the robot such that it never makes a second page edit within
# 'put_throttle' seconds.
put_throttle = 10

# Sometimes you want to know when a delay is inserted. If a delay is larger
# than 'noisysleep' seconds, it is logged on the screen.
noisysleep = 3.0

# Defer bot edits during periods of database server lag.  For details, see
# http://www.mediawiki.org/wiki/Maxlag_parameter
# You can set this variable to a number of seconds, or to None (or 0) to
# disable this behavior. Higher values are more aggressive in seeking
# access to the wiki.
# It is recommended that you do not change this parameter unless you know
# what you are doing and have a good reason for it!
maxlag = 5

# Number of retries before giving up when putting a page
maxretries = 5

# Maximum of pages which can be retrieved by special pages. Increase this if
# you heavily use redirect.py with action "double", and especially if you're
# running solve_disambiguation.py with the -primary argument.
special_page_limit = 500


############## TABLE CONVERSION BOT SETTINGS ##############

# Split long paragraphs to make the source easier to read.
# Only table2wiki.py uses it for now.
splitLongParagraphs = False
# Sometimes HTML tables are indented for better reading.
# That can produce very ugly results.
deIndentTables = True
# table2wiki.py works quite stable, so you might switch to True
table2wikiAskOnlyWarnings = True
table2wikiSkipWarnings = False


############## WEBLINK CHECKER SETTINGS ##############

# How many external links should weblinkchecker.py check at the same time?
# If you have a fast connection, you might want to increase this number so
# that slow servers won't slow you down.
max_external_links = 100

report_dead_links_on_talk = False

############## DATABASE SETTINGS ##############
db_hostname = 'localhost'
db_username = 'wikiuser'
db_password = ''


############## SEARCH ENGINE SETTINGS ##############

# Some scripts allow querying Google via the Google Web API. To use this feature,
# you must install the pyGoogle module from http://pygoogle.sf.net/ and have a
# Google Web API license key. Note that Google doesn't give out license keys
# anymore.
# The Google Web API has been obsolete for a long time; the Google AJAX Search
# API can be used instead. You can sign up for an API key at
# http://code.google.com/apis/ajaxsearch/signup.html.
google_key = ''

# The Google AJAX Search API requires the referer website; this variable
# holds the referer web address you gave when you signed up for the key.
google_api_refer = ''

# Some scripts allow using the Yahoo! Search Web Services. To use this feature,
# you must install the pYsearch module from http://pysearch.sourceforge.net/
# and get a Yahoo AppID from http://developer.yahoo.com
yahoo_appid = ''

# To use Windows Live Search web service you must get an AppID from
# http://search.msn.com/developer
msn_appid = ''

# Using the Flickr api
flickr = {
    'api_key': u'',    # Provide your key!
    'review': False,   # Do we automatically mark our uploads as reviewed?
    'reviewer': u'',   # If so, under what reviewer name?
}

# Using the Panoramio api
panoramio = {
    'review': False,   # Do we automatically mark our uploads as reviewed?
    'reviewer': u'',   # If so, under what reviewer name?
}

# for all connections: proxy handle
# to use it, proxy['host'] has to support HTTP and include the port number
# (e.g. localhost:8080)
# if the proxy server needs authentication, set ('ID', 'PASSWORD') to
# proxy['auth'].
proxy = {
    'host': None,
    'auth': None,
}


############## COPYRIGHT SETTINGS ##############

# Enable/disable search engine in copyright.py script
copyright_google = True
copyright_yahoo = True
copyright_msn = False

# Perform a deep check, loading URLs to search if 'Wikipedia' is present.
# This may be useful to increase the number of correct results. If you haven't
# a fast connection, you might want to keep them disabled.
copyright_check_in_source_google = False
copyright_check_in_source_yahoo = False
copyright_check_in_source_msn = False

# Web pages may contain a Wikipedia text without the word 'Wikipedia' but with
# the typical '[edit]' tag as a result of a copy & paste procedure. You want
# no report for this kind of URLs, even if they are copyright violations.
# However, when enabled, these URLs are logged in a file.
copyright_check_in_source_section_names = False

# Limit number of queries for page.
copyright_max_query_for_page = 25

# Skip a specified number of queries
copyright_skip_query = 0

# Number of attempts on connection error.
copyright_connection_tries = 10

# Behavior if an exceeded error occurs.
#
# Possibilities:
#
#    0 = None
#    1 = Disable search engine
#    2 = Sleep (default)
#    3 = Stop
copyright_exceeded_in_queries = 2
copyright_exceeded_in_queries_sleep_hours = 6

# Append last modified date of URL to script result
copyright_show_date = True

# Append length of URL to script result
copyright_show_length = True

# By default the script tries to identify and skip text that contains a large
# comma separated list or only numbers. But sometimes that might be the
# only part unmodified of a slightly edited and not otherwise reported
# copyright violation. You can disable this feature to try to increase the
# number of results.
copyright_economize_query = True

############## HTTP SETTINGS ##############
# Use a persistent http connection. An http connection has to be established
# only once per site object, making stuff a whole lot faster. Do NOT EVER
# use this if you share Site objects across threads without proper locking.
# DISABLED FUNCTION. Setting this variable will not have any effect.
persistent_http = False

# Default socket timeout. Set to None to disable timeouts.
socket_timeout = 120  # set a pretty long timeout just in case...


############## FURTHER SETTINGS ##############

# The bot can make some additional changes to each page it edits, e.g. fix
# whitespace or positioning of interwiki and category links.

# This is an experimental feature; handle with care and consider re-checking
# each bot edit if enabling this!
cosmetic_changes = False

# If cosmetic changes are switched on, and you also have several accounts at
# projects where you're not familiar with the local conventions, you probably
# only want the bot to do cosmetic changes on your "home" wiki which you
# specified in config.mylang and config.family.
# If you want the bot to also do cosmetic changes when editing a page on a
# foreign wiki, set cosmetic_changes_mylang_only to False, but be careful!
cosmetic_changes_mylang_only = True
# The dictionary cosmetic_changes_enable should contain a tuple of languages
# for each site where you wish to enable in addition to your own language
# (if cosmetic_changes_mylang_only is set)
# Please set your dictionary by adding such lines to your user-config.py:
# cosmetic_changes_enable['wikipedia'] = ('de', 'en', 'fr')
cosmetic_changes_enable = {}
# The dictionary cosmetic_changes_disable should contain a tuple of languages
# for each site where you wish to disable cosmetic changes. You may use it
# when cosmetic_changes_mylang_only is False, but you can also disable your
# own language. This also overrides the settings in the
# cosmetic_changes_enable dictionary. Please set your dict by adding such
# lines to your user-config.py:
# cosmetic_changes_disable['wikipedia'] = ('de', 'en', 'fr')
cosmetic_changes_disable = {}
# Use the experimental disk cache to prevent huge memory usage
use_diskcache = False

# Retry loading a page on failure (back off 1 minute, 2 minutes, 4 minutes
# up to 30 minutes)
retry_on_fail = True

# How many pages should be put to a queue in asynchronous mode.
# If maxsize is <= 0, the queue size is infinite.
# Increasing this value will increase memory usage but could speed up
# processing. The higher the value, the smaller this effect becomes.
max_queue_size = 64


# End of configuration section

def makepath(path):
    """Return a normalized absolute version of the path argument.

    - if the given path already exists in the filesystem
      the filesystem is not modified.

    - otherwise makepath creates directories along the given path
      using the dirname of the path. You may append
      a '/' to the path if you want it to be a directory path.

    from holger@trillke.net 2002/03/18

    @param path: file path (or directory path ending in a separator)
    @return: os.path.normpath(os.path.abspath(path))
    @raise OSError: if the parent directory is missing and cannot be created

    """
    # Imported locally because this module deletes its top-level 'os' name
    # once the configuration has been loaded.
    from os import makedirs
    from os.path import normpath, dirname, exists, abspath

    dpath = normpath(dirname(path))
    if not exists(dpath):
        try:
            makedirs(dpath)
        except OSError:
            # Another process may have created the directory between the
            # exists() check and makedirs(); only a real failure is re-raised.
            if not exists(dpath):
                raise
    return normpath(abspath(path))

def datafilepath(*filename):
    """Return an absolute path to a data file in a standard location.

    Argument(s) are zero or more directory names, optionally followed by a
    data file name. The return path is offset to config.base_dir. Any
    directories in the path that do not already exist are created.

    """
    # Imported locally because this module deletes its top-level 'os' name
    # once the configuration has been loaded.
    import os
    return makepath(os.path.join(base_dir, *filename))

def shortpath(path):
    """Return a file path relative to config.base_dir.

    Paths that do not lie inside base_dir are returned unchanged; base_dir
    itself yields the empty string.

    """
    # Imported locally because this module deletes its top-level 'os' name
    # once the configuration has been loaded.
    import os
    if path == base_dir:
        return ''
    # Match on a whole path component so that a sibling such as
    # '<base_dir>-old/x' is not mistaken for a file inside base_dir.
    if base_dir.endswith(os.path.sep):
        prefix = base_dir
    else:
        prefix = base_dir + os.path.sep
    if path.startswith(prefix):
        return path[len(prefix):]
    return path

# is config verbose?
_verbose = False
for _arg in __sys.argv[1:]:
    if _arg == "-v" or _arg == "-verbose":
        _verbose = True
        break
if _verbose:
    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement.
    print("Config.py")

# Get the names of all known families, and initialize
# with empty dictionaries
import wikipediatools as _wt
_base_dir = _wt.get_base_dir()
# The dot before 'py' is escaped so that only real '<name>_family.py' files
# match.
_RfamilyFile = re.compile(r'(?P<name>.+)_family\.py$')
for _filename in os.listdir(os.path.join(_base_dir, 'families')):
    _m = _RfamilyFile.match(_filename)
    if _m:
        familyName = _m.group('name')
        usernames[familyName] = {}
        sysopnames[familyName] = {}
        disambiguation_comment[familyName] = {}

# System-level and User-level changes.
# Store current variables and their types.
_glv = {}
_glv.update(globals())
_gl = _glv.keys()
_tp = {}
for _key in _gl:
    if _key[0] != '_':
        _tp[_key] = type(globals()[_key])

# Get the user files
_thislevel = 0
_fns = [os.path.join(_base_dir, "user-config.py")]
for _filename in _fns:
    _thislevel += 1
    if os.path.exists(_filename):
        _filestatus = os.stat(_filename)
        _filemode = _filestatus[0]
        _fileuid = _filestatus[4]
        # The file is executed as code, so it is only trusted when it is owned
        # by the current user or root and is not writable by others. Windows
        # skips both checks.
        if __sys.platform == 'win32' or _fileuid in [os.getuid(), 0]:
            # 2 is octal 002, the "writable by others" permission bit.
            if __sys.platform == 'win32' or (_filemode & 2) == 0:
                execfile(_filename)
            else:
                print("WARNING: Skipped '%s': writeable by others." % _filename)
        else:
            print("WARNING: Skipped '%s': owned by someone else." % _filename)

# Test for obsoleted and/or unknown variables.
# Iterate over a snapshot: the loop itself binds module-level names
# (_key, _val, nt, ot) while it runs.
for _key, _val in list(globals().items()):
    if _key.startswith('_'):
        pass
    elif _key in _gl:
        nt = type(_val)
        ot = _tp[_key]
        if nt == ot or _val is None or ot == type(None):
            pass
        elif nt is int and (ot is float or ot is bool):
            pass
        elif ot is int and (nt is float or nt is bool):
            pass
        else:
            print("WARNING: Type of '%s' changed" % _key)
            print("      Was:  %s" % (ot,))
            print("      Now:  %s" % (nt,))
        # Remove the helpers so they are not exported as settings.
        del nt, ot
    else:
        print("WARNING: Configuration variable %r is defined but unknown. "
              "Misspelled?" % _key)

# Fix up default console_encoding
if console_encoding is None:
    if __sys.platform == 'win32':
        console_encoding = 'cp850'
    else:
        console_encoding = 'iso-8859-1'

# Save base_dir for use by other modules
base_dir = _base_dir
if _verbose:
    print("- base_dir:  %s" % (base_dir,))

# Exit message
if _verbose:
    print("- done.")

# When called as main program, list all configuration variables.
# The optional argument "modified" restricts the listing to values that
# differ from the defaults captured in _glv.
if __name__ == "__main__":
    import types
    _all = 1
    for _arg in __sys.argv[1:]:
        if _arg == "modified":
            _all = 0
        elif _arg == "-v":
            pass
        elif _arg == "-verbose":
            pass
        elif _arg.startswith("-dir:"):
            pass
        else:
            print("Unknown arg %s ignored" % _arg)
    _k = sorted(globals().keys())
    for _name in _k:
        if _name[0] != '_':
            if type(globals()[_name]) not in [types.FunctionType,
                                              types.ModuleType]:
                try:
                    if _all or _glv[_name] != globals()[_name]:
                        print("%s = %s" % (_name, repr(globals()[_name])))
                except KeyError:
                    # Not in the defaults snapshot: introduced by user-config.
                    print("%s =(new)= %s" % (_name, repr(globals()[_name])))

# cleanup all locally-defined variables
# Iterate over a snapshot of the names, because entries are deleted from the
# module dictionary inside the loop.
for __var in list(globals().keys()):
    if __var.startswith("_") and not __var.startswith("__"):
        del __sys.modules[__name__].__dict__[__var]

# Finally drop the loop variable and the helper modules themselves so that
# they are not exported as configuration settings.
del __var, __sys
del os, re