// User:Quarl/location_canonicalize.js

// User:Quarl/location_canonicalize.js - canonicalizes location WikiLinks
//   as per WikiProject Location Format

// Example: [[Seattle, Washington]] becomes [[Seattle, Washington|Seattle]], [[Washington]], [[USA]].

// depends: wikipage.js, util.js, wikitabs.js, wikiedit.js, autoedit.js

// quarl 2006-01-22 initial version
// quarl 2006-02-08 refactored to autoedit.js

//

// The location-canonicalization auto-editor instance; the handlers defined
// later in this file (initData, splitText, buildRegExp, replaceRegExp, _load)
// are attached to it.
// NOTE(review): the meaning of each argument is defined by autoedit.js, which
// is not visible here -- presumably an internal name, a tab label, a tab
// element id, a tooltip and an edit summary; confirm against autoedit.js.
var locz = new autoedit(
    'locz',
    'LocZ',
    'ca-locz',
    'Canonicalize location wikilinks',
    'Location canonicalization'
);

/**
 * Builds the per-country lookup tables and stores them in this.countries.
 *
 * Each entry exposes:
 *   states            - array of state/province/county names
 *   link_country      - wikitext appended when the country link is missing
 *   regexp_country    - matches a link to the country anywhere in a string
 *   regexp_country_sq - matches ", <country link>" at the start of a string
 *   regexp_substate   - matches "City, State" ($1 = city, $2 = state)
 *   regexp_state      - matches a bare state name
 *
 * NOTE(review): the empty parameter lists and the '[[' / ']]' string literals
 * were missing from the extracted source (apparently consumed as wiki markup)
 * and have been restored here.
 */
locz.initData = function() {
    // Escape regexp metacharacters so that names such as "Washington, D.C."
    // are matched literally rather than with "." as a wildcard.
    var escapeRegExp = function(s) {
        return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    };

    /**
     * @param {string[]} states          names of the country's subdivisions
     * @param {string}   link_country    country name, or a full [[...]] link
     * @param {string}   [regexp_country] regexp source matching a country link;
     *                                    defaults to a plain [[link_country]]
     */
    var CountryData = function(states, link_country, regexp_country) {
        var escapedStates = [];
        for (var i = 0; i < states.length; ++i) {
            escapedStates.push(escapeRegExp(states[i]));
        }
        var statesPattern = escapedStates.join('|');

        // Use a separate local instead of redeclaring the parameter.
        var countryPattern = regexp_country ||
            '\\[\\[' + escapeRegExp(link_country) + '\\]\\]';

        this.states = states;
        // Accept either a ready-made link or a bare page name.
        this.link_country = link_country.match(/\[/)
            ? link_country
            : '[[' + link_country + ']]';
        this.regexp_country = new RegExp(countryPattern);
        // Group the pattern so a top-level "|" cannot escape the "^" anchor.
        this.regexp_country_sq = new RegExp('^, *(?:' + countryPattern + ')');
        this.regexp_substate = new RegExp('^([^,]+), *(' + statesPattern + ')$');
        this.regexp_state = new RegExp('^(?:' + statesPattern + ')$');
    };

    this.countries = [
        new CountryData( // USA
            ['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
             'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho',
             'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine',
             'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi',
             'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey',
             'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio',
             'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',
             'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia',
             'Washington', 'West Virginia', 'Wisconsin', 'Wyoming',
             // not strictly a state, but needs to be qualified with country also
             'Washington, DC', 'Washington, D.C.'
            ],
            // NOTE(review): this may originally have been a piped link such as
            // '[[United States|USA]]' whose markup was lost -- confirm.
            'USA',
            '\\[\\[(?:United[ _]States(?:[ _][^|\\\]]+?)?|USA)(?:\\|[^|\\\]]+?)?\\]\\]'),

        new CountryData( // Canada
            ['British Columbia', 'Alberta', 'Saskatchewan', 'Manitoba',
             'Ontario', 'Quebec', 'New Brunswick', 'Nova Scotia',
             'Prince Edward Island', 'Newfoundland and Labrador'],
            'Canada'),

        new CountryData( // England
            ['Bedfordshire', 'Berkshire', 'City of Bristol',
             'Buckinghamshire', 'Cambridgeshire', 'Cheshire',
             'Cornwall', 'Cumbria', 'Derbyshire', 'Devon', 'Dorset',
             'Durham', 'East Riding of Yorkshire', 'East Sussex', 'Essex',
             'Gloucestershire', 'Greater London', 'Greater Manchester',
             'Hampshire', 'Herefordshire', 'Hertfordshire', 'Isle of Wight',
             'Kent', 'Lancashire', 'Leicestershire', 'Lincolnshire',
             'City of London', 'Merseyside', 'Norfolk', 'Northamptonshire',
             'Northumberland', 'North Yorkshire', 'Nottinghamshire',
             'Oxfordshire', 'Rutland', 'Shropshire', 'Somerset',
             'South Yorkshire', 'Staffordshire', 'Suffolk', 'Surrey',
             'Tyne and Wear', 'Warwickshire', 'West Midlands', 'West Sussex',
             'West Yorkshire', 'Wiltshire', 'Worcestershire'],
            'England')
    ];
};

/**
 * Splits the page text into chunks that are returned as separate array
 * elements: an optional leading hat-note line, an optional leading wikitable
 * (infobox), and the remainder of the text.
 *
 * @param {string} input  full wikitext
 * @returns {string[]}    the chunks in document order; the last element is
 *                        always the remaining text (possibly empty)
 */
locz.splitText = function(input) {
    var inputs = [];

    // Special case for a hat link (a first line of the form ":''...''"),
    // if there is one.
    var hatnote = input.match(/^: *''.*/);
    if (hatnote) {
        inputs.push(hatnote[0]);
        // The pattern is anchored at the start, so the rest begins right
        // after the matched text.
        input = input.slice(hatnote[0].length);
    }

    // Special case the first infobox ("{| ... \n|}"), if there is one.
    // Treat it separately, so that country links get added to the main
    // article text.
    // NOTE(review): the extracted source had an empty first alternative
    // ("(?:|{\|...)"), which made the pattern always match the empty string;
    // presumably an HTML-table alternative was lost in extraction (hence the
    // otherwise unneeded "i" flag). Only the wikitable form is matched here.
    var infobox = input.match(/^\{\|(?:.|\n)*?\n\|\}/i);
    if (infobox) {
        inputs.push(infobox[0]);
        input = input.slice(infobox[0].length);
    }

    inputs.push(input);
    return inputs;
};

/**
 * Returns the regexp that recognizes one wikilink.
 *
 * Capture 1 is the link target when the link is piped ("[[target|label]]"),
 * otherwise undefined; capture 2 is the label (or the whole link text for an
 * unpiped link). Surrounding spaces are excluded from both captures.
 *
 * @returns {RegExp}
 */
locz.buildRegExp = function() {
    return /\[\[ *(?:([^|\]]+?) *\| *)?([^\]]+?) *\]\]/;
};

/**
 * Canonicalizes one matched location wikilink.
 *
 * "[[City, State]]" becomes "[[City, State|City]], [[State]], [[Country]]";
 * the state link is left unlinked if d.left already links it, and the
 * country link is added only if it has not been mentioned before and does
 * not already follow the link. A redundant country link directly after the
 * match is removed when the country was already mentioned.
 *
 * @param {Object} d  edit context; this function reads d.left and d.right and
 *                    may assign d.right and d.text.
 *                    NOTE(review): presumably d.left / d.right are the text
 *                    before / after the match and d.text is the replacement
 *                    -- confirm against autoedit.js.
 * @param {Array}  m  match of the regexp from buildRegExp (m[1] = optional
 *                    link target, m[2] = label)
 * @returns {undefined}
 *
 * NOTE(review): the '[[' / ']]' string literals were missing from the
 * extracted source (apparently consumed as wiki markup) and have been
 * restored here.
 */
locz.replaceRegExp = function(d, m) {
    var wlink = m[1] || m[2];
    var wtext = m[2];

    // non-main namespace - usually a category
    if (wtext.match(/:/)) return;

    // piped link whose target differs from its label: leave it alone
    if (wlink !== wtext) return;

    // Indexed loop with a declared counter: the original "for (i in ...)"
    // leaked a global and enumerated array keys.
    for (var i = 0; i < this.countries.length; ++i) {
        var c = this.countries[i];
        var changes = 0;
        var wfull;

        var sub = wtext.match(c.regexp_substate);
        if (sub) {
            var city = sub[1];
            var state = sub[2];
            wfull = '[[' + city + ', ' + state + '|' + city + ']]';
            // only add link to state if we haven't linked it yet.
            // Literal substring test, so names containing "." are not
            // interpreted as regexp syntax.
            if (d.left.indexOf('[[' + state + ']]') !== -1) {
                wfull += ', ' + state;
            } else {
                wfull += ', [[' + state + ']]';
            }
            ++changes;
        } else if (wtext.match(c.regexp_state)) {
            // state link -- just need to add country link as necessary
            wfull = '[[' + wtext + ']]';
        }

        if (!wfull) continue;

        if (d.left.match(c.regexp_country)) {
            // Already mentioned country. Delete redundant subsequent
            // country links.
            var dup = d.right.match(c.regexp_country_sq);
            if (dup) {
                d.right = d.right.slice(dup.index + dup[0].length);
                // only count as a change if we actually delete it!
                ++changes;
            }
        } else if (!d.right.match(c.regexp_country_sq)) {
            // Haven't mentioned country earlier and it is not right after
            // the current link; add it.
            wfull += ', ' + c.link_country;
            ++changes;
        }

        if (changes) {
            d.text = wfull;
        }
        // The first country whose pattern matches decides the outcome.
        return;
    }

    return;
};

/**
 * Page-load entry point: calls locz.qAutoEdit() and then locz.addTab().
 *
 * NOTE(review): both methods come from outside this file (presumably
 * inherited from autoedit.js). The extracted source read "locz.qAutoEdit;"
 * and "locz.addTab;" -- no-op property reads -- because the "()" was lost
 * along with the function's parameter list; the calls are restored here.
 */
locz._load = function() {
    locz.qAutoEdit();
    locz.addTab();
};

// Run the entry point once the page has loaded.
addOnloadHook(locz._load);

//