-- Module:Sandbox/Peter coxhead/Tests

-- Load the 'strict' library first so that undeclared globals are reported.
require('strict')
-- Other modules used by the functions in this file.
local TaxonItalics = require('Module:TaxonItalics')
local ItalicTitle = require('Module:Italic title')
local p = {} -- table returned at the end of the file; holds the public functions
local l = {} -- separate out local functions

-- Render a list of strings as one '|'-delimited string, each item being
-- followed by its length in characters (as counted by mw.ustring.len).
--   items: sequence of strings
-- Returns the assembled string; an empty list yields just "|".
function l.show(items)
	local pieces = {}
	for idx = 1, #items do
		local item = items[idx]
		pieces[idx] = "|" .. item .. "" .. mw.ustring.len(item) .. ""
	end
	pieces[#pieces + 1] = "|"
	return table.concat(pieces)
end

-- *****************************************************************************
-- ========= Constants for states =========
-- State codes used while scanning a taxon name character by character
-- (l.doSplitTaxonName) and while re-assembling the units (l.joinUnits).
-- Each statement sits on its own line so that the banner comments above
-- cannot comment the definitions out.
local startState = 0   -- no unit has been started yet
local italState = 1    -- collecting a unit that is to be italicized
local whiteSpState = 2 -- collecting white space
local nonItalState = 3 -- collecting a unit that is not italicized
local entityState = 4  -- inside an entity that starts with '&' and ends with ';'
local endState = 9     -- marker for "past the last unit"

-- Split a string representing a taxon name into 'units'.
--   str: the taxon name to scan.
-- Returns two parallel sequences:
--   units  - the pieces of the name, in order (words, white space, special
--            characters and '&...;' entities);
--   states - for each unit, the state code under which it was collected
--            (italState, whiteSpState, nonItalState, entityState, or
--            startState for an empty trailing unit).
-- Words listed in cTerms are replaced by their standard abbreviation and
-- marked as non-italic when they are saved. The very last unit is stored
-- as it stands, without that replacement.
function l.doSplitTaxonName(str)
	-- connecting terms that are not italicized
	local cTerms = {
		--subsp.
		subspecies = "subsp.", ["subsp."] = "subsp.", subsp = "subsp.", ["ssp."] = "subsp.", ssp = "subsp.",
		--var.
		varietas = "var.", ["var."] = "var.", var = "var.",
		--subvar.
		subvarietas = "subvar.", ["subvar."] = "subvar.", subvar = "subvar.",
		--f.
		forma = "f.", ["f."] = "f.", f = "f.",
		--subf.
		subforma = "subf.", ["subf."] = "subf.", subf = "subf.",
		--subg.
		subgenus = "subg.", ["subg."] = "subg.", subg = "subg.",
		--sect.
		section = "sect.", ["sect."] = "sect.", sect = "sect.",
		--subsect.
		subsection = "subsect.", ["subsect."] = "subsect.", subsect = "subsect.",
		--ser.
		series = "ser.", ["ser."] = "ser.", ser = "ser.",
		--subser.
		subseries = "subser.", ["subser."] = "subser.", subser = "subser.",
		--cf.
		cf = "cf.", ["cf."] = "cf.", ["c.f."] = "cf."
	}
	local specialChrs = '[%(%)×%+]' -- pattern listing all specially treated characters
	-- entities for a nonbreaking space; a unit in entityState always starts
	-- with '&', so only entity spellings can ever match here
	local nbspEntities = {
		['&nbsp;'] = true, ['&#160;'] = true, ['&#xA0;'] = true, ['&#x00A0;'] = true
	}
	local units = {}
	local states = {}
	local state = startState
	local j = 0
	local currUnit = ''
	local chr

	-- stateFor returns the state in which a unit beginning with character c
	-- is collected
	local function stateFor(c)
		if c == ' ' then
			return whiteSpState
		elseif mw.ustring.match(c, specialChrs) then -- c == '(' or c == ')' or c == '×' or c == '+'
			return nonItalState
		elseif c == '&' then
			return entityState
		end
		return italState -- other kind of character
	end

	-- startUnit starts a new unit to be taken from the string str
	local function startUnit(nextState)
		currUnit = chr
		state = nextState
	end

	-- saveUnit saves the current unit taken from the string str and starts a
	-- new one
	local function saveUnit(nextState)
		-- check for words that don't get italicized (?at this position)
		if state == italState and cTerms[currUnit] then
			currUnit = cTerms[currUnit]
			state = nonItalState
		end
		j = j + 1
		units[j] = currUnit
		states[j] = state
		currUnit = chr
		state = nextState
	end

	local n = mw.ustring.len(str)
	for i = 1, n do
		chr = mw.ustring.sub(str, i, i)
		if state == startState then
			startUnit(stateFor(chr))
		elseif state == whiteSpState then
			local nextState = stateFor(chr)
			-- further spaces are ignored, so a run of spaces stays one unit
			if nextState ~= whiteSpState then
				saveUnit(nextState)
			end
		elseif state == nonItalState then
			-- every character after a non-italic one starts a new unit
			saveUnit(stateFor(chr))
		elseif state == entityState then
			currUnit = currUnit .. chr
			if chr == ';' then
				-- the entity is complete; the next unit starts out empty
				chr = ''
				-- nonbreaking spaces are treated as whitespace
				if nbspEntities[currUnit] then
					state = whiteSpState
				end
				saveUnit(startState)
			end
		else -- state == italState
			local nextState = stateFor(chr)
			if nextState == italState then
				currUnit = currUnit .. chr
			else
				saveUnit(nextState)
			end
		end
	end
	-- save the last unit and its state
	j = j + 1
	units[j] = currUnit
	states[j] = state
	return units, states
end

-- Clean up the taxon name passed as the first frame argument, split it into
-- units and join the units again via l.joinUnits.
--   frame.args[1]: the taxon name (defaults to "").
-- Returns the string built by l.joinUnits, followed by the trailing
-- parenthesized word (if any) that was set aside before splitting.
function p.italicizeTaxonName(frame)
	local str = frame.args[1] or ""
	-- first clean up the name: collapse white space, drop bold/italic quotes
	str = mw.ustring.gsub(str, "%s+", " ")
	str = mw.ustring.gsub(str, "'''", "")
	str = mw.ustring.gsub(str, "''", "")
	-- NOTE(review): the pattern below is empty, so this call changes nothing;
	-- presumably the original pattern was lost - confirm what should be removed
	str = mw.ustring.gsub(str, "", "")
	-- remove the last 'word' if it appears to be a disambiguating term
	local last = l.lastWord(str)
	if mw.ustring.match(last, "%([a-z].*%)") then
		local ln = mw.ustring.len(last)
		str = mw.ustring.sub(str, 1, -ln - 1)
	else
		last = ''
	end
	-- now split up the name into 'units'; these must be locals, because
	-- assigning undeclared globals is an error under require('strict')
	local units, states = l.doSplitTaxonName(str)
	-- finally, put everything back together, correctly italicized
	return l.joinUnits(units, states) .. last
end

-- Join the units produced by l.doSplitTaxonName back into one string.
--   units:  sequence of strings
--   states: sequence of state codes, parallel to units
-- Returns '\n' followed by the units in order. An opening marker is put in
-- front of a run of italic units and a closing marker after it; a run
-- continues across exactly one white-space unit that is followed by another
-- italic unit.
-- Neither argument is modified.
function l.joinUnits(units, states)
	-- NOTE(review): both markers are empty strings in this source, so no
	-- markup is actually emitted; presumably the italic markup was lost -
	-- confirm the intended opening and closing markers
	local italOpen = ""
	local italClose = ""
	local n = #units
	local out = { '\n' }
	local openItalics = true -- true when the next italic unit must open a run
	for j = 1, n do
		if states[j] == italState then
			if openItalics then
				out[#out + 1] = italOpen
				openItalics = false
			end
			out[#out + 1] = units[j]
			-- only look at positions that belong to the list; anything past
			-- the last unit ends the run
			local runContinues = j + 2 <= n
				and states[j + 1] == whiteSpState
				and states[j + 2] == italState
			if not runContinues then
				out[#out + 1] = italClose
				openItalics = true
			end
		else
			out[#out + 1] = units[j]
		end
	end
	return table.concat(out)
end
-- *****************************************************************************

-- Try to expand 'Template:Taxonomy/<taxon>' with the 'machine code'
-- argument set to <item>, and report whether the expansion succeeded.
--   frame.args[1]: taxon (defaults to "")
--   frame.args[2]: item (defaults to "")
-- Returns 'ok=true; >...<' or 'ok=false; >...<', where '...' is the second
-- value returned by pcall.
function p.test(frame)
	local taxon = frame.args[1] or ""
	local item = frame.args[2] or ""
	local request = {
		title = 'Template:Taxonomy/' .. taxon,
		args = { ['machine code'] = item }
	}
	local ok, info = pcall(frame.expandTemplate, frame, request)
	local prefix = ok and 'ok=true; >' or 'ok=false; >'
	return prefix .. info .. '<'
end

-- Report whether a string matches a pattern.
--   frame.args[1]: the string to test (defaults to '')
--   frame.args[2]: the pattern (defaults to "%([A-Z].*%)")
-- Returns 'matched' or 'not matched'.
function p.test1(frame)
	local subject = frame.args[1] or ''
	local patt = frame.args[2] or "%([A-Z].*%)"
	local hit = mw.ustring.match(subject, patt)
	return hit and 'matched' or 'not matched'
end

-- Store the first argument in a nested table and read it back.
--   frame.args[1]: any string (defaults to '')
-- Returns 'Argument 1 = ' followed by the argument.
function p.test2(frame)
	local rows = {
		{ a = frame.args[1] or '', e = 'Argument 1 = ' }
	}
	local first = rows[1]
	return first.e .. first.a
end

-- Split the first argument on the second (plain text, not a pattern) and
-- show the pieces, then show a value stored at index 9 of a nested table.
--   frame.args[1]: string to split (defaults to '')
--   frame.args[2]: separator (defaults to '')
-- Returns each piece followed by a space (empty pieces shown as 'EMPTY',
-- nil pieces as 'NIL'), then ' /TEST / /TEST'.
function p.test3(frame)
	local a1 = frame.args[1] or ''
	local a2 = frame.args[2] or ''
	local t = mw.text.split(a1, a2, true)
	-- collect the output locally; assigning to an undeclared global is an
	-- error under require('strict')
	local shown = {}
	for i = 1, #t do
		local piece = t[i]
		if piece == nil then
			piece = 'NIL'
		elseif piece == '' then
			piece = 'EMPTY'
		end
		shown[i] = piece .. ' '
	end
	local res = table.concat(shown)
	local tbl = {}
	tbl[1] = {}
	tbl[1][9] = 'TEST'
	return res .. ' /' .. tbl[1][9] .. ' /' .. ' /' .. table.concat(tbl[1], ',', 9, 9)
end

-- Show the first argument and its first character; a leading '[' is
-- reported as '*wikilink*'.
--   frame.args[1]: any string (defaults to '')
function p.test4(frame)
	local arg1 = frame.args[1] or ''
	local lead = mw.ustring.sub(arg1, 1, 1)
	local shown = (lead == '[') and '*wikilink*' or lead
	return 'Frame arg1 = ' .. arg1 .. ', first char = ' .. shown
end

-- Build a flat table and a table of records, then concatenate their
-- contents. The frame argument is not used.
-- Returns the string 'test', two spaces, then 'test+0'.
function p.test5(frame)
	local flat = { 'test' }
	local records = { { k = 'test', v = 0 } }
	local rec = records[1]
	return flat[1] .. ' ' .. ' ' .. rec.k .. '+' .. rec.v
end

-- Report whether the page 'Taxonomy/<taxon>' exists in the Template
-- namespace, using a title object.
--   frame.args[1]: taxon (defaults to 'Life')
-- Returns 'Taxonomy/<taxon> exists' or 'Taxonomy/<taxon> does not exist'.
function p.existsTest1(frame)
	local taxon = frame.args[1] or 'Life'
	local pageName = 'Taxonomy/' .. taxon
	-- mw.title.new returns nil when the text is not a valid title; treat
	-- that as "does not exist" instead of failing on the nil index
	local title = mw.title.new(pageName, 'Template')
	if title and title.exists then
		-- the leading space keeps the page name and the verdict apart
		return pageName .. ' exists'
	end
	return pageName .. ' does not exist'
end

-- Report whether 'Template:Taxonomy/<taxon>' can be expanded; a failed
-- expansion is reported as the template not existing.
--   frame.args[1]: taxon (defaults to 'Life')
--   frame.args[2]: value for the 'machine code' argument (defaults to 'all')
-- Returns the template name followed by ' exists' or ' does not exist'.
function p.existsTest2(frame)
	local template = 'Template:Taxonomy/' .. (frame.args[1] or 'Life')
	local item = frame.args[2] or 'all'
	local request = { title = template, args = { ['machine code'] = item } }
	-- only the success flag is of interest; the expansion itself is dropped
	local expanded = pcall(frame.expandTemplate, frame, request)
	return template .. (expanded and ' exists' or ' does not exist')
end

-- Entry point for l.genus: passes on the first frame argument (or '' when
-- it is absent), scanning from position 1.
function p.getGenus(frame)
	local name = frame.args[1]
	if not name then
		name = ''
	end
	return l.genus(name, 1)
end

-- Return the first word of str at or after position init, i.e. the longest
-- run of non-white-space characters starting there.
--   str:  the name to scan
--   init: character position to start from (defaults to 1)
-- If that word is just the hybrid sign (character 215, the multiplication
-- sign), the word two positions further on is appended after a space.
function l.genus(str, init)
	init = init or 1
	local res = mw.ustring.match(str, '^[^%s]*', init)
	if res == mw.ustring.char(215) then
		-- skip the sign and the character after it, relative to where this
		-- scan started; a fixed position would rescan the same sign forever
		-- when one hybrid sign follows another
		res = res .. ' ' .. l.genus(str, init + 2)
	end
	return res
end

-- Entry point for l.lastWord: passes on the first frame argument, or ''
-- when it is absent.
function p.getLastWord(frame)
	local text = frame.args[1]
	if not text then
		text = ''
	end
	return l.lastWord(text)
end

-- Return what follows the last white-space character of str; when str
-- contains no white space, str itself is returned.
function l.lastWord(str)
	-- the greedy '.*' runs up to the final white-space character, so one
	-- replacement removes everything before the last word
	local tail, hits = mw.ustring.gsub(str, '.*%s', '', 1)
	if hits ~= 0 then
		return tail
	end
	return str
end

-- Apply the substitution '([A-Z]).- (.*)' -> '%1. %2' to the first frame
-- argument and report how many substitutions were made.
--   frame.args[1]: the text (defaults to '')
-- Returns the resulting text followed by ' (<count> matches)'.
function p.abbreviate(frame)
	local name = frame.args[1] or ''
	local shortened, count = mw.ustring.gsub(name, '([A-Z]).- (.*)', '%1. %2')
	return table.concat({ shortened, ' (', tostring(count), ' matches)' })
end

-- Expand the 'Taxon italics' template on the first frame argument and pass
-- the result to the DISPLAYTITLE parser function.
--   frame.args[1]: the page name (defaults to '')
-- Returns whatever the DISPLAYTITLE call returns.
function p.italicTaxonTitle(frame)
	local rawName = frame.args[1] or ''
	local styledName = frame:expandTemplate{
		title = 'Taxon italics',
		args = { rawName }
	}
	return frame:callParserFunction{
		name = 'DISPLAYTITLE',
		args = { styledName }
	}
end

-- Check that the text of a link is consistent with its target.
--   frame.args[1]: the link target (defaults to '')
--   frame.args[2]: the link text (defaults to '')
-- Returns true when either value is empty or both are the same. Otherwise
-- returns whether the redirect target of the page named by the link text
-- equals the title of the link target; false when either value cannot be
-- turned into a title object.
function p.linkCheck(frame)
	local linkTarget = frame.args[1] or ''
	local linkText = frame.args[2] or ''
	-- nothing to check unless both are given and they differ
	if linkTarget == '' or linkText == '' or linkTarget == linkText then
		return true
	end
	local linkTargetTitle = mw.title.new(linkTarget)
	local linkTextTitle = mw.title.new(linkText)
	-- mw.title.new returns nil for text that is not a valid title
	if not (linkTargetTitle and linkTextTitle) then
		return false
	end
	return linkTextTitle.redirectTarget == linkTargetTitle
end

-- Entry point for l.doParseSpeciesName: parses the first frame argument
-- (or '' when absent) and shows the three parts found.
-- Returns 'genus =<genus>, disambig=<disambig>, species=<species>'.
function p.parseSpeciesName(frame)
	local genus, disambig, species = l.doParseSpeciesName(frame.args[1] or '')
	return table.concat({
		'genus =', genus,
		', disambig=', disambig,
		', species=', species
	})
end

-- Split a species name into genus, disambiguation term and specific epithet.
--   speciesName: words separated by single spaces
-- Returns three strings: genus, disambig, species. Parts that are absent
-- are returned as ''.
--   * A hybrid sign (character 215) as the first word is joined to the
--     following word to form the genus; when nothing follows it, the genus
--     is returned as ''.
--   * A second word that starts with '(' is taken as the disambiguation term.
--   * A hybrid sign as the species word is joined to the following word;
--     when nothing follows it, the species is returned as ''.
function l.doParseSpeciesName(speciesName)
	local hybridSign = mw.ustring.char(215)
	local genus = ''
	local disambig = ''
	local species = ''
	local words = mw.text.split(speciesName, " ", true)
	local nWords = #words
	local currWord = 1
	if currWord > nWords then
		return genus, disambig, species
	end
	genus = words[currWord]
	if genus == hybridSign then -- hybrid sign
		currWord = currWord + 1
		if currWord > nWords then
			return '', disambig, species
		end
		genus = genus .. ' ' .. words[currWord]
	end
	currWord = currWord + 1
	if currWord > nWords then
		return genus, disambig, species
	end
	species = words[currWord]
	if mw.ustring.sub(species, 1, 1) == '(' then
		disambig = species
		currWord = currWord + 1
		if currWord > nWords then
			return genus, disambig, ''
		end
		species = words[currWord]
	end
	if species == hybridSign then -- hybrid sign
		currWord = currWord + 1
		if currWord > nWords then
			return genus, disambig, ''
		end
		species = species .. ' ' .. words[currWord]
	end
	return genus, disambig, species
end

-- =============================================================================

-- Entry point for l.doinfraspeciesboxName. The seven positional frame
-- arguments are, in order: name, genus, species, connecting term,
-- infraspecies, base page title and italic title; each defaults to ''.
function p.infraspeciesboxName(frame)
	local name = frame.args[1] or ''
	local genus = frame.args[2] or ''
	local species = frame.args[3] or ''
	local ct = frame.args[4] or ''
	local infraspecies = frame.args[5] or ''
	local basePageTitle = frame.args[6] or ''
	local italicTitle = frame.args[7] or ''
	return l.doinfraspeciesboxName(name, genus, species, ct, infraspecies, basePageTitle, italicTitle)
end

-- Work out the name to show for an infraspecific taxon and, when
-- appropriate, italicize the page title.
--   name:          the name supplied, '' if none
--   genus:         genus, possibly followed by a parenthesized term and/or
--                  a '/...' qualifier, both of which are stripped
--   species:       specific epithet
--   ct:            connecting term, '' if none
--   infraspecies:  infraspecific epithet
--   basePageTitle: page title that is compared with the taxon name
--   italicTitle:   'no' suppresses italicization; 'test' appends the
--                  formatted page title to the result instead of calling
--                  DISPLAYTITLE
-- Returns the name. Italicization only happens when basePageTitle equals
-- the taxon name assembled from the other arguments.
-- NOTE(review): the meaning of the extra arguments passed to
-- TaxonItalics.italicizeTaxonName is defined in that module - confirm there.
function l.doinfraspeciesboxName(name, genus, species, ct, infraspecies, basePageTitle, italicTitle)
	-- strip any disambig and qualifier
	genus = mw.ustring.gsub(genus, '%s+%b()$', '', 1)
	genus = mw.ustring.gsub(genus, '/.*$', '', 1)
	local taxon = genus .. ' ' .. species
	if ct == '' then
		taxon = taxon .. ' ' .. infraspecies
	else
		taxon = taxon .. ' ' .. ct .. ' ' .. infraspecies
	end
	local italicizeP = italicTitle ~= 'no' and (basePageTitle == taxon) -- use basePageTitle to match taxon
	-- deal with taxobox name (i.e. its caption)
	if name == '' then
		name = basePageTitle
		if italicizeP then
			name = TaxonItalics.italicizeTaxonName(name, false, false)
		end
	end
	-- deal with page title
	if italicizeP then
		-- formatting the page title with DISPLAYTITLE needs the full page title
		local pageTitle = mw.title.getCurrentTitle().text
		-- format pageTitle, not italicizing any parenthesized term
		pageTitle = TaxonItalics.italicizeTaxonName(pageTitle, false, false, true)
		if italicTitle ~= 'test' then
			mw.getCurrentFrame():callParserFunction('DISPLAYTITLE', pageTitle)
		else
			name = name .. ' \\Italic title\\ ' .. pageTitle -- for testing and debugging
		end
	end
	return name
end
-- =============================================================================

-- Turn the positional arguments of the parent frame into a text list in
-- which every item is bold.
--   frame.args.conj: the conjunction placed before the last item
--                    (defaults to "or")
-- Returns '' when there are no items; otherwise each item is wrapped in
-- ''' markers, items are separated by ", ", and the last item is preceded
-- by the conjunction (with a comma in front of it when there are more than
-- two items).
function p.boldList(frame)
	local items = {}
	for _, v in ipairs(frame:getParent().args) do
		items[#items + 1] = v
	end
	if #items == 0 then
		return ""
	end
	-- every separator first closes the bold of the item before it and then
	-- opens the bold of the item after it, so the markers always pair up
	local separator = "''', '''"
	local conj = "'''" .. (#items > 2 and ", " or " ") .. (frame.args.conj or "or") .. " '''"
	return "'''" .. mw.text.listToText(items, separator, conj) .. "'''"
end

-- Return the table holding the public functions as the module's value.
return p