-- Module:Sandbox/trappist the monk/test

-- This code is a test of an alternate method for dissecting IETF language tags. Because this is a hack, it is only meant to be run from the debug console. To do that, in the console window below, type, for example: =p.get_ietf_parts ({args = {'sr-Latn-RS'}})

local p = {};																	-- module table; local so that loading the module does not create a global. It is handed to the caller by the return statement at the end of the file

--[[--< P A R S E _ I E T F >--

tries each pattern in <pattern_table>, in order, against <source> and stops at the first pattern that matches.

<source> - the string to be dissected
<pattern_table> - sequence of Lua patterns; each pattern may have as many as four captures

returns four values: the captures of the first matching pattern; captures that the pattern does not have are
returned as nil.  Returns nothing when no pattern matches.

]]

local function parse_ietf (source, pattern_table)
	for _, candidate in ipairs (pattern_table) do
		local first, second, third, fourth = source:match (candidate);

		if first then															-- first capture is set only when the whole pattern matched
			return first, second, third, fourth;
		end
	end
end

--[[--< G E T _ I E T F _ P A R T S >--

extracts and returns IETF language tag parts:
	primary language subtag (required) - 2 or 3 character IANA language code
	script subtag - four character IANA script code
	region subtag - two-letter or three digit IANA region code
	variant subtag - four digit or 5-8 alnum variant code
	private subtag - x- followed by 1-8 alnum private code; only supported with the primary language tag

in any one of these forms
	lang					lang-variant
	lang-script				lang-script-variant
	lang-region				lang-region-variant
	lang-script-region		lang-script-region-variant
	lang-x-private

each of lang, script, region, variant, and private, when used, must be well formed.  This test version checks
only the shape of each subtag (letter / digit counts); it does not look anything up in a registry.

The tag is dissected by counting its hyphens and then trying only the pattern groups that have that many hyphens.

NOTE: the next two paragraphs describe behaviour that the code below does not implement; presumably they were
inherited from the function that this test is derived from.

Languages with both two- and three-character code synonyms are promoted to the two-character synonym because
the IANA registry file omits the synonymous three-character code; we cannot depend on browsers understanding
the synonymous three-character codes in the lang= attribute.

For {{lang-xx}} templates, the parameters |script=, |region=, and |variant= are supported (not supported in
{{lang}} because those parameters are superfluous to the IETF subtags in |code=)

<frame> - table whose args[1] holds the language tag to dissect

returns (these are the debug forms; the six-value forms that they stand in for are kept as comments in the code):
	on success: one string - code, script, region, variant, private joined with ', '; omitted parts are empty strings
	when the tag is missing, empty, or matches no pattern: nil, nil, nil, nil, nil, error message
	when the tag has more than three hyphens: one string - the error message

see http://www.rfc-editor.org/rfc/bcp/bcp47.txt section 2.1

]]

function p.get_ietf_parts (frame)
	local code, script, region, variant, private;
	local source = frame and frame.args and frame.args[1];						-- debug: the tag comes from the first positional argument

	local csrv_patterns = {														-- code, script, region, variant patterns
		'^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)%-(%d%d%d%d)$',							-- cc-Ssss-RR-variant (where variant is 4 digits)
		'^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)%-(%d%d%d%d)$',						-- cc-Ssss-DDD-variant (where region is 3 digits; variant is 4 digits)
		'^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)%-([%a%d][%a%d][%a%d][%a%d][%a%d][%a%d]?[%a%d]?[%a%d]?)$',	-- cc-Ssss-RR-variant (where variant is 5-8 alnum characters)
		'^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)%-([%a%d][%a%d][%a%d][%a%d][%a%d][%a%d]?[%a%d]?[%a%d]?)$',	-- cc-Ssss-DDD-variant (where region is 3 digits; variant is 5-8 alnum characters)
		}

	local crv_patterns = {														-- code, region, variant patterns
		'^(%a%a%a?)%-(%a%a)%-(%d%d%d%d)$',										-- cc-RR-variant (where variant is 4 digits)
		'^(%a%a%a?)%-(%d%d%d)%-(%d%d%d%d)$',									-- cc-DDD-variant (where region is 3 digits; variant is 4 digits)
		'^(%a%a%a?)%-(%a%a)%-([%a%d][%a%d][%a%d][%a%d][%a%d][%a%d]?[%a%d]?[%a%d]?)$',	-- cc-RR-variant (where variant is 5-8 alnum characters)
		'^(%a%a%a?)%-(%d%d%d)%-([%a%d][%a%d][%a%d][%a%d][%a%d][%a%d]?[%a%d]?[%a%d]?)$',	-- cc-DDD-variant (where region is 3 digits; variant is 5-8 alnum characters)
		}

	local csv_patterns = {														-- code, script, variant patterns
		'^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d%d)$',									-- cc-Ssss-variant (where variant is 4 digits)
		'^(%a%a%a?)%-(%a%a%a%a)%-([%a%d][%a%d][%a%d][%a%d][%a%d][%a%d]?[%a%d]?[%a%d]?)$',	-- cc-Ssss-variant (where variant is 5-8 alnum characters)
		}

	local csr_patterns = {														-- code, script, region patterns
		'^(%a%a%a?)%-(%a%a%a%a)%-(%a%a)$',										-- cc-Ssss-RR
		'^(%a%a%a?)%-(%a%a%a%a)%-(%d%d%d)$',									-- cc-Ssss-DDD (where region is 3 digits)
		}

	local cv_patterns = {														-- code, variant patterns
		'^(%a%a%a?)%-(%d%d%d%d)$',												-- cc-variant (where variant is 4 digits)
		'^(%a%a%a?)%-([%a%d][%a%d][%a%d][%a%d][%a%d][%a%d]?[%a%d]?[%a%d]?)$',	-- cc-variant (where variant is 5-8 alnum characters)
		}

	local cr_patterns = {														-- code, region patterns
		'^(%a%a%a?)%-(%a%a)$',													-- cc-RR
		'^(%a%a%a?)%-(%d%d%d)$',												-- cc-DDD (region is 3 digits)
		}

	local cs_patterns = {														-- code, script patterns
		'^(%a%a%a?)%-(%a%a%a%a)$',												-- cc-Ssss
		}

	local cp_patterns = {														-- code, private-use patterns
		'^(%a%a%a?)%-x%-([%a%d][%a%d]?[%a%d]?[%a%d]?[%a%d]?[%a%d]?[%a%d]?[%a%d]?)$'	-- cc-x-pppppppp where private is 1-8 alnum characters
		}

	local c_patterns = {														-- code-only patterns
		'^(%a%a%a?)$',															-- cc
		}

	if not source or '' == source then											-- stands in for the is_set() test, which is not available in this sandbox
		return nil, nil, nil, nil, nil, 'missing language tag';
	end

	local _, hyphen_count = source:gsub ('%-', '');								-- second return value of gsub() is the number of hyphens in the tag

	if 0 == hyphen_count then
		code = source:match (c_patterns[1]);									-- done this way because there is only one pattern

	elseif 1 == hyphen_count then												-- try script, then region, then variant; stop at the first match
		code, script = source:match (cs_patterns[1]);							-- done this way because there is only one pattern
		if not code then
			code, region = parse_ietf (source, cr_patterns);
		end
		if not code then
			code, variant = parse_ietf (source, cv_patterns);
		end

	elseif 2 == hyphen_count then												-- try script-region, script-variant, region-variant, then private use; stop at the first match
		code, script, region = parse_ietf (source, csr_patterns);
		if not code then
			code, script, variant = parse_ietf (source, csv_patterns);
		end
		if not code then
			code, region, variant = parse_ietf (source, crv_patterns);
		end
		if not code then
			code, private = source:match (cp_patterns[1]);						-- the -x- counts as an element but we don't return that; done this way because there is only one pattern
		end

	elseif 3 == hyphen_count then
		code, script, region, variant = parse_ietf (source, csrv_patterns);

	else
		return table.concat ({'unrecognized language tag: ', source});			-- debug return
--		return nil, nil, nil, nil, nil, table.concat ({'unrecognized language tag: ', source});		-- don't know what we got but it is malformed - too many hyphens
	end

	if not code then
		return nil, nil, nil, nil, nil, table.concat ({'unrecognized language tag: ', source});		-- don't know what we got but it is malformed
	end

--	return code, script, region, variant, private;
	return table.concat ({code, script or '', region or '', variant or '', private or ''}, ', ');	-- debug return
end

return p;																		-- hand the module table, and with it get_ietf_parts(), to whatever loaded this module