-- Module:Sandbox/Erutuon

-- Export table: the public functions of this module are stored as fields of p.
local p = {}

--- Generate Lua source text for a data table listing the code points of the
-- Default_Ignorable_Code_Point derived property.
--
-- The property section is read from a wiki page that holds the text of
-- DerivedCoreProperties.txt.
--
-- @param frame  Scribunto frame; frame.args[1] is the title of the page to
--               read (defaults to
--               "User:Erutuon/Unicode/DerivedCoreProperties.txt").
-- @return       The filled-in template as a string.
-- Raises an error if the page cannot be read or the section is not found.
function p.show(frame)
	local page = frame.args[1] or "User:Erutuon/Unicode/DerivedCoreProperties.txt"

	-- mw.title.new can return nil and getContent() returns nil for a page
	-- without content, so both are checked before the text is used.
	local title = mw.title.new(page)
	local text = assert(
		title and title:getContent(),
		"Could not read the content of page '" .. page .. "'.")

	local defaultIgnorable = assert(
		text:match("Derived Property: Default_Ignorable_Code_Point.-(%f[^\n]%x%x%x%x.-)%s*\n# Total code points"),
		"Could not find the Default_Ignorable_Code_Point section in '" .. page .. "'.")

	local singles, ranges = {}, {}
	for codePoint1, codePoint2 in defaultIgnorable:gmatch("%f[^\n%z](%x+)%.?%.?(%x*)") do
		-- The second capture is empty for a single code point; tonumber then
		-- yields nil, which is what the checks below rely on.
		codePoint1, codePoint2 = tonumber(codePoint1, 16), tonumber(codePoint2, 16)

		local lastRange = ranges[#ranges]
		if lastRange and lastRange[2] == codePoint1 - 1 then
			-- This entry starts right after the previous range: extend it.
			lastRange[2] = codePoint2 or codePoint1
		elseif not codePoint2 then
			singles[codePoint1] = true
		else
			table.insert(ranges, { codePoint1, codePoint2 })
		end
	end

	-- The first "..." receives the single code points, the second the ranges.
	local template = [[ local data = {}

data.defaultIgnorable = { singles = { ...	},	ranges = { ...	}, }

return data ]]

	local printedRanges = {}
	for _, range in ipairs(ranges) do
		local low, high = unpack(range)
		printedRanges[#printedRanges + 1] =
			('\t\t{ 0x%05X, 0x%05X },'):format(low, high)
	end

	local printedSingles = {}
	for codepoint in require 'Module:TableTools'.sortedPairs(singles) do
		printedSingles[#printedSingles + 1] =
			('\t\t[0x%05X] = true,'):format(codepoint)
	end

	-- gsub also returns a replacement count; assigning to a single local
	-- keeps only the resulting string.
	local data = template
		:gsub('%.%.%.', table.concat(printedSingles, '\n'), 1)
		:gsub('%.%.%.', table.concat(printedRanges, '\n'), 1)

	return data
end

local Unicode_data = require "Module:Unicode data/sandbox" local fun = require "Module:fun" local m_table = require "Module:TableTools"

--- Raise an error whose message is built with string.format.
--
-- Two call forms are accepted:
--   errorf(level, format, ...)  -- explicit error level
--   errorf(format, ...)         -- error is attributed to errorf's caller
--
-- @param level  Either a number (error level relative to the caller of
--               errorf) or the format string itself.
-- @param ...    The format string (when level is a number) followed by the
--               values to format.
local function errorf(level, ...)
	if type(level) == "number" then
		-- One extra level so that the count is relative to errorf's caller.
		return error(string.format(...), level + 1)
	else
		-- level is actually the format string.
		return error(string.format(level, ...), 2)
	end
end

--- List the language codes used in {{lang|...}} and {{lang-...}} templates
-- on a page.
--
-- @param frame  Scribunto frame; frame.args[1] is the page title (defaults to
--               "English language").
-- @return       The distinct language codes joined with ", ", or nothing if
--               the page could not be read (a message is logged instead).
function p.search_for_language_codes(frame)
	local page_name = frame.args[1] or "English language"

	local success, title_object = pcall(mw.title.new, page_name)
	if not (success and title_object) then
		mw.log(string.format("Could not make title object for '%s'.", page_name))
		return
	end

	-- getContent() returns nil when the page has no content to read.
	local content = title_object:getContent()
	if not content then
		mw.log(string.format("Could not get content of '%s'.", page_name))
		return
	end

	-- Used as a set so that each code is reported once.
	local language_codes = {}
	for lang_template in content:gmatch("{{lang[^}]+") do
		local template_name = lang_template:match("{{([^|}]+)")
		local language_code
		if template_name == "lang" then
			-- {{lang|code|...}}: the code is the first parameter.
			language_code = lang_template:match("{{lang|([^|}]+)")
		elseif template_name:find("^lang-") then
			-- {{lang-code|...}}: the code is part of the template name.
			language_code = lang_template:match("{{lang-([^|}]+)")
		end
		if language_code then
			language_codes[language_code] = true
		end
	end

	return table.concat(m_table.keysToList(language_codes), ", ")
end

-- Metatable for the tables that hold parsed language subtags. Its __index
-- table supplies the methods and the shared constant tables used by them.
local parsed_subtags_mt = {
	__index = {
		-- Record an error on the object and finish it.
		-- "error_key" is a key of error_messages.
		-- "index" is the ordinal of the subtag in which the error was found.
		throw = function (self, error_key, index)
			self.error = self.error_messages[error_key]
			-- Everything from the offending segment onwards is reported.
			self.invalid = table.concat(self.input, "-", index)
			return self:remove_unnecessary_fields()
		end,

		-- Drop the internal "input" field, regularize capitalization and run
		-- the validation checks. Returns self.
		remove_unnecessary_fields = function (self)
			-- Only useful internally.
			self.input = nil
			self:pretty_print()
			p.validate_lang_tag(self)
			return self
		end,

		-- Regularize capitalization of language subtags:
		-- ZH-LATN -> zh-Latn, FR-ca -> fr-CA
		pretty_print = function (self)
			for key, func in pairs(self.print_funcs) do
				if self[key] then
					self[key] = func(self[key])
				end
			end
			return self
		end,

		-- Re-create the original tag from the parsed subtags.
		get_tag = function (self)
			if self.tag then
				return self.tag
			end

			local tag = {}
			for _, subtag_name in ipairs(self.subtag_order) do
				local value = self[subtag_name]
				if value ~= nil then
					-- The "x" singleton introduces private-use subtags, so it
					-- is only emitted when such subtags are present.
					if subtag_name == "private_use" then
						table.insert(tag, "x")
					end
					if type(value) == "table" then
						for _, subtag in ipairs(value) do
							table.insert(tag, subtag)
						end
					else
						table.insert(tag, value)
					end
				end
			end

			tag = table.concat(tag, "-")
			self.tag = tag -- Cache the result.
			return tag
		end,

		-- Order in which subtags appear in a tag.
		subtag_order = { "language", "script", "region", "variant", "private_use" },

		-- Messages stored in the "error" field by throw.
		error_messages = {
			invalid_characters = "invalid characters",
			no_language = "no language subtag",
			invalid_subtag = "invalid subtag",
			invalid_private_use = "length of private-use subtag out of range",
			empty_private_use = "empty private-use subtag",
		}
	}
}

-- Replacement callback for string.gsub: uppercase the first capture and
-- lowercase the second.
local function initial_caps_helper(initial, rest)
	return string.upper(initial) .. string.lower(rest)
end

-- Lowercase a string, or every item of an array of strings.
local function lower_or_map_lower(str)
	if type(str) == "table" then
		return fun.map(string.lower, str)
	else
		return string.lower(str)
	end
end

-- Capitalization function for each subtag type; used by pretty_print.
parsed_subtags_mt.__index.print_funcs = {
	language = string.lower,
	script = function (script_code)
		-- The parentheses discard gsub's second return value (the count).
		return (string.gsub(script_code, "^(%a)(%a%a%a)$", initial_caps_helper))
	end,
	region = string.upper,
	variant = lower_or_map_lower,
	private_use = lower_or_map_lower,
}

-- Make the metatable callable: parsed_subtags_mt(input) returns a new table
-- that stores "input" and uses parsed_subtags_mt as its metatable.
setmetatable(parsed_subtags_mt, {
	__call = function (self, input)
		return setmetatable({ input = input }, self)
	end
})

-- An array of patterns for each subtag, and a "type" field for the name
-- of the subtag.
-- The patterns are checked in order, and any of the subtags can be skipped.
-- So, for example, the "language" subtag must precede the "script"
-- subtag, but a tag may contain a "language" subtag, no "script" subtag
-- and then a "region" subtag.
-- If the full list of subtags has been iterated over, the remaining subtags
-- must match the pattern for a private-use subtag, or the tag is invalid.
local subtag_info = { -- can be put in data module
	{ "%a%a%a?", "1%a+", type = "language" }, -- ll or lll; special case
	-- include extlang?
	{ "%a%a%a%a", type = "script" }, -- Ssss
	{ "%a%a", "%d%d%d", type = "region" }, -- rr, DDD
	{
		"%d%d%d%d", -- 4 digits
		"%w%w%w%w%w%w?%w?%w?", -- 5-8 alnum characters
		type = "variant",
		repeatable = true, -- There can be multiple variants.
	}
}

-- A previous draft, in Module:Lang/sandbox: -- https://en.wikipedia.org/w/index.php?oldid=812819217

-- Based on https://www.w3.org/International/articles/language-tags/.

-- Parse a language tag. -- Returns nil if tag is not a string or empty. -- Else returns a table with a map of subtag type to subtag for all subtags that -- were parsed. -- If there was an error, returns an "error" field with a description of the -- error, and an "invalid" field with the suffix of the tag starting at the -- index where the error occurred.

-- Does not recognize "extension" tags, such as those introduced by "u", as they -- are not needed on Wikipedia. Does not recognize "grandfathered" tags. -- Does not recognize extended language subtags, such as "zh-yue". -- https://www.rfc-editor.org/rfc/rfc6067.txt, https://tools.ietf.org/html/bcp47

-- Only checks that the syntax is correct, not that the values are valid. For
-- instance, will accept non-existent language codes, like "zz".
--
-- @param tag  The language tag to parse.
-- @return     nil if tag is not a string or is empty; otherwise the table of
--             parsed subtags (see the comments inside the function).
function p.parse_IETF(tag)
	if type(tag) ~= "string" or tag == "" then
		return nil
	end

	-- This may contain the special fields "invalid", "error".
	-- "error" indicates why the tag is invalid (if applicable).
	-- All other fields are subtags, and they appear in the tag in the following
	-- order:
	-- "language", "script", "region", "variant", "private_use", "invalid"
	-- All these subtags can be strings or nil, while "variant" and
	-- "private_use" can also be an array of strings if more than one such
	-- subtag was found.
	-- "invalid" is the portion of the tag after the last valid subtag (minus a
	-- hyphen).
	local segments = mw.text.split(tag, "-")
	local parsed_subtags = parsed_subtags_mt(segments)

	-- Language tags probably only contain ASCII alphabetic and numerical
	-- characters and hyphen-minus.
	if not tag:find("^[A-Za-z0-9-]+$") then
		return parsed_subtags:throw(
			"invalid_characters",
			fun.indexOf(
				function (segment)
					return segment:find("[^A-Za-z0-9-]")
				end,
				segments))
	end

	local subtag_i = 1 -- Index of current item in subtag_info.
	local segment_i = 1 -- Index of current segment.
	while segments[segment_i] and subtag_info[subtag_i] do
		local segment = segments[segment_i]
		local subtag_type

		while not subtag_type and subtag_info[subtag_i] do
			-- Check each pattern for the subtag type at "subtag_i" in "subtag_info".
			local cur_subtag = subtag_info[subtag_i]
			for _, pattern in ipairs(cur_subtag) do
				if segment:find("^" .. pattern .. "$") then
					subtag_type = cur_subtag.type
					-- There can be multiple "variant" subtags (and "extension"
					-- subtags, if those are added).
					if not cur_subtag.repeatable then
						subtag_i = subtag_i + 1
					end
					break
				end
			end

			if not subtag_type then
				-- No match; try next subtag.
				subtag_i = subtag_i + 1
			end
		end

		-- If language subtag has not been found, or the current segment has not
		-- been matched as a subtag, break the loop and check for
		-- a private-use subtag.
		if segment_i == 1 and subtag_type ~= "language" or not subtag_type then
			break
		end

		local previous = parsed_subtags[subtag_type]
		if previous then
			-- A second subtag of the same type: create an array.
			if type(previous) == "string" then
				previous = { previous }
				parsed_subtags[subtag_type] = previous
			end
			table.insert(previous, segment)
		else
			parsed_subtags[subtag_type] = segment
		end

		segment_i = segment_i + 1
	end

	if segments[segment_i] then -- More segments to scan?
		-- Not all potential subtags were matched. Check for private-use subtags.
		-- https://tools.ietf.org/html/bcp47#section-2.2.7
		-- Private-use subtags consist of one or more sequences of 1 to 8
		-- alphanumeric characters preceded by "x-".
		-- Alphanumericity has already been checked.
		-- A tag must start with either a language subtag or a private-use subtag.

		-- If next segment is not "x", introducing a private-use subtag, there
		-- is no private-use subtag.
		if segments[segment_i]:lower() ~= "x" then
			if not parsed_subtags.language then
				return parsed_subtags:throw("no_language", 1)
			else
				return parsed_subtags:throw("invalid_subtag", segment_i)
			end
		elseif not segments[segment_i + 1] then
			return parsed_subtags:throw("empty_private_use", segment_i)
		end

		-- Check length of all segments after "x".
		for i = segment_i + 1, #segments do
			local length = #segments[i]
			if not (1 <= length and length <= 8) then
				return parsed_subtags:throw("invalid_private_use", segment_i)
			end
		end

		-- segment_i is the position of "x". Counting from there (rather than
		-- from the last matched segment) also works when the tag starts with
		-- "x" and no subtag was matched at all.
		if not segments[segment_i + 2] then
			-- There is only one private-use subtag.
			parsed_subtags.private_use = segments[segment_i + 1]
		else
			parsed_subtags.private_use = {}
			for i = segment_i + 1, #segments do
				table.insert(parsed_subtags.private_use, segments[i])
			end
		end
	end

	return parsed_subtags:remove_unnecessary_fields()
end

local lang_name_table = mw.loadData "Module:Language/name/data" local synonym_table = mw.loadData "Module:Lang/ISO 639 synonyms" local lang_data = mw.loadData "Module:Lang/data"

--- Check the parsed subtags against the loaded language data and log a
-- message with mw.log for every problem found. Nothing is returned and the
-- subtag fields are not changed (get_tag may cache its result in "tag").
--
-- @param parsed_subtags  Table of parsed subtags that provides a get_tag
--                        method.
function p.validate_lang_tag(parsed_subtags)
	-- Already checked that the tag starts with a language subtag or a
	-- private-use subtag.
	-- Script code is initially capitalized, region code is uppercase,
	-- everything else is lowercase.

	-- Check existence of language tag.
	if parsed_subtags.language
			and not (lang_data.override[parsed_subtags.language]
				or lang_name_table.lang[parsed_subtags.language]) then
		mw.log("Invalid language code", parsed_subtags.language,
			"in", parsed_subtags:get_tag())
	end

	-- Check existence of script tag.
	if parsed_subtags.script then
		local lower_script = parsed_subtags.script:lower()
		if not lang_name_table.script[lower_script] then
			mw.log("Invalid script code", parsed_subtags.script,
				"in", parsed_subtags:get_tag())
		end

		-- Check that script tag is not marked as superfluous (because it is
		-- considered the default one for the language).
		if lang_name_table.suppressed[lower_script]
				and parsed_subtags.language
				and m_table.inArray(
					lang_name_table.suppressed[lower_script],
					parsed_subtags.language:lower()) then
			mw.log(parsed_subtags.script, "is suppressed with",
				parsed_subtags.language, "in", parsed_subtags:get_tag())
		end
	end

	-- Check existence of region code.
	if parsed_subtags.region
			and not lang_name_table.region[parsed_subtags.region:lower()] then
		mw.log("Invalid region code", parsed_subtags.region,
			"in", parsed_subtags:get_tag())
	end

	-- Check that variant code is valid, and that it can validly be used with the
	-- given combination of language, script, region, and variant.
	-- Check for duplicate variant subtags?
	if parsed_subtags.variant then
		local lower_tag = parsed_subtags:get_tag():lower()
		local variants = parsed_subtags.variant
		if type(variants) ~= "table" then
			variants = { variants }
		end

		for _, variant in ipairs(variants) do
			local variant_data = lang_name_table.variant[variant]
			if not variant_data then
				mw.log("Invalid variant code", variant,
					"in", parsed_subtags:get_tag())
			else
				-- The part of the tag that precedes this variant; only used
				-- in the log message.
				local prefix = lower_tag:match("^(.-)%-" .. variant)

				-- Check that at least one of the prefixes is found at the
				-- beginning of lower_tag.
				local has_valid_prefix = fun.some(
					function (allowed_prefix)
						return lower_tag:find(allowed_prefix, 1, true) == 1
					end,
					variant_data.prefixes)
				if not has_valid_prefix then
					mw.log("Variant tag", variant, "does not belong with prefix",
						prefix, "in", parsed_subtags:get_tag())
				end
			end
		end
	end

	-- Check that the private-use subtag is actually used by Wikipedia.
	-- get_tag() is called instead of reading the "tag" field, because that
	-- field is only a cache and is nil until get_tag has run once.
	-- NOTE(review): assumes the keys of lang_data.override use the same
	-- capitalization as get_tag() produces -- confirm against Module:Lang/data.
	if parsed_subtags.private_use
			and not lang_data.override[parsed_subtags:get_tag()] then
		mw.log("Invalid private-use subtag in", parsed_subtags:get_tag())
	end
end

--- Show the key/value pairs stored in the COinS metadata of a reference.
--
-- The metadata is taken from the title attribute of the tag carrying
-- class="Z3988"; it is split on "&" into "key=value" items and every value
-- is passed through mw.uri.decode.
--
-- @param frame  Scribunto frame; frame.args[1] is the reference text.
-- @return       The reference text, a blank line, and the "key: value" pairs
--               joined with ", " in the order given by m_table.sortedPairs.
-- Raises an error if the argument is missing or no COinS data is found.
function p.show_COinS(frame)
	local ref = frame.args[1]
	if not ref then
		error("show_COinS: no reference text was given as the first argument")
	end

	-- NOTE(review): the pattern starts with "]*", which looks like the
	-- remainder of a longer pattern -- confirm against the page history.
	local tag = ref:match(']*class="Z3988"[^>]*>')
	if not tag then
		error('show_COinS: no tag with class="Z3988" was found')
	end

	local data = tag:match('title="(.-)"')
	if not data then
		error("show_COinS: the COinS tag has no title attribute")
	end

	local vals = {}
	for item in mw.text.gsplit(data, "&") do
		local key, value = item:match("(.-)=(.*)")
		-- Items without "=" (for instance an empty item) have no key and
		-- cannot be stored, so they are skipped.
		if key then
			vals[key] = mw.uri.decode(value)
		end
	end

	local printed = fun.mapIter(
		function (value, key)
			return ("%s: %s"):format(key, value)
		end,
		m_table.sortedPairs(vals))

	return ref .. "\n\n" .. table.concat(printed, ", ")
end

-- Return the export table as the value of this module.
return p