-- Module:Wikt-lang/data/sandbox

-- Short alias for mw.ustring.char; it is called with a single Unicode code
-- point, e.g. U(0x300), to spell out characters that are hard to type or read.
local U = mw.ustring.char

-- Diacritics, from the Combining Diacritical Marks block.
-- Each declaration sits on its own line: a "--" comment runs to the end of
-- the line, so code sharing a line with a comment is silently disabled.
local grave        = U(0x300)
local acute        = U(0x301)
local circumflex   = U(0x302)
local tilde        = U(0x303)
local macron       = U(0x304)
local breve        = U(0x306)
local dot          = U(0x307)
local diaeresis    = U(0x308)
local double_acute = U(0x30B)
local caron        = U(0x30C)
local double_grave = U(0x30F)
local invbreve     = U(0x311)
local dot_below    = U(0x323)
local undertie     = U(0x35C)

--[[
	This is a table of Wiktionary language codes with data belonging to them.
	Name is the "canonical name" used on Wiktionary.

	Fields that appear on the entries below:
		name          canonical language name
		direction     text direction (only "rtl" is used here)
		type          "appendix" or "reconstructed"
		replacements  entry-name normalisation rules, in one of two shapes:
		                * a map of pattern -> replacement string, or
		                * { decompose = true, from = {...}, to = {...} },
		                  where from[i] is paired with to[i].
	NOTE(review): how a `from` pattern with no matching `to` item is treated
	(presumably: the match is deleted) is decided by the consuming module, not
	here -- confirm there before relying on it.
]]
local data = {
	["languages"] = {
		["ab"] = {
			["name"] = "Abkhaz",
		},
		["ang"] = {
			-- Remove macrons, acutes, and overdots
			["replacements"] = {
				decompose = true,
				from = { "[" .. macron .. acute .. dot .. "]" },
			},
		},
		["ar"] = {
			["direction"] = "rtl", -- Should be in the script data module.
			["replacements"] = {
				-- ālif with wasla is replaced by ālif;
				[U(0x0671)] = U(0x0627),
				-- taṭwīl, fatḥatan, ḍammatan, kasratan,
				-- fatḥa, ḍamma, kasra,
				-- shadda, sukūn, and superscript (dagger) ālif are removed.
				["[" .. U(0x0640) .. U(0x064B) .. U(0x064C) .. U(0x064D)
					.. U(0x064E) .. U(0x064F) .. U(0x0650)
					.. U(0x0651) .. U(0x0652) .. U(0x0670) .. "]"] = "",
			},
		},
		["bal"] = {
			["name"] = "Baluchi",
		},
		["be"] = {
			["replacements"] = {
				[acute] = "",
			},
		},
		["bua"] = {
			["name"] = "Buryat",
		},
		["cu"] = {
			["name"] = "Old Church Slavonic",
		},
		["egy"] = {
			["name"] = "Egyptian",
		},
		["frp"] = {
			["name"] = "Franco-Provençal",
		},
		["goh"] = {
			-- Remove macrons, circumflexes, and diaereses
			["replacements"] = {
				decompose = true,
				from = {
					"[" .. macron .. circumflex .. diaeresis .. "]",
				},
			},
		},
		["got"] = {
			["replacements"] = {
				-- Latin to Gothic since people will not want to have to copy
				-- and paste Gothic letters in
				["[AÁaáĀā]"] = "𐌰",
				["[Bb]"]    = "𐌱",
				["[Gg]"]    = "𐌲",
				["[Dd]"]    = "𐌳",
				["[EeĒē]"]  = "𐌴",
				["[Qq]"]    = "𐌵",
				["[Zz]"]    = "𐌶",
				["[Hh]"]    = "𐌷",
				["[Þþ]"]    = "𐌸",
				["[IiÍí]"]  = "𐌹",
				["[Kk]"]    = "𐌺",
				["[Ll]"]    = "𐌻",
				["[Mm]"]    = "𐌼",
				["[Nn]"]    = "𐌽",
				["[Jj]"]    = "𐌾",
				["[UuÚúŪū]"] = "𐌿",
				["[Pp]"]    = "𐍀",
				["[Rr]"]    = "𐍂",
				["[Ss]"]    = "𐍃",
				["[Tt]"]    = "𐍄",
				["[WwYy]"]  = "𐍅",
				["[Ff]"]    = "𐍆",
				["[Xx]"]    = "𐍇",
				["[Ƕƕ]"]   = "𐍈", -- Not sure if "hw" and "hv" can safely be converted
				["[OoŌō]"]  = "𐍉",
			},
		},
		["grc"] = {
			["replacements"] = {
				decompose = true,
				from = {
					-- Replace variant letterforms with standard ones.
					"ϐ", "ϵ", "ϑ", "ϰ", "ϱ", "ϲ", "ϕ",
					-- Remove macrons, breves, and underties. This eighth
					-- pattern deliberately has no counterpart in `to`.
					"[" .. macron .. breve .. undertie .. "]"
				},
				to  = { "β", "ε", "θ", "κ", "ρ", "σ", "φ", }
			},
		},
		["ha"] = {
			-- remove tilde, grave, acute, macron, circumflex
			["replacements"] = {
				decompose = true,
				from = { "[" .. grave .. circumflex .. macron .. acute .. tilde .. "]" },
			},
		},
		["jbo"] = {
			["type"] = "appendix",
		},
		["la"] = {
			-- Remove macrons, breves, and diaereses
			["replacements"] = {
				decompose = true,
				from = { "[" .. macron .. breve .. diaeresis .. "]" },
			},
		},
		["lt"] = {
			-- remove acute, tilde, grave
			["replacements"] = {
				decompose = true,
				from = { "[" .. acute .. tilde .. grave .. "]" },
			},
		},
		["moe"] = {
			["name"] = "Cree",
		},
		["mul"] = {
			["name"] = "Translingual",
		},
		["nci"] = {
			["replacements"] = {
				decompose = true,
				-- Remove macrons, acutes, circumflexes, graves, and saltillo;
				-- see Saltillo (linguistics).
				from = { "[" .. grave .. acute .. macron .. circumflex .. "Ꞌꞌʻʼ'ʔ]" },
			},
		},
		["nds-de"] = {
			["name"] = "German Low German",
		},
		["orv"] = {
			["replacements"] = {
				[U(0x484)] = "",
			},
		},
		["ru"] = {
			["replacements"] = {
				[acute] = "",
			},
		},
		["rw"] = {
			["name"] = "Rwanda-Rundi",
		},
		["se"] = {
			["replacements"] = {
				["([đflmnŋrsšŧv])'%1"] = "%1%1",
			},
		},
		["sh"] = {
			["replacements"] = {
				decompose = true,
				-- Strip accent marks from vowels and syllabic r (Latin and
				-- Cyrillic), keeping the base letter via the capture.
				from = { "([AaEeIiOoUuRrАаЕеИиОоУуРр])[" .. double_grave .. grave .. invbreve .. acute .. macron .. tilde .. "]" },
				to  = { "%1" },
			},
		},
		["sl"] = {
			["replacements"] = {
				decompose = true,
				-- remove tonal orthography
				from = {"ł", "[" .. grave .. acute .. macron .. double_grave .. invbreve .. circumflex .. dot_below .. "]"},
				to = {"l"},
			},
		},
		["uk"] = {
			["replacements"] = {
				[acute] = "",
			}
		},
		["xcl"] = {
			["replacements"] = {
				["[՞՜՛՟]"] = "",
				["և"] = "եւ",
			},
		},
		["xgf"] = {
			["replacements"] = {
				["['`ʔ]"] = "ʼ",
			},
		},

		-- Custom private-use codes which should be added to Module:Lang.
		-- Codes are in the format of "code-x-code"
		["gem-x-proto"] = {
			["type"] = "reconstructed",
		},
		["grk-x-proto"] = {
			["name"] = "Proto-Hellenic",
			["type"] = "reconstructed",
		},
		["ine-x-proto"] = {
			["type"] = "reconstructed",
		},
		["sem-x-proto"] = {
			["type"] = "reconstructed",
		},
		["sla-x-proto"] = {
			["type"] = "reconstructed",
			["replacements"] = {
				["[ÀÁÃĀȀȂ]"] = "A",
				["[àáãāȁȃ]"] = "a",
				["[ÈÉẼĒȄȆ]"] = "E",
				["[èéẽēȅȇ]"] = "e",
				["[ÌÍĨĪȈȊ]"] = "I",
				["[ìíĩīȉȋ]"] = "i",
				["[ÒÓÕŌȌȎŐ]"] = "O",
				["[òóõōȍȏő]"] = "o",
				["[ÙÚŨŪȔȖŰ]"] = "U",
				["[ùúũūȕȗű]"] = "u",
				["[ỲÝỸȲ]"] = "Y",
				["[ỳýỹȳ]"] = "y",
				["Ǭ"] = "Ǫ",
				["ǭ"] = "ǫ",
				["[" .. grave .. acute .. double_acute .. tilde .. macron .. double_grave .. invbreve .. "]"] = "",
				["ĭ"] = "ь",
				["ŭ"] = "ъ",
			},
		},
	},

	-- Here, keys (for example, "gem") are the language codes as supplied on
	-- Wikipedia, and values (for example, "gem-x-proto") are the codes they
	-- are redirected to.
	-- Subtags are not currently supported.
	["redirects"] = {
		["aae"] = "sq",
		["aiq"] = "fa",
		["aln"] = "sq",
		["als"] = "sq",
		["azb"] = "az",
		["azj"] = "az",
		["bgn"] = "bal",
		["bs"] = "sh",
		["bxr"] = "bua",
		["ciw"] = "oj",
		["cnr"] = "sh",
		["fil"] = "tl",
		["fuf"] = "ff",
		["gem"] = "gem-x-proto", -- Not correct, but is commonly used.
		["gmw-ecg"] = "gmw-x-ecg",
		["hak"] = "zh",
		["hbo"] = "he",
		["hr"] = "sh",
		["ine"] = "ine-x-proto", -- Not correct, but might be commonly used.
		["kjv"] = "sh",
		["nan"] = "zh",
		["prs"] = "fa",
		["rn"] = "rw",
		["sli"] = "gmw-x-ecg",
		["sr"] = "sh",
		["src"] = "sc",
		["sro"] = "sc",
		["tw"] = "ak",
		["wae"] = "gsw",
		["wep"] = "nds-de",
		["yue"] = "zh",
		["xno"] = "fro",

		-- Incorrect private use tags
		["cel-proto"] = "cel-x-proto",
		["gem-pro"] = "gem-x-proto",
		["grk-pro"] = "grk-x-proto",
		["ine-pro"] = "ine-x-proto",
		["ine-bsl-pro"] = "ine-x-proto",
		["sem-pro"] = "sem-x-proto",
		["sla-pro"] = "sla-x-proto",
	},
}

-- Hand the `data` value back to whichever module loads this one.
return data