-- Module:Sandbox/DePiep/uchar/data

-- Static lookup tables for the uchar module.
-- The chunk builds one table and returns it; it performs no other work.
--
-- Fields:
--   tPlanes     : plane number (integer key) -> plane name
--   tGenCat     : General Category code -> { label, sample }
--   tScriptName : four-letter script code -> script name
local uchar_data = {
	-- Named Unicode planes, keyed by plane number. Planes without an entry
	-- here (4..13) yield nil on lookup.
	tPlanes = {
		[0] = "Basic Multilingual Plane",
		[1] = "Supplementary Multilingual Plane",
		[2] = "Supplementary Ideographic Plane",
		[3] = "Tertiary Ideographic Plane",
		[14] = "Supplementary Special-purpose Plane",
		[15] = "Supplementary Private Use Area-A",
		[16] = "Supplementary Private Use Area-B",
	},

	-- General Category codes. Each value is a two-element sequence:
	--   [1] the human-readable label
	--   [2] either a 4-digit hex string or the placeholder "x"
	-- NOTE(review): [2] looks like a sample code point for the category,
	-- with "x" meaning "none chosen" -- confirm against the consuming module.
	tGenCat = {
		['L'] = { "Letter", "x" },
		['LC'] = { "Cased Letter", "x" },
		['Lu'] = { "Uppercase Letter", "0042" },
		['Ll'] = { "Lowercase Letter", "0062" },
		['Lt'] = { "Titlecase Letter", "01F2" },
		['Lm'] = { "Modifier Letter", "02B0" },
		['Lo'] = { "Other Letter", "0294" },

		['M'] = { "Mark", "x" },
		['Mn'] = { "Nonspacing Mark", "0302" },
		['Mc'] = { "Spacing Combining Mark", "0BC2" },
		['Me'] = { "Enclosing Mark", "20DF" },

		['N'] = { "Number", "x" },
		['Nd'] = { "Decimal Digit Number", "0039" },
		['Nl'] = { "Letter Number", "216B" },
		['No'] = { "Other Number", "00BE" },

		['P'] = { "Punctuation", "x" },
		['Pc'] = { "Connector Punctuation", "x" },
		['Pd'] = { "Dash Punctuation", "x" },
		['Ps'] = { "Open Punctuation", "x" },
		['Pe'] = { "Close Punctuation", "x" },
		['Pi'] = { "Initial Quote Punctuation", "x" },
		['Pf'] = { "Final Quote Punctuation", "x" },
		['Po'] = { "Other Punctuation", "x" },

		['S'] = { "Symbol", "x" },
		['Sm'] = { "Math Symbol", "x" },
		['Sc'] = { "Currency Symbol", "x" },
		-- Spelling corrected: was "Modifer Symbol".
		['Sk'] = { "Modifier Symbol", "x" },
		['So'] = { "Other Symbol", "x" },

		['Z'] = { "Separator", "x" },
		['Zs'] = { "Space Separator", "x" },
		['Zl'] = { "Line Separator", "x" },
		['Zp'] = { "Paragraph Separator", "x" },

		['C'] = { "Other", "x" },
		['Cc'] = { "Other control", "x" },
		['Cf'] = { "Other format", "00AD" },
		['Cs'] = { "Other surrogate", "x" },
		['Co'] = { "Other private use", "x" },
		['Cn'] = { "Other not assigned", "x" },
	},

	-- Script code -> script name.
	-- Source: Module:Unicode data/scripts, 'aliases' table, as of 29-04-2022.
	-- Scripts.txt gives the full names; here they are treated as aliases
	-- to save space.
	tScriptName = {
		Adlm = "Adlam",
		Aghb = "Caucasian Albanian",
		Ahom = "Ahom",
		Arab = "Arabic",
		Armi = "Imperial Aramaic",
		Armn = "Armenian",
		Avst = "Avestan",
		Bali = "Balinese",
		Bamu = "Bamum",
		Bass = "Bassa Vah",
		Batk = "Batak",
		Beng = "Bengali",
		Bhks = "Bhaiksuki",
		Bopo = "Bopomofo",
		Brah = "Brahmi",
		Brai = "Braille",
		Bugi = "Buginese",
		Buhd = "Buhid",
		Cakm = "Chakma",
		Cans = "Canadian Aboriginal",
		Cari = "Carian",
		Cham = "Cham",
		Cher = "Cherokee",
		Chrs = "Chorasmian",
		Copt = "Coptic",
		Cpmn = "Cypro Minoan",
		Cprt = "Cypriot",
		Cyrl = "Cyrillic",
		Deva = "Devanagari",
		Diak = "Dives Akuru",
		Dogr = "Dogra",
		Dsrt = "Deseret",
		Dupl = "Duployan",
		Egyp = "Egyptian Hieroglyphs",
		Elba = "Elbasan",
		Elym = "Elymaic",
		Ethi = "Ethiopic",
		Geor = "Georgian",
		Glag = "Glagolitic",
		Gong = "Gunjala Gondi",
		Gonm = "Masaram Gondi",
		Goth = "Gothic",
		Gran = "Grantha",
		Grek = "Greek",
		Gujr = "Gujarati",
		Guru = "Gurmukhi",
		Hang = "Hangul",
		Hani = "Han",
		Hano = "Hanunoo",
		Hatr = "Hatran",
		Hebr = "Hebrew",
		Hira = "Hiragana",
		Hluw = "Anatolian Hieroglyphs",
		Hmng = "Pahawh Hmong",
		Hmnp = "Nyiakeng Puachue Hmong",
		Hrkt = "Katakana Or Hiragana",
		Hung = "Old Hungarian",
		Ital = "Old Italic",
		Java = "Javanese",
		Kali = "Kayah Li",
		Kana = "Katakana",
		Khar = "Kharoshthi",
		Khmr = "Khmer",
		Khoj = "Khojki",
		Kits = "Khitan Small Script",
		Knda = "Kannada",
		Kthi = "Kaithi",
		Lana = "Tai Tham",
		Laoo = "Lao",
		Latn = "Latin",
		Lepc = "Lepcha",
		Limb = "Limbu",
		Lina = "Linear A",
		Linb = "Linear B",
		Lisu = "Lisu",
		Lyci = "Lycian",
		Lydi = "Lydian",
		Mahj = "Mahajani",
		Maka = "Makasar",
		Mand = "Mandaic",
		Mani = "Manichaean",
		Marc = "Marchen",
		Medf = "Medefaidrin",
		Mend = "Mende Kikakui",
		Merc = "Meroitic Cursive",
		Mero = "Meroitic Hieroglyphs",
		Mlym = "Malayalam",
		Modi = "Modi",
		Mong = "Mongolian",
		Mroo = "Mro",
		Mtei = "Meetei Mayek",
		Mult = "Multani",
		Mymr = "Myanmar",
		Nand = "Nandinagari",
		Narb = "Old North Arabian",
		Nbat = "Nabataean",
		Newa = "Newa",
		Nkoo = "Nko",
		Nshu = "Nushu",
		Ogam = "Ogham",
		Olck = "Ol Chiki",
		Orkh = "Old Turkic",
		Orya = "Oriya",
		Osge = "Osage",
		Osma = "Osmanya",
		Ougr = "Old Uyghur",
		Palm = "Palmyrene",
		Pauc = "Pau Cin Hau",
		Perm = "Old Permic",
		Phag = "Phags Pa",
		Phli = "Inscriptional Pahlavi",
		Phlp = "Psalter Pahlavi",
		Phnx = "Phoenician",
		Plrd = "Miao",
		Prti = "Inscriptional Parthian",
		Rjng = "Rejang",
		Rohg = "Hanifi Rohingya",
		Runr = "Runic",
		Samr = "Samaritan",
		Sarb = "Old South Arabian",
		Saur = "Saurashtra",
		Sgnw = "SignWriting",
		Shaw = "Shavian",
		Shrd = "Sharada",
		Sidd = "Siddham",
		Sind = "Khudawadi",
		Sinh = "Sinhala",
		Sogd = "Sogdian",
		Sogo = "Old Sogdian",
		Sora = "Sora Sompeng",
		Soyo = "Soyombo",
		Sund = "Sundanese",
		Sylo = "Syloti Nagri",
		Syrc = "Syriac",
		Tagb = "Tagbanwa",
		Takr = "Takri",
		Tale = "Tai Le",
		Talu = "New Tai Lue",
		Taml = "Tamil",
		Tang = "Tangut",
		Tavt = "Tai Viet",
		Telu = "Telugu",
		Tfng = "Tifinagh",
		Tglg = "Tagalog",
		Thaa = "Thaana",
		Thai = "Thai",
		Tibt = "Tibetan",
		Tirh = "Tirhuta",
		Tnsa = "Tangsa",
		Toto = "Toto",
		Ugar = "Ugaritic",
		Vaii = "Vai",
		Vith = "Vithkuqi",
		Wara = "Warang Citi",
		Wcho = "Wancho",
		Xpeo = "Old Persian",
		Xsux = "Cuneiform",
		Yezi = "Yezidi",
		Yiii = "Yi",
		Zanb = "Zanabazar Square",
		Zinh = "Inherited",
		Zyyy = "Common",
		Zzzz = "Unknown",
	},
}

return uchar_data