-- Module:Sandbox/Erutuon/UTF-8

-- Module table: functions stored on it (such as p.show) are what this
-- module hands back to its caller when the table is returned at the end.
local p = {}

local bit = require("bit32") local band = bit.band local rshift = bit.rshift

-- Calls `func` once for every element that ipairs visits in `t`, in order,
-- passing only the element (never its index). Returns nothing.
-- NOTE: this adds a field to the global `table` library, which is what lets
-- tables whose metatable has `__index = table` call it as `t:forEach(func)`.
table.forEach = function(t, func)
	for _, element in ipairs(t) do
		func(element)
	end
end

-- Gives `arr` a fresh metatable whose __index is the `table` library, so the
-- library's functions can be called as methods (arr:insert(x), arr:concat()).
-- Returns the very same table that was passed in.
local function setMt(arr)
	local methods = { __index = table }
	setmetatable(arr, methods)
	return arr
end

-- Converts a string representing a number in binary base to a Lua number.
-- The conversion is delegated to tonumber(stringBinary, 2), so a string that
-- is not a valid base-2 numeral yields nil rather than raising an error.
local function binary(stringBinary)
	return tonumber(stringBinary, 2)
end

-- Find the digit at a certain position in a byte.
-- `index` counts from the most significant of the 8 bits (index 1) down to
-- the least significant (index 8); the result is 0 or 1.
-- `number` may be a Lua number or a string of binary digits.
local function digitAt(number, index)
	if type(number) == "string" then
		-- Parse binary-digit strings directly in base 2.
		number = tonumber(number, 2)
	end
	-- Shift the wanted bit down to the lowest position, then mask it off.
	return band(rshift(number, 8 - index), 1)
end

-- Returns a table containing bits in a byte, from highest to lowest:
-- index 1 holds the most significant of the 8 bits, index 8 the least
-- significant. Every entry is the number 0 or 1.
local function getBits(byte)
	local bits = {}
	-- The loop variable is deliberately not named `bit`, which is the name
	-- of the bit32 library local declared at the top of the file.
	for position = 8, 1, -1 do
		-- Peel off the lowest bit and store it at the current position,
		-- filling the table from the last slot towards the first.
		bits[position] = band(byte, 1)
		byte = rshift(byte, 1)
	end
	return bits
end

-- mw.log(table.concat(getBits(rshift(binary("11100001"), 8 - 3))))

-- Do something to each byte in a string; put the result in a table.
-- `func` is called with the numeric value of each byte of `str`, in order,
-- and the first value it returns is appended to the result table.
local function iterBytes(str, func)
	local out = {}
	for i = 1, #str do
		-- Capture exactly one result: passing func(...) straight into
		-- table.insert would expand extra (or missing) return values into
		-- insert's own argument list.
		local result = func(string.byte(str, i))
		out[#out + 1] = result
	end
	return out
end

-- Builds the pair of markup strings used to colour a run of digits.
-- Returns a two-element table: [1] is an opening <span> whose inline style
-- sets the text colour to `color`, [2] is the matching closing tag.
-- NOTE(review): the span markup is a reconstruction (the function previously
-- ignored `color`) — confirm it is the intended markup.
local function makeTag(color)
	return { '<span style="color: ' .. color .. ';">', '</span>' }
end

-- Find leading digits marking ASCII, leading bytes, or continuation bytes,
-- else tag byte as red.
-- `byteTable` is a sequence of 0/1 digits, highest bit first. The run of
-- leading 1 digits plus the 0 that ends it is wrapped in a tag from makeTag:
--   no leading 1      -> "darkgray"  (ASCII, 0x00 - 0x7F)
--   one leading 1     -> "chocolate" (continuation bytes)
--   two to four       -> "deeppink"  (leading bytes)
--   more than four    -> the whole byte is wrapped in "red"
-- The table is modified in place (tag strings are inserted into it, and it
-- is given the setMt metatable) and is also returned.
local function markDigits(byteTable)
	setMt(byteTable)
	local onesCount = 0
	for i, digit in ipairs(byteTable) do
		if digit == 1 then
			onesCount = onesCount + 1
			if onesCount > 4 then
				local tag = makeTag("red")
				-- Append the closing tag after the last digit, then put the
				-- opening tag in front of the first one.
				table.insert(byteTable, tag[2])
				table.insert(byteTable, 1, tag[1])
				-- Stop here: the table has just been shifted, so carrying on
				-- would keep revisiting this same digit forever.
				return byteTable
			end
		else
			local tag
			if onesCount == 0 then
				-- ASCII (0x00 - 0x7F)
				tag = makeTag("darkgray")
			elseif onesCount == 1 then
				-- continuation bytes
				tag = makeTag("chocolate")
			else
				-- leading bytes
				tag = makeTag("deeppink")
			end
			-- Close right after the terminating 0 first, so that inserting
			-- the opening tag at the front does not disturb that position.
			table.insert(byteTable, i + 1, tag[2])
			table.insert(byteTable, 1, tag[1])
			return byteTable
		end
	end
	return byteTable
end

-- Renders every byte of `str` as its tagged digit string and joins the
-- per-byte strings with a single space.
local function printBytes(str)
	-- Turn one byte value into its digits, tag them, and flatten to a string.
	local function formatByte(byte)
		local digits = getBits(byte)
		return table.concat(markDigits(digits))
	end

	local formatted = iterBytes(str, formatByte)
	return table.concat(formatted, " ")
end

-- Splits `str` into characters with mw.ustring.gmatch and builds two
-- parallel lists: the characters themselves, and the printBytes rendering
-- of each character. Both lists carry the setMt metatable.
-- Returns: chars, bytes
local function makeCharByteTables(str)
	local chars = setMt({})
	local bytes = setMt({})
	for character in mw.ustring.gmatch(str, ".") do
		table.insert(chars, character)
		table.insert(bytes, printBytes(character))
	end
	return chars, bytes
end

-- Builds the wikitext for a two-row table: the first row has one cell per
-- entry of `chars`, the second row one cell per entry of `bytes`.
-- Both arguments are given the setMt metatable as a side effect.
-- Returns the table markup as a single newline-separated string.
-- NOTE: inside this file the name shadows Lua's global `print`.
local function print(chars, bytes)
	setMt(chars)
	setMt(bytes)

	local output = { '{| class="wikitable"' }
	for _, char in ipairs(chars) do
		output[#output + 1] = "| " .. char
	end

	-- Row separator between the character row and the byte row.
	output[#output + 1] = "|-"
	for _, byteString in ipairs(bytes) do
		-- Emit the rendered bytes themselves, not just an empty cell.
		output[#output + 1] = "| " .. byteString
	end

	output[#output + 1] = "|}"
	return table.concat(output, "\n")
end

-- Module entry point. Reads the first positional argument from `frame.args`,
-- substituting a built-in sample string when that argument is absent, and
-- returns the table markup produced by print for that text.
function p.show(frame)
	local str = frame.args[1]
	if not str then
		str = "abc πρᾶγμᾰ"
	end
	local chars, bytes = makeCharByteTables(str)
	return print(chars, bytes)
end

-- Hand the module table (and the functions stored on it) back to the caller.
return p