Module:User:Cscott/llpeg

return (function local builders = {} local function register(name, f) builders[name] = f end register('advent.compat', function return require Module:User:Cscott/compat end)

register('llpeg.types', function(myrequire) myrequire('strict') local compat = myrequire('advent.compat') local CHARMAX = 0x7F -- maximum codepoint for charsets

-- metatable for pattern objects; will be filled in later local metareg = {}

local enum = function(keys) local Enum = {} Enum.__index = Enum function Enum:__tostring return self.name end function Enum:pairs return keys end function Enum:type return Enum end

for name, value in pairs(keys) do      Enum[name] = setmetatable({ name = name, value = value }, Enum) end return Enum end

local CapKind = enum{ close = "close", -- not used in trees */ position = "position", const = "constant", -- ktable[key] is Lua constant backref = "backref", -- ktable[key] is "name" of group to get capture arg = "argument", -- 'key' is arg's number simple = "simple", -- next node is pattern table = "table", -- next node is pattern ["function"] = "function", -- ktable[key] is function; next node is pattern acc = "acc", -- ktable[key] is function; next node is pattern query = "query", -- ktable[key] is table; next node is pattern string = "string", -- ktable[key] is string; next node is pattern num = "num", -- numbered capture; 'key' is number of value to return subst = "substitution", -- substitution capture; next node is pattern fold = "fold", -- ktable[key] is function; next node is pattern runtime = "runtime", -- not used in trees (is uses another type for tree) group = "group", -- ktable[key] is group's "name" }

local TTag = enum{ Char = "char", -- 'n' has unicode codepoint Set = "set", -- 'set' has sparse array codepoint->true for codepoint <=CHARMAX -- 'rest' indicates whether all codepoints > CHARMAX should be               -- part of the set (true) or not (false) Any = "any", True = "true", False = "false", UTFR = "utf8.range", -- range of UTF-8 codepoints;                 'from' has initial codepoint; 'to' has final codepoint -- Rep = "rep", -- 'sib1' * Seq = "seq", -- 'sib1' 'sib2' Choice = "choice", -- 'sib1' / 'sib2' Not = "not", -- !'sib1' And = "and", -- &'sib1' Call = "call", -- 'sib2' is rule being called; otherwise same as TOpenCall OpenCall = "opencall", -- 'key' is rule name Rule = "rule", -- 'key' is rule name (but key == nil for unused rules);             'sib1' is rule's pattern pre-rule; 'sib2' is next rule;             'n' is rule's sequential number, 'name' is rule name (even             for unused rules) -- XInfo = "xinfo", -- extra info (not used) Grammar = "grammar", -- 'sib1' is initial (and first) rule, 'n' is # rules Behind = "behind", -- 'sib1' is pattern, 'n' is how much to go back Capture = "capture", -- captures: 'cap' is kind of capture (enum 'CapKind');                'key' is Lua value associated with capture;               'sib1' is capture body -- RunTime = "run-time", -- run-time capture: 'key' is Lua function;                 'sib1' is capture body -- Throw = "throw",   -- labeled failure: 'key' is label's name, -- sib2 is associated recovery rule }

local PE = enum{ nullable = "nullable", nofail = "nofail", }

-- virtual machine instructions local Opcode = enum{ Any = "any", -- if no char, fail Char = "char", -- if char != aux, fail Set = "set", -- if char not in buff, fail TestAny = "testany", -- in no char, jump to 'offset' TestChar = "testchar", -- if char != aux, jump to 'offset' TestSet = "testset", -- if char not in buff, jump to 'offset' Span = "span", -- read a span of chars in buff UTFR = "utf-range", -- if codepoint not in range [offset, utf_to], fail Behind = "behind", -- walk back 'aux' characters (fail if not possible) Ret = "ret", -- return from a rule End = "end", -- end of pattern Choice = "choice", -- stack a choice; next fail will jump to 'offset' PredChoice = "pred_choice", -- labeled failure: stack a choice; changes label env next fail will jump to 'offset' Jmp = "jmp", -- jump to 'offset' Call = "call", -- call rule at 'offset' OpenCall = "open_call", -- call rule number 'key' (must be closed to a ICall) Commit = "commit", -- pop choice and jump to 'offset' PartialCommit = "partial_commit", -- update top choice to current position and jump BackCommit = "back_commit", -- backtrack like "fail" but jump to its own 'offset' FailTwice = "failtwice", -- pop one choice and then fail Fail = "fail", -- go back to saved state on choice and jump to saved offset Giveup = "giveup", -- internal use FullCapture = "fullcapture", -- complete capture of last 'off' chars OpenCapture = "opencapture", -- start a capture CloseCapture = "closecapture", CloseRunTime = "closeruntime", Throw = "throw",   -- fails with a given label --labeled failure ThrowRec = "throw_rec", -- fails with a given label and call rule at 'offset' --labeled failure Empty = "--", -- to fill empty slots left by optimizations }

-- helper for visitor pattern definitions function define(dispatch, which, f)  for _,v in pairs(which) do      assert(v ~= nil) -- catch typos dispatch[v] = f  end end

local numsiblings = {} define(numsiblings, {         TTag.Char, TTag.Set, TTag.Any,          TTag.True, TTag.False, TTag.UTFR,          TTag.Call, TTag.OpenCall,          TTag.Throw, }, 0) define(numsiblings, {         TTag.Rep, TTag.Not, TTag.And, TTag.Grammar,          TTag.Behind, TTag.Capture, TTag.RunTime, }, 1) define(numsiblings, {         TTag.Seq, TTag.Choice, TTag.Rule, }, 2)

-- more help for visitor functions

local function_name_registry = {} function register_fname(name, f)  assert(type(name) == "string") assert(type(f) == "function") function_name_registry[f] = name end

function report_ferror(f, msg) local fname = function_name_registry[f] if fname ~= nil then msg = fname .. ": " .. msg end error(msg) end

function define_type_visitor(tbl) local dispatch = {} for keys,func in pairs(tbl) do     if type(keys) ~= "table" then keys = { keys } end define(dispatch, keys, func) end local visit visit = function(val, ...) local a = dispatch["assert"] if a ~= nil then a(val, ...) end -- assert preconditions local ty = type(val) if ty == 'table' and getmetatable(val) == metareg then ty = 'pattern' end local f = dispatch[ty] if f ~= nil then return f(val, ...) end f = dispatch.default if f == nil then report_ferror(visit, "no default for " .. ty) end return f(val, ...) end return visit end

function define_tree_visitor(tbl, opt_name) local dispatch = {} for keys,func in pairs(tbl) do     if type(keys) ~= "table" or getmetatable(keys) == TTag then keys = { keys } end define(dispatch, keys, func) end local visit visit = function(tree, ...) if tree == nil then report_ferror(visit, "nil tree") end local a = dispatch["assert"] if a ~= nil then a(tree, ...) end -- assert preconditions local f = dispatch[tree.tag] if f ~= nil then return f(tree, ...) end f = dispatch.default if f == nil then report_ferror(visit, "no default for " .. tree.tag) end return f(tree, ...) end return visit end

function define_opcode_visitor(tbl) local dispatch = {} for keys,func in pairs(tbl) do     if type(keys) ~= "table" or getmetatable(keys) == Opcode then keys = { keys } end define(dispatch, keys, func) end local visit visit = function(op, ...) if op == nil then report_ferror(visit, "nil op") end local a = dispatch["assert"] if a ~= nil then a(op, ...) end -- assert preconditions local f = dispatch[op.code] if f ~= nil then return f(op, ...) end f = dispatch.default if f == nil then report_ferror(visit, "no default for " .. op.code) end return f(op, ...) end return visit end

-- helper for module imports function from(mod, list) local result = {} for _,v in ipairs(list) do     table.insert(result, mod[v]) end return compat.unpack(result) end

function newcharset return setmetatable({        tag = TTag.Set,         code = nil,         rest = false,         set = {}   }, metareg) end

local fullset = newcharset for i = 0,CHARMAX do  fullset.set[i] = true end fullset.rest = true -- make sure non-ascii unicode chars are included! assert(fullset.tag == TTag.Set)

return { CHARMAX = CHARMAX, CapKind = CapKind, Opcode = Opcode, PE = PE, TTag = TTag, define = define, define_tree_visitor = define_tree_visitor, enum = enum, from = from, fullset = fullset, metareg = metareg, newcharset = newcharset, numsiblings = numsiblings, register_fname = register_fname, }

end)

register('llpeg.print', function(myrequire) myrequire('strict') local compat = myrequire('advent.compat') local from = myrequire('llpeg.types').from local  CHARMAX,   CapKind,   Opcode,   TTag,   define,   define_tree_visitor,   numsiblings,   _ = from(myrequire('llpeg.types'), { 'CHARMAX', 'CapKind', 'Opcode', 'TTag', 'define', 'define_tree_visitor', 'numsiblings', })

function printcharset(tree) local result = "[" local i = 0 while i <= CHARMAX do     local first = i      while tree.set[i] and i <= CHARMAX do         i = i + 1 end if first == (i - 1) then -- unary range result = result .. string.format("(%02x)", first) elseif first < (i-1) then -- non-empty range result = result .. string.format("(%02x-%02x)", first, i - 1) end i = i + 1 end if tree.rest then result = result .. "(80-FFFF)" end return result .. "]" end

function printjmp(op, pc) return "-> " .. op.target end

local printinst_helper = define_opcode_visitor{ [Opcode.Char] = function(op, pc) return string.format("'%c' (%02x)", op.aux, op.aux) end, [Opcode.TestChar] = function(op, pc) return string.format("'%c' (%02x)", op.aux, op.aux) .. printjmp(op, pc) end, [Opcode.UTFR] = function(op, pc) return string.format("%d - %d", op.from, op.to) end, [Opcode.FullCapture] = function(op, pc) return string.format("%s (size = %s) (idx = %s)",                           op.cap.value, op.aux, op.key) end, [Opcode.OpenCapture] = function(op, pc) return string.format("%s (idx = %s)",                          op.cap.value, op.key) end, [Opcode.Set] = function(op, pc) return printcharset(op) end, [Opcode.TestSet] = function(op, pc) return printcharset(op) .. printjmp(op, pc) end, [Opcode.Span] = function(op, pc) return printcharset(op) end, [Opcode.OpenCall] = function(op, pc) return string.format("-> %d", op.target) -- rule number end, [Opcode.Behind] = function(op, pc) return string.format("%d", op.aux) end, [{Opcode.Jmp, Opcode.Call, Opcode.Commit, Opcode.Choice, Opcode.PartialCommit, Opcode.BackCommit, Opcode.TestAny, Opcode.PredChoice}] = function(op, pc) return printjmp(op, pc) end, [Opcode.Throw] = function(op, pc) -- labeled failure return string.format("(idx = %s)", op.key) end, [Opcode.ThrowRec] = function(op, pc) return printjmp(op, pc) .. string.format("(idx = %s)", op.key) end, default = function return '' end, } function printinst(pc, op, accum) table.insert(accum, string.format("%02d: %s ", pc, op.code.value)) table.insert(accum, printinst_helper(op, pc)) table.insert(accum, "\n") return accum end

function printpatt(code, accum) for pc,op in ipairs(code) do     printinst(pc, op, accum) end return accum end

function printcap(cap, indent) print(string.format("%s%s", string.rep(' ', indent), cap)) end

function printcap2close(captures, ncaptures, i, indent) local head = captures[i] i = i + 1 printcap(head, indent) -- print head capture while i <= ncaptures and head:inside(captures[i]) do     i = printcap2close(captures, ncaptures, i, indent + 2) -- print nested captures end if i <= ncaptures and head:isopencap then assert(captures[i]:isclosecap) printcap(captures[i], indent) -- print and skip close capture i = i + 1 end return i end

function printcaplist(captures, ncaptures) -- for debugging, first print a raw list of captures if ncaptures == nil then ncaptures = #captures end for i=1,ncaptures do     printcap(captures[i], 0) end print(">======"); local i=1 while i <= ncaptures and not captures[i]:isclosecap do    i = printcap2close(captures, ncaptures, i, 0) end if i > ncaptures then print(" ") end print("======="); end

local printtree_helper = define_tree_visitor{ [TTag.Char] = function(tree) local c = compat.utf8char(tree.n)     if c:find("%C") ~= nil then -- printable? return " '" .. c .. "'"     else return string.format(" (%02X)", tree.n)     end end, [TTag.Set] = function(tree) return printcharset(tree) end, [TTag.UTFR] = function(tree) return " " .. tree.from .. " - " .. tree.to  end, [{TTag.OpenCall, TTag.Call}] = function(tree) local ret = string.format(" key: %s", tree.key) local rule = tree.sib2 if rule ~= nil then ret = ret .. " (rule: " .. rule.n .. ")" end return ret end, [TTag.Behind] = function(tree) return " " .. tree.n  end, [TTag.Capture] = function(tree) return string.format(" kind: '%s' key: %s", tree.cap.value, tree.key) end, [TTag.Rule] = function(tree) return string.format(" key: %s", tree.key) end, [TTag.XInfo] = function(tree) return " n: " .. tree.n  end, [TTag.Grammar] = function(tree) return " " .. tree.n -- number of rules end, [TTag.Throw] = function(tree) return string.format(" key: %s", tree.key) end, default = function(tree) return '' end } function printtree(tree, indent, accum) local sibs = numsiblings[tree.tag]

table.insert(accum, string.rep(' ', indent)) table.insert(accum, tree.tag.value)

table.insert(accum, printtree_helper(tree)) table.insert(accum, "\n")

if tree.tag == TTag.Rule then sibs = 1 -- don't print sib2 elseif tree.tag == TTag.Grammar then local rule = tree.sib1 for i=1,tree.n do        printtree(rule, indent + 2, accum) rule = rule.sib2 end sibs = 0 -- siblings already handled end if sibs >= 1 then printtree(tree.sib1, indent + 2, accum) if sibs >= 2 then printtree(tree.sib2, indent + 2, accum) end end return accum end

local PREFIX = "" -- could also be "l." or "lpeg." etc local printrepl_helper printrepl_helper = define_tree_visitor{ [TTag.True] = function(tree, buf) table.insert(buf, PREFIX .. 'P""') end, [TTag.Any] = function(tree, buf) table.insert(buf, PREFIX .. 'P(1)') end, [TTag.Char] = function(tree, buf) table.insert(buf, PREFIX .. 'P"')     local c = compat.utf8char(tree.n)      if c:find("%C") ~= nil then -- printable?         table.insert(buf, c)      else         table.insert(buf, string.format('\\%02X', tree.n))      end      table.insert(buf, '"') end, [TTag.Set] = function(tree, buf) local nbuf = {} local insertchar = function(cp) local c = compat.utf8char(cp) if string.find(c, "^[^%w%p ]") ~= nil then table.insert(nbuf, string.format('\\x%02X', cp)) else table.insert(nbuf, c)        end end local nargs = 0 local inserttwo = function(cp1, cp2) if nargs > 0 then table.insert(nbuf, ',') end nargs = nargs + 1 table.insert(nbuf, '"')        insertchar(cp1)         insertchar(cp2)         table.insert(nbuf, '"') end

local i = 0 while i <= CHARMAX do        local first = i         while tree.set[i] and i <= CHARMAX do            i = i + 1 end if first == (i - 1) then -- unary range inserttwo(first, first) elseif first < (i-1) then -- non-empty range inserttwo(first, i-1) end i = i + 1 end

local r = table.concat(nbuf) if nargs == 1 then r = PREFIX .. 'S' .. r     else r = PREFIX .. 'S(' .. r .. ')' end

if tree.rest then table.insert(buf, '(')        table.insert(buf, r)         table.insert(buf, ' + ')         table.insert(buf, PREFIX)         table.insert(buf, 'utfR(0x80, 0x10FFFF))') else table.insert(buf, r)     end end, [TTag.UTFR] = function(tree, buf) table.insert(buf, string.format("%sutfR(0x%04X, 0x%04X)", PREFIX, tree.from, tree.to)) end, [{TTag.OpenCall, TTag.Call}] = function(tree, buf) table.insert(buf, string.format('%sV"%s"', PREFIX, tree.key)) end, [TTag.Not] = function(tree, buf) table.insert(buf, '-(')     printrepl_helper(tree.sib1, buf)      table.insert(buf, ')') end, [TTag.Seq] = function(tree, buf) table.insert(buf, "(")     printrepl_helper(tree.sib1, buf)      table.insert(buf, " * ")      printrepl_helper(tree.sib2, buf)      table.insert(buf, ")") end, [TTag.Choice] = function(tree, buf) table.insert(buf, "(")     printrepl_helper(tree.sib1, buf)      table.insert(buf, " + ")      printrepl_helper(tree.sib2, buf)      table.insert(buf, ")") end, [TTag.Rep] = function(tree, buf) printrepl_helper(tree.sib1, buf) table.insert(buf, "^0") end, --  [TTag.Behind] = function(tree)      return " " .. tree.n      end,   -- [TTag.Capture] = function(tree, buf) local repl = define_type_visitor{ string = function(v) return '"' .. v .. '"' -- xxx should handle escapes end, default = tostring, }     local name = nil local show_patt = false local show_key = false if tree.cap == CapKind.simple then name = 'C'        show_patt = true elseif tree.cap == CapKind.subst then name = 'Cs' show_patt = true elseif tree.cap == CapKind.table then name = 'Ct' show_patt = true elseif tree.cap == CapKind.pos then name = 'Cp' elseif tree.cap == CapKind.arg then name = 'Carg' show_key = true elseif tree.cap == CapKind.backref then name = 'Cb' show_key = true elseif tree.cap == CapKind.group then name = 'Cg' show_patt = true show_key = (tree.key ~= nil) end if name ~= nil then table.insert(buf, PREFIX) table.insert(buf, name) table.insert(buf, '(')        if show_patt then            printrepl_helper(tree.sib1, buf)            if show_key then               table.insert(buf, ', ')            end         end         if show_key then            table.insert(buf, repl(tree.key))         end         table.insert(buf, ')') return end if tree.cap == CapKind.string or        tree.cap == CapKind.num or         tree.cap == CapKind.query or         tree.cap == CapKind['function'] then printrepl_helper(tree.sib1, buf) table.insert(buf, ' / ') table.insert(buf, repl(tree.key)) return end -- fallback table.insert(buf, string.format("", tostring(tree.tag))) end, [TTag.Rule] = function(tree, buf) local key = tree.name if type(key) == 'number' then key = string.format("[%d]", key) end table.insert(buf, key) table.insert(buf, " = ") printrepl_helper(tree.sib1, buf) end, [TTag.Grammar] = function(tree, buf) table.insert(buf, PREFIX .. "P{") local rule = tree.sib1

local r = {} local first_rule_name = rule.name r[first_rule_name] = rule rule = rule.sib2

local names = {} for i=2,tree.n do        table.insert(names, rule.name) r[rule.name] = rule rule = rule.sib2 end

-- sort rule names table.sort(names) table.insert(names, 1, first_rule_name)

-- now print in order for _,name in ipairs(names) do        printrepl_helper(r[name], buf) table.insert(buf, ", ") end table.insert(buf, "}") end, --  [TTag.Throw] = function(tree)      return " key: " .. tree.key .. "  (rule: " .. tree.sib2.cap .. ")"   end,   -- default = function(tree, buf) table.insert(buf, string.format("", tostring(tree.tag))) end, } function printrepl(tree) local buf = {} printrepl_helper(tree, buf) return table.concat(buf) end

return { printcaplist = printcaplist, printcharset = printcharset, printinst = printinst, printpatt = printpatt, printrepl = printrepl, printtree = printtree, }

end)

register('llpeg.code', function(myrequire) myrequire('strict') local compat = myrequire('advent.compat') local from = myrequire('llpeg.types').from local  CHARMAX,   CapKind,   Opcode,   PE,   TTag,   define,   define_tree_visitor,   fullset,   newcharset,   numsiblings,   register_fname,   _ = from(myrequire('llpeg.types'), { 'CHARMAX', 'CapKind', 'Opcode', 'PE', 'TTag', 'define', 'define_tree_visitor', 'fullset', 'newcharset', 'numsiblings', 'register_fname', }) local printinst = myrequire('llpeg.print').printinst

local TRACE_INSTRUCTIONS = false

-- signals a "no-instruction" local NOINST = nil

-- don't optimize captures longer than this local MAXOFF = 15

-- forward declarations local codegen

local CompileState = {} CompileState.__index = CompileState

-- --
 * Analysis and some optimizations
 * Analysis and some optimizations

-- -- function charsettype(cs) local count = 0 local candidate for i,_ in pairs(cs.set) do     candidate = i      count = count + 1 end if cs.rest then if count == (CHARMAX + 1) then return Opcode.Any -- full set end elseif count == 0 then return Opcode.Fail -- empty set elseif count == 1 then return Opcode.Char, candidate -- single char end return Opcode.Set -- neither full nor empty nor singleton end
 * Check whether a charset is empty (returns IFail), singleton (IChar),
 * full (IAny), or none of those (ISet). When singleton, '*c' returns
 * which character it is. (When generic set, the set was the input,
 * so there is no need to return it.)

-- A few basic operations on charsets; returns new object

function cs_clone(cs) local result = newcharset for i,_ in pairs(cs.set) do     result.set[i] = true end result.rest = cs.rest return result end

function cs_complement(cs) local result = newcharset for i=0,CHARMAX do     if not cs.set[i] then result.set[i] = true end end result.rest = not cs.rest return result end

function cs_intersection(a, b)  local result = newcharset for i,_ in pairs(a.set) do     if a.set[i] and b.set[i] then result.set[i] = true end end result.rest = a.rest and b.rest return result end

function cs_union(a, b)  local result = newcharset for i=0,CHARMAX do     if a.set[i] or b.set[i] then result.set[i] = true end end result.rest = a.rest or b.rest return result end

function cs_diff(a, b)  local result = newcharset for i=0,CHARMAX do     if a.set[i] and not b.set[i] then result.set[i] = true end end result.rest = a.rest and not b.rest return result end

function cs_disjoint(a, b)  if a.rest == b.rest then return false end for i,_ in pairs(a.set) do     if b.set[i] then return false end end for i,_ in pairs(b.set) do     if a.set[i] then return false end end return true end

function cs_equal(a, b)  if a.rest ~= b.rest then return false end for i,_ in pairs(a.set) do     if not b.set[i] then return false end end for i,_ in pairs(b.set) do     if not a.set[i] then return false end end return true end

-- -- local tocharset = define_tree_visitor{ [TTag.Set] = function(v) return v -- copy set end, [TTag.Char] = function(v) -- only one char if v.n <= CHARMAX then local t = newcharset t.set[v.n] = true return t     else return nil end end, [TTag.Any] = function(v) return fullset end, [TTag.False] = function(v) return newcharset end, default = function(v) return nil end, } register_fname("tocharset", tocharset)
 * If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a
 * charset and return it; else return nil.

-- -- function CompileState:callrecursive(tree, f, default_value, ...) if tree.tag ~= TTag.Call then error("unexpected tree tag") end local rule = self.grammar.ruletab[tree.key] if rule.tag ~= TTag.Rule then error("unexpected tree sibling") end if tree.seen == true then return default_value -- node already visited else -- first visit local oldseen = tree.seen tree.seen = true local result = f(rule, ...) tree.seen = oldseen -- restore tree return result end end
 * Visit a TCall node taking care to stop recursion. If node not yet
 * visited, return 'f(rule for call)', otherwise return 'def' (default
 * value)

-- -- local hascaptures hascaptures = define_tree_visitor{ [{TTag.Capture, TTag.RunTime}] = function(tree, cs) return true end, [TTag.Call] = function(tree, cs) assert(cs ~= nil) return cs:callrecursive(tree, hascaptures, false, cs) end, [TTag.Rule] = function(tree, cs) -- do not follow siblings return hascaptures(tree.sib1, cs) end, [TTag.OpenCall] = function(tree, cs) error("should not happen") end, [TTag.Grammar] = function(tree, cs) -- make a fake compile state to hold the grammar, if necessary if cs == nil then cs = CompileState:new(nil) end return cs:withGrammar(tree, hascaptures, tree.sib1, cs) end, default = function(tree, cs) local n = numsiblings[tree.tag] if n == 1 then return hascaptures(tree.sib1, cs) -- tail call elseif n == 2 then if hascaptures(tree.sib1, cs) then return true end return hascaptures(tree.sib2, cs) -- tail call elseif n == 0 then return false else error("how many siblings does this have?") end end, } function CompileState:hascaptures(t) return hascaptures(t, self) end register_fname("hascaptures", hascaptures)
 * Check whether a pattern tree has captures

-- -- local checkaux checkaux = define_tree_visitor{ [{        TTag.Char, TTag.Set, TTag.Any, TTag.UTFR, TTag.False, TTag.OpenCall, TTag.Throw, }] = function(tree, pred, cs) return false -- not nullable end, [{TTag.Rep,TTag.True}] = function(tree, pred, cs) return true -- no fail end, [{TTag.Not,TTag.Behind}] = function(tree, pred, cs) -- can match empty, but can fail if pred == PE.nofail then return false else return true end end, [TTag.And] = function(tree, pred, cs) -- can match empty; fail iff body does if pred == PE.nullable then return true end return checkaux(tree.sib1, pred, cs) -- tail call end, [TTag.RunTime] = function(tree, pred, cs) -- can fail; match empty iff body does if pred == PE.nofail then return false end return checkaux(tree.sib1, pred, cs) -- tail call end, [TTag.Seq] = function(tree, pred, cs) if not checkaux(tree.sib1, pred, cs) then return false end return checkaux(tree.sib2, pred, cs) -- tail call end, [TTag.Choice] = function(tree, pred, cs) if checkaux(tree.sib2, pred, cs) then return true end return checkaux(tree.sib1, pred, cs) -- tail call end, [{ TTag.Capture, TTag.Rule, TTag.XInfo, }] = function(tree, pred, cs) return checkaux(tree.sib1, pred, cs) end, [TTag.Grammar] = function(tree, pred, cs) -- make a fake compile state to hold the grammar, if necessary if cs == nil then cs = CompileState:new(nil) end return cs:withGrammar(tree, checkaux, tree.sib1, pred, cs) end, [TTag.Call] = function(tree, pred, cs) -- open calls are assumed not nullable; checked again after grammar -- is fixed if cs == nil then return false end return checkaux(cs.grammar.ruletab[tree.key], pred, cs) end, } register_fname("checkaux", checkaux)
 * Checks how a pattern behaves regarding the empty string,
 * in one of two different ways:
 * A pattern is *nullable* if it can match without consuming any character;
 * A pattern is *nofail* if it never fails for any string
 * (including the empty string).
 * The difference is only for predicates and run-time captures;
 * for other patterns, the two properties are equivalent.
 * (With predicates, &'a' is nullable but not nofail. Of course,
 * nofail => nullable.)
 * These functions are all convervative in the following way:
 * p is nullable => nullable(p)
 * nofail(p) => p cannot fail
 * The function assumes that TOpenCall is not nullable;
 * this will be checked again when the grammar is fixed.
 * Run-time captures can do whatever they want, so the result
 * is conservative.

function nofail(t, cs) return checkaux(t, PE.nofail, cs) end

function CompileState:nofail(t) return nofail(t, self) end

function nullable(t, cs) return checkaux(t, PE.nullable, cs) end

function CompileState:nullable(t) return nullable(t, self) end

function nullable_with_grammar(t, grm) local cs = CompileState:new(nil) return cs:withGrammar(grm, nullable, t, cs) end

-- Note that we are counting characters, not bytes local fixedlen, fixedlen_helper fixedlen_helper = define_tree_visitor{ [{TTag.Char, TTag.Set, TTag.Any, TTag.UTFR}] = function(tree, len) return len + 1 end, [{TTag.False, TTag.True, TTag.Not, TTag.And, TTag.Behind}] = function(tree, len) return len end, [{TTag.Rep, TTag.RunTime, TTag.OpenCall, TTag.Throw,}] = function(tree, len) return -1 -- variable end, [{TTag.Capture, TTag.Rule, TTag.XInfo,}] = function(tree, len, cs) return fixedlen_helper(tree.sib1, len, cs) end, [TTag.Grammar] = function(tree, len, cs) -- make a fake compile state to hold the grammar, if necessary if cs == nil then cs = CompileState:new(nil) end return cs:withGrammar(tree, fixedlen_helper, tree.sib1, len, cs) end, [TTag.Call] = function(tree, len, cs) -- If evaluating outside the context of a grammar, conservatively -- return "variable" if cs == nil then return -1 end -- otherwise, carefully recurse local n1 = cs:callrecursive(tree, fixedlen, -1, cs) if n1 < 0 then return -1 end -- variable return len + n1  end, [TTag.Seq] = function(tree, len, cs) local n1 = fixedlen_helper(tree.sib1, len, cs) if n1 < 0 then return -1 end -- variable return fixedlen_helper(tree.sib2, n1, cs) end, [TTag.Choice] = function(tree, len, cs) local n1 = fixedlen_helper(tree.sib1, len, cs) local n2 = fixedlen_helper(tree.sib2, len, cs) if n1 ~= n2 or n1 < 0 then return -1 else return n1     end end, } function fixedlen(tree, cs) return fixedlen_helper(tree, 0, cs) -- supply default 0 for 'len' end function CompileState:fixedlen(t) return fixedlen(t, self) end register_fname("fixedlen_helper", fixedlen_helper)

-- -- local getfirst getfirst = define_tree_visitor{ [TTag.Char] = function(t, follow, cs) if t.n <= CHARMAX then return 0, tocharset(t) end -- conservative approximation! local s = newcharset s.rest = true return 0, s  end, [{ TTag.Set, TTag.Any, TTag.False }] = function(t, follow, cs) return 0, tocharset(t) end, [TTag.UTFR] = function(t, follow, cs) -- conservative approximation! local firstset = newcharset if t.from <= CHARMAX then for i=t.from, math.min(CHARMAX, t.to) do           firstset.set[i] = true end end if t.to > CHARMAX then -- conservative approximation of non-ascii unicode range firstset.rest = true end return 0, firstset end, [TTag.True] = function(t, follow, cs) return 1, follow -- 1 because this accepts the empty string end, [TTag.Throw] = function(t, follow, cs) -- labeled failure: must always throw the label return 1, fullset end, [TTag.Choice] = function(t, follow, cs) local firstset = newcharset local e1,e1set = getfirst(t.sib1, follow, cs) local e2,e2set = getfirst(t.sib2, follow, cs) local firstset = cs_union(e1set, e2set) local ret = 0 -- awkward lua5.1 way to say "e1 | e2" if (e1 % 2) == 1 or (e2 % 2) == 1 then ret = ret + 1 end e1,e2 = compat.rshift(e1, 1), compat.rshift(e2, 1) if (e1 % 2) == 1 or (e2 % 2) == 1 then ret = ret + 2 end return ret, firstset end, [TTag.Seq] = function(t, follow, cs) if not nullable(t.sib1, cs) then -- when p1 is not nullable, p2 has nothing to contribute return getfirst(t.sib1, fullset, cs) -- tail call else -- FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) local e2,csaux = getfirst(t.sib2, follow, cs) local e1,firstset = getfirst(t.sib1, csaux, cs) if e1 == 0 then return 0, firstset -- 'e1' ensures that first can be used elseif compat.rshift(e1, 1) % 2 == 1 or compat.rshift(e2, 1) % 2 == 1 then -- one of the children has a matchtime? return 2, firstset -- pattern has a matchtime capture else return e2, firstset -- else depends on e2        end end end, [TTag.Rep] = function(t, follow, cs) local _,firstset = getfirst(t.sib1, follow, cs) return 1, cs_union(firstset, follow, cs) -- accepts the empty string end, [{ TTag.Capture,TTag.Rule,TTag.XInfo }] = function(t, follow, cs) return getfirst(t.sib1, follow, cs) -- tail call end, [TTag.Grammar] = function(t, follow, cs) return cs:withGrammar(t, getfirst, t.sib1, follow, cs) end, [TTag.RunTime] = function(t, follow, cs) -- function invalidates any follow info local e,firstset = getfirst(t.sib1, fullset, cs) if e ~= 0 then -- function is not "protected"? return 2,firstset else -- pattern inside capture ensures first can be used return 0,firstset end end, [TTag.Call] = function(t, follow, cs) local rule = cs.grammar.ruletab[t.key] return getfirst(rule, follow, cs) -- tail call end, [TTag.And] = function(t, follow, cs) local e,firstset = getfirst(t.sib1, follow, cs) return e, cs_intersection(firstset, follow, cs) end, [{ TTag.Not, TTag.Behind }] = function(t, follow, cs) if t.tag == TTag.Not then local firstset = tocharset(t.sib1) if firstset ~= nil then return 1,cs_complement(firstset) -- could match empty input end end -- the TNot or TBehind gives no new information -- call getfirst only to check for math-time captures local e,_ = getfirst(t.sib1, follow, cs) if e%2 == 0 then e = e + 1 end -- set the lsb; could match empty input return e, follow -- uses follow end, } function CompileState:getfirst(t, follow) return getfirst(t, follow, self) end register_fname("getfirst", getfirst)
 * Computes the 'first set' of a pattern.
 * The result is a conservative aproximation:
 * match p ax -> x (for some x) ==> a belongs to first(p)
 * or
 * a not in first(p) ==> match p ax -> fail (for all x)
 * The set 'follow' is the first set of what follows the
 * pattern (full set if nothing follows it).
 * The function returns 0 when this resulting set can be used for
 * test instructions that avoid the pattern altogether.
 * A non-zero return can happen for two reasons:
 * 1) match p  ->            ==> return has bit 1 set
 * (tests cannot be used because they would always fail for an empty input);
 * 2) there is a match-time capture ==> return has bit 2 set
 * (optimizations should not bypass match-time captures).
 * 2) there is a match-time capture ==> return has bit 2 set
 * (optimizations should not bypass match-time captures).

--  -- ie, a single character of lookahead is enough to evaluate the pattern   -- rooted at this node -- local headfail headfail = define_tree_visitor{ [{TTag.Char, TTag.Set, TTag.Any, TTag.False}] = function(t, cs) return true end, [{TTag.True, TTag.Rep, TTag.RunTime, TTag.Not, -- even though we are codepoint-based, we don't have a TestUTFR instruction -- so we can't use a Test instruction on the first character, which is    -- implicitly what headfail is checking for. TTag.UTFR, TTag.Behind, TTag.Throw}] = function(t, cs) return false end, [{TTag.Capture, TTag.Rule, TTag.XInfo, TTag.And}] = function(t, cs) return headfail(t.sib1, cs) -- tail call end, [TTag.Grammar] = function(t, cs) return cs:withGrammar(t, headfail, t.sib1, cs) end, [TTag.Call] = function(t, cs) local rule = cs.grammar.ruletab[t.key] return headfail(rule, cs) -- tail call end, [TTag.Seq] = function(t, cs) if not nofail(t.sib2, cs) then -- if the second child could possibly fail, then we can't        -- evaluate the entire seq based just on the first child return false end return headfail(t.sib1, cs) -- tail call end, [TTag.Choice] = function(t, cs) -- both children need to be headfail for this to be headfail if not headfail(t.sib1, cs) then return false end return headfail(t.sib2, cs) -- tail call end, } function CompileState:headfail(t) return headfail(t, self) end register_fname("headfail", headfail)
 * If 'headfail(tree)' true, then 'tree' can fail only depending on the
 * next character of the subject.

-- -- local needfollow needfollow = define_tree_visitor{ [{TTag.Char, TTag.Set, TTag.Any, TTag.UTFR, TTag.False, TTag.True, TTag.And, TTag.Not, TTag.RunTime, TTag.Grammar, TTag.Call, TTag.Behind, TTag.Throw, }] = function(tree) return false end, [{TTag.Choice, TTag.Rep}] = function(tree) return true end, [TTag.Capture] = function(tree) return needfollow(tree.sib1) end, [TTag.Seq] = function(tree) return needfollow(tree.sib2) end, } register_fname("needfollow", needfollow)
 * Check whether the code generation for the given tree can benefit
 * from a follow set (to avoid computing the follow set when it is
 * not needed)

-- --
 * Code generation
 * Code generation

local Instruction = {} Instruction.__index = Instruction

function Instruction:new(arg) local opcode = arg[1] if opcode == nil then error("no opcode") end -- target is rule # for open calls before correction, and absolute pc after local instr = { code = opcode, exec = opcode.exec, -- copy the exec function from the opcode! aux = arg.aux, -- used for the "primary argument" key = arg.key, -- used for string-valued arguments target = arg.target, -- used for jmp-like instructions from = arg.from, -- inclusive start, for ranges to = arg.to, -- inclusive end, for ranges set = arg.set, -- charset <= CHARMAX rest = arg.rest, -- include characters above CHARMAX? cap = arg.cap, -- used for "capture kind" }  setmetatable(instr, self) instr:setCode(opcode) -- opportunity to add tracing logic! return instr end

function Instruction:setCode(opcode) self.code = opcode local exec = opcode.exec if TRACE_INSTRUCTIONS then local str self.exec = function(self, state, ...) if str == nil then str = table.concat(printinst(0, self, { "Executing " })):gsub("\n","") end print(state.bytePos, state.codepoint, str) return exec(self, state, ...) end else self.exec = exec end end

-- state for the compiler

function CompileState:new(p) local cs = { p = p,  } setmetatable(cs, self) return cs end

function CompileState:withGrammar(g, f, ...) local lastGrammar = self.grammar self.grammar = g  local result = compat.pack(f(...)) self.grammar = lastGrammar return compat.unpack(result) end

function CompileState:codegen(tree, opt, tt, fl) assert(fl.tag == TTag.Set) -- just a little helper return codegen(tree, self, opt, tt, fl) end

function CompileState:getinstr(i) return self.p.code[i] end

function CompileState:addinstruction(arg) local code = self.p.code table.insert(code, Instruction:new(arg)) return #code end

function CompileState:gethere local code = self.p.code return 1 + #code end

function CompileState:jumptothere(pc, where) if pc ~= NOINST then local code = self.p.code code[pc].target = where end end

function CompileState:jumptohere(pc) self:jumptothere(pc, self:gethere) end

function codethrow(cs, throw) local rule = nil if cs.grammar ~= nil then -- we only lookup/match *string* rule names, not numeric indices rule = cs.grammar.ruletab[tostring(throw.key)] end if rule ~= nil then return cs:addinstruction{ Opcode.ThrowRec, key=throw.key, -- rule name / error label target=rule.n -- recovery rule number }  else return cs:addinstruction{ Opcode.Throw, key=throw.key, -- rule name / error label -- no recovery rule }  end end

function codeutfr(cs, tree) return cs:addinstruction{ Opcode.UTFR, from = tree.from, to = tree.to, } end

-- -- function codechar(cs, codepoint, tt) if tt ~= NOINST and cs:getinstr(tt).code == Opcode.TestChar and cs:getinstr(tt).aux == codepoint then cs:addinstruction{Opcode.Any} else cs:addinstruction{Opcode.Char, aux=codepoint,} end end
 * Code an IChar instruction, or IAny if there is an equivalent
 * test dominating it

-- -- function addcharset(cs, codepoint) -- static void addcharset (CompileState *compst, const byte *cs) { int p = gethere(compst);  int i;  for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++)    nextinstruction(compst);  /* space for buffer */  /* fill buffer with charset */      loopset(j, getinstr(compst, p).buff[j] = cs[j]);   -- end
 * Add a charset posfix to an instruction

-- -- function codecharset(cs, tree, tt) local op,codepoint = charsettype(tree) if op == Opcode.Char then return codechar(cs, codepoint, tt) elseif op == Opcode.Set then -- non-trivial set? if tt ~= NOINST and cs:getinstr(tt).code == Opcode.TestSet and cs_equal(tree, cs:getinstr(tt)) then return cs:addinstruction{Opcode.Any} else return cs:addinstruction{ Opcode.Set, set = tree.set, -- XXX ensure immutable rest = tree.rest, }     end else return cs:addinstruction{op} -- Any or Fail end end
 * code a char set, optimizing unit sets for IChar, "complete"
 * sets for IAny, and empty sets for IFail; also use an IAny
 * when instruction is dominated by an equivalent test.

-- -- function codetestset(cs, tree, e)  if e ~= 0 then return NOINST end local op,codepoint = charsettype(tree) if op == Opcode.Fail then return cs:addinstruction{Opcode.Jmp, target = NOINST} -- always jump elseif op == Opcode.Any then return cs:addinstruction{Opcode.TestAny, target = NOINST} elseif op == Opcode.Char then return cs:addinstruction{ Opcode.TestChar, target = NOINST, aux = codepoint, }  elseif op == Opcode.Set then return cs:addinstruction{ Opcode.TestSet, target = NOINST, set = tree.set, -- XXX ensure immutable rest = tree.rest, }  else error("unreachable") end end
 * code a test set, optimizing unit sets for ITestChar, "complete"
 * sets for ITestAny, and empty sets for IJmp (always fails).
 * 'e' is nonzero iff test should accept the empty string. (Test
 * instructions in the current VM never accept the empty string.)

-- -- function codebehind(cs, tree) if tree.n > 0 then cs:addinstruction{ Opcode.Behind, aux = tree.n } end return cs:codegen(tree.sib1, false, NOINST, fullset) end
 *  == behind n;  (where n = fixedlen(p))

-- -- function codechoice(cs, p1, p2, opt, fl) local emptyp2 = (p2.tag == TTag.True) local e1, cs1 = cs:getfirst(p1, fullset) local headfailp1 = cs:headfail(p1) local e2, cs2 if not headfailp1 and e1 == 0 then e2, cs2 = cs:getfirst(p2, fl) -- avoid computing unless necessary end if headfailp1 or (e1 == 0 and cs_disjoint(cs1, cs2)) then --  == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: local test = codetestset(cs, cs1, 0) local jmp = NOINST cs:codegen(p1, false, test, fl) if not emptyp2 then jmp = cs:addinstruction{Opcode.Jmp, target = NOINST } end cs:jumptohere(test) cs:codegen(p2, opt, NOINST, fl) cs:jumptohere(jmp) elseif opt and emptyp2 then -- p1? == IPartialCommit; p1     cs:jumptohere(cs:addinstruction{Opcode.PartialCommit, target = NOINST}) cs:codegen(p1, true, NOINST, fullset) else --  == -- test(first(p1)) -> L1; choice L1; ; commit L2; L1: ; L2: local test = codetestset(cs, cs1, e1) local pchoice = cs:addinstruction{Opcode.Choice, target = NOINST} cs:codegen(p1, emptyp2, test, fullset) local pcommit = cs:addinstruction{Opcode.Commit, target = NOINST} cs:jumptohere(pchoice) cs:jumptohere(test) cs:codegen(p2, opt, NOINST, fl) cs:jumptohere(pcommit) end end
 * Choice; optimizations:
 * - when p1 is headfail or when first(p1) and first(p2) are disjoint,
 * than a character not in first(p1) cannot go to p1 and a character
 * in first(p1) cannot go to p2, either because p1 will accept
 * (headfail) or because it is not in first(p2) (disjoint).
 * (The second case is not valid if p1 accepts the empty string,
 * as then there is no character at all...)
 * - when p2 is empty and opt is true; a IPartialCommit can reuse
 * the Choice already active in the stack.

-- -- function codeand(cs, tree, tt) -- labeled failure: optimization disabled because in case of a failure it    does not report the expected error position (the current subject position     when begin the matching of <&p>) -- local pchoice = cs:addinstruction{Opcode.PredChoice, target = NOINST} cs:codegen(tree, false, tt, fullset) local pcommit = cs:addinstruction{Opcode.BackCommit, target = NOINST} cs:jumptohere(pchoice) cs:addinstruction{Opcode.Fail} cs:jumptohere(pcommit) end
 * And predicate
 * optimization: fixedlen(p) = n ==> <&p> == ; behind n
 * (valid only when 'p' has no captures)

-- -- function codecapture(cs, tree, tt, fl) local len = cs:fixedlen(tree.sib1) if len >= 0 and len <= MAXOFF and not cs:hascaptures(tree.sib1) then cs:codegen(tree.sib1, false, tt, fl) cs:addinstruction{ Opcode.FullCapture, cap = tree.cap, key = tree.key, -- capture name aux = len, }  else assert(tree.cap ~= nil) cs:addinstruction({           Opcode.OpenCapture,            cap = tree.cap,            key = tree.key, -- capture name      }) cs:codegen(tree.sib1, false, tt, fl) cs:addinstruction({           Opcode.CloseCapture,            cap = CapKind.close,      }) end end
 * Captures: if pattern has fixed (and not too big) length, and it
 * has no nested captures, use a single IFullCapture instruction
 * after the match; otherwise, enclose the pattern with OpenCapture -
 * CloseCapture.

function coderuntime(cs, tree, tt) cs:addinstruction({        Opcode.OpenCapture,         cap = CapKind.group,         key = tree.key, -- capture *function*   }) cs:codegen(tree.sib1, false, tt, fullset) cs:addinstruction({        Opcode.CloseRunTime,         cap = CapKind.close,   }) end

-- -- function coderep(cs, tree, opt, fl) local st = tocharset(tree) if st ~= nil then return cs:addinstruction{ Opcode.Span, set = st.set, rest = st.rest, }  end local e1,st = cs:getfirst(tree, fullset) if cs:headfail(tree) or (e1 == 0 and cs_disjoint(st, fl)) then -- L1: test (fail(p1)) -> L2; ; jmp L1; L2: local test = codetestset(cs, st, 0) cs:codegen(tree, false, test, fullset) local jmp = cs:addinstruction{Opcode.Jmp, target = NOINST} cs:jumptohere(test) cs:jumptothere(jmp, test) else -- test(fail(p1)) -> L2; choice L2; L1: ; partialcommit L1; L2: -- or (if 'opt'): partialcommit L1; L1: ; partialcommit L1; local test = codetestset(cs, st, e1) local pchoice = NOINST if opt then cs:jumptohere(cs:addinstruction{Opcode.PartialCommit, target = NOINST}) else pchoice = cs:addinstruction{Opcode.Choice, target = NOINST} end local l2 = cs:gethere cs:codegen(tree, false, NOINST, fullset) local commit = cs:addinstruction{Opcode.PartialCommit, target = NOINST} cs:jumptothere(commit, l2) cs:jumptohere(pchoice) cs:jumptohere(test) end end
 * Repetition; optimizations:
 * When pattern is a charset, can use special instruction ISpan.
 * When pattern is head fail, or if it starts with characters that
 * are disjoint from what follows the repetions, a simple test
 * is enough (a fail inside the repetition would backtrack to fail
 * again in the following pattern, so there is no need for a choice).
 * When 'opt' is true, the repetion can reuse the Choice already
 * active in the stack.

-- -- function codenot(cs, tree) local e,st = cs:getfirst(tree, fullset) local test = codetestset(cs, st, e)  if cs:headfail(tree) then -- test (fail(p1)) -> L1; fail; L1: cs:addinstruction{Opcode.Fail} else -- test(fail(p))-> L1; choice L1; ; failtwice; L1: local pchoice = cs:addinstruction{Opcode.PredChoice, target = NOINST } cs:codegen(tree, false, NOINST, fullset) cs:addinstruction{Opcode.FailTwice} cs:jumptohere(pchoice) end cs:jumptohere(test) end
 * Not predicate; optimizations:
 * In any case, if first test fails, 'not' succeeds, so it can jump to
 * the end. If pattern is headfail, that is all (it cannot fail
 * in other parts); this case includes 'not' of simple sets. Otherwise,
 * use the default code (a choice plus a failtwice).

-- find the final destination of a sequence of jumps function finaltarget(code, pc) while code[pc].code == Opcode.Jmp do     pc = code[pc].target end return pc end

-- final label (after traversing any jumps) function finallabel(code, pc) return finaltarget(code, code[pc].target) end

-- -- function correctcalls(cs, positions, from, to) local code = cs.p.code for i=from,(to-1) do     local op = code[i] if op.code == Opcode.OpenCall or op.code == Opcode.ThrowRec then local n = op.target -- rule number local rule = positions[n] -- rule position if rule == from or code[rule - 1].code == Opcode.Ret then -- sanity check! ok! else error("bad rule position") end if op.code == Opcode.OpenCall then if code[finaltarget(code, i+1)].code == Opcode.Ret then -- call; ret => tail call op:setCode(Opcode.Jmp) else op:setCode(Opcode.Call) -- open call no more end end op.target = rule end end end
 * change open calls to calls, using list 'positions' to find
 * correct offsets; also optimize tail calls

-- -- function codegrammar(cs, tree) local firstcall = cs:addinstruction{Opcode.Call, target = NOINST} -- call initial rule local jumptoend = cs:addinstruction{Opcode.Jmp, target = NOINST} -- jump to the end local start = cs:gethere -- here starts the initial rule cs:jumptohere(firstcall)
 * Code for a grammar:
 * call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:

local positions = {} local rule = tree.sib1 for i=1,tree.n do     local pattern = rule.sib1 positions[i] = cs:gethere -- save rule position cs:codegen(rule.sib1, false, NOINST, fullset) -- code rule cs:addinstruction{Opcode.Ret} rule = rule.sib2 end if rule.tag ~= TTag.True then error("impossible") end cs:jumptohere(jumptoend) correctcalls(cs, positions, start, cs:gethere) end

function codecall(cs, tree) local rule = cs.grammar.ruletab[tree.key] assert(rule ~= nil) assert(rule.n ~= nil) return cs:addinstruction{ Opcode.OpenCall, -- to be corrected later target = rule.n -- rule number } end

-- -- function codeseq1(cs, p1, p2, tt, fl) assert(fl.tag == TTag.Set) if needfollow(p1) then local _, fl1 = cs:getfirst(p2, fl) -- p1 follow is p2 first cs:codegen(p1, false, tt, fl1) else -- use fullset as follow cs:codegen(p1, false, tt, fullset) end if cs:fixedlen(p1) ~= 0 then -- can 'p1' consume anything? return NOINST -- invalidate test else return tt -- else 'tt' still protects sib2 end end
 * Code first child of a sequence
 * (second child is called in-place to allow tail call)
 * Return 'tt' for second child

-- --
 * Main code-generation function: dispatch to auxiliar functions
 * according to kind of tree. ('needfollow' should return true
 * only for consructions that use 'fl'.)

-- -- codegen = define_tree_visitor{ [TTag.Char] = function(tree, cs, opt, tt, fl) return codechar(cs, tree.n, tt) end, [TTag.Any] = function(tree, cs, opt, tt, fl) return cs:addinstruction{Opcode.Any} end, [TTag.Set] = function(tree, cs, opt, tt, fl) return codecharset(cs, tree, tt) end, [TTag.True] = function(tree, cs, opt, tt, fl) return -- do nothing end, [TTag.False] = function(tree, cs, opt, tt, fl) return cs:addinstruction{Opcode.Fail} end, [TTag.UTFR] = function(tree, cs, opt, tt, fl) return codeutfr(cs, tree) end, [TTag.Choice] = function(tree, cs, opt, tt, fl) return codechoice(cs, tree.sib1, tree.sib2, opt, fl) end, [TTag.Rep] = function(tree, cs, opt, tt, fl) return coderep(cs, tree.sib1, opt, fl) end, [TTag.Behind] = function(tree, cs, opt, tt, fl) return codebehind(cs, tree) end, [TTag.Not] = function(tree, cs, opt, tt, fl) return codenot(cs, tree.sib1) end, [TTag.And] = function(tree, cs, opt, tt, fl) return codeand(cs, tree.sib1, tt) end, [TTag.Capture] = function(tree, cs, opt, tt, fl) return codecapture(cs, tree, tt, fl) end, [TTag.RunTime] = function(tree, cs, opt, tt, fl) return coderuntime(cs, tree, tt) end, [TTag.Grammar] = function(tree, cs, opt, tt, fl) return cs:withGrammar(tree, codegrammar, cs, tree) end, [TTag.Call] = function(tree, cs, opt, tt, fl) return codecall(cs, tree) end, [TTag.Seq] = function(tree, cs, opt, tt, fl) tt = codeseq1(cs, tree.sib1, tree.sib2, tt, fl) -- code 'p1' return cs:codegen(tree.sib2, opt, tt, fl) -- tail call end, [TTag.Throw] = function(tree, cs, opt, tt, fl) return codethrow(cs, tree) end, ["assert"] = function(tree, cs, opt, tt, fl) assert(fl.tag == TTag.Set) assert(opt ~= 0) end, } register_fname("codegen", codegen)
 * code generation is recursive; 'opt' indicates that the code is being
 * generated as the last thing inside an optional pattern (so, if that
 * code is optional too, it can reuse the 'IChoice' already in place for
 * the outer pattern). 'tt' points to a previous test protecting this
 * code (or NOINST). 'fl' is the follow set of the pattern.

-- -- function peephole(cs) local code = cs.p.code local jmpswitch local switch = define_opcode_visitor{ -- instructions with labels [{Opcode.Choice, Opcode.Call, Opcode.Commit, Opcode.PartialCommit, Opcode.BackCommit, Opcode.TestChar, Opcode.TestSet, Opcode.TestAny}] = function(op, i)        cs:jumptothere(i, finallabel(code, i)) end, [Opcode.Jmp] = function(op, i)        local ft = finaltarget(code, i)         jmpswitch(code[ft], i, ft) -- jumping to what? end, default = function end, }  jmpswitch = define_opcode_visitor{ -- instructions with unconditional implicit jumps [{Opcode.Ret,Opcode.Fail,Opcode.FailTwice,Opcode.End}] = function(op, i, ft) code[i]:setCode(code[ft].code) -- jump becomes that instruction end, -- instructions with unconditional explicit jumps [{Opcode.Commit, Opcode.PartialCommit, Opcode.BackCommit}] = function(op, i, ft) local fft = finallabel(code, ft) code[i]:setCode(code[ft].code) -- jump becomes that instruction cs:jumptothere(i, fft) -- with an optimized target switch(code[i], i) -- reoptimize the label end, default = function(op, i, ft) cs:jumptothere(i, ft) -- optimize label end, }  for i=1,#code do      switch(code[i], i)   end end
 * Optimize jumps and other jump-like instructions.
 * * Update labels of instructions with labels to their final
 * destinations (e.g., choice L1; ... L1: jmp L2: becomes
 * choice L2)
 * * Jumps to other instructions that do jumps become those
 * instructions (e.g., jump to return becomes a return; jump
 * to commit becomes a commit)

-- thread the instructions to speed up dispatch during execution function thread(cs) local code = cs.p.code for i=1,#code-1 do     code[i].next = code[i+1] if code[i].target ~= nil then code[i].branch = code[code[i].target] end end end

function compile(p) local compst = CompileState:new(p) p.code = {} assert(fullset.tag == TTag.Set) compst:codegen(p, false, NOINST, fullset) compst:addinstruction{Opcode.End} peephole(compst) thread(compst) return p.code end

return { Instruction = Instruction, compile = compile, cs_clone = cs_clone, cs_complement = cs_complement, cs_diff = cs_diff, cs_intersection = cs_intersection, cs_union = cs_union, fixedlen = fixedlen, hascaptures = hascaptures, nofail = nofail, nullable = nullable, nullable_with_grammar = nullable_with_grammar, tocharset = tocharset, }

end)

register('llpeg.utf8util', function(myrequire) myrequire('strict')

local utf8util = {}

function utf8util.codepointAt(s, pos) local c1 = string.byte(s, pos) if c1 <= 0x7F then return c1, 1 end local c2 = string.byte(s, pos + 1) if c1 <= 0xDF then return ((c1 % 0x20 ) * 0x40) + (c2 % 0x40), 2 end local c3 = string.byte(s, pos + 2) if c1 <= 0xEF then return (((c1 % 0x10) * 0x40) + (c2 % 0x40)) * 0x40 + (c3 % 0x40), 3 end local c4 = string.byte(s, pos + 3) if c1 <= 0xF7 then return ((((c1 % 0x08) * 0x40) + (c2 % 0x40)) * 0x40 + (c3 % 0x40)) * 0x40 + (c4 % 0x40), 4 end error( "bad utf8" ) end

-- same as utf8.offset in Lua 5.3 standard library function utf8util.offset(s, n, i)  if n > 0 then error("unimplemented") end while n < 0 do     i = i - 1 if i < 1 then return nil end local c = string.byte(s, i)     if c 0xBF then n = n + 1 end end return i end

return utf8util

end)

register('llpeg.list', function(myrequire) local List = {} List.__index = List

function List:new return setmetatable({ n = 0 }, self) end

function List:__len return self.n end

function List:push(val) local n = self.n + 1 self[n] = val self.n = n end

function List:pop local n = self.n  assert(n > 0) local old = self[n] self[n] = nil self.n = n - 1 return old end

function List:insert(pos, val) for i=self.n,pos,-1 do     self[i+1] = self[i] end self[pos] = val self.n = self.n + 1 end

return List

end)

register('llpeg.cap', function(myrequire) myrequire('strict') local compat = myrequire('advent.compat') local from = myrequire('llpeg.types').from local  CapKind,   _ = from(myrequire('llpeg.types'), { 'CapKind', }) local  printcaplist,   _ = from(myrequire('llpeg.print'), { 'printcaplist', }) local List = myrequire('llpeg.list')

local MAXRECLEVEL = 200

local Capture = {} Capture.__index = Capture

-- kind is CapKind of the capture -- bytePos is the subject position (in bytes) -- byteLen is the length of the capture (in bytes) -- extra is extra info (group name, arg index, etc) function Capture:new(kind, bytePos, byteLen, extra) assert(getmetatable(kind) == CapKind) return setmetatable({        kind = kind, bytePos = bytePos, byteLen = byteLen, extra = extra,   }, self) end

function Capture:__tostring return string.format("Capture{kind=%s, pos=%d, len=%s, extra=%s}",                       self.kind, self.bytePos, self.byteLen, self.extra) end

function Capture:isopencap return self.byteLen == nil end

-- true if c2 is (any number of levels) inside self function Capture:inside(c2) if self:isopencap then return not c2:isclosecap else return c2.bytePos < (self.bytePos + self.byteLen) end end

function Capture:isclosecap return self.kind == CapKind.close end

-- -- function Capture:size(close) if self:isopencap then assert(close:isclosecap) return close.bytePos - self.bytePos else return self.byteLen end end
 * Return the size of capture 'cap'. If it is an open capture, 'close'
 * must be its corresponding close.

function CapKind:newCapture(bytePos, byteLen, extra) return Capture:new(self, bytePos, byteLen, extra) end

local CapState = {} CapState.__index = CapState

-- Capture cap: current capture -- Capture ocap: (original) capture list -- int ptop: index of last argument to 'match' -- string s: original string -- int valuecached: value stored in cache slot -- int reclevel: recursion level function CapState:new(captures, source, extraArgs) return setmetatable({        captures = captures,         index = 1,         source = source,         valuecached = {},         reclevel = 0,         extraArgs = extraArgs,    }, self) end

function CapState:cap -- helper return self.captures[self.index] end

function CapState:advance -- helper local i = self.index local cap = self.captures[i] self.index = i + 1 return cap, i end

function CapState:substr(start, len) -- helper return string.sub(self.source, start, start + len - 1) end

function CapState:skipclose(head) if head:isopencap then assert(self.captures[self.index]:isclosecap) self.index = self.index + 1 end end

function CapState:closesize(head) return head:size(self:cap) end

-- -- function CapState:nextcap local cap = self:cap if cap:isopencap then -- must look for a close local n = 0 -- number of opens waiting a close while true do -- look for corresponding close self.index = self.index + 1 cap = self:cap if cap:isopencap then n = n + 1 elseif cap:isclosecap then if n == 0 then break end n = n - 1 end end self.index = self.index + 1 -- skip last close (or entire single capture) else self.index = self.index + 1 while cap:inside(self:cap) do        self.index = self.index + 1 -- skip captures inside the current one end end end
 * Go to the next capture at the same level

-- -- function CapState:findopen(i) -- captures[i] is the close that we want to match assert(self.captures[i]:isclosecap) local n = 0 -- number of closes waiting an open while i > 1 do     i = i - 1 local cap = self.captures[i] if cap:isclosecap then n = n + 1 -- one more open to skip elseif cap:isopencap then if n == 0 then return cap,i end n = n - 1 end end error("couldn't find open") end
 * Goes back in a list of captures looking for an open capture
 * corresponding to a close

-- -- function CapState:capvisible(igrp, ref) local i = igrp local grp = self.captures[igrp] while i > 1 do     i = i - 1 local cap = self.captures[i] if cap:isclosecap then return true -- can stop the search elseif cap:inside(grp) then -- is 'grp' inside cap? return cap:inside(ref) -- ok iff cap also contains ref end end return true -- 'grp' is not inside any capture end
 * Checks whether group 'grp' is visible to 'ref', that is, 'grp' is
 * not nested inside a full capture that does not contain 'ref'. (We
 * only need to care for full captures because the search at 'findback'
 * skips open-end blocks; so, if 'grp' is nested in a non-full capture,
 * 'ref' is also inside it.) To check this, we search backward for the
 * inner full capture enclosing 'grp'. A full capture cannot contain
 * non-full captures, so a close capture means we cannot be inside a
 * full capture anymore.

-- -- function CapState:findback(name, i)  if i == nil then i = self.index end local ref = self.captures[i] while i > 1 do     i = i - 1 local cap = self.captures[i] if cap:isclosecap or not cap:inside(ref) then if cap:isclosecap then cap,i = self:findopen(i) end if cap.kind == CapKind.group and self:capvisible(i, ref) then if cap.extra == name then return cap,i end end end end error("back reference '"..name.."' not found") end
 * Try to find a named group capture with the name given;
 * goes backward from 'i'.

function CapState:getcaptures local result = List:new while not self:cap:isclosecap do     self:pushcapture(result) end return result end

function CapState:pushcapture(result) self.reclevel = self.reclevel + 1 if self.reclevel > MAXRECLEVEL then error("subcapture nesting too deep") end local cap = self.captures[self.index] assert(cap.kind.push ~= nil) local res = cap.kind.push(self, cap, result) self.reclevel = self.reclevel - 1 return res end

-- helper functions for pushcapture

-- -- function CapState:pushnestedvalues(result, addextra) local head = self:advance local n = 0 -- number of pushed subvalues -- repeat for all nested patterns while head:inside(self:cap) do     n = n + self:pushcapture(result) end if addextra or n == 0 then -- need extra? result:push(self:substr(head.bytePos, self:closesize(head))) n = n + 1 end self:skipclose(head) return n end
 * Push on the Lua stack all values generated by nested captures inside
 * the current capture. Returns number of values pushed. 'addextra'
 * makes it push the entire match after all captured values. The
 * entire match is pushed also if there are no other nested values,
 * so the function never returns zero.

-- -- function CapState:pushonenestedvalue(result) local n = self:pushnestedvalues(result, false) if n == 0 then result:push(nil) -- ensure there's exactly one value return 1 end while n > 1 do     result:pop -- pop extra values n = n - 1 end return n end
 * Push only the first value generated by nested captures

-- visitor patterns for pushcapture function CapKind.position.push(capstate, cap, result) result:push(cap.bytePos) capstate.index = capstate.index + 1 return 1 end

function CapKind.const.push(capstate, cap, result) result:push(cap.extra) capstate.index = capstate.index + 1 return 1 end

function CapKind.arg.push(capstate, cap, result) local n = cap.extra if n > capstate.extraArgs.n then error(string.format("reference to absent extra argument #%d", n)) end result:push(capstate.extraArgs[n]) capstate.index = capstate.index + 1 return 1 end

function CapKind.simple.push(capstate, cap, result) local k = capstate:pushnestedvalues(result, true) -- reorder so that the whole match is the first result, not the last local last = result:pop result:insert(2 + #result - k, last) return k end

-- missing a bunch

-- -- function CapKind.table.push(capstate, cap, result) -- aka tablecap local t = {} result:push(t) local head = capstate:advance
 * Table capture: creates a new table and populates it with nested
 * captures.

local n = 0 while head:inside(capstate:cap) do     cap = capstate:cap if cap.kind == CapKind.group and cap.extra ~= nil then -- named group? capstate:pushonenestedvalue(result) t[cap.extra] = result:pop -- move it into table else -- not a named group local k = capstate:pushcapture(result) for i=k,1,-1 do           t[n + i] = result:pop -- move it into table (indexed) end n = n + k     end end capstate:skipclose(head) return 1 -- number of values pushed (only the table) end

-- -- function CapKind.query.push(capstate, cap, result) -- aka querycap capstate:pushonenestedvalue(result) local key = result:pop local tbl = cap.extra assert(type(tbl) == "table") local val = tbl[key] if val ~= nil then result:push(val) return 1 else return 0 end end
 * Table-query capture

-- -- function CapKind.fold.push(capstate, cap, result) -- aka foldcap local f = cap.extra assert(type(f) == "function") local head = capstate:advance if capstate:cap:isclosecap then -- no nested captures? (large subject) error("no initial value for fold capture") end local args = List:new local n = capstate:pushcapture(args) if n == 0 then -- nested captures with no values? error("no initial value for fold capture") end local accum = args[1] -- leave only one result for accumulator while head:inside(capstate:cap) do     args = List:new args:push( accum ) -- put accumulator first n = capstate:pushcapture(args) -- get next capture's values accum = f(compat.unpack(args)) end capstate:skipclose(head) -- only accumulator left in result result:push(accum) return 1 end
 * Fold capture

-- -- CapKind["function"].push = function(capstate, cap, result) local f = cap.extra assert(type(f) == "function") local args = List:new local n = capstate:pushnestedvalues(args, false) local r = compat.pack(f(compat.unpack(args))) for i=1,r.n do     result:push(r[i]) end return r.n end
 * Function capture

-- -- function CapKind.acc.push(capstate, cap, result) -- aka accumulatorcap if #result == 0 then error("no previous value for accumulator capture") end local f = cap.extra assert(type(f) == "function") local prev = #result local args = List:new args:push(result[prev]) local n = capstate:pushnestedvalues(args, false) result[prev] = f(compat.unpack(args)) return 0 -- did not add any extra value end
 * Accumulator capture

-- -- function CapKind.num.push(capstate, cap, result) -- aka numcap local idx = cap.extra -- value to select if idx == 0 then -- no values? capstate:nextcap -- skip entire capture return 0 -- no value produced else local vals = List:new local n = capstate:pushnestedvalues(vals, false) if n < idx then -- invalid index? error("no capture '"..idx.."'") else result:push(vals[idx]) return 1 end end end
 * Select capture

function CapState:runtimecap(closePos) local close = self.captures[closePos] local open,openPos = self:findopen(closePos) -- get open group capture assert(open.kind == CapKind.group) self.index = openPos -- set state to the open capture local args = List:new args:push( self.source) -- original subject args:push( close.bytePos ) -- current position local n = self:pushnestedvalues(args, false) -- push nested captures local func = open.extra local funcRet = compat.pack(func(compat.unpack(args))) local res = closePos - openPos -- number of captures to be removed return res, funcRet end

function CapKind.runtime.push(capstate, cap, result) -- aka runtimecap result:push(cap.extra) capstate.index = capstate.index + 1 return 1 end

local MAXSTRCAPS = 10

-- -- function CapState:getstrcaps(cps, n)  local k = n   n = n + 1 cps[k] = { isstring = true, -- get string value bytePos = self:cap.bytePos, -- starts here }  local head = self:advance while head:inside(self:cap) do     if n > MAXSTRCAPS then -- too many captures? self:nextcap -- skip extra captures (will not need them) elseif self:cap.kind == CapKind.Simple then -- string? n = self:getstrcaps(cps, n) -- put info into array else cps[n] = { isstring = false, -- not a string cap = self.index, -- keep original capture }        self:nextcap n = n + 1 end end
 * Collect values from current capture into array 'cps'. Current
 * capture must be Cstring (first call) or Csimple (recursive calls).
 * (In first call, fills %0 with whole match for Cstring.)
 * Returns number of elements in the array that were filled.

cps[k].endPos = head.bytePos + self:closesize(head) self:skipclose(head)

return n end

function CapState:addonestring(buffer, what) local cap = self:cap if cap.kind == CapKind.string then -- add capture directly to buffer return stringcap(self, buffer) elseif cap.kind == CapKind.subst then -- add capture directly to buffer return substcap(self, buffer) elseif cap.kind == CapKind.acc then error("invalid context for an accumulator capture") end local result = List:new local n = self:pushcapture(result) if n == 0 then return 0 end -- no values to add local val = result[1] -- take only one result (the first) if type(val) == "number" then val = tostring(val) elseif type(val) ~= "string" then error("invalid "..what.." value (a "..type(val)..")") end table.insert(buffer, val) return 1 end

-- -- function stringcap(capstate, buffer) local fmt = capstate:cap.extra local cps = {} local n = capstate:getstrcaps(cps, 1) - 1 -- collect nested captures local sawEscape = false for _,c in compat.utf8codes(fmt) do     if sawEscape then sawEscape = false if c 57 then -- not followed by a digit table.insert(buffer, compat.utf8char(c)) else local l = 1 + c - 48 -- capture index (1-based) if l > n then error("invalid capture index ("..(l-1)..")") elseif cps[l].isstring then table.insert(buffer, capstate:substr(cps[l].bytePos, cps[l].endPos - cps[l].bytePos)) else -- go back to evaluate that nested capture local curr = capstate.index capstate.index = cps[l].cap if capstate:addonestring(buffer, "capture") == 0 then error("no values in capture index "..l)              end capstate.index = curr end end elseif c ~= 37 then -- not a % escape? table.insert(buffer, compat.utf8char(c)) else sawEscape = true end end return 1 end
 * String capture: add result to buffer 'b' (instead of pushing
 * it into the stack)

-- -- function substcap(capstate, buffer) local head = capstate:advance
 * Substitution capture: add result to buffer 'b'

local curr = head.bytePos while head:inside(capstate:cap) do     local cap = capstate:cap local nextPos = cap.bytePos local s = capstate:substr(curr, nextPos - curr) table.insert(buffer, s) -- add text up to capture if capstate:addonestring(buffer, "replacement") == 0 then -- no capture value, keep original text in final result curr = nextPos else -- continue after match local lastCap = capstate.captures[capstate.index - 1] curr = nextPos + cap:size(lastCap) end end -- add last piece of text local s = capstate:substr(curr, head.bytePos + capstate:closesize(head) - curr) table.insert(buffer, s)  capstate:skipclose(head) end

function CapKind.subst.push(capstate, cap, result) -- aka substcap local buffer = {} substcap(capstate, buffer) result:push(table.concat(buffer)) return 1 end

function CapKind.string.push(capstate, cap, result) -- aka stringcap local buffer = {} stringcap(capstate, buffer) result:push(table.concat(buffer)) return 1 end

function CapKind.group.push(capstate, cap, result) if cap.extra == nil then -- anonymous group? return capstate:pushnestedvalues(result, false) -- add all nested values else -- named group: add no values capstate:nextcap return 0 end end

-- -- function CapKind.backref.push(capstate, cap, result) local curr = capstate.index local _,i = capstate:findback(cap.extra) capstate.index = i  local n = capstate:pushnestedvalues(result, false) capstate.index = curr + 1 return n end
 * Back-reference capture. Return number of values pushed.

return { CapState = CapState, Capture = Capture, }

end)

register('llpeg.vm', function(myrequire) myrequire('strict') local compat = myrequire('advent.compat') local utf8util = myrequire('llpeg.utf8util') local from = myrequire('llpeg.types').from local  CHARMAX,   CapKind,   Opcode,   enum,   _ = from(myrequire('llpeg.types'), { 'CHARMAX', 'CapKind', 'Opcode', 'enum', }) local  CapState,   Capture,   __ = from(myrequire('llpeg.cap'), { 'CapState', 'Capture', }) local  Instruction,   ___ = from(myrequire('llpeg.code'), { 'Instruction', }) local  printcaplist,   ___ = from(myrequire('llpeg.print'), { 'printcaplist', })

local LFAIL = {} local InsidePred = enum{ OUTPRED = 0, INPRED = 1, }

local Stack = {} Stack.__index = Stack -- Stack prev: previous entry in the stack -- int bytePos: saved position, or NULL for calls -- Instruction pc: saved instruction -- int caplevel -- int labenv -- for labeled failure -- bool predchoice -- for labeled failure function Stack:new(prev, bytePos, pc, caplevel, labenv, predchoice) return setmetatable({        prev = prev,         bytePos = bytePos, pc = pc, caplevel = caplevel,         labenv = labenv, predchoice = predchoice,   }, self) end

function Stack:__tostring return string.format(     "Stack{ bytePos=%d, caplevel=%d, labenv=%s, predchoice=%s }",      self.bytePos, self.caplevel, self.labenv, self.predchoice   ) end

function Stack:print local s = self while s ~= nil do        print("Stack", s)         s = s.prev end end

local MatchResult = {} MatchResult.__index = MatchResult

function MatchResult:new return setmetatable({        labelf = nil, -- failure label         sfail = -1, -- farthest failure   }, self) end

local State = {} State.__index = State

function State:new(source, bytePos, ...) local giveup = Instruction:new{Opcode.Giveup} local insidepred = InsidePred.OUTPRED -- label environment is off inside predicates local stack = Stack:new(nil, bytePos, giveup, 0, insidepred, nil) local cp,cpLen if bytePos <= #source then cp, cpLen = utf8util.codepointAt(source, bytePos) else cp, cpLen = nil, nil end assert(bytePos ~= nil) return setmetatable({        source = source, -- the source string         bytePos = bytePos, -- current position in the string, in bytes         codepoint = cp, -- the codepoint at 'bytePos' in 'source'         codepointLen = cpLen, -- the length of the codepoint at 'bytePos'         stack = stack, -- top of stack         captures = {}, -- list of captures         captop = 1, -- point to first empty slot in captures (1-based)         extraArgs = compat.pack(...),         -- labeled failures:         insidepred = insidepred,         labelf = nil, -- failure label         sfail = -1, -- farthest failure   }, self) end

function State:advance return self:resetPosTo(self.bytePos + self.codepointLen) end

function State:resetPosTo(newPos) assert(newPos ~= nil) self.bytePos = newPos local source = self.source if newPos <= #source then local cp, cpLen = utf8util.codepointAt(source, newPos) self.codepoint = cp     self.codepointLen = cpLen return cp  else self.codepoint = nil self.codepointLen = nil return nil end end

function State:backtrack(n) local off = utf8util.offset(self.source, -n, self.bytePos) if off == nil then return false end -- can't backtrack that far! self:resetPosTo(off) return true end

function State:updatefarthest(label) self.labelf = label if self.bytePos > self.sfail then self.sfail = self.bytePos end end

function State:pushcapture(cap) self.captures[self.captop] = cap self.captop = self.captop + 1 end

function State:fail -- pattern failed, try to backtrack local lastStack repeat lastStack = self.stack self.stack = lastStack.prev until lastStack.bytePos ~= nil self:resetPosTo(lastStack.bytePos) self.captop = lastStack.caplevel self.insidepred = lastStack.labenv -- labeled failure return lastStack.pc:exec(self) end

function State:giveup local r = nil local lab = "fail" local errpos = self.sfail if self.labelf ~= nil and self.labelf ~= LFAIL then lab = self.labelf end return r, lab, errpos end

function State:getcaptures local results = {} if self.captures[1].kind == CapKind.close then -- are there any captures? return results -- no captures end return CapState:new(self.captures, self.source, self.extraArgs):getcaptures end

function Opcode.End:exec(state) state:pushcapture(CapKind.close:newCapture(state.bytePos, 0)) -- trim table to proper length while #state.captures > state.captop - 1 do     table.remove(state.captures) end -- printcaplist(state.captures, #state.captures) -- for debugging local results = state:getcaptures if #results == 0 then -- no captured values? return state.bytePos -- return only end position else return compat.unpack(results) end end

function Opcode.Giveup:exec(state) return state:giveup end

function Opcode.Ret:exec(state) local lastStack = state.stack state.stack = lastStack.prev return lastStack.pc:exec(state) end

function Opcode.Any:exec(state) if state.codepoint ~= nil then state:advance return self.next:exec(state) end state:updatefarthest(LFAIL) return state:fail end

function Opcode.TestAny:exec(state) if state.codepoint ~= nil then return self.next:exec(state) else return self.branch:exec(state) end end

function Opcode.UTFR:exec(state) local cp = state.codepoint if cp ~= nil and self.from <= cp and cp <= self.to then state:advance return self.next:exec(state) end state:updatefarthest(LFAIL) return state:fail end

function Opcode.Char:exec(state) if state.codepoint == self.aux then state:advance return self.next:exec(state) end state:updatefarthest(LFAIL) return state:fail end

function Opcode.TestChar:exec(state) if state.codepoint == self.aux then return self.next:exec(state) else return self.branch:exec(state) end end

function Opcode.Set:exec(state) local cp = state.codepoint if cp ~= nil then if cp <= CHARMAX then if self.set[cp] then state:advance return self.next:exec(state) end else if self.rest then state:advance return self.next:exec(state) end end end state:updatefarthest(LFAIL) return state:fail end

function Opcode.TestSet:exec(state) local cp = state.codepoint if cp ~= nil then if cp <= CHARMAX then if self.set[cp] then return self.next:exec(state) end elseif self.rest then return self.next:exec(state) end end return self.branch:exec(state) end

function Opcode.Behind:exec(state) local n = self.aux -- XXX SLOW self.aux is in *characters* not *bytes* if state:backtrack(n) then return self.next:exec(state) end state:updatefarthest(LFAIL) return state:fail end

function Opcode.Span:exec(state) local cp = state.codepoint while true do     local match = false if cp ~= nil then if cp <= CHARMAX then if self.set[cp] then match = true end else if self.rest then match = true end end end if not match then break end cp = state:advance end return self.next:exec(state) end

function Opcode.Jmp:exec(state) return self.branch:exec(state) end

function Opcode.Choice:exec(state) state.stack = Stack:new(     state.stack, state.bytePos, self.branch, state.captop, state.insidepred   ) return self.next:exec(state) end

function Opcode.PredChoice:exec(state) state.stack = Stack:new(     state.stack, state.bytePos, self.branch, state.captop, state.insidepred,      true -- predchoice   ) state.insidepred = InsidePred.INPRED return self.next:exec(state) end

function Opcode.Call:exec(state) state.stack = Stack:new(     state.stack, nil, self.next   ) return self.branch:exec(state) end

function Opcode.Commit:exec(state) state.stack = state.stack.prev return self.branch:exec(state) end

function Opcode.PartialCommit:exec(state) local stack = state.stack stack.bytePos = state.bytePos stack.caplevel = state.captop return self.branch:exec(state) end

function Opcode.BackCommit:exec(state) local stack = state.stack state.stack = stack.prev -- pop the stack state:resetPosTo(stack.bytePos) -- but reset the position to that stored state.insidepred = stack.labenv -- labeled failure state.captop = stack.caplevel return self.branch:exec(state) end

function Opcode.Throw:exec(state) if state.insidepred == InsidePred.OUTPRED then state.labelf = self.key -- pop entire stack while state.stack.prev ~= nil do        state.stack = state.stack.prev end else state.labelf = LFAIL -- pop until you read a 'predchoice' state while not state.stack.predchoice do        state.stack = state.stack.prev end end state.sfail = state.bytePos return state:fail end

function Opcode.ThrowRec:exec(state) -- with recovery state.sfail = state.bytePos if state.insidepred == InsidePred.OUTPRED then state.labelf = self.key state.stack = Stack:new(        state.stack, nil, self.next, state.captop      ) return self.branch:exec(state) else state.labelf = LFAIL -- pop until you read a 'predchoice' state while not state.stack.predchoice do        state.stack = state.stack.prev end return state:fail end end

function Opcode.FailTwice:exec(state) state.stack = state.stack.prev state:updatefarthest(LFAIL) return state:fail end

function Opcode.Fail:exec(state) state:updatefarthest(LFAIL) return state:fail end

function Opcode.CloseRunTime:exec(state) -- close the group state:pushcapture(self.cap:newCapture(state.bytePos, 0)) -- trim table to proper length while #state.captures > state.captop - 1 do     table.remove(state.captures) end local cs = CapState:new(state.captures, state.source, state.extraArgs) local n, funcRet = cs:runtimecap(state.captop - 1) state.captop = state.captop - n -- remove nested captures -- resdyncaptures: resolve returned values in `funcRet` -- first argument false=fail, true=keep current pos, number=next position local firstArg = funcRet[1] if funcRet.n == 0 then firstArg = false -- returning void means we'll fail end if not firstArg then -- if it is falsey, discard rest of returned vals & fail state:updatefarthest(LFAIL) return state:fail -- tail call elseif type(firstArg) == "boolean" then -- keep current position, nothing needs to be done else local npos = tonumber(firstArg) if npos < state.bytePos or npos > (1 + #(state.source)) then error("invalid position returned by match-time capture") end state:resetPosTo(npos) end -- push the rest of the funcRet values as new captures local n = funcRet.n - 1 -- number of new captures if n == 0 then -- no new captures? state.captop = state.captop - 1 -- remove open group else -- new captures, keep original open group -- add new captures + close group to 'capture' list -- adddyncaptures: assert(state.captures[state.captop - 1].kind == CapKind.group) assert(state.captures[state.captop - 1]:isopencap) -- make group capture an anonymous group (this used to hold match-time f)     state.captures[state.captop - 1].extra = nil for i=2,funcRet.n do -- add runtime captures state:pushcapture(CapKind.runtime:newCapture(state.bytePos, 0, funcRet[i])) end -- close group state:pushcapture(CapKind.close:newCapture(state.bytePos, 0)) end return self.next:exec(state) end

local MAXLOP = 20 function findopen(captures, i, currPos) i = i - 1 -- check last captop local cap = captures[i] if (not cap:isopencap) and cap.bytePos == currPos then return nil -- current one cannot be a full capture end -- else, look for an 'open' capture for j=1,MAXLOP do     if cap:isopencap then -- open capture? return cap,i -- that's the one to be closed elseif cap.kind == CapKind.close then return nil -- a full capture should not nest a non-full one end i = i - 1 if i<1 then break end cap = captures[i] end return nil -- not found within allowed search limit end

function Opcode.CloseCapture:exec(state) -- if possible, turn capture into a full capture assert(state.captop > 1) local open,_ = findopen(state.captures, state.captop, state.bytePos) if open ~= nil then -- if possible, turn capture into a full capture open.byteLen = state.bytePos - open.bytePos else -- non-nil length to mark entry as closed state:pushcapture(self.cap:newCapture(state.bytePos, 0)) end return self.next:exec(state) end

function Opcode.OpenCapture:exec(state) state:pushcapture(self.cap:newCapture( -- byteLen = nil marks entry as open state.bytePos, nil, self.key ))  return self.next:exec(state) end

function Opcode.FullCapture:exec(state) -- XXX SLOW: self.aux is in *characters* not *bytes* local nPos = utf8util.offset(state.source, -self.aux, state.bytePos) state:pushcapture(self.cap:newCapture( nPos, state.bytePos - nPos, self.key ))  return self.next:exec(state) end

function match(s, init, code, ...) local state = State:new(s, init, ...) return code[1]:exec(state) end

return { match = match, }

end)

register('llpeg', function(myrequire) local VERSION = '0.0.1' local MAXSTACK = 400 -- maximum backtracking local MAXBEHIND = 255 -- maximum look-behind local MAXRULES = 1000 -- maximum number of rules local LPEG_COMPAT = true

myrequire('strict') local compat = myrequire('advent.compat')

local from = myrequire('llpeg.types').from local CHARMAX, CapKind, TTag, define, define_tree_visitor, metareg, newcharset, numsiblings, _ = from(myrequire('llpeg.types'), {              'CHARMAX',               'CapKind',               'TTag',               'define',               'define_tree_visitor',               'metareg',               'newcharset',               'numsiblings',   }) local compile, cs_diff, cs_union, fixedlen, hascaptures, nofail, nullable, nullable_with_grammar, tocharset, __ = from(myrequire('llpeg.code'), {               'compile',                'cs_diff',                'cs_union',                'fixedlen',                'hascaptures',                'nofail',                'nullable',                'nullable_with_grammar',                'tocharset',   }) local match, ___ = from(myrequire('llpeg.vm'), {               'match',   }) local printpatt, printrepl, printtree, ____ = from(myrequire('llpeg.print'), {                'printpatt',                 'printrepl',                 'printtree',   })

function checkint(v) if type(v) == 'string' then v = tonumber(v) end if type(v) ~= "number" then error("not a number") end return math.floor(v) end

local is_pattern = define_type_visitor{ pattern = function return true end, default = function return false end, }

local ptype = define_type_visitor{ pattern = function return "pattern" end, default = function(v) return type(v) end, }

function val2str(v) if type(v) == 'number' then return tostring(v) end if type(v) == 'string' then return v end return string.format("(a %s)", ptype(v)) end

-- lpltree.c --

function newtree(tag) local t = { tag = tag, code = nil, }  setmetatable(t, metareg) return t end

function newleaf(tag, n)  return setmetatable({         tag = tag,         code = nil,         n = n,   }, metareg) end

function newroot1sib(tag, sib1) return setmetatable({        tag = tag,         code = nil,         sib1 = sib1,   }, metareg) end

function newroot2sib(tag, sib1, sib2) return setmetatable({        tag = tag,         code = nil,         sib1 = sib1,         sib2 = sib2,   }, metareg) end

-- Build a sequence of -- function fillseq(tag, s)  if type(s) == 'number' then local len = checkint(s) s = setmetatable({}, {__len = function return len end}) end if #s == 0 then return newleaf(tag, 0) end local i = #s local result = newleaf(tag, s[i]) while i > 1 do     i = i - 1 result = newroot2sib(        TTag.Seq,         newleaf(tag, s[i]),         result      ) end return result end

-- Numbers as patterns: 0 == true (always match); n == TAny repeated 'n' times; -n == not (TAny repeated 'n' times) -- function numtree(n) n = checkint(n) if n == 0 then return newleaf(TTag.True) elseif n > 0 then return fillseq(TTag.Any, n) -- sequence of 'n' anys else return newroot1sib(TTag.Not, fillseq(TTag.Any, -n)) end end

-- Convert value v to a pattern local getpatt = define_type_visitor{ ["string"] = function(s) if #s == 0 then return newleaf(TTag.True) -- always match if string is empty end local cp = {} for _,c in compat.utf8codes(s) do        table.insert(cp, c)      end return fillseq(TTag.Char, cp) end, ["number"] = function(n) return numtree(n) end, ["boolean"] = function(b) if b then return newleaf(TTag.True) else return newleaf(TTag.False) end end, ["function"] = function(f) return setmetatable({           tag = TTag.RunTime,            code = nil,            key = f,            sib1 = newleaf(TTag.True),      }, metareg) end, ["pattern"] = function(v) return v  end, ["table"] = function(v) return newgrammar(v) end, default = function(v) error("Not a pattern") end, }

-- labeled failure begin function newthrowleaf(label) return setmetatable({        tag = TTag.Throw,         code = nil,         sib2 = nil, -- no recovery rule associated (yet)         key = label,   }, metareg) end -- labeled failure end

function lp_P(v) return getpatt(v) end

-- --
 * sequence operator; optimizations:
 * false x => false, x true => x, true x => x
 * (cannot do x . false => false because x may have runtime captures)

function lp_seq(tree1, tree2) tree1 = getpatt(tree1) tree2 = getpatt(tree2) if tree1.tag == TTag.False or tree2.tag == TTag.True then -- false. x = false, x. true = x     return tree1 elseif tree1.tag == TTag.True then -- true. x = x     return tree2 else return newroot2sib(TTag.Seq, tree1, tree2) end end

-- -- function lp_choice(t1, t2) t1 = getpatt(t1) t2 = getpatt(t2) local t1c = tocharset(t1) local t2c = tocharset(t2) if t1c ~= nil and t2c ~= nil then local t = cs_union(t1c, t2c) return t  elseif nofail(t1) or t2.tag == TTag.False then -- true / x => true, x / false => x     return t1   elseif t1.tag == TTag.False then -- false / x => x     return t2   else return newroot2sib(TTag.Choice, t1, t2) end end
 * choice operator; optimizations:
 * charset / charset => charset
 * true / x => true, x / false => x, false / x => x
 * (x / true is not equivalent to true)

--  p^n -- function lp_star(p, n)  local tree1 = getpatt(p) n = checkint(n) if n >= 0 then -- seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) if nullable(tree1) then error("loop body may accept empty string") end local tree = newroot1sib(TTag.Rep, tree1) while n > 0 do        tree = newroot2sib(TTag.Seq, tree1, tree) n = n - 1 end return tree else -- choice (seq tree1 ... choice tree1 true ...) true n = -n local tree = newroot2sib( -- at most 1           TTag.Choice,            tree1,            newleaf(TTag.True)      ) while n > 1 do        tree = newroot2sib( -- at most (n-1)            TTag.Seq,            tree1,            tree         ) tree = newroot2sib(TTag.Choice, tree, newleaf(TTag.True)) n = n - 1 end return tree end end

-- -- function lp_and(v) return newroot1sib(TTag.And, getpatt(v)) end
 * #p == &p

-- -- function lp_not(v) return newroot1sib(TTag.Not, getpatt(v)) end
 * -p == !p

-- -- function lp_sub(t1, t2) t1 = getpatt(t1) t2 = getpatt(t2) local t1c = tocharset(t1) local t2c = tocharset(t2) if t1c ~= nil and t2c ~= nil then return cs_diff(t1c, t2c) else return newroot2sib(        TTag.Seq,         newroot1sib(TTag.Not, t2),         t1      ) end end
 * [t1 - t2] == Seq (Not t2) t1
 * If t1 and t2 are charsets, make their difference.

--  A set with the given characters -- function lp_set(s) local t = newcharset local extra = nil for _,c in compat.utf8codes(s) do     if c > CHARMAX then -- non ascii, we can't use charset for these local one = newleaf(TTag.Char, c)        if extra == nil then extra = one else extra = newroot2sib(TTag.Choice, extra, one) end else t.set[c] = true end end if extra == nil then return t  else return newroot2sib(TTag.Choice, t, extra) end end

function lp_range(...) local t = newcharset local extra = nil for _,v in ipairs{...} do     if type(v) ~= "string" then error("argument must be string") else local first, second for _,c in compat.utf8codes(v) do           if first == nil then first = c           elseif second == nil then second = c           else error("range must have two characters") end end if first == nil or second == nil then error("range must have two characters") end if first > second then if LPEG_COMPAT then -- ignore, just silently create an empty range else error("empty range") end elseif second <= CHARMAX then -- ascii range for i = first, second do              t.set[i] = true end else local r = lp_utfr(first, second) if extra == nil then extra = r           else extra = newroot2sib(TTag.Choice, extra, one) end end end end if extra == nil then return t  else return newroot2sib(TTag.Choice, t, extra) end end

function lp_utfr(from, to) from = checkint(from) to = checkint(to) if from > to then error("empty range") end if to > 0x10FFFF then error("invalid code point") end if to <= CHARMAX then -- ascii range? local t = newcharset -- code it as a regular charset for i = from, to do        t.set[i] = true end return t  end -- multibyte utf-8 range return setmetatable({        tag = TTag.UTFR,         code = nil,         from = from,         to = to,   }, metareg) end

--  Look-behind predicate -- function lp_behind(v) local tree1 = getpatt(v) local n = fixedlen(tree1) if n < 0 then error("pattern may not have fixed length") end if hascaptures(tree1) then error("pattern has captures") end if n > MAXBEHIND then error("pattern too long to look behind") end return setmetatable({        tag = TTag.Behind,         code = nil,         sib1 = tree1,         n = n,   }, metareg) end

-- labeled failure begin -- -- -- local lp_throw = define_type_visitor{ [{"string","number"}] = newthrowleaf, default = function error("not a string") end, } -- labeled failure end --
 * Throws a label

-- -- function lp_V(v) if v == nil then error("non-nil value expected") end return setmetatable({        tag = TTag.Call,         code = nil,         key = v,   }, metareg) end
 * Create a non-terminal

-- -- function capture_aux(capkind, patt, val) local t = newroot1sib(TTag.Capture, getpatt(patt)) t.cap = capkind t.key = val return t end
 * Create a tree for a non-empty capture, with a body and
 * optionally with an associated Lua value (at index 'labelidx' in the
 * stack)

function newemptycap(capkind, val) return capture_aux(capkind, newleaf(TTag.True), val) end

-- -- local divcapture_helper = define_type_visitor{ ["function"] = function(v, p)     return capture_aux(CapKind["function"], p, v)   end, ["table"] = function(v, p)     return capture_aux(CapKind.query, p, v)   end, ["string"] = function(v, p)     return capture_aux(CapKind.string, p, v)   end, ["number"] = function(v, p)     v = checkint(v) if v 65536 then error("invalid number") end return capture_aux(CapKind.num, p, v)  end, default = function(v) error("unexpected "..ptype(v).." as 2nd operand to LPeg '/'") end, } function lp_divcapture(p, v)  return divcapture_helper(v, p) -- dispatch on v end
 * Captures with syntax p / v
 * (function capture, query capture, string capture, or number capture)

function lp_acccapture(p, v)  return capture_aux(CapKind.acc, p, v) end

-- the match for patt with the values from nested captures replacing their -- matches function lp_substcapture(patt) return capture_aux(CapKind.subst, patt) end

-- a table with all captures from patt function lp_tablecapture(patt) return capture_aux(CapKind.table, patt) end

-- the values produced by patt, optionally tagged with key function lp_groupcapture(patt, key) -- key can be nil return capture_aux(CapKind.group, patt, key) end

-- folding capture (deprecated) function lp_foldcapture(patt, func) if type(func) ~= "function" then error("Bad function argument") end return capture_aux(CapKind.fold, patt, func) end

-- the match for patt plus all captures made by patt function lp_simplecapture(patt) return capture_aux(CapKind.simple, patt) end

-- the current position (matches the empty string) function lp_poscapture return newemptycap(CapKind.position) end

-- the value of the nth extra argument to lpeg.match (matches the empty string) function lp_argcapture(n) n = checkint(n) if n <= 0 or n > 65536 then error("invalid argument index") end return newemptycap(CapKind.arg, n) end

-- the value produced by the previous group capture named `key` -- (matches the empty string) function lp_backref(key) return newemptycap(CapKind.backref, key) end

-- Constant capture (matches the empty string) function lp_constcapture(...) local arg = compat.pack(...) if arg.n == 0 then -- no values? return newleaf(TTag.True) -- no capture else local i = arg.n     local tree = newemptycap(CapKind.const, arg[i]) while i > 1 do        i = i - 1 tree = newroot2sib(           TTag.Seq,            newemptycap(CapKind.const, arg[i]),            tree         ) end if arg.n == 1 then -- single constant capture return tree else -- create a group capture with all values return lp_groupcapture( tree ) end end end

-- the returns of function applied to the captures of patt; the application -- is done at match time function lp_matchtime(patt, func) if type(func) ~= 'function' then error('not a function') end return setmetatable({        tag = TTag.RunTime,         code = nil,         key = func,         sib1 = getpatt(patt),   }, metareg) end

--======================================================--

-- --
 * Grammar - Tree generation
 * Grammar - Tree generation

-- -- function getfirstrule(tbl) local first_name, first_rule first_name = tbl[1] -- is this the name of an initial rule? if type(first_name) == 'number' or type(first_name) == 'string' then first_rule = tbl[first_name] -- get associated rule else first_name,first_rule = 1,first_name end if not is_pattern(first_rule) then if first_rule == nil then error("grammar has no initial rule") else error(string.format("initial rule '%s' is not a pattern", first_name)) end end -- rule position (after TGrammar) -- return map from name to position, and from position to name return { [first_name] = 1 }, { first_name } end
 * push on the stack the index and the pattern for the
 * initial rule of grammar at index 'arg' in the stack;
 * also add that index into position table.

-- -- function collectrules(tbl) -- find the first rule and put in position table local postab, rpostab = getfirstrule(tbl)
 * traverse grammar at index 'arg', pushing all its keys and patterns
 * into the stack. Create a new table (before all pairs key-pattern) to
 * collect all keys and their associated positions in the final tree
 * (the "position table").
 * Return the number of rules and (in 'totalsize') the total size
 * for the new tree.

-- collect and sort rule names (for repeatability) local names = {} for k,v in pairs(tbl) do     if k == 1 or postab[k] == 1 then -- initial rule? -- skip the initial rules, it's already in the position table else table.insert(names, k)     end end table.sort(names, function(a,b)                return tostring(a) < tostring(b)   end)

-- fill out rule, name, and position maps for _,k in ipairs(names) do     local v = tbl[k] if not is_pattern(v) then error("rule '" .. val2str(k) .. "' is not a pattern") end table.insert(rpostab, k)     postab[k] = #rpostab end return postab, rpostab end

function buildgrammar(g, tbl, postab, rpostab) local trees = {} for i,name in ipairs(rpostab) do     local rule = setmetatable({            tag = TTag.Rule,            code = nil,            key = nil, -- will be fixed when rule is used            n = i, -- rule number            name = name,            sib1 = tbl[name], -- pattern            sib2 = nil,      }, metareg) table.insert(trees, rule) g.ruletab[name] = rule end -- link up siblings for i = 1, #trees-1 do     trees[i].sib2 = trees[i+1] end trees[#trees].sib2 = newleaf(TTag.True) -- finish list of rules g.sib1 = trees[1] end

-- -- function checkloops(grammar, tree) local n = numsiblings[tree.tag] if tree.tag == TTag.Rep and nullable_with_grammar(tree.sib1, grammar) then return true elseif tree.tag == TTag.Grammar then return false -- sub-grammars already checked elseif n == 1 then return checkloops(grammar, tree.sib1) -- tail call elseif n == 2 then if checkloops(grammar, tree.sib1) then return true else return checkloops(grammar, tree.sib2) -- tail call end elseif n == 0 then return false else error("surprising number of siblings") end end
 * Check whether a tree has potential infinite loops

-- -- function verifyerror(grammar, passed, npassed) local i, j  for i = npassed,1,-1 do -- search for a repetition for j = i-1,1,-1 do        if passed[i] == passed[j] then error(string.format("rule '%s' may be left recursive", val2str(passed[i]))) end end end error("too many left calls in grammar") end
 * Give appropriate error message for 'verifyrule'. If a rule appears
 * twice in 'passed', there is path from it back to itself without
 * advancing the subject.

-- -- local verifyrule verifyrule = define_tree_visitor{ [{        TTag.Char, TTag.Set, TTag.Any, TTag.False, TTag.UTFR, TTag.Throw, -- labeled failure }] = function(tree, g, passed, n, nb) return nb -- cannot pass from here end, [{        TTag.True, TTag.Behind, -- look-behind cannot have calls }] = function(tree, g, passed, n, nb) return true end, [{ TTag.Not, TTag.And, TTag.Rep, }] = function(tree, g, passed, n, nb) return verifyrule(tree.sib1, g, passed, n, true) -- tail call end, [{ TTag.Capture, TTag.RunTime, TTag.XInfo, }] = function(tree, g, passed, n, nb) return verifyrule(tree.sib1, g, passed, n, nb) -- tail call end, [ TTag.Call ] = function(tree, g, passed, n, nb) local rule = g.ruletab[tree.key] -- look up rule return verifyrule(rule, g, passed, n, nb) -- tail call end, [ TTag.Seq ] = function(tree, g, passed, n, nb) -- only check 2nd child if first is nb     if not verifyrule(tree.sib1, g, passed, n, false) then return nb     else -- note that we don't propagate new npassed from 1st child return verifyrule(tree.sib2, g, passed, n, nb) -- tail call end end, [ TTag.Choice ] = function(tree, g, passed, n, nb) -- must check both children nb = verifyrule(tree.sib1, g, passed, n, nb) -- note that we don't propagate new npassed from 1st child return verifyrule(tree.sib2, g, passed, n, nb) -- tail call end, [ TTag.Rule ] = function(tree, g, passed, n, nb) if n >= MAXRULES then -- too many steps? return verifyerror(g, passed, n) -- error else passed[n+1] = tree.key -- add rule to path return verifyrule(tree.sib1, g, passed, n + 1, nb) -- tail call end end, [ TTag.Grammar ] = function(tree, g, passed, n, nb) return nullable(tree) -- sub-grammar cannot be left recursive end, }
 * Check whether a rule can be left recursive; raise an error in that
 * case; otherwise return 1 iff pattern is nullable.
 * The return value is used to check sequences, where the second pattern
 * is only relevant if the first is nullable.
 * Parameter 'nb' works as an accumulator, to allow tail calls in
 * choices. ('nb' true makes function returns true.)
 * Parameter 'passed' is a list of already visited rules, 'npassed'
 * counts the elements in 'passed'.
 * Assume ktable at the top of the stack.

function verifygrammar(grammar) local passed = {} -- check left-recursive rules local rule = grammar.sib1 while rule.tag == TTag.Rule do     if rule.key ~= nil then -- skip unused rules verifyrule(rule.sib1, grammar, passed, 0, false) end rule = rule.sib2 end if rule.tag ~= TTag.True then error("assertion failure") end -- check infinite loops inside rules rule = grammar.sib1 while rule.tag == TTag.Rule do     if rule.key ~= nil then -- skip unused rules if checkloops(grammar, rule.sib1) then error("empty loop in rule '" .. val2str(rule.name) .. "'") end end rule = rule.sib2 end if rule.tag ~= TTag.True then error("assertion failure") end end

-- -- function fixonecall(g, t, postab) local name = t.key local rule = g.ruletab[name] if t.tag == TTag.Call then if rule == nil then error(string.format("rule '%s' undefined in given grammar", val2str(name))) end -- unlike our upstream, we don't clone patterns when we build a grammar -- so we can't mutate this tree w/o possibly breaking any other grammars -- which might hold an alias of this call. So we don't distinguish -- Call and OpenCall and we don't mutate the tag here and -- don't link it up. However, we can mutate the Rule -- as those are not shared rule.key = name -- mark this as used elseif rule ~= nil then -- TTag.Throw -- As before, we can't mutate the tree rule.key = name -- mark this as used end end
 * Fix a TOpenCall into a TCall node, using table 'postable' to
 * translate a key to its rule address in the tree. Raises an
 * error if key does not exist.

-- -- function correctassociativity (tree) local tag = tree.tag if tag ~= TTag.Choice and tag ~= TTag.Seq then error("impossible") end local t1 = tree.sib1 while t1.tag == tree.tag do     local t11, t12 = t1.sib1, t1.sib2 local t2 = tree.sib2 -- don't mutate t1 in place as others may be keeping copies of it; -- mutating 'tree' in place is okay as we're not changing its semantics tree.sib1 = t11 tree.sib2 = newroot2sib(tag, t12, t2) t1 = tree.sib1 end return tree end
 * Transform left associative constructions into right
 * associative ones, for sequence and choice; that is:
 * (t11 + t12) + t2 =>  t11 + (t12 + t2)
 * (t11 * t12) * t2 =>  t11 * (t12 * t2)
 * (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2))

-- -- local finalfix_helper = define_tree_visitor{ [TTag.Grammar] = function(t) return t -- subgrammars were already fixed end, [TTag.Call] = function(t, g, postab) if g == nil then error("rule '" .. val2str(t.key) .. "' used outside a grammar") else return fixonecall(g, t, postab) end end, [TTag.Throw] = function(t, g, postab) if g ~= nil then return fixonecall(g, t, postab) end end, [{TTag.Seq, TTag.Choice}] = function(t, g, postab) return correctassociativity(t) end, default = function(t) return t end, } function finalfix(g, t, postab) finalfix_helper(t, g, postab) if t.tag == TTag.Grammar then return end local n = numsiblings[t.tag] if n == 1 then return finalfix(g, t.sib1, postab) -- tail call elseif n == 2 then finalfix(g, t.sib1, postab) return finalfix(g, t.sib2, postab) -- tail call elseif n == 0 then return else error("strange number of siblings") end end
 * Make final adjustments in a tree. Fix open calls in tree 't',
 * making them refer to their respective rules or raising appropriate
 * errors (if not inside a grammar). Correct associativity of associative
 * constructions (making them right associative). Assume that tree's
 * ktable is at the top of the stack (for error messages).

-- -- function initialrulename(grammar) if grammar.sib1.key == nil then -- initial rule is not referenced? grammar.sib1.key = grammar.sib1.name end end
 * Give a name for the initial rule if it is not referenced

function newgrammar(tbl) local postab, rpostab = collectrules(tbl) local g = setmetatable({        tag = TTag.Grammar,         code = nil,         sib1 = nil, -- will fill this in         n = #rpostab, -- number of rules         ruletab = {}, -- map rule names to rules   }, metareg) buildgrammar(g, tbl, postab, rpostab) finalfix(g, g.sib1, postab) initialrulename(g) verifygrammar(g) return g end

-- ====================================================== --

function prepcompile(p) finalfix(nil, p, {}) -- correct associativity return compile(p) end

function lp_printtree(patt, c)  local tree = getpatt(patt) if c then finalfix(nil, tree, {}) -- correct associativity end print("[]") -- for compatibility, this is a fake 'ktable' io.write(table.concat(printtree(tree, 0, {}))) end

function lp_printcode(patt) local p = getpatt(patt) if p.code == nil then prepcompile(p) end print("[]") -- for compatibility, this is a fake 'ktable' io.write(table.concat(printpatt(p.code, {}))) end

-- -- function initposition(ii, len) if ii > 0 then -- positive index? if ii <= len then -- inside the string? return ii -- return it (no correction to 0-base) else return len + 1 -- crop at the end end else -- negative index if (-ii) <= len then -- inside the string? return len + 1 - (-ii) -- return position from the end else return 1 end end end
 * Get the initial position for the match, interpreting negative
 * values from the end of the subject. Result is 1-based.

-- Main match function function lp_match(pattern, subject, init, ...) local p = getpatt(pattern) if p.code == nil then prepcompile(p) end local code = p.code if type(subject) ~= 'string' then error("subject is not a string") end local i  if init == nil then i = 1 else i = initposition(checkint(init), #subject) end return match(subject, i, code, ...) end

-- --
 * Library creation and functions not related to matching
 * Library creation and functions not related to matching

function lp_setmax(lim) lim = 0 + lim -- convert to integer if lim <= 0 then error("out of range") end MAXSTACK = lim end

local lp_type = define_type_visitor{ pattern = function return "pattern" end, default = function return nil end, }

function lp_gc(p) p._code = nil end

function createcat(charspec) local t = newcharset for i=0,CHARMAX do -- XXX not unicode safe local s = compat.utf8char(i) if s:find(charspec) ~= nil then t.set[i] = true end end return t end

function lp_locale(tbl) if tbl == nil then tbl = {} end tbl.alnum = createcat("%w") tbl.alpha = createcat("%a") tbl.cntrl = createcat("%c") tbl.digit = createcat("%d") tbl.graph = createcat("[%p%w]") -- printable except space tbl.lower = createcat("%l") tbl.print = createcat("%C") -- printable = "not a control character"? tbl.punct = createcat("%p") -- "printable but not space or alnum  tbl.space = createcat("%s")   tbl.upper = createcat("%u")   tbl.xdigit = createcat("%x")   return tbl end

-- lpltree.c --

metareg.__mul = lp_seq metareg.__add = lp_choice metareg.__pow = lp_star metareg.__gc = lp_gc metareg.__len = lp_and metareg.__div = lp_divcapture metareg.__mod = lp_acccapture metareg.__unm = lp_not metareg.__sub = lp_sub metareg.__tostring = printrepl

local pattreg = { ptree = lp_printtree, pcode = lp_printcode, match = lp_match, B = lp_behind, V = lp_V, C = lp_simplecapture, Cc = lp_constcapture, Cmt = lp_matchtime, Cb = lp_backref, Carg = lp_argcapture, Cp = lp_poscapture, Cs = lp_substcapture, Ct = lp_tablecapture, Cf = lp_foldcapture, Cg = lp_groupcapture, P = lp_P, S = lp_set, R = lp_range, utfR = lp_utfr, locale = lp_locale, version = "LLPegLabel " .. VERSION, setmaxstack = lp_setmax, type = lp_type, T = lp_throw, -- labeled failure throw } metareg.__index = pattreg

return pattreg

end)

local modules = {} modules['bit32'] = require('bit32') modules['string'] = require('string') modules['strict'] = {} modules['table'] = require('table') local function myrequire(name) if modules[name] == nil then modules[name] = true modules[name] = (builders[name])(myrequire) end return modules[name] end return myrequire('llpeg') end)