From Wikipedia, the free encyclopedia

return (function()
local builders = {}
local function register(name, f)
  builders[name] = f
end
register('advent.compat', function() return require [[Module:User:Cscott/compat]] end)

register('llpeg.types', function(myrequire)
myrequire('strict')
local compat = myrequire('advent.compat')
local CHARMAX = 0x7F -- maximum codepoint for charsets

-- metatable for pattern objects; will be filled in later
local metareg = {}

local enum = function(keys)
   local Enum = {}
   Enum.__index = Enum
   function Enum:__tostring() return self.name end
   function Enum:pairs() return keys end
   function Enum:type() return Enum end

    for name, value in pairs(keys) do
       Enum[name] = setmetatable({ name = name, value = value }, Enum)
    end
    return Enum
end

local CapKind = enum{
   close = "close",  -- not used in trees */
   position = "position",
   const = "constant",  -- ktable[key] is Lua constant
   backref = "backref",  -- ktable[key] is "name" of group to get capture
   arg = "argument",  -- 'key' is arg's number
   simple = "simple",  -- next node is pattern
   table = "table",  -- next node is pattern
   ["function"] = "function",  -- ktable[key] is function; next node is pattern
   acc = "acc", -- ktable[key] is function; next node is pattern
   query = "query",  -- ktable[key] is table; next node is pattern
   string = "string",  -- ktable[key] is string; next node is pattern
   num = "num",  -- numbered capture; 'key' is number of value to return
   subst = "substitution",  -- substitution capture; next node is pattern
   fold = "fold",  -- ktable[key] is function; next node is pattern
   runtime = "runtime",  -- not used in trees (is uses another type for tree)
   group = "group",  -- ktable[key] is group's "name"
}

local TTag = enum{
  Char = "char", -- 'n' has unicode codepoint
  Set = "set", -- 'set' has sparse array codepoint->true for codepoint <=CHARMAX
                -- 'rest' indicates whether all codepoints > CHARMAX should be
                -- part of the set (true) or not (false)
  Any = "any",
  True = "true",
  False = "false",
  UTFR = "utf8.range",  --[[ range of UTF-8 codepoints;
                 'from' has initial codepoint; 'to' has final codepoint ]]--
  Rep = "rep",  -- 'sib1' *
  Seq = "seq",  -- 'sib1' 'sib2'
  Choice = "choice",  -- 'sib1' / 'sib2'
  Not = "not",  -- !'sib1'
  And = "and",  -- &'sib1'
  Call = "call",  -- 'sib2' is rule being called; otherwise same as TOpenCall
  OpenCall = "opencall",  -- 'key' is rule name
  Rule = "rule",  --[[ 'key' is rule name (but key == nil for unused rules);
             'sib1' is rule's pattern pre-rule; 'sib2' is next rule;
             'n' is rule's sequential number, 'name' is rule name (even
             for unused rules) ]]--
  XInfo = "xinfo",  -- extra info (not used)
  Grammar = "grammar",  -- 'sib1' is initial (and first) rule, 'n' is # rules
  Behind = "behind",  -- 'sib1' is pattern, 'n' is how much to go back
  Capture = "capture",  --[[ captures: 'cap' is kind of capture (enum 'CapKind');
                'key' is Lua value associated with capture;
               'sib1' is capture body ]]--
  RunTime = "run-time",  --[[ run-time capture: 'key' is Lua function;
                 'sib1' is capture body ]]--
  Throw = "throw",    -- labeled failure: 'key' is label's name,
                       -- sib2 is associated recovery rule
}

local PE = enum{
   nullable = "nullable",
   nofail = "nofail",
}

-- virtual machine instructions
local Opcode = enum{
  Any = "any", -- if no char, fail
  Char = "char",  -- if char != aux, fail
  Set = "set",  -- if char not in buff, fail
  TestAny = "testany",  -- in no char, jump to 'offset'
  TestChar = "testchar",  -- if char != aux, jump to 'offset'
  TestSet = "testset",  -- if char not in buff, jump to 'offset'
  Span = "span",  -- read a span of chars in buff
  UTFR = "utf-range",  -- if codepoint not in range [offset, utf_to], fail
  Behind = "behind",  -- walk back 'aux' characters (fail if not possible)
  Ret = "ret",  -- return from a rule
  End = "end",  -- end of pattern
  Choice = "choice",  -- stack a choice; next fail will jump to 'offset'
  PredChoice = "pred_choice",  -- labeled failure: stack a choice; changes label env next fail will jump to 'offset'
  Jmp = "jmp",  -- jump to 'offset'
  Call = "call",  -- call rule at 'offset'
  OpenCall = "open_call",  -- call rule number 'key' (must be closed to a ICall)
  Commit = "commit",  -- pop choice and jump to 'offset'
  PartialCommit = "partial_commit",  -- update top choice to current position and jump
  BackCommit = "back_commit",  -- backtrack like "fail" but jump to its own 'offset'
  FailTwice = "failtwice",  -- pop one choice and then fail
  Fail = "fail",  -- go back to saved state on choice and jump to saved offset
  Giveup = "giveup",  -- internal use
  FullCapture = "fullcapture",  -- complete capture of last 'off' chars
  OpenCapture = "opencapture",  -- start a capture
  CloseCapture = "closecapture",
  CloseRunTime = "closeruntime",
  Throw = "throw",    -- fails with a given label --labeled failure
  ThrowRec = "throw_rec", -- fails with a given label and call rule at 'offset' --labeled failure
  Empty = "--",  -- to fill empty slots left by optimizations
}

-- helper for visitor pattern definitions
function define(dispatch, which, f)
   for _,v in pairs(which) do
      assert(v ~= nil) -- catch typos
      dispatch[v] = f
   end
end

local numsiblings = {}
define(numsiblings, {
          TTag.Char, TTag.Set, TTag.Any,
          TTag.True, TTag.False, TTag.UTFR,
          TTag.Call, TTag.OpenCall,
          TTag.Throw,
}, 0)
define(numsiblings, {
          TTag.Rep, TTag.Not, TTag.And, TTag.Grammar,
          TTag.Behind, TTag.Capture, TTag.RunTime,
}, 1)
define(numsiblings, {
          TTag.Seq, TTag.Choice, TTag.Rule,
}, 2)

-- more help for visitor functions

local function_name_registry = {}
function register_fname(name, f)
   assert(type(name) == "string")
   assert(type(f) == "function")
   function_name_registry[f] = name
end

function report_ferror(f, msg)
   local fname = function_name_registry[f]
   if fname ~= nil then
      msg = fname .. ": " .. msg
   end
   error(msg)
end

function define_type_visitor(tbl)
   local dispatch = {}
   for keys,func in pairs(tbl) do
      if type(keys) ~= "table" then
         keys = { keys }
      end
      define(dispatch, keys, func)
   end
   local visit
   visit = function(val, ...)
      local a = dispatch["assert"]
      if a ~= nil then a(val, ...) end -- assert preconditions
      local ty = type(val)
      if ty == 'table' and getmetatable(val) == metareg then
         ty = 'pattern'
      end
      local f = dispatch[ty]
      if f ~= nil then return f(val, ...) end
      f = dispatch.default
      if f == nil then
         report_ferror(visit, "no default for " .. ty)
      end
      return f(val, ...)
   end
   return visit
end

function define_tree_visitor(tbl, opt_name)
   local dispatch = {}
   for keys,func in pairs(tbl) do
      if type(keys) ~= "table" or getmetatable(keys) == TTag then
         keys = { keys }
      end
      define(dispatch, keys, func)
   end
   local visit
   visit = function(tree, ...)
      if tree == nil then report_ferror(visit, "nil tree") end
      local a = dispatch["assert"]
      if a ~= nil then a(tree, ...) end -- assert preconditions
      local f = dispatch[tree.tag]
      if f ~= nil then return f(tree, ...) end
      f = dispatch.default
      if f == nil then
         report_ferror(visit, "no default for " .. tree.tag)
      end
      return f(tree, ...)
   end
   return visit
end

function define_opcode_visitor(tbl)
   local dispatch = {}
   for keys,func in pairs(tbl) do
      if type(keys) ~= "table" or getmetatable(keys) == Opcode then
         keys = { keys }
      end
      define(dispatch, keys, func)
   end
   local visit
   visit = function(op, ...)
      if op == nil then report_ferror(visit, "nil op") end
      local a = dispatch["assert"]
      if a ~= nil then a(op, ...) end -- assert preconditions
      local f = dispatch[op.code]
      if f ~= nil then return f(op, ...) end
      f = dispatch.default
      if f == nil then
         report_ferror(visit, "no default for " .. op.code)
      end
      return f(op, ...)
   end
   return visit
end

-- helper for module imports
function from(mod, list)
   local result = {}
   for _,v in ipairs(list) do
      table.insert(result, mod[v])
   end
   return compat.unpack(result)
end

function newcharset()
   return setmetatable({
         tag = TTag.Set,
         code = nil,
         rest = false,
         set = {}
   }, metareg)
end

local fullset = newcharset()
for i = 0,CHARMAX do
   fullset.set[i] = true
end
fullset.rest = true -- make sure non-ascii unicode chars are included!
assert(fullset.tag == TTag.Set)

return {
   CHARMAX = CHARMAX,
   CapKind = CapKind,
   Opcode = Opcode,
   PE = PE,
   TTag = TTag,
   define = define,
   define_tree_visitor = define_tree_visitor,
   enum = enum,
   from = from,
   fullset = fullset,
   metareg = metareg,
   newcharset = newcharset,
   numsiblings = numsiblings,
   register_fname = register_fname,
}

end)

register('llpeg.print', function(myrequire)
myrequire('strict')
local compat = myrequire('advent.compat')
local from = myrequire('llpeg.types').from
local
   CHARMAX,
   CapKind,
   Opcode,
   TTag,
   define,
   define_tree_visitor,
   numsiblings,
   _ = from(myrequire('llpeg.types'), {
               'CHARMAX',
               'CapKind',
               'Opcode',
               'TTag',
               'define',
               'define_tree_visitor',
               'numsiblings',
   })

function printcharset(tree)
   local result = "["
   local i = 0
   while i <= CHARMAX do
      local first = i
      while tree.set[i] and i <= CHARMAX do
         i = i + 1
      end
      if first == (i - 1) then -- unary range
         result = result .. string.format("(%02x)", first)
      elseif first < (i-1) then -- non-empty range
         result = result .. string.format("(%02x-%02x)", first, i - 1)
      end
      i = i + 1
   end
   if tree.rest then
      result = result .. "(80-FFFF)"
   end
   return result .. "]"
end

function printjmp(op, pc)
   return "-> " .. op.target
end

local printinst_helper = define_opcode_visitor{
   [Opcode.Char] = function(op, pc)
      return string.format("'%c' (%02x)", op.aux, op.aux)
   end,
   [Opcode.TestChar] = function(op, pc)
      return string.format("'%c' (%02x)", op.aux, op.aux) .. printjmp(op, pc)
   end,
   [Opcode.UTFR] = function(op, pc)
      return string.format("%d - %d", op.from, op.to)
   end,
   [Opcode.FullCapture] = function(op, pc)
      return string.format("%s (size = %s)  (idx = %s)",
                           op.cap.value, op.aux, op.key)
   end,
   [Opcode.OpenCapture] = function(op, pc)
      return string.format("%s (idx = %s)",
                           op.cap.value, op.key)
   end,
   [Opcode.Set] = function(op, pc)
      return printcharset(op)
   end,
   [Opcode.TestSet] = function(op, pc)
      return printcharset(op) .. printjmp(op, pc)
   end,
   [Opcode.Span] = function(op, pc)
      return printcharset(op)
   end,
   [Opcode.OpenCall] = function(op, pc)
      return string.format("-> %d", op.target) -- rule number
   end,
   [Opcode.Behind] = function(op, pc)
      return string.format("%d", op.aux)
   end,
   [{Opcode.Jmp, Opcode.Call, Opcode.Commit, Opcode.Choice,
     Opcode.PartialCommit, Opcode.BackCommit, Opcode.TestAny,
     Opcode.PredChoice}] = function(op, pc)
      return printjmp(op, pc)
   end,
   [Opcode.Throw] = function(op, pc) -- labeled failure
      return string.format("(idx = %s)", op.key)
   end,
   [Opcode.ThrowRec] = function(op, pc)
      return printjmp(op, pc) .. string.format("(idx = %s)", op.key)
   end,
   default = function() return '' end,
}
function printinst(pc, op, accum)
   table.insert(accum, string.format("%02d: %s ", pc, op.code.value))
   table.insert(accum, printinst_helper(op, pc))
   table.insert(accum, "\n")
   return accum
end

function printpatt(code, accum)
   for pc,op in ipairs(code) do
      printinst(pc, op, accum)
   end
   return accum
end

function printcap(cap, indent)
   print(string.format("%s%s", string.rep(' ', indent), cap))
end

function printcap2close(captures, ncaptures, i, indent)
   local head = captures[i]
   i = i + 1
   printcap(head, indent) -- print head capture
   while i <= ncaptures and head:inside(captures[i]) do
      i = printcap2close(captures, ncaptures, i, indent + 2) -- print nested captures
   end
   if i <= ncaptures and head:isopencap() then
      assert(captures[i]:isclosecap())
      printcap(captures[i], indent) -- print and skip close capture
      i = i + 1
   end
   return i
end

function printcaplist(captures, ncaptures)
   -- for debugging, first print a raw list of captures
   if ncaptures == nil then ncaptures = #captures end
   for i=1,ncaptures do
      printcap(captures[i], 0)
   end
  print(">======");
  local i=1
  while i <= ncaptures and not captures[i]:isclosecap() do
     i = printcap2close(captures, ncaptures, i, 0)
  end
  if i > ncaptures then
     print("<unmatched>")
  end
  print("=======");
end

local printtree_helper = define_tree_visitor{
   [TTag.Char] = function(tree)
      local c = compat.utf8char(tree.n)
      if c:find("%C") ~= nil then -- printable?
         return " '" .. c .. "'"
      else
         return string.format(" (%02X)", tree.n)
      end
   end,
   [TTag.Set] = function(tree)
      return printcharset(tree)
   end,
   [TTag.UTFR] = function(tree)
      return " " .. tree.from .. " - " .. tree.to
   end,
   [{TTag.OpenCall, TTag.Call}] = function(tree)
      local ret = string.format(" key: %s", tree.key)
      local rule = tree.sib2
      if rule ~= nil then
         ret = ret .. " (rule: " .. rule.n .. ")"
      end
      return ret
   end,
   [TTag.Behind] = function(tree)
      return " " .. tree.n
   end,
   [TTag.Capture] = function(tree)
      return string.format(" kind: '%s'  key: %s", tree.cap.value, tree.key)
   end,
   [TTag.Rule] = function(tree)
      return string.format(" key: %s", tree.key)
   end,
   [TTag.XInfo] = function(tree)
      return " n: " .. tree.n
   end,
   [TTag.Grammar] = function(tree)
      return " " .. tree.n -- number of rules
   end,
   [TTag.Throw] = function(tree)
      return string.format(" key: %s", tree.key)
   end,
   default = function(tree) return '' end
}
function printtree(tree, indent, accum)
   local sibs = numsiblings[tree.tag]

   table.insert(accum, string.rep(' ', indent))
   table.insert(accum, tree.tag.value)

   table.insert(accum, printtree_helper(tree))
   table.insert(accum, "\n")

   if tree.tag == TTag.Rule then
      sibs = 1 -- don't print sib2
   elseif tree.tag == TTag.Grammar then
      local rule = tree.sib1
      for i=1,tree.n do
         printtree(rule, indent + 2, accum)
         rule = rule.sib2
      end
      sibs = 0 -- siblings already handled
   end
   if sibs >= 1 then
      printtree(tree.sib1, indent + 2, accum)
      if sibs >= 2 then
         printtree(tree.sib2, indent + 2, accum)
      end
   end
   return accum
end

local PREFIX = "" -- could also be "l." or "lpeg." etc
local printrepl_helper
printrepl_helper = define_tree_visitor{
   [TTag.True] = function(tree, buf)
      table.insert(buf, PREFIX .. 'P""')
   end,
   [TTag.Any] = function(tree, buf)
      table.insert(buf, PREFIX .. 'P(1)')
   end,
   [TTag.Char] = function(tree, buf)
      table.insert(buf, PREFIX .. 'P"')
      local c = compat.utf8char(tree.n)
      if c:find("%C") ~= nil then -- printable?
         table.insert(buf, c)
      else
         table.insert(buf, string.format('\\%02X', tree.n))
      end
      table.insert(buf, '"')
   end,
   [TTag.Set] = function(tree, buf)
      local nbuf = {}
      local insertchar = function(cp)
         local c = compat.utf8char(cp)
         if string.find(c, "^[^%w%p ]") ~= nil then
            table.insert(nbuf, string.format('\\x%02X', cp))
         else
            table.insert(nbuf, c)
         end
      end
      local nargs = 0
      local inserttwo = function(cp1, cp2)
         if nargs > 0 then table.insert(nbuf, ',') end
         nargs = nargs + 1
         table.insert(nbuf, '"')
         insertchar(cp1)
         insertchar(cp2)
         table.insert(nbuf, '"')
      end

      local i = 0
      while i <= CHARMAX do
         local first = i
         while tree.set[i] and i <= CHARMAX do
            i = i + 1
         end
         if first == (i - 1) then -- unary range
            inserttwo(first, first)
         elseif first < (i-1) then -- non-empty range
            inserttwo(first, i-1)
         end
         i = i + 1
      end

      local r = table.concat(nbuf)
      if nargs == 1 then
         r = PREFIX .. 'S' .. r
      else
         r = PREFIX .. 'S(' .. r .. ')'
      end

      if tree.rest then
         table.insert(buf, '(')
         table.insert(buf, r)
         table.insert(buf, ' + ')
         table.insert(buf, PREFIX)
         table.insert(buf, 'utfR(0x80, 0x10FFFF))')
      else
         table.insert(buf, r)
      end
   end,
   [TTag.UTFR] = function(tree, buf)
      table.insert(buf, string.format("%sutfR(0x%04X, 0x%04X)", PREFIX, tree.from, tree.to))
   end,
   [{TTag.OpenCall, TTag.Call}] = function(tree, buf)
      table.insert(buf, string.format('%sV"%s"', PREFIX, tree.key))
   end,
   [TTag.Not] = function(tree, buf)
      table.insert(buf, '-(')
      printrepl_helper(tree.sib1, buf)
      table.insert(buf, ')')
   end,
   [TTag.Seq] = function(tree, buf)
      table.insert(buf, "(")
      printrepl_helper(tree.sib1, buf)
      table.insert(buf, " * ")
      printrepl_helper(tree.sib2, buf)
      table.insert(buf, ")")
   end,
   [TTag.Choice] = function(tree, buf)
      table.insert(buf, "(")
      printrepl_helper(tree.sib1, buf)
      table.insert(buf, " + ")
      printrepl_helper(tree.sib2, buf)
      table.insert(buf, ")")
   end,
   [TTag.Rep] = function(tree, buf)
      printrepl_helper(tree.sib1, buf)
      table.insert(buf, "^0")
   end,
   --[[
   [TTag.Behind] = function(tree)
      return " " .. tree.n
      end,
   ]]--
   [TTag.Capture] = function(tree, buf)
      local repl = define_type_visitor{
         string = function(v)
            return '"' .. v .. '"' -- xxx should handle escapes
         end,
         default = tostring,
      }
      local name = nil
      local show_patt = false
      local show_key = false
      if tree.cap == CapKind.simple then
         name = 'C'
         show_patt = true
      elseif tree.cap == CapKind.subst then
         name = 'Cs'
         show_patt = true
      elseif tree.cap == CapKind.table then
         name = 'Ct'
         show_patt = true
      elseif tree.cap == CapKind.pos then
         name = 'Cp'
      elseif tree.cap == CapKind.arg then
         name = 'Carg'
         show_key = true
      elseif tree.cap == CapKind.backref then
         name = 'Cb'
         show_key = true
      elseif tree.cap == CapKind.group then
         name = 'Cg'
         show_patt = true
         show_key = (tree.key ~= nil)
      end
      if name ~= nil then
         table.insert(buf, PREFIX)
         table.insert(buf, name)
         table.insert(buf, '(')
         if show_patt then
            printrepl_helper(tree.sib1, buf)
            if show_key then
               table.insert(buf, ', ')
            end
         end
         if show_key then
            table.insert(buf, repl(tree.key))
         end
         table.insert(buf, ')')
         return
      end
      if tree.cap == CapKind.string or
         tree.cap == CapKind.num or
         tree.cap == CapKind.query or
         tree.cap == CapKind['function'] then
         printrepl_helper(tree.sib1, buf)
         table.insert(buf, ' / ')
         table.insert(buf, repl(tree.key))
         return
      end
      -- fallback
      table.insert(buf, string.format("<pattern %s>", tostring(tree.tag)))
   end,
   [TTag.Rule] = function(tree, buf)
      local key = tree.name
      if type(key) == 'number' then key = string.format("[%d]", key) end
      table.insert(buf, key)
      table.insert(buf, " = ")
      printrepl_helper(tree.sib1, buf)
   end,
   [TTag.Grammar] = function(tree, buf)
      table.insert(buf, PREFIX .. "P{")
      local rule = tree.sib1

      local r = {}
      local first_rule_name = rule.name
      r[first_rule_name] = rule
      rule = rule.sib2

      local names = {}
      for i=2,tree.n do
         table.insert(names, rule.name)
         r[rule.name] = rule
         rule = rule.sib2
      end

      -- sort rule names
      table.sort(names)
      table.insert(names, 1, first_rule_name)

      -- now print in order
      for _,name in ipairs(names) do
         printrepl_helper(r[name], buf)
         table.insert(buf, ", ")
      end
      table.insert(buf, "}")
   end,
   --[[
   [TTag.Throw] = function(tree)
      return " key: " .. tree.key .. "  (rule: " .. tree.sib2.cap .. ")"
   end,
   ]]--
   default = function(tree, buf)
      table.insert(buf, string.format("<pattern %s>", tostring(tree.tag)))
   end,
}
function printrepl(tree)
   local buf = {}
   printrepl_helper(tree, buf)
   return table.concat(buf)
end

return {
   printcaplist = printcaplist,
   printcharset = printcharset,
   printinst = printinst,
   printpatt = printpatt,
   printrepl = printrepl,
   printtree = printtree,
}

end)

register('llpeg.code', function(myrequire)
myrequire('strict')
local compat = myrequire('advent.compat')
local from = myrequire('llpeg.types').from
local
   CHARMAX,
   CapKind,
   Opcode,
   PE,
   TTag,
   define,
   define_tree_visitor,
   fullset,
   newcharset,
   numsiblings,
   register_fname,
   _ = from(myrequire('llpeg.types'), {
               'CHARMAX',
               'CapKind',
               'Opcode',
               'PE',
               'TTag',
               'define',
               'define_tree_visitor',
               'fullset',
               'newcharset',
               'numsiblings',
               'register_fname',
})
local printinst = myrequire('llpeg.print').printinst

local TRACE_INSTRUCTIONS = false

-- signals a "no-instruction"
local NOINST = nil

-- don't optimize captures longer than this
local MAXOFF = 15

-- forward declarations
local codegen

local CompileState = {}
CompileState.__index = CompileState

--[[
** {======================================================
** Analysis and some optimizations
** =======================================================
]]--

--[[
** Check whether a charset is empty (returns IFail), singleton (IChar),
** full (IAny), or none of those (ISet). When singleton, '*c' returns
** which character it is. (When generic set, the set was the input,
** so there is no need to return it.)
]]--
function charsettype(cs)
   local count = 0
   local candidate
   for i,_ in pairs(cs.set) do
      candidate = i
      count = count + 1
   end
   if cs.rest then
      if count == (CHARMAX + 1) then
         return Opcode.Any -- full set
      end
   elseif count == 0 then
      return Opcode.Fail -- empty set
   elseif count == 1 then
      return Opcode.Char, candidate -- single char
   end
   return Opcode.Set -- neither full nor empty nor singleton
end

-- A few basic operations on charsets; returns new object

function cs_clone(cs)
   local result = newcharset()
   for i,_ in pairs(cs.set) do
      result.set[i] = true
   end
   result.rest = cs.rest
   return result
end

function cs_complement(cs)
   local result = newcharset()
   for i=0,CHARMAX do
      if not cs.set[i] then
         result.set[i] = true
      end
   end
   result.rest = not cs.rest
   return result
end

function cs_intersection(a, b)
   local result = newcharset()
   for i,_ in pairs(a.set) do
      if a.set[i] and b.set[i] then
         result.set[i] = true
      end
   end
   result.rest = a.rest and b.rest
   return result
end

function cs_union(a, b)
   local result = newcharset()
   for i=0,CHARMAX do
      if a.set[i] or b.set[i] then
         result.set[i] = true
      end
   end
   result.rest = a.rest or b.rest
   return result
end

function cs_diff(a, b)
   local result = newcharset()
   for i=0,CHARMAX do
      if a.set[i] and not b.set[i] then
         result.set[i] = true
      end
   end
   result.rest = a.rest and not b.rest
   return result
end

function cs_disjoint(a, b)
   if a.rest == b.rest then return false end
   for i,_ in pairs(a.set) do
      if b.set[i] then return false end
   end
   for i,_ in pairs(b.set) do
      if a.set[i] then return false end
   end
   return true
end

function cs_equal(a, b)
   if a.rest ~= b.rest then return false end
   for i,_ in pairs(a.set) do
      if not b.set[i] then return false end
   end
   for i,_ in pairs(b.set) do
      if not a.set[i] then return false end
   end
   return true
end

--[[
** If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a
** charset and return it; else return nil.
]]--
local tocharset = define_tree_visitor{
   [TTag.Set] = function(v)
      return v -- copy set
   end,
   [TTag.Char] = function(v)
      -- only one char
      if v.n <= CHARMAX then
         local t = newcharset()
         t.set[v.n] = true
         return t
      else
         return nil
      end
   end,
   [TTag.Any] = function(v)
      return fullset
   end,
   [TTag.False] = function(v)
      return newcharset()
   end,
   default = function(v)
      return nil
   end,
}
register_fname("tocharset", tocharset)

--[[
** Visit a TCall node taking care to stop recursion. If node not yet
** visited, return 'f(rule for call)', otherwise return 'def' (default
** value)
]]--
function CompileState:callrecursive(tree, f, default_value, ...)
   if tree.tag ~= TTag.Call then
      error("unexpected tree tag")
   end
   local rule = self.grammar.ruletab[tree.key]
   if rule.tag ~= TTag.Rule then
      error("unexpected tree sibling")
   end
   if tree.seen == true then
      return default_value -- node already visited
   else
      -- first visit
      local oldseen = tree.seen
      tree.seen = true
      local result = f(rule, ...)
      tree.seen = oldseen -- restore tree
      return result
   end
end

--[[
** Check whether a pattern tree has captures
]]--
local hascaptures
hascaptures = define_tree_visitor{
   [{TTag.Capture, TTag.RunTime}] = function(tree, cs)
         return true
   end,
   [TTag.Call] = function(tree, cs)
      assert(cs ~= nil)
      return cs:callrecursive(tree, hascaptures, false, cs)
   end,
   [TTag.Rule] = function(tree, cs)
      -- do not follow siblings
      return hascaptures(tree.sib1, cs)
   end,
   [TTag.OpenCall] = function(tree, cs)
      error("should not happen")
   end,
   [TTag.Grammar] = function(tree, cs)
      -- make a fake compile state to hold the grammar, if necessary
      if cs == nil then cs = CompileState:new(nil) end
      return cs:withGrammar(tree, hascaptures, tree.sib1, cs)
   end,
   default = function(tree, cs)
      local n = numsiblings[tree.tag]
      if n == 1 then
         return hascaptures(tree.sib1, cs) -- tail call
      elseif n == 2 then
         if hascaptures(tree.sib1, cs) then return true end
         return hascaptures(tree.sib2, cs) -- tail call
      elseif n == 0 then
         return false
      else
         error("how many siblings does this have?")
      end
   end,
}
function CompileState:hascaptures(t) return hascaptures(t, self) end
register_fname("hascaptures", hascaptures)

--[[
** Checks how a pattern behaves regarding the empty string,
** in one of two different ways:
** A pattern is *nullable* if it can match without consuming any character;
** A pattern is *nofail* if it never fails for any string
** (including the empty string).
** The difference is only for predicates and run-time captures;
** for other patterns, the two properties are equivalent.
** (With predicates, &'a' is nullable but not nofail. Of course,
** nofail => nullable.)
** These functions are all convervative in the following way:
**    p is nullable => nullable(p)
**    nofail(p) => p cannot fail
** The function assumes that TOpenCall is not nullable;
** this will be checked again when the grammar is fixed.
** Run-time captures can do whatever they want, so the result
** is conservative.
]]--
local checkaux
checkaux = define_tree_visitor{
   [{
         TTag.Char, TTag.Set, TTag.Any, TTag.UTFR, TTag.False,
         TTag.OpenCall, TTag.Throw,
   }] = function(tree, pred, cs)
      return false -- not nullable
   end,
   [{TTag.Rep,TTag.True}] = function(tree, pred, cs)
      return true -- no fail
   end,
   [{TTag.Not,TTag.Behind}] = function(tree, pred, cs)
      -- can match empty, but can fail
      if pred == PE.nofail then
         return false
      else
         return true
      end
   end,
   [TTag.And] = function(tree, pred, cs)
      -- can match empty; fail iff body does
      if pred == PE.nullable then
         return true
      end
      return checkaux(tree.sib1, pred, cs) -- tail call
   end,
   [TTag.RunTime] = function(tree, pred, cs)
      -- can fail; match empty iff body does
      if pred == PE.nofail then
         return false
      end
      return checkaux(tree.sib1, pred, cs) -- tail call
   end,
   [TTag.Seq] = function(tree, pred, cs)
      if not checkaux(tree.sib1, pred, cs) then
         return false
      end
      return checkaux(tree.sib2, pred, cs) -- tail call
   end,
   [TTag.Choice] = function(tree, pred, cs)
      if checkaux(tree.sib2, pred, cs) then
         return true
      end
      return checkaux(tree.sib1, pred, cs) -- tail call
   end,
   [{ TTag.Capture, TTag.Rule, TTag.XInfo, }] = function(tree, pred, cs)
      return checkaux(tree.sib1, pred, cs)
   end,
   [TTag.Grammar] = function(tree, pred, cs)
      -- make a fake compile state to hold the grammar, if necessary
      if cs == nil then cs = CompileState:new(nil) end
      return cs:withGrammar(tree, checkaux, tree.sib1, pred, cs)
   end,
   [TTag.Call] = function(tree, pred, cs)
      -- open calls are assumed not nullable; checked again after grammar
      -- is fixed
      if cs == nil then return false end
      return checkaux(cs.grammar.ruletab[tree.key], pred, cs)
   end,
}
register_fname("checkaux", checkaux)

function nofail(t, cs) return checkaux(t, PE.nofail, cs) end

function CompileState:nofail(t) return nofail(t, self) end

function nullable(t, cs) return checkaux(t, PE.nullable, cs) end

function CompileState:nullable(t) return nullable(t, self) end

function nullable_with_grammar(t, grm)
   local cs = CompileState:new(nil)
   return cs:withGrammar(grm, nullable, t, cs)
end

-- Note that we are counting characters, not bytes
local fixedlen, fixedlen_helper
fixedlen_helper = define_tree_visitor{
   [{TTag.Char, TTag.Set, TTag.Any, TTag.UTFR}] = function(tree, len)
      return len + 1
   end,
   [{TTag.False, TTag.True, TTag.Not, TTag.And, TTag.Behind}] = function(tree, len)
      return len
   end,
   [{TTag.Rep, TTag.RunTime, TTag.OpenCall, TTag.Throw,}] = function(tree, len)
      return -1 -- variable
   end,
   [{TTag.Capture, TTag.Rule, TTag.XInfo,}] = function(tree, len, cs)
      return fixedlen_helper(tree.sib1, len, cs)
   end,
   [TTag.Grammar] = function(tree, len, cs)
      -- make a fake compile state to hold the grammar, if necessary
      if cs == nil then cs = CompileState:new(nil) end
      return cs:withGrammar(tree, fixedlen_helper, tree.sib1, len, cs)
   end,
   [TTag.Call] = function(tree, len, cs)
      -- If evaluating outside the context of a grammar, conservatively
      -- return "variable"
      if cs == nil then return -1 end
      -- otherwise, carefully recurse
      local n1 = cs:callrecursive(tree, fixedlen, -1, cs)
      if n1 < 0 then return -1 end -- variable
      return len + n1
   end,
   [TTag.Seq] = function(tree, len, cs)
      local n1 = fixedlen_helper(tree.sib1, len, cs)
      if n1 < 0 then return -1 end -- variable
      return fixedlen_helper(tree.sib2, n1, cs)
   end,
   [TTag.Choice] = function(tree, len, cs)
      local n1 = fixedlen_helper(tree.sib1, len, cs)
      local n2 = fixedlen_helper(tree.sib2, len, cs)
      if n1 ~= n2 or n1 < 0 then
         return -1
      else
         return n1
      end
   end,
}
function fixedlen(tree, cs)
   return fixedlen_helper(tree, 0, cs) -- supply default 0 for 'len'
end
function CompileState:fixedlen(t) return fixedlen(t, self) end
register_fname("fixedlen_helper", fixedlen_helper)

--[[
** Computes the 'first set' of a pattern.
** The result is a conservative aproximation:
**   match p ax -> x (for some x) ==> a belongs to first(p)
** or
**   a not in first(p) ==> match p ax -> fail (for all x)
**
** The set 'follow' is the first set of what follows the
** pattern (full set if nothing follows it).
**
** The function returns 0 when this resulting set can be used for
** test instructions that avoid the pattern altogether.
** A non-zero return can happen for two reasons:
** 1) match p '' -> ''            ==> return has bit 1 set
** (tests cannot be used because they would always fail for an empty input);
** 2) there is a match-time capture ==> return has bit 2 set
** (optimizations should not bypass match-time captures).
]]--
local getfirst
getfirst = define_tree_visitor{
   [TTag.Char] = function(t, follow, cs)
      if t.n <= CHARMAX then return 0, tocharset(t) end
      -- conservative approximation!
      local s = newcharset()
      s.rest = true
      return 0, s
   end,
   [{ TTag.Set, TTag.Any, TTag.False }] = function(t, follow, cs)
      return 0, tocharset(t)
   end,
   [TTag.UTFR] = function(t, follow, cs)
      -- conservative approximation!
      local firstset = newcharset()
      if t.from <= CHARMAX then
         for i=t.from, math.min(CHARMAX, t.to) do
            firstset.set[i] = true
         end
      end
      if t.to > CHARMAX then
         -- conservative approximation of non-ascii unicode range
         firstset.rest = true
      end
      return 0, firstset
   end,
   [TTag.True] = function(t, follow, cs)
      return 1, follow -- 1 because this accepts the empty string
   end,
   [TTag.Throw] = function(t, follow, cs)
      -- labeled failure: must always throw the label
      return 1, fullset
   end,
   [TTag.Choice] = function(t, follow, cs)
      local firstset = newcharset()
      local e1,e1set = getfirst(t.sib1, follow, cs)
      local e2,e2set = getfirst(t.sib2, follow, cs)
      local firstset = cs_union(e1set, e2set)
      local ret = 0 -- awkward lua5.1 way to say "e1 | e2"
      if (e1 % 2) == 1 or (e2 % 2) == 1 then
         ret = ret + 1
      end
      e1,e2 = compat.rshift(e1, 1), compat.rshift(e2, 1)
      if (e1 % 2) == 1 or (e2 % 2) == 1 then
         ret = ret + 2
      end
      return ret, firstset
   end,
   [TTag.Seq] = function(t, follow, cs)
      if not nullable(t.sib1, cs) then
         -- when p1 is not nullable, p2 has nothing to contribute
         return getfirst(t.sib1, fullset, cs) -- tail call
      else -- FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl))
         local e2,csaux = getfirst(t.sib2, follow, cs)
         local e1,firstset = getfirst(t.sib1, csaux, cs)
         if e1 == 0 then
            return 0, firstset -- 'e1' ensures that first can be used
         elseif compat.rshift(e1, 1) % 2 == 1 or compat.rshift(e2, 1) % 2 == 1 then
            -- one of the children has a matchtime?
            return 2, firstset -- pattern has a matchtime capture
         else
            return e2, firstset -- else depends on e2
         end
      end
   end,
   [TTag.Rep] = function(t, follow, cs)
      local _,firstset = getfirst(t.sib1, follow, cs)
      return 1, cs_union(firstset, follow, cs) -- accepts the empty string
   end,
   [{ TTag.Capture,TTag.Rule,TTag.XInfo }] = function(t, follow, cs)
      return getfirst(t.sib1, follow, cs) -- tail call
   end,
   [TTag.Grammar] = function(t, follow, cs)
      return cs:withGrammar(t, getfirst, t.sib1, follow, cs)
   end,
   [TTag.RunTime] = function(t, follow, cs)
      -- function invalidates any follow info
      local e,firstset = getfirst(t.sib1, fullset, cs)
      if e ~= 0 then
         -- function is not "protected"?
         return 2,firstset
      else
         -- pattern inside capture ensures first can be used
         return 0,firstset
      end
   end,
   [TTag.Call] = function(t, follow, cs)
      local rule = cs.grammar.ruletab[t.key]
      return getfirst(rule, follow, cs) -- tail call
   end,
   [TTag.And] = function(t, follow, cs)
      local e,firstset = getfirst(t.sib1, follow, cs)
      return e, cs_intersection(firstset, follow, cs)
   end,
   [{ TTag.Not, TTag.Behind }] = function(t, follow, cs)
      if t.tag == TTag.Not then
         local firstset = tocharset(t.sib1)
         if firstset ~= nil then
            return 1,cs_complement(firstset) -- could match empty input
         end
      end
      -- the TNot or TBehind gives no new information
      -- call getfirst only to check for math-time captures
      local e,_ = getfirst(t.sib1, follow, cs)
      if e%2 == 0 then e = e + 1 end -- set the lsb; could match empty input
      return e, follow -- uses follow
   end,
}
function CompileState:getfirst(t, follow) return getfirst(t, follow, self) end
register_fname("getfirst", getfirst)

--[[
** If 'headfail(tree)' true, then 'tree' can fail only depending on the
** next character of the subject.
   -- ie, a single character of lookahead is enough to evaluate the pattern
   -- rooted at this node
]]--
local headfail
headfail = define_tree_visitor{
   [{TTag.Char, TTag.Set, TTag.Any,
     TTag.False}] = function(t, cs)
      return true
     end,
   [{TTag.True, TTag.Rep, TTag.RunTime, TTag.Not,
     -- even though we are codepoint-based, we don't have a TestUTFR instruction
     -- so we can't use a Test instruction on the first character, which is
     -- implicitly what headfail is checking for.
     TTag.UTFR,
     TTag.Behind, TTag.Throw}] = function(t, cs)
      return false
     end,
   [{TTag.Capture, TTag.Rule,
     TTag.XInfo, TTag.And}] = function(t, cs)
      return headfail(t.sib1, cs) -- tail call
     end,
   [TTag.Grammar] = function(t, cs)
      return cs:withGrammar(t, headfail, t.sib1, cs)
   end,
   [TTag.Call] = function(t, cs)
      local rule = cs.grammar.ruletab[t.key]
      return headfail(rule, cs) -- tail call
   end,
   [TTag.Seq] = function(t, cs)
      if not nofail(t.sib2, cs) then
         -- if the second child could possibly fail, then we can't
         -- evaluate the entire seq based just on the first child
         return false
      end
      return headfail(t.sib1, cs) -- tail call
   end,
   [TTag.Choice] = function(t, cs)
      -- both children need to be headfail for this to be headfail
      if not headfail(t.sib1, cs) then
         return false
      end
      return headfail(t.sib2, cs) -- tail call
   end,
}
function CompileState:headfail(t) return headfail(t, self) end
register_fname("headfail", headfail)

--[[
** Check whether the code generation for the given tree can benefit
** from a follow set (to avoid computing the follow set when it is
** not needed)
]]--
local needfollow
needfollow = define_tree_visitor{
   [{TTag.Char, TTag.Set, TTag.Any, TTag.UTFR,
    TTag.False, TTag.True, TTag.And, TTag.Not,
    TTag.RunTime, TTag.Grammar, TTag.Call, TTag.Behind,
    TTag.Throw, }] = function(tree) return false end,
   [{TTag.Choice, TTag.Rep}] = function(tree) return true end,
   [TTag.Capture] = function(tree) return needfollow(tree.sib1) end,
   [TTag.Seq] = function(tree) return needfollow(tree.sib2) end,
}
register_fname("needfollow", needfollow)

--[[
** ======================================================
** Code generation
** ======================================================
]]--

local Instruction = {}
Instruction.__index = Instruction

function Instruction:new(arg)
   local opcode = arg[1]
   if opcode == nil then error("no opcode") end
   -- target is rule # for open calls before correction, and absolute pc after
   local instr = {
      code = opcode,
      exec = opcode.exec, -- copy the exec function from the opcode!
      aux = arg.aux, -- used for the "primary argument"
      key = arg.key, -- used for string-valued arguments
      target = arg.target, -- used for jmp-like instructions
      from = arg.from, -- inclusive start, for ranges
      to = arg.to, -- inclusive end, for ranges
      set = arg.set, -- charset <= CHARMAX
      rest = arg.rest, -- include characters above CHARMAX?
      cap = arg.cap, -- used for "capture kind"
   }
   setmetatable(instr, self)
   instr:setCode(opcode) -- opportunity to add tracing logic!
   return instr
end

function Instruction:setCode(opcode)
   self.code = opcode
   local exec = opcode.exec
   if TRACE_INSTRUCTIONS then
      local str
      self.exec = function(self, state, ...)
         if str == nil then
            str = table.concat(printinst(0, self, { "Executing " })):gsub("\n","")
         end
         print(state.bytePos, state.codepoint, str)
         return exec(self, state, ...)
      end
   else
      self.exec = exec
   end
end

-- state for the compiler

function CompileState:new(p)
   local cs = {
      p = p,
   }
   setmetatable(cs, self)
   return cs
end

function CompileState:withGrammar(g, f, ...)
   local lastGrammar = self.grammar
   self.grammar = g
   local result = compat.pack(f(...))
   self.grammar = lastGrammar
   return compat.unpack(result)
end

function CompileState:codegen(tree, opt, tt, fl)
   assert(fl.tag == TTag.Set)
   -- just a little helper
   return codegen(tree, self, opt, tt, fl)
end

function CompileState:getinstr(i)
   return self.p.code[i]
end

function CompileState:addinstruction(arg)
   local code = self.p.code
   table.insert(code, Instruction:new(arg))
   return #code
end

function CompileState:gethere()
   local code = self.p.code
   return 1 + #code
end

function CompileState:jumptothere(pc, where)
   if pc ~= NOINST then
      local code = self.p.code
      code[pc].target = where
   end
end

function CompileState:jumptohere(pc)
   self:jumptothere(pc, self:gethere())
end

function codethrow(cs, throw)
   local rule = nil
   if cs.grammar ~= nil then
      -- we only lookup/match *string* rule names, not numeric indices
      rule = cs.grammar.ruletab[tostring(throw.key)]
   end
   if rule ~= nil then
      return cs:addinstruction{
         Opcode.ThrowRec,
         key=throw.key, -- rule name / error label
         target=rule.n -- recovery rule number
      }
   else
      return cs:addinstruction{
         Opcode.Throw,
         key=throw.key, -- rule name / error label
         -- no recovery rule
      }
   end
end

function codeutfr(cs, tree)
   return cs:addinstruction{
      Opcode.UTFR,
      from = tree.from,
      to = tree.to,
   }
end

--[[
** Code an IChar instruction, or IAny if there is an equivalent
** test dominating it
]]--
function codechar(cs, codepoint, tt)
   if tt ~= NOINST and
      cs:getinstr(tt).code == Opcode.TestChar and
      cs:getinstr(tt).aux == codepoint then
      cs:addinstruction{Opcode.Any}
   else
      cs:addinstruction{Opcode.Char, aux=codepoint,}
   end
end

--[[
** Add a charset posfix to an instruction
]]--
function addcharset(cs, codepoint)
   --[[
static void addcharset (CompileState *compst, const byte *cs) {
  int p = gethere(compst);
  int i;
  for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++)
    nextinstruction(compst);  /* space for buffer */
  /* fill buffer with charset */
      loopset(j, getinstr(compst, p).buff[j] = cs[j]);
   ]]--
end

--[[
** code a char set, optimizing unit sets for IChar, "complete"
** sets for IAny, and empty sets for IFail; also use an IAny
** when instruction is dominated by an equivalent test.
]]--
function codecharset(cs, tree, tt)
   local op,codepoint = charsettype(tree)
   if op == Opcode.Char then
      return codechar(cs, codepoint, tt)
   elseif op == Opcode.Set then
      -- non-trivial set?
      if tt ~= NOINST and
         cs:getinstr(tt).code == Opcode.TestSet and
         cs_equal(tree, cs:getinstr(tt)) then
         return cs:addinstruction{Opcode.Any}
      else
         return cs:addinstruction{
            Opcode.Set,
            set = tree.set, -- XXX ensure immutable
            rest = tree.rest,
         }
      end
   else
      return cs:addinstruction{op} -- Any or Fail
   end
end

--[[
** code a test set, optimizing unit sets for ITestChar, "complete"
** sets for ITestAny, and empty sets for IJmp (always fails).
** 'e' is nonzero iff test should accept the empty string. (Test
** instructions in the current VM never accept the empty string.)
]]--
function codetestset(cs, tree, e)
   if e ~= 0 then return NOINST end
   local op,codepoint = charsettype(tree)
   if op == Opcode.Fail then
      return cs:addinstruction{Opcode.Jmp, target = NOINST} -- always jump
   elseif op == Opcode.Any then
      return cs:addinstruction{Opcode.TestAny, target = NOINST}
   elseif op == Opcode.Char then
      return cs:addinstruction{
         Opcode.TestChar,
         target = NOINST,
         aux = codepoint,
      }
   elseif op == Opcode.Set then
      return cs:addinstruction{
         Opcode.TestSet,
         target = NOINST,
         set = tree.set, -- XXX ensure immutable
         rest = tree.rest,
      }
   else
      error("unreachable")
   end
end

--[[
** <behind(p)> == behind n; <p>   (where n = fixedlen(p))
]]--
function codebehind(cs, tree)
   if tree.n > 0 then
      cs:addinstruction{ Opcode.Behind, aux = tree.n }
   end
   return cs:codegen(tree.sib1, false, NOINST, fullset)
end

--[[
** Choice; optimizations:
** - when p1 is headfail or when first(p1) and first(p2) are disjoint,
** than a character not in first(p1) cannot go to p1 and a character
** in first(p1) cannot go to p2, either because p1 will accept
** (headfail) or because it is not in first(p2) (disjoint).
** (The second case is not valid if p1 accepts the empty string,
** as then there is no character at all...)
** - when p2 is empty and opt is true; a IPartialCommit can reuse
** the Choice already active in the stack.
]]--
function codechoice(cs, p1, p2, opt, fl)
   local emptyp2 = (p2.tag == TTag.True)
   local e1, cs1 = cs:getfirst(p1, fullset)
   local headfailp1 = cs:headfail(p1)
   local e2, cs2
   if not headfailp1 and e1 == 0 then
      e2, cs2 = cs:getfirst(p2, fl) -- avoid computing unless necessary
   end
   if headfailp1 or (e1 == 0 and cs_disjoint(cs1, cs2)) then
      -- <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2:
      local test = codetestset(cs, cs1, 0)
      local jmp = NOINST
      cs:codegen(p1, false, test, fl)
      if not emptyp2 then
         jmp = cs:addinstruction{Opcode.Jmp, target = NOINST }
      end
      cs:jumptohere(test)
      cs:codegen(p2, opt, NOINST, fl)
      cs:jumptohere(jmp)
   elseif opt and emptyp2 then
      -- p1? == IPartialCommit; p1
      cs:jumptohere(cs:addinstruction{Opcode.PartialCommit, target = NOINST})
      cs:codegen(p1, true, NOINST, fullset)
   else
      -- <p1 / p2> ==
      --  test(first(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2:
      local test = codetestset(cs, cs1, e1)
      local pchoice = cs:addinstruction{Opcode.Choice, target = NOINST}
      cs:codegen(p1, emptyp2, test, fullset)
      local pcommit = cs:addinstruction{Opcode.Commit, target = NOINST}
      cs:jumptohere(pchoice)
      cs:jumptohere(test)
      cs:codegen(p2, opt, NOINST, fl)
      cs:jumptohere(pcommit)
   end
end

--[[
** And predicate
** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n
** (valid only when 'p' has no captures)
]]--
function codeand(cs, tree, tt)
  --[[ labeled failure: optimization disabled because in case of a failure it
     does not report the expected error position (the current subject position
     when begin the matching of <&p>) ]]--
   local pchoice = cs:addinstruction{Opcode.PredChoice, target = NOINST}
   cs:codegen(tree, false, tt, fullset)
   local pcommit = cs:addinstruction{Opcode.BackCommit, target = NOINST}
   cs:jumptohere(pchoice)
   cs:addinstruction{Opcode.Fail}
   cs:jumptohere(pcommit)
end

--[[
** Captures: if pattern has fixed (and not too big) length, and it
** has no nested captures, use a single IFullCapture instruction
** after the match; otherwise, enclose the pattern with OpenCapture -
** CloseCapture.
]]--
function codecapture(cs, tree, tt, fl)
   local len = cs:fixedlen(tree.sib1)
   if len >= 0 and len <= MAXOFF and not cs:hascaptures(tree.sib1) then
      cs:codegen(tree.sib1, false, tt, fl)
      cs:addinstruction{
         Opcode.FullCapture,
         cap = tree.cap,
         key = tree.key, -- capture name
         aux = len,
      }
   else
      assert(tree.cap ~= nil)
      cs:addinstruction({
            Opcode.OpenCapture,
            cap = tree.cap,
            key = tree.key, -- capture name
      })
      cs:codegen(tree.sib1, false, tt, fl)
      cs:addinstruction({
            Opcode.CloseCapture,
            cap = CapKind.close,
      })
   end
end

function coderuntime(cs, tree, tt)
   cs:addinstruction({
         Opcode.OpenCapture,
         cap = CapKind.group,
         key = tree.key, -- capture *function*
   })
   cs:codegen(tree.sib1, false, tt, fullset)
   cs:addinstruction({
         Opcode.CloseRunTime,
         cap = CapKind.close,
   })
end

--[[
** Repetition; optimizations:
** When pattern is a charset, can use special instruction ISpan.
** When pattern is head fail, or if it starts with characters that
** are disjoint from what follows the repetions, a simple test
** is enough (a fail inside the repetition would backtrack to fail
** again in the following pattern, so there is no need for a choice).
** When 'opt' is true, the repetion can reuse the Choice already
** active in the stack.
]]--
function coderep(cs, tree, opt, fl)
   local st = tocharset(tree)
   if st ~= nil then
      return cs:addinstruction{
         Opcode.Span,
         set = st.set,
         rest = st.rest,
      }
   end
   local e1,st = cs:getfirst(tree, fullset)
   if cs:headfail(tree) or (e1 == 0 and cs_disjoint(st, fl)) then
      -- L1: test (fail(p1)) -> L2; <p>; jmp L1; L2:
      local test = codetestset(cs, st, 0)
      cs:codegen(tree, false, test, fullset)
      local jmp = cs:addinstruction{Opcode.Jmp, target = NOINST}
      cs:jumptohere(test)
      cs:jumptothere(jmp, test)
   else
      -- test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2:
      -- or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1;
      local test = codetestset(cs, st, e1)
      local pchoice = NOINST
      if opt then
         cs:jumptohere(cs:addinstruction{Opcode.PartialCommit, target = NOINST})
      else
         pchoice = cs:addinstruction{Opcode.Choice, target = NOINST}
      end
      local l2 = cs:gethere()
      cs:codegen(tree, false, NOINST, fullset)
      local commit = cs:addinstruction{Opcode.PartialCommit, target = NOINST}
      cs:jumptothere(commit, l2)
      cs:jumptohere(pchoice)
      cs:jumptohere(test)
   end
end

--[[
** Not predicate; optimizations:
** In any case, if first test fails, 'not' succeeds, so it can jump to
** the end. If pattern is headfail, that is all (it cannot fail
** in other parts); this case includes 'not' of simple sets. Otherwise,
** use the default code (a choice plus a failtwice).
]]--
function codenot(cs, tree)
   local e,st = cs:getfirst(tree, fullset)
   local test = codetestset(cs, st, e)
   if cs:headfail(tree) then
      -- test (fail(p1)) -> L1; fail; L1:
      cs:addinstruction{Opcode.Fail}
   else
      -- test(fail(p))-> L1; choice L1; <p>; failtwice; L1:
      local pchoice = cs:addinstruction{Opcode.PredChoice, target = NOINST }
      cs:codegen(tree, false, NOINST, fullset)
      cs:addinstruction{Opcode.FailTwice}
      cs:jumptohere(pchoice)
   end
   cs:jumptohere(test)
end

-- find the final destination of a sequence of jumps
function finaltarget(code, pc)
   while code[pc].code == Opcode.Jmp do
      pc = code[pc].target
   end
   return pc
end

-- final label (after traversing any jumps)
function finallabel(code, pc)
   return finaltarget(code, code[pc].target)
end

--[[
** change open calls to calls, using list 'positions' to find
** correct offsets; also optimize tail calls
]]--
function correctcalls(cs, positions, from, to)
   local code = cs.p.code
   for i=from,(to-1) do
      local op = code[i]
      if op.code == Opcode.OpenCall or op.code == Opcode.ThrowRec then
         local n = op.target -- rule number
         local rule = positions[n] -- rule position
         if rule == from or code[rule - 1].code == Opcode.Ret then
            -- sanity check! ok!
         else
            error("bad rule position")
         end
         if op.code == Opcode.OpenCall then
            if code[finaltarget(code, i+1)].code == Opcode.Ret then
               -- call; ret => tail call
               op:setCode(Opcode.Jmp)
            else
               op:setCode(Opcode.Call) -- open call no more
            end
         end
         op.target = rule
      end
   end
end

--[[
** Code for a grammar:
** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:
]]--
function codegrammar(cs, tree)
   local firstcall = cs:addinstruction{Opcode.Call, target = NOINST} -- call initial rule
   local jumptoend = cs:addinstruction{Opcode.Jmp, target = NOINST} -- jump to the end
   local start = cs:gethere() -- here starts the initial rule
   cs:jumptohere(firstcall)

   local positions = {}
   local rule = tree.sib1
   for i=1,tree.n do
      local pattern = rule.sib1
      positions[i] = cs:gethere() -- save rule position
      cs:codegen(rule.sib1, false, NOINST, fullset) -- code rule
      cs:addinstruction{Opcode.Ret}
      rule = rule.sib2
   end
   if rule.tag ~= TTag.True then error("impossible") end
   cs:jumptohere(jumptoend)
   correctcalls(cs, positions, start, cs:gethere())
end

function codecall(cs, tree)
   local rule = cs.grammar.ruletab[tree.key]
   assert(rule ~= nil)
   assert(rule.n ~= nil)
   return cs:addinstruction{
      Opcode.OpenCall, -- to be corrected later
      target = rule.n -- rule number
   }
end

--[[
** Code first child of a sequence
** (second child is called in-place to allow tail call)
** Return 'tt' for second child
]]--
function codeseq1(cs, p1, p2, tt, fl)
   assert(fl.tag == TTag.Set)
   if needfollow(p1) then
      local _, fl1 = cs:getfirst(p2, fl) -- p1 follow is p2 first
      cs:codegen(p1, false, tt, fl1)
   else
      -- use fullset as follow
      cs:codegen(p1, false, tt, fullset)
   end
   if cs:fixedlen(p1) ~= 0 then -- can 'p1' consume anything?
      return NOINST -- invalidate test
   else
      return tt -- else 'tt' still protects sib2
   end
end

--[[
** Main code-generation function: dispatch to auxiliar functions
** according to kind of tree. ('needfollow' should return true
** only for consructions that use 'fl'.)
]]--

--[[
** code generation is recursive; 'opt' indicates that the code is being
** generated as the last thing inside an optional pattern (so, if that
** code is optional too, it can reuse the 'IChoice' already in place for
** the outer pattern). 'tt' points to a previous test protecting this
** code (or NOINST). 'fl' is the follow set of the pattern.
]]--
codegen = define_tree_visitor{
   [TTag.Char] = function(tree, cs, opt, tt, fl)
      return codechar(cs, tree.n, tt)
   end,
   [TTag.Any] = function(tree, cs, opt, tt, fl)
      return cs:addinstruction{Opcode.Any}
   end,
   [TTag.Set] = function(tree, cs, opt, tt, fl)
      return codecharset(cs, tree, tt)
   end,
   [TTag.True] = function(tree, cs, opt, tt, fl)
      return -- do nothing
   end,
   [TTag.False] = function(tree, cs, opt, tt, fl)
      return cs:addinstruction{Opcode.Fail}
   end,
   [TTag.UTFR] = function(tree, cs, opt, tt, fl)
      return codeutfr(cs, tree)
   end,
   [TTag.Choice] = function(tree, cs, opt, tt, fl)
      return codechoice(cs, tree.sib1, tree.sib2, opt, fl)
   end,
   [TTag.Rep] = function(tree, cs, opt, tt, fl)
      return coderep(cs, tree.sib1, opt, fl)
   end,
   [TTag.Behind] = function(tree, cs, opt, tt, fl)
      return codebehind(cs, tree)
   end,
   [TTag.Not] = function(tree, cs, opt, tt, fl)
      return codenot(cs, tree.sib1)
   end,
   [TTag.And] = function(tree, cs, opt, tt, fl)
      return codeand(cs, tree.sib1, tt)
   end,
   [TTag.Capture] = function(tree, cs, opt, tt, fl)
      return codecapture(cs, tree, tt, fl)
   end,
   [TTag.RunTime] = function(tree, cs, opt, tt, fl)
      return coderuntime(cs, tree, tt)
   end,
   [TTag.Grammar] = function(tree, cs, opt, tt, fl)
      return cs:withGrammar(tree, codegrammar, cs, tree)
   end,
   [TTag.Call] = function(tree, cs, opt, tt, fl)
      return codecall(cs, tree)
   end,
   [TTag.Seq] = function(tree, cs, opt, tt, fl)
      tt = codeseq1(cs, tree.sib1, tree.sib2, tt, fl) -- code 'p1'
      return cs:codegen(tree.sib2, opt, tt, fl) -- tail call
   end,
   [TTag.Throw] = function(tree, cs, opt, tt, fl)
      return codethrow(cs, tree)
   end,
   ["assert"] = function(tree, cs, opt, tt, fl)
      assert(fl.tag == TTag.Set)
      assert(opt ~= 0)
   end,
}
register_fname("codegen", codegen)

--[[
** Optimize jumps and other jump-like instructions.
** * Update labels of instructions with labels to their final
** destinations (e.g., choice L1; ... L1: jmp L2: becomes
** choice L2)
** * Jumps to other instructions that do jumps become those
** instructions (e.g., jump to return becomes a return; jump
** to commit becomes a commit)
]]--
function peephole(cs)
   local code = cs.p.code
   local jmpswitch
   local switch = define_opcode_visitor{
      -- instructions with labels
      [{Opcode.Choice, Opcode.Call, Opcode.Commit, Opcode.PartialCommit,
        Opcode.BackCommit, Opcode.TestChar, Opcode.TestSet,
        Opcode.TestAny}] = function(op, i)
         cs:jumptothere(i, finallabel(code, i))
        end,
      [Opcode.Jmp] = function(op, i)
         local ft = finaltarget(code, i)
         jmpswitch(code[ft], i, ft) -- jumping to what?
      end,
      default = function() end,
   }
   jmpswitch = define_opcode_visitor{
      -- instructions with unconditional implicit jumps
      [{Opcode.Ret,Opcode.Fail,Opcode.FailTwice,Opcode.End}] = function(op, i, ft)
         code[i]:setCode(code[ft].code) -- jump becomes that instruction
      end,
      -- instructions with unconditional explicit jumps
      [{Opcode.Commit, Opcode.PartialCommit, Opcode.BackCommit}] = function(op, i, ft)
         local fft = finallabel(code, ft)
         code[i]:setCode(code[ft].code) -- jump becomes that instruction
         cs:jumptothere(i, fft) -- with an optimized target
         switch(code[i], i) -- reoptimize the label
      end,
      default = function(op, i, ft)
         cs:jumptothere(i, ft) -- optimize label
      end,
   }
   for i=1,#code do
      switch(code[i], i)
   end
end

-- thread the instructions to speed up dispatch during execution
function thread(cs)
   local code = cs.p.code
   for i=1,#code-1 do
      code[i].next = code[i+1]
      if code[i].target ~= nil then
         code[i].branch = code[code[i].target]
      end
   end
end

function compile(p)
   local compst = CompileState:new(p)
   p.code = {}
   assert(fullset.tag == TTag.Set)
   compst:codegen(p, false, NOINST, fullset)
   compst:addinstruction{Opcode.End}
   peephole(compst)
   thread(compst)
   return p.code
end

return {
   Instruction = Instruction,
   compile = compile,
   cs_clone = cs_clone,
   cs_complement = cs_complement,
   cs_diff = cs_diff,
   cs_intersection = cs_intersection,
   cs_union = cs_union,
   fixedlen = fixedlen,
   hascaptures = hascaptures,
   nofail = nofail,
   nullable = nullable,
   nullable_with_grammar = nullable_with_grammar,
   tocharset = tocharset,
}

end)

register('llpeg.utf8util', function(myrequire)
myrequire('strict')

local utf8util = {}

function utf8util.codepointAt(s, pos)
   local c1 = string.byte(s, pos)
   if c1 <= 0x7F then
      return c1, 1
   end
   local c2 = string.byte(s, pos + 1)
   if c1 <= 0xDF then
      return ((c1 % 0x20 ) * 0x40) + (c2 % 0x40), 2
   end
   local c3 = string.byte(s, pos + 2)
   if c1 <= 0xEF then
      return (((c1 % 0x10) * 0x40) + (c2 % 0x40)) * 0x40 + (c3 % 0x40), 3
   end
   local c4 = string.byte(s, pos + 3)
   if c1 <= 0xF7 then
      return ((((c1 % 0x08) * 0x40) + (c2 % 0x40)) * 0x40 + (c3 % 0x40)) * 0x40 + (c4 % 0x40), 4
   end
   error( "bad utf8" )
end

-- same as utf8.offset in Lua 5.3 standard library
function utf8util.offset(s, n, i)
   if n > 0 then error("unimplemented") end
   while n < 0 do
      i = i - 1
      if i < 1 then return nil end
      local c = string.byte(s, i)
      if c < 0x80 or c > 0xBF then
         n = n + 1
      end
   end
   return i
end

return utf8util

end)

register('llpeg.list', function(myrequire)
local List = {}
List.__index = List

function List:new()
   return setmetatable({ n = 0 }, self)
end

function List:__len()
   return self.n
end

function List:push(val)
   local n = self.n + 1
   self[n] = val
   self.n = n
end

function List:pop()
   local n = self.n
   assert(n > 0)
   local old = self[n]
   self[n] = nil
   self.n = n - 1
   return old
end

function List:insert(pos, val)
   for i=self.n,pos,-1 do
      self[i+1] = self[i]
   end
   self[pos] = val
   self.n = self.n + 1
end

return List

end)

register('llpeg.cap', function(myrequire)
myrequire('strict')
local compat = myrequire('advent.compat')
local from = myrequire('llpeg.types').from
local
   CapKind,
   _ = from(myrequire('llpeg.types'), {
               'CapKind',
   })
local
   printcaplist,
   _ = from(myrequire('llpeg.print'), {
               'printcaplist',
   })
local List = myrequire('llpeg.list')

local MAXRECLEVEL = 200

local Capture = {}
Capture.__index = Capture

-- kind is CapKind of the capture
-- bytePos is the subject position (in bytes)
-- byteLen is the length of the capture (in bytes)
-- extra is extra info (group name, arg index, etc)
function Capture:new(kind, bytePos, byteLen, extra)
   assert(getmetatable(kind) == CapKind)
   return setmetatable({
         kind = kind, bytePos = bytePos, byteLen = byteLen, extra = extra,
   }, self)
end

function Capture:__tostring()
   return string.format("Capture{kind=%s, pos=%d, len=%s, extra=%s}",
                        self.kind, self.bytePos, self.byteLen, self.extra)
end

function Capture:isopencap()
   return self.byteLen == nil
end

-- true if c2 is (any number of levels) inside self
function Capture:inside(c2)
   if self:isopencap() then
      return not c2:isclosecap()
   else
      return c2.bytePos < (self.bytePos + self.byteLen)
   end
end

function Capture:isclosecap()
   return self.kind == CapKind.close
end

--[[
** Return the size of capture 'cap'. If it is an open capture, 'close'
** must be its corresponding close.
]]--
function Capture:size(close)
   if self:isopencap() then
      assert(close:isclosecap())
      return close.bytePos - self.bytePos
   else
      return self.byteLen
   end
end

function CapKind:newCapture(bytePos, byteLen, extra)
   return Capture:new(self, bytePos, byteLen, extra)
end

local CapState = {}
CapState.__index = CapState

-- Capture cap: current capture
-- Capture ocap: (original) capture list
-- int ptop: index of last argument to 'match'
-- string s: original string
-- int valuecached: value stored in cache slot
-- int reclevel: recursion level
function CapState:new(captures, source, extraArgs)
   return setmetatable({
         captures = captures,
         index = 1,
         source = source,
         valuecached = {},
         reclevel = 0,
         extraArgs = extraArgs,
    }, self)
end

function CapState:cap() -- helper
   return self.captures[self.index]
end

function CapState:advance() -- helper
   local i = self.index
   local cap = self.captures[i]
   self.index = i + 1
   return cap, i
end

function CapState:substr(start, len) -- helper
   return string.sub(self.source, start, start + len - 1)
end

function CapState:skipclose(head)
   if head:isopencap() then
      assert(self.captures[self.index]:isclosecap())
      self.index = self.index + 1
   end
end

function CapState:closesize(head)
   return head:size(self:cap())
end

--[[
** Go to the next capture at the same level
]]--
function CapState:nextcap()
   local cap = self:cap()
   if cap:isopencap() then -- must look for a close
      local n = 0 -- number of opens waiting a close
      while true do -- look for corresponding close
         self.index = self.index + 1
         cap = self:cap()
         if cap:isopencap() then
            n = n + 1
         elseif cap:isclosecap() then
            if n == 0 then break end
            n = n - 1
         end
      end
      self.index = self.index + 1 -- skip last close (or entire single capture)
   else
      self.index = self.index + 1
      while cap:inside(self:cap()) do
         self.index = self.index + 1 -- skip captures inside the current one
      end
   end
end

--[[
** Goes back in a list of captures looking for an open capture
** corresponding to a close
]]--
function CapState:findopen(i) -- captures[i] is the close that we want to match
   assert(self.captures[i]:isclosecap())
   local n = 0 -- number of closes waiting an open
   while i > 1 do
      i = i - 1
      local cap = self.captures[i]
      if cap:isclosecap() then
         n = n + 1 -- one more open to skip
      elseif cap:isopencap() then
         if n == 0 then return cap,i end
         n = n - 1
      end
   end
   error("couldn't find open")
end

--[[
** Checks whether group 'grp' is visible to 'ref', that is, 'grp' is
** not nested inside a full capture that does not contain 'ref'.  (We
** only need to care for full captures because the search at 'findback'
** skips open-end blocks; so, if 'grp' is nested in a non-full capture,
** 'ref' is also inside it.)  To check this, we search backward for the
** inner full capture enclosing 'grp'.  A full capture cannot contain
** non-full captures, so a close capture means we cannot be inside a
** full capture anymore.
]]--
function CapState:capvisible(igrp, ref)
   local i = igrp
   local grp = self.captures[igrp]
   while i > 1 do
      i = i - 1
      local cap = self.captures[i]
      if cap:isclosecap() then
         return true -- can stop the search
      elseif cap:inside(grp) then -- is 'grp' inside cap?
         return cap:inside(ref) -- ok iff cap also contains ref
      end
   end
   return true -- 'grp' is not inside any capture
end

--[[
** Try to find a named group capture with the name given;
** goes backward from 'i'.
]]--
function CapState:findback(name, i)
   if i == nil then i = self.index end
   local ref = self.captures[i]
   while i > 1 do
      i = i - 1
      local cap = self.captures[i]
      if cap:isclosecap() or not cap:inside(ref) then
         if cap:isclosecap() then
            cap,i = self:findopen(i)
         end
         if cap.kind == CapKind.group and self:capvisible(i, ref) then
            if cap.extra == name then
               return cap,i
            end
         end
      end
   end
   error("back reference '"..name.."' not found")
end

function CapState:getcaptures()
   local result = List:new()
   while not self:cap():isclosecap() do
      self:pushcapture(result)
   end
   return result
end

function CapState:pushcapture(result)
   self.reclevel = self.reclevel + 1
   if self.reclevel > MAXRECLEVEL then
      error("subcapture nesting too deep")
   end
   local cap = self.captures[self.index]
   assert(cap.kind.push ~= nil)
   local res = cap.kind.push(self, cap, result)
   self.reclevel = self.reclevel - 1
   return res
end

-- helper functions for pushcapture

--[[
** Push on the Lua stack all values generated by nested captures inside
** the current capture. Returns number of values pushed. 'addextra'
** makes it push the entire match after all captured values. The
** entire match is pushed also if there are no other nested values,
** so the function never returns zero.
]]--
function CapState:pushnestedvalues(result, addextra)
   local head = self:advance()
   local n = 0 -- number of pushed subvalues
   -- repeat for all nested patterns
   while head:inside(self:cap()) do
      n = n + self:pushcapture(result)
   end
   if addextra or n == 0 then -- need extra?
      result:push(self:substr(head.bytePos, self:closesize(head)))
      n = n + 1
   end
   self:skipclose(head)
   return n
end

--[[
** Push only the first value generated by nested captures
]]--
function CapState:pushonenestedvalue(result)
   local n = self:pushnestedvalues(result, false)
   if n == 0 then
      result:push(nil) -- ensure there's exactly one value
      return 1
   end
   while n > 1 do
      result:pop() -- pop extra values
      n = n - 1
   end
   return n
end


-- visitor patterns for pushcapture
function CapKind.position.push(capstate, cap, result)
   result:push(cap.bytePos)
   capstate.index = capstate.index + 1
   return 1
end

function CapKind.const.push(capstate, cap, result)
   result:push(cap.extra)
   capstate.index = capstate.index + 1
   return 1
end

function CapKind.arg.push(capstate, cap, result)
   local n = cap.extra
   if n > capstate.extraArgs.n then
      error(string.format("reference to absent extra argument #%d", n))
   end
   result:push(capstate.extraArgs[n])
   capstate.index = capstate.index + 1
   return 1
end

function CapKind.simple.push(capstate, cap, result)
   local k = capstate:pushnestedvalues(result, true)
   -- reorder so that the whole match is the first result, not the last
   local last = result:pop()
   result:insert(2 + #result - k, last)
   return k
end

-- missing a bunch

--[[
** Table capture: creates a new table and populates it with nested
** captures.
]]--
function CapKind.table.push(capstate, cap, result) -- aka tablecap
   local t = {}
   result:push(t)
   local head = capstate:advance()

   local n = 0
   while head:inside(capstate:cap()) do
      cap = capstate:cap()
      if cap.kind == CapKind.group and cap.extra ~= nil then -- named group?
         capstate:pushonenestedvalue(result)
         t[cap.extra] = result:pop() -- move it into table
      else -- not a named group
         local k = capstate:pushcapture(result)
         for i=k,1,-1 do
            t[n + i] = result:pop() -- move it into table (indexed)
         end
         n = n + k
      end
   end
   capstate:skipclose(head)
   return 1 -- number of values pushed (only the table)
end

--[[
** Table-query capture
]]--
function CapKind.query.push(capstate, cap, result) -- aka querycap
   capstate:pushonenestedvalue(result)
   local key = result:pop()
   local tbl = cap.extra
   assert(type(tbl) == "table")
   local val = tbl[key]
   if val ~= nil then
      result:push(val)
      return 1
   else
      return 0
   end
end

--[[
** Fold capture
]]--
function CapKind.fold.push(capstate, cap, result) -- aka foldcap
   local f = cap.extra
   assert(type(f) == "function")
   local head = capstate:advance()
   if capstate:cap():isclosecap() then
      -- no nested captures? (large subject)
      error("no initial value for fold capture")
   end
   local args = List:new()
   local n = capstate:pushcapture(args)
   if n == 0 then
      -- nested captures with no values?
      error("no initial value for fold capture")
   end
   local accum = args[1] -- leave only one result for accumulator
   while head:inside(capstate:cap()) do
      args = List:new()
      args:push( accum ) -- put accumulator first
      n = capstate:pushcapture(args) -- get next capture's values
      accum = f(compat.unpack(args))
   end
   capstate:skipclose(head)
   -- only accumulator left in result
   result:push(accum)
   return 1
end

--[[
** Function capture
]]--
CapKind["function"].push = function(capstate, cap, result)
   local f = cap.extra
   assert(type(f) == "function")
   local args = List:new()
   local n = capstate:pushnestedvalues(args, false)
   local r = compat.pack(f(compat.unpack(args)))
   for i=1,r.n do
      result:push(r[i])
   end
   return r.n
end

--[[
** Accumulator capture
]]--
function CapKind.acc.push(capstate, cap, result) -- aka accumulatorcap
   if #result == 0 then
      error("no previous value for accumulator capture")
   end
   local f = cap.extra
   assert(type(f) == "function")
   local prev = #result
   local args = List:new()
   args:push(result[prev])
   local n = capstate:pushnestedvalues(args, false)
   result[prev] = f(compat.unpack(args))
   return 0 -- did not add any extra value
end

--[[
** Select capture
]]--
function CapKind.num.push(capstate, cap, result) -- aka numcap
   local idx = cap.extra -- value to select
   if idx == 0 then -- no values?
      capstate:nextcap() -- skip entire capture
      return 0 -- no value produced
   else
      local vals = List:new()
      local n = capstate:pushnestedvalues(vals, false)
      if n < idx then -- invalid index?
         error("no capture '"..idx.."'")
      else
         result:push(vals[idx])
         return 1
      end
   end
end

function CapState:runtimecap(closePos)
   local close = self.captures[closePos]
   local open,openPos = self:findopen(closePos) -- get open group capture
   assert(open.kind == CapKind.group)
   self.index = openPos -- set state to the open capture
   local args = List:new()
   args:push( self.source) -- original subject
   args:push( close.bytePos ) -- current position
   local n = self:pushnestedvalues(args, false) -- push nested captures
   local func = open.extra
   local funcRet = compat.pack(func(compat.unpack(args)))
   local res = closePos - openPos -- number of captures to be removed
   return res, funcRet
end

function CapKind.runtime.push(capstate, cap, result) -- aka runtimecap
   result:push(cap.extra)
   capstate.index = capstate.index + 1
   return 1
end

local MAXSTRCAPS = 10

--[[
** Collect values from current capture into array 'cps'. Current
** capture must be Cstring (first call) or Csimple (recursive calls).
** (In first call, fills %0 with whole match for Cstring.)
** Returns number of elements in the array that were filled.
]]--
function CapState:getstrcaps(cps, n)
   local k = n
   n = n + 1
   cps[k] = {
      isstring = true, -- get string value
      bytePos = self:cap().bytePos, -- starts here
   }
   local head = self:advance()
   while head:inside(self:cap()) do
      if n > MAXSTRCAPS then -- too many captures?
         self:nextcap() -- skip extra captures (will not need them)
      elseif self:cap().kind == CapKind.Simple then -- string?
         n = self:getstrcaps(cps, n) -- put info into array
      else
         cps[n] = {
            isstring = false, -- not a string
            cap = self.index, -- keep original capture
         }
         self:nextcap()
         n = n + 1
      end
   end

   cps[k].endPos = head.bytePos + self:closesize(head)
   self:skipclose(head)

   return n
end

function CapState:addonestring(buffer, what)
   local cap = self:cap()
   if cap.kind == CapKind.string then
      -- add capture directly to buffer
      return stringcap(self, buffer)
   elseif cap.kind == CapKind.subst then
      -- add capture directly to buffer
      return substcap(self, buffer)
   elseif cap.kind == CapKind.acc then
      error("invalid context for an accumulator capture")
   end
   local result = List:new()
   local n = self:pushcapture(result)
   if n == 0 then return 0 end -- no values to add
   local val = result[1] -- take only one result (the first)
   if type(val) == "number" then
      val = tostring(val)
   elseif type(val) ~= "string" then
      error("invalid "..what.." value (a "..type(val)..")")
   end
   table.insert(buffer, val)
   return 1
end

--[[
** String capture: add result to buffer 'b' (instead of pushing
** it into the stack)
]]--
function stringcap(capstate, buffer)
   local fmt = capstate:cap().extra
   local cps = {}
   local n = capstate:getstrcaps(cps, 1) - 1 -- collect nested captures
   local sawEscape = false
   for _,c in compat.utf8codes(fmt) do
      if sawEscape then
         sawEscape = false
         if c < 48 or c > 57 then -- not followed by a digit
            table.insert(buffer, compat.utf8char(c))
         else
            local l = 1 + c - 48 -- capture index (1-based)
            if l > n then
               error("invalid capture index ("..(l-1)..")")
            elseif cps[l].isstring then
               table.insert(buffer, capstate:substr(cps[l].bytePos, cps[l].endPos - cps[l].bytePos))
            else
               -- go back to evaluate that nested capture
               local curr = capstate.index
               capstate.index = cps[l].cap
               if capstate:addonestring(buffer, "capture") == 0 then
                  error("no values in capture index "..l)
               end
               capstate.index = curr
            end
         end
      elseif c ~= 37 then -- not a % escape?
         table.insert(buffer, compat.utf8char(c))
      else
         sawEscape = true
      end
   end
   return 1
end

--[[
** Substitution capture: add result to buffer 'b'
]]--
function substcap(capstate, buffer)
   local head = capstate:advance()

   local curr = head.bytePos
   while head:inside(capstate:cap()) do
      local cap = capstate:cap()
      local nextPos = cap.bytePos
      local s = capstate:substr(curr, nextPos - curr)
      table.insert(buffer, s) -- add text up to capture
      if capstate:addonestring(buffer, "replacement") == 0 then
         -- no capture value, keep original text in final result
         curr = nextPos
      else
         -- continue after match
         local lastCap = capstate.captures[capstate.index - 1]
         curr = nextPos + cap:size(lastCap)
      end
   end
   -- add last piece of text
   local s = capstate:substr(curr, head.bytePos + capstate:closesize(head) - curr)
   table.insert(buffer, s)
   capstate:skipclose(head)
end

function CapKind.subst.push(capstate, cap, result) -- aka substcap
   local buffer = {}
   substcap(capstate, buffer)
   result:push(table.concat(buffer))
   return 1
end


function CapKind.string.push(capstate, cap, result) -- aka stringcap
   local buffer = {}
   stringcap(capstate, buffer)
   result:push(table.concat(buffer))
   return 1
end

function CapKind.group.push(capstate, cap, result)
   if cap.extra == nil then -- anonymous group?
      return capstate:pushnestedvalues(result, false) -- add all nested values
   else -- named group: add no values
      capstate:nextcap()
      return 0
   end
end

--[[
** Back-reference capture. Return number of values pushed.
]]--
function CapKind.backref.push(capstate, cap, result)
   local curr = capstate.index
   local _,i = capstate:findback(cap.extra)
   capstate.index = i
   local n = capstate:pushnestedvalues(result, false)
   capstate.index = curr + 1
   return n
end

return {
   CapState = CapState,
   Capture = Capture,
}

end)

register('llpeg.vm', function(myrequire)
myrequire('strict')
local compat = myrequire('advent.compat')
local utf8util = myrequire('llpeg.utf8util')
local from = myrequire('llpeg.types').from
local
   CHARMAX,
   CapKind,
   Opcode,
   enum,
   _ = from(myrequire('llpeg.types'), {
               'CHARMAX',
               'CapKind',
               'Opcode',
               'enum',
   })
local
   CapState,
   Capture,
   __ = from(myrequire('llpeg.cap'), {
                'CapState',
                'Capture',
   })
local
   Instruction,
   ___ = from(myrequire('llpeg.code'), {
                'Instruction',
   })
local
   printcaplist,
   ___ = from(myrequire('llpeg.print'), {
                'printcaplist',
   })

local LFAIL = {}
local InsidePred = enum{
   OUTPRED = 0,
   INPRED = 1,
}

local Stack = {}
Stack.__index = Stack
-- Stack prev: previous entry in the stack
-- int bytePos: saved position, or NULL for calls
-- Instruction pc: saved instruction
-- int caplevel
-- int labenv -- for labeled failure
-- bool predchoice -- for labeled failure
function Stack:new(prev, bytePos, pc, caplevel, labenv, predchoice)
   return setmetatable({
         prev = prev,
         bytePos = bytePos, pc = pc, caplevel = caplevel,
         labenv = labenv, predchoice = predchoice,
   }, self)
end

function Stack:__tostring()
   return string.format(
      "Stack{ bytePos=%d, caplevel=%d, labenv=%s, predchoice=%s }",
      self.bytePos, self.caplevel, self.labenv, self.predchoice
   )
end

function Stack:print()
   local s = self
      while s ~= nil do
         print("Stack", s)
         s = s.prev
      end
end

local MatchResult = {}
MatchResult.__index = MatchResult

function MatchResult:new()
   return setmetatable({
         labelf = nil, -- failure label
         sfail = -1, -- farthest failure
   }, self)
end

local State = {}
State.__index = State

function State:new(source, bytePos, ...)
   local giveup = Instruction:new{Opcode.Giveup}
   local insidepred = InsidePred.OUTPRED -- label environment is off inside predicates
   local stack = Stack:new(nil, bytePos, giveup, 0, insidepred, nil)
   local cp,cpLen
   if bytePos <= #source then
      cp, cpLen = utf8util.codepointAt(source, bytePos)
   else
      cp, cpLen = nil, nil
   end
   assert(bytePos ~= nil)
   return setmetatable({
         source = source, -- the source string
         bytePos = bytePos, -- current position in the string, in bytes
         codepoint = cp, -- the codepoint at 'bytePos' in 'source'
         codepointLen = cpLen, -- the length of the codepoint at 'bytePos'
         stack = stack, -- top of stack
         captures = {}, -- list of captures
         captop = 1, -- point to first empty slot in captures (1-based)
         extraArgs = compat.pack(...),
         -- labeled failures:
         insidepred = insidepred,
         labelf = nil, -- failure label
         sfail = -1, -- farthest failure
   }, self)
end

function State:advance()
   return self:resetPosTo(self.bytePos + self.codepointLen)
end

function State:resetPosTo(newPos)
   assert(newPos ~= nil)
   self.bytePos = newPos
   local source = self.source
   if newPos <= #source then
      local cp, cpLen = utf8util.codepointAt(source, newPos)
      self.codepoint = cp
      self.codepointLen = cpLen
      return cp
   else
      self.codepoint = nil
      self.codepointLen = nil
      return nil
   end
end

function State:backtrack(n)
   local off = utf8util.offset(self.source, -n, self.bytePos)
   if off == nil then return false end -- can't backtrack that far!
   self:resetPosTo(off)
   return true
end

function State:updatefarthest(label)
   self.labelf = label
   if self.bytePos > self.sfail then
      self.sfail = self.bytePos
   end
end

function State:pushcapture(cap)
   self.captures[self.captop] = cap
   self.captop = self.captop + 1
end

function State:fail()
   -- pattern failed, try to backtrack
   local lastStack
   repeat
      lastStack = self.stack
      self.stack = lastStack.prev
   until lastStack.bytePos ~= nil
   self:resetPosTo(lastStack.bytePos)
   self.captop = lastStack.caplevel
   self.insidepred = lastStack.labenv -- labeled failure
   return lastStack.pc:exec(self)
end

function State:giveup()
   local r = nil
   local lab = "fail"
   local errpos = self.sfail
   if self.labelf ~= nil and self.labelf ~= LFAIL then
      lab = self.labelf
   end
   return r, lab, errpos
end

function State:getcaptures()
   local results = {}
   if self.captures[1].kind == CapKind.close then -- are there any captures?
      return results -- no captures
   end
   return CapState:new(self.captures, self.source, self.extraArgs):getcaptures()
end

function Opcode.End:exec(state)
   state:pushcapture(CapKind.close:newCapture(state.bytePos, 0))
   -- trim table to proper length
   while #state.captures > state.captop - 1 do
      table.remove(state.captures)
   end
   -- printcaplist(state.captures, #state.captures) -- for debugging
   local results = state:getcaptures()
   if #results == 0 then -- no captured values?
      return state.bytePos -- return only end position
   else
      return compat.unpack(results)
   end
end

function Opcode.Giveup:exec(state)
   return state:giveup()
end

function Opcode.Ret:exec(state)
   local lastStack = state.stack
   state.stack = lastStack.prev
   return lastStack.pc:exec(state)
end

function Opcode.Any:exec(state)
   if state.codepoint ~= nil then
      state:advance()
      return self.next:exec(state)
   end
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.TestAny:exec(state)
   if state.codepoint ~= nil then
      return self.next:exec(state)
   else
      return self.branch:exec(state)
   end
end

function Opcode.UTFR:exec(state)
   local cp = state.codepoint
   if cp ~= nil and self.from <= cp and cp <= self.to then
      state:advance()
      return self.next:exec(state)
   end
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.Char:exec(state)
   if state.codepoint == self.aux then
      state:advance()
      return self.next:exec(state)
   end
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.TestChar:exec(state)
   if state.codepoint == self.aux then
      return self.next:exec(state)
   else
      return self.branch:exec(state)
   end
end

function Opcode.Set:exec(state)
   local cp = state.codepoint
   if cp ~= nil then
      if cp <= CHARMAX then
         if self.set[cp] then
            state:advance()
            return self.next:exec(state)
         end
      else
         if self.rest then
            state:advance()
            return self.next:exec(state)
         end
      end
   end
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.TestSet:exec(state)
   local cp = state.codepoint
   if cp ~= nil then
      if cp <= CHARMAX then
         if self.set[cp] then
            return self.next:exec(state)
         end
      elseif self.rest then
         return self.next:exec(state)
      end
   end
   return self.branch:exec(state)
end

function Opcode.Behind:exec(state)
   local n = self.aux
   -- XXX SLOW self.aux is in *characters* not *bytes*
   if state:backtrack(n) then
      return self.next:exec(state)
   end
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.Span:exec(state)
   local cp = state.codepoint
   while true do
      local match = false
      if cp ~= nil then
         if cp <= CHARMAX then
            if self.set[cp] then match = true end
         else
            if self.rest then match = true end
         end
      end
      if not match then break end
      cp = state:advance()
   end
   return self.next:exec(state)
end

function Opcode.Jmp:exec(state)
   return self.branch:exec(state)
end

function Opcode.Choice:exec(state)
   state.stack = Stack:new(
      state.stack, state.bytePos, self.branch, state.captop, state.insidepred
   )
   return self.next:exec(state)
end

function Opcode.PredChoice:exec(state)
   state.stack = Stack:new(
      state.stack, state.bytePos, self.branch, state.captop, state.insidepred,
      true -- predchoice
   )
   state.insidepred = InsidePred.INPRED
   return self.next:exec(state)
end

function Opcode.Call:exec(state)
   state.stack = Stack:new(
      state.stack, nil, self.next
   )
   return self.branch:exec(state)
end

function Opcode.Commit:exec(state)
   state.stack = state.stack.prev
   return self.branch:exec(state)
end

function Opcode.PartialCommit:exec(state)
   local stack = state.stack
   stack.bytePos = state.bytePos
   stack.caplevel = state.captop
   return self.branch:exec(state)
end

function Opcode.BackCommit:exec(state)
   local stack = state.stack
   state.stack = stack.prev -- pop the stack
   state:resetPosTo(stack.bytePos) -- but reset the position to that stored
   state.insidepred = stack.labenv -- labeled failure
   state.captop = stack.caplevel
   return self.branch:exec(state)
end

function Opcode.Throw:exec(state)
   if state.insidepred == InsidePred.OUTPRED then
      state.labelf = self.key
      -- pop entire stack
      while state.stack.prev ~= nil do
         state.stack = state.stack.prev
      end
   else
      state.labelf = LFAIL
      -- pop until you read a 'predchoice' state
      while not state.stack.predchoice do
         state.stack = state.stack.prev
      end
   end
   state.sfail = state.bytePos
   return state:fail()
end

function Opcode.ThrowRec:exec(state) -- with recovery
   state.sfail = state.bytePos
   if state.insidepred == InsidePred.OUTPRED then
      state.labelf = self.key
      state.stack = Stack:new(
         state.stack, nil, self.next, state.captop
      )
      return self.branch:exec(state)
   else
      state.labelf = LFAIL
      -- pop until you read a 'predchoice' state
      while not state.stack.predchoice do
         state.stack = state.stack.prev
      end
      return state:fail()
   end
end

function Opcode.FailTwice:exec(state)
   state.stack = state.stack.prev
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.Fail:exec(state)
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.CloseRunTime:exec(state)
   -- close the group
   state:pushcapture(self.cap:newCapture(state.bytePos, 0))
   -- trim table to proper length
   while #state.captures > state.captop - 1 do
      table.remove(state.captures)
   end
   local cs = CapState:new(state.captures, state.source, state.extraArgs)
   local n, funcRet = cs:runtimecap(state.captop - 1)
   state.captop = state.captop - n -- remove nested captures
   -- resdyncaptures: resolve returned values in `funcRet`
   -- first argument false=fail, true=keep current pos, number=next position
   local firstArg = funcRet[1]
   if funcRet.n == 0 then
      firstArg = false -- returning void means we'll fail
   end
   if not firstArg then -- if it is falsey, discard rest of returned vals & fail
      state:updatefarthest(LFAIL)
      return state:fail() -- tail call
   elseif type(firstArg) == "boolean" then
      -- keep current position, nothing needs to be done
   else
      local npos = tonumber(firstArg)
      if npos < state.bytePos or npos > (1 + #(state.source)) then
         error("invalid position returned by match-time capture")
      end
      state:resetPosTo(npos)
   end
   -- push the rest of the funcRet values as new captures
   local n = funcRet.n - 1 -- number of new captures
   if n == 0 then -- no new captures?
      state.captop = state.captop - 1 -- remove open group
   else
      -- new captures, keep original open group
      -- add new captures + close group to 'capture' list
      -- adddyncaptures:
      assert(state.captures[state.captop - 1].kind == CapKind.group)
      assert(state.captures[state.captop - 1]:isopencap())
      -- make group capture an anonymous group (this used to hold match-time f)
      state.captures[state.captop - 1].extra = nil
      for i=2,funcRet.n do -- add runtime captures
         state:pushcapture(CapKind.runtime:newCapture(state.bytePos, 0, funcRet[i]))
      end
      -- close group
      state:pushcapture(CapKind.close:newCapture(state.bytePos, 0))
   end
   return self.next:exec(state)
end

local MAXLOP = 20
function findopen(captures, i, currPos)
   i = i - 1 -- check last captop
   local cap = captures[i]
   if (not cap:isopencap()) and cap.bytePos == currPos then
      return nil -- current one cannot be a full capture
   end
   -- else, look for an 'open' capture
   for j=1,MAXLOP do
      if cap:isopencap() then -- open capture?
         return cap,i -- that's the one to be closed
      elseif cap.kind == CapKind.close then
         return nil -- a full capture should not nest a non-full one
      end
      i = i - 1
      if i<1 then break end
      cap = captures[i]
   end
   return nil -- not found within allowed search limit
end

function Opcode.CloseCapture:exec(state)
   -- if possible, turn capture into a full capture
   assert(state.captop > 1)
   local open,_ = findopen(state.captures, state.captop, state.bytePos)
   if open ~= nil then -- if possible, turn capture into a full capture
      open.byteLen = state.bytePos - open.bytePos
   else
      -- non-nil length to mark entry as closed
      state:pushcapture(self.cap:newCapture(state.bytePos, 0))
   end
   return self.next:exec(state)
end

function Opcode.OpenCapture:exec(state)
   state:pushcapture(self.cap:newCapture(
      -- byteLen = nil marks entry as open
      state.bytePos, nil, self.key
   ))
   return self.next:exec(state)
end

function Opcode.FullCapture:exec(state)
   -- XXX SLOW: self.aux is in *characters* not *bytes*
   local nPos = utf8util.offset(state.source, -self.aux, state.bytePos)
   state:pushcapture(self.cap:newCapture(
      nPos, state.bytePos - nPos, self.key
   ))
   return self.next:exec(state)
end

function match(s, init, code, ...)
   local state = State:new(s, init, ...)
   return code[1]:exec(state)
end

return {
   match = match,
}

end)

register('llpeg', function(myrequire)
local VERSION = '0.0.1'
local MAXSTACK = 400 -- maximum backtracking
local MAXBEHIND = 255 -- maximum look-behind
local MAXRULES = 1000 -- maximum number of rules
local LPEG_COMPAT = true

myrequire('strict')
local compat = myrequire('advent.compat')

local from = myrequire('llpeg.types').from
local
   CHARMAX,
   CapKind,
   TTag,
   define,
   define_tree_visitor,
   metareg,
   newcharset,
   numsiblings,
   _ = from(myrequire('llpeg.types'), {
               'CHARMAX',
               'CapKind',
               'TTag',
               'define',
               'define_tree_visitor',
               'metareg',
               'newcharset',
               'numsiblings',
   })
local
   compile,
   cs_diff,
   cs_union,
   fixedlen,
   hascaptures,
   nofail,
   nullable,
   nullable_with_grammar,
   tocharset,
   __ = from(myrequire('llpeg.code'), {
                'compile',
                'cs_diff',
                'cs_union',
                'fixedlen',
                'hascaptures',
                'nofail',
                'nullable',
                'nullable_with_grammar',
                'tocharset',
   })
local
   match,
   ___ = from(myrequire('llpeg.vm'), {
                'match',
   })
local
   printpatt,
   printrepl,
   printtree,
   ____ = from(myrequire('llpeg.print'), {
                 'printpatt',
                 'printrepl',
                 'printtree',
   })

function checkint(v)
   if type(v) == 'string' then
      v = tonumber(v)
   end
   if type(v) ~= "number" then
      error("not a number")
   end
   return math.floor(v)
end

local is_pattern = define_type_visitor{
   pattern = function() return true end,
   default = function() return false end,
}

local ptype = define_type_visitor{
   pattern = function() return "pattern" end,
   default = function(v) return type(v) end,
}

function val2str(v)
   if type(v) == 'number' then return tostring(v) end
   if type(v) == 'string' then return v end
   return string.format("(a %s)", ptype(v))
end

--[[ lpltree.c ]]--

function newtree(tag)
   local t = {
      tag = tag,
      code = nil,
   }
   setmetatable(t, metareg)
   return t
end

function newleaf(tag, n)
   return setmetatable({
         tag = tag,
         code = nil,
         n = n,
   }, metareg)
end

function newroot1sib(tag, sib1)
   return setmetatable({
         tag = tag,
         code = nil,
         sib1 = sib1,
   }, metareg)
end

function newroot2sib(tag, sib1, sib2)
   return setmetatable({
         tag = tag,
         code = nil,
         sib1 = sib1,
         sib2 = sib2,
   }, metareg)
end

--[[ Build a sequence of #s nodes from the array 's' with the tag 'tag' ]]--
function fillseq(tag, s)
   if type(s) == 'number' then
      local len = checkint(s)
      s = setmetatable({}, {__len = function() return len end})
   end
   if #s == 0 then
      return newleaf(tag, 0)
   end
   local i = #s
   local result = newleaf(tag, s[i])
   while i > 1 do
      i = i - 1
      result = newroot2sib(
         TTag.Seq,
         newleaf(tag, s[i]),
         result
      )
   end
   return result
end

--[[ Numbers as patterns:
 0 == true (always match); n == TAny repeated 'n' times;
 -n == not (TAny repeated 'n' times)
]]--
function numtree(n)
   n = checkint(n)
   if n == 0 then
      return newleaf(TTag.True)
   elseif n > 0 then
      return fillseq(TTag.Any, n) -- sequence of 'n' anys
   else
      return newroot1sib(TTag.Not, fillseq(TTag.Any, -n))
   end
end

-- Convert value v to a pattern
local getpatt = define_type_visitor{
   ["string"] = function(s)
      if #s == 0 then
         return newleaf(TTag.True) -- always match if string is empty
      end
      local cp = {}
      for _,c in compat.utf8codes(s) do
         table.insert(cp, c)
      end
      return fillseq(TTag.Char, cp)
   end,
   ["number"] = function(n)
      return numtree(n)
   end,
   ["boolean"] = function(b)
      if b then
         return newleaf(TTag.True)
      else
         return newleaf(TTag.False)
      end
   end,
   ["function"] = function(f)
      return setmetatable({
            tag = TTag.RunTime,
            code = nil,
            key = f,
            sib1 = newleaf(TTag.True),
      }, metareg)
   end,
   ["pattern"] = function(v)
      return v
   end,
   ["table"] = function(v)
      return newgrammar(v)
   end,
   default = function(v)
      error("Not a pattern")
   end,
}

-- labeled failure begin
function newthrowleaf(label)
   return setmetatable({
         tag = TTag.Throw,
         code = nil,
         sib2 = nil, -- no recovery rule associated (yet)
         key = label,
   }, metareg)
end
-- labeled failure end

function lp_P(v)
   return getpatt(v)
end

--[[
** sequence operator; optimizations:
** false x => false, x true => x, true x => x
** (cannot do x . false => false because x may have runtime captures)
]]--

function lp_seq(tree1, tree2)
   tree1 = getpatt(tree1)
   tree2 = getpatt(tree2)
   if tree1.tag == TTag.False or tree2.tag == TTag.True then
      -- false . x = false, x . true = x
      return tree1
   elseif tree1.tag == TTag.True then
      -- true . x = x
      return tree2
   else
      return newroot2sib(TTag.Seq, tree1, tree2)
   end
end


--[[
** choice operator; optimizations:
** charset / charset => charset
** true / x => true, x / false => x, false / x => x
** (x / true is not equivalent to true)
]]--
function lp_choice(t1, t2)
   t1 = getpatt(t1)
   t2 = getpatt(t2)
   local t1c = tocharset(t1)
   local t2c = tocharset(t2)
   if t1c ~= nil and t2c ~= nil then
      local t = cs_union(t1c, t2c)
      return t
   elseif nofail(t1) or t2.tag == TTag.False then
      -- true / x => true, x / false => x
      return t1
   elseif t1.tag == TTag.False then
      -- false / x => x
      return t2
   else
      return newroot2sib(TTag.Choice, t1, t2)
   end
end

--[[
   p^n
]]--
function lp_star(p, n)
   local tree1 = getpatt(p)
   n = checkint(n)
   if n >= 0 then -- seq tree1 (seq tree1 ... (seq tree1 (rep tree1)))
      if nullable(tree1) then
         error("loop body may accept empty string")
      end
      local tree = newroot1sib(TTag.Rep, tree1)
      while n > 0 do
         tree = newroot2sib(TTag.Seq, tree1, tree)
         n = n - 1
      end
      return tree
   else -- choice (seq tree1 ... choice tree1 true ...) true
      n = -n
      local tree = newroot2sib( -- at most 1
            TTag.Choice,
            tree1,
            newleaf(TTag.True)
      )
      while n > 1 do
         tree = newroot2sib( -- at most (n-1)
            TTag.Seq,
            tree1,
            tree
         )
         tree = newroot2sib(TTag.Choice, tree, newleaf(TTag.True))
         n = n - 1
      end
      return tree
   end
end

--[[
** #p == &p
]]--
function lp_and(v)
   return newroot1sib(TTag.And, getpatt(v))
end

--[[
** -p == !p
]]--
function lp_not(v)
   return newroot1sib(TTag.Not, getpatt(v))
end

--[[
** [t1 - t2] == Seq (Not t2) t1
** If t1 and t2 are charsets, make their difference.
]]--
function lp_sub(t1, t2)
   t1 = getpatt(t1)
   t2 = getpatt(t2)
   local t1c = tocharset(t1)
   local t2c = tocharset(t2)
   if t1c ~= nil and t2c ~= nil then
      return cs_diff(t1c, t2c)
   else
      return newroot2sib(
         TTag.Seq,
         newroot1sib(TTag.Not, t2),
         t1
      )
   end
end

--[[
   A set with the given characters
]]--
function lp_set(s)
   local t = newcharset()
   local extra = nil
   for _,c in compat.utf8codes(s) do
      if c > CHARMAX then
         -- non ascii, we can't use charset for these
         local one = newleaf(TTag.Char, c)
         if extra == nil then
            extra = one
         else
            extra = newroot2sib(TTag.Choice, extra, one)
         end
      else
         t.set[c] = true
      end
   end
   if extra == nil then
      return t
   else
      return newroot2sib(TTag.Choice, t, extra)
   end
end

function lp_range(...)
   local t = newcharset()
   local extra = nil
   for _,v in ipairs{...} do
      if type(v) ~= "string" then
         error("argument must be string")
      else
         local first, second
         for _,c in compat.utf8codes(v) do
            if first == nil then
               first = c
            elseif second == nil then
               second = c
            else
               error("range must have two characters")
            end
         end
         if first == nil or second == nil then
            error("range must have two characters")
         end
         if first > second then
            if LPEG_COMPAT then
               -- ignore, just silently create an empty range
            else
               error("empty range")
            end
         elseif second <= CHARMAX then -- ascii range
            for i = first, second do
               t.set[i] = true
            end
         else
            local r = lp_utfr(first, second)
            if extra == nil then
               extra = r
            else
               extra = newroot2sib(TTag.Choice, extra, one)
            end
         end
      end
   end
   if extra == nil then
      return t
   else
      return newroot2sib(TTag.Choice, t, extra)
   end
end

function lp_utfr(from, to)
   from = checkint(from)
   to = checkint(to)
   if from > to then
      error("empty range")
   end
   if to > 0x10FFFF then
      error("invalid code point")
   end
   if to <= CHARMAX then -- ascii range?
      local t = newcharset() -- code it as a regular charset
      for i = from, to do
         t.set[i] = true
      end
      return t
   end
   -- multibyte utf-8 range
   return setmetatable({
         tag = TTag.UTFR,
         code = nil,
         from = from,
         to = to,
   }, metareg)
end

--[[
   Look-behind predicate
]]--
function lp_behind(v)
   local tree1 = getpatt(v)
   local n = fixedlen(tree1)
   if n < 0 then
      error("pattern may not have fixed length")
   end
   if hascaptures(tree1) then
      error("pattern has captures")
   end
   if n > MAXBEHIND then
      error("pattern too long to look behind")
   end
   return setmetatable({
         tag = TTag.Behind,
         code = nil,
         sib1 = tree1,
         n = n,
   }, metareg)
end

--[[ labeled failure begin ]]--
--[[
** Throws a label
]]--
local lp_throw = define_type_visitor{
   [{"string","number"}] = newthrowleaf,
   default = function() error("not a string") end,
}
--[[ labeled failure end ]]--


--[[
** Create a non-terminal
]]--
function lp_V(v)
   if v == nil then
      error("non-nil value expected")
   end
   return setmetatable({
         tag = TTag.Call,
         code = nil,
         key = v,
   }, metareg)
end

--[[
** Create a tree for a non-empty capture, with a body and
** optionally with an associated Lua value (at index 'labelidx' in the
** stack)
]]--
function capture_aux(capkind, patt, val)
   local t = newroot1sib(TTag.Capture, getpatt(patt))
   t.cap = capkind
   t.key = val
   return t
end

function newemptycap(capkind, val)
   return capture_aux(capkind, newleaf(TTag.True), val)
end

--[[
** Captures with syntax p / v
** (function capture, query capture, string capture, or number capture)
]]--
local divcapture_helper = define_type_visitor{
   ["function"] = function(v, p)
      return capture_aux(CapKind["function"], p, v)
   end,
   ["table"] = function(v, p)
      return capture_aux(CapKind.query, p, v)
   end,
   ["string"] = function(v, p)
      return capture_aux(CapKind.string, p, v)
   end,
   ["number"] = function(v, p)
      v = checkint(v)
      if v < 0 or v > 65536 then
         error("invalid number")
      end
      return capture_aux(CapKind.num, p, v)
   end,
   default = function(v)
      error("unexpected "..ptype(v).." as 2nd operand to LPeg '/'") end,
}
function lp_divcapture(p, v)
   return divcapture_helper(v, p) -- dispatch on v
end

function lp_acccapture(p, v)
   return capture_aux(CapKind.acc, p, v)
end

-- the match for patt with the values from nested captures replacing their
-- matches
function lp_substcapture(patt)
   return capture_aux(CapKind.subst, patt)
end

-- a table with all captures from patt
function lp_tablecapture(patt)
   return capture_aux(CapKind.table, patt)
end

-- the values produced by patt, optionally tagged with key
function lp_groupcapture(patt, key)
   -- key can be nil
   return capture_aux(CapKind.group, patt, key)
end

-- folding capture (deprecated)
function lp_foldcapture(patt, func)
   if type(func) ~= "function" then
      error("Bad function argument")
   end
   return capture_aux(CapKind.fold, patt, func)
end

-- the match for patt plus all captures made by patt
function lp_simplecapture(patt)
   return capture_aux(CapKind.simple, patt)
end

-- the current position (matches the empty string)
function lp_poscapture()
   return newemptycap(CapKind.position)
end

-- the value of the nth extra argument to lpeg.match (matches the empty string)
function lp_argcapture(n)
   n = checkint(n)
   if n <= 0 or n > 65536 then error("invalid argument index") end
   return newemptycap(CapKind.arg, n)
end

-- the value produced by the previous group capture named `key`
-- (matches the empty string)
function lp_backref(key)
   return newemptycap(CapKind.backref, key)
end

-- Constant capture (matches the empty string)
function lp_constcapture(...)
   local arg = compat.pack(...)
   if arg.n == 0 then -- no values?
      return newleaf(TTag.True) -- no capture
   else
      local i = arg.n
      local tree = newemptycap(CapKind.const, arg[i])
      while i > 1 do
         i = i - 1
         tree = newroot2sib(
            TTag.Seq,
            newemptycap(CapKind.const, arg[i]),
            tree
         )
      end
      if arg.n == 1 then
         -- single constant capture
         return tree
      else
         -- create a group capture with all values
         return lp_groupcapture( tree )
      end
   end
end

-- the returns of function applied to the captures of patt; the application
-- is done at match time
function lp_matchtime(patt, func)
   if type(func) ~= 'function' then
      error('not a function')
   end
   return setmetatable({
         tag = TTag.RunTime,
         code = nil,
         key = func,
         sib1 = getpatt(patt),
   }, metareg)
end

--[[======================================================]]--


--[[
** ======================================================
** Grammar - Tree generation
** ======================================================
]]--

--[[
** push on the stack the index and the pattern for the
** initial rule of grammar at index 'arg' in the stack;
** also add that index into position table.
]]--
function getfirstrule(tbl)
   local first_name, first_rule
   first_name = tbl[1]
   -- is this the name of an initial rule?
   if type(first_name) == 'number' or type(first_name) == 'string' then
      first_rule = tbl[first_name] -- get associated rule
   else
      first_name,first_rule = 1,first_name
   end
   if not is_pattern(first_rule) then
      if first_rule == nil then
         error("grammar has no initial rule")
      else
         error(string.format("initial rule '%s' is not a pattern", first_name))
      end
   end
   -- rule position (after TGrammar)
   -- return map from name to position, and from position to name
   return { [first_name] = 1 }, { first_name }
end

--[[
** traverse grammar at index 'arg', pushing all its keys and patterns
** into the stack. Create a new table (before all pairs key-pattern) to
** collect all keys and their associated positions in the final tree
** (the "position table").
** Return the number of rules and (in 'totalsize') the total size
** for the new tree.
]]--
function collectrules(tbl)
    -- find the first rule and put in position table
   local postab, rpostab = getfirstrule(tbl)

   -- collect and sort rule names (for repeatability)
   local names = {}
   for k,v in pairs(tbl) do
      if k == 1 or postab[k] == 1 then -- initial rule?
         -- skip the initial rules, it's already in the position table
      else
         table.insert(names, k)
      end
   end
   table.sort(names, function(a,b)
                 return tostring(a) < tostring(b)
   end)

   -- fill out rule, name, and position maps
   for _,k in ipairs(names) do
      local v = tbl[k]
      if not is_pattern(v) then
         error("rule '" .. val2str(k) .. "' is not a pattern")
      end
      table.insert(rpostab, k)
      postab[k] = #rpostab
   end
   return postab, rpostab
end

function buildgrammar(g, tbl, postab, rpostab)
   local trees = {}
   for i,name in ipairs(rpostab) do
      local rule = setmetatable({
            tag = TTag.Rule,
            code = nil,
            key = nil, -- will be fixed when rule is used
            n = i, -- rule number
            name = name,
            sib1 = tbl[name], -- pattern
            sib2 = nil,
      }, metareg)
      table.insert(trees, rule)
      g.ruletab[name] = rule
   end
   -- link up siblings
   for i = 1, #trees-1 do
      trees[i].sib2 = trees[i+1]
   end
   trees[#trees].sib2 = newleaf(TTag.True) -- finish list of rules
   g.sib1 = trees[1]
end

--[[
** Check whether a tree has potential infinite loops
]]--
function checkloops(grammar, tree)
   local n = numsiblings[tree.tag]
   if tree.tag == TTag.Rep and nullable_with_grammar(tree.sib1, grammar) then
      return true
   elseif tree.tag == TTag.Grammar then
      return false -- sub-grammars already checked
   elseif n == 1 then
      return checkloops(grammar, tree.sib1) -- tail call
   elseif n == 2 then
      if checkloops(grammar, tree.sib1) then
         return true
      else
         return checkloops(grammar, tree.sib2) -- tail call
      end
   elseif n == 0 then
      return false
   else
      error("surprising number of siblings")
   end
end

--[[
** Give appropriate error message for 'verifyrule'. If a rule appears
** twice in 'passed', there is path from it back to itself without
** advancing the subject.
]]--
function verifyerror(grammar, passed, npassed)
   local i, j
   for i = npassed,1,-1 do -- search for a repetition
      for j = i-1,1,-1 do
         if passed[i] == passed[j] then
            error(string.format("rule '%s' may be left recursive", val2str(passed[i])))
         end
      end
   end
   error("too many left calls in grammar")
end

--[[
** Check whether a rule can be left recursive; raise an error in that
** case; otherwise return 1 iff pattern is nullable.
** The return value is used to check sequences, where the second pattern
** is only relevant if the first is nullable.
** Parameter 'nb' works as an accumulator, to allow tail calls in
** choices. ('nb' true makes function returns true.)
** Parameter 'passed' is a list of already visited rules, 'npassed'
** counts the elements in 'passed'.
** Assume ktable at the top of the stack.
]]--
local verifyrule
verifyrule = define_tree_visitor{
   [{
         TTag.Char, TTag.Set, TTag.Any, TTag.False, TTag.UTFR,
         TTag.Throw, -- labeled failure
   }] = function(tree, g, passed, n, nb)
      return nb -- cannot pass from here
   end,
   [{
         TTag.True, TTag.Behind, -- look-behind cannot have calls
   }] = function(tree, g, passed, n, nb)
      return true
   end,
   [{ TTag.Not, TTag.And, TTag.Rep, }] = function(tree, g, passed, n, nb)
      return verifyrule(tree.sib1, g, passed, n, true) -- tail call
   end,
   [{ TTag.Capture, TTag.RunTime, TTag.XInfo, }] = function(tree, g, passed, n, nb)
      return verifyrule(tree.sib1, g, passed, n, nb) -- tail call
   end,
   [ TTag.Call ] = function(tree, g, passed, n, nb)
      local rule = g.ruletab[tree.key] -- look up rule
      return verifyrule(rule, g, passed, n, nb) -- tail call
   end,
   [ TTag.Seq ] = function(tree, g, passed, n, nb)
      -- only check 2nd child if first is nb
      if not verifyrule(tree.sib1, g, passed, n, false) then
         return nb
      else
         -- note that we don't propagate new npassed from 1st child
         return verifyrule(tree.sib2, g, passed, n, nb) -- tail call
      end
   end,
   [ TTag.Choice ] = function(tree, g, passed, n, nb)
      -- must check both children
      nb = verifyrule(tree.sib1, g, passed, n, nb)
      -- note that we don't propagate new npassed from 1st child
      return verifyrule(tree.sib2, g, passed, n, nb) -- tail call
   end,
   [ TTag.Rule ] = function(tree, g, passed, n, nb)
      if n >= MAXRULES then -- too many steps?
         return verifyerror(g, passed, n) -- error
      else
         passed[n+1] = tree.key -- add rule to path
         return verifyrule(tree.sib1, g, passed, n + 1, nb) -- tail call
      end
   end,
   [ TTag.Grammar ] = function(tree, g, passed, n, nb)
      return nullable(tree) -- sub-grammar cannot be left recursive
   end,
}

function verifygrammar(grammar)
   local passed = {}
   -- check left-recursive rules
   local rule = grammar.sib1
   while rule.tag == TTag.Rule do
      if rule.key ~= nil then -- skip unused rules
         verifyrule(rule.sib1, grammar, passed, 0, false)
      end
      rule = rule.sib2
   end
   if rule.tag ~= TTag.True then
      error("assertion failure")
   end
   -- check infinite loops inside rules
   rule = grammar.sib1
   while rule.tag == TTag.Rule do
      if rule.key ~= nil then -- skip unused rules
         if checkloops(grammar, rule.sib1) then
            error("empty loop in rule '" .. val2str(rule.name) .. "'")
         end
      end
      rule = rule.sib2
   end
   if rule.tag ~= TTag.True then
      error("assertion failure")
   end
end

--[[
** Fix a TOpenCall into a TCall node, using table 'postable' to
** translate a key to its rule address in the tree. Raises an
** error if key does not exist.
]]--
function fixonecall(g, t, postab)
   local name = t.key
   local rule = g.ruletab[name]
   if t.tag == TTag.Call then
      if rule == nil then
         error(string.format("rule '%s' undefined in given grammar", val2str(name)))
      end
      -- unlike our upstream, we don't clone patterns when we build a grammar
      -- so we can't mutate this tree w/o possibly breaking any other grammars
      -- which might hold an alias of this call.  So we don't distinguish
      -- Call and OpenCall and we don't mutate the tag here and
      -- don't link it up.  However, we can mutate the Rule
      -- as those are not shared
      rule.key = name -- mark this as used
   elseif rule ~= nil then -- TTag.Throw
      -- As before, we can't mutate the tree
      rule.key = name -- mark this as used
   end
end

--[[
** Transform left associative constructions into right
** associative ones, for sequence and choice; that is:
** (t11 + t12) + t2  =>  t11 + (t12 + t2)
** (t11 * t12) * t2  =>  t11 * (t12 * t2)
** (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2))
]]--
function  correctassociativity (tree)
   local tag = tree.tag
   if tag ~= TTag.Choice and tag ~= TTag.Seq then
      error("impossible")
   end
   local t1 = tree.sib1
   while t1.tag == tree.tag do
      local t11, t12 = t1.sib1, t1.sib2
      local t2 = tree.sib2
      -- don't mutate t1 in place as others may be keeping copies of it;
      -- mutating 'tree' in place is okay as we're not changing its semantics
      tree.sib1 = t11
      tree.sib2 = newroot2sib(tag, t12, t2)
      t1 = tree.sib1
   end
   return tree
end

--[[
** Make final adjustments in a tree. Fix open calls in tree 't',
** making them refer to their respective rules or raising appropriate
** errors (if not inside a grammar). Correct associativity of associative
** constructions (making them right associative). Assume that tree's
** ktable is at the top of the stack (for error messages).
]]--
local finalfix_helper = define_tree_visitor{
   [TTag.Grammar] = function(t)
      return t -- subgrammars were already fixed
   end,
   [TTag.Call] = function(t, g, postab)
      if g == nil then
         error("rule '" .. val2str(t.key) .. "' used outside a grammar")
      else
         return fixonecall(g, t, postab)
      end
   end,
   [TTag.Throw] = function(t, g, postab)
      if g ~= nil then
         return fixonecall(g, t, postab)
      end
   end,
   [{TTag.Seq, TTag.Choice}] = function(t, g, postab)
      return correctassociativity(t)
   end,
   default = function(t) return t end,
}
function finalfix(g, t, postab)
   finalfix_helper(t, g, postab)
   if t.tag == TTag.Grammar then return end
   local n = numsiblings[t.tag]
   if n == 1 then
      return finalfix(g, t.sib1, postab) -- tail call
   elseif n == 2 then
      finalfix(g, t.sib1, postab)
      return finalfix(g, t.sib2, postab) -- tail call
   elseif n == 0 then
      return
   else
      error("strange number of siblings")
   end
end

--[[
** Give a name for the initial rule if it is not referenced
]]--
function initialrulename(grammar)
   if grammar.sib1.key == nil then -- initial rule is not referenced?
      grammar.sib1.key = grammar.sib1.name
   end
end

function newgrammar(tbl)
   local postab, rpostab = collectrules(tbl)
   local g = setmetatable({
         tag = TTag.Grammar,
         code = nil,
         sib1 = nil, -- will fill this in
         n = #rpostab, -- number of rules
         ruletab = {}, -- map rule names to rules
   }, metareg)
   buildgrammar(g, tbl, postab, rpostab)
   finalfix(g, g.sib1, postab)
   initialrulename(g)
   verifygrammar(g)
   return g
end

--[[ ====================================================== ]]--

function prepcompile(p)
   finalfix(nil, p, {}) -- correct associativity
   return compile(p)
end

function lp_printtree(patt, c)
   local tree = getpatt(patt)
   if c then
      finalfix(nil, tree, {}) -- correct associativity
   end
   print("[]") -- for compatibility, this is a fake 'ktable'
   io.write(table.concat(printtree(tree, 0, {})))
end

function lp_printcode(patt)
   local p = getpatt(patt)
   if p.code == nil then
      prepcompile(p)
   end
   print("[]") -- for compatibility, this is a fake 'ktable'
   io.write(table.concat(printpatt(p.code, {})))
end

--[[
** Get the initial position for the match, interpreting negative
** values from the end of the subject.  Result is 1-based.
]]--
function initposition(ii, len)
   if ii > 0 then -- positive index?
      if ii <= len then -- inside the string?
         return ii -- return it (no correction to 0-base)
      else
         return len + 1 -- crop at the end
      end
   else -- negative index
      if (-ii) <= len then -- inside the string?
         return len + 1 - (-ii) -- return position from the end
      else
         return 1
      end
   end
end

-- Main match function
function lp_match(pattern, subject, init, ...)
   local p = getpatt(pattern)
   if p.code == nil then prepcompile(p) end
   local code = p.code
   if type(subject) ~= 'string' then error("subject is not a string") end
   local i
   if init == nil then
      i = 1
   else
      i = initposition(checkint(init), #subject)
   end
   return match(subject, i, code, ...)
end


--[[
** ======================================================
** Library creation and functions not related to matching
** ======================================================
]]--

function lp_setmax(lim)
   lim = 0 + lim -- convert to integer
   if lim <= 0 then
      error("out of range")
   end
   MAXSTACK = lim
end

local lp_type = define_type_visitor{
   pattern = function() return "pattern" end,
   default = function() return nil end,
}

function lp_gc(p)
   p._code = nil
end

function createcat(charspec)
   local t = newcharset()
   for i=0,CHARMAX do -- XXX not unicode safe
      local s = compat.utf8char(i)
      if s:find(charspec) ~= nil then
         t.set[i] = true
      end
   end
   return t
end

function lp_locale(tbl)
   if tbl == nil then
      tbl = {}
   end
   tbl.alnum = createcat("%w")
   tbl.alpha = createcat("%a")
   tbl.cntrl = createcat("%c")
   tbl.digit = createcat("%d")
   tbl.graph = createcat("[%p%w]") -- printable except space
   tbl.lower = createcat("%l")
   tbl.print = createcat("%C") -- printable = "not a control character"?
   tbl.punct = createcat("%p") -- "printable but not space or alnum
   tbl.space = createcat("%s")
   tbl.upper = createcat("%u")
   tbl.xdigit = createcat("%x")
   return tbl
end

--[[ lpltree.c ]]--

metareg.__mul = lp_seq
metareg.__add = lp_choice
metareg.__pow = lp_star
metareg.__gc = lp_gc
metareg.__len = lp_and
metareg.__div = lp_divcapture
metareg.__mod = lp_acccapture
metareg.__unm = lp_not
metareg.__sub = lp_sub
metareg.__tostring = printrepl

local pattreg = {
   ptree = lp_printtree,
   pcode = lp_printcode,
   match = lp_match,
   B = lp_behind,
   V = lp_V,
   C = lp_simplecapture,
   Cc = lp_constcapture,
   Cmt = lp_matchtime,
   Cb = lp_backref,
   Carg = lp_argcapture,
   Cp = lp_poscapture,
   Cs = lp_substcapture,
   Ct = lp_tablecapture,
   Cf = lp_foldcapture,
   Cg = lp_groupcapture,
   P = lp_P,
   S = lp_set,
   R = lp_range,
   utfR = lp_utfr,
   locale = lp_locale,
   version = "LLPegLabel " .. VERSION,
   setmaxstack = lp_setmax,
   type = lp_type,
   T = lp_throw, -- labeled failure throw
}
metareg.__index = pattreg

return pattreg

end)

local modules = {}
modules['bit32'] = require('bit32')
modules['string'] = require('string')
modules['strict'] = {}
modules['table'] = require('table')
local function myrequire(name)
  if modules[name] == nil then
    modules[name] = true
    modules[name] = (builders[name])(myrequire)
  end
  return modules[name]
end
return myrequire('llpeg')
end)()
From Wikipedia, the free encyclopedia

return (function()
local builders = {}
local function register(name, f)
  builders[name] = f
end
register('advent.compat', function() return require [[Module:User:Cscott/compat]] end)

register('llpeg.types', function(myrequire)
myrequire('strict')
local compat = myrequire('advent.compat')
local CHARMAX = 0x7F -- maximum codepoint for charsets

-- metatable for pattern objects; will be filled in later
local metareg = {}

local enum = function(keys)
   local Enum = {}
   Enum.__index = Enum
   function Enum:__tostring() return self.name end
   function Enum:pairs() return keys end
   function Enum:type() return Enum end

    for name, value in pairs(keys) do
       Enum[name] = setmetatable({ name = name, value = value }, Enum)
    end
    return Enum
end

local CapKind = enum{
   close = "close",  -- not used in trees */
   position = "position",
   const = "constant",  -- ktable[key] is Lua constant
   backref = "backref",  -- ktable[key] is "name" of group to get capture
   arg = "argument",  -- 'key' is arg's number
   simple = "simple",  -- next node is pattern
   table = "table",  -- next node is pattern
   ["function"] = "function",  -- ktable[key] is function; next node is pattern
   acc = "acc", -- ktable[key] is function; next node is pattern
   query = "query",  -- ktable[key] is table; next node is pattern
   string = "string",  -- ktable[key] is string; next node is pattern
   num = "num",  -- numbered capture; 'key' is number of value to return
   subst = "substitution",  -- substitution capture; next node is pattern
   fold = "fold",  -- ktable[key] is function; next node is pattern
   runtime = "runtime",  -- not used in trees (is uses another type for tree)
   group = "group",  -- ktable[key] is group's "name"
}

local TTag = enum{
  Char = "char", -- 'n' has unicode codepoint
  Set = "set", -- 'set' has sparse array codepoint->true for codepoint <=CHARMAX
                -- 'rest' indicates whether all codepoints > CHARMAX should be
                -- part of the set (true) or not (false)
  Any = "any",
  True = "true",
  False = "false",
  UTFR = "utf8.range",  --[[ range of UTF-8 codepoints;
                 'from' has initial codepoint; 'to' has final codepoint ]]--
  Rep = "rep",  -- 'sib1' *
  Seq = "seq",  -- 'sib1' 'sib2'
  Choice = "choice",  -- 'sib1' / 'sib2'
  Not = "not",  -- !'sib1'
  And = "and",  -- &'sib1'
  Call = "call",  -- 'sib2' is rule being called; otherwise same as TOpenCall
  OpenCall = "opencall",  -- 'key' is rule name
  Rule = "rule",  --[[ 'key' is rule name (but key == nil for unused rules);
             'sib1' is rule's pattern pre-rule; 'sib2' is next rule;
             'n' is rule's sequential number, 'name' is rule name (even
             for unused rules) ]]--
  XInfo = "xinfo",  -- extra info (not used)
  Grammar = "grammar",  -- 'sib1' is initial (and first) rule, 'n' is # rules
  Behind = "behind",  -- 'sib1' is pattern, 'n' is how much to go back
  Capture = "capture",  --[[ captures: 'cap' is kind of capture (enum 'CapKind');
                'key' is Lua value associated with capture;
               'sib1' is capture body ]]--
  RunTime = "run-time",  --[[ run-time capture: 'key' is Lua function;
                 'sib1' is capture body ]]--
  Throw = "throw",    -- labeled failure: 'key' is label's name,
                       -- sib2 is associated recovery rule
}

local PE = enum{
   nullable = "nullable",
   nofail = "nofail",
}

-- virtual machine instructions
local Opcode = enum{
  Any = "any", -- if no char, fail
  Char = "char",  -- if char != aux, fail
  Set = "set",  -- if char not in buff, fail
  TestAny = "testany",  -- in no char, jump to 'offset'
  TestChar = "testchar",  -- if char != aux, jump to 'offset'
  TestSet = "testset",  -- if char not in buff, jump to 'offset'
  Span = "span",  -- read a span of chars in buff
  UTFR = "utf-range",  -- if codepoint not in range [offset, utf_to], fail
  Behind = "behind",  -- walk back 'aux' characters (fail if not possible)
  Ret = "ret",  -- return from a rule
  End = "end",  -- end of pattern
  Choice = "choice",  -- stack a choice; next fail will jump to 'offset'
  PredChoice = "pred_choice",  -- labeled failure: stack a choice; changes label env next fail will jump to 'offset'
  Jmp = "jmp",  -- jump to 'offset'
  Call = "call",  -- call rule at 'offset'
  OpenCall = "open_call",  -- call rule number 'key' (must be closed to a ICall)
  Commit = "commit",  -- pop choice and jump to 'offset'
  PartialCommit = "partial_commit",  -- update top choice to current position and jump
  BackCommit = "back_commit",  -- backtrack like "fail" but jump to its own 'offset'
  FailTwice = "failtwice",  -- pop one choice and then fail
  Fail = "fail",  -- go back to saved state on choice and jump to saved offset
  Giveup = "giveup",  -- internal use
  FullCapture = "fullcapture",  -- complete capture of last 'off' chars
  OpenCapture = "opencapture",  -- start a capture
  CloseCapture = "closecapture",
  CloseRunTime = "closeruntime",
  Throw = "throw",    -- fails with a given label --labeled failure
  ThrowRec = "throw_rec", -- fails with a given label and call rule at 'offset' --labeled failure
  Empty = "--",  -- to fill empty slots left by optimizations
}

-- helper for visitor pattern definitions
function define(dispatch, which, f)
   for _,v in pairs(which) do
      assert(v ~= nil) -- catch typos
      dispatch[v] = f
   end
end

local numsiblings = {}
define(numsiblings, {
          TTag.Char, TTag.Set, TTag.Any,
          TTag.True, TTag.False, TTag.UTFR,
          TTag.Call, TTag.OpenCall,
          TTag.Throw,
}, 0)
define(numsiblings, {
          TTag.Rep, TTag.Not, TTag.And, TTag.Grammar,
          TTag.Behind, TTag.Capture, TTag.RunTime,
}, 1)
define(numsiblings, {
          TTag.Seq, TTag.Choice, TTag.Rule,
}, 2)

-- more help for visitor functions

local function_name_registry = {}
function register_fname(name, f)
   assert(type(name) == "string")
   assert(type(f) == "function")
   function_name_registry[f] = name
end

function report_ferror(f, msg)
   local fname = function_name_registry[f]
   if fname ~= nil then
      msg = fname .. ": " .. msg
   end
   error(msg)
end

function define_type_visitor(tbl)
   local dispatch = {}
   for keys,func in pairs(tbl) do
      if type(keys) ~= "table" then
         keys = { keys }
      end
      define(dispatch, keys, func)
   end
   local visit
   visit = function(val, ...)
      local a = dispatch["assert"]
      if a ~= nil then a(val, ...) end -- assert preconditions
      local ty = type(val)
      if ty == 'table' and getmetatable(val) == metareg then
         ty = 'pattern'
      end
      local f = dispatch[ty]
      if f ~= nil then return f(val, ...) end
      f = dispatch.default
      if f == nil then
         report_ferror(visit, "no default for " .. ty)
      end
      return f(val, ...)
   end
   return visit
end

function define_tree_visitor(tbl, opt_name)
   local dispatch = {}
   for keys,func in pairs(tbl) do
      if type(keys) ~= "table" or getmetatable(keys) == TTag then
         keys = { keys }
      end
      define(dispatch, keys, func)
   end
   local visit
   visit = function(tree, ...)
      if tree == nil then report_ferror(visit, "nil tree") end
      local a = dispatch["assert"]
      if a ~= nil then a(tree, ...) end -- assert preconditions
      local f = dispatch[tree.tag]
      if f ~= nil then return f(tree, ...) end
      f = dispatch.default
      if f == nil then
         report_ferror(visit, "no default for " .. tree.tag)
      end
      return f(tree, ...)
   end
   return visit
end

function define_opcode_visitor(tbl)
   local dispatch = {}
   for keys,func in pairs(tbl) do
      if type(keys) ~= "table" or getmetatable(keys) == Opcode then
         keys = { keys }
      end
      define(dispatch, keys, func)
   end
   local visit
   visit = function(op, ...)
      if op == nil then report_ferror(visit, "nil op") end
      local a = dispatch["assert"]
      if a ~= nil then a(op, ...) end -- assert preconditions
      local f = dispatch[op.code]
      if f ~= nil then return f(op, ...) end
      f = dispatch.default
      if f == nil then
         report_ferror(visit, "no default for " .. op.code)
      end
      return f(op, ...)
   end
   return visit
end

-- helper for module imports
function from(mod, list)
   local result = {}
   for _,v in ipairs(list) do
      table.insert(result, mod[v])
   end
   return compat.unpack(result)
end

function newcharset()
   return setmetatable({
         tag = TTag.Set,
         code = nil,
         rest = false,
         set = {}
   }, metareg)
end

local fullset = newcharset()
for i = 0,CHARMAX do
   fullset.set[i] = true
end
fullset.rest = true -- make sure non-ascii unicode chars are included!
assert(fullset.tag == TTag.Set)

return {
   CHARMAX = CHARMAX,
   CapKind = CapKind,
   Opcode = Opcode,
   PE = PE,
   TTag = TTag,
   define = define,
   define_tree_visitor = define_tree_visitor,
   enum = enum,
   from = from,
   fullset = fullset,
   metareg = metareg,
   newcharset = newcharset,
   numsiblings = numsiblings,
   register_fname = register_fname,
}

end)

register('llpeg.print', function(myrequire)
myrequire('strict')
local compat = myrequire('advent.compat')
local from = myrequire('llpeg.types').from
local
   CHARMAX,
   CapKind,
   Opcode,
   TTag,
   define,
   define_tree_visitor,
   numsiblings,
   _ = from(myrequire('llpeg.types'), {
               'CHARMAX',
               'CapKind',
               'Opcode',
               'TTag',
               'define',
               'define_tree_visitor',
               'numsiblings',
   })

function printcharset(tree)
   local result = "["
   local i = 0
   while i <= CHARMAX do
      local first = i
      while tree.set[i] and i <= CHARMAX do
         i = i + 1
      end
      if first == (i - 1) then -- unary range
         result = result .. string.format("(%02x)", first)
      elseif first < (i-1) then -- non-empty range
         result = result .. string.format("(%02x-%02x)", first, i - 1)
      end
      i = i + 1
   end
   if tree.rest then
      result = result .. "(80-FFFF)"
   end
   return result .. "]"
end

function printjmp(op, pc)
   return "-> " .. op.target
end

local printinst_helper = define_opcode_visitor{
   [Opcode.Char] = function(op, pc)
      return string.format("'%c' (%02x)", op.aux, op.aux)
   end,
   [Opcode.TestChar] = function(op, pc)
      return string.format("'%c' (%02x)", op.aux, op.aux) .. printjmp(op, pc)
   end,
   [Opcode.UTFR] = function(op, pc)
      return string.format("%d - %d", op.from, op.to)
   end,
   [Opcode.FullCapture] = function(op, pc)
      return string.format("%s (size = %s)  (idx = %s)",
                           op.cap.value, op.aux, op.key)
   end,
   [Opcode.OpenCapture] = function(op, pc)
      return string.format("%s (idx = %s)",
                           op.cap.value, op.key)
   end,
   [Opcode.Set] = function(op, pc)
      return printcharset(op)
   end,
   [Opcode.TestSet] = function(op, pc)
      return printcharset(op) .. printjmp(op, pc)
   end,
   [Opcode.Span] = function(op, pc)
      return printcharset(op)
   end,
   [Opcode.OpenCall] = function(op, pc)
      return string.format("-> %d", op.target) -- rule number
   end,
   [Opcode.Behind] = function(op, pc)
      return string.format("%d", op.aux)
   end,
   [{Opcode.Jmp, Opcode.Call, Opcode.Commit, Opcode.Choice,
     Opcode.PartialCommit, Opcode.BackCommit, Opcode.TestAny,
     Opcode.PredChoice}] = function(op, pc)
      return printjmp(op, pc)
   end,
   [Opcode.Throw] = function(op, pc) -- labeled failure
      return string.format("(idx = %s)", op.key)
   end,
   [Opcode.ThrowRec] = function(op, pc)
      return printjmp(op, pc) .. string.format("(idx = %s)", op.key)
   end,
   default = function() return '' end,
}
function printinst(pc, op, accum)
   table.insert(accum, string.format("%02d: %s ", pc, op.code.value))
   table.insert(accum, printinst_helper(op, pc))
   table.insert(accum, "\n")
   return accum
end

function printpatt(code, accum)
   for pc,op in ipairs(code) do
      printinst(pc, op, accum)
   end
   return accum
end

function printcap(cap, indent)
   print(string.format("%s%s", string.rep(' ', indent), cap))
end

function printcap2close(captures, ncaptures, i, indent)
   local head = captures[i]
   i = i + 1
   printcap(head, indent) -- print head capture
   while i <= ncaptures and head:inside(captures[i]) do
      i = printcap2close(captures, ncaptures, i, indent + 2) -- print nested captures
   end
   if i <= ncaptures and head:isopencap() then
      assert(captures[i]:isclosecap())
      printcap(captures[i], indent) -- print and skip close capture
      i = i + 1
   end
   return i
end

function printcaplist(captures, ncaptures)
   -- for debugging, first print a raw list of captures
   if ncaptures == nil then ncaptures = #captures end
   for i=1,ncaptures do
      printcap(captures[i], 0)
   end
  print(">======");
  local i=1
  while i <= ncaptures and not captures[i]:isclosecap() do
     i = printcap2close(captures, ncaptures, i, 0)
  end
  if i > ncaptures then
     print("<unmatched>")
  end
  print("=======");
end

local printtree_helper = define_tree_visitor{
   [TTag.Char] = function(tree)
      local c = compat.utf8char(tree.n)
      if c:find("%C") ~= nil then -- printable?
         return " '" .. c .. "'"
      else
         return string.format(" (%02X)", tree.n)
      end
   end,
   [TTag.Set] = function(tree)
      return printcharset(tree)
   end,
   [TTag.UTFR] = function(tree)
      return " " .. tree.from .. " - " .. tree.to
   end,
   [{TTag.OpenCall, TTag.Call}] = function(tree)
      local ret = string.format(" key: %s", tree.key)
      local rule = tree.sib2
      if rule ~= nil then
         ret = ret .. " (rule: " .. rule.n .. ")"
      end
      return ret
   end,
   [TTag.Behind] = function(tree)
      return " " .. tree.n
   end,
   [TTag.Capture] = function(tree)
      return string.format(" kind: '%s'  key: %s", tree.cap.value, tree.key)
   end,
   [TTag.Rule] = function(tree)
      return string.format(" key: %s", tree.key)
   end,
   [TTag.XInfo] = function(tree)
      return " n: " .. tree.n
   end,
   [TTag.Grammar] = function(tree)
      return " " .. tree.n -- number of rules
   end,
   [TTag.Throw] = function(tree)
      return string.format(" key: %s", tree.key)
   end,
   default = function(tree) return '' end
}
function printtree(tree, indent, accum)
   local sibs = numsiblings[tree.tag]

   table.insert(accum, string.rep(' ', indent))
   table.insert(accum, tree.tag.value)

   table.insert(accum, printtree_helper(tree))
   table.insert(accum, "\n")

   if tree.tag == TTag.Rule then
      sibs = 1 -- don't print sib2
   elseif tree.tag == TTag.Grammar then
      local rule = tree.sib1
      for i=1,tree.n do
         printtree(rule, indent + 2, accum)
         rule = rule.sib2
      end
      sibs = 0 -- siblings already handled
   end
   if sibs >= 1 then
      printtree(tree.sib1, indent + 2, accum)
      if sibs >= 2 then
         printtree(tree.sib2, indent + 2, accum)
      end
   end
   return accum
end

local PREFIX = "" -- could also be "l." or "lpeg." etc
local printrepl_helper
printrepl_helper = define_tree_visitor{
   [TTag.True] = function(tree, buf)
      table.insert(buf, PREFIX .. 'P""')
   end,
   [TTag.Any] = function(tree, buf)
      table.insert(buf, PREFIX .. 'P(1)')
   end,
   [TTag.Char] = function(tree, buf)
      table.insert(buf, PREFIX .. 'P"')
      local c = compat.utf8char(tree.n)
      if c:find("%C") ~= nil then -- printable?
         table.insert(buf, c)
      else
         table.insert(buf, string.format('\\%02X', tree.n))
      end
      table.insert(buf, '"')
   end,
   [TTag.Set] = function(tree, buf)
      local nbuf = {}
      local insertchar = function(cp)
         local c = compat.utf8char(cp)
         if string.find(c, "^[^%w%p ]") ~= nil then
            table.insert(nbuf, string.format('\\x%02X', cp))
         else
            table.insert(nbuf, c)
         end
      end
      local nargs = 0
      local inserttwo = function(cp1, cp2)
         if nargs > 0 then table.insert(nbuf, ',') end
         nargs = nargs + 1
         table.insert(nbuf, '"')
         insertchar(cp1)
         insertchar(cp2)
         table.insert(nbuf, '"')
      end

      local i = 0
      while i <= CHARMAX do
         local first = i
         while tree.set[i] and i <= CHARMAX do
            i = i + 1
         end
         if first == (i - 1) then -- unary range
            inserttwo(first, first)
         elseif first < (i-1) then -- non-empty range
            inserttwo(first, i-1)
         end
         i = i + 1
      end

      local r = table.concat(nbuf)
      if nargs == 1 then
         r = PREFIX .. 'S' .. r
      else
         r = PREFIX .. 'S(' .. r .. ')'
      end

      if tree.rest then
         table.insert(buf, '(')
         table.insert(buf, r)
         table.insert(buf, ' + ')
         table.insert(buf, PREFIX)
         table.insert(buf, 'utfR(0x80, 0x10FFFF))')
      else
         table.insert(buf, r)
      end
   end,
   [TTag.UTFR] = function(tree, buf)
      table.insert(buf, string.format("%sutfR(0x%04X, 0x%04X)", PREFIX, tree.from, tree.to))
   end,
   [{TTag.OpenCall, TTag.Call}] = function(tree, buf)
      table.insert(buf, string.format('%sV"%s"', PREFIX, tree.key))
   end,
   [TTag.Not] = function(tree, buf)
      table.insert(buf, '-(')
      printrepl_helper(tree.sib1, buf)
      table.insert(buf, ')')
   end,
   [TTag.Seq] = function(tree, buf)
      table.insert(buf, "(")
      printrepl_helper(tree.sib1, buf)
      table.insert(buf, " * ")
      printrepl_helper(tree.sib2, buf)
      table.insert(buf, ")")
   end,
   [TTag.Choice] = function(tree, buf)
      table.insert(buf, "(")
      printrepl_helper(tree.sib1, buf)
      table.insert(buf, " + ")
      printrepl_helper(tree.sib2, buf)
      table.insert(buf, ")")
   end,
   [TTag.Rep] = function(tree, buf)
      printrepl_helper(tree.sib1, buf)
      table.insert(buf, "^0")
   end,
   --[[
   [TTag.Behind] = function(tree)
      return " " .. tree.n
      end,
   ]]--
   [TTag.Capture] = function(tree, buf)
      local repl = define_type_visitor{
         string = function(v)
            return '"' .. v .. '"' -- xxx should handle escapes
         end,
         default = tostring,
      }
      local name = nil
      local show_patt = false
      local show_key = false
      if tree.cap == CapKind.simple then
         name = 'C'
         show_patt = true
      elseif tree.cap == CapKind.subst then
         name = 'Cs'
         show_patt = true
      elseif tree.cap == CapKind.table then
         name = 'Ct'
         show_patt = true
      elseif tree.cap == CapKind.pos then
         name = 'Cp'
      elseif tree.cap == CapKind.arg then
         name = 'Carg'
         show_key = true
      elseif tree.cap == CapKind.backref then
         name = 'Cb'
         show_key = true
      elseif tree.cap == CapKind.group then
         name = 'Cg'
         show_patt = true
         show_key = (tree.key ~= nil)
      end
      if name ~= nil then
         table.insert(buf, PREFIX)
         table.insert(buf, name)
         table.insert(buf, '(')
         if show_patt then
            printrepl_helper(tree.sib1, buf)
            if show_key then
               table.insert(buf, ', ')
            end
         end
         if show_key then
            table.insert(buf, repl(tree.key))
         end
         table.insert(buf, ')')
         return
      end
      if tree.cap == CapKind.string or
         tree.cap == CapKind.num or
         tree.cap == CapKind.query or
         tree.cap == CapKind['function'] then
         printrepl_helper(tree.sib1, buf)
         table.insert(buf, ' / ')
         table.insert(buf, repl(tree.key))
         return
      end
      -- fallback
      table.insert(buf, string.format("<pattern %s>", tostring(tree.tag)))
   end,
   [TTag.Rule] = function(tree, buf)
      local key = tree.name
      if type(key) == 'number' then key = string.format("[%d]", key) end
      table.insert(buf, key)
      table.insert(buf, " = ")
      printrepl_helper(tree.sib1, buf)
   end,
   [TTag.Grammar] = function(tree, buf)
      table.insert(buf, PREFIX .. "P{")
      local rule = tree.sib1

      local r = {}
      local first_rule_name = rule.name
      r[first_rule_name] = rule
      rule = rule.sib2

      local names = {}
      for i=2,tree.n do
         table.insert(names, rule.name)
         r[rule.name] = rule
         rule = rule.sib2
      end

      -- sort rule names
      table.sort(names)
      table.insert(names, 1, first_rule_name)

      -- now print in order
      for _,name in ipairs(names) do
         printrepl_helper(r[name], buf)
         table.insert(buf, ", ")
      end
      table.insert(buf, "}")
   end,
   --[[
   [TTag.Throw] = function(tree)
      return " key: " .. tree.key .. "  (rule: " .. tree.sib2.cap .. ")"
   end,
   ]]--
   default = function(tree, buf)
      table.insert(buf, string.format("<pattern %s>", tostring(tree.tag)))
   end,
}
function printrepl(tree)
   local buf = {}
   printrepl_helper(tree, buf)
   return table.concat(buf)
end

return {
   printcaplist = printcaplist,
   printcharset = printcharset,
   printinst = printinst,
   printpatt = printpatt,
   printrepl = printrepl,
   printtree = printtree,
}

end)

register('llpeg.code', function(myrequire)
myrequire('strict')
local compat = myrequire('advent.compat')
local from = myrequire('llpeg.types').from
local
   CHARMAX,
   CapKind,
   Opcode,
   PE,
   TTag,
   define,
   define_tree_visitor,
   fullset,
   newcharset,
   numsiblings,
   register_fname,
   _ = from(myrequire('llpeg.types'), {
               'CHARMAX',
               'CapKind',
               'Opcode',
               'PE',
               'TTag',
               'define',
               'define_tree_visitor',
               'fullset',
               'newcharset',
               'numsiblings',
               'register_fname',
})
local printinst = myrequire('llpeg.print').printinst

local TRACE_INSTRUCTIONS = false

-- signals a "no-instruction"
local NOINST = nil

-- don't optimize captures longer than this
local MAXOFF = 15

-- forward declarations
local codegen

local CompileState = {}
CompileState.__index = CompileState

--[[
** {======================================================
** Analysis and some optimizations
** =======================================================
]]--

--[[
** Check whether a charset is empty (returns IFail), singleton (IChar),
** full (IAny), or none of those (ISet). When singleton, '*c' returns
** which character it is. (When generic set, the set was the input,
** so there is no need to return it.)
]]--
function charsettype(cs)
   local count = 0
   local candidate
   for i,_ in pairs(cs.set) do
      candidate = i
      count = count + 1
   end
   if cs.rest then
      if count == (CHARMAX + 1) then
         return Opcode.Any -- full set
      end
   elseif count == 0 then
      return Opcode.Fail -- empty set
   elseif count == 1 then
      return Opcode.Char, candidate -- single char
   end
   return Opcode.Set -- neither full nor empty nor singleton
end

-- A few basic operations on charsets; returns new object

function cs_clone(cs)
   local result = newcharset()
   for i,_ in pairs(cs.set) do
      result.set[i] = true
   end
   result.rest = cs.rest
   return result
end

function cs_complement(cs)
   local result = newcharset()
   for i=0,CHARMAX do
      if not cs.set[i] then
         result.set[i] = true
      end
   end
   result.rest = not cs.rest
   return result
end

function cs_intersection(a, b)
   local result = newcharset()
   for i,_ in pairs(a.set) do
      if a.set[i] and b.set[i] then
         result.set[i] = true
      end
   end
   result.rest = a.rest and b.rest
   return result
end

function cs_union(a, b)
   local result = newcharset()
   for i=0,CHARMAX do
      if a.set[i] or b.set[i] then
         result.set[i] = true
      end
   end
   result.rest = a.rest or b.rest
   return result
end

function cs_diff(a, b)
   local result = newcharset()
   for i=0,CHARMAX do
      if a.set[i] and not b.set[i] then
         result.set[i] = true
      end
   end
   result.rest = a.rest and not b.rest
   return result
end

function cs_disjoint(a, b)
   if a.rest == b.rest then return false end
   for i,_ in pairs(a.set) do
      if b.set[i] then return false end
   end
   for i,_ in pairs(b.set) do
      if a.set[i] then return false end
   end
   return true
end

function cs_equal(a, b)
   if a.rest ~= b.rest then return false end
   for i,_ in pairs(a.set) do
      if not b.set[i] then return false end
   end
   for i,_ in pairs(b.set) do
      if not a.set[i] then return false end
   end
   return true
end

--[[
** If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a
** charset and return it; else return nil.
]]--
local tocharset = define_tree_visitor{
   [TTag.Set] = function(v)
      return v -- copy set
   end,
   [TTag.Char] = function(v)
      -- only one char
      if v.n <= CHARMAX then
         local t = newcharset()
         t.set[v.n] = true
         return t
      else
         return nil
      end
   end,
   [TTag.Any] = function(v)
      return fullset
   end,
   [TTag.False] = function(v)
      return newcharset()
   end,
   default = function(v)
      return nil
   end,
}
register_fname("tocharset", tocharset)

--[[
** Visit a TCall node taking care to stop recursion. If node not yet
** visited, return 'f(rule for call)', otherwise return 'def' (default
** value)
]]--
function CompileState:callrecursive(tree, f, default_value, ...)
   if tree.tag ~= TTag.Call then
      error("unexpected tree tag")
   end
   local rule = self.grammar.ruletab[tree.key]
   if rule.tag ~= TTag.Rule then
      error("unexpected tree sibling")
   end
   if tree.seen == true then
      return default_value -- node already visited
   else
      -- first visit
      local oldseen = tree.seen
      tree.seen = true
      local result = f(rule, ...)
      tree.seen = oldseen -- restore tree
      return result
   end
end

--[[
** Check whether a pattern tree has captures
]]--
local hascaptures
hascaptures = define_tree_visitor{
   [{TTag.Capture, TTag.RunTime}] = function(tree, cs)
         return true
   end,
   [TTag.Call] = function(tree, cs)
      assert(cs ~= nil)
      return cs:callrecursive(tree, hascaptures, false, cs)
   end,
   [TTag.Rule] = function(tree, cs)
      -- do not follow siblings
      return hascaptures(tree.sib1, cs)
   end,
   [TTag.OpenCall] = function(tree, cs)
      error("should not happen")
   end,
   [TTag.Grammar] = function(tree, cs)
      -- make a fake compile state to hold the grammar, if necessary
      if cs == nil then cs = CompileState:new(nil) end
      return cs:withGrammar(tree, hascaptures, tree.sib1, cs)
   end,
   default = function(tree, cs)
      local n = numsiblings[tree.tag]
      if n == 1 then
         return hascaptures(tree.sib1, cs) -- tail call
      elseif n == 2 then
         if hascaptures(tree.sib1, cs) then return true end
         return hascaptures(tree.sib2, cs) -- tail call
      elseif n == 0 then
         return false
      else
         error("how many siblings does this have?")
      end
   end,
}
function CompileState:hascaptures(t) return hascaptures(t, self) end
register_fname("hascaptures", hascaptures)

--[[
** Checks how a pattern behaves regarding the empty string,
** in one of two different ways:
** A pattern is *nullable* if it can match without consuming any character;
** A pattern is *nofail* if it never fails for any string
** (including the empty string).
** The difference is only for predicates and run-time captures;
** for other patterns, the two properties are equivalent.
** (With predicates, &'a' is nullable but not nofail. Of course,
** nofail => nullable.)
** These functions are all convervative in the following way:
**    p is nullable => nullable(p)
**    nofail(p) => p cannot fail
** The function assumes that TOpenCall is not nullable;
** this will be checked again when the grammar is fixed.
** Run-time captures can do whatever they want, so the result
** is conservative.
]]--
local checkaux
checkaux = define_tree_visitor{
   [{
         TTag.Char, TTag.Set, TTag.Any, TTag.UTFR, TTag.False,
         TTag.OpenCall, TTag.Throw,
   }] = function(tree, pred, cs)
      return false -- not nullable
   end,
   [{TTag.Rep,TTag.True}] = function(tree, pred, cs)
      return true -- no fail
   end,
   [{TTag.Not,TTag.Behind}] = function(tree, pred, cs)
      -- can match empty, but can fail
      if pred == PE.nofail then
         return false
      else
         return true
      end
   end,
   [TTag.And] = function(tree, pred, cs)
      -- can match empty; fail iff body does
      if pred == PE.nullable then
         return true
      end
      return checkaux(tree.sib1, pred, cs) -- tail call
   end,
   [TTag.RunTime] = function(tree, pred, cs)
      -- can fail; match empty iff body does
      if pred == PE.nofail then
         return false
      end
      return checkaux(tree.sib1, pred, cs) -- tail call
   end,
   [TTag.Seq] = function(tree, pred, cs)
      if not checkaux(tree.sib1, pred, cs) then
         return false
      end
      return checkaux(tree.sib2, pred, cs) -- tail call
   end,
   [TTag.Choice] = function(tree, pred, cs)
      if checkaux(tree.sib2, pred, cs) then
         return true
      end
      return checkaux(tree.sib1, pred, cs) -- tail call
   end,
   [{ TTag.Capture, TTag.Rule, TTag.XInfo, }] = function(tree, pred, cs)
      return checkaux(tree.sib1, pred, cs)
   end,
   [TTag.Grammar] = function(tree, pred, cs)
      -- make a fake compile state to hold the grammar, if necessary
      if cs == nil then cs = CompileState:new(nil) end
      return cs:withGrammar(tree, checkaux, tree.sib1, pred, cs)
   end,
   [TTag.Call] = function(tree, pred, cs)
      -- open calls are assumed not nullable; checked again after grammar
      -- is fixed
      if cs == nil then return false end
      return checkaux(cs.grammar.ruletab[tree.key], pred, cs)
   end,
}
register_fname("checkaux", checkaux)

function nofail(t, cs) return checkaux(t, PE.nofail, cs) end

function CompileState:nofail(t) return nofail(t, self) end

function nullable(t, cs) return checkaux(t, PE.nullable, cs) end

function CompileState:nullable(t) return nullable(t, self) end

function nullable_with_grammar(t, grm)
   local cs = CompileState:new(nil)
   return cs:withGrammar(grm, nullable, t, cs)
end

-- Note that we are counting characters, not bytes
local fixedlen, fixedlen_helper
fixedlen_helper = define_tree_visitor{
   [{TTag.Char, TTag.Set, TTag.Any, TTag.UTFR}] = function(tree, len)
      return len + 1
   end,
   [{TTag.False, TTag.True, TTag.Not, TTag.And, TTag.Behind}] = function(tree, len)
      return len
   end,
   [{TTag.Rep, TTag.RunTime, TTag.OpenCall, TTag.Throw,}] = function(tree, len)
      return -1 -- variable
   end,
   [{TTag.Capture, TTag.Rule, TTag.XInfo,}] = function(tree, len, cs)
      return fixedlen_helper(tree.sib1, len, cs)
   end,
   [TTag.Grammar] = function(tree, len, cs)
      -- make a fake compile state to hold the grammar, if necessary
      if cs == nil then cs = CompileState:new(nil) end
      return cs:withGrammar(tree, fixedlen_helper, tree.sib1, len, cs)
   end,
   [TTag.Call] = function(tree, len, cs)
      -- If evaluating outside the context of a grammar, conservatively
      -- return "variable"
      if cs == nil then return -1 end
      -- otherwise, carefully recurse
      local n1 = cs:callrecursive(tree, fixedlen, -1, cs)
      if n1 < 0 then return -1 end -- variable
      return len + n1
   end,
   [TTag.Seq] = function(tree, len, cs)
      local n1 = fixedlen_helper(tree.sib1, len, cs)
      if n1 < 0 then return -1 end -- variable
      return fixedlen_helper(tree.sib2, n1, cs)
   end,
   [TTag.Choice] = function(tree, len, cs)
      local n1 = fixedlen_helper(tree.sib1, len, cs)
      local n2 = fixedlen_helper(tree.sib2, len, cs)
      if n1 ~= n2 or n1 < 0 then
         return -1
      else
         return n1
      end
   end,
}
function fixedlen(tree, cs)
   return fixedlen_helper(tree, 0, cs) -- supply default 0 for 'len'
end
function CompileState:fixedlen(t) return fixedlen(t, self) end
register_fname("fixedlen_helper", fixedlen_helper)

--[[
** Computes the 'first set' of a pattern.
** The result is a conservative aproximation:
**   match p ax -> x (for some x) ==> a belongs to first(p)
** or
**   a not in first(p) ==> match p ax -> fail (for all x)
**
** The set 'follow' is the first set of what follows the
** pattern (full set if nothing follows it).
**
** The function returns 0 when this resulting set can be used for
** test instructions that avoid the pattern altogether.
** A non-zero return can happen for two reasons:
** 1) match p '' -> ''            ==> return has bit 1 set
** (tests cannot be used because they would always fail for an empty input);
** 2) there is a match-time capture ==> return has bit 2 set
** (optimizations should not bypass match-time captures).
]]--
local getfirst
getfirst = define_tree_visitor{
   [TTag.Char] = function(t, follow, cs)
      if t.n <= CHARMAX then return 0, tocharset(t) end
      -- conservative approximation!
      local s = newcharset()
      s.rest = true
      return 0, s
   end,
   [{ TTag.Set, TTag.Any, TTag.False }] = function(t, follow, cs)
      return 0, tocharset(t)
   end,
   [TTag.UTFR] = function(t, follow, cs)
      -- conservative approximation!
      local firstset = newcharset()
      if t.from <= CHARMAX then
         for i=t.from, math.min(CHARMAX, t.to) do
            firstset.set[i] = true
         end
      end
      if t.to > CHARMAX then
         -- conservative approximation of non-ascii unicode range
         firstset.rest = true
      end
      return 0, firstset
   end,
   [TTag.True] = function(t, follow, cs)
      return 1, follow -- 1 because this accepts the empty string
   end,
   [TTag.Throw] = function(t, follow, cs)
      -- labeled failure: must always throw the label
      return 1, fullset
   end,
   [TTag.Choice] = function(t, follow, cs)
      local firstset = newcharset()
      local e1,e1set = getfirst(t.sib1, follow, cs)
      local e2,e2set = getfirst(t.sib2, follow, cs)
      local firstset = cs_union(e1set, e2set)
      local ret = 0 -- awkward lua5.1 way to say "e1 | e2"
      if (e1 % 2) == 1 or (e2 % 2) == 1 then
         ret = ret + 1
      end
      e1,e2 = compat.rshift(e1, 1), compat.rshift(e2, 1)
      if (e1 % 2) == 1 or (e2 % 2) == 1 then
         ret = ret + 2
      end
      return ret, firstset
   end,
   [TTag.Seq] = function(t, follow, cs)
      if not nullable(t.sib1, cs) then
         -- when p1 is not nullable, p2 has nothing to contribute
         return getfirst(t.sib1, fullset, cs) -- tail call
      else -- FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl))
         local e2,csaux = getfirst(t.sib2, follow, cs)
         local e1,firstset = getfirst(t.sib1, csaux, cs)
         if e1 == 0 then
            return 0, firstset -- 'e1' ensures that first can be used
         elseif compat.rshift(e1, 1) % 2 == 1 or compat.rshift(e2, 1) % 2 == 1 then
            -- one of the children has a matchtime?
            return 2, firstset -- pattern has a matchtime capture
         else
            return e2, firstset -- else depends on e2
         end
      end
   end,
   [TTag.Rep] = function(t, follow, cs)
      local _,firstset = getfirst(t.sib1, follow, cs)
      return 1, cs_union(firstset, follow, cs) -- accepts the empty string
   end,
   [{ TTag.Capture,TTag.Rule,TTag.XInfo }] = function(t, follow, cs)
      return getfirst(t.sib1, follow, cs) -- tail call
   end,
   [TTag.Grammar] = function(t, follow, cs)
      return cs:withGrammar(t, getfirst, t.sib1, follow, cs)
   end,
   [TTag.RunTime] = function(t, follow, cs)
      -- function invalidates any follow info
      local e,firstset = getfirst(t.sib1, fullset, cs)
      if e ~= 0 then
         -- function is not "protected"?
         return 2,firstset
      else
         -- pattern inside capture ensures first can be used
         return 0,firstset
      end
   end,
   [TTag.Call] = function(t, follow, cs)
      local rule = cs.grammar.ruletab[t.key]
      return getfirst(rule, follow, cs) -- tail call
   end,
   [TTag.And] = function(t, follow, cs)
      local e,firstset = getfirst(t.sib1, follow, cs)
      return e, cs_intersection(firstset, follow, cs)
   end,
   [{ TTag.Not, TTag.Behind }] = function(t, follow, cs)
      if t.tag == TTag.Not then
         local firstset = tocharset(t.sib1)
         if firstset ~= nil then
            return 1,cs_complement(firstset) -- could match empty input
         end
      end
      -- the TNot or TBehind gives no new information
      -- call getfirst only to check for math-time captures
      local e,_ = getfirst(t.sib1, follow, cs)
      if e%2 == 0 then e = e + 1 end -- set the lsb; could match empty input
      return e, follow -- uses follow
   end,
}
function CompileState:getfirst(t, follow) return getfirst(t, follow, self) end
register_fname("getfirst", getfirst)

--[[
** If 'headfail(tree)' true, then 'tree' can fail only depending on the
** next character of the subject.
   -- ie, a single character of lookahead is enough to evaluate the pattern
   -- rooted at this node
]]--
local headfail
headfail = define_tree_visitor{
   [{TTag.Char, TTag.Set, TTag.Any,
     TTag.False}] = function(t, cs)
      return true
     end,
   [{TTag.True, TTag.Rep, TTag.RunTime, TTag.Not,
     -- even though we are codepoint-based, we don't have a TestUTFR instruction
     -- so we can't use a Test instruction on the first character, which is
     -- implicitly what headfail is checking for.
     TTag.UTFR,
     TTag.Behind, TTag.Throw}] = function(t, cs)
      return false
     end,
   [{TTag.Capture, TTag.Rule,
     TTag.XInfo, TTag.And}] = function(t, cs)
      return headfail(t.sib1, cs) -- tail call
     end,
   [TTag.Grammar] = function(t, cs)
      return cs:withGrammar(t, headfail, t.sib1, cs)
   end,
   [TTag.Call] = function(t, cs)
      local rule = cs.grammar.ruletab[t.key]
      return headfail(rule, cs) -- tail call
   end,
   [TTag.Seq] = function(t, cs)
      if not nofail(t.sib2, cs) then
         -- if the second child could possibly fail, then we can't
         -- evaluate the entire seq based just on the first child
         return false
      end
      return headfail(t.sib1, cs) -- tail call
   end,
   [TTag.Choice] = function(t, cs)
      -- both children need to be headfail for this to be headfail
      if not headfail(t.sib1, cs) then
         return false
      end
      return headfail(t.sib2, cs) -- tail call
   end,
}
function CompileState:headfail(t) return headfail(t, self) end
register_fname("headfail", headfail)

--[[
** Check whether the code generation for the given tree can benefit
** from a follow set (to avoid computing the follow set when it is
** not needed)
]]--
local needfollow
needfollow = define_tree_visitor{
   [{TTag.Char, TTag.Set, TTag.Any, TTag.UTFR,
    TTag.False, TTag.True, TTag.And, TTag.Not,
    TTag.RunTime, TTag.Grammar, TTag.Call, TTag.Behind,
    TTag.Throw, }] = function(tree) return false end,
   [{TTag.Choice, TTag.Rep}] = function(tree) return true end,
   [TTag.Capture] = function(tree) return needfollow(tree.sib1) end,
   [TTag.Seq] = function(tree) return needfollow(tree.sib2) end,
}
register_fname("needfollow", needfollow)

--[[
** ======================================================
** Code generation
** ======================================================
]]--

local Instruction = {}
Instruction.__index = Instruction

function Instruction:new(arg)
   local opcode = arg[1]
   if opcode == nil then error("no opcode") end
   -- target is rule # for open calls before correction, and absolute pc after
   local instr = {
      code = opcode,
      exec = opcode.exec, -- copy the exec function from the opcode!
      aux = arg.aux, -- used for the "primary argument"
      key = arg.key, -- used for string-valued arguments
      target = arg.target, -- used for jmp-like instructions
      from = arg.from, -- inclusive start, for ranges
      to = arg.to, -- inclusive end, for ranges
      set = arg.set, -- charset <= CHARMAX
      rest = arg.rest, -- include characters above CHARMAX?
      cap = arg.cap, -- used for "capture kind"
   }
   setmetatable(instr, self)
   instr:setCode(opcode) -- opportunity to add tracing logic!
   return instr
end

function Instruction:setCode(opcode)
   self.code = opcode
   local exec = opcode.exec
   if TRACE_INSTRUCTIONS then
      local str
      self.exec = function(self, state, ...)
         if str == nil then
            str = table.concat(printinst(0, self, { "Executing " })):gsub("\n","")
         end
         print(state.bytePos, state.codepoint, str)
         return exec(self, state, ...)
      end
   else
      self.exec = exec
   end
end

-- state for the compiler

function CompileState:new(p)
   local cs = {
      p = p,
   }
   setmetatable(cs, self)
   return cs
end

function CompileState:withGrammar(g, f, ...)
   local lastGrammar = self.grammar
   self.grammar = g
   local result = compat.pack(f(...))
   self.grammar = lastGrammar
   return compat.unpack(result)
end

function CompileState:codegen(tree, opt, tt, fl)
   assert(fl.tag == TTag.Set)
   -- just a little helper
   return codegen(tree, self, opt, tt, fl)
end

function CompileState:getinstr(i)
   return self.p.code[i]
end

function CompileState:addinstruction(arg)
   local code = self.p.code
   table.insert(code, Instruction:new(arg))
   return #code
end

function CompileState:gethere()
   local code = self.p.code
   return 1 + #code
end

function CompileState:jumptothere(pc, where)
   if pc ~= NOINST then
      local code = self.p.code
      code[pc].target = where
   end
end

function CompileState:jumptohere(pc)
   self:jumptothere(pc, self:gethere())
end

function codethrow(cs, throw)
   local rule = nil
   if cs.grammar ~= nil then
      -- we only lookup/match *string* rule names, not numeric indices
      rule = cs.grammar.ruletab[tostring(throw.key)]
   end
   if rule ~= nil then
      return cs:addinstruction{
         Opcode.ThrowRec,
         key=throw.key, -- rule name / error label
         target=rule.n -- recovery rule number
      }
   else
      return cs:addinstruction{
         Opcode.Throw,
         key=throw.key, -- rule name / error label
         -- no recovery rule
      }
   end
end

function codeutfr(cs, tree)
   return cs:addinstruction{
      Opcode.UTFR,
      from = tree.from,
      to = tree.to,
   }
end

--[[
** Code an IChar instruction, or IAny if there is an equivalent
** test dominating it
]]--
function codechar(cs, codepoint, tt)
   if tt ~= NOINST and
      cs:getinstr(tt).code == Opcode.TestChar and
      cs:getinstr(tt).aux == codepoint then
      cs:addinstruction{Opcode.Any}
   else
      cs:addinstruction{Opcode.Char, aux=codepoint,}
   end
end

--[[
** Add a charset posfix to an instruction
]]--
function addcharset(cs, codepoint)
   --[[
static void addcharset (CompileState *compst, const byte *cs) {
  int p = gethere(compst);
  int i;
  for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++)
    nextinstruction(compst);  /* space for buffer */
  /* fill buffer with charset */
      loopset(j, getinstr(compst, p).buff[j] = cs[j]);
   ]]--
end

--[[
** code a char set, optimizing unit sets for IChar, "complete"
** sets for IAny, and empty sets for IFail; also use an IAny
** when instruction is dominated by an equivalent test.
]]--
function codecharset(cs, tree, tt)
   local op,codepoint = charsettype(tree)
   if op == Opcode.Char then
      return codechar(cs, codepoint, tt)
   elseif op == Opcode.Set then
      -- non-trivial set?
      if tt ~= NOINST and
         cs:getinstr(tt).code == Opcode.TestSet and
         cs_equal(tree, cs:getinstr(tt)) then
         return cs:addinstruction{Opcode.Any}
      else
         return cs:addinstruction{
            Opcode.Set,
            set = tree.set, -- XXX ensure immutable
            rest = tree.rest,
         }
      end
   else
      return cs:addinstruction{op} -- Any or Fail
   end
end

--[[
** code a test set, optimizing unit sets for ITestChar, "complete"
** sets for ITestAny, and empty sets for IJmp (always fails).
** 'e' is nonzero iff test should accept the empty string. (Test
** instructions in the current VM never accept the empty string.)
]]--
function codetestset(cs, tree, e)
   if e ~= 0 then return NOINST end
   local op,codepoint = charsettype(tree)
   if op == Opcode.Fail then
      return cs:addinstruction{Opcode.Jmp, target = NOINST} -- always jump
   elseif op == Opcode.Any then
      return cs:addinstruction{Opcode.TestAny, target = NOINST}
   elseif op == Opcode.Char then
      return cs:addinstruction{
         Opcode.TestChar,
         target = NOINST,
         aux = codepoint,
      }
   elseif op == Opcode.Set then
      return cs:addinstruction{
         Opcode.TestSet,
         target = NOINST,
         set = tree.set, -- XXX ensure immutable
         rest = tree.rest,
      }
   else
      error("unreachable")
   end
end

--[[
** <behind(p)> == behind n; <p>   (where n = fixedlen(p))
]]--
function codebehind(cs, tree)
   if tree.n > 0 then
      cs:addinstruction{ Opcode.Behind, aux = tree.n }
   end
   return cs:codegen(tree.sib1, false, NOINST, fullset)
end

--[[
** Choice; optimizations:
** - when p1 is headfail or when first(p1) and first(p2) are disjoint,
** than a character not in first(p1) cannot go to p1 and a character
** in first(p1) cannot go to p2, either because p1 will accept
** (headfail) or because it is not in first(p2) (disjoint).
** (The second case is not valid if p1 accepts the empty string,
** as then there is no character at all...)
** - when p2 is empty and opt is true; a IPartialCommit can reuse
** the Choice already active in the stack.
]]--
function codechoice(cs, p1, p2, opt, fl)
   local emptyp2 = (p2.tag == TTag.True)
   local e1, cs1 = cs:getfirst(p1, fullset)
   local headfailp1 = cs:headfail(p1)
   local e2, cs2
   if not headfailp1 and e1 == 0 then
      e2, cs2 = cs:getfirst(p2, fl) -- avoid computing unless necessary
   end
   if headfailp1 or (e1 == 0 and cs_disjoint(cs1, cs2)) then
      -- <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2:
      local test = codetestset(cs, cs1, 0)
      local jmp = NOINST
      cs:codegen(p1, false, test, fl)
      if not emptyp2 then
         jmp = cs:addinstruction{Opcode.Jmp, target = NOINST }
      end
      cs:jumptohere(test)
      cs:codegen(p2, opt, NOINST, fl)
      cs:jumptohere(jmp)
   elseif opt and emptyp2 then
      -- p1? == IPartialCommit; p1
      cs:jumptohere(cs:addinstruction{Opcode.PartialCommit, target = NOINST})
      cs:codegen(p1, true, NOINST, fullset)
   else
      -- <p1 / p2> ==
      --  test(first(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2:
      local test = codetestset(cs, cs1, e1)
      local pchoice = cs:addinstruction{Opcode.Choice, target = NOINST}
      cs:codegen(p1, emptyp2, test, fullset)
      local pcommit = cs:addinstruction{Opcode.Commit, target = NOINST}
      cs:jumptohere(pchoice)
      cs:jumptohere(test)
      cs:codegen(p2, opt, NOINST, fl)
      cs:jumptohere(pcommit)
   end
end

--[[
** And predicate
** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n
** (valid only when 'p' has no captures)
]]--
function codeand(cs, tree, tt)
  --[[ labeled failure: optimization disabled because in case of a failure it
     does not report the expected error position (the current subject position
     when begin the matching of <&p>) ]]--
   local pchoice = cs:addinstruction{Opcode.PredChoice, target = NOINST}
   cs:codegen(tree, false, tt, fullset)
   local pcommit = cs:addinstruction{Opcode.BackCommit, target = NOINST}
   cs:jumptohere(pchoice)
   cs:addinstruction{Opcode.Fail}
   cs:jumptohere(pcommit)
end

--[[
** Captures: if pattern has fixed (and not too big) length, and it
** has no nested captures, use a single IFullCapture instruction
** after the match; otherwise, enclose the pattern with OpenCapture -
** CloseCapture.
]]--
function codecapture(cs, tree, tt, fl)
   local len = cs:fixedlen(tree.sib1)
   if len >= 0 and len <= MAXOFF and not cs:hascaptures(tree.sib1) then
      cs:codegen(tree.sib1, false, tt, fl)
      cs:addinstruction{
         Opcode.FullCapture,
         cap = tree.cap,
         key = tree.key, -- capture name
         aux = len,
      }
   else
      assert(tree.cap ~= nil)
      cs:addinstruction({
            Opcode.OpenCapture,
            cap = tree.cap,
            key = tree.key, -- capture name
      })
      cs:codegen(tree.sib1, false, tt, fl)
      cs:addinstruction({
            Opcode.CloseCapture,
            cap = CapKind.close,
      })
   end
end

function coderuntime(cs, tree, tt)
   cs:addinstruction({
         Opcode.OpenCapture,
         cap = CapKind.group,
         key = tree.key, -- capture *function*
   })
   cs:codegen(tree.sib1, false, tt, fullset)
   cs:addinstruction({
         Opcode.CloseRunTime,
         cap = CapKind.close,
   })
end

--[[
** Repetition; optimizations:
** When pattern is a charset, can use special instruction ISpan.
** When pattern is head fail, or if it starts with characters that
** are disjoint from what follows the repetions, a simple test
** is enough (a fail inside the repetition would backtrack to fail
** again in the following pattern, so there is no need for a choice).
** When 'opt' is true, the repetion can reuse the Choice already
** active in the stack.
]]--
function coderep(cs, tree, opt, fl)
   local st = tocharset(tree)
   if st ~= nil then
      return cs:addinstruction{
         Opcode.Span,
         set = st.set,
         rest = st.rest,
      }
   end
   local e1,st = cs:getfirst(tree, fullset)
   if cs:headfail(tree) or (e1 == 0 and cs_disjoint(st, fl)) then
      -- L1: test (fail(p1)) -> L2; <p>; jmp L1; L2:
      local test = codetestset(cs, st, 0)
      cs:codegen(tree, false, test, fullset)
      local jmp = cs:addinstruction{Opcode.Jmp, target = NOINST}
      cs:jumptohere(test)
      cs:jumptothere(jmp, test)
   else
      -- test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2:
      -- or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1;
      local test = codetestset(cs, st, e1)
      local pchoice = NOINST
      if opt then
         cs:jumptohere(cs:addinstruction{Opcode.PartialCommit, target = NOINST})
      else
         pchoice = cs:addinstruction{Opcode.Choice, target = NOINST}
      end
      local l2 = cs:gethere()
      cs:codegen(tree, false, NOINST, fullset)
      local commit = cs:addinstruction{Opcode.PartialCommit, target = NOINST}
      cs:jumptothere(commit, l2)
      cs:jumptohere(pchoice)
      cs:jumptohere(test)
   end
end

--[[
** Not predicate; optimizations:
** In any case, if first test fails, 'not' succeeds, so it can jump to
** the end. If pattern is headfail, that is all (it cannot fail
** in other parts); this case includes 'not' of simple sets. Otherwise,
** use the default code (a choice plus a failtwice).
]]--
function codenot(cs, tree)
   local e,st = cs:getfirst(tree, fullset)
   local test = codetestset(cs, st, e)
   if cs:headfail(tree) then
      -- test (fail(p1)) -> L1; fail; L1:
      cs:addinstruction{Opcode.Fail}
   else
      -- test(fail(p))-> L1; choice L1; <p>; failtwice; L1:
      local pchoice = cs:addinstruction{Opcode.PredChoice, target = NOINST }
      cs:codegen(tree, false, NOINST, fullset)
      cs:addinstruction{Opcode.FailTwice}
      cs:jumptohere(pchoice)
   end
   cs:jumptohere(test)
end

-- find the final destination of a sequence of jumps
function finaltarget(code, pc)
   while code[pc].code == Opcode.Jmp do
      pc = code[pc].target
   end
   return pc
end

-- final label (after traversing any jumps)
function finallabel(code, pc)
   return finaltarget(code, code[pc].target)
end

--[[
** change open calls to calls, using list 'positions' to find
** correct offsets; also optimize tail calls
]]--
function correctcalls(cs, positions, from, to)
   local code = cs.p.code
   for i=from,(to-1) do
      local op = code[i]
      if op.code == Opcode.OpenCall or op.code == Opcode.ThrowRec then
         local n = op.target -- rule number
         local rule = positions[n] -- rule position
         if rule == from or code[rule - 1].code == Opcode.Ret then
            -- sanity check! ok!
         else
            error("bad rule position")
         end
         if op.code == Opcode.OpenCall then
            if code[finaltarget(code, i+1)].code == Opcode.Ret then
               -- call; ret => tail call
               op:setCode(Opcode.Jmp)
            else
               op:setCode(Opcode.Call) -- open call no more
            end
         end
         op.target = rule
      end
   end
end

--[[
** Code for a grammar:
** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:
]]--
function codegrammar(cs, tree)
   local firstcall = cs:addinstruction{Opcode.Call, target = NOINST} -- call initial rule
   local jumptoend = cs:addinstruction{Opcode.Jmp, target = NOINST} -- jump to the end
   local start = cs:gethere() -- here starts the initial rule
   cs:jumptohere(firstcall)

   local positions = {}
   local rule = tree.sib1
   for i=1,tree.n do
      local pattern = rule.sib1
      positions[i] = cs:gethere() -- save rule position
      cs:codegen(rule.sib1, false, NOINST, fullset) -- code rule
      cs:addinstruction{Opcode.Ret}
      rule = rule.sib2
   end
   if rule.tag ~= TTag.True then error("impossible") end
   cs:jumptohere(jumptoend)
   correctcalls(cs, positions, start, cs:gethere())
end

function codecall(cs, tree)
   local rule = cs.grammar.ruletab[tree.key]
   assert(rule ~= nil)
   assert(rule.n ~= nil)
   return cs:addinstruction{
      Opcode.OpenCall, -- to be corrected later
      target = rule.n -- rule number
   }
end

--[[
** Code first child of a sequence
** (second child is called in-place to allow tail call)
** Return 'tt' for second child
]]--
function codeseq1(cs, p1, p2, tt, fl)
   assert(fl.tag == TTag.Set)
   if needfollow(p1) then
      local _, fl1 = cs:getfirst(p2, fl) -- p1 follow is p2 first
      cs:codegen(p1, false, tt, fl1)
   else
      -- use fullset as follow
      cs:codegen(p1, false, tt, fullset)
   end
   if cs:fixedlen(p1) ~= 0 then -- can 'p1' consume anything?
      return NOINST -- invalidate test
   else
      return tt -- else 'tt' still protects sib2
   end
end

--[[
** Main code-generation function: dispatch to auxiliar functions
** according to kind of tree. ('needfollow' should return true
** only for consructions that use 'fl'.)
]]--

--[[
** code generation is recursive; 'opt' indicates that the code is being
** generated as the last thing inside an optional pattern (so, if that
** code is optional too, it can reuse the 'IChoice' already in place for
** the outer pattern). 'tt' points to a previous test protecting this
** code (or NOINST). 'fl' is the follow set of the pattern.
]]--
codegen = define_tree_visitor{
   [TTag.Char] = function(tree, cs, opt, tt, fl)
      return codechar(cs, tree.n, tt)
   end,
   [TTag.Any] = function(tree, cs, opt, tt, fl)
      return cs:addinstruction{Opcode.Any}
   end,
   [TTag.Set] = function(tree, cs, opt, tt, fl)
      return codecharset(cs, tree, tt)
   end,
   [TTag.True] = function(tree, cs, opt, tt, fl)
      return -- do nothing
   end,
   [TTag.False] = function(tree, cs, opt, tt, fl)
      return cs:addinstruction{Opcode.Fail}
   end,
   [TTag.UTFR] = function(tree, cs, opt, tt, fl)
      return codeutfr(cs, tree)
   end,
   [TTag.Choice] = function(tree, cs, opt, tt, fl)
      return codechoice(cs, tree.sib1, tree.sib2, opt, fl)
   end,
   [TTag.Rep] = function(tree, cs, opt, tt, fl)
      return coderep(cs, tree.sib1, opt, fl)
   end,
   [TTag.Behind] = function(tree, cs, opt, tt, fl)
      return codebehind(cs, tree)
   end,
   [TTag.Not] = function(tree, cs, opt, tt, fl)
      return codenot(cs, tree.sib1)
   end,
   [TTag.And] = function(tree, cs, opt, tt, fl)
      return codeand(cs, tree.sib1, tt)
   end,
   [TTag.Capture] = function(tree, cs, opt, tt, fl)
      return codecapture(cs, tree, tt, fl)
   end,
   [TTag.RunTime] = function(tree, cs, opt, tt, fl)
      return coderuntime(cs, tree, tt)
   end,
   [TTag.Grammar] = function(tree, cs, opt, tt, fl)
      return cs:withGrammar(tree, codegrammar, cs, tree)
   end,
   [TTag.Call] = function(tree, cs, opt, tt, fl)
      return codecall(cs, tree)
   end,
   [TTag.Seq] = function(tree, cs, opt, tt, fl)
      tt = codeseq1(cs, tree.sib1, tree.sib2, tt, fl) -- code 'p1'
      return cs:codegen(tree.sib2, opt, tt, fl) -- tail call
   end,
   [TTag.Throw] = function(tree, cs, opt, tt, fl)
      return codethrow(cs, tree)
   end,
   ["assert"] = function(tree, cs, opt, tt, fl)
      assert(fl.tag == TTag.Set)
      assert(opt ~= 0)
   end,
}
register_fname("codegen", codegen)

--[[
** Optimize jumps and other jump-like instructions.
** * Update labels of instructions with labels to their final
** destinations (e.g., choice L1; ... L1: jmp L2: becomes
** choice L2)
** * Jumps to other instructions that do jumps become those
** instructions (e.g., jump to return becomes a return; jump
** to commit becomes a commit)
]]--
function peephole(cs)
   local code = cs.p.code
   local jmpswitch
   local switch = define_opcode_visitor{
      -- instructions with labels
      [{Opcode.Choice, Opcode.Call, Opcode.Commit, Opcode.PartialCommit,
        Opcode.BackCommit, Opcode.TestChar, Opcode.TestSet,
        Opcode.TestAny}] = function(op, i)
         cs:jumptothere(i, finallabel(code, i))
        end,
      [Opcode.Jmp] = function(op, i)
         local ft = finaltarget(code, i)
         jmpswitch(code[ft], i, ft) -- jumping to what?
      end,
      default = function() end,
   }
   jmpswitch = define_opcode_visitor{
      -- instructions with unconditional implicit jumps
      [{Opcode.Ret,Opcode.Fail,Opcode.FailTwice,Opcode.End}] = function(op, i, ft)
         code[i]:setCode(code[ft].code) -- jump becomes that instruction
      end,
      -- instructions with unconditional explicit jumps
      [{Opcode.Commit, Opcode.PartialCommit, Opcode.BackCommit}] = function(op, i, ft)
         local fft = finallabel(code, ft)
         code[i]:setCode(code[ft].code) -- jump becomes that instruction
         cs:jumptothere(i, fft) -- with an optimized target
         switch(code[i], i) -- reoptimize the label
      end,
      default = function(op, i, ft)
         cs:jumptothere(i, ft) -- optimize label
      end,
   }
   for i=1,#code do
      switch(code[i], i)
   end
end

-- thread the instructions to speed up dispatch during execution
function thread(cs)
   local code = cs.p.code
   for i=1,#code-1 do
      code[i].next = code[i+1]
      if code[i].target ~= nil then
         code[i].branch = code[code[i].target]
      end
   end
end

function compile(p)
   local compst = CompileState:new(p)
   p.code = {}
   assert(fullset.tag == TTag.Set)
   compst:codegen(p, false, NOINST, fullset)
   compst:addinstruction{Opcode.End}
   peephole(compst)
   thread(compst)
   return p.code
end

return {
   Instruction = Instruction,
   compile = compile,
   cs_clone = cs_clone,
   cs_complement = cs_complement,
   cs_diff = cs_diff,
   cs_intersection = cs_intersection,
   cs_union = cs_union,
   fixedlen = fixedlen,
   hascaptures = hascaptures,
   nofail = nofail,
   nullable = nullable,
   nullable_with_grammar = nullable_with_grammar,
   tocharset = tocharset,
}

end)

register('llpeg.utf8util', function(myrequire)
myrequire('strict')

local utf8util = {}

function utf8util.codepointAt(s, pos)
   local c1 = string.byte(s, pos)
   if c1 <= 0x7F then
      return c1, 1
   end
   local c2 = string.byte(s, pos + 1)
   if c1 <= 0xDF then
      return ((c1 % 0x20 ) * 0x40) + (c2 % 0x40), 2
   end
   local c3 = string.byte(s, pos + 2)
   if c1 <= 0xEF then
      return (((c1 % 0x10) * 0x40) + (c2 % 0x40)) * 0x40 + (c3 % 0x40), 3
   end
   local c4 = string.byte(s, pos + 3)
   if c1 <= 0xF7 then
      return ((((c1 % 0x08) * 0x40) + (c2 % 0x40)) * 0x40 + (c3 % 0x40)) * 0x40 + (c4 % 0x40), 4
   end
   error( "bad utf8" )
end

-- same as utf8.offset in Lua 5.3 standard library
function utf8util.offset(s, n, i)
   if n > 0 then error("unimplemented") end
   while n < 0 do
      i = i - 1
      if i < 1 then return nil end
      local c = string.byte(s, i)
      if c < 0x80 or c > 0xBF then
         n = n + 1
      end
   end
   return i
end

return utf8util

end)

register('llpeg.list', function(myrequire)
local List = {}
List.__index = List

function List:new()
   return setmetatable({ n = 0 }, self)
end

function List:__len()
   return self.n
end

function List:push(val)
   local n = self.n + 1
   self[n] = val
   self.n = n
end

function List:pop()
   local n = self.n
   assert(n > 0)
   local old = self[n]
   self[n] = nil
   self.n = n - 1
   return old
end

function List:insert(pos, val)
   for i=self.n,pos,-1 do
      self[i+1] = self[i]
   end
   self[pos] = val
   self.n = self.n + 1
end

return List

end)

register('llpeg.cap', function(myrequire)
myrequire('strict')
local compat = myrequire('advent.compat')
local from = myrequire('llpeg.types').from
local
   CapKind,
   _ = from(myrequire('llpeg.types'), {
               'CapKind',
   })
local
   printcaplist,
   _ = from(myrequire('llpeg.print'), {
               'printcaplist',
   })
local List = myrequire('llpeg.list')

local MAXRECLEVEL = 200

local Capture = {}
Capture.__index = Capture

-- kind is CapKind of the capture
-- bytePos is the subject position (in bytes)
-- byteLen is the length of the capture (in bytes)
-- extra is extra info (group name, arg index, etc)
function Capture:new(kind, bytePos, byteLen, extra)
   assert(getmetatable(kind) == CapKind)
   return setmetatable({
         kind = kind, bytePos = bytePos, byteLen = byteLen, extra = extra,
   }, self)
end

function Capture:__tostring()
   return string.format("Capture{kind=%s, pos=%d, len=%s, extra=%s}",
                        self.kind, self.bytePos, self.byteLen, self.extra)
end

function Capture:isopencap()
   return self.byteLen == nil
end

-- true if c2 is (any number of levels) inside self
function Capture:inside(c2)
   if self:isopencap() then
      return not c2:isclosecap()
   else
      return c2.bytePos < (self.bytePos + self.byteLen)
   end
end

function Capture:isclosecap()
   return self.kind == CapKind.close
end

--[[
** Return the size of capture 'cap'. If it is an open capture, 'close'
** must be its corresponding close.
]]--
function Capture:size(close)
   if self:isopencap() then
      assert(close:isclosecap())
      return close.bytePos - self.bytePos
   else
      return self.byteLen
   end
end

function CapKind:newCapture(bytePos, byteLen, extra)
   return Capture:new(self, bytePos, byteLen, extra)
end

local CapState = {}
CapState.__index = CapState

-- Capture cap: current capture
-- Capture ocap: (original) capture list
-- int ptop: index of last argument to 'match'
-- string s: original string
-- int valuecached: value stored in cache slot
-- int reclevel: recursion level
function CapState:new(captures, source, extraArgs)
   return setmetatable({
         captures = captures,
         index = 1,
         source = source,
         valuecached = {},
         reclevel = 0,
         extraArgs = extraArgs,
    }, self)
end

function CapState:cap() -- helper
   return self.captures[self.index]
end

function CapState:advance() -- helper
   local i = self.index
   local cap = self.captures[i]
   self.index = i + 1
   return cap, i
end

function CapState:substr(start, len) -- helper
   return string.sub(self.source, start, start + len - 1)
end

function CapState:skipclose(head)
   if head:isopencap() then
      assert(self.captures[self.index]:isclosecap())
      self.index = self.index + 1
   end
end

function CapState:closesize(head)
   return head:size(self:cap())
end

--[[
** Go to the next capture at the same level
]]--
function CapState:nextcap()
   local cap = self:cap()
   if cap:isopencap() then -- must look for a close
      local n = 0 -- number of opens waiting a close
      while true do -- look for corresponding close
         self.index = self.index + 1
         cap = self:cap()
         if cap:isopencap() then
            n = n + 1
         elseif cap:isclosecap() then
            if n == 0 then break end
            n = n - 1
         end
      end
      self.index = self.index + 1 -- skip last close (or entire single capture)
   else
      self.index = self.index + 1
      while cap:inside(self:cap()) do
         self.index = self.index + 1 -- skip captures inside the current one
      end
   end
end

--[[
** Goes back in a list of captures looking for an open capture
** corresponding to a close
]]--
function CapState:findopen(i) -- captures[i] is the close that we want to match
   assert(self.captures[i]:isclosecap())
   local n = 0 -- number of closes waiting an open
   while i > 1 do
      i = i - 1
      local cap = self.captures[i]
      if cap:isclosecap() then
         n = n + 1 -- one more open to skip
      elseif cap:isopencap() then
         if n == 0 then return cap,i end
         n = n - 1
      end
   end
   error("couldn't find open")
end

--[[
** Checks whether group 'grp' is visible to 'ref', that is, 'grp' is
** not nested inside a full capture that does not contain 'ref'.  (We
** only need to care for full captures because the search at 'findback'
** skips open-end blocks; so, if 'grp' is nested in a non-full capture,
** 'ref' is also inside it.)  To check this, we search backward for the
** inner full capture enclosing 'grp'.  A full capture cannot contain
** non-full captures, so a close capture means we cannot be inside a
** full capture anymore.
]]--
function CapState:capvisible(igrp, ref)
   local i = igrp
   local grp = self.captures[igrp]
   while i > 1 do
      i = i - 1
      local cap = self.captures[i]
      if cap:isclosecap() then
         return true -- can stop the search
      elseif cap:inside(grp) then -- is 'grp' inside cap?
         return cap:inside(ref) -- ok iff cap also contains ref
      end
   end
   return true -- 'grp' is not inside any capture
end

--[[
** Try to find a named group capture with the name given;
** goes backward from 'i'.
]]--
function CapState:findback(name, i)
   if i == nil then i = self.index end
   local ref = self.captures[i]
   while i > 1 do
      i = i - 1
      local cap = self.captures[i]
      if cap:isclosecap() or not cap:inside(ref) then
         if cap:isclosecap() then
            cap,i = self:findopen(i)
         end
         if cap.kind == CapKind.group and self:capvisible(i, ref) then
            if cap.extra == name then
               return cap,i
            end
         end
      end
   end
   error("back reference '"..name.."' not found")
end

function CapState:getcaptures()
   local result = List:new()
   while not self:cap():isclosecap() do
      self:pushcapture(result)
   end
   return result
end

function CapState:pushcapture(result)
   self.reclevel = self.reclevel + 1
   if self.reclevel > MAXRECLEVEL then
      error("subcapture nesting too deep")
   end
   local cap = self.captures[self.index]
   assert(cap.kind.push ~= nil)
   local res = cap.kind.push(self, cap, result)
   self.reclevel = self.reclevel - 1
   return res
end

-- helper functions for pushcapture

--[[
** Push on the Lua stack all values generated by nested captures inside
** the current capture. Returns number of values pushed. 'addextra'
** makes it push the entire match after all captured values. The
** entire match is pushed also if there are no other nested values,
** so the function never returns zero.
]]--
function CapState:pushnestedvalues(result, addextra)
   local head = self:advance()
   local n = 0 -- number of pushed subvalues
   -- repeat for all nested patterns
   while head:inside(self:cap()) do
      n = n + self:pushcapture(result)
   end
   if addextra or n == 0 then -- need extra?
      result:push(self:substr(head.bytePos, self:closesize(head)))
      n = n + 1
   end
   self:skipclose(head)
   return n
end

--[[
** Push only the first value generated by nested captures
]]--
function CapState:pushonenestedvalue(result)
   local n = self:pushnestedvalues(result, false)
   if n == 0 then
      result:push(nil) -- ensure there's exactly one value
      return 1
   end
   while n > 1 do
      result:pop() -- pop extra values
      n = n - 1
   end
   return n
end


-- visitor patterns for pushcapture
function CapKind.position.push(capstate, cap, result)
   result:push(cap.bytePos)
   capstate.index = capstate.index + 1
   return 1
end

function CapKind.const.push(capstate, cap, result)
   result:push(cap.extra)
   capstate.index = capstate.index + 1
   return 1
end

function CapKind.arg.push(capstate, cap, result)
   local n = cap.extra
   if n > capstate.extraArgs.n then
      error(string.format("reference to absent extra argument #%d", n))
   end
   result:push(capstate.extraArgs[n])
   capstate.index = capstate.index + 1
   return 1
end

function CapKind.simple.push(capstate, cap, result)
   local k = capstate:pushnestedvalues(result, true)
   -- reorder so that the whole match is the first result, not the last
   local last = result:pop()
   result:insert(2 + #result - k, last)
   return k
end

-- missing a bunch

--[[
** Table capture: creates a new table and populates it with nested
** captures.
]]--
function CapKind.table.push(capstate, cap, result) -- aka tablecap
   local t = {}
   result:push(t)
   local head = capstate:advance()

   local n = 0
   while head:inside(capstate:cap()) do
      cap = capstate:cap()
      if cap.kind == CapKind.group and cap.extra ~= nil then -- named group?
         capstate:pushonenestedvalue(result)
         t[cap.extra] = result:pop() -- move it into table
      else -- not a named group
         local k = capstate:pushcapture(result)
         for i=k,1,-1 do
            t[n + i] = result:pop() -- move it into table (indexed)
         end
         n = n + k
      end
   end
   capstate:skipclose(head)
   return 1 -- number of values pushed (only the table)
end

--[[
** Table-query capture
]]--
function CapKind.query.push(capstate, cap, result) -- aka querycap
   capstate:pushonenestedvalue(result)
   local key = result:pop()
   local tbl = cap.extra
   assert(type(tbl) == "table")
   local val = tbl[key]
   if val ~= nil then
      result:push(val)
      return 1
   else
      return 0
   end
end

--[[
** Fold capture
]]--
function CapKind.fold.push(capstate, cap, result) -- aka foldcap
   local f = cap.extra
   assert(type(f) == "function")
   local head = capstate:advance()
   if capstate:cap():isclosecap() then
      -- no nested captures? (large subject)
      error("no initial value for fold capture")
   end
   local args = List:new()
   local n = capstate:pushcapture(args)
   if n == 0 then
      -- nested captures with no values?
      error("no initial value for fold capture")
   end
   local accum = args[1] -- leave only one result for accumulator
   while head:inside(capstate:cap()) do
      args = List:new()
      args:push( accum ) -- put accumulator first
      n = capstate:pushcapture(args) -- get next capture's values
      accum = f(compat.unpack(args))
   end
   capstate:skipclose(head)
   -- only accumulator left in result
   result:push(accum)
   return 1
end

--[[
** Function capture
]]--
CapKind["function"].push = function(capstate, cap, result)
   local f = cap.extra
   assert(type(f) == "function")
   local args = List:new()
   local n = capstate:pushnestedvalues(args, false)
   local r = compat.pack(f(compat.unpack(args)))
   for i=1,r.n do
      result:push(r[i])
   end
   return r.n
end

--[[
** Accumulator capture
]]--
function CapKind.acc.push(capstate, cap, result) -- aka accumulatorcap
   if #result == 0 then
      error("no previous value for accumulator capture")
   end
   local f = cap.extra
   assert(type(f) == "function")
   local prev = #result
   local args = List:new()
   args:push(result[prev])
   local n = capstate:pushnestedvalues(args, false)
   result[prev] = f(compat.unpack(args))
   return 0 -- did not add any extra value
end

--[[
** Select capture
]]--
function CapKind.num.push(capstate, cap, result) -- aka numcap
   local idx = cap.extra -- value to select
   if idx == 0 then -- no values?
      capstate:nextcap() -- skip entire capture
      return 0 -- no value produced
   else
      local vals = List:new()
      local n = capstate:pushnestedvalues(vals, false)
      if n < idx then -- invalid index?
         error("no capture '"..idx.."'")
      else
         result:push(vals[idx])
         return 1
      end
   end
end

function CapState:runtimecap(closePos)
   local close = self.captures[closePos]
   local open,openPos = self:findopen(closePos) -- get open group capture
   assert(open.kind == CapKind.group)
   self.index = openPos -- set state to the open capture
   local args = List:new()
   args:push( self.source) -- original subject
   args:push( close.bytePos ) -- current position
   local n = self:pushnestedvalues(args, false) -- push nested captures
   local func = open.extra
   local funcRet = compat.pack(func(compat.unpack(args)))
   local res = closePos - openPos -- number of captures to be removed
   return res, funcRet
end

function CapKind.runtime.push(capstate, cap, result) -- aka runtimecap
   result:push(cap.extra)
   capstate.index = capstate.index + 1
   return 1
end

local MAXSTRCAPS = 10

--[[
** Collect values from current capture into array 'cps'. Current
** capture must be Cstring (first call) or Csimple (recursive calls).
** (In first call, fills %0 with whole match for Cstring.)
** Returns number of elements in the array that were filled.
]]--
function CapState:getstrcaps(cps, n)
   local k = n
   n = n + 1
   cps[k] = {
      isstring = true, -- get string value
      bytePos = self:cap().bytePos, -- starts here
   }
   local head = self:advance()
   while head:inside(self:cap()) do
      if n > MAXSTRCAPS then -- too many captures?
         self:nextcap() -- skip extra captures (will not need them)
      elseif self:cap().kind == CapKind.Simple then -- string?
         n = self:getstrcaps(cps, n) -- put info into array
      else
         cps[n] = {
            isstring = false, -- not a string
            cap = self.index, -- keep original capture
         }
         self:nextcap()
         n = n + 1
      end
   end

   cps[k].endPos = head.bytePos + self:closesize(head)
   self:skipclose(head)

   return n
end

function CapState:addonestring(buffer, what)
   local cap = self:cap()
   if cap.kind == CapKind.string then
      -- add capture directly to buffer
      return stringcap(self, buffer)
   elseif cap.kind == CapKind.subst then
      -- add capture directly to buffer
      return substcap(self, buffer)
   elseif cap.kind == CapKind.acc then
      error("invalid context for an accumulator capture")
   end
   local result = List:new()
   local n = self:pushcapture(result)
   if n == 0 then return 0 end -- no values to add
   local val = result[1] -- take only one result (the first)
   if type(val) == "number" then
      val = tostring(val)
   elseif type(val) ~= "string" then
      error("invalid "..what.." value (a "..type(val)..")")
   end
   table.insert(buffer, val)
   return 1
end

--[[
** String capture: add result to buffer 'b' (instead of pushing
** it into the stack)
]]--
function stringcap(capstate, buffer)
   local fmt = capstate:cap().extra
   local cps = {}
   local n = capstate:getstrcaps(cps, 1) - 1 -- collect nested captures
   local sawEscape = false
   for _,c in compat.utf8codes(fmt) do
      if sawEscape then
         sawEscape = false
         if c < 48 or c > 57 then -- not followed by a digit
            table.insert(buffer, compat.utf8char(c))
         else
            local l = 1 + c - 48 -- capture index (1-based)
            if l > n then
               error("invalid capture index ("..(l-1)..")")
            elseif cps[l].isstring then
               table.insert(buffer, capstate:substr(cps[l].bytePos, cps[l].endPos - cps[l].bytePos))
            else
               -- go back to evaluate that nested capture
               local curr = capstate.index
               capstate.index = cps[l].cap
               if capstate:addonestring(buffer, "capture") == 0 then
                  error("no values in capture index "..l)
               end
               capstate.index = curr
            end
         end
      elseif c ~= 37 then -- not a % escape?
         table.insert(buffer, compat.utf8char(c))
      else
         sawEscape = true
      end
   end
   return 1
end

--[[
** Substitution capture: add result to buffer 'b'
]]--
function substcap(capstate, buffer)
   local head = capstate:advance()

   local curr = head.bytePos
   while head:inside(capstate:cap()) do
      local cap = capstate:cap()
      local nextPos = cap.bytePos
      local s = capstate:substr(curr, nextPos - curr)
      table.insert(buffer, s) -- add text up to capture
      if capstate:addonestring(buffer, "replacement") == 0 then
         -- no capture value, keep original text in final result
         curr = nextPos
      else
         -- continue after match
         local lastCap = capstate.captures[capstate.index - 1]
         curr = nextPos + cap:size(lastCap)
      end
   end
   -- add last piece of text
   local s = capstate:substr(curr, head.bytePos + capstate:closesize(head) - curr)
   table.insert(buffer, s)
   capstate:skipclose(head)
end

function CapKind.subst.push(capstate, cap, result) -- aka substcap
   local buffer = {}
   substcap(capstate, buffer)
   result:push(table.concat(buffer))
   return 1
end


function CapKind.string.push(capstate, cap, result) -- aka stringcap
   local buffer = {}
   stringcap(capstate, buffer)
   result:push(table.concat(buffer))
   return 1
end

function CapKind.group.push(capstate, cap, result)
   if cap.extra == nil then -- anonymous group?
      return capstate:pushnestedvalues(result, false) -- add all nested values
   else -- named group: add no values
      capstate:nextcap()
      return 0
   end
end

--[[
** Back-reference capture. Return number of values pushed.
]]--
function CapKind.backref.push(capstate, cap, result)
   local curr = capstate.index
   local _,i = capstate:findback(cap.extra)
   capstate.index = i
   local n = capstate:pushnestedvalues(result, false)
   capstate.index = curr + 1
   return n
end

return {
   CapState = CapState,
   Capture = Capture,
}

end)

register('llpeg.vm', function(myrequire)
myrequire('strict')
local compat = myrequire('advent.compat')
local utf8util = myrequire('llpeg.utf8util')
local from = myrequire('llpeg.types').from
local
   CHARMAX,
   CapKind,
   Opcode,
   enum,
   _ = from(myrequire('llpeg.types'), {
               'CHARMAX',
               'CapKind',
               'Opcode',
               'enum',
   })
local
   CapState,
   Capture,
   __ = from(myrequire('llpeg.cap'), {
                'CapState',
                'Capture',
   })
local
   Instruction,
   ___ = from(myrequire('llpeg.code'), {
                'Instruction',
   })
local
   printcaplist,
   ___ = from(myrequire('llpeg.print'), {
                'printcaplist',
   })

local LFAIL = {}
local InsidePred = enum{
   OUTPRED = 0,
   INPRED = 1,
}

local Stack = {}
Stack.__index = Stack
-- Stack prev: previous entry in the stack
-- int bytePos: saved position, or NULL for calls
-- Instruction pc: saved instruction
-- int caplevel
-- int labenv -- for labeled failure
-- bool predchoice -- for labeled failure
function Stack:new(prev, bytePos, pc, caplevel, labenv, predchoice)
   return setmetatable({
         prev = prev,
         bytePos = bytePos, pc = pc, caplevel = caplevel,
         labenv = labenv, predchoice = predchoice,
   }, self)
end

function Stack:__tostring()
   return string.format(
      "Stack{ bytePos=%d, caplevel=%d, labenv=%s, predchoice=%s }",
      self.bytePos, self.caplevel, self.labenv, self.predchoice
   )
end

function Stack:print()
   local s = self
      while s ~= nil do
         print("Stack", s)
         s = s.prev
      end
end

local MatchResult = {}
MatchResult.__index = MatchResult

function MatchResult:new()
   return setmetatable({
         labelf = nil, -- failure label
         sfail = -1, -- farthest failure
   }, self)
end

local State = {}
State.__index = State

function State:new(source, bytePos, ...)
   local giveup = Instruction:new{Opcode.Giveup}
   local insidepred = InsidePred.OUTPRED -- label environment is off inside predicates
   local stack = Stack:new(nil, bytePos, giveup, 0, insidepred, nil)
   local cp,cpLen
   if bytePos <= #source then
      cp, cpLen = utf8util.codepointAt(source, bytePos)
   else
      cp, cpLen = nil, nil
   end
   assert(bytePos ~= nil)
   return setmetatable({
         source = source, -- the source string
         bytePos = bytePos, -- current position in the string, in bytes
         codepoint = cp, -- the codepoint at 'bytePos' in 'source'
         codepointLen = cpLen, -- the length of the codepoint at 'bytePos'
         stack = stack, -- top of stack
         captures = {}, -- list of captures
         captop = 1, -- point to first empty slot in captures (1-based)
         extraArgs = compat.pack(...),
         -- labeled failures:
         insidepred = insidepred,
         labelf = nil, -- failure label
         sfail = -1, -- farthest failure
   }, self)
end

function State:advance()
   return self:resetPosTo(self.bytePos + self.codepointLen)
end

function State:resetPosTo(newPos)
   assert(newPos ~= nil)
   self.bytePos = newPos
   local source = self.source
   if newPos <= #source then
      local cp, cpLen = utf8util.codepointAt(source, newPos)
      self.codepoint = cp
      self.codepointLen = cpLen
      return cp
   else
      self.codepoint = nil
      self.codepointLen = nil
      return nil
   end
end

function State:backtrack(n)
   local off = utf8util.offset(self.source, -n, self.bytePos)
   if off == nil then return false end -- can't backtrack that far!
   self:resetPosTo(off)
   return true
end

function State:updatefarthest(label)
   self.labelf = label
   if self.bytePos > self.sfail then
      self.sfail = self.bytePos
   end
end

function State:pushcapture(cap)
   self.captures[self.captop] = cap
   self.captop = self.captop + 1
end

function State:fail()
   -- pattern failed, try to backtrack
   local lastStack
   repeat
      lastStack = self.stack
      self.stack = lastStack.prev
   until lastStack.bytePos ~= nil
   self:resetPosTo(lastStack.bytePos)
   self.captop = lastStack.caplevel
   self.insidepred = lastStack.labenv -- labeled failure
   return lastStack.pc:exec(self)
end

function State:giveup()
   local r = nil
   local lab = "fail"
   local errpos = self.sfail
   if self.labelf ~= nil and self.labelf ~= LFAIL then
      lab = self.labelf
   end
   return r, lab, errpos
end

function State:getcaptures()
   local results = {}
   if self.captures[1].kind == CapKind.close then -- are there any captures?
      return results -- no captures
   end
   return CapState:new(self.captures, self.source, self.extraArgs):getcaptures()
end

function Opcode.End:exec(state)
   state:pushcapture(CapKind.close:newCapture(state.bytePos, 0))
   -- trim table to proper length
   while #state.captures > state.captop - 1 do
      table.remove(state.captures)
   end
   -- printcaplist(state.captures, #state.captures) -- for debugging
   local results = state:getcaptures()
   if #results == 0 then -- no captured values?
      return state.bytePos -- return only end position
   else
      return compat.unpack(results)
   end
end

function Opcode.Giveup:exec(state)
   return state:giveup()
end

function Opcode.Ret:exec(state)
   local lastStack = state.stack
   state.stack = lastStack.prev
   return lastStack.pc:exec(state)
end

function Opcode.Any:exec(state)
   if state.codepoint ~= nil then
      state:advance()
      return self.next:exec(state)
   end
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.TestAny:exec(state)
   if state.codepoint ~= nil then
      return self.next:exec(state)
   else
      return self.branch:exec(state)
   end
end

function Opcode.UTFR:exec(state)
   local cp = state.codepoint
   if cp ~= nil and self.from <= cp and cp <= self.to then
      state:advance()
      return self.next:exec(state)
   end
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.Char:exec(state)
   if state.codepoint == self.aux then
      state:advance()
      return self.next:exec(state)
   end
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.TestChar:exec(state)
   if state.codepoint == self.aux then
      return self.next:exec(state)
   else
      return self.branch:exec(state)
   end
end

function Opcode.Set:exec(state)
   local cp = state.codepoint
   if cp ~= nil then
      if cp <= CHARMAX then
         if self.set[cp] then
            state:advance()
            return self.next:exec(state)
         end
      else
         if self.rest then
            state:advance()
            return self.next:exec(state)
         end
      end
   end
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.TestSet:exec(state)
   local cp = state.codepoint
   if cp ~= nil then
      if cp <= CHARMAX then
         if self.set[cp] then
            return self.next:exec(state)
         end
      elseif self.rest then
         return self.next:exec(state)
      end
   end
   return self.branch:exec(state)
end

function Opcode.Behind:exec(state)
   local n = self.aux
   -- XXX SLOW self.aux is in *characters* not *bytes*
   if state:backtrack(n) then
      return self.next:exec(state)
   end
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.Span:exec(state)
   local cp = state.codepoint
   while true do
      local match = false
      if cp ~= nil then
         if cp <= CHARMAX then
            if self.set[cp] then match = true end
         else
            if self.rest then match = true end
         end
      end
      if not match then break end
      cp = state:advance()
   end
   return self.next:exec(state)
end

function Opcode.Jmp:exec(state)
   return self.branch:exec(state)
end

function Opcode.Choice:exec(state)
   state.stack = Stack:new(
      state.stack, state.bytePos, self.branch, state.captop, state.insidepred
   )
   return self.next:exec(state)
end

function Opcode.PredChoice:exec(state)
   state.stack = Stack:new(
      state.stack, state.bytePos, self.branch, state.captop, state.insidepred,
      true -- predchoice
   )
   state.insidepred = InsidePred.INPRED
   return self.next:exec(state)
end

function Opcode.Call:exec(state)
   state.stack = Stack:new(
      state.stack, nil, self.next
   )
   return self.branch:exec(state)
end

function Opcode.Commit:exec(state)
   state.stack = state.stack.prev
   return self.branch:exec(state)
end

function Opcode.PartialCommit:exec(state)
   local stack = state.stack
   stack.bytePos = state.bytePos
   stack.caplevel = state.captop
   return self.branch:exec(state)
end

function Opcode.BackCommit:exec(state)
   local stack = state.stack
   state.stack = stack.prev -- pop the stack
   state:resetPosTo(stack.bytePos) -- but reset the position to that stored
   state.insidepred = stack.labenv -- labeled failure
   state.captop = stack.caplevel
   return self.branch:exec(state)
end

function Opcode.Throw:exec(state)
   if state.insidepred == InsidePred.OUTPRED then
      state.labelf = self.key
      -- pop entire stack
      while state.stack.prev ~= nil do
         state.stack = state.stack.prev
      end
   else
      state.labelf = LFAIL
      -- pop until you read a 'predchoice' state
      while not state.stack.predchoice do
         state.stack = state.stack.prev
      end
   end
   state.sfail = state.bytePos
   return state:fail()
end

function Opcode.ThrowRec:exec(state) -- with recovery
   state.sfail = state.bytePos
   if state.insidepred == InsidePred.OUTPRED then
      state.labelf = self.key
      state.stack = Stack:new(
         state.stack, nil, self.next, state.captop
      )
      return self.branch:exec(state)
   else
      state.labelf = LFAIL
      -- pop until you read a 'predchoice' state
      while not state.stack.predchoice do
         state.stack = state.stack.prev
      end
      return state:fail()
   end
end

function Opcode.FailTwice:exec(state)
   state.stack = state.stack.prev
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.Fail:exec(state)
   state:updatefarthest(LFAIL)
   return state:fail()
end

function Opcode.CloseRunTime:exec(state)
   -- close the group
   state:pushcapture(self.cap:newCapture(state.bytePos, 0))
   -- trim table to proper length
   while #state.captures > state.captop - 1 do
      table.remove(state.captures)
   end
   local cs = CapState:new(state.captures, state.source, state.extraArgs)
   local n, funcRet = cs:runtimecap(state.captop - 1)
   state.captop = state.captop - n -- remove nested captures
   -- resdyncaptures: resolve returned values in `funcRet`
   -- first argument false=fail, true=keep current pos, number=next position
   local firstArg = funcRet[1]
   if funcRet.n == 0 then
      firstArg = false -- returning void means we'll fail
   end
   if not firstArg then -- if it is falsey, discard rest of returned vals & fail
      state:updatefarthest(LFAIL)
      return state:fail() -- tail call
   elseif type(firstArg) == "boolean" then
      -- keep current position, nothing needs to be done
   else
      local npos = tonumber(firstArg)
      if npos < state.bytePos or npos > (1 + #(state.source)) then
         error("invalid position returned by match-time capture")
      end
      state:resetPosTo(npos)
   end
   -- push the rest of the funcRet values as new captures
   local n = funcRet.n - 1 -- number of new captures
   if n == 0 then -- no new captures?
      state.captop = state.captop - 1 -- remove open group
   else
      -- new captures, keep original open group
      -- add new captures + close group to 'capture' list
      -- adddyncaptures:
      assert(state.captures[state.captop - 1].kind == CapKind.group)
      assert(state.captures[state.captop - 1]:isopencap())
      -- make group capture an anonymous group (this used to hold match-time f)
      state.captures[state.captop - 1].extra = nil
      for i=2,funcRet.n do -- add runtime captures
         state:pushcapture(CapKind.runtime:newCapture(state.bytePos, 0, funcRet[i]))
      end
      -- close group
      state:pushcapture(CapKind.close:newCapture(state.bytePos, 0))
   end
   return self.next:exec(state)
end

local MAXLOP = 20
function findopen(captures, i, currPos)
   i = i - 1 -- check last captop
   local cap = captures[i]
   if (not cap:isopencap()) and cap.bytePos == currPos then
      return nil -- current one cannot be a full capture
   end
   -- else, look for an 'open' capture
   for j=1,MAXLOP do
      if cap:isopencap() then -- open capture?
         return cap,i -- that's the one to be closed
      elseif cap.kind == CapKind.close then
         return nil -- a full capture should not nest a non-full one
      end
      i = i - 1
      if i<1 then break end
      cap = captures[i]
   end
   return nil -- not found within allowed search limit
end

function Opcode.CloseCapture:exec(state)
   -- if possible, turn capture into a full capture
   assert(state.captop > 1)
   local open,_ = findopen(state.captures, state.captop, state.bytePos)
   if open ~= nil then -- if possible, turn capture into a full capture
      open.byteLen = state.bytePos - open.bytePos
   else
      -- non-nil length to mark entry as closed
      state:pushcapture(self.cap:newCapture(state.bytePos, 0))
   end
   return self.next:exec(state)
end

function Opcode.OpenCapture:exec(state)
   state:pushcapture(self.cap:newCapture(
      -- byteLen = nil marks entry as open
      state.bytePos, nil, self.key
   ))
   return self.next:exec(state)
end

function Opcode.FullCapture:exec(state)
   -- XXX SLOW: self.aux is in *characters* not *bytes*
   local nPos = utf8util.offset(state.source, -self.aux, state.bytePos)
   state:pushcapture(self.cap:newCapture(
      nPos, state.bytePos - nPos, self.key
   ))
   return self.next:exec(state)
end

function match(s, init, code, ...)
   local state = State:new(s, init, ...)
   return code[1]:exec(state)
end

return {
   match = match,
}

end)

register('llpeg', function(myrequire)
local VERSION = '0.0.1'
local MAXSTACK = 400 -- maximum backtracking
local MAXBEHIND = 255 -- maximum look-behind
local MAXRULES = 1000 -- maximum number of rules
local LPEG_COMPAT = true

myrequire('strict')
local compat = myrequire('advent.compat')

local from = myrequire('llpeg.types').from
local
   CHARMAX,
   CapKind,
   TTag,
   define,
   define_tree_visitor,
   metareg,
   newcharset,
   numsiblings,
   _ = from(myrequire('llpeg.types'), {
               'CHARMAX',
               'CapKind',
               'TTag',
               'define',
               'define_tree_visitor',
               'metareg',
               'newcharset',
               'numsiblings',
   })
local
   compile,
   cs_diff,
   cs_union,
   fixedlen,
   hascaptures,
   nofail,
   nullable,
   nullable_with_grammar,
   tocharset,
   __ = from(myrequire('llpeg.code'), {
                'compile',
                'cs_diff',
                'cs_union',
                'fixedlen',
                'hascaptures',
                'nofail',
                'nullable',
                'nullable_with_grammar',
                'tocharset',
   })
local
   match,
   ___ = from(myrequire('llpeg.vm'), {
                'match',
   })
local
   printpatt,
   printrepl,
   printtree,
   ____ = from(myrequire('llpeg.print'), {
                 'printpatt',
                 'printrepl',
                 'printtree',
   })

function checkint(v)
   if type(v) == 'string' then
      v = tonumber(v)
   end
   if type(v) ~= "number" then
      error("not a number")
   end
   return math.floor(v)
end

local is_pattern = define_type_visitor{
   pattern = function() return true end,
   default = function() return false end,
}

local ptype = define_type_visitor{
   pattern = function() return "pattern" end,
   default = function(v) return type(v) end,
}

function val2str(v)
   if type(v) == 'number' then return tostring(v) end
   if type(v) == 'string' then return v end
   return string.format("(a %s)", ptype(v))
end

--[[ lpltree.c ]]--

function newtree(tag)
   local t = {
      tag = tag,
      code = nil,
   }
   setmetatable(t, metareg)
   return t
end

function newleaf(tag, n)
   return setmetatable({
         tag = tag,
         code = nil,
         n = n,
   }, metareg)
end

function newroot1sib(tag, sib1)
   return setmetatable({
         tag = tag,
         code = nil,
         sib1 = sib1,
   }, metareg)
end

function newroot2sib(tag, sib1, sib2)
   return setmetatable({
         tag = tag,
         code = nil,
         sib1 = sib1,
         sib2 = sib2,
   }, metareg)
end

--[[ Build a sequence of #s nodes from the array 's' with the tag 'tag' ]]--
function fillseq(tag, s)
   if type(s) == 'number' then
      local len = checkint(s)
      s = setmetatable({}, {__len = function() return len end})
   end
   if #s == 0 then
      return newleaf(tag, 0)
   end
   local i = #s
   local result = newleaf(tag, s[i])
   while i > 1 do
      i = i - 1
      result = newroot2sib(
         TTag.Seq,
         newleaf(tag, s[i]),
         result
      )
   end
   return result
end

--[[ Numbers as patterns:
 0 == true (always match); n == TAny repeated 'n' times;
 -n == not (TAny repeated 'n' times)
]]--
function numtree(n)
   n = checkint(n)
   if n == 0 then
      return newleaf(TTag.True)
   elseif n > 0 then
      return fillseq(TTag.Any, n) -- sequence of 'n' anys
   else
      return newroot1sib(TTag.Not, fillseq(TTag.Any, -n))
   end
end

-- Convert value v to a pattern
local getpatt = define_type_visitor{
   ["string"] = function(s)
      if #s == 0 then
         return newleaf(TTag.True) -- always match if string is empty
      end
      local cp = {}
      for _,c in compat.utf8codes(s) do
         table.insert(cp, c)
      end
      return fillseq(TTag.Char, cp)
   end,
   ["number"] = function(n)
      return numtree(n)
   end,
   ["boolean"] = function(b)
      if b then
         return newleaf(TTag.True)
      else
         return newleaf(TTag.False)
      end
   end,
   ["function"] = function(f)
      return setmetatable({
            tag = TTag.RunTime,
            code = nil,
            key = f,
            sib1 = newleaf(TTag.True),
      }, metareg)
   end,
   ["pattern"] = function(v)
      return v
   end,
   ["table"] = function(v)
      return newgrammar(v)
   end,
   default = function(v)
      error("Not a pattern")
   end,
}

-- labeled failure begin
function newthrowleaf(label)
   return setmetatable({
         tag = TTag.Throw,
         code = nil,
         sib2 = nil, -- no recovery rule associated (yet)
         key = label,
   }, metareg)
end
-- labeled failure end

function lp_P(v)
   return getpatt(v)
end

--[[
** sequence operator; optimizations:
** false x => false, x true => x, true x => x
** (cannot do x . false => false because x may have runtime captures)
]]--

function lp_seq(tree1, tree2)
   tree1 = getpatt(tree1)
   tree2 = getpatt(tree2)
   if tree1.tag == TTag.False or tree2.tag == TTag.True then
      -- false . x = false, x . true = x
      return tree1
   elseif tree1.tag == TTag.True then
      -- true . x = x
      return tree2
   else
      return newroot2sib(TTag.Seq, tree1, tree2)
   end
end


--[[
** choice operator; optimizations:
** charset / charset => charset
** true / x => true, x / false => x, false / x => x
** (x / true is not equivalent to true)
]]--
function lp_choice(t1, t2)
   t1 = getpatt(t1)
   t2 = getpatt(t2)
   local t1c = tocharset(t1)
   local t2c = tocharset(t2)
   if t1c ~= nil and t2c ~= nil then
      local t = cs_union(t1c, t2c)
      return t
   elseif nofail(t1) or t2.tag == TTag.False then
      -- true / x => true, x / false => x
      return t1
   elseif t1.tag == TTag.False then
      -- false / x => x
      return t2
   else
      return newroot2sib(TTag.Choice, t1, t2)
   end
end

--[[
   p^n
]]--
function lp_star(p, n)
   local tree1 = getpatt(p)
   n = checkint(n)
   if n >= 0 then -- seq tree1 (seq tree1 ... (seq tree1 (rep tree1)))
      if nullable(tree1) then
         error("loop body may accept empty string")
      end
      local tree = newroot1sib(TTag.Rep, tree1)
      while n > 0 do
         tree = newroot2sib(TTag.Seq, tree1, tree)
         n = n - 1
      end
      return tree
   else -- choice (seq tree1 ... choice tree1 true ...) true
      n = -n
      local tree = newroot2sib( -- at most 1
            TTag.Choice,
            tree1,
            newleaf(TTag.True)
      )
      while n > 1 do
         tree = newroot2sib( -- at most (n-1)
            TTag.Seq,
            tree1,
            tree
         )
         tree = newroot2sib(TTag.Choice, tree, newleaf(TTag.True))
         n = n - 1
      end
      return tree
   end
end

--[[
** #p == &p
]]--
function lp_and(v)
   return newroot1sib(TTag.And, getpatt(v))
end

--[[
** -p == !p
]]--
function lp_not(v)
   return newroot1sib(TTag.Not, getpatt(v))
end

--[[
** [t1 - t2] == Seq (Not t2) t1
** If t1 and t2 are charsets, make their difference.
]]--
function lp_sub(t1, t2)
   t1 = getpatt(t1)
   t2 = getpatt(t2)
   local t1c = tocharset(t1)
   local t2c = tocharset(t2)
   if t1c ~= nil and t2c ~= nil then
      return cs_diff(t1c, t2c)
   else
      return newroot2sib(
         TTag.Seq,
         newroot1sib(TTag.Not, t2),
         t1
      )
   end
end

--[[
   A set with the given characters
]]--
function lp_set(s)
   local t = newcharset()
   local extra = nil
   for _,c in compat.utf8codes(s) do
      if c > CHARMAX then
         -- non ascii, we can't use charset for these
         local one = newleaf(TTag.Char, c)
         if extra == nil then
            extra = one
         else
            extra = newroot2sib(TTag.Choice, extra, one)
         end
      else
         t.set[c] = true
      end
   end
   if extra == nil then
      return t
   else
      return newroot2sib(TTag.Choice, t, extra)
   end
end

function lp_range(...)
   local t = newcharset()
   local extra = nil
   for _,v in ipairs{...} do
      if type(v) ~= "string" then
         error("argument must be string")
      else
         local first, second
         for _,c in compat.utf8codes(v) do
            if first == nil then
               first = c
            elseif second == nil then
               second = c
            else
               error("range must have two characters")
            end
         end
         if first == nil or second == nil then
            error("range must have two characters")
         end
         if first > second then
            if LPEG_COMPAT then
               -- ignore, just silently create an empty range
            else
               error("empty range")
            end
         elseif second <= CHARMAX then -- ascii range
            for i = first, second do
               t.set[i] = true
            end
         else
            local r = lp_utfr(first, second)
            if extra == nil then
               extra = r
            else
               extra = newroot2sib(TTag.Choice, extra, one)
            end
         end
      end
   end
   if extra == nil then
      return t
   else
      return newroot2sib(TTag.Choice, t, extra)
   end
end

function lp_utfr(from, to)
   from = checkint(from)
   to = checkint(to)
   if from > to then
      error("empty range")
   end
   if to > 0x10FFFF then
      error("invalid code point")
   end
   if to <= CHARMAX then -- ascii range?
      local t = newcharset() -- code it as a regular charset
      for i = from, to do
         t.set[i] = true
      end
      return t
   end
   -- multibyte utf-8 range
   return setmetatable({
         tag = TTag.UTFR,
         code = nil,
         from = from,
         to = to,
   }, metareg)
end

--[[
   Look-behind predicate
]]--
function lp_behind(v)
   local tree1 = getpatt(v)
   local n = fixedlen(tree1)
   if n < 0 then
      error("pattern may not have fixed length")
   end
   if hascaptures(tree1) then
      error("pattern has captures")
   end
   if n > MAXBEHIND then
      error("pattern too long to look behind")
   end
   return setmetatable({
         tag = TTag.Behind,
         code = nil,
         sib1 = tree1,
         n = n,
   }, metareg)
end

--[[ labeled failure begin ]]--
--[[
** Throws a label
]]--
local lp_throw = define_type_visitor{
   [{"string","number"}] = newthrowleaf,
   default = function() error("not a string") end,
}
--[[ labeled failure end ]]--


--[[
** Create a non-terminal
]]--
function lp_V(v)
   if v == nil then
      error("non-nil value expected")
   end
   return setmetatable({
         tag = TTag.Call,
         code = nil,
         key = v,
   }, metareg)
end

--[[
** Create a tree for a non-empty capture, with a body and
** optionally with an associated Lua value (at index 'labelidx' in the
** stack)
]]--
function capture_aux(capkind, patt, val)
   local t = newroot1sib(TTag.Capture, getpatt(patt))
   t.cap = capkind
   t.key = val
   return t
end

function newemptycap(capkind, val)
   return capture_aux(capkind, newleaf(TTag.True), val)
end

--[[
** Captures with syntax p / v
** (function capture, query capture, string capture, or number capture)
]]--
local divcapture_helper = define_type_visitor{
   ["function"] = function(v, p)
      return capture_aux(CapKind["function"], p, v)
   end,
   ["table"] = function(v, p)
      return capture_aux(CapKind.query, p, v)
   end,
   ["string"] = function(v, p)
      return capture_aux(CapKind.string, p, v)
   end,
   ["number"] = function(v, p)
      v = checkint(v)
      if v < 0 or v > 65536 then
         error("invalid number")
      end
      return capture_aux(CapKind.num, p, v)
   end,
   default = function(v)
      error("unexpected "..ptype(v).." as 2nd operand to LPeg '/'") end,
}
function lp_divcapture(p, v)
   return divcapture_helper(v, p) -- dispatch on v
end

function lp_acccapture(p, v)
   return capture_aux(CapKind.acc, p, v)
end

-- the match for patt with the values from nested captures replacing their
-- matches
function lp_substcapture(patt)
   return capture_aux(CapKind.subst, patt)
end

-- a table with all captures from patt
function lp_tablecapture(patt)
   return capture_aux(CapKind.table, patt)
end

-- the values produced by patt, optionally tagged with key
function lp_groupcapture(patt, key)
   -- key can be nil
   return capture_aux(CapKind.group, patt, key)
end

-- folding capture (deprecated)
function lp_foldcapture(patt, func)
   if type(func) ~= "function" then
      error("Bad function argument")
   end
   return capture_aux(CapKind.fold, patt, func)
end

-- the match for patt plus all captures made by patt
function lp_simplecapture(patt)
   return capture_aux(CapKind.simple, patt)
end

-- the current position (matches the empty string)
function lp_poscapture()
   return newemptycap(CapKind.position)
end

-- the value of the nth extra argument to lpeg.match (matches the empty string)
function lp_argcapture(n)
   n = checkint(n)
   if n <= 0 or n > 65536 then error("invalid argument index") end
   return newemptycap(CapKind.arg, n)
end

-- the value produced by the previous group capture named `key`
-- (matches the empty string)
function lp_backref(key)
   return newemptycap(CapKind.backref, key)
end

-- Constant capture (matches the empty string)
function lp_constcapture(...)
   local arg = compat.pack(...)
   if arg.n == 0 then -- no values?
      return newleaf(TTag.True) -- no capture
   else
      local i = arg.n
      local tree = newemptycap(CapKind.const, arg[i])
      while i > 1 do
         i = i - 1
         tree = newroot2sib(
            TTag.Seq,
            newemptycap(CapKind.const, arg[i]),
            tree
         )
      end
      if arg.n == 1 then
         -- single constant capture
         return tree
      else
         -- create a group capture with all values
         return lp_groupcapture( tree )
      end
   end
end

-- the returns of function applied to the captures of patt; the application
-- is done at match time
function lp_matchtime(patt, func)
   if type(func) ~= 'function' then
      error('not a function')
   end
   return setmetatable({
         tag = TTag.RunTime,
         code = nil,
         key = func,
         sib1 = getpatt(patt),
   }, metareg)
end

--[[======================================================]]--


--[[
** ======================================================
** Grammar - Tree generation
** ======================================================
]]--

--[[
** push on the stack the index and the pattern for the
** initial rule of grammar at index 'arg' in the stack;
** also add that index into position table.
]]--
function getfirstrule(tbl)
   local first_name, first_rule
   first_name = tbl[1]
   -- is this the name of an initial rule?
   if type(first_name) == 'number' or type(first_name) == 'string' then
      first_rule = tbl[first_name] -- get associated rule
   else
      first_name,first_rule = 1,first_name
   end
   if not is_pattern(first_rule) then
      if first_rule == nil then
         error("grammar has no initial rule")
      else
         error(string.format("initial rule '%s' is not a pattern", first_name))
      end
   end
   -- rule position (after TGrammar)
   -- return map from name to position, and from position to name
   return { [first_name] = 1 }, { first_name }
end

--[[
** traverse grammar at index 'arg', pushing all its keys and patterns
** into the stack. Create a new table (before all pairs key-pattern) to
** collect all keys and their associated positions in the final tree
** (the "position table").
** Return the number of rules and (in 'totalsize') the total size
** for the new tree.
]]--
function collectrules(tbl)
    -- find the first rule and put in position table
   local postab, rpostab = getfirstrule(tbl)

   -- collect and sort rule names (for repeatability)
   local names = {}
   for k,v in pairs(tbl) do
      if k == 1 or postab[k] == 1 then -- initial rule?
         -- skip the initial rules, it's already in the position table
      else
         table.insert(names, k)
      end
   end
   table.sort(names, function(a,b)
                 return tostring(a) < tostring(b)
   end)

   -- fill out rule, name, and position maps
   for _,k in ipairs(names) do
      local v = tbl[k]
      if not is_pattern(v) then
         error("rule '" .. val2str(k) .. "' is not a pattern")
      end
      table.insert(rpostab, k)
      postab[k] = #rpostab
   end
   return postab, rpostab
end

function buildgrammar(g, tbl, postab, rpostab)
   local trees = {}
   for i,name in ipairs(rpostab) do
      local rule = setmetatable({
            tag = TTag.Rule,
            code = nil,
            key = nil, -- will be fixed when rule is used
            n = i, -- rule number
            name = name,
            sib1 = tbl[name], -- pattern
            sib2 = nil,
      }, metareg)
      table.insert(trees, rule)
      g.ruletab[name] = rule
   end
   -- link up siblings
   for i = 1, #trees-1 do
      trees[i].sib2 = trees[i+1]
   end
   trees[#trees].sib2 = newleaf(TTag.True) -- finish list of rules
   g.sib1 = trees[1]
end

--[[
** Check whether a tree has potential infinite loops
]]--
function checkloops(grammar, tree)
   local n = numsiblings[tree.tag]
   if tree.tag == TTag.Rep and nullable_with_grammar(tree.sib1, grammar) then
      return true
   elseif tree.tag == TTag.Grammar then
      return false -- sub-grammars already checked
   elseif n == 1 then
      return checkloops(grammar, tree.sib1) -- tail call
   elseif n == 2 then
      if checkloops(grammar, tree.sib1) then
         return true
      else
         return checkloops(grammar, tree.sib2) -- tail call
      end
   elseif n == 0 then
      return false
   else
      error("surprising number of siblings")
   end
end

--[[
** Give appropriate error message for 'verifyrule'. If a rule appears
** twice in 'passed', there is path from it back to itself without
** advancing the subject.
]]--
function verifyerror(grammar, passed, npassed)
   local i, j
   for i = npassed,1,-1 do -- search for a repetition
      for j = i-1,1,-1 do
         if passed[i] == passed[j] then
            error(string.format("rule '%s' may be left recursive", val2str(passed[i])))
         end
      end
   end
   error("too many left calls in grammar")
end

--[[
** Check whether a rule can be left recursive; raise an error in that
** case; otherwise return 1 iff pattern is nullable.
** The return value is used to check sequences, where the second pattern
** is only relevant if the first is nullable.
** Parameter 'nb' works as an accumulator, to allow tail calls in
** choices. ('nb' true makes function returns true.)
** Parameter 'passed' is a list of already visited rules, 'npassed'
** counts the elements in 'passed'.
** Assume ktable at the top of the stack.
]]--
local verifyrule
verifyrule = define_tree_visitor{
   [{
         TTag.Char, TTag.Set, TTag.Any, TTag.False, TTag.UTFR,
         TTag.Throw, -- labeled failure
   }] = function(tree, g, passed, n, nb)
      return nb -- cannot pass from here
   end,
   [{
         TTag.True, TTag.Behind, -- look-behind cannot have calls
   }] = function(tree, g, passed, n, nb)
      return true
   end,
   [{ TTag.Not, TTag.And, TTag.Rep, }] = function(tree, g, passed, n, nb)
      return verifyrule(tree.sib1, g, passed, n, true) -- tail call
   end,
   [{ TTag.Capture, TTag.RunTime, TTag.XInfo, }] = function(tree, g, passed, n, nb)
      return verifyrule(tree.sib1, g, passed, n, nb) -- tail call
   end,
   [ TTag.Call ] = function(tree, g, passed, n, nb)
      local rule = g.ruletab[tree.key] -- look up rule
      return verifyrule(rule, g, passed, n, nb) -- tail call
   end,
   [ TTag.Seq ] = function(tree, g, passed, n, nb)
      -- only check 2nd child if first is nb
      if not verifyrule(tree.sib1, g, passed, n, false) then
         return nb
      else
         -- note that we don't propagate new npassed from 1st child
         return verifyrule(tree.sib2, g, passed, n, nb) -- tail call
      end
   end,
   [ TTag.Choice ] = function(tree, g, passed, n, nb)
      -- must check both children
      nb = verifyrule(tree.sib1, g, passed, n, nb)
      -- note that we don't propagate new npassed from 1st child
      return verifyrule(tree.sib2, g, passed, n, nb) -- tail call
   end,
   [ TTag.Rule ] = function(tree, g, passed, n, nb)
      if n >= MAXRULES then -- too many steps?
         return verifyerror(g, passed, n) -- error
      else
         passed[n+1] = tree.key -- add rule to path
         return verifyrule(tree.sib1, g, passed, n + 1, nb) -- tail call
      end
   end,
   [ TTag.Grammar ] = function(tree, g, passed, n, nb)
      return nullable(tree) -- sub-grammar cannot be left recursive
   end,
}

function verifygrammar(grammar)
   local passed = {}
   -- check left-recursive rules
   local rule = grammar.sib1
   while rule.tag == TTag.Rule do
      if rule.key ~= nil then -- skip unused rules
         verifyrule(rule.sib1, grammar, passed, 0, false)
      end
      rule = rule.sib2
   end
   if rule.tag ~= TTag.True then
      error("assertion failure")
   end
   -- check infinite loops inside rules
   rule = grammar.sib1
   while rule.tag == TTag.Rule do
      if rule.key ~= nil then -- skip unused rules
         if checkloops(grammar, rule.sib1) then
            error("empty loop in rule '" .. val2str(rule.name) .. "'")
         end
      end
      rule = rule.sib2
   end
   if rule.tag ~= TTag.True then
      error("assertion failure")
   end
end

--[[
** Fix a TOpenCall into a TCall node, using table 'postable' to
** translate a key to its rule address in the tree. Raises an
** error if key does not exist.
]]--
function fixonecall(g, t, postab)
   local name = t.key
   local rule = g.ruletab[name]
   if t.tag == TTag.Call then
      if rule == nil then
         error(string.format("rule '%s' undefined in given grammar", val2str(name)))
      end
      -- unlike our upstream, we don't clone patterns when we build a grammar
      -- so we can't mutate this tree w/o possibly breaking any other grammars
      -- which might hold an alias of this call.  So we don't distinguish
      -- Call and OpenCall and we don't mutate the tag here and
      -- don't link it up.  However, we can mutate the Rule
      -- as those are not shared
      rule.key = name -- mark this as used
   elseif rule ~= nil then -- TTag.Throw
      -- As before, we can't mutate the tree
      rule.key = name -- mark this as used
   end
end

--[[
** Transform left associative constructions into right
** associative ones, for sequence and choice; that is:
** (t11 + t12) + t2  =>  t11 + (t12 + t2)
** (t11 * t12) * t2  =>  t11 * (t12 * t2)
** (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2))
]]--
function  correctassociativity (tree)
   local tag = tree.tag
   if tag ~= TTag.Choice and tag ~= TTag.Seq then
      error("impossible")
   end
   local t1 = tree.sib1
   while t1.tag == tree.tag do
      local t11, t12 = t1.sib1, t1.sib2
      local t2 = tree.sib2
      -- don't mutate t1 in place as others may be keeping copies of it;
      -- mutating 'tree' in place is okay as we're not changing its semantics
      tree.sib1 = t11
      tree.sib2 = newroot2sib(tag, t12, t2)
      t1 = tree.sib1
   end
   return tree
end

--[[
** Make final adjustments in a tree. Fix open calls in tree 't',
** making them refer to their respective rules or raising appropriate
** errors (if not inside a grammar). Correct associativity of associative
** constructions (making them right associative). Assume that tree's
** ktable is at the top of the stack (for error messages).
]]--
local finalfix_helper = define_tree_visitor{
   [TTag.Grammar] = function(t)
      return t -- subgrammars were already fixed
   end,
   [TTag.Call] = function(t, g, postab)
      if g == nil then
         error("rule '" .. val2str(t.key) .. "' used outside a grammar")
      else
         return fixonecall(g, t, postab)
      end
   end,
   [TTag.Throw] = function(t, g, postab)
      if g ~= nil then
         return fixonecall(g, t, postab)
      end
   end,
   [{TTag.Seq, TTag.Choice}] = function(t, g, postab)
      return correctassociativity(t)
   end,
   default = function(t) return t end,
}
function finalfix(g, t, postab)
   finalfix_helper(t, g, postab)
   if t.tag == TTag.Grammar then return end
   local n = numsiblings[t.tag]
   if n == 1 then
      return finalfix(g, t.sib1, postab) -- tail call
   elseif n == 2 then
      finalfix(g, t.sib1, postab)
      return finalfix(g, t.sib2, postab) -- tail call
   elseif n == 0 then
      return
   else
      error("strange number of siblings")
   end
end

--[[
** Give a name for the initial rule if it is not referenced
]]--
function initialrulename(grammar)
   if grammar.sib1.key == nil then -- initial rule is not referenced?
      grammar.sib1.key = grammar.sib1.name
   end
end

function newgrammar(tbl)
   local postab, rpostab = collectrules(tbl)
   local g = setmetatable({
         tag = TTag.Grammar,
         code = nil,
         sib1 = nil, -- will fill this in
         n = #rpostab, -- number of rules
         ruletab = {}, -- map rule names to rules
   }, metareg)
   buildgrammar(g, tbl, postab, rpostab)
   finalfix(g, g.sib1, postab)
   initialrulename(g)
   verifygrammar(g)
   return g
end

--[[ ====================================================== ]]--

function prepcompile(p)
   finalfix(nil, p, {}) -- correct associativity
   return compile(p)
end

function lp_printtree(patt, c)
   local tree = getpatt(patt)
   if c then
      finalfix(nil, tree, {}) -- correct associativity
   end
   print("[]") -- for compatibility, this is a fake 'ktable'
   io.write(table.concat(printtree(tree, 0, {})))
end

function lp_printcode(patt)
   local p = getpatt(patt)
   if p.code == nil then
      prepcompile(p)
   end
   print("[]") -- for compatibility, this is a fake 'ktable'
   io.write(table.concat(printpatt(p.code, {})))
end

--[[
** Get the initial position for the match, interpreting negative
** values from the end of the subject.  Result is 1-based.
]]--
function initposition(ii, len)
   if ii > 0 then -- positive index?
      if ii <= len then -- inside the string?
         return ii -- return it (no correction to 0-base)
      else
         return len + 1 -- crop at the end
      end
   else -- negative index
      if (-ii) <= len then -- inside the string?
         return len + 1 - (-ii) -- return position from the end
      else
         return 1
      end
   end
end

-- Main match function
function lp_match(pattern, subject, init, ...)
   local p = getpatt(pattern)
   if p.code == nil then prepcompile(p) end
   local code = p.code
   if type(subject) ~= 'string' then error("subject is not a string") end
   local i
   if init == nil then
      i = 1
   else
      i = initposition(checkint(init), #subject)
   end
   return match(subject, i, code, ...)
end


--[[
** ======================================================
** Library creation and functions not related to matching
** ======================================================
]]--

function lp_setmax(lim)
   lim = 0 + lim -- convert to integer
   if lim <= 0 then
      error("out of range")
   end
   MAXSTACK = lim
end

local lp_type = define_type_visitor{
   pattern = function() return "pattern" end,
   default = function() return nil end,
}

function lp_gc(p)
   p._code = nil
end

function createcat(charspec)
   local t = newcharset()
   for i=0,CHARMAX do -- XXX not unicode safe
      local s = compat.utf8char(i)
      if s:find(charspec) ~= nil then
         t.set[i] = true
      end
   end
   return t
end

function lp_locale(tbl)
   if tbl == nil then
      tbl = {}
   end
   tbl.alnum = createcat("%w")
   tbl.alpha = createcat("%a")
   tbl.cntrl = createcat("%c")
   tbl.digit = createcat("%d")
   tbl.graph = createcat("[%p%w]") -- printable except space
   tbl.lower = createcat("%l")
   tbl.print = createcat("%C") -- printable = "not a control character"?
   tbl.punct = createcat("%p") -- "printable but not space or alnum
   tbl.space = createcat("%s")
   tbl.upper = createcat("%u")
   tbl.xdigit = createcat("%x")
   return tbl
end

--[[ lpltree.c ]]--

metareg.__mul = lp_seq
metareg.__add = lp_choice
metareg.__pow = lp_star
metareg.__gc = lp_gc
metareg.__len = lp_and
metareg.__div = lp_divcapture
metareg.__mod = lp_acccapture
metareg.__unm = lp_not
metareg.__sub = lp_sub
metareg.__tostring = printrepl

local pattreg = {
   ptree = lp_printtree,
   pcode = lp_printcode,
   match = lp_match,
   B = lp_behind,
   V = lp_V,
   C = lp_simplecapture,
   Cc = lp_constcapture,
   Cmt = lp_matchtime,
   Cb = lp_backref,
   Carg = lp_argcapture,
   Cp = lp_poscapture,
   Cs = lp_substcapture,
   Ct = lp_tablecapture,
   Cf = lp_foldcapture,
   Cg = lp_groupcapture,
   P = lp_P,
   S = lp_set,
   R = lp_range,
   utfR = lp_utfr,
   locale = lp_locale,
   version = "LLPegLabel " .. VERSION,
   setmaxstack = lp_setmax,
   type = lp_type,
   T = lp_throw, -- labeled failure throw
}
metareg.__index = pattreg

return pattreg

end)

local modules = {}
modules['bit32'] = require('bit32')
modules['string'] = require('string')
modules['strict'] = {}
modules['table'] = require('table')
local function myrequire(name)
  if modules[name] == nil then
    modules[name] = true
    modules[name] = (builders[name])(myrequire)
  end
  return modules[name]
end
return myrequire('llpeg')
end)()

Videos

Youtube | Vimeo | Bing

Websites

Google | Yahoo | Bing

Encyclopedia

Google | Yahoo | Bing

Facebook