-- Descriptive metadata for this lexer; nothing else in this file reads it.
local info = {
    version = 1.002,
    comment = "scintilla lpeg lexer for pdf",
    author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license = "see context related readme files",
}

-- pdf is normally static .. i.e. not edited so we don't really
-- need embedded lexers.

-- Shortcuts for the lpeg constructors used below; lpeg is expected to be
-- available as a global when this file is loaded.
local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V

-- The shared lexer framework: it supplies the predefined patterns, the token
-- constructor and the factory for new lexers.
local lexers            = require("scite-context-lexer")

local patterns          = lexers.patterns
local token             = lexers.token

-- The lexer object that this file configures and returns, plus the value it
-- exposes as 'whitespace', which is used below as the token name for top
-- level whitespace.
local pdflexer          = lexers.new("pdf","scite-context-lexer-pdf")
local pdfwhitespace     = pdflexer.whitespace

----- pdfobjectlexer    = lexers.load("scite-context-lexer-pdf-object")
----- pdfxreflexer      = lexers.load("scite-context-lexer-pdf-xref")

-- Predefined patterns taken from the shared lexer framework. (The original
-- declared 'anything' twice in this group; one declaration is enough.)
local anything          = patterns.anything
local space             = patterns.space
local spacing           = patterns.spacing
local nospacing         = patterns.nospacing
local restofline        = patterns.restofline

-- Top level whitespace gets the lexer specific whitespace token name.
local t_whitespace      = token(pdfwhitespace, spacing)

-- Note: t_spacing, t_rest, p_comment and t_comment are declared again further
-- down in this file, and the rule table picks up those later declarations.
local t_spacing         = token("default", spacing)
----- t_rest            = token("default", nospacing)
local t_rest            = token("default", anything)

local p_comment         = P("%") * restofline
local t_comment         = token("comment", p_comment)

-- whatever

-- space, spacing, nospacing and anything are already in scope from the
-- declarations near the top of this file (same values from 'patterns'), so
-- they are not declared a second time here; only the additional patterns are
-- fetched.
local newline           = patterns.eol
local real              = patterns.real
local cardinal          = patterns.cardinal
local alpha             = patterns.alpha

-- Delimiters of literal strings "( .. )" and of "< .. >" sequences, plus the
-- backslash that escapes a character inside a literal string.
local lparent           = P("(")
local rparent           = P(")")
local langle            = P("<")
local rangle            = P(">")
local escape            = P("\\")

-- Leading FEFF (the utf-16 byte order mark) of a hexadecimal string. Pdf
-- allows hexadecimal digits in either case, so "feff", "FEFF" and mixed case
-- spellings are all accepted instead of only the lowercase form.
local unicodetrigger    = S("fF") * S("eE") * S("fF") * S("fF")

-- A name is a slash followed by one or more characters that are neither
-- a space nor one of the delimiters < > / [ ] ( ).
local delimiter         = S("<>/[]()")
local nametoken         = 1 - space - delimiter
local name              = P("/") * nametoken^1

-- Content of a literal string, without the outer parentheses. Each step is
-- an escaped character, a nested "( .. )" group (handled recursively) or any
-- character other than ")".
local p_string          = P {
    "content",
    content = ( escape * anything + lparent * V("content") * rparent + (1 - rparent) )^0,
}

-- One run of whitespace, zero or more such runs, and the pattern that the
-- final "rest" rule uses.
local t_spacing         = token("default", spacing)
local t_spaces          = token("default", spacing)^0
local t_rest            = token("default", nospacing) -- anything

-- Literal keywords of the pdf syntax.
local p_stream          = P "stream"
local p_endstream       = P "endstream"
local p_obj             = P "obj"
local p_endobj          = P "endobj"
local p_reference       = P "R"

-- Object numbers are plain cardinals; a comment runs from "%" up to (but not
-- including) the next newline or carriage return.
local p_objectnumber    = patterns.cardinal
local p_comment         = P "%" * (1 - S "\n\r")^0

-- Literal string "( .. )": the parentheses are styled as quotes, the content
-- as string.
local t_string          = token("quote", lparent) * token("string", p_string) * token("quote", rparent)

-- "< .. >" sequence that starts with the unicode trigger; the trigger itself
-- gets the "plain" style.
local t_unicode         = token("quote", langle) * token("plain", unicodetrigger)
                        * token("string", (1 - rangle)^1) * token("quote", rangle)

-- Any other non-empty "< .. >" sequence.
local t_whatsit         = token("quote", langle) * token("string", (1 - rangle)^1) * token("quote", rangle)

-- The same name pattern under two styles: "command" where it is used as a
-- dictionary key (t_keyword) and "constant" where it is used as a value
-- (t_constant).
local t_keyword         = token("command", name)
local t_constant        = token("constant", name)

-- Numeric value. A second local called t_number is declared further down for
-- xref entries; the value patterns built from this one are not affected by it.
local t_number          = token("number", real)
local t_reserved        = token("number", P "true" + P "false" + P "null")

-- Indirect reference "<number> <number> R". Disabled alternatives:
--    t_reference       = token("number",   cardinal)
--                      * t_spacing
--                      * token("number",   cardinal)
--    t_reference       = token("warning",  cardinal * spacing * cardinal * spacing)
--                      * token("keyword",  p_reference)
local t_reference       = token("warning", cardinal) * t_spacing
                        * token("warning", cardinal) * t_spacing
                        * token("keyword", p_reference)

local t_comment         = token("comment", p_comment)

-- An indirect object opens with "<number> <number> obj" and closes with
-- "endobj". Disabled alternative for the opener:
--    t_openobject      = token("warning",  p_objectnumber * spacing)
--                      * token("warning",  p_objectnumber * spacing)
--                      * token("keyword",  p_obj)
local t_objectnumber    = token("warning", p_objectnumber)
local t_openobject      = t_objectnumber * t_spacing * t_objectnumber * t_spacing * token("keyword", p_obj)
local t_closeobject     = token("keyword", p_endobj)

-- Grouping delimiters of dictionaries and arrays.
local t_opendictionary  = token("grouping", P "<<")
local t_closedictionary = token("grouping", P ">>")
local t_openarray       = token("grouping", P "[")
local t_closearray      = token("grouping", P "]")

-- A stream: at least one character between "stream" and "endstream", styled
-- as text.
local t_streambody      = token("text", (1 - p_endstream)^1)
local t_stream          = token("keyword", p_stream) * t_streambody * token("keyword", p_endstream)

-- Any value that is not a container. This is an ordered choice, so the
-- alternatives are tried in the order in which they are listed.
local t_other           = t_constant
                        + t_reference
                        + t_string
                        + t_unicode
                        + t_number
                        + t_reserved
                        + t_whatsit

-- Grammar table (initial rule "dictionary") for nested dictionaries and
-- arrays. It is kept as a plain table so that its rules can be reused below.
local t_dictionary      = { "dictionary" }

-- "<<" (key value)* ">>" where every key is a name.
t_dictionary.dictionary = t_opendictionary
                        * (t_spaces * t_keyword * t_spaces * V("whatever"))^0
                        * t_spaces
                        * t_closedictionary

-- "[" value* "]"
t_dictionary.array      = t_openarray
                        * (t_spaces * V("whatever"))^0
                        * t_spaces
                        * t_closearray

-- A value is a nested dictionary, a nested array or a simple value.
t_dictionary.whatever   = V("dictionary") + V("array") + t_other

-- Grammar table (initial rule "object") for a complete indirect object. It
-- reuses the container rules of t_dictionary. The closing "endobj" is matched
-- here as well: the original author found it weird that the end has to be
-- caught here and suspected that the lpeg would otherwise be invalid.
local t_object          = { "object" }

t_object.dictionary     = t_dictionary.dictionary
t_object.array          = t_dictionary.array
t_object.whatever       = t_dictionary.whatever
t_object.number         = t_number

-- The content is a dictionary with an optional stream, an array, or a simple
-- value.
local p_objectbody      = V("dictionary") * t_spaces * t_stream^-1 + V("array") + t_other

t_object.object         = t_openobject * t_spaces * p_objectbody * t_spaces * t_closeobject

-- objects ... sometimes NUL characters play havoc ... and in xref we have
-- issues with embedded lexers that have spaces in the start and stop
-- conditions and this cannot be handled well either ... so, an imperfect
-- solution ... but anyway, there is not that much that can end up in
-- the root of the tree so we're sort of safe

-- "trailer" followed by whitespace and a dictionary. Disabled alternative:
--    t_trailer         = token("keyword", p_trailer * spacing)
--                      * t_dictionary
local p_trailer         = P "trailer"
local t_trailer         = token("keyword", p_trailer) * t_spacing * t_dictionary

-- "startxref" followed by whitespace and a number. Disabled alternative:
--    t_startxref       = token("keyword", p_startxref * spacing)
--                      * token("number", cardinal)
local p_startxref       = P "startxref"
local t_startxref       = token("keyword", p_startxref) * t_spacing * token("number", cardinal)

-- Header of a cross reference section: "xref" followed by two numbers and
-- mandatory trailing whitespace. That trailing whitespace is emitted as a
-- "default" token, like every other separator in this pattern, so that it is
-- not consumed without a token of its own.
local p_xref            = P("xref")
local t_xref            = token("keyword",p_xref)
                        * t_spacing
                        * token("number", cardinal)
                        * t_spacing
                        * token("number", cardinal)
                        * t_spacing
--    t_xref            = token("keyword",p_xref)
--                      * token("number", spacing * cardinal * spacing * cardinal * spacing)

-- An xref entry: two numbers followed by "f" or "n". This local shadows the
-- earlier t_number (the numeric value token) from here on, so the "number"
-- rule below refers to this pattern. Disabled alternative:
--    t_number          = token("number", cardinal * spacing * cardinal * spacing)
--                      * token("keyword", S("fn"))
local t_cardinal        = token("number", cardinal)
local t_number          = t_cardinal * t_spacing * t_cardinal * t_spacing * token("keyword", S "fn")

-- The top level rules of the lexer as { name, pattern } pairs. Note that
-- "object" is passed as a grammar table and that "number" is the xref entry
-- pattern (the last t_number declared above).
-- NOTE(review): presumably the framework tries the rules in the order listed,
-- which would explain why the catch-all "rest" rule comes last -- confirm
-- against scite-context-lexer.
pdflexer.rules = {
    { "whitespace", t_whitespace },
    { "object",     t_object     },
    { "comment",    t_comment    },
    { "trailer",    t_trailer    },
    { "startxref",  t_startxref  },
    { "xref",       t_xref       },
    { "number",     t_number     },
    { "rest",       t_rest       },
}

-- lexer.inspect(pdflexer)

-- collapser: obj endobj stream endstream

-- Folding information keyed by word and then by token name: "obj" and
-- "stream" carry +1, "endobj" and "endstream" carry -1 (presumably the change
-- in fold level applied by the framework).
pdflexer.folding = {
    obj       = { keyword =  1 },
    endobj    = { keyword = -1 },
    stream    = { keyword =  1 },
    endstream = { keyword = -1 },
}

return pdflexer
