(*
The parser combinator here started off as an adventure in day 16
and then has evolved after finding this series of posts on how to
get better data like the position and labels into error messages
https://fsharpforfunandprofit.com/posts/understanding-parser-combinators/
as well as lots of inspiration for derived parsers from Opal
https://github.com/pyrocat101/opal
*)
type position = { line : int; column : int }
let explode s = List.init (String.length s) (String.get s)
let implode chs = chs |> List.map (String.make 1) |> String.concat ""
module InputStream = struct
type 't input = { lines : 't list; position : position }
(*type 't input = { lines: 't Stream.t ; position : position }*)
let initial_position = { line = 0; column = 0 }
let succ_column pos = { pos with column = succ pos.column }
let succ_line pos = { line = succ pos.line; column = 0 }
let current_position { position; _ } = position
let of_stream stream = { lines = stream; position = initial_position }
(* TODO: Stream.next isn't undoable so backtracking doesn't work. can this be
worked around some how?
let of_string str = str |> Stream.of_string |> of_stream
let of_channel ic = ic |> Stream.of_channel |> of_stream
let of_list il = il |> Stream.of_list |> of_stream
let next (input : 't input) =
try
let p = Stream.next input.lines in
{input with position=succ_column input.position}, Some p
with Stream.Failure -> input, None *)
let of_string str = str |> explode |> of_stream
let of_list il = il |> of_stream
let next (input : 't input) =
try
let p = List.nth input.lines input.position.column in
({ input with position = succ_column input.position }, Some p)
with _ -> (input, None)
end
type ('result, 't) parse_result =
| Success of 'result * 't InputStream.input
| Fail of string * string * position
type ('result, 't) parser = {
label : string;
parse_fn : 't InputStream.input -> ('result, 't) parse_result;
}
let run { parse_fn; _ } input = parse_fn input
let get_label { label; _ } = label
let set_label label { parse_fn = parser; _ } =
let parse_fn input =
match parser input with
| Success _ as res -> res
| Fail (_old_label, msg, pos) -> Fail (label, msg, pos)
in
{ parse_fn; label }
let ( <?> ) px f = set_label f px
let return result =
let parse_fn input = Success (result, input) in
{ parse_fn; label = "return" }
let fail label msg =
let parse_fn input =
let position = InputStream.current_position input in
Fail (label, msg, position)
in
{ parse_fn; label = "fail" }
let bind f { parse_fn = px; _ } =
let parse_fn input =
match px input with
| Fail _ as res -> res
| Success (result, remainder) ->
let { parse_fn = py; _ } = f result in
py remainder
in
{ parse_fn; label = "bind" }
let ( >>= ) px f = bind f px
let and_then parser_1 parser_2 =
let label =
Printf.sprintf "%s THEN %s" (get_label parser_1) (get_label parser_2)
in
parser_1
>>= (fun result_1 -> parser_2 >>= fun result_2 -> return (result_1, result_2))
<?> label
let ( >> ) = and_then
let map_result f parser_1 = bind (fun res -> return (f res)) parser_1
let ( >>| ) px f = map_result f px
let or_else { parse_fn = parser_1; label = label_1 }
{ parse_fn = parser_2; label = label_2 } =
let label = Printf.sprintf "%s OR %s" label_1 label_2 in
let parse_fn input =
match parser_1 input with
| Success _ as res -> res
| Fail (_label_1, msg_1, pos_1) -> (
match parser_2 input with
| Fail (_label_2, msg_2, _pos_2) ->
Fail (label, Printf.sprintf "expected %s OR %s" msg_1 msg_2, pos_1)
| Success _ as res -> res)
in
{ parse_fn; label }
let ( <|> ) = or_else
let ( *>> ) px py = px >> py >>| fun (x, _y) -> x
let ( >>* ) px py = px >> py >>| fun (_x, y) -> y
let any =
let label = "any" in
let parse_fn s =
let input, item = InputStream.next s in
match item with
| Some hd -> Success (hd, input)
| _ ->
let pos = InputStream.current_position input in
Fail (label, "not enough input to consume any", pos)
in
{ parse_fn; label }
let satisfy test =
any >>= fun ch ->
if test ch then return ch else fail "satisfy" "predicate not satisfied"
let ( <~> ) px py =
let label = Printf.sprintf "%s then %s" (get_label px) (get_label py) in
px >>= (fun rx -> py >>= fun ry -> return (rx :: ry)) <?> label
let rec choice = function
| [] -> fail "choice" "not enough parsers to chose between"
| start :: parsers -> start <|> choice parsers
let apply f px =
let label = Printf.sprintf "apply %s" (get_label px) in
f >>= (fun res -> px >>= fun res_1 -> return (res res_1)) <?> label
let ( <*> ) = apply
let lift2 f px py = return f <*> px <*> py
let rec sequence parser_list =
let cons = lift2 List.cons in
match parser_list with
| [] -> return []
| head :: tail -> cons head (sequence tail)
let between op ed x = op >>* x *>> ed
let option default x = x <|> return default
let optional x = option () (x *>> return ())
let rec skip_many x = option () (x >>= fun _ -> skip_many x)
let skip_many1 x = x >> skip_many x
let rec many x =
option []
( x >>= fun r ->
many x >>= fun rs -> return (r :: rs) )
let many1 x = x <~> many x
let sep_by1 x sep = x <~> many (sep >>* x)
let sep_by x sep = sep_by1 x sep <|> return []
(*let end_by1 x sep = sep_by1 x sep *>> sep*)
(*let end_by x sep = end_by1 x sep <|> return []*)
let opt p =
let label = Printf.sprintf "opt %s" (get_label p) in
option None (p >>| Option.some) <?> label
(*let chainl1 x op =*)
(*let rec loop a =*)
(*(op >>= fun f -> x >>= fun b -> loop (f a b)) <|> return a*)
(*in*)
(*x >>= loop*)
(*let chainl x op default = chainl1 x op <|> return default*)
(*let rec chainr1 x op = x >>= fun a -> (op >>= fun f -> chainr1 x op *>> f a) <|> return a*)
(*let chainr x op default = chainr1 x op <|> return default*)
let rec take n px = if n = 0 then return [] else px <~> take (n - 1) px
let one ch = satisfy (( = ) ch) <?> "one"
let any_of l = satisfy (fun x -> List.mem x l) <?> "any_of"
let none_of l = satisfy (fun x -> not (List.mem x l)) <?> "none_of"
let range l r = satisfy (fun x -> l <= x && x <= r) <?> "range"
let string input =
let rec loop input i =
if i = String.length input then return input
else one input.[i] >>* loop input (i + 1)
in
loop input 0 <?> input
let digit = range '0' '9' <?> "digit"
let digits =
opt (one '-') <?> "negate" >> many digit <?> "digits" >>| function
| Some _, parts -> int_of_string ("-" ^ implode parts)
| None, parts -> int_of_string (implode parts)