AdventOfCode

parsers.ml at tip
Login

File 2021/lib/parsers.ml from the latest check-in


(*
  The parser combinator here started off as an adventure in day 16
  and then has evolved after finding this series of posts on how to
  get better data like the position and labels into error messages
  https://fsharpforfunandprofit.com/posts/understanding-parser-combinators/
  as well as lots of inspiration for derived parsers from Opal
  https://github.com/pyrocat101/opal
*)

type position = { line : int; column : int }

let explode s = List.init (String.length s) (String.get s)

let implode chs = chs |> List.map (String.make 1) |> String.concat ""

module InputStream = struct
  type 't input = { lines : 't list; position : position }
  (*type 't input = {  lines: 't Stream.t ; position : position }*)

  let initial_position = { line = 0; column = 0 }

  let succ_column pos = { pos with column = succ pos.column }

  let succ_line pos = { line = succ pos.line; column = 0 }

  let current_position { position; _ } = position

  let of_stream stream = { lines = stream; position = initial_position }

  (* TODO: Stream.next isn't undoable so backtracking doesn't work. can this be
     worked around some how?
     let of_string str = str |> Stream.of_string |> of_stream
     let of_channel ic = ic |> Stream.of_channel |> of_stream
     let of_list il = il |> Stream.of_list |> of_stream

     let next (input : 't input) =
       try
         let p = Stream.next input.lines in
         {input with position=succ_column input.position}, Some p
       with Stream.Failure -> input, None *)

  let of_string str = str |> explode |> of_stream

  let of_list il = il |> of_stream

  let next (input : 't input) =
    try
      let p = List.nth input.lines input.position.column in
      ({ input with position = succ_column input.position }, Some p)
    with _ -> (input, None)
end

type ('result, 't) parse_result =
  | Success of 'result * 't InputStream.input
  | Fail of string * string * position

type ('result, 't) parser = {
  label : string;
  parse_fn : 't InputStream.input -> ('result, 't) parse_result;
}

let run { parse_fn; _ } input = parse_fn input

let get_label { label; _ } = label

let set_label label { parse_fn = parser; _ } =
  let parse_fn input =
    match parser input with
    | Success _ as res -> res
    | Fail (_old_label, msg, pos) -> Fail (label, msg, pos)
  in
  { parse_fn; label }

let ( <?> ) px f = set_label f px

let return result =
  let parse_fn input = Success (result, input) in
  { parse_fn; label = "return" }

let fail label msg =
  let parse_fn input =
    let position = InputStream.current_position input in
    Fail (label, msg, position)
  in
  { parse_fn; label = "fail" }

let bind f { parse_fn = px; _ } =
  let parse_fn input =
    match px input with
    | Fail _ as res -> res
    | Success (result, remainder) ->
        let { parse_fn = py; _ } = f result in
        py remainder
  in
  { parse_fn; label = "bind" }

let ( >>= ) px f = bind f px

let and_then parser_1 parser_2 =
  let label =
    Printf.sprintf "%s THEN %s" (get_label parser_1) (get_label parser_2)
  in
  parser_1
  >>= (fun result_1 -> parser_2 >>= fun result_2 -> return (result_1, result_2))
  <?> label

let ( >> ) = and_then

let map_result f parser_1 = bind (fun res -> return (f res)) parser_1

let ( >>| ) px f = map_result f px

let or_else { parse_fn = parser_1; label = label_1 }
    { parse_fn = parser_2; label = label_2 } =
  let label = Printf.sprintf "%s OR %s" label_1 label_2 in
  let parse_fn input =
    match parser_1 input with
    | Success _ as res -> res
    | Fail (_label_1, msg_1, pos_1) -> (
        match parser_2 input with
        | Fail (_label_2, msg_2, _pos_2) ->
            Fail (label, Printf.sprintf "expected %s OR %s" msg_1 msg_2, pos_1)
        | Success _ as res -> res)
  in
  { parse_fn; label }

let ( <|> ) = or_else

let ( *>> ) px py = px >> py >>| fun (x, _y) -> x

let ( >>* ) px py = px >> py >>| fun (_x, y) -> y

let any =
  let label = "any" in
  let parse_fn s =
    let input, item = InputStream.next s in
    match item with
    | Some hd -> Success (hd, input)
    | _ ->
        let pos = InputStream.current_position input in
        Fail (label, "not enough input to consume any", pos)
  in
  { parse_fn; label }

let satisfy test =
  any >>= fun ch ->
  if test ch then return ch else fail "satisfy" "predicate not satisfied"

let ( <~> ) px py =
  let label = Printf.sprintf "%s then %s" (get_label px) (get_label py) in
  px >>= (fun rx -> py >>= fun ry -> return (rx :: ry)) <?> label

let rec choice = function
  | [] -> fail "choice" "not enough parsers to chose between"
  | start :: parsers -> start <|> choice parsers

let apply f px =
  let label = Printf.sprintf "apply %s" (get_label px) in
  f >>= (fun res -> px >>= fun res_1 -> return (res res_1)) <?> label

let ( <*> ) = apply

let lift2 f px py = return f <*> px <*> py

let rec sequence parser_list =
  let cons = lift2 List.cons in
  match parser_list with
  | [] -> return []
  | head :: tail -> cons head (sequence tail)

let between op ed x = op >>* x *>> ed

let option default x = x <|> return default

let optional x = option () (x *>> return ())

let rec skip_many x = option () (x >>= fun _ -> skip_many x)

let skip_many1 x = x >> skip_many x

let rec many x =
  option []
    ( x >>= fun r ->
      many x >>= fun rs -> return (r :: rs) )

let many1 x = x <~> many x

let sep_by1 x sep = x <~> many (sep >>* x)

let sep_by x sep = sep_by1 x sep <|> return []

(*let end_by1 x sep = sep_by1 x sep *>> sep*)
(*let end_by x sep = end_by1 x sep <|> return []*)

let opt p =
  let label = Printf.sprintf "opt %s" (get_label p) in
  option None (p >>| Option.some) <?> label

(*let chainl1 x op =*)
(*let rec loop a =*)
(*(op >>= fun f -> x >>= fun b -> loop (f a b)) <|> return a*)
(*in*)
(*x >>= loop*)
(*let chainl x op default = chainl1 x op <|> return default*)

(*let rec chainr1 x op = x >>= fun a -> (op >>= fun f -> chainr1 x op *>> f a) <|> return a*)
(*let chainr x op default = chainr1 x op <|> return default*)

let rec take n px = if n = 0 then return [] else px <~> take (n - 1) px

let one ch = satisfy (( = ) ch) <?> "one"

let any_of l = satisfy (fun x -> List.mem x l) <?> "any_of"

let none_of l = satisfy (fun x -> not (List.mem x l)) <?> "none_of"

let range l r = satisfy (fun x -> l <= x && x <= r) <?> "range"

let string input =
  let rec loop input i =
    if i = String.length input then return input
    else one input.[i] >>* loop input (i + 1)
  in
  loop input 0 <?> input

let digit = range '0' '9' <?> "digit"

let digits =
  opt (one '-') <?> "negate" >> many digit <?> "digits" >>| function
  | Some _, parts -> int_of_string ("-" ^ implode parts)
  | None, parts -> int_of_string (implode parts)