// Adaptation of Scott Wlaschin's great "Understanding Parser Combinators" series
// on his site F# for Fun and Profit
// https://fsharpforfunandprofit.com/posts/understanding-parser-combinators/
// https://fsharpforfunandprofit.com/posts/understanding-parser-combinators-2/
public protocol ParsableStream: Collection where Element: Equatable {}
extension Substring: ParsableStream {}
public struct InputState<UserState> {
let stream: any ParsableStream
let userState: UserState
static func fromString(_ input: String, initialState: UserState) -> InputState<UserState> {
return .init(
stream: Substring(input),
userState: initialState
)
}
}
public enum ParserResult<Result, UserState> {
case success(Result, InputState<UserState>)
case failure(String, String, InputState<UserState>)
}
public enum ParserErrors: Error {
case unparsable(String)
}
public class Parser<Result, UserState> {
public let label: String
public let parse: (InputState<UserState>) -> ParserResult<Result, UserState>
internal init(_ label: String, _ parse: @escaping (InputState<UserState>) -> ParserResult<Result, UserState>) {
self.label = label
self.parse = parse
}
}
public func runP<Result, UserState>(
_ parser: Parser<Result, UserState>,
_ inputState: InputState<UserState>
) throws -> Result {
switch parser.parse(inputState) {
case let .success(result, _): return result
case let .failure(label, msg, _):
throw ParserErrors.unparsable("Unable to parse: \(label) - \(msg)")
}
}
/// Builds a new parser which overrides the wrapped parser's `label` field.
public func setLabel<A, UserState>(
_ parser: Parser<A, UserState>,
_ newLabel: String
) -> Parser<A, UserState> {
return Parser(newLabel) { input in
switch parser.parse(input) {
case let .success(result, remaining):
return .success(result, remaining)
case let .failure(_, msg, remaining):
return .failure(newLabel, msg, remaining)
}
}
}
infix operator <?>: ParserApply
public func <?> <A, UserState>(
_ parser: Parser<A, UserState>,
_ label: String
) -> Parser<A, UserState> { setLabel(parser, label) }
// MARK: Core combinators
/// Builds a new parser which runs two parsers in sequence and returns the
/// results of those parsers as a tuple.
///
/// > Tip: This is also aliased as the ``.>>.(_:_:)`` infix operator.
public func andThen<A, B, UserState>(
_ parser1: Parser<A, UserState>,
_ parser2: Parser<B, UserState>
) -> Parser<(A, B), UserState> {
parser1 >>= { result1 in
parser2 >>= { result2 in
returnP((result1, result2))
}
} <?> "\(parser1.label) andThen \(parser2.label)"
}
/// Builds a new parser which runs the first parser, and if it's result is an
/// failure, a second on the same input as the first and returns the result of
/// which parser actually ran.
///
/// > Tip: This is also aliased as the ``<|>(_:_:)`` infix operator
public func orElse<A, UserState>(
_ parser1: Parser<A, UserState>,
_ parser2: Parser<A, UserState>
) -> Parser<A, UserState> {
let label = "\(parser1.label) andThen \(parser2.label)"
return Parser(label) { input in
switch parser1.parse(input) {
case let .success(result, remaining):
return .success(result, remaining)
case .failure: return parser2.parse(input)
}
}
}
/// Takes a list of parsers and returns the result of the first one that runs
/// successfully. Composes ``orElse(_:_:)`` over a list of parsers.
public func choice<A, UserState>(
_ parserList: [Parser<A, UserState>]
) -> Parser<A, UserState> { parserList.dropFirst().reduce(parserList.first!, orElse) }
/// Hoists a parser producing lambda
///
/// > Tip: This is also aliased as the ``>>=(_:_:)`` infix operator
public func bindP<A, B, UserState>(
_ parser: Parser<A, UserState>,
_ lambda: @escaping (A) -> Parser<B, UserState>
) -> Parser<B, UserState> {
let label = "unknown"
return Parser(label) { input in
switch parser.parse(input) {
case let .success(result, remaining):
return lambda(result).parse(remaining)
case let .failure(label, msg, remaining):
return .failure(label, msg, remaining)
}
}
}
/// Hoists a constant into the parser
public func returnP<A, UserState>(
_ constant: A
) -> Parser<A, UserState> { Parser("returnP") { .success(constant, $0) } }
/// Applies a value producing function to the value returned by a parser.
///
/// > Tip: This is also aliased as the ``<!>(_:_:)`` and ``|>>(_:_:)``
/// (for piping) infix operators
public func mapP<A, B, UserState>(
_ lambda: @escaping (A) -> B,
_ parser: Parser<A, UserState>
) -> Parser<B, UserState> { parser >>= { returnP(lambda($0)) } }
/// Applies a wrapped function to a wrapped value
///
/// > Tip: This is also aliased as the ``<*>(_:_:)`` infix operator
public func applyP<A, B, UserState>(
_ lambdaParser: Parser<(A) -> B, UserState>,
_ parser: Parser<A, UserState>
) -> Parser<B, UserState> {
// Can also be written as
// fP >>= { f in xP |>> f }
// or
// fP >>= { f in f <!> xP }
// or
// lambdaParser >>= { lambda in parser >>= { result in returnP(lambda(result)) } }
lambdaParser >>= { lambda in parser |>> lambda }
}
public func sequenceP<A, UserState>(
_ parserList: [Parser<A, UserState>]
) -> Parser<[A], UserState> {
let label = "sequence of \(parserList.first!.label)"
return Parser(label) { input in
var result: [A] = []
var remaining = input
for parser in parserList {
switch parser.parse(remaining) {
case let .success(res, nowRemaining):
result.append(res)
remaining = nowRemaining
case let .failure(label, msg, remaining):
return .failure(label, msg, remaining)
}
}
return .success(result, remaining)
}
}
/// Helper to build many0P and many1P
private func zeroOrMoreP<A, UserState>(
_ parser: Parser<A, UserState>,
_ input: InputState<UserState>
) -> ([A], InputState<UserState>) {
switch parser.parse(input) {
case let .success(result, remaining):
let (nextResult, nextRemaining) = zeroOrMoreP(parser, remaining)
let finalResult = [result] + nextResult
return (finalResult, nextRemaining)
case .failure: return ([], input)
}
}
public func many0P<A, UserState>(
_ parser: Parser<A, UserState>
) -> Parser<[A], UserState> {
let label = "zero or more \(parser.label)"
return Parser(label) { input in
let (result, remaining) = zeroOrMoreP(parser, input)
return .success(result, remaining)
}
}
public func many1P<A, UserState>(
_ parser: Parser<A, UserState>
) -> Parser<[A], UserState> {
parser >>= { head in
many0P(parser) >>= { tail in
returnP([head] + tail)
}
} <?> "one or more \(parser.label)"
}
public func optP<A, UserState>(
_ parser: Parser<A, UserState>
) -> Parser<A?, UserState> {
let some = parser |>> { result in Optional(result) }
let none = Parser("none") { input in .success(nil as A?, input as InputState<UserState>) }
return some <|> none
}
public func betweenP<A, B, C, UserState>(
_ parser1: Parser<A, UserState>,
_ parser2: Parser<B, UserState>,
_ parser3: Parser<C, UserState>
) -> Parser<B, UserState> { parser1 *> parser2 <* parser3 }
public func sepBy1<A, B, UserState>(
_ parser: Parser<A, UserState>,
_ separator: Parser<B, UserState>
) -> Parser<[A], UserState> {
let sepThenP = separator *> parser
return parser .>>. many0P(sepThenP) |>> { (result, pList) in [result] + pList }
}
public func sepBy<A, B, UserState>(
_ parser: Parser<A, UserState>,
_ separator: Parser<B, UserState>
) -> Parser<[A], UserState> { sepBy1(parser, separator) <|> returnP([]) }
// MARK: Element Helpers
public func satisfyP<Element, UserState>(
_ label: String,
_ predicate: @escaping (Element) -> Bool
) -> Parser<Element, UserState> {
return Parser(label) { input in
guard let first = input.stream.first as? Element
else { return .failure(label, "No more input", input) }
if predicate(first) {
return .success(
first,
.init(
// swiftlint:disable:next force_cast
stream: input.stream.dropFirst() as! any ParsableStream,
userState: input.userState
)
)
} else {
return .failure(label, "Unexpected \(first)", input)
}
}
}
public func oneOf<Element, UserState>(_ elem: Element) -> Parser<Element, UserState> where Element: Equatable {
let label = "one of \(elem)"
return satisfyP(label) { char in char == elem }
}
public func anyOf<Element, UserState>(_ elems: [Element]) -> Parser<Element, UserState> where Element: Equatable {
choice(elems.map(oneOf(_:))) <?> "any of \(elems)"
}
// MARK: Skip helpers
public func skip0P<UserState>(_ skippable: Parser<Any, UserState>) -> Parser<(), UserState> {
many0P(skippable) >>= { _ in returnP(()) }
}
// MARK: User state helpers
public func getUserStateP<UserState>() -> Parser<UserState, UserState> {
Parser("getUserState") { input in .success(input.userState, input) }
}
public func setUserState<UserState>(_ newUserState: UserState) -> Parser<(), UserState> {
return Parser("setUserState") { input in
let newInputState = InputState.init(stream: input.stream, userState: newUserState)
return .success((), newInputState)
}
}
// MARK: Operator Precedences
precedencegroup ParserApply {
associativity: left
}
precedencegroup ParserOr {
associativity: left
higherThan: ParserApply
}
precedencegroup ParserAnd {
associativity: left
higherThan: ParserOr
}
// MARK: Infixies
infix operator .>>.: ParserAnd
public func .>>. <A, B, C>(
_ parser1: Parser<A, C>,
_ parser2: Parser<B, C>
) -> Parser<(A, B), C> { andThen(parser1, parser2) }
infix operator <|>: ParserOr
public func <|> <A, C>(
_ parser1: Parser<A, C>,
_ parser2: Parser<A, C>
) -> Parser<A, C> { orElse(parser1, parser2) }
infix operator >>=: ParserApply
public func >>= <A, B, C>(
_ parser: Parser<A, C>,
_ lambda: @escaping (A) -> Parser<B, C>
) -> Parser<B, C> { bindP(parser, lambda) }
infix operator |>>: ParserApply
public func |>> <A, B, C>(
_ parser: Parser<A, C>,
_ lambda: @escaping (A) -> B
) -> Parser<B, C> { mapP(lambda, parser) }
infix operator <!>: ParserApply
public func <!> <A, B, C>(
_ lambda: @escaping (A) -> B,
_ parser: Parser<A, C>
) -> Parser<B, C> { mapP(lambda, parser) }
infix operator <*>: ParserApply
public func <*> <A, B, C>(
_ parser1: Parser<(A) -> B, C>,
_ parser2: Parser<A, C>
) -> Parser<B, C> { applyP(parser1, parser2) }
// These use *> and <* instead of .>> and >>. since swift doesn't like >>. as an op
// Points to which side is being kept.
infix operator <*: ParserApply
public func <* <A, B, C>(
_ parser1: Parser<A, C>,
_ parser2: Parser<B, C>
) -> Parser<A, C> { parser1 .>>. parser2 |>> { (left, _) in left } }
infix operator *>: ParserApply
public func *> <A, B, C>(
_ parser1: Parser<A, C>,
_ parser2: Parser<B, C>
) -> Parser<B, C> { parser1 .>>. parser2 |>> { (_, right) in right } }