AdventOfCode

ParserCombinators.swift at tip
Login

File 2023/Sources/Parsers/ParserCombinators.swift from the latest check-in


// Adaptation of Scott Wlaschin's great "Understanding Parser Combinators" series
// on his site F# for Fun and Profit
// https://fsharpforfunandprofit.com/posts/understanding-parser-combinators/
// https://fsharpforfunandprofit.com/posts/understanding-parser-combinators-2/

public protocol ParsableStream: Collection where Element: Equatable {}

extension Substring: ParsableStream {}

public struct InputState<UserState> {
    let stream: any ParsableStream

    let userState: UserState

    static func fromString(_ input: String, initialState: UserState) -> InputState<UserState> {
        return .init(
            stream: Substring(input),
            userState: initialState
        )
    }
}

public enum ParserResult<Result, UserState> {
    case success(Result, InputState<UserState>)
    case failure(String, String, InputState<UserState>)
}

public enum ParserErrors: Error {
    case unparsable(String)
}

public class Parser<Result, UserState> {
    public let label: String
    public let parse: (InputState<UserState>) -> ParserResult<Result, UserState>

    internal init(_ label: String, _ parse: @escaping (InputState<UserState>) -> ParserResult<Result, UserState>) {
        self.label = label
        self.parse = parse
    }
}

public func runP<Result, UserState>(
    _ parser: Parser<Result, UserState>,
    _ inputState: InputState<UserState>
) throws -> Result {
    switch parser.parse(inputState) {
        case let .success(result, _): return result
        case let .failure(label, msg, _):
            throw ParserErrors.unparsable("Unable to parse: \(label) - \(msg)")
    }
}

/// Builds a new parser which overrides the wrapped parser's `label` field.
public func setLabel<A, UserState>(
    _ parser: Parser<A, UserState>,
    _ newLabel: String
) -> Parser<A, UserState> {
    return Parser(newLabel) { input in
        switch parser.parse(input) {
            case let .success(result, remaining):
                return .success(result, remaining)
            case let .failure(_, msg, remaining):
                return .failure(newLabel, msg, remaining)
        }
    }
}

infix operator <?>: ParserApply
public func <?> <A, UserState>(
    _ parser: Parser<A, UserState>,
    _ label: String
) -> Parser<A, UserState> { setLabel(parser, label) }

// MARK: Core combinators

/// Builds a new parser which runs two parsers in sequence and returns the
/// results of those parsers as a tuple.
///
/// > Tip: This is also aliased as the ``.>>.(_:_:)`` infix operator.
public func andThen<A, B, UserState>(
    _ parser1: Parser<A, UserState>,
    _ parser2: Parser<B, UserState>
) -> Parser<(A, B), UserState> {
    parser1 >>= { result1 in
        parser2 >>= { result2 in
            returnP((result1, result2))
        }
    } <?> "\(parser1.label) andThen \(parser2.label)"
}

/// Builds a new parser which runs the first parser, and if it's result is an
/// failure, a second on the same input as the first and returns the result of
/// which parser actually ran.
///
/// > Tip: This is also aliased as the ``<|>(_:_:)`` infix operator
public func orElse<A, UserState>(
    _ parser1: Parser<A, UserState>,
    _ parser2: Parser<A, UserState>
) -> Parser<A, UserState> {
    let label = "\(parser1.label) andThen \(parser2.label)"

    return Parser(label) { input in
        switch parser1.parse(input) {
            case let .success(result, remaining):
                return .success(result, remaining)
            case .failure: return parser2.parse(input)
        }
    }
}

/// Takes a list of parsers and returns the result of the first one that runs
/// successfully. Composes ``orElse(_:_:)`` over a list of parsers.
public func choice<A, UserState>(
    _ parserList: [Parser<A, UserState>]
) -> Parser<A, UserState> { parserList.dropFirst().reduce(parserList.first!, orElse) }

/// Hoists a parser producing lambda
///
/// > Tip: This is also aliased as the  ``>>=(_:_:)`` infix operator
public func bindP<A, B, UserState>(
    _ parser: Parser<A, UserState>,
    _ lambda: @escaping (A) -> Parser<B, UserState>
) -> Parser<B, UserState> {
    let label = "unknown"
    return Parser(label) { input in
        switch parser.parse(input) {
            case let .success(result, remaining):
                return lambda(result).parse(remaining)
            case let .failure(label, msg, remaining):
                return .failure(label, msg, remaining)
        }
    }
}

/// Hoists a constant into the parser
public func returnP<A, UserState>(
    _ constant: A
) -> Parser<A, UserState> { Parser("returnP") { .success(constant, $0) } }

/// Applies a value producing function to the value returned by a parser.
///
/// > Tip: This is also aliased as the ``<!>(_:_:)`` and ``|>>(_:_:)``
///     (for piping) infix operators
public func mapP<A, B, UserState>(
    _ lambda: @escaping (A) -> B,
    _ parser: Parser<A, UserState>
) -> Parser<B, UserState> { parser >>= { returnP(lambda($0)) } }

/// Applies a wrapped function to a wrapped value
///
/// > Tip: This is also aliased as the ``<*>(_:_:)`` infix operator
public func applyP<A, B, UserState>(
    _ lambdaParser: Parser<(A) -> B, UserState>,
    _ parser: Parser<A, UserState>
) -> Parser<B, UserState> {
    // Can also be written as
    // fP >>= { f in xP |>> f }
    // or
    // fP >>= { f in f <!> xP }
    // or
    // lambdaParser >>= { lambda in parser >>= { result in returnP(lambda(result)) } }
    lambdaParser >>= { lambda in parser |>> lambda }
}

public func sequenceP<A, UserState>(
    _ parserList: [Parser<A, UserState>]
) -> Parser<[A], UserState> {
    let label = "sequence of \(parserList.first!.label)"

    return Parser(label) { input in
        var result: [A] = []
        var remaining = input

        for parser in parserList {
            switch parser.parse(remaining) {
                case let .success(res, nowRemaining):
                    result.append(res)
                    remaining = nowRemaining
                case let .failure(label, msg, remaining):
                    return .failure(label, msg, remaining)
            }
        }

        return .success(result, remaining)
    }
}

/// Helper to build many0P and many1P
private func zeroOrMoreP<A, UserState>(
    _ parser: Parser<A, UserState>,
    _ input: InputState<UserState>
) -> ([A], InputState<UserState>) {
    switch parser.parse(input) {
        case let .success(result, remaining):
            let (nextResult, nextRemaining) = zeroOrMoreP(parser, remaining)
            let finalResult = [result] + nextResult

            return (finalResult, nextRemaining)
        case .failure: return ([], input)
    }
}

public func many0P<A, UserState>(
    _ parser: Parser<A, UserState>
) -> Parser<[A], UserState> {
    let label = "zero or more \(parser.label)"

    return Parser(label) { input in
        let (result, remaining) = zeroOrMoreP(parser, input)
        return .success(result, remaining)
    }
}

public func many1P<A, UserState>(
    _ parser: Parser<A, UserState>
) -> Parser<[A], UserState> {
    parser >>= { head in
        many0P(parser) >>= { tail in
            returnP([head] + tail)
        }
    } <?> "one or more \(parser.label)"
}

public func optP<A, UserState>(
    _ parser: Parser<A, UserState>
) -> Parser<A?, UserState> {
    let some = parser |>> { result in Optional(result) }
    let none = Parser("none") { input in .success(nil as A?, input as InputState<UserState>) }

    return some <|> none
}

public func betweenP<A, B, C, UserState>(
    _ parser1: Parser<A, UserState>,
    _ parser2: Parser<B, UserState>,
    _ parser3: Parser<C, UserState>
) -> Parser<B, UserState> { parser1 *> parser2 <* parser3 }

public func sepBy1<A, B, UserState>(
    _ parser: Parser<A, UserState>,
    _ separator: Parser<B, UserState>
) -> Parser<[A], UserState> {
    let sepThenP = separator *> parser
    return parser .>>. many0P(sepThenP) |>> { (result, pList) in [result] + pList }
}

public func sepBy<A, B, UserState>(
    _ parser: Parser<A, UserState>,
    _ separator: Parser<B, UserState>
) -> Parser<[A], UserState> { sepBy1(parser, separator) <|> returnP([]) }

// MARK: Element Helpers

public func satisfyP<Element, UserState>(
    _ label: String,
    _ predicate: @escaping (Element) -> Bool
) -> Parser<Element, UserState> {
    return Parser(label) { input in
        guard let first = input.stream.first as? Element
        else { return .failure(label, "No more input", input) }

        if predicate(first) {
            return .success(
                first,
                .init(
                    // swiftlint:disable:next force_cast
                    stream: input.stream.dropFirst() as! any ParsableStream,
                    userState: input.userState
                )
            )
        } else {
            return .failure(label, "Unexpected \(first)", input)
        }
    }
}

public func oneOf<Element, UserState>(_ elem: Element) -> Parser<Element, UserState> where Element: Equatable {
    let label = "one of \(elem)"

    return satisfyP(label) { char in char == elem }
}

public func anyOf<Element, UserState>(_ elems: [Element]) -> Parser<Element, UserState> where Element: Equatable {
    choice(elems.map(oneOf(_:))) <?> "any of \(elems)"
}

// MARK: Skip helpers

public func skip0P<UserState>(_ skippable: Parser<Any, UserState>) -> Parser<(), UserState> {
    many0P(skippable) >>= { _ in returnP(()) }
}

// MARK: User state helpers

public func getUserStateP<UserState>() -> Parser<UserState, UserState> {
    Parser("getUserState") { input in .success(input.userState, input) }
}

public func setUserState<UserState>(_ newUserState: UserState) -> Parser<(), UserState> {
    return Parser("setUserState") { input in
        let newInputState = InputState.init(stream: input.stream, userState: newUserState)
        return .success((), newInputState)
    }
}

// MARK: Operator Precedences

precedencegroup ParserApply {
    associativity: left
}

precedencegroup ParserOr {
    associativity: left
    higherThan: ParserApply
}

precedencegroup ParserAnd {
    associativity: left
    higherThan: ParserOr
}

// MARK: Infixies

infix operator .>>.: ParserAnd
public func .>>. <A, B, C>(
    _ parser1: Parser<A, C>,
    _ parser2: Parser<B, C>
) -> Parser<(A, B), C> { andThen(parser1, parser2) }

infix operator <|>: ParserOr
public func <|> <A, C>(
    _ parser1: Parser<A, C>,
    _ parser2: Parser<A, C>
) -> Parser<A, C> { orElse(parser1, parser2) }

infix operator >>=: ParserApply
public func >>= <A, B, C>(
    _ parser: Parser<A, C>,
    _ lambda: @escaping (A) -> Parser<B, C>
) -> Parser<B, C> { bindP(parser, lambda) }

infix operator |>>: ParserApply
public func |>> <A, B, C>(
    _ parser: Parser<A, C>,
    _ lambda: @escaping (A) -> B
) -> Parser<B, C> { mapP(lambda, parser) }

infix operator <!>: ParserApply
public func <!> <A, B, C>(
    _ lambda: @escaping (A) -> B,
    _ parser: Parser<A, C>
) -> Parser<B, C> { mapP(lambda, parser) }

infix operator <*>: ParserApply
public func <*> <A, B, C>(
    _ parser1: Parser<(A) -> B, C>,
    _ parser2: Parser<A, C>
) -> Parser<B, C> { applyP(parser1, parser2) }

// These use *> and <* instead of .>> and >>. since swift doesn't like >>. as an op
// Points to which side is being kept.

infix operator <*: ParserApply
public func <* <A, B, C>(
    _ parser1: Parser<A, C>,
    _ parser2: Parser<B, C>
) -> Parser<A, C> { parser1 .>>. parser2 |>> { (left, _) in left } }

infix operator *>: ParserApply
public func *> <A, B, C>(
    _ parser1: Parser<A, C>,
    _ parser2: Parser<B, C>
) -> Parser<B, C> { parser1 .>>. parser2 |>> { (_, right) in right } }