From 6924864ceb15e9bdd8b7285b9f8d37d9b0d1b973 Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Sat, 19 Mar 2022 08:06:37 -0700 Subject: [PATCH 01/14] CPS internals for better performance and stack safety --- bench/Main.purs | 4 +- spago.dhall | 2 + src/Text/Parsing/Parser.purs | 239 +++++++++++++++++----- src/Text/Parsing/Parser/Combinators.purs | 134 ++++++------ src/Text/Parsing/Parser/Expr.purs | 16 +- src/Text/Parsing/Parser/String.purs | 83 ++++---- src/Text/Parsing/Parser/String/Basic.purs | 22 +- src/Text/Parsing/Parser/Token.purs | 28 +-- test/Main.purs | 4 +- 9 files changed, 338 insertions(+), 194 deletions(-) diff --git a/bench/Main.purs b/bench/Main.purs index f741b87..2100c52 100644 --- a/bench/Main.purs +++ b/bench/Main.purs @@ -43,7 +43,7 @@ import Prelude import Data.Array (fold, replicate) import Data.Either (either) -import Data.List (manyRec) +import Data.List (many, manyRec) import Data.List.Types (List) import Data.String.Regex (Regex, regex) import Data.String.Regex as Regex @@ -100,7 +100,7 @@ pattern23 = either (unsafePerformEffect <<< throw) identity } parseSkidoo :: Parser String (List String) -parseSkidoo = manyRec $ string "skidoo" +parseSkidoo = many $ string "skidoo" patternSkidoo :: Regex patternSkidoo = either (unsafePerformEffect <<< throw) identity diff --git a/spago.dhall b/spago.dhall index ffd8164..642824c 100644 --- a/spago.dhall +++ b/spago.dhall @@ -6,8 +6,10 @@ , "control" , "either" , "foldable-traversable" + , "functions" , "identity" , "integers" + , "lazy" , "lists" , "math" , "maybe" diff --git a/src/Text/Parsing/Parser.purs b/src/Text/Parsing/Parser.purs index 69796f0..f27892e 100644 --- a/src/Text/Parsing/Parser.purs +++ b/src/Text/Parsing/Parser.purs @@ -20,17 +20,18 @@ import Prelude import Control.Alt (class Alt) import Control.Apply (lift2) -import Control.Lazy (class Lazy, defer) -import Control.Monad.Error.Class (class MonadThrow, catchError, throwError) -import Control.Monad.Except (class MonadError, ExceptT(..), mapExceptT, runExceptT) -import Control.Monad.Rec.Class (class MonadRec) -import Control.Monad.State (class MonadState, StateT(..), evalStateT, gets, mapStateT, modify_, runStateT) -import Control.Monad.Trans.Class (class MonadTrans, lift) -import Control.MonadPlus (class Alternative, class MonadPlus, class MonadZero, class Plus) +import Control.Lazy (class Lazy) +import Control.Monad.Error.Class (class MonadError, class MonadThrow, catchError, throwError) +import Control.Monad.Rec.Class (class MonadRec, Step(..), tailRecM) +import Control.Monad.State.Class (class MonadState, gets, modify_) +import Control.Monad.Trans.Class (class MonadTrans) +import Control.MonadPlus (class Alternative, class Plus) import Data.Either (Either(..)) +import Data.Function.Uncurried (Fn2, Fn5, mkFn2, mkFn5, runFn2, runFn5) import Data.Identity (Identity) -import Data.Newtype (class Newtype, over, unwrap) -import Data.Tuple (Tuple(..)) +import Data.Lazy as Lazy +import Data.Newtype (unwrap) +import Data.Tuple (Tuple(..), fst) import Text.Parsing.Parser.Pos (Position, initialPos) -- | A parsing error, consisting of a message and position information. @@ -68,16 +69,45 @@ data ParseState s = ParseState s Position Boolean -- | -- | The first type argument is the stream type. Typically, this is either `String`, -- | or some sort of token stream. -newtype ParserT s m a = ParserT (ExceptT ParseError (StateT (ParseState s) m) a) +newtype ParserT s m a = ParserT + ( forall r + . Fn5 + (ParseState s) + ((Unit -> r) -> r) -- Trampoline + (m (Unit -> r) -> r) -- Lift + (Fn2 (ParseState s) ParseError r) -- Throw + (Fn2 (ParseState s) a r) -- Pure + r + ) -derive instance newtypeParserT :: Newtype (ParserT s m a) _ +data Run s m a + = More (Unit -> Run s m a) + | Lift (m (Unit -> Run s m a)) + | Stop (ParseState s) (Either ParseError a) -- | Apply a parser, keeping only the parsed result. -runParserT :: forall m s a. Monad m => s -> ParserT s m a -> m (Either ParseError a) -runParserT s p = evalStateT (runExceptT (unwrap p)) initialState +runParserT :: forall m s a. MonadRec m => s -> ParserT s m a -> m (Either ParseError a) +runParserT s p = fst <$> runParserT' initialState p where + initialState :: ParseState s initialState = ParseState s initialPos false +runParserT' :: forall m s a. MonadRec m => ParseState s -> ParserT s m a -> m (Tuple (Either ParseError a) (ParseState s)) +runParserT' state1 (ParserT k1) = + tailRecM go \_ -> + runFn5 k1 state1 More Lift + (mkFn2 \state2 err -> Stop state2 (Left err)) + (mkFn2 \state2 res -> Stop state2 (Right res)) + where + go :: (Unit -> Run s m a) -> m (Step (Unit -> Run s m a) (Tuple (Either ParseError a) (ParseState s))) + go step = case step unit of + More next -> + go next + Lift m -> + Loop <$> m + Stop s res -> + pure $ Done (Tuple res s) + -- | The `Parser` monad is a synonym for the parser monad transformer applied to the `Identity` monad. type Parser s = ParserT s Identity @@ -86,36 +116,135 @@ runParser :: forall s a. s -> Parser s a -> Either ParseError a runParser s = unwrap <<< runParserT s hoistParserT :: forall s m n a. (m ~> n) -> ParserT s m a -> ParserT s n a -hoistParserT = mapParserT +hoistParserT f (ParserT k) = ParserT + ( mkFn5 \state1 more lift throw done -> + runFn5 k state1 more (lift <<< f) throw done + ) -- | Change the underlying monad action and data type in a ParserT monad action. mapParserT :: forall b n s a m - . ( m (Tuple (Either ParseError a) (ParseState s)) + . MonadRec m + => Functor n + => ( m (Tuple (Either ParseError a) (ParseState s)) -> n (Tuple (Either ParseError b) (ParseState s)) ) -> ParserT s m a -> ParserT s n b -mapParserT = over ParserT <<< mapExceptT <<< mapStateT +mapParserT f p = ParserT + ( mkFn5 \state1 _ lift throw done -> + lift $ map + ( \(Tuple res state2) _ -> + case res of + Left err -> + runFn2 throw state2 err + Right a -> + runFn2 done state2 a + ) + (f (runParserT' state1 p)) + ) -instance lazyParserT :: Lazy (ParserT s m a) where - defer f = ParserT (ExceptT (defer (runExceptT <<< unwrap <<< f))) +instance Lazy (ParserT s m a) where + defer f = ParserT + ( mkFn5 \state1 more lift throw done -> do + let (ParserT k1) = Lazy.force m + runFn5 k1 state1 more lift throw done + ) + where + m = Lazy.defer f -instance semigroupParserT :: (Monad m, Semigroup a) => Semigroup (ParserT s m a) where +instance Semigroup a => Semigroup (ParserT s m a) where append = lift2 (<>) -instance monoidParserT :: (Monad m, Monoid a) => Monoid (ParserT s m a) where +instance Monoid a => Monoid (ParserT s m a) where mempty = pure mempty -derive newtype instance functorParserT :: Functor m => Functor (ParserT s m) -derive newtype instance applyParserT :: Monad m => Apply (ParserT s m) -derive newtype instance applicativeParserT :: Monad m => Applicative (ParserT s m) -derive newtype instance bindParserT :: Monad m => Bind (ParserT s m) -derive newtype instance monadParserT :: Monad m => Monad (ParserT s m) -derive newtype instance monadRecParserT :: MonadRec m => MonadRec (ParserT s m) -derive newtype instance monadStateParserT :: Monad m => MonadState (ParseState s) (ParserT s m) -derive newtype instance monadThrowParserT :: Monad m => MonadThrow ParseError (ParserT s m) -derive newtype instance monadErrorParserT :: Monad m => MonadError ParseError (ParserT s m) +instance Functor (ParserT s m) where + map f (ParserT k) = ParserT + ( mkFn5 \state1 more lift throw done -> + more \_ -> + runFn5 k state1 more lift throw + ( mkFn2 \state2 a -> + more \_ -> + runFn2 done state2 (f a) + ) + ) + +instance Apply (ParserT s m) where + apply (ParserT k1) (ParserT k2) = ParserT + ( mkFn5 \state1 more lift throw done -> + more \_ -> + runFn5 k1 state1 more lift throw + ( mkFn2 \state2 f -> + more \_ -> + runFn5 k2 state2 more lift throw + ( mkFn2 \state3 a -> + more \_ -> + runFn2 done state3 (f a) + ) + ) + ) + +instance Applicative (ParserT s m) where + pure a = ParserT + ( mkFn5 \state1 _ _ _ done -> + runFn2 done state1 a + ) + +instance Bind (ParserT s m) where + bind (ParserT k1) next = ParserT + ( mkFn5 \state1 more lift throw done -> + more \_ -> + runFn5 k1 state1 more lift throw + ( mkFn2 \state2 a -> do + let (ParserT k2) = next a + runFn5 k2 state2 more lift throw done + ) + ) + +instance Monad (ParserT s m) + +instance MonadRec (ParserT s m) where + tailRecM next = go + where + go a = ParserT + ( mkFn5 \state1 more lift throw done -> + more \_ -> do + let (ParserT k1) = next a + runFn5 k1 state1 more lift throw + ( mkFn2 \state2 step -> case step of + Loop b -> do + let (ParserT k2) = go b + runFn5 k2 state2 more lift throw done + Done c -> + runFn2 done state2 c + ) + ) + +instance MonadState (ParseState s) (ParserT s m) where + state k = ParserT + ( mkFn5 \state1 _ _ _ done -> do + let (Tuple a state2) = k state1 + runFn2 done state2 a + ) + +instance MonadThrow ParseError (ParserT s m) where + throwError err = ParserT + ( mkFn5 \state1 _ _ throw _ -> + runFn2 throw state1 err + ) + +instance MonadError ParseError (ParserT s m) where + catchError (ParserT k1) next = ParserT + ( mkFn5 \state1 more lift throw done -> + more \_ -> + runFn5 k1 state1 more lift + ( mkFn2 \state2 err -> do + let (ParserT k2) = next err + runFn5 k2 state2 more lift throw done + ) + done + ) -- | The alternative `Alt` instance provides the `alt` combinator `<|>`. -- | @@ -139,7 +268,7 @@ derive newtype instance monadErrorParserT :: Monad m => MonadError ParseError (P -- | -- | If we read a file from disk and run this `fileParser` on it and the -- | `string ""` parser succeeds, then we know that the first branch --- | is the correct branch, so we want to commit to the first branch. +-- | is the correct branch, so we want to commit to the first branch. -- | Even if the `parseTheRestOfTheHtml` parser fails -- | we don’t want to try the second branch. -- | @@ -151,49 +280,57 @@ derive newtype instance monadErrorParserT :: Monad m => MonadError ParseError (P -- | error messages while also allowing an efficient implementation. See -- | [*Parsec: Direct Style Monadic Parser Combinators For The Real World*](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/parsec-paper-letter.pdf) -- | section __2.3 Backtracking__. -instance altParserT :: Monad m => Alt (ParserT s m) where - alt p1 p2 = (ParserT <<< ExceptT <<< StateT) \(s@(ParseState i p _)) -> do - Tuple e s'@(ParseState _ _ consumed) <- runStateT (runExceptT (unwrap p1)) (ParseState i p false) - case e of - Left _ - | not consumed -> runStateT (runExceptT (unwrap p2)) s - _ -> pure (Tuple e s') - -instance plusParserT :: Monad m => Plus (ParserT s m) where - empty = fail "No alternative" - -instance alternativeParserT :: Monad m => Alternative (ParserT s m) +instance Alt (ParserT s m) where + alt (ParserT k1) (ParserT k2) = ParserT + ( mkFn5 \state1@(ParseState input pos _) more lift throw done -> + more \_ -> + runFn5 k1 (ParseState input pos false) more lift + ( mkFn2 \state2@(ParseState _ _ consumed) err -> + if consumed then + runFn2 throw state2 err + else + runFn5 k2 state1 more lift throw done + ) + done + ) -instance monadZeroParserT :: Monad m => MonadZero (ParserT s m) +instance Plus (ParserT s m) where + empty = ParserT + ( mkFn5 \state1@(ParseState _ pos _) _ _ throw _ -> + runFn2 throw state1 (ParseError "No alternative" pos) + ) -instance monadPlusParserT :: Monad m => MonadPlus (ParserT s m) +instance Alternative (ParserT s m) -instance monadTransParserT :: MonadTrans (ParserT s) where - lift = ParserT <<< lift <<< lift +instance MonadTrans (ParserT s) where + lift m = ParserT + ( mkFn5 \state1 _ lift' _ done -> + lift' $ map (\a _ -> runFn2 done state1 a) m + ) -- | Set the consumed flag. -- | -- | Setting the consumed flag means that we're committed to this parsing branch -- | of an alternative (`<|>`), so that if this branch fails then we want to -- | fail the entire parse instead of trying the other alternative. -consume :: forall s m. Monad m => ParserT s m Unit +consume :: forall s m. ParserT s m Unit consume = modify_ \(ParseState input pos _) -> ParseState input pos true -- | Returns the current position in the stream. -position :: forall s m. Monad m => ParserT s m Position +position :: forall s m. ParserT s m Position position = gets \(ParseState _ pos _) -> pos -- | Fail with a message. -fail :: forall m s a. Monad m => String -> ParserT s m a +fail :: forall m s a. String -> ParserT s m a fail message = failWithPosition message =<< position -- | Fail with a message and a position. -failWithPosition :: forall m s a. Monad m => String -> Position -> ParserT s m a +failWithPosition :: forall m s a. String -> Position -> ParserT s m a failWithPosition message pos = throwError (ParseError message pos) -- | Contextualize parsing failures inside a region. If a parsing failure -- | occurs, then the `ParseError` will be transformed by each containing -- | `region` as the parser backs out the call stack. -region :: forall m s a. Monad m => (ParseError -> ParseError) -> ParserT s m a -> ParserT s m a +region :: forall m s a. (ParseError -> ParseError) -> ParserT s m a -> ParserT s m a region context p = catchError p $ \err -> throwError $ context err diff --git a/src/Text/Parsing/Parser/Combinators.purs b/src/Text/Parsing/Parser/Combinators.purs index 7452aee..b5ccd2a 100644 --- a/src/Text/Parsing/Parser/Combinators.purs +++ b/src/Text/Parsing/Parser/Combinators.purs @@ -98,17 +98,14 @@ module Text.Parsing.Parser.Combinators import Prelude import Control.Lazy (defer) -import Control.Monad.Except (ExceptT(..), runExceptT) -import Control.Monad.Rec.Class (class MonadRec, Step(..), tailRecM) -import Control.Monad.State (StateT(..), runStateT) +import Control.Monad.Rec.Class (Step(..), tailRecM) import Control.Plus (empty, (<|>), alt) -import Data.Either (Either(..)) import Data.Foldable (class Foldable, foldl) +import Data.Function.Uncurried (mkFn2, mkFn5, runFn2, runFn5) import Data.List (List(..), many, manyRec, reverse, (:)) import Data.List.NonEmpty (NonEmptyList, cons') import Data.List.NonEmpty as NEL import Data.Maybe (Maybe(..)) -import Data.Newtype (unwrap) import Data.Tuple (Tuple(..)) import Data.Tuple.Nested (type (/\), (/\)) import Data.Unfoldable (replicateA) @@ -116,7 +113,7 @@ import Data.Unfoldable1 (replicate1A) import Text.Parsing.Parser (ParseError(..), ParseState(..), ParserT(..), fail) -- | Provide an error message in the case of failure. -withErrorMessage :: forall m s a. Monad m => ParserT s m a -> String -> ParserT s m a +withErrorMessage :: forall m s a. ParserT s m a -> String -> ParserT s m a withErrorMessage p msg = p <|> fail ("Expected " <> msg) infixl 3 withErrorMessage as @@ -129,13 +126,13 @@ infixl 3 withErrorMessage as -- |parseBang :: Parser Char -- |parseBang = char '!' <~?> \_ -> "Expected a bang" -- |``` -withLazyErrorMessage :: forall m s a. Monad m => ParserT s m a -> (Unit -> String) -> ParserT s m a +withLazyErrorMessage :: forall m s a. ParserT s m a -> (Unit -> String) -> ParserT s m a withLazyErrorMessage p msg = p <|> defer \_ -> fail ("Expected " <> msg unit) infixl 3 withLazyErrorMessage as <~?> -- | Flipped `()`. -asErrorMessage :: forall m s a. Monad m => String -> ParserT s m a -> ParserT s m a +asErrorMessage :: forall m s a. String -> ParserT s m a -> ParserT s m a asErrorMessage = flip () infixl 3 asErrorMessage as @@ -147,24 +144,24 @@ infixl 3 asErrorMessage as -- | ```purescript -- | parens = between (string "(") (string ")") -- | ``` -between :: forall m s a open close. Monad m => ParserT s m open -> ParserT s m close -> ParserT s m a -> ParserT s m a +between :: forall m s a open close. ParserT s m open -> ParserT s m close -> ParserT s m a -> ParserT s m a between open close p = open *> p <* close -- | Provide a default result in the case where a parser fails without consuming input. -option :: forall m s a. Monad m => a -> ParserT s m a -> ParserT s m a +option :: forall m s a. a -> ParserT s m a -> ParserT s m a option a p = p <|> pure a -- | Optionally parse something, failing quietly. -- | -- | To optionally parse `p` and never fail: `optional (try p)`. -optional :: forall m s a. Monad m => ParserT s m a -> ParserT s m Unit +optional :: forall m s a. ParserT s m a -> ParserT s m Unit optional p = void p <|> pure unit -- TODO Is this optional parser correct? Isn't this parser supposed to succeed -- even if p fails? Otherwise what's the point? I think we need try (void p). -- | pure `Nothing` in the case where a parser fails without consuming input. -optionMaybe :: forall m s a. Monad m => ParserT s m a -> ParserT s m (Maybe a) +optionMaybe :: forall m s a. ParserT s m a -> ParserT s m (Maybe a) optionMaybe p = option Nothing (Just <$> p) -- | If the parser fails then backtrack the input stream to the unconsumed state. @@ -180,12 +177,15 @@ optionMaybe p = option Nothing (Just <$> p) -- | >>> runParser "ac" (try (char 'a' *> char 'b') <|> (char 'a' *> char 'c')) -- | Right 'c' -- | ``` -try :: forall m s a. Monad m => ParserT s m a -> ParserT s m a -try p = (ParserT <<< ExceptT <<< StateT) \(s@(ParseState _ _ consumed)) -> do - Tuple e s'@(ParseState input position _) <- runStateT (runExceptT (unwrap p)) s - case e of - Left _ -> pure (Tuple e (ParseState input position consumed)) - _ -> pure (Tuple e s') +try :: forall m s a. ParserT s m a -> ParserT s m a +try (ParserT k1) = ParserT + ( mkFn5 \state1@(ParseState _ _ consumed) more lift throw done -> + runFn5 k1 state1 more lift + ( mkFn2 \(ParseState input position _) err -> + runFn2 throw (ParseState input position consumed) err + ) + done + ) -- | If the parser fails then backtrack the input stream to the unconsumed state. -- | @@ -200,27 +200,33 @@ try p = (ParserT <<< ExceptT <<< StateT) \(s@(ParseState _ _ consumed)) -> do -- | >>> runParser "ac" (tryRethrow (char 'a' *> char 'b')) -- | Left (ParseError "Expected 'b'" (Position { line: 1, column: 1 })) -- | ``` -tryRethrow :: forall m s a. Monad m => ParserT s m a -> ParserT s m a -tryRethrow p = (ParserT <<< ExceptT <<< StateT) \(s@(ParseState _ position consumed)) -> do - Tuple e s'@(ParseState input' position' _) <- runStateT (runExceptT (unwrap p)) s - case e of - Left (ParseError err _) -> pure (Tuple (Left (ParseError err position)) (ParseState input' position' consumed)) - _ -> pure (Tuple e s') +tryRethrow :: forall m s a. ParserT s m a -> ParserT s m a +tryRethrow (ParserT k1) = ParserT + ( mkFn5 \state1@(ParseState _ position consumed) more lift throw done -> + runFn5 k1 state1 more lift + ( mkFn2 \(ParseState input' position' _) (ParseError err _) -> + runFn2 throw (ParseState input' position' consumed) (ParseError err position) + ) + done + ) -- | Parse a phrase, without modifying the consumed state or stream position. -lookAhead :: forall s a m. Monad m => ParserT s m a -> ParserT s m a -lookAhead p = (ParserT <<< ExceptT <<< StateT) \s -> do - Tuple e _ <- runStateT (runExceptT (unwrap p)) s - pure (Tuple e s) +lookAhead :: forall s a m. ParserT s m a -> ParserT s m a +lookAhead (ParserT k1) = ParserT + ( mkFn5 \state1 more lift throw done -> + runFn5 k1 state1 more lift + (mkFn2 \_ err -> runFn2 throw state1 err) + (mkFn2 \_ res -> runFn2 done state1 res) + ) -- | Match one or more times. -many1 :: forall m s a. Monad m => ParserT s m a -> ParserT s m (NonEmptyList a) +many1 :: forall m s a. ParserT s m a -> ParserT s m (NonEmptyList a) many1 p = NEL.cons' <$> p <*> many p -- | Match one or more times. -- | -- | Stack-safe version of `many1` at the expense of a `MonadRec` constraint. -many1Rec :: forall m s a. MonadRec m => ParserT s m a -> ParserT s m (NonEmptyList a) +many1Rec :: forall m s a. ParserT s m a -> ParserT s m (NonEmptyList a) many1Rec p = NEL.cons' <$> p <*> manyRec p -- | Parse phrases delimited by a separator. @@ -230,17 +236,17 @@ many1Rec p = NEL.cons' <$> p <*> manyRec p -- | ```purescript -- | digit `sepBy` string "," -- | ``` -sepBy :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a) +sepBy :: forall m s a sep. ParserT s m a -> ParserT s m sep -> ParserT s m (List a) sepBy p sep = map NEL.toList (sepBy1 p sep) <|> pure Nil -- | Parse phrases delimited by a separator. -- | -- | Stack-safe version of `sepBy` at the expense of a `MonadRec` constraint. -sepByRec :: forall m s a sep. MonadRec m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a) +sepByRec :: forall m s a sep. ParserT s m a -> ParserT s m sep -> ParserT s m (List a) sepByRec p sep = map NEL.toList (sepBy1Rec p sep) <|> pure Nil -- | Parse phrases delimited by a separator, requiring at least one match. -sepBy1 :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) +sepBy1 :: forall m s a sep. ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) sepBy1 p sep = do a <- p as <- many $ sep *> p @@ -249,24 +255,24 @@ sepBy1 p sep = do -- | Parse phrases delimited by a separator, requiring at least one match. -- | -- | Stack-safe version of `sepBy1` at the expense of a `MonadRec` constraint. -sepBy1Rec :: forall m s a sep. MonadRec m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) +sepBy1Rec :: forall m s a sep. ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) sepBy1Rec p sep = do a <- p as <- manyRec $ sep *> p pure (NEL.cons' a as) -- | Parse phrases delimited and optionally terminated by a separator. -sepEndBy :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a) +sepEndBy :: forall m s a sep. ParserT s m a -> ParserT s m sep -> ParserT s m (List a) sepEndBy p sep = map NEL.toList (sepEndBy1 p sep) <|> pure Nil -- | Parse phrases delimited and optionally terminated by a separator. -- | -- | Stack-safe version of `sepEndBy` at the expense of a `MonadRec` constraint. -sepEndByRec :: forall m s a sep. MonadRec m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a) +sepEndByRec :: forall m s a sep. ParserT s m a -> ParserT s m sep -> ParserT s m (List a) sepEndByRec p sep = map NEL.toList (sepEndBy1Rec p sep) <|> pure Nil -- | Parse phrases delimited and optionally terminated by a separator, requiring at least one match. -sepEndBy1 :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) +sepEndBy1 :: forall m s a sep. ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) sepEndBy1 p sep = do a <- p ( do @@ -278,7 +284,7 @@ sepEndBy1 p sep = do -- | Parse phrases delimited and optionally terminated by a separator, requiring at least one match. -- | -- | Stack-safe version of `sepEndBy1` at the expense of a `MonadRec` constraint. -sepEndBy1Rec :: forall m s a sep. MonadRec m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) +sepEndBy1Rec :: forall m s a sep. ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) sepEndBy1Rec p sep = do a <- p (NEL.cons' a <$> tailRecM go Nil) <|> pure (NEL.singleton a) @@ -295,23 +301,23 @@ sepEndBy1Rec p sep = do done = defer \_ -> pure $ Done $ reverse acc -- | Parse phrases delimited and terminated by a separator, requiring at least one match. -endBy1 :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) +endBy1 :: forall m s a sep. ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) endBy1 p sep = many1 $ p <* sep -- | Parse phrases delimited and terminated by a separator, requiring at least one match. -- | -- | Stack-safe version of `endBy1` at the expense of a `MonadRec` constraint. -endBy1Rec :: forall m s a sep. MonadRec m => ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) +endBy1Rec :: forall m s a sep. ParserT s m a -> ParserT s m sep -> ParserT s m (NonEmptyList a) endBy1Rec p sep = many1Rec $ p <* sep -- | Parse phrases delimited and terminated by a separator. -endBy :: forall m s a sep. Monad m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a) +endBy :: forall m s a sep. ParserT s m a -> ParserT s m sep -> ParserT s m (List a) endBy p sep = many $ p <* sep -- | Parse phrases delimited and terminated by a separator. -- | -- | Stack-safe version of `endBy` at the expense of a `MonadRec` constraint. -endByRec :: forall m s a sep. MonadRec m => ParserT s m a -> ParserT s m sep -> ParserT s m (List a) +endByRec :: forall m s a sep. ParserT s m a -> ParserT s m sep -> ParserT s m (List a) endByRec p sep = manyRec $ p <* sep -- | Parse phrases delimited by a right-associative operator. @@ -321,13 +327,13 @@ endByRec p sep = manyRec $ p <* sep -- | ```purescript -- | chainr digit (string "+" $> add) 0 -- | ``` -chainr :: forall m s a. Monad m => ParserT s m a -> ParserT s m (a -> a -> a) -> a -> ParserT s m a +chainr :: forall m s a. ParserT s m a -> ParserT s m (a -> a -> a) -> a -> ParserT s m a chainr p f a = chainr1 p f <|> pure a -- | Parse phrases delimited by a right-associative operator. -- | -- | Stack-safe version of `chainr` at the expense of a `MonadRec` constraint. -chainrRec :: forall m s a. MonadRec m => ParserT s m a -> ParserT s m (a -> a -> a) -> a -> ParserT s m a +chainrRec :: forall m s a. ParserT s m a -> ParserT s m (a -> a -> a) -> a -> ParserT s m a chainrRec p f a = chainr1Rec p f <|> pure a -- | Parse phrases delimited by a left-associative operator. @@ -337,17 +343,17 @@ chainrRec p f a = chainr1Rec p f <|> pure a -- | ```purescript -- | chainr digit (string "+" $> add) 0 -- | ``` -chainl :: forall m s a. Monad m => ParserT s m a -> ParserT s m (a -> a -> a) -> a -> ParserT s m a +chainl :: forall m s a. ParserT s m a -> ParserT s m (a -> a -> a) -> a -> ParserT s m a chainl p f a = chainl1 p f <|> pure a -- | Parse phrases delimited by a left-associative operator. -- | -- | Stack-safe version of `chainl` at the expense of a `MonadRec` constraint. -chainlRec :: forall m s a. MonadRec m => ParserT s m a -> ParserT s m (a -> a -> a) -> a -> ParserT s m a +chainlRec :: forall m s a. ParserT s m a -> ParserT s m (a -> a -> a) -> a -> ParserT s m a chainlRec p f a = chainl1Rec p f <|> pure a -- | Parse phrases delimited by a left-associative operator, requiring at least one match. -chainl1 :: forall m s a. Monad m => ParserT s m a -> ParserT s m (a -> a -> a) -> ParserT s m a +chainl1 :: forall m s a. ParserT s m a -> ParserT s m (a -> a -> a) -> ParserT s m a chainl1 p f = do a <- p chainl1' a @@ -362,7 +368,7 @@ chainl1 p f = do -- | Parse phrases delimited by a left-associative operator, requiring at least one match. -- | -- | Stack-safe version of `chainl1` at the expense of a `MonadRec` constraint. -chainl1Rec :: forall m s a. MonadRec m => ParserT s m a -> ParserT s m (a -> a -> a) -> ParserT s m a +chainl1Rec :: forall m s a. ParserT s m a -> ParserT s m (a -> a -> a) -> ParserT s m a chainl1Rec p f = do a <- p tailRecM go a @@ -377,7 +383,7 @@ chainl1Rec p f = do <|> pure (Done a) -- | Parse phrases delimited by a right-associative operator, requiring at least one match. -chainr1 :: forall m s a. Monad m => ParserT s m a -> ParserT s m (a -> a -> a) -> ParserT s m a +chainr1 :: forall m s a. ParserT s m a -> ParserT s m (a -> a -> a) -> ParserT s m a chainr1 p f = do a <- p chainr1' a @@ -392,7 +398,7 @@ chainr1 p f = do -- | Parse phrases delimited by a right-associative operator, requiring at least one match. -- | -- | Stack-safe version of `chainr1` at the expense of a `MonadRec` constraint. -chainr1Rec :: forall m s a. MonadRec m => ParserT s m a -> ParserT s m (a -> a -> a) -> ParserT s m a +chainr1Rec :: forall m s a. ParserT s m a -> ParserT s m (a -> a -> a) -> ParserT s m a chainr1Rec p f = do a <- p tailRecM go { last: a, init: Nil } @@ -435,21 +441,21 @@ chainr1Rec p f = do apply y (x /\ op) = x `op` y -- | Parse one of a set of alternatives. -choice :: forall f m s a. Foldable f => Monad m => f (ParserT s m a) -> ParserT s m a +choice :: forall f m s a. Foldable f => f (ParserT s m a) -> ParserT s m a choice = foldl (<|>) empty -- | Skip many instances of a phrase. -skipMany :: forall s a m. Monad m => ParserT s m a -> ParserT s m Unit +skipMany :: forall s a m. ParserT s m a -> ParserT s m Unit skipMany p = skipMany1 p <|> pure unit -- | Skip many instances of a phrase. -- | -- | Stack-safe version of `skipMany` at the expense of a `MonadRec` constraint. -skipManyRec :: forall s a m. MonadRec m => ParserT s m a -> ParserT s m Unit +skipManyRec :: forall s a m. ParserT s m a -> ParserT s m Unit skipManyRec p = skipMany1Rec p <|> pure unit -- | Skip at least one instance of a phrase. -skipMany1 :: forall s a m. Monad m => ParserT s m a -> ParserT s m Unit +skipMany1 :: forall s a m. ParserT s m a -> ParserT s m Unit skipMany1 p = do _ <- p _ <- skipMany p @@ -458,7 +464,7 @@ skipMany1 p = do -- | Skip at least one instance of a phrase. -- | -- | Stack-safe version of `skipMany1` at the expense of a `MonadRec` constraint. -skipMany1Rec :: forall s a m. MonadRec m => ParserT s m a -> ParserT s m Unit +skipMany1Rec :: forall s a m. ParserT s m a -> ParserT s m Unit skipMany1Rec p = p *> tailRecM go unit where go _ = (p $> Loop unit) <|> pure (Done unit) @@ -466,11 +472,11 @@ skipMany1Rec p = p *> tailRecM go unit -- | Fail if the parser succeeds. -- | -- | Will never consume input. -notFollowedBy :: forall s a m. Monad m => ParserT s m a -> ParserT s m Unit +notFollowedBy :: forall s a m. ParserT s m a -> ParserT s m Unit notFollowedBy p = try $ (try p *> fail "Negated parser succeeded") <|> pure unit -- | Parse many phrases until the terminator phrase matches. -manyTill :: forall s a m e. Monad m => ParserT s m a -> ParserT s m e -> ParserT s m (List a) +manyTill :: forall s a m e. ParserT s m a -> ParserT s m e -> ParserT s m (List a) manyTill p end = scan where scan = (end $> Nil) <|> do @@ -481,7 +487,7 @@ manyTill p end = scan -- | Parse many phrases until the terminator phrase matches. -- | -- | Stack-safe version of `manyTill` at the expense of a `MonadRec` constraint. -manyTillRec :: forall s a m e. MonadRec m => ParserT s m a -> ParserT s m e -> ParserT s m (List a) +manyTillRec :: forall s a m e. ParserT s m a -> ParserT s m e -> ParserT s m (List a) manyTillRec p end = tailRecM go Nil where go :: List a -> ParserT s m (Step (List a) (List a)) @@ -490,7 +496,7 @@ manyTillRec p end = tailRecM go Nil <|> (p <#> \x -> Loop $ x : acc) -- | Parse at least one phrase until the terminator phrase matches. -many1Till :: forall s a m e. Monad m => ParserT s m a -> ParserT s m e -> ParserT s m (NonEmptyList a) +many1Till :: forall s a m e. ParserT s m a -> ParserT s m e -> ParserT s m (NonEmptyList a) many1Till p end = do x <- p xs <- manyTill p end @@ -499,7 +505,7 @@ many1Till p end = do -- | Parse at least one phrase until the terminator phrase matches. -- | -- | Stack-safe version of `many1Till` at the expense of a `MonadRec` constraint. -many1TillRec :: forall s a m e. MonadRec m => ParserT s m a -> ParserT s m e -> ParserT s m (NonEmptyList a) +many1TillRec :: forall s a m e. ParserT s m a -> ParserT s m e -> ParserT s m (NonEmptyList a) many1TillRec p end = NEL.cons' <$> p <*> manyTillRec p end -- | Parse many phrases until the terminator phrase matches. @@ -537,7 +543,7 @@ many1TillRec p end = NEL.cons' <$> p <*> manyTillRec p end -- | ``` -- | (Tuple ('a' : 'a' : Nil) 'b') -- | ``` -manyTill_ :: forall s a m e. Monad m => ParserT s m a -> ParserT s m e -> ParserT s m (Tuple (List a) e) +manyTill_ :: forall s a m e. ParserT s m a -> ParserT s m e -> ParserT s m (Tuple (List a) e) manyTill_ p end = scan where scan = @@ -552,7 +558,7 @@ manyTill_ p end = scan -- | Parse many phrases until the terminator phrase matches, requiring at least one match. -- | Returns the list of phrases and the terminator phrase. -many1Till_ :: forall s a m e. Monad m => ParserT s m a -> ParserT s m e -> ParserT s m (Tuple (NonEmptyList a) e) +many1Till_ :: forall s a m e. ParserT s m a -> ParserT s m e -> ParserT s m (Tuple (NonEmptyList a) e) many1Till_ p end = do x <- p Tuple xs t <- manyTill_ p end @@ -562,7 +568,7 @@ many1Till_ p end = do -- | Returns the list of phrases and the terminator phrase. -- | -- | Stack-safe version of `manyTill_` at the expense of a `MonadRec` constraint. -manyTillRec_ :: forall s a m e. MonadRec m => ParserT s m a -> ParserT s m e -> ParserT s m (Tuple (List a) e) +manyTillRec_ :: forall s a m e. ParserT s m a -> ParserT s m e -> ParserT s m (Tuple (List a) e) manyTillRec_ p end = tailRecM go Nil where go :: List a -> ParserT s m (Step (List a) (Tuple (List a) e)) @@ -579,7 +585,7 @@ manyTillRec_ p end = tailRecM go Nil -- | Returns the list of phrases and the terminator phrase. -- | -- | Stack-safe version of `many1Till_` at the expense of a `MonadRec` constraint. -many1TillRec_ :: forall s a m e. MonadRec m => ParserT s m a -> ParserT s m e -> ParserT s m (Tuple (NonEmptyList a) e) +many1TillRec_ :: forall s a m e. ParserT s m a -> ParserT s m e -> ParserT s m (Tuple (NonEmptyList a) e) many1TillRec_ p end = do x <- p Tuple xs t <- manyTillRec_ p end diff --git a/src/Text/Parsing/Parser/Expr.purs b/src/Text/Parsing/Parser/Expr.purs index cc0e148..f9d0330 100644 --- a/src/Text/Parsing/Parser/Expr.purs +++ b/src/Text/Parsing/Parser/Expr.purs @@ -45,10 +45,10 @@ type SplitAccum m s a = -- | , [ Infix (string "+" $> add) AssocRight ] -- | ] digit -- | ``` -buildExprParser :: forall m s a. Monad m => OperatorTable m s a -> ParserT s m a -> ParserT s m a +buildExprParser :: forall m s a. OperatorTable m s a -> ParserT s m a -> ParserT s m a buildExprParser operators simpleExpr = foldl makeParser simpleExpr operators -makeParser :: forall m s a. Monad m => ParserT s m a -> Array (Operator m s a) -> ParserT s m a +makeParser :: forall m s a. ParserT s m a -> Array (Operator m s a) -> ParserT s m a makeParser term ops = do x <- termP prefixP term postfixP rassocP x rassocOp prefixP term postfixP @@ -82,7 +82,7 @@ splitOp (Infix op AssocRight) accum = accum { rassoc = op : accum.rassoc } splitOp (Prefix op) accum = accum { prefix = op : accum.prefix } splitOp (Postfix op) accum = accum { postfix = op : accum.postfix } -rassocP :: forall m a b c s. Monad m => a -> ParserT s m (a -> a -> a) -> ParserT s m (b -> c) -> ParserT s m b -> ParserT s m (c -> a) -> ParserT s m a +rassocP :: forall m a b c s. a -> ParserT s m (a -> a -> a) -> ParserT s m (b -> c) -> ParserT s m b -> ParserT s m (c -> a) -> ParserT s m a rassocP x rassocOp prefixP term postfixP = do f <- rassocOp y <- do @@ -90,25 +90,25 @@ rassocP x rassocOp prefixP term postfixP = do rassocP1 z rassocOp prefixP term postfixP pure (f x y) -rassocP1 :: forall m a b c s. Monad m => a -> ParserT s m (a -> a -> a) -> ParserT s m (b -> c) -> ParserT s m b -> ParserT s m (c -> a) -> ParserT s m a +rassocP1 :: forall m a b c s. a -> ParserT s m (a -> a -> a) -> ParserT s m (b -> c) -> ParserT s m b -> ParserT s m (c -> a) -> ParserT s m a rassocP1 x rassocOp prefixP term postfixP = rassocP x rassocOp prefixP term postfixP <|> pure x -lassocP :: forall m a b c s. Monad m => a -> ParserT s m (a -> a -> a) -> ParserT s m (b -> c) -> ParserT s m b -> ParserT s m (c -> a) -> ParserT s m a +lassocP :: forall m a b c s. a -> ParserT s m (a -> a -> a) -> ParserT s m (b -> c) -> ParserT s m b -> ParserT s m (c -> a) -> ParserT s m a lassocP x lassocOp prefixP term postfixP = do f <- lassocOp y <- termP prefixP term postfixP lassocP1 (f x y) lassocOp prefixP term postfixP -lassocP1 :: forall m a b c s. Monad m => a -> ParserT s m (a -> a -> a) -> ParserT s m (b -> c) -> ParserT s m b -> ParserT s m (c -> a) -> ParserT s m a +lassocP1 :: forall m a b c s. a -> ParserT s m (a -> a -> a) -> ParserT s m (b -> c) -> ParserT s m b -> ParserT s m (c -> a) -> ParserT s m a lassocP1 x lassocOp prefixP term postfixP = lassocP x lassocOp prefixP term postfixP <|> pure x -nassocP :: forall m a b c d e s. Monad m => a -> ParserT s m (a -> d -> e) -> ParserT s m (b -> c) -> ParserT s m b -> ParserT s m (c -> d) -> ParserT s m e +nassocP :: forall m a b c d e s. a -> ParserT s m (a -> d -> e) -> ParserT s m (b -> c) -> ParserT s m b -> ParserT s m (c -> d) -> ParserT s m e nassocP x nassocOp prefixP term postfixP = do f <- nassocOp y <- termP prefixP term postfixP pure (f x y) -termP :: forall m s a b c. Monad m => ParserT s m (a -> b) -> ParserT s m a -> ParserT s m (b -> c) -> ParserT s m c +termP :: forall m s a b c. ParserT s m (a -> b) -> ParserT s m a -> ParserT s m (b -> c) -> ParserT s m c termP prefixP term postfixP = do pre <- prefixP x <- term diff --git a/src/Text/Parsing/Parser/String.purs b/src/Text/Parsing/Parser/String.purs index b2e14b5..5104bd3 100644 --- a/src/Text/Parsing/Parser/String.purs +++ b/src/Text/Parsing/Parser/String.purs @@ -42,13 +42,14 @@ module Text.Parsing.Parser.String import Prelude hiding (between) -import Control.Monad.State (get, put) +import Control.Monad.State (get, put, state) import Data.Array (notElem) import Data.Array.NonEmpty as NonEmptyArray import Data.Char (fromCharCode) import Data.CodePoint.Unicode (isSpace) import Data.Either (Either(..)) import Data.Foldable (elem) +import Data.Function.Uncurried (mkFn5, runFn2) import Data.Maybe (Maybe(..)) import Data.String (CodePoint, Pattern(..), length, null, singleton, splitAt, stripPrefix, uncons) import Data.String.CodeUnits as SCU @@ -57,40 +58,40 @@ import Data.String.Regex.Flags (RegexFlags(..), RegexFlagsRec) import Data.Tuple (Tuple(..), fst) import Prim.Row (class Nub, class Union) import Record (merge) -import Text.Parsing.Parser (ParseState(..), ParserT, consume, fail) +import Text.Parsing.Parser (ParseError(..), ParseState(..), ParserT(..), consume, fail) import Text.Parsing.Parser.Combinators (skipMany, tryRethrow, (), (<~?>)) import Text.Parsing.Parser.Pos (Position(..)) import Unsafe.Coerce (unsafeCoerce) -- | Match “end-of-file,” the end of the input stream. -eof :: forall m. Monad m => ParserT String m Unit +eof :: forall m. ParserT String m Unit eof = do ParseState input _ _ <- get if (null input) -- We must consume so this combines correctly with notFollowedBy then consume - else (fail "Expected EOF") + else fail "Expected EOF" -- | Match the entire rest of the input stream. Always succeeds. -rest :: forall m. Monad m => ParserT String m String -rest = do - ParseState input position _ <- get - put $ ParseState "" (updatePosString position input) true - pure input +rest :: forall m. ParserT String m String +rest = state \(ParseState input position _) -> + Tuple input (ParseState "" (updatePosString position input) true) -- | Match the specified string. -string :: forall m. Monad m => String -> ParserT String m String -string str = do - ParseState input position _ <- get - case stripPrefix (Pattern str) input of - Just remainder -> do - put $ ParseState remainder (updatePosString position str) true - pure str - _ -> fail ("Expected " <> show str) +string :: forall m. String -> ParserT String m String +string str = ParserT + ( mkFn5 \state1@(ParseState input position _) _ _ throw done -> + case stripPrefix (Pattern str) input of + Just remainder -> + runFn2 done (ParseState remainder (updatePosString position str) true) str + _ -> + runFn2 throw state1 (ParseError ("Expected " <> show str) position) + + ) -- | Match any BMP `Char`. -- | Parser will fail if the character is not in the Basic Multilingual Plane. -anyChar :: forall m. Monad m => ParserT String m Char +anyChar :: forall m. ParserT String m Char anyChar = tryRethrow do cp :: Int <- unCodePoint <$> anyCodePoint -- the `fromCharCode` function doesn't check if this is beyond the @@ -104,66 +105,64 @@ anyChar = tryRethrow do -- | Match any Unicode character. -- | Always succeeds. -anyCodePoint :: forall m. Monad m => ParserT String m CodePoint -anyCodePoint = do - ParseState input position _ <- get +anyCodePoint :: forall m. ParserT String m CodePoint +anyCodePoint = join $ state \state1@(ParseState input position _) -> case uncons input of - Nothing -> fail "Unexpected EOF" - Just { head, tail } -> do - put $ ParseState tail (updatePosSingle position head) true - pure head + Nothing -> + Tuple (fail "Unexpected EOF") state1 + Just { head, tail } -> + Tuple (pure head) (ParseState tail (updatePosSingle position head) true) -- | Match a BMP `Char` satisfying the predicate. -satisfy :: forall m. Monad m => (Char -> Boolean) -> ParserT String m Char +satisfy :: forall m. (Char -> Boolean) -> ParserT String m Char satisfy f = tryRethrow do c <- anyChar if f c then pure c else fail "Predicate unsatisfied" -- | Match a Unicode character satisfying the predicate. -satisfyCodePoint :: forall m. Monad m => (CodePoint -> Boolean) -> ParserT String m CodePoint +satisfyCodePoint :: forall m. (CodePoint -> Boolean) -> ParserT String m CodePoint satisfyCodePoint f = tryRethrow do c <- anyCodePoint if f c then pure c else fail "Predicate unsatisfied" -- | Match the specified BMP `Char`. -char :: forall m. Monad m => Char -> ParserT String m Char +char :: forall m. Char -> ParserT String m Char char c = satisfy (_ == c) show c -- | Match a `String` exactly *N* characters long. -takeN :: forall m. Monad m => Int -> ParserT String m String -takeN n = do - ParseState input position _ <- get +takeN :: forall m. Int -> ParserT String m String +takeN n = join $ state \state1@(ParseState input position _) -> do let { before, after } = splitAt n input if length before == n then do - put $ ParseState after (updatePosString position before) true - pure before - else fail ("Could not take " <> show n <> " characters") + Tuple (pure before) (ParseState after (updatePosString position before) true) + else + Tuple (fail ("Could not take " <> show n <> " characters")) state1 -- | Match zero or more whitespace characters satisfying -- | `Data.CodePoint.Unicode.isSpace`. Always succeeds. -whiteSpace :: forall m. Monad m => ParserT String m String +whiteSpace :: forall m. ParserT String m String whiteSpace = fst <$> match skipSpaces -- | Skip whitespace characters and throw them away. Always succeeds. -skipSpaces :: forall m. Monad m => ParserT String m Unit +skipSpaces :: forall m. ParserT String m Unit skipSpaces = skipMany (satisfyCodePoint isSpace) -- | Match one of the BMP `Char`s in the array. -oneOf :: forall m. Monad m => Array Char -> ParserT String m Char +oneOf :: forall m. Array Char -> ParserT String m Char oneOf ss = satisfy (flip elem ss) <~?> \_ -> "one of " <> show ss -- | Match any BMP `Char` not in the array. -noneOf :: forall m. Monad m => Array Char -> ParserT String m Char +noneOf :: forall m. Array Char -> ParserT String m Char noneOf ss = satisfy (flip notElem ss) <~?> \_ -> "none of " <> show ss -- | Match one of the Unicode characters in the array. -oneOfCodePoints :: forall m. Monad m => Array CodePoint -> ParserT String m CodePoint +oneOfCodePoints :: forall m. Array CodePoint -> ParserT String m CodePoint oneOfCodePoints ss = satisfyCodePoint (flip elem ss) <~?> \_ -> "one of " <> show (singleton <$> ss) -- | Match any Unicode character not in the array. -noneOfCodePoints :: forall m. Monad m => Array CodePoint -> ParserT String m CodePoint +noneOfCodePoints :: forall m. Array CodePoint -> ParserT String m CodePoint noneOfCodePoints ss = satisfyCodePoint (flip notElem ss) <~?> \_ -> "none of " <> show (singleton <$> ss) -- | Updates a `Position` by adding the columns and lines in `String`. @@ -199,7 +198,7 @@ updatePosSingle (Position { line, column }) cp = case unCodePoint cp of -- | ``` -- | fst <$> match (Combinators.skipMany (char 'x')) -- | ``` -match :: forall m a. Monad m => ParserT String m a -> ParserT String m (Tuple String a) +match :: forall m a. ParserT String m a -> ParserT String m (Tuple String a) match p = do ParseState input1 _ _ <- get x <- p @@ -306,4 +305,4 @@ type RegexFlagsRow = , multiline :: Boolean , sticky :: Boolean , unicode :: Boolean - ) \ No newline at end of file + ) diff --git a/src/Text/Parsing/Parser/String/Basic.purs b/src/Text/Parsing/Parser/String/Basic.purs index 78d8a95..f48cf9f 100644 --- a/src/Text/Parsing/Parser/String/Basic.purs +++ b/src/Text/Parsing/Parser/String/Basic.purs @@ -34,36 +34,36 @@ import Text.Parsing.Parser.String (noneOf, noneOfCodePoints, oneOf, oneOfCodePoi import Text.Parsing.Parser.String as Parser.String -- | Parse a digit. Matches any char that satisfies `Data.CodePoint.Unicode.isDecDigit`. -digit :: forall m. Monad m => ParserT String m Char +digit :: forall m. ParserT String m Char digit = satisfyCP isDecDigit "digit" -- | Parse a hex digit. Matches any char that satisfies `Data.CodePoint.Unicode.isHexDigit`. -hexDigit :: forall m. Monad m => ParserT String m Char +hexDigit :: forall m. ParserT String m Char hexDigit = satisfyCP isHexDigit "hex digit" -- | Parse an octal digit. Matches any char that satisfies `Data.CodePoint.Unicode.isOctDigit`. -octDigit :: forall m. Monad m => ParserT String m Char +octDigit :: forall m. ParserT String m Char octDigit = satisfyCP isOctDigit "oct digit" -- | Parse a lowercase letter. Matches any char that satisfies `Data.CodePoint.Unicode.isLower`. -lower :: forall m. Monad m => ParserT String m Char +lower :: forall m. ParserT String m Char lower = satisfyCP isLower "lowercase letter" -- | Parse an uppercase letter. Matches any char that satisfies `Data.CodePoint.Unicode.isUpper`. -upper :: forall m. Monad m => ParserT String m Char +upper :: forall m. ParserT String m Char upper = satisfyCP isUpper "uppercase letter" -- | Parse a space character. Matches any char that satisfies `Data.CodePoint.Unicode.isSpace`. -space :: forall m. Monad m => ParserT String m Char +space :: forall m. ParserT String m Char space = satisfyCP isSpace "space" -- | Parse an alphabetical character. Matches any char that satisfies `Data.CodePoint.Unicode.isAlpha`. -letter :: forall m. Monad m => ParserT String m Char +letter :: forall m. ParserT String m Char letter = satisfyCP isAlpha "letter" -- | Parse an alphabetical or numerical character. -- | Matches any char that satisfies `Data.CodePoint.Unicode.isAlphaNum`. -alphaNum :: forall m. Monad m => ParserT String m Char +alphaNum :: forall m. ParserT String m Char alphaNum = satisfyCP isAlphaNum "letter or digit" -- | Parser based on the __Data.Number.fromString__ function. @@ -80,7 +80,7 @@ alphaNum = satisfyCP isAlphaNum "letter or digit" -- | * `"-3.0E-1.0"` -- | * `"NaN"` -- | * `"-Infinity"` -number :: forall m. Monad m => ParserT String m Number +number :: forall m. ParserT String m Number -- TODO because the JavaScript parseFloat function will successfully parse -- a Number up until it doesn't understand something and then return -- the partially parsed Number, this parser will sometimes consume more @@ -110,7 +110,7 @@ number = -- | * `"3"` -- | * `"-3"` -- | * `"+300"` -intDecimal :: forall m. Monad m => ParserT String m Int +intDecimal :: forall m. ParserT String m Int intDecimal = do Tuple section _ <- Parser.String.match do _ <- Parser.String.oneOf [ '+', '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ] @@ -120,5 +120,5 @@ intDecimal = do Just x -> pure x -- | Helper function -satisfyCP :: forall m. Monad m => (CodePoint -> Boolean) -> ParserT String m Char +satisfyCP :: forall m. (CodePoint -> Boolean) -> ParserT String m Char satisfyCP p = Parser.String.satisfy (p <<< codePointFromChar) diff --git a/src/Text/Parsing/Parser/Token.purs b/src/Text/Parsing/Parser/Token.purs index e9fe7ff..14dfe54 100644 --- a/src/Text/Parsing/Parser/Token.purs +++ b/src/Text/Parsing/Parser/Token.purs @@ -51,7 +51,7 @@ import Text.Parsing.Parser.String.Basic as Basic import Text.Parsing.Parser.String.Basic (digit, hexDigit, octDigit, upper, space, letter, alphaNum) -- | A parser which returns the first token in the stream. -token :: forall m a. Monad m => (a -> Position) -> ParserT (List a) m a +token :: forall m a. (a -> Position) -> ParserT (List a) m a token tokpos = do input <- gets \(ParseState input _ _) -> input case List.uncons input of @@ -62,18 +62,18 @@ token tokpos = do pure head -- | A parser which matches any token satisfying the predicate. -when :: forall m a. Monad m => (a -> Position) -> (a -> Boolean) -> ParserT (List a) m a +when :: forall m a. (a -> Position) -> (a -> Boolean) -> ParserT (List a) m a when tokpos f = tryRethrow do a <- token tokpos guard $ f a pure a -- | Match the specified token at the head of the stream. -match :: forall a m. Monad m => Eq a => (a -> Position) -> a -> ParserT (List a) m a +match :: forall a m. Eq a => (a -> Position) -> a -> ParserT (List a) m a match tokpos tok = when tokpos (_ == tok) -- | Match the “end-of-file,” the end of the input stream. -eof :: forall a m. Monad m => ParserT (List a) m Unit +eof :: forall a m. ParserT (List a) m Unit eof = do ParseState input _ _ <- get if (List.null input) @@ -355,7 +355,7 @@ type GenTokenParser s m = -- | reserved = tokenParser.reserved -- | ... -- | ``` -makeTokenParser :: forall m. Monad m => GenLanguageDef String m -> GenTokenParser String m +makeTokenParser :: forall m. GenLanguageDef String m -> GenTokenParser String m makeTokenParser (LanguageDef languageDef) = { identifier: identifier , reserved: reserved @@ -806,7 +806,7 @@ makeTokenParser (LanguageDef languageDef) = -- Identifiers & Reserved words ----------------------------------------------------------- -isReservedName :: forall m. Monad m => GenLanguageDef String m -> String -> Boolean +isReservedName :: forall m. GenLanguageDef String m -> String -> Boolean isReservedName langDef@(LanguageDef languageDef) name = isReserved (theReservedNames langDef) caseName where @@ -823,7 +823,7 @@ isReserved names name = EQ -> true GT -> false -theReservedNames :: forall m. Monad m => GenLanguageDef String m -> Array String +theReservedNames :: forall m. GenLanguageDef String m -> Array String theReservedNames (LanguageDef languageDef) | languageDef.caseSensitive = Array.sort languageDef.reservedNames | otherwise = Array.sort $ map toLower languageDef.reservedNames @@ -832,7 +832,7 @@ theReservedNames (LanguageDef languageDef) -- White space & symbols ----------------------------------------------------------- -whiteSpace' :: forall m. Monad m => GenLanguageDef String m -> ParserT String m Unit +whiteSpace' :: forall m. GenLanguageDef String m -> ParserT String m Unit whiteSpace' langDef@(LanguageDef languageDef) | null languageDef.commentLine && null languageDef.commentStart = skipMany (simpleSpace "") @@ -843,22 +843,22 @@ whiteSpace' langDef@(LanguageDef languageDef) | otherwise = skipMany (simpleSpace <|> oneLineComment langDef <|> multiLineComment langDef "") -simpleSpace :: forall m. Monad m => ParserT String m Unit +simpleSpace :: forall m. ParserT String m Unit simpleSpace = skipMany1 (satisfyCodePoint isSpace) -oneLineComment :: forall m. Monad m => GenLanguageDef String m -> ParserT String m Unit +oneLineComment :: forall m. GenLanguageDef String m -> ParserT String m Unit oneLineComment (LanguageDef languageDef) = try (string languageDef.commentLine) *> skipMany (satisfy (_ /= '\n')) -multiLineComment :: forall m. Monad m => GenLanguageDef String m -> ParserT String m Unit +multiLineComment :: forall m. GenLanguageDef String m -> ParserT String m Unit multiLineComment langDef@(LanguageDef languageDef) = try (string languageDef.commentStart) *> inComment langDef -inComment :: forall m. Monad m => GenLanguageDef String m -> ParserT String m Unit +inComment :: forall m. GenLanguageDef String m -> ParserT String m Unit inComment langDef@(LanguageDef languageDef) = if languageDef.nestedComments then inCommentMulti langDef else inCommentSingle langDef -inCommentMulti :: forall m. Monad m => GenLanguageDef String m -> ParserT String m Unit +inCommentMulti :: forall m. GenLanguageDef String m -> ParserT String m Unit inCommentMulti langDef@(LanguageDef languageDef) = fix \p -> (void $ try (string languageDef.commentEnd)) <|> (multiLineComment langDef *> p) @@ -869,7 +869,7 @@ inCommentMulti langDef@(LanguageDef languageDef) = startEnd :: Array Char startEnd = SCU.toCharArray languageDef.commentEnd <> SCU.toCharArray languageDef.commentStart -inCommentSingle :: forall m. Monad m => GenLanguageDef String m -> ParserT String m Unit +inCommentSingle :: forall m. GenLanguageDef String m -> ParserT String m Unit inCommentSingle (LanguageDef languageDef) = fix \p -> (void $ try (string languageDef.commentEnd)) <|> (skipMany1 (noneOf startEnd) *> p) diff --git a/test/Main.purs b/test/Main.purs index eb239c9..6f4d192 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -30,10 +30,10 @@ import Text.Parsing.Parser.String.Basic (intDecimal, number, letter) import Text.Parsing.Parser.Token (TokenParser, makeTokenParser, match, token, when) import Text.Parsing.Parser.Token as Parser.Token -parens :: forall m a. Monad m => ParserT String m a -> ParserT String m a +parens :: forall m a. ParserT String m a -> ParserT String m a parens = between (string "(") (string ")") -nested :: forall m. Monad m => ParserT String m Int +nested :: forall m. ParserT String m Int nested = fix \p -> ( do _ <- string "a" From 03992184db352b4539ff0168f2b896a68914c785 Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Sun, 20 Mar 2022 17:14:30 -0700 Subject: [PATCH 02/14] Add json benchmark, update string-parsers --- bench/Json/Common.purs | 21 +++++++++++ bench/Json/Parsing.purs | 58 +++++++++++++++++++++++++++++ bench/Json/StringParsers.purs | 58 +++++++++++++++++++++++++++++ bench/Json/TestData.purs | 26 +++++++++++++ bench/Main.purs | 37 ++++++++++++++++-- packages.dhall | 2 +- src/Text/Parsing/Parser/String.purs | 23 ++++++------ 7 files changed, 209 insertions(+), 16 deletions(-) create mode 100644 bench/Json/Common.purs create mode 100644 bench/Json/Parsing.purs create mode 100644 bench/Json/StringParsers.purs create mode 100644 bench/Json/TestData.purs diff --git a/bench/Json/Common.purs b/bench/Json/Common.purs new file mode 100644 index 0000000..db7b3f6 --- /dev/null +++ b/bench/Json/Common.purs @@ -0,0 +1,21 @@ +module Bench.Json.Common where + +import Prelude + +import Data.Generic.Rep (class Generic) +import Data.List (List) +import Data.Show.Generic (genericShow) +import Data.Tuple (Tuple) + +data Json + = JsonNull + | JsonNumber Number + | JsonString String + | JsonBoolean Boolean + | JsonArray (List Json) + | JsonObject (List (Tuple String Json)) + +derive instance Generic Json _ + +instance Show Json where + show a = genericShow a diff --git a/bench/Json/Parsing.purs b/bench/Json/Parsing.purs new file mode 100644 index 0000000..3dd57fb --- /dev/null +++ b/bench/Json/Parsing.purs @@ -0,0 +1,58 @@ +module Bench.Json.Parsing where + +import Prelude hiding (between) + +import Bench.Json.Common (Json(..)) +import Control.Lazy (defer) +import Data.List (List) +import Data.Maybe (Maybe(..)) +import Data.Number as Number +import Data.Tuple (Tuple(..)) +import Text.Parsing.Parser (Parser, fail) +import Text.Parsing.Parser.Combinators (between, choice, sepBy, try) +import Text.Parsing.Parser.String (regex, skipSpaces, string) + +json :: Parser String Json +json = defer \_ -> + skipSpaces *> choice + [ JsonObject <$> jsonObject + , JsonArray <$> jsonArray + , JsonString <$> jsonString + , JsonNumber <$> jsonNumber + , JsonBoolean <$> jsonBoolean + , JsonNull <$ jsonNull + ] + +jsonObject :: Parser String (List (Tuple String Json)) +jsonObject = defer \_ -> + between (string "{") (skipSpaces *> string "}") do + skipSpaces *> jsonObjectPair `sepBy` (try (skipSpaces *> string ",")) + +jsonObjectPair :: Parser String (Tuple String Json) +jsonObjectPair = defer \_ -> + Tuple <$> (skipSpaces *> jsonString <* skipSpaces <* string ":") <*> json + +jsonArray :: Parser String (List Json) +jsonArray = defer \_ -> + between (string "[") (skipSpaces *> string "]") do + json `sepBy` (try (skipSpaces *> string ",")) + +jsonString :: Parser String String +jsonString = between (string "\"") (string "\"") do + regex {} """\\"|[^"]*""" + +jsonNumber :: Parser String Number +jsonNumber = do + n <- regex {} """(\+|-)?(\d+(\.\d*)?|\d*\.\d+)([eE](\+|-)?\d+)?""" + case Number.fromString n of + Just n' -> pure n' + Nothing -> fail "Expected number" + +jsonBoolean :: Parser String Boolean +jsonBoolean = choice + [ true <$ string "true" + , false <$ string "false" + ] + +jsonNull :: Parser String String +jsonNull = string "null" diff --git a/bench/Json/StringParsers.purs b/bench/Json/StringParsers.purs new file mode 100644 index 0000000..858633f --- /dev/null +++ b/bench/Json/StringParsers.purs @@ -0,0 +1,58 @@ +module Bench.Json.StringParser where + +import Prelude hiding (between) + +import Bench.Json.Common (Json(..)) +import Control.Lazy (defer) +import Data.List (List) +import Data.Maybe (Maybe(..)) +import Data.Number as Number +import Data.Tuple (Tuple(..)) +import StringParser (Parser, fail, try) +import StringParser.CodePoints (regex, skipSpaces, string) +import StringParser.Combinators (between, choice, sepBy) + +json :: Parser Json +json = defer \_ -> + skipSpaces *> choice + [ JsonObject <$> jsonObject + , JsonArray <$> jsonArray + , JsonString <$> jsonString + , JsonNumber <$> jsonNumber + , JsonBoolean <$> jsonBoolean + , JsonNull <$ jsonNull + ] + +jsonObject :: Parser (List (Tuple String Json)) +jsonObject = defer \_ -> + between (string "{") (skipSpaces *> string "}") do + skipSpaces *> jsonObjectPair `sepBy` (try (skipSpaces *> string ",")) + +jsonObjectPair :: Parser (Tuple String Json) +jsonObjectPair = defer \_ -> + Tuple <$> (skipSpaces *> jsonString <* skipSpaces <* string ":") <*> json + +jsonArray :: Parser (List Json) +jsonArray = defer \_ -> + between (string "[") (skipSpaces *> string "]") do + json `sepBy` (try (skipSpaces *> string ",")) + +jsonString :: Parser String +jsonString = between (string "\"") (string "\"") do + regex """\\"|[^"]*""" + +jsonNumber :: Parser Number +jsonNumber = do + n <- regex """(\+|-)?(\d+(\.\d*)?|\d*\.\d+)([eE](\+|-)?\d+)?""" + case Number.fromString n of + Just n' -> pure n' + Nothing -> fail "Expected number" + +jsonBoolean :: Parser Boolean +jsonBoolean = choice + [ true <$ string "true" + , false <$ string "false" + ] + +jsonNull :: Parser String +jsonNull = string "null" diff --git a/bench/Json/TestData.purs b/bench/Json/TestData.purs new file mode 100644 index 0000000..a5bcd36 --- /dev/null +++ b/bench/Json/TestData.purs @@ -0,0 +1,26 @@ +module Bench.Json.TestData where + +import Prelude + +import Data.Array (replicate) +import Data.String (joinWith) + +jsonProps :: String +jsonProps = """ + "some_number": 42.00009 + , "some_string": "foobarbazquux" + , "some_null": null + , "some_boolean": true + , "some_other_boolean": false + , "some_array": [ 1, 2, "foo", true, 99 ] + , "some_object": { "foo": 42, "bar": "wat", "baz": false } + """ + +smallJson :: String +smallJson = "{" <> jsonProps <> "}" + +mediumJson :: String +mediumJson = "{" <> joinWith ", " (replicate 30 jsonProps) <> "}" + +largeJson :: String +largeJson = "[" <> joinWith ", " (replicate 100 smallJson) <> "]" diff --git a/bench/Main.purs b/bench/Main.purs index 2100c52..7833e38 100644 --- a/bench/Main.purs +++ b/bench/Main.purs @@ -41,14 +41,18 @@ module Bench.Main where import Prelude +import Bench.Json.Parsing as BenchParsing +import Bench.Json.StringParser as BenchStringParser +import Bench.Json.TestData (largeJson, mediumJson, smallJson) import Data.Array (fold, replicate) -import Data.Either (either) +import Data.Either (Either(..), either) import Data.List (many, manyRec) import Data.List.Types (List) import Data.String.Regex (Regex, regex) import Data.String.Regex as Regex import Data.String.Regex.Flags (RegexFlags(..)) import Effect (Effect) +import Effect.Class.Console (logShow) import Effect.Console (log) import Effect.Exception (throw) import Effect.Unsafe (unsafePerformEffect) @@ -56,9 +60,10 @@ import Performance.Minibench (benchWith) import Text.Parsing.Parser (Parser, runParser) import Text.Parsing.Parser.String (string) import Text.Parsing.Parser.String.Basic (digit) -import Text.Parsing.StringParser as StringParser -import Text.Parsing.StringParser.CodePoints as StringParser.CodePoints -import Text.Parsing.StringParser.CodeUnits as StringParser.CodeUnits +import StringParser (ParseError) +import StringParser as StringParser +import StringParser.CodePoints as StringParser.CodePoints +import StringParser.CodeUnits as StringParser.CodeUnits string23 :: String string23 = "23" @@ -138,3 +143,27 @@ main = do log "Regex.match patternSkidoo" benchWith 200 $ \_ -> Regex.match patternSkidoo stringSkidoo_10000 + + log "runParser json smallJson" + benchWith 1000 + $ \_ -> runParser smallJson BenchParsing.json + + log "StringParser.runParser json smallJson" + benchWith 500 + $ \_ -> StringParser.runParser BenchStringParser.json smallJson + + log "runParser json mediumJson" + benchWith 500 + $ \_ -> runParser mediumJson BenchParsing.json + + log "StringParser.runParser json mediumJson" + benchWith 1000 + $ \_ -> StringParser.runParser BenchStringParser.json mediumJson + + log "runParser json largeJson" + benchWith 100 + $ \_ -> runParser largeJson BenchParsing.json + + log "StringParser.runParser json largeJson" + benchWith 100 + $ \_ -> StringParser.runParser BenchStringParser.json largeJson diff --git a/packages.dhall b/packages.dhall index 7a6905b..6dc6069 100644 --- a/packages.dhall +++ b/packages.dhall @@ -1,4 +1,4 @@ let upstream = - https://github.com/purescript/package-sets/releases/download/psc-0.14.3-20210722/packages.dhall sha256:1ceb43aa59436bf5601bac45f6f3781c4e1f0e4c2b8458105b018e5ed8c30f8c + https://github.com/purescript/package-sets/releases/download/psc-0.14.7-20220320/packages.dhall sha256:523f5eed3b3d8c7b04b6fcb2b60b1421c06eeb26b2fedae0bd9ddcfebaf0a919 in upstream diff --git a/src/Text/Parsing/Parser/String.purs b/src/Text/Parsing/Parser/String.purs index 5104bd3..09554aa 100644 --- a/src/Text/Parsing/Parser/String.purs +++ b/src/Text/Parsing/Parser/String.purs @@ -58,19 +58,21 @@ import Data.String.Regex.Flags (RegexFlags(..), RegexFlagsRec) import Data.Tuple (Tuple(..), fst) import Prim.Row (class Nub, class Union) import Record (merge) -import Text.Parsing.Parser (ParseError(..), ParseState(..), ParserT(..), consume, fail) +import Text.Parsing.Parser (ParseError(..), ParseState(..), ParserT(..), fail) import Text.Parsing.Parser.Combinators (skipMany, tryRethrow, (), (<~?>)) import Text.Parsing.Parser.Pos (Position(..)) import Unsafe.Coerce (unsafeCoerce) -- | Match “end-of-file,” the end of the input stream. eof :: forall m. ParserT String m Unit -eof = do - ParseState input _ _ <- get - if (null input) - -- We must consume so this combines correctly with notFollowedBy - then consume - else fail "Expected EOF" +eof = ParserT + ( mkFn5 \state1@(ParseState input pos _) _ _ throw done -> + if null input then + -- We must consume so this combines correctly with notFollowedBy + runFn2 done (ParseState input pos true) unit + else + runFn2 throw state1 (ParseError "Expected EOF" pos) + ) -- | Match the entire rest of the input stream. Always succeeds. rest :: forall m. ParserT String m String @@ -80,13 +82,12 @@ rest = state \(ParseState input position _) -> -- | Match the specified string. string :: forall m. String -> ParserT String m String string str = ParserT - ( mkFn5 \state1@(ParseState input position _) _ _ throw done -> + ( mkFn5 \state1@(ParseState input pos _) _ _ throw done -> case stripPrefix (Pattern str) input of Just remainder -> - runFn2 done (ParseState remainder (updatePosString position str) true) str + runFn2 done (ParseState remainder (updatePosString pos str) true) str _ -> - runFn2 throw state1 (ParseError ("Expected " <> show str) position) - + runFn2 throw state1 (ParseError ("Expected " <> show str) pos) ) -- | Match any BMP `Char`. From 68adc70964d0a2142926131b878ec9c54e6a262b Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Sun, 20 Mar 2022 17:16:58 -0700 Subject: [PATCH 03/14] Remove redundant imports --- bench/Main.purs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bench/Main.purs b/bench/Main.purs index 7833e38..5998cdb 100644 --- a/bench/Main.purs +++ b/bench/Main.purs @@ -45,14 +45,13 @@ import Bench.Json.Parsing as BenchParsing import Bench.Json.StringParser as BenchStringParser import Bench.Json.TestData (largeJson, mediumJson, smallJson) import Data.Array (fold, replicate) -import Data.Either (Either(..), either) +import Data.Either (either) import Data.List (many, manyRec) import Data.List.Types (List) import Data.String.Regex (Regex, regex) import Data.String.Regex as Regex import Data.String.Regex.Flags (RegexFlags(..)) import Effect (Effect) -import Effect.Class.Console (logShow) import Effect.Console (log) import Effect.Exception (throw) import Effect.Unsafe (unsafePerformEffect) @@ -60,7 +59,6 @@ import Performance.Minibench (benchWith) import Text.Parsing.Parser (Parser, runParser) import Text.Parsing.Parser.String (string) import Text.Parsing.Parser.String.Basic (digit) -import StringParser (ParseError) import StringParser as StringParser import StringParser.CodePoints as StringParser.CodePoints import StringParser.CodeUnits as StringParser.CodeUnits From f50e8dd01d09d619a837e502b0c82d5d84a82844 Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Mon, 21 Mar 2022 10:41:04 -0700 Subject: [PATCH 04/14] More string specializations and performance improvements --- bench/Main.purs | 4 +- spago-dev.dhall | 1 + spago.dhall | 2 +- src/Text/Parsing/Parser.purs | 27 +++++---- src/Text/Parsing/Parser/Combinators.purs | 4 +- src/Text/Parsing/Parser/String.purs | 72 +++++++++++++----------- 6 files changed, 59 insertions(+), 51 deletions(-) diff --git a/bench/Main.purs b/bench/Main.purs index 5998cdb..332b634 100644 --- a/bench/Main.purs +++ b/bench/Main.purs @@ -147,7 +147,7 @@ main = do $ \_ -> runParser smallJson BenchParsing.json log "StringParser.runParser json smallJson" - benchWith 500 + benchWith 1000 $ \_ -> StringParser.runParser BenchStringParser.json smallJson log "runParser json mediumJson" @@ -155,7 +155,7 @@ main = do $ \_ -> runParser mediumJson BenchParsing.json log "StringParser.runParser json mediumJson" - benchWith 1000 + benchWith 500 $ \_ -> StringParser.runParser BenchStringParser.json mediumJson log "runParser json largeJson" diff --git a/spago-dev.dhall b/spago-dev.dhall index a7d3a38..d12fef8 100644 --- a/spago-dev.dhall +++ b/spago-dev.dhall @@ -12,6 +12,7 @@ in conf // , dependencies = conf.dependencies # [ "assert" , "console" + , "enums" , "effect" , "psci-support" , "minibench" diff --git a/spago.dhall b/spago.dhall index 642824c..6e7ce76 100644 --- a/spago.dhall +++ b/spago.dhall @@ -5,6 +5,7 @@ [ "arrays" , "control" , "either" + , "enums" , "foldable-traversable" , "functions" , "identity" @@ -23,7 +24,6 @@ , "tuples" , "unfoldable" , "unicode" - , "unsafe-coerce" ] , packages = ./packages.dhall , sources = [ "src/**/*.purs" ] diff --git a/src/Text/Parsing/Parser.purs b/src/Text/Parsing/Parser.purs index f27892e..1c475ba 100644 --- a/src/Text/Parsing/Parser.purs +++ b/src/Text/Parsing/Parser.purs @@ -205,21 +205,20 @@ instance Bind (ParserT s m) where instance Monad (ParserT s m) instance MonadRec (ParserT s m) where - tailRecM next = go - where - go a = ParserT - ( mkFn5 \state1 more lift throw done -> - more \_ -> do - let (ParserT k1) = next a - runFn5 k1 state1 more lift throw - ( mkFn2 \state2 step -> case step of - Loop b -> do - let (ParserT k2) = go b - runFn5 k2 state2 more lift throw done - Done c -> - runFn2 done state2 c + tailRecM next initArg = ParserT + ( mkFn5 \state1 more lift throw done -> do + let + loop = mkFn2 \state2 arg -> do + let (ParserT k1) = next arg + runFn5 k1 state2 more lift throw + ( mkFn2 \state3 step -> case step of + Loop nextArg -> + runFn2 loop state3 nextArg + Done res -> + runFn2 done state3 res ) - ) + runFn2 loop state1 initArg + ) instance MonadState (ParseState s) (ParserT s m) where state k = ParserT diff --git a/src/Text/Parsing/Parser/Combinators.purs b/src/Text/Parsing/Parser/Combinators.purs index b5ccd2a..5aea851 100644 --- a/src/Text/Parsing/Parser/Combinators.purs +++ b/src/Text/Parsing/Parser/Combinators.purs @@ -100,7 +100,7 @@ import Prelude import Control.Lazy (defer) import Control.Monad.Rec.Class (Step(..), tailRecM) import Control.Plus (empty, (<|>), alt) -import Data.Foldable (class Foldable, foldl) +import Data.Foldable (class Foldable, foldl, foldr) import Data.Function.Uncurried (mkFn2, mkFn5, runFn2, runFn5) import Data.List (List(..), many, manyRec, reverse, (:)) import Data.List.NonEmpty (NonEmptyList, cons') @@ -442,7 +442,7 @@ chainr1Rec p f = do -- | Parse one of a set of alternatives. choice :: forall f m s a. Foldable f => f (ParserT s m a) -> ParserT s m a -choice = foldl (<|>) empty +choice = foldr (<|>) empty -- | Skip many instances of a phrase. skipMany :: forall s a m. ParserT s m a -> ParserT s m Unit diff --git a/src/Text/Parsing/Parser/String.purs b/src/Text/Parsing/Parser/String.purs index 09554aa..4e2a54a 100644 --- a/src/Text/Parsing/Parser/String.purs +++ b/src/Text/Parsing/Parser/String.purs @@ -45,23 +45,23 @@ import Prelude hiding (between) import Control.Monad.State (get, put, state) import Data.Array (notElem) import Data.Array.NonEmpty as NonEmptyArray -import Data.Char (fromCharCode) import Data.CodePoint.Unicode (isSpace) import Data.Either (Either(..)) +import Data.Enum (fromEnum, toEnum) import Data.Foldable (elem) import Data.Function.Uncurried (mkFn5, runFn2) -import Data.Maybe (Maybe(..)) -import Data.String (CodePoint, Pattern(..), length, null, singleton, splitAt, stripPrefix, uncons) +import Data.Maybe (Maybe(..), fromJust) +import Data.String (CodePoint, Pattern(..), codePointAt, length, null, singleton, splitAt, stripPrefix, takeWhile, uncons) import Data.String.CodeUnits as SCU import Data.String.Regex as Regex import Data.String.Regex.Flags (RegexFlags(..), RegexFlagsRec) import Data.Tuple (Tuple(..), fst) +import Partial.Unsafe (unsafePartial) import Prim.Row (class Nub, class Union) import Record (merge) import Text.Parsing.Parser (ParseError(..), ParseState(..), ParserT(..), fail) -import Text.Parsing.Parser.Combinators (skipMany, tryRethrow, (), (<~?>)) +import Text.Parsing.Parser.Combinators (tryRethrow, (), (<~?>)) import Text.Parsing.Parser.Pos (Position(..)) -import Unsafe.Coerce (unsafeCoerce) -- | Match “end-of-file,” the end of the input stream. eof :: forall m. ParserT String m Unit @@ -93,26 +93,33 @@ string str = ParserT -- | Match any BMP `Char`. -- | Parser will fail if the character is not in the Basic Multilingual Plane. anyChar :: forall m. ParserT String m Char -anyChar = tryRethrow do - cp :: Int <- unCodePoint <$> anyCodePoint - -- the `fromCharCode` function doesn't check if this is beyond the - -- BMP, so we check that ourselves. - -- https://github.com/purescript/purescript-strings/issues/153 - if cp > 65535 -- BMP - then fail "Not a Char" - else case fromCharCode cp of - Nothing -> fail "Not a Char" - Just c -> pure c +anyChar = ParserT + ( mkFn5 \state1@(ParseState input pos _) _ _ throw done -> + case uncons input of + Nothing -> + runFn2 throw state1 (ParseError "Unexpected EOF" pos) + Just { head, tail } -> do + let cp = fromEnum head + -- the `fromCharCode` function doesn't check if this is beyond the + -- BMP, so we check that ourselves. + -- https://github.com/purescript/purescript-strings/issues/153 + if cp < 0 || cp > 65535 then + runFn2 throw state1 (ParseError "Expected Char" pos) + else + runFn2 done (ParseState tail (updatePosSingle pos head) true) (unsafePartial fromJust (toEnum cp)) + ) -- | Match any Unicode character. -- | Always succeeds. anyCodePoint :: forall m. ParserT String m CodePoint -anyCodePoint = join $ state \state1@(ParseState input position _) -> - case uncons input of - Nothing -> - Tuple (fail "Unexpected EOF") state1 - Just { head, tail } -> - Tuple (pure head) (ParseState tail (updatePosSingle position head) true) +anyCodePoint = ParserT + ( mkFn5 \state1@(ParseState input pos _) _ _ throw done -> + case uncons input of + Nothing -> + runFn2 throw state1 (ParseError "Unexpected EOF" pos) + Just { head, tail } -> + runFn2 done (ParseState tail (updatePosSingle pos head) true) head + ) -- | Match a BMP `Char` satisfying the predicate. satisfy :: forall m. (Char -> Boolean) -> ParserT String m Char @@ -148,7 +155,12 @@ whiteSpace = fst <$> match skipSpaces -- | Skip whitespace characters and throw them away. Always succeeds. skipSpaces :: forall m. ParserT String m Unit -skipSpaces = skipMany (satisfyCodePoint isSpace) +skipSpaces = ParserT + ( mkFn5 \(ParseState input pos _) _ _ _ done -> do + let head = takeWhile isSpace input + let tail = SCU.drop (SCU.length head) input + runFn2 done (ParseState tail (updatePosString pos head) true) unit + ) -- | Match one of the BMP `Char`s in the array. oneOf :: forall m. Array Char -> ParserT String m Char @@ -168,14 +180,16 @@ noneOfCodePoints ss = satisfyCodePoint (flip notElem ss) <~?> \_ -> "none of " < -- | Updates a `Position` by adding the columns and lines in `String`. updatePosString :: Position -> String -> Position -updatePosString pos str = case uncons str of - Nothing -> pos - Just { head, tail } -> updatePosString (updatePosSingle pos head) tail -- tail recursive +updatePosString = go 0 + where + go ix pos str = case codePointAt ix str of + Nothing -> pos + Just cp -> go (ix + 1) (updatePosSingle pos cp) str -- | Updates a `Position` by adding the columns and lines in a -- | single `CodePoint`. updatePosSingle :: Position -> CodePoint -> Position -updatePosSingle (Position { line, column }) cp = case unCodePoint cp of +updatePosSingle (Position { line, column }) cp = case fromEnum cp of 10 -> Position { line: line + 1, column: 1 } -- "\n" 13 -> Position { line: line + 1, column: 1 } -- "\r" 9 -> Position { line, column: column + 8 - ((column - 1) `mod` 8) } -- "\t" Who says that one tab is 8 columns? @@ -211,12 +225,6 @@ match p = do -- boundary. pure $ Tuple (SCU.take (SCU.length input1 - SCU.length input2) input1) x --- | The CodePoint newtype constructor is not exported, so here's a helper. --- | This will break at runtime if the definition of CodePoint ever changes --- | to something other than `newtype CodePoint = CodePoint Int`. -unCodePoint :: CodePoint -> Int -unCodePoint = unsafeCoerce - -- | Parser which uses the `Data.String.Regex` module to match the regular -- | expression pattern passed as the `String` -- | argument to the parser. From 503be7548355ab85fb3d24c721d4725feb544c6e Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Mon, 21 Mar 2022 10:43:28 -0700 Subject: [PATCH 05/14] Fix dependency on partial --- spago.dhall | 1 + 1 file changed, 1 insertion(+) diff --git a/spago.dhall b/spago.dhall index 6e7ce76..c980c1d 100644 --- a/spago.dhall +++ b/spago.dhall @@ -16,6 +16,7 @@ , "maybe" , "newtype" , "numbers" + , "partial" , "prelude" , "record" , "strings" From a5f1d5f535ba1d61dd9e42bc2866e5d8e1daa317 Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Mon, 21 Mar 2022 10:45:43 -0700 Subject: [PATCH 06/14] Formatting --- bench/Json/TestData.purs | 3 ++- src/Text/Parsing/Parser/String.purs | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/bench/Json/TestData.purs b/bench/Json/TestData.purs index a5bcd36..34ac68c 100644 --- a/bench/Json/TestData.purs +++ b/bench/Json/TestData.purs @@ -6,7 +6,8 @@ import Data.Array (replicate) import Data.String (joinWith) jsonProps :: String -jsonProps = """ +jsonProps = + """ "some_number": 42.00009 , "some_string": "foobarbazquux" , "some_null": null diff --git a/src/Text/Parsing/Parser/String.purs b/src/Text/Parsing/Parser/String.purs index 4e2a54a..1846f1c 100644 --- a/src/Text/Parsing/Parser/String.purs +++ b/src/Text/Parsing/Parser/String.purs @@ -100,9 +100,9 @@ anyChar = ParserT runFn2 throw state1 (ParseError "Unexpected EOF" pos) Just { head, tail } -> do let cp = fromEnum head - -- the `fromCharCode` function doesn't check if this is beyond the - -- BMP, so we check that ourselves. - -- https://github.com/purescript/purescript-strings/issues/153 + -- the `fromCharCode` function doesn't check if this is beyond the + -- BMP, so we check that ourselves. + -- https://github.com/purescript/purescript-strings/issues/153 if cp < 0 || cp > 65535 then runFn2 throw state1 (ParseError "Expected Char" pos) else @@ -112,7 +112,7 @@ anyChar = ParserT -- | Match any Unicode character. -- | Always succeeds. anyCodePoint :: forall m. ParserT String m CodePoint -anyCodePoint = ParserT +anyCodePoint = ParserT ( mkFn5 \state1@(ParseState input pos _) _ _ throw done -> case uncons input of Nothing -> From 46a4c2e438e22a5f31cb0ec3f0d1f3febafe28d7 Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Tue, 22 Mar 2022 11:21:38 -0700 Subject: [PATCH 07/14] Address feedback, add esoteric low-level splitMap combinator. --- src/Text/Parsing/Parser.purs | 53 +++++++++++++---- src/Text/Parsing/Parser/String.purs | 90 ++++++++++++++++++----------- 2 files changed, 98 insertions(+), 45 deletions(-) diff --git a/src/Text/Parsing/Parser.purs b/src/Text/Parsing/Parser.purs index 1c475ba..ef44d38 100644 --- a/src/Text/Parsing/Parser.purs +++ b/src/Text/Parsing/Parser.purs @@ -27,7 +27,7 @@ import Control.Monad.State.Class (class MonadState, gets, modify_) import Control.Monad.Trans.Class (class MonadTrans) import Control.MonadPlus (class Alternative, class Plus) import Data.Either (Either(..)) -import Data.Function.Uncurried (Fn2, Fn5, mkFn2, mkFn5, runFn2, runFn5) +import Data.Function.Uncurried (Fn2, Fn5, mkFn2, mkFn3, mkFn5, runFn2, runFn3, runFn5) import Data.Identity (Identity) import Data.Lazy as Lazy import Data.Newtype (unwrap) @@ -70,19 +70,35 @@ data ParseState s = ParseState s Position Boolean -- | The first type argument is the stream type. Typically, this is either `String`, -- | or some sort of token stream. newtype ParserT s m a = ParserT + -- The parser is implemented using continuation-passing-style with uncurried + -- functions. In addition to the usual error and success continuations, there + -- are continuations for trampolining and lifting. Trampolining lets us retain + -- stack safety, and an explicit continuation for lifting lets us only pay + -- a transformer abstraction tax when it's actually used. Pure parsers which + -- never call `lift` pay no additional runtime cost. Additionally, this + -- approach lets us run a parser in terms of the base Monad's MonadRec instance, + -- so when lift _is_ used, it's still always stack safe. + + -- When should the trampoline be invoked? Downstream combinators should not need + -- to worry about invoking the trampoline, as it's handled by the core instances + -- of the parser (the Monad and Alternative hierarchies). These instances invoke + -- the trampoline before calling continuations, so each step in the parser will + -- always progress in a fresh stack. ( forall r . Fn5 - (ParseState s) + (ParseState s) -- Current state ((Unit -> r) -> r) -- Trampoline (m (Unit -> r) -> r) -- Lift (Fn2 (ParseState s) ParseError r) -- Throw - (Fn2 (ParseState s) a r) -- Pure + (Fn2 (ParseState s) a r) -- Done/Success r ) -data Run s m a - = More (Unit -> Run s m a) - | Lift (m (Unit -> Run s m a)) +-- When we want to run a parser, continuations are reified as data +-- constructors and processed in a tail-recursive loop. +data RunParser s m a + = More (Unit -> RunParser s m a) + | Lift (m (Unit -> RunParser s m a)) | Stop (ParseState s) (Either ParseError a) -- | Apply a parser, keeping only the parsed result. @@ -92,14 +108,21 @@ runParserT s p = fst <$> runParserT' initialState p initialState :: ParseState s initialState = ParseState s initialPos false -runParserT' :: forall m s a. MonadRec m => ParseState s -> ParserT s m a -> m (Tuple (Either ParseError a) (ParseState s)) +runParserT' + :: forall m s a + . MonadRec m + => ParseState s + -> ParserT s m a + -> m (Tuple (Either ParseError a) (ParseState s)) runParserT' state1 (ParserT k1) = tailRecM go \_ -> runFn5 k1 state1 More Lift (mkFn2 \state2 err -> Stop state2 (Left err)) (mkFn2 \state2 res -> Stop state2 (Right res)) where - go :: (Unit -> Run s m a) -> m (Step (Unit -> Run s m a) (Tuple (Either ParseError a) (ParseState s))) + go + :: (Unit -> RunParser s m a) + -> m (Step (Unit -> RunParser s m a) (Tuple (Either ParseError a) (ParseState s))) go step = case step unit of More next -> go next @@ -208,16 +231,24 @@ instance MonadRec (ParserT s m) where tailRecM next initArg = ParserT ( mkFn5 \state1 more lift throw done -> do let - loop = mkFn2 \state2 arg -> do + -- In most cases, trampolining MonadRec is unnecessary since all the + -- core semantics are trampolined. But given the case where a loop might + -- otherwise be pure, we still want to guarantee stack usage so we use + -- a "gas" accumulator to avoid bouncing too much. + loop = mkFn3 \state2 arg gas -> do let (ParserT k1) = next arg runFn5 k1 state2 more lift throw ( mkFn2 \state3 step -> case step of Loop nextArg -> - runFn2 loop state3 nextArg + if gas == 0 then + more \_ -> + runFn3 loop state3 nextArg 30 + else + runFn3 loop state3 nextArg (gas - 1) Done res -> runFn2 done state3 res ) - runFn2 loop state1 initArg + runFn3 loop state1 initArg 30 ) instance MonadState (ParseState s) (ParserT s m) where diff --git a/src/Text/Parsing/Parser/String.purs b/src/Text/Parsing/Parser/String.purs index 1846f1c..91df8e0 100644 --- a/src/Text/Parsing/Parser/String.purs +++ b/src/Text/Parsing/Parser/String.purs @@ -38,20 +38,21 @@ module Text.Parsing.Parser.String , match , regex , RegexFlagsRow + , splitMap ) where import Prelude hiding (between) -import Control.Monad.State (get, put, state) -import Data.Array (notElem) +import Control.Monad.State (get, state) +import Data.Array (elem, notElem) import Data.Array.NonEmpty as NonEmptyArray import Data.CodePoint.Unicode (isSpace) import Data.Either (Either(..)) import Data.Enum (fromEnum, toEnum) -import Data.Foldable (elem) import Data.Function.Uncurried (mkFn5, runFn2) import Data.Maybe (Maybe(..), fromJust) import Data.String (CodePoint, Pattern(..), codePointAt, length, null, singleton, splitAt, stripPrefix, takeWhile, uncons) +import Data.String as String import Data.String.CodeUnits as SCU import Data.String.Regex as Regex import Data.String.Regex.Flags (RegexFlags(..), RegexFlagsRec) @@ -77,7 +78,7 @@ eof = ParserT -- | Match the entire rest of the input stream. Always succeeds. rest :: forall m. ParserT String m String rest = state \(ParseState input position _) -> - Tuple input (ParseState "" (updatePosString position input) true) + Tuple input (ParseState "" (updatePosString position input "") true) -- | Match the specified string. string :: forall m. String -> ParserT String m String @@ -85,7 +86,7 @@ string str = ParserT ( mkFn5 \state1@(ParseState input pos _) _ _ throw done -> case stripPrefix (Pattern str) input of Just remainder -> - runFn2 done (ParseState remainder (updatePosString pos str) true) str + runFn2 done (ParseState remainder (updatePosString pos str remainder) true) str _ -> runFn2 throw state1 (ParseError ("Expected " <> show str) pos) ) @@ -106,7 +107,7 @@ anyChar = ParserT if cp < 0 || cp > 65535 then runFn2 throw state1 (ParseError "Expected Char" pos) else - runFn2 done (ParseState tail (updatePosSingle pos head) true) (unsafePartial fromJust (toEnum cp)) + runFn2 done (ParseState tail (updatePosSingle pos head tail) true) (unsafePartial fromJust (toEnum cp)) ) -- | Match any Unicode character. @@ -118,7 +119,7 @@ anyCodePoint = ParserT Nothing -> runFn2 throw state1 (ParseError "Unexpected EOF" pos) Just { head, tail } -> - runFn2 done (ParseState tail (updatePosSingle pos head) true) head + runFn2 done (ParseState tail (updatePosSingle pos head tail) true) head ) -- | Match a BMP `Char` satisfying the predicate. @@ -141,12 +142,12 @@ char c = satisfy (_ == c) show c -- | Match a `String` exactly *N* characters long. takeN :: forall m. Int -> ParserT String m String -takeN n = join $ state \state1@(ParseState input position _) -> do +takeN n = splitMap \input -> do let { before, after } = splitAt n input - if length before == n then do - Tuple (pure before) (ParseState after (updatePosString position before) true) + if length before == n then + Right { value: before, before, after } else - Tuple (fail ("Could not take " <> show n <> " characters")) state1 + Left $ "Could not take " <> show n <> " characters" -- | Match zero or more whitespace characters satisfying -- | `Data.CodePoint.Unicode.isSpace`. Always succeeds. @@ -155,12 +156,10 @@ whiteSpace = fst <$> match skipSpaces -- | Skip whitespace characters and throw them away. Always succeeds. skipSpaces :: forall m. ParserT String m Unit -skipSpaces = ParserT - ( mkFn5 \(ParseState input pos _) _ _ _ done -> do - let head = takeWhile isSpace input - let tail = SCU.drop (SCU.length head) input - runFn2 done (ParseState tail (updatePosString pos head) true) unit - ) +skipSpaces = splitMap \input -> do + let before = takeWhile isSpace input + let after = SCU.drop (SCU.length before) input + Right { value: unit, before, after } -- | Match one of the BMP `Char`s in the array. oneOf :: forall m. Array Char -> ParserT String m Char @@ -179,19 +178,25 @@ noneOfCodePoints :: forall m. Array CodePoint -> ParserT String m CodePoint noneOfCodePoints ss = satisfyCodePoint (flip notElem ss) <~?> \_ -> "none of " <> show (singleton <$> ss) -- | Updates a `Position` by adding the columns and lines in `String`. -updatePosString :: Position -> String -> Position -updatePosString = go 0 - where - go ix pos str = case codePointAt ix str of - Nothing -> pos - Just cp -> go (ix + 1) (updatePosSingle pos cp) str +updatePosString :: Position -> String -> String -> Position +updatePosString pos before after = case uncons before of + Nothing -> pos + Just { head, tail } -> do + let + newPos + | String.null tail = updatePosSingle pos head after + | otherwise = updatePosSingle pos head tail + updatePosString newPos tail after -- | Updates a `Position` by adding the columns and lines in a -- | single `CodePoint`. -updatePosSingle :: Position -> CodePoint -> Position -updatePosSingle (Position { line, column }) cp = case fromEnum cp of +updatePosSingle :: Position -> CodePoint -> String -> Position +updatePosSingle (Position { line, column }) cp after = case fromEnum cp of 10 -> Position { line: line + 1, column: 1 } -- "\n" - 13 -> Position { line: line + 1, column: 1 } -- "\r" + 13 -> + case codePointAt 0 after of + Just nextCp | fromEnum nextCp == 10 -> Position { line, column } -- "\r\n" lookahead + _ -> Position { line: line + 1, column: 1 } -- "\r" 9 -> Position { line, column: column + 8 - ((column - 1) `mod` 8) } -- "\t" Who says that one tab is 8 columns? _ -> Position { line, column: column + 1 } @@ -286,14 +291,14 @@ regex flags pattern = case Regex.regex ("^(" <> pattern <> ")") flags' of Left paterr -> fail $ "Regex pattern error " <> paterr - Right regexobj -> do - ParseState input position _ <- get - case NonEmptyArray.head <$> Regex.match regexobj input of - Just (Just matched) -> do - let remainder = SCU.drop (SCU.length matched) input - put $ ParseState remainder (updatePosString position matched) true - pure matched - _ -> fail $ "No Regex pattern match" + Right regexobj -> + splitMap \input -> do + case NonEmptyArray.head <$> Regex.match regexobj input of + Just (Just before) -> do + let after = SCU.drop (SCU.length before) input + Right { value: before, before, after } + _ -> + Left "No Regex pattern match" where flags' = RegexFlags ( merge flags @@ -315,3 +320,20 @@ type RegexFlagsRow = , sticky :: Boolean , unicode :: Boolean ) + +-- | Splits the input string while yielding a value. +-- | * `value` is the value to return. +-- | * `before` is the input that was consumed and is used to update the parser position. +-- | * `after` is the new input state. +splitMap + :: forall m a + . (String -> Either String { value :: a, before :: String, after :: String }) + -> ParserT String m a +splitMap f = ParserT + ( mkFn5 \state1@(ParseState input pos _) _ _ throw done -> + case f input of + Left err -> + runFn2 throw state1 (ParseError err pos) + Right { value, before, after } -> + runFn2 done (ParseState after (updatePosString pos before after) true) value + ) From dc2ee8ba473c62c474a996b49b47f3599efde207 Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Tue, 22 Mar 2022 12:31:21 -0700 Subject: [PATCH 08/14] Swap around satisfy/any implementations. --- src/Text/Parsing/Parser/String.purs | 69 ++++++++++++++--------------- 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/src/Text/Parsing/Parser/String.purs b/src/Text/Parsing/Parser/String.purs index 91df8e0..05d4541 100644 --- a/src/Text/Parsing/Parser/String.purs +++ b/src/Text/Parsing/Parser/String.purs @@ -43,7 +43,7 @@ module Text.Parsing.Parser.String import Prelude hiding (between) -import Control.Monad.State (get, state) +import Control.Monad.State (get) import Data.Array (elem, notElem) import Data.Array.NonEmpty as NonEmptyArray import Data.CodePoint.Unicode (isSpace) @@ -61,7 +61,7 @@ import Partial.Unsafe (unsafePartial) import Prim.Row (class Nub, class Union) import Record (merge) import Text.Parsing.Parser (ParseError(..), ParseState(..), ParserT(..), fail) -import Text.Parsing.Parser.Combinators (tryRethrow, (), (<~?>)) +import Text.Parsing.Parser.Combinators ((), (<~?>)) import Text.Parsing.Parser.Pos (Position(..)) -- | Match “end-of-file,” the end of the input stream. @@ -77,65 +77,64 @@ eof = ParserT -- | Match the entire rest of the input stream. Always succeeds. rest :: forall m. ParserT String m String -rest = state \(ParseState input position _) -> - Tuple input (ParseState "" (updatePosString position input "") true) +rest = splitMap \before -> + Right { value: before, before, after: "" } -- | Match the specified string. string :: forall m. String -> ParserT String m String -string str = ParserT - ( mkFn5 \state1@(ParseState input pos _) _ _ throw done -> - case stripPrefix (Pattern str) input of - Just remainder -> - runFn2 done (ParseState remainder (updatePosString pos str remainder) true) str - _ -> - runFn2 throw state1 (ParseError ("Expected " <> show str) pos) - ) +string str = splitMap \input -> + case stripPrefix (Pattern str) input of + Just after -> + Right { value: str, before: str, after } + _ -> + Left $ "Expected " <> show str -- | Match any BMP `Char`. -- | Parser will fail if the character is not in the Basic Multilingual Plane. anyChar :: forall m. ParserT String m Char -anyChar = ParserT +anyChar = satisfy (const true) + +-- | Match any Unicode character. +-- | Always succeeds. +anyCodePoint :: forall m. ParserT String m CodePoint +anyCodePoint = satisfyCodePoint (const true) + +-- | Match a BMP `Char` satisfying the predicate. +satisfy :: forall m. (Char -> Boolean) -> ParserT String m Char +satisfy f = ParserT ( mkFn5 \state1@(ParseState input pos _) _ _ throw done -> case uncons input of Nothing -> runFn2 throw state1 (ParseError "Unexpected EOF" pos) Just { head, tail } -> do let cp = fromEnum head - -- the `fromCharCode` function doesn't check if this is beyond the + -- the `fromEnum` function doesn't check if this is beyond the -- BMP, so we check that ourselves. -- https://github.com/purescript/purescript-strings/issues/153 if cp < 0 || cp > 65535 then runFn2 throw state1 (ParseError "Expected Char" pos) - else - runFn2 done (ParseState tail (updatePosSingle pos head tail) true) (unsafePartial fromJust (toEnum cp)) + else do + let ch = unsafePartial (fromJust (toEnum cp)) + if f ch then + runFn2 done (ParseState tail (updatePosSingle pos head tail) true) ch + else + runFn2 throw state1 (ParseError "Predicate unsatisfied" pos) ) --- | Match any Unicode character. --- | Always succeeds. -anyCodePoint :: forall m. ParserT String m CodePoint -anyCodePoint = ParserT +-- | Match a Unicode character satisfying the predicate. +satisfyCodePoint :: forall m. (CodePoint -> Boolean) -> ParserT String m CodePoint +satisfyCodePoint f = ParserT ( mkFn5 \state1@(ParseState input pos _) _ _ throw done -> case uncons input of Nothing -> runFn2 throw state1 (ParseError "Unexpected EOF" pos) Just { head, tail } -> - runFn2 done (ParseState tail (updatePosSingle pos head tail) true) head + if f head then + runFn2 done (ParseState tail (updatePosSingle pos head tail) true) head + else + runFn2 throw state1 (ParseError "Predicate unsatisfied" pos) ) --- | Match a BMP `Char` satisfying the predicate. -satisfy :: forall m. (Char -> Boolean) -> ParserT String m Char -satisfy f = tryRethrow do - c <- anyChar - if f c then pure c - else fail "Predicate unsatisfied" - --- | Match a Unicode character satisfying the predicate. -satisfyCodePoint :: forall m. (CodePoint -> Boolean) -> ParserT String m CodePoint -satisfyCodePoint f = tryRethrow do - c <- anyCodePoint - if f c then pure c - else fail "Predicate unsatisfied" - -- | Match the specified BMP `Char`. char :: forall m. Char -> ParserT String m Char char c = satisfy (_ == c) show c From acb7121d39b252d7c1b9897c7fe584849361e41d Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Wed, 23 Mar 2022 09:20:48 -0700 Subject: [PATCH 09/14] More descriptive record labels for splitMap --- src/Text/Parsing/Parser/String.purs | 32 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/Text/Parsing/Parser/String.purs b/src/Text/Parsing/Parser/String.purs index 05d4541..da7f76e 100644 --- a/src/Text/Parsing/Parser/String.purs +++ b/src/Text/Parsing/Parser/String.purs @@ -77,15 +77,15 @@ eof = ParserT -- | Match the entire rest of the input stream. Always succeeds. rest :: forall m. ParserT String m String -rest = splitMap \before -> - Right { value: before, before, after: "" } +rest = splitMap \consumed -> + Right { value: consumed, consumed, remainder: "" } -- | Match the specified string. string :: forall m. String -> ParserT String m String string str = splitMap \input -> case stripPrefix (Pattern str) input of - Just after -> - Right { value: str, before: str, after } + Just remainder -> + Right { value: str, consumed: str, remainder } _ -> Left $ "Expected " <> show str @@ -144,7 +144,7 @@ takeN :: forall m. Int -> ParserT String m String takeN n = splitMap \input -> do let { before, after } = splitAt n input if length before == n then - Right { value: before, before, after } + Right { value: before, consumed: before, remainder: after } else Left $ "Could not take " <> show n <> " characters" @@ -156,9 +156,9 @@ whiteSpace = fst <$> match skipSpaces -- | Skip whitespace characters and throw them away. Always succeeds. skipSpaces :: forall m. ParserT String m Unit skipSpaces = splitMap \input -> do - let before = takeWhile isSpace input - let after = SCU.drop (SCU.length before) input - Right { value: unit, before, after } + let consumed = takeWhile isSpace input + let remainder = SCU.drop (SCU.length consumed) input + Right { value: unit, consumed, remainder } -- | Match one of the BMP `Char`s in the array. oneOf :: forall m. Array Char -> ParserT String m Char @@ -293,9 +293,9 @@ regex flags pattern = Right regexobj -> splitMap \input -> do case NonEmptyArray.head <$> Regex.match regexobj input of - Just (Just before) -> do - let after = SCU.drop (SCU.length before) input - Right { value: before, before, after } + Just (Just consumed) -> do + let remainder = SCU.drop (SCU.length consumed) input + Right { value: consumed, consumed, remainder } _ -> Left "No Regex pattern match" where @@ -322,17 +322,17 @@ type RegexFlagsRow = -- | Splits the input string while yielding a value. -- | * `value` is the value to return. --- | * `before` is the input that was consumed and is used to update the parser position. --- | * `after` is the new input state. +-- | * `consumed` is the input that was consumed and is used to update the parser position. +-- | * `remainder` is the new input state. splitMap :: forall m a - . (String -> Either String { value :: a, before :: String, after :: String }) + . (String -> Either String { value :: a, consumed :: String, remainder :: String }) -> ParserT String m a splitMap f = ParserT ( mkFn5 \state1@(ParseState input pos _) _ _ throw done -> case f input of Left err -> runFn2 throw state1 (ParseError err pos) - Right { value, before, after } -> - runFn2 done (ParseState after (updatePosString pos before after) true) value + Right { value, consumed, remainder } -> + runFn2 done (ParseState remainder (updatePosString pos consumed remainder) true) value ) From fe25855430f13b52cb91e6e1652e375f566e0cba Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Wed, 23 Mar 2022 09:20:57 -0700 Subject: [PATCH 10/14] Update changelog --- CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 376057d..db2c471 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,19 @@ Notable changes to this project are documented in this file. The format is based Breaking changes: +- New optimized internals. `ParserT` now has a more efficient representation, + resulting in (up to) 20x performance improvement. In addition to the performance, + all parser execution is always stack-safe, even monadically, obviating the need + to run parsers with `Trampoline` as the base Monad or to explicitly use `MonadRec`. + + Code that was parametric over the underlying Monad no longer needs to propagate a + Monad constraint. + + Code that constructs parsers via the underlying representation will need to be updated, + but otherwise the interface is unchanged and parsers should just enjoy the speed boost. + + (#154 by @natefaubion) + New features: Bugfixes: From dbd9aae13bd050d14ba30abf16aba024c1dad3a3 Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Wed, 23 Mar 2022 15:05:46 -0700 Subject: [PATCH 11/14] Rename splitMap to consumeWith --- src/Text/Parsing/Parser/String.purs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Text/Parsing/Parser/String.purs b/src/Text/Parsing/Parser/String.purs index da7f76e..d063702 100644 --- a/src/Text/Parsing/Parser/String.purs +++ b/src/Text/Parsing/Parser/String.purs @@ -38,7 +38,7 @@ module Text.Parsing.Parser.String , match , regex , RegexFlagsRow - , splitMap + , consumeWith ) where import Prelude hiding (between) @@ -77,12 +77,12 @@ eof = ParserT -- | Match the entire rest of the input stream. Always succeeds. rest :: forall m. ParserT String m String -rest = splitMap \consumed -> +rest = consumeWith \consumed -> Right { value: consumed, consumed, remainder: "" } -- | Match the specified string. string :: forall m. String -> ParserT String m String -string str = splitMap \input -> +string str = consumeWith \input -> case stripPrefix (Pattern str) input of Just remainder -> Right { value: str, consumed: str, remainder } @@ -141,7 +141,7 @@ char c = satisfy (_ == c) show c -- | Match a `String` exactly *N* characters long. takeN :: forall m. Int -> ParserT String m String -takeN n = splitMap \input -> do +takeN n = consumeWith \input -> do let { before, after } = splitAt n input if length before == n then Right { value: before, consumed: before, remainder: after } @@ -155,7 +155,7 @@ whiteSpace = fst <$> match skipSpaces -- | Skip whitespace characters and throw them away. Always succeeds. skipSpaces :: forall m. ParserT String m Unit -skipSpaces = splitMap \input -> do +skipSpaces = consumeWith \input -> do let consumed = takeWhile isSpace input let remainder = SCU.drop (SCU.length consumed) input Right { value: unit, consumed, remainder } @@ -291,7 +291,7 @@ regex flags pattern = Left paterr -> fail $ "Regex pattern error " <> paterr Right regexobj -> - splitMap \input -> do + consumeWith \input -> do case NonEmptyArray.head <$> Regex.match regexobj input of Just (Just consumed) -> do let remainder = SCU.drop (SCU.length consumed) input @@ -320,15 +320,15 @@ type RegexFlagsRow = , unicode :: Boolean ) --- | Splits the input string while yielding a value. +-- | Consumes a portion of the input string while yielding a value. -- | * `value` is the value to return. -- | * `consumed` is the input that was consumed and is used to update the parser position. -- | * `remainder` is the new input state. -splitMap +consumeWith :: forall m a . (String -> Either String { value :: a, consumed :: String, remainder :: String }) -> ParserT String m a -splitMap f = ParserT +consumeWith f = ParserT ( mkFn5 \state1@(ParseState input pos _) _ _ throw done -> case f input of Left err -> From 05a7ca84dae9acbf8e7556ed67a10f838e8d2995 Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Thu, 24 Mar 2022 09:44:28 -0700 Subject: [PATCH 12/14] Remove more rendundant constraints --- src/Text/Parsing/Parser/Indent.purs | 6 +++--- src/Text/Parsing/Parser/Language.purs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Text/Parsing/Parser/Indent.purs b/src/Text/Parsing/Parser/Indent.purs index fc54c03..53c859a 100644 --- a/src/Text/Parsing/Parser/Indent.purs +++ b/src/Text/Parsing/Parser/Indent.purs @@ -77,7 +77,7 @@ type IndentParser s a = ParserT s (State Position) a -- | @ getPosition @ returns current position -- | should probably be added to Text.Parsing.Parser.Pos -getPosition :: forall m s. (Monad m) => ParserT s m Position +getPosition :: forall m s. ParserT s m Position getPosition = gets \(ParseState _ pos _) -> pos -- | simple helper function to avoid typ-problems with MonadState instance @@ -103,10 +103,10 @@ biAp :: forall a b c. (a -> b) -> (b -> b -> c) -> a -> a -> c biAp f c v1 v2 = c (f v1) (f v2) -- | @ many1 @ should prabably be inside Text.Parsing.Parser.Combinators -many1 :: forall s m a. (Monad m) => ParserT s m a -> ParserT s m (List a) +many1 :: forall s m a. ParserT s m a -> ParserT s m (List a) many1 p = lift2 Cons p (many p) -symbol :: forall m. (Monad m) => String -> ParserT String m String +symbol :: forall m. String -> ParserT String m String symbol name = (many $ oneOf [ ' ', '\t' ]) *> (string name) -- | `withBlock f a p` parses `a` diff --git a/src/Text/Parsing/Parser/Language.purs b/src/Text/Parsing/Parser/Language.purs index 151684a..a079e87 100644 --- a/src/Text/Parsing/Parser/Language.purs +++ b/src/Text/Parsing/Parser/Language.purs @@ -39,7 +39,7 @@ haskellStyle = LanguageDef (unGenLanguageDef emptyDef) , caseSensitive = true } where - op' :: forall m. (Monad m) => ParserT String m Char + op' :: forall m. ParserT String m Char op' = oneOf [ ':', '!', '#', '$', '%', '&', '*', '+', '.', '/', '<', '=', '>', '?', '@', '\\', '^', '|', '-', '~' ] -- | This is a minimal token definition for Java style languages. It @@ -81,7 +81,7 @@ emptyDef = LanguageDef , caseSensitive: true } where - op' :: forall m. (Monad m) => ParserT String m Char + op' :: forall m. ParserT String m Char op' = oneOf [ ':', '!', '#', '$', '%', '&', '*', '+', '.', '/', '<', '=', '>', '?', '@', '\\', '^', '|', '-', '~' ] -- ----------------------------------------------------------- From 850eefeafdf2312bd6de248f71a935803cce2550 Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Thu, 24 Mar 2022 09:53:30 -0700 Subject: [PATCH 13/14] Update fixity of error operators --- src/Text/Parsing/Parser/Combinators.purs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Text/Parsing/Parser/Combinators.purs b/src/Text/Parsing/Parser/Combinators.purs index 5aea851..c4e6ebb 100644 --- a/src/Text/Parsing/Parser/Combinators.purs +++ b/src/Text/Parsing/Parser/Combinators.purs @@ -116,7 +116,7 @@ import Text.Parsing.Parser (ParseError(..), ParseState(..), ParserT(..), fail) withErrorMessage :: forall m s a. ParserT s m a -> String -> ParserT s m a withErrorMessage p msg = p <|> fail ("Expected " <> msg) -infixl 3 withErrorMessage as +infix 2 withErrorMessage as -- | Provide an error message in the case of failure, but lazily. This is handy -- | in cases where constructing the error message is expensive, so it's @@ -129,13 +129,13 @@ infixl 3 withErrorMessage as withLazyErrorMessage :: forall m s a. ParserT s m a -> (Unit -> String) -> ParserT s m a withLazyErrorMessage p msg = p <|> defer \_ -> fail ("Expected " <> msg unit) -infixl 3 withLazyErrorMessage as <~?> +infix 2 withLazyErrorMessage as <~?> -- | Flipped `()`. asErrorMessage :: forall m s a. String -> ParserT s m a -> ParserT s m a asErrorMessage = flip () -infixl 3 asErrorMessage as +infix 2 asErrorMessage as -- | Wrap a parser with opening and closing markers. -- | From 1b765432aa103acd7fb9d86ae7278939a77f2989 Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Thu, 24 Mar 2022 13:59:07 -0700 Subject: [PATCH 14/14] Revert infix changes to infixr 3 --- src/Text/Parsing/Parser/Combinators.purs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Text/Parsing/Parser/Combinators.purs b/src/Text/Parsing/Parser/Combinators.purs index c4e6ebb..43ce066 100644 --- a/src/Text/Parsing/Parser/Combinators.purs +++ b/src/Text/Parsing/Parser/Combinators.purs @@ -116,7 +116,7 @@ import Text.Parsing.Parser (ParseError(..), ParseState(..), ParserT(..), fail) withErrorMessage :: forall m s a. ParserT s m a -> String -> ParserT s m a withErrorMessage p msg = p <|> fail ("Expected " <> msg) -infix 2 withErrorMessage as +infixr 3 withErrorMessage as -- | Provide an error message in the case of failure, but lazily. This is handy -- | in cases where constructing the error message is expensive, so it's @@ -129,13 +129,13 @@ infix 2 withErrorMessage as withLazyErrorMessage :: forall m s a. ParserT s m a -> (Unit -> String) -> ParserT s m a withLazyErrorMessage p msg = p <|> defer \_ -> fail ("Expected " <> msg unit) -infix 2 withLazyErrorMessage as <~?> +infixr 3 withLazyErrorMessage as <~?> -- | Flipped `()`. asErrorMessage :: forall m s a. String -> ParserT s m a -> ParserT s m a asErrorMessage = flip () -infix 2 asErrorMessage as +infixr 3 asErrorMessage as -- | Wrap a parser with opening and closing markers. -- |