@@ -45,23 +45,23 @@ import Prelude hiding (between)
45
45
import Control.Monad.State (get , put , state )
46
46
import Data.Array (notElem )
47
47
import Data.Array.NonEmpty as NonEmptyArray
48
- import Data.Char (fromCharCode )
49
48
import Data.CodePoint.Unicode (isSpace )
50
49
import Data.Either (Either (..))
50
+ import Data.Enum (fromEnum , toEnum )
51
51
import Data.Foldable (elem )
52
52
import Data.Function.Uncurried (mkFn5 , runFn2 )
53
- import Data.Maybe (Maybe (..))
54
- import Data.String (CodePoint , Pattern (..), length , null , singleton , splitAt , stripPrefix , uncons )
53
+ import Data.Maybe (Maybe (..), fromJust )
54
+ import Data.String (CodePoint , Pattern (..), codePointAt , length , null , singleton , splitAt , stripPrefix , takeWhile , uncons )
55
55
import Data.String.CodeUnits as SCU
56
56
import Data.String.Regex as Regex
57
57
import Data.String.Regex.Flags (RegexFlags (..), RegexFlagsRec )
58
58
import Data.Tuple (Tuple (..), fst )
59
+ import Partial.Unsafe (unsafePartial )
59
60
import Prim.Row (class Nub , class Union )
60
61
import Record (merge )
61
62
import Text.Parsing.Parser (ParseError (..), ParseState (..), ParserT (..), fail )
62
- import Text.Parsing.Parser.Combinators (skipMany , tryRethrow , (<?>), (<~?>))
63
+ import Text.Parsing.Parser.Combinators (tryRethrow , (<?>), (<~?>))
63
64
import Text.Parsing.Parser.Pos (Position (..))
64
- import Unsafe.Coerce (unsafeCoerce )
65
65
66
66
-- | Match “end-of-file,” the end of the input stream.
67
67
eof :: forall m . ParserT String m Unit
@@ -93,26 +93,33 @@ string str = ParserT
93
93
-- | Match any BMP `Char`.
94
94
-- | Parser will fail if the character is not in the Basic Multilingual Plane.
95
95
anyChar :: forall m . ParserT String m Char
96
- anyChar = tryRethrow do
97
- cp :: Int <- unCodePoint <$> anyCodePoint
98
- -- the `fromCharCode` function doesn't check if this is beyond the
99
- -- BMP, so we check that ourselves.
100
- -- https://github.com/purescript/purescript-strings/issues/153
101
- if cp > 65535 -- BMP
102
- then fail " Not a Char"
103
- else case fromCharCode cp of
104
- Nothing -> fail " Not a Char"
105
- Just c -> pure c
96
+ anyChar = ParserT
97
+ ( mkFn5 \state1@(ParseState input pos _) _ _ throw done ->
98
+ case uncons input of
99
+ Nothing ->
100
+ runFn2 throw state1 (ParseError " Unexpected EOF" pos)
101
+ Just { head, tail } -> do
102
+ let cp = fromEnum head
103
+ -- the `fromCharCode` function doesn't check if this is beyond the
104
+ -- BMP, so we check that ourselves.
105
+ -- https://github.com/purescript/purescript-strings/issues/153
106
+ if cp < 0 || cp > 65535 then
107
+ runFn2 throw state1 (ParseError " Expected Char" pos)
108
+ else
109
+ runFn2 done (ParseState tail (updatePosSingle pos head) true ) (unsafePartial fromJust (toEnum cp))
110
+ )
106
111
107
112
-- | Match any Unicode character.
108
113
-- | Always succeeds.
109
114
anyCodePoint :: forall m . ParserT String m CodePoint
110
- anyCodePoint = join $ state \state1@(ParseState input position _) ->
111
- case uncons input of
112
- Nothing ->
113
- Tuple (fail " Unexpected EOF" ) state1
114
- Just { head, tail } ->
115
- Tuple (pure head) (ParseState tail (updatePosSingle position head) true )
115
+ anyCodePoint = ParserT
116
+ ( mkFn5 \state1@(ParseState input pos _) _ _ throw done ->
117
+ case uncons input of
118
+ Nothing ->
119
+ runFn2 throw state1 (ParseError " Unexpected EOF" pos)
120
+ Just { head, tail } ->
121
+ runFn2 done (ParseState tail (updatePosSingle pos head) true ) head
122
+ )
116
123
117
124
-- | Match a BMP `Char` satisfying the predicate.
118
125
satisfy :: forall m . (Char -> Boolean ) -> ParserT String m Char
@@ -148,7 +155,12 @@ whiteSpace = fst <$> match skipSpaces
148
155
149
156
-- | Skip whitespace characters and throw them away. Always succeeds.
150
157
skipSpaces :: forall m . ParserT String m Unit
151
- skipSpaces = skipMany (satisfyCodePoint isSpace)
158
+ skipSpaces = ParserT
159
+ ( mkFn5 \(ParseState input pos _) _ _ _ done -> do
160
+ let head = takeWhile isSpace input
161
+ let tail = SCU .drop (SCU .length head) input
162
+ runFn2 done (ParseState tail (updatePosString pos head) true ) unit
163
+ )
152
164
153
165
-- | Match one of the BMP `Char`s in the array.
154
166
oneOf :: forall m . Array Char -> ParserT String m Char
@@ -168,14 +180,16 @@ noneOfCodePoints ss = satisfyCodePoint (flip notElem ss) <~?> \_ -> "none of " <
168
180
169
181
-- | Updates a `Position` by adding the columns and lines in `String`.
170
182
updatePosString :: Position -> String -> Position
171
- updatePosString pos str = case uncons str of
172
- Nothing -> pos
173
- Just { head, tail } -> updatePosString (updatePosSingle pos head) tail -- tail recursive
183
+ updatePosString = go 0
184
+ where
185
+ go ix pos str = case codePointAt ix str of
186
+ Nothing -> pos
187
+ Just cp -> go (ix + 1 ) (updatePosSingle pos cp) str
174
188
175
189
-- | Updates a `Position` by adding the columns and lines in a
176
190
-- | single `CodePoint`.
177
191
updatePosSingle :: Position -> CodePoint -> Position
178
- updatePosSingle (Position { line, column }) cp = case unCodePoint cp of
192
+ updatePosSingle (Position { line, column }) cp = case fromEnum cp of
179
193
10 -> Position { line: line + 1 , column: 1 } -- "\n"
180
194
13 -> Position { line: line + 1 , column: 1 } -- "\r"
181
195
9 -> Position { line, column: column + 8 - ((column - 1 ) `mod` 8 ) } -- "\t" Who says that one tab is 8 columns?
@@ -211,12 +225,6 @@ match p = do
211
225
-- boundary.
212
226
pure $ Tuple (SCU .take (SCU .length input1 - SCU .length input2) input1) x
213
227
214
- -- | The CodePoint newtype constructor is not exported, so here's a helper.
215
- -- | This will break at runtime if the definition of CodePoint ever changes
216
- -- | to something other than `newtype CodePoint = CodePoint Int`.
217
- unCodePoint :: CodePoint -> Int
218
- unCodePoint = unsafeCoerce
219
-
220
228
-- | Parser which uses the `Data.String.Regex` module to match the regular
221
229
-- | expression pattern passed as the `String`
222
230
-- | argument to the parser.
0 commit comments