module Toml.Syntax.LexerUtils (
Action,
Context(..),
Outcome(..),
locatedUncons,
token,
token_,
textToken,
timeValue,
eofToken,
failure,
strFrag,
startMlBstr,
startBstr,
startMlLstr,
startLstr,
endStr,
unicodeEscape,
recommendEscape,
mkError,
) where
import Data.Char (ord, chr, isAscii, isControl)
import Data.Foldable (asum)
import Data.Text (Text)
import Data.Text qualified as Text
import Data.Time.Format (parseTimeM, defaultTimeLocale, ParseTime)
import Numeric (readHex)
import Text.Printf (printf)
import Toml.Syntax.Token (Token(..))
import Toml.Syntax.Position (move, Located(..), Position)
type Action = Located Text -> Context -> Outcome
data Outcome
= Resume Context
| LexerError (Located String)
| EmitToken (Located Token)
data Context
= TopContext
| TableContext
| ValueContext
| MlBstrContext Position [Text]
| BstrContext Position [Text]
| MlLstrContext Position [Text]
| LstrContext Position [Text]
deriving Int -> Context -> ShowS
[Context] -> ShowS
Context -> [Char]
(Int -> Context -> ShowS)
-> (Context -> [Char]) -> ([Context] -> ShowS) -> Show Context
forall a.
(Int -> a -> ShowS) -> (a -> [Char]) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Context -> ShowS
showsPrec :: Int -> Context -> ShowS
$cshow :: Context -> [Char]
show :: Context -> [Char]
$cshowList :: [Context] -> ShowS
showList :: [Context] -> ShowS
Show
strFrag :: Action
strFrag :: Action
strFrag (Located Position
_ Text
s) = \case
BstrContext Position
p [Text]
acc -> Context -> Outcome
Resume (Position -> [Text] -> Context
BstrContext Position
p (Text
s Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
acc))
MlBstrContext Position
p [Text]
acc -> Context -> Outcome
Resume (Position -> [Text] -> Context
MlBstrContext Position
p (Text
s Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
acc))
LstrContext Position
p [Text]
acc -> Context -> Outcome
Resume (Position -> [Text] -> Context
LstrContext Position
p (Text
s Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
acc))
MlLstrContext Position
p [Text]
acc -> Context -> Outcome
Resume (Position -> [Text] -> Context
MlLstrContext Position
p (Text
s Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
acc))
Context
_ -> [Char] -> Outcome
forall a. HasCallStack => [Char] -> a
error [Char]
"strFrag: panic"
endStr :: Action
endStr :: Action
endStr (Located Position
_ Text
x) = \case
BstrContext Position
p [Text]
acc -> Located Token -> Outcome
EmitToken (Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
p (Text -> Token
TokString ([Text] -> Text
Text.concat ([Text] -> [Text]
forall a. [a] -> [a]
reverse (Text
x Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
acc)))))
MlBstrContext Position
p [Text]
acc -> Located Token -> Outcome
EmitToken (Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
p (Text -> Token
TokMlString ([Text] -> Text
Text.concat ([Text] -> [Text]
forall a. [a] -> [a]
reverse (Text
x Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
acc)))))
LstrContext Position
p [Text]
acc -> Located Token -> Outcome
EmitToken (Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
p (Text -> Token
TokString ([Text] -> Text
Text.concat ([Text] -> [Text]
forall a. [a] -> [a]
reverse (Text
x Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
acc)))))
MlLstrContext Position
p [Text]
acc -> Located Token -> Outcome
EmitToken (Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
p (Text -> Token
TokMlString ([Text] -> Text
Text.concat ([Text] -> [Text]
forall a. [a] -> [a]
reverse (Text
x Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
acc)))))
Context
_ -> [Char] -> Outcome
forall a. HasCallStack => [Char] -> a
error [Char]
"endStr: panic"
startBstr :: Action
startBstr :: Action
startBstr (Located Position
p Text
_) Context
_ = Context -> Outcome
Resume (Position -> [Text] -> Context
BstrContext Position
p [])
startLstr :: Action
startLstr :: Action
startLstr (Located Position
p Text
_) Context
_ = Context -> Outcome
Resume (Position -> [Text] -> Context
LstrContext Position
p [])
startMlBstr :: Action
startMlBstr :: Action
startMlBstr (Located Position
p Text
_) Context
_ = Context -> Outcome
Resume (Position -> [Text] -> Context
MlBstrContext Position
p [])
startMlLstr :: Action
startMlLstr :: Action
startMlLstr (Located Position
p Text
_) Context
_ = Context -> Outcome
Resume (Position -> [Text] -> Context
MlLstrContext Position
p [])
unicodeEscape :: Action
unicodeEscape :: Action
unicodeEscape (Located Position
p Text
lexeme) Context
ctx =
case ReadS Int
forall a. (Eq a, Num a) => ReadS a
readHex (Int -> ShowS
forall a. Int -> [a] -> [a]
drop Int
2 (Text -> [Char]
Text.unpack Text
lexeme)) of
[(Int
n,[Char]
_)] | Int
0xd800 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
n, Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0xe000 -> Located [Char] -> Outcome
LexerError (Position -> [Char] -> Located [Char]
forall a. Position -> a -> Located a
Located Position
p [Char]
"non-scalar unicode escape")
| Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0x110000 -> Located [Char] -> Outcome
LexerError (Position -> [Char] -> Located [Char]
forall a. Position -> a -> Located a
Located Position
p [Char]
"unicode escape too large")
| Bool
otherwise -> Action
strFrag (Position -> Text -> Located Text
forall a. Position -> a -> Located a
Located Position
p (Char -> Text
Text.singleton (Int -> Char
chr Int
n))) Context
ctx
[(Int, [Char])]
_ -> [Char] -> Outcome
forall a. HasCallStack => [Char] -> a
error [Char]
"unicodeEscape: panic"
recommendEscape :: Action
recommendEscape :: Action
recommendEscape (Located Position
p Text
x) Context
_ =
Located [Char] -> Outcome
LexerError (Position -> [Char] -> Located [Char]
forall a. Position -> a -> Located a
Located Position
p ([Char] -> Int -> [Char]
forall r. PrintfType r => [Char] -> r
printf [Char]
"control characters must be escaped, use: \\u%04X" (Char -> Int
ord (HasCallStack => Text -> Char
Text -> Char
Text.head Text
x))))
token_ :: Token -> Action
token_ :: Token -> Action
token_ Token
t Located Text
x Context
_ = Located Token -> Outcome
EmitToken (Token
t Token -> Located Text -> Located Token
forall a b. a -> Located b -> Located a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Located Text
x)
token :: (String -> Token) -> Action
token :: ([Char] -> Token) -> Action
token [Char] -> Token
f Located Text
x Context
_ = Located Token -> Outcome
EmitToken ([Char] -> Token
f ([Char] -> Token) -> (Text -> [Char]) -> Text -> Token
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Char]
Text.unpack (Text -> Token) -> Located Text -> Located Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Located Text
x)
textToken :: (Text -> Token) -> Action
textToken :: (Text -> Token) -> Action
textToken Text -> Token
f Located Text
x Context
_ = Located Token -> Outcome
EmitToken (Text -> Token
f (Text -> Token) -> Located Text -> Located Token
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Located Text
x)
timeValue ::
ParseTime a =>
String ->
[String] ->
(a -> Token) ->
Action
timeValue :: forall a.
ParseTime a =>
[Char] -> [[Char]] -> (a -> Token) -> Action
timeValue [Char]
description [[Char]]
patterns a -> Token
constructor (Located Position
p Text
str) Context
_ =
case [Maybe a] -> Maybe a
forall (t :: * -> *) (f :: * -> *) a.
(Foldable t, Alternative f) =>
t (f a) -> f a
asum [Bool -> TimeLocale -> [Char] -> [Char] -> Maybe a
forall (m :: * -> *) t.
(MonadFail m, ParseTime t) =>
Bool -> TimeLocale -> [Char] -> [Char] -> m t
parseTimeM Bool
False TimeLocale
defaultTimeLocale [Char]
pat (Text -> [Char]
Text.unpack Text
str) | [Char]
pat <- [[Char]]
patterns] of
Maybe a
Nothing -> Located [Char] -> Outcome
LexerError (Position -> [Char] -> Located [Char]
forall a. Position -> a -> Located a
Located Position
p ([Char]
"malformed " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ [Char]
description))
Just a
t -> Located Token -> Outcome
EmitToken (Position -> Token -> Located Token
forall a. Position -> a -> Located a
Located Position
p (a -> Token
constructor a
t))
locatedUncons :: Located Text -> Maybe (Int, Located Text)
locatedUncons :: Located Text -> Maybe (Int, Located Text)
locatedUncons Located { locPosition :: forall a. Located a -> Position
locPosition = Position
p, locThing :: forall a. Located a -> a
locThing = Text
str } =
case Text -> Maybe (Char, Text)
Text.uncons Text
str of
Maybe (Char, Text)
Nothing -> Maybe (Int, Located Text)
forall a. Maybe a
Nothing
Just (Char
x, Text
xs)
| Located Text
rest Located Text -> Bool -> Bool
forall a b. a -> b -> b
`seq` Bool
False -> Maybe (Int, Located Text)
forall a. HasCallStack => a
undefined
| Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\1' -> (Int, Located Text) -> Maybe (Int, Located Text)
forall a. a -> Maybe a
Just (Int
0, Located Text
rest)
| Char -> Bool
isAscii Char
x -> (Int, Located Text) -> Maybe (Int, Located Text)
forall a. a -> Maybe a
Just (Char -> Int
ord Char
x, Located Text
rest)
| Bool
otherwise -> (Int, Located Text) -> Maybe (Int, Located Text)
forall a. a -> Maybe a
Just (Int
1, Located Text
rest)
where
rest :: Located Text
rest = Located { locPosition :: Position
locPosition = Char -> Position -> Position
move Char
x Position
p, locThing :: Text
locThing = Text
xs }
eofToken :: Context -> Located Text -> Either (Located String) (Located Token, Located Text)
eofToken :: Context
-> Located Text
-> Either (Located [Char]) (Located Token, Located Text)
eofToken (MlBstrContext Position
p [Text]
_) Located Text
_ = Located [Char]
-> Either (Located [Char]) (Located Token, Located Text)
forall a b. a -> Either a b
Left (Position -> [Char] -> Located [Char]
forall a. Position -> a -> Located a
Located Position
p [Char]
"unterminated multi-line basic string")
eofToken (BstrContext Position
p [Text]
_) Located Text
_ = Located [Char]
-> Either (Located [Char]) (Located Token, Located Text)
forall a b. a -> Either a b
Left (Position -> [Char] -> Located [Char]
forall a. Position -> a -> Located a
Located Position
p [Char]
"unterminated basic string")
eofToken (MlLstrContext Position
p [Text]
_) Located Text
_ = Located [Char]
-> Either (Located [Char]) (Located Token, Located Text)
forall a b. a -> Either a b
Left (Position -> [Char] -> Located [Char]
forall a. Position -> a -> Located a
Located Position
p [Char]
"unterminated multi-line literal string")
eofToken (LstrContext Position
p [Text]
_) Located Text
_ = Located [Char]
-> Either (Located [Char]) (Located Token, Located Text)
forall a b. a -> Either a b
Left (Position -> [Char] -> Located [Char]
forall a. Position -> a -> Located a
Located Position
p [Char]
"unterminated literal string")
eofToken Context
_ Located Text
t = (Located Token, Located Text)
-> Either (Located [Char]) (Located Token, Located Text)
forall a b. b -> Either a b
Right (Token
TokEOF Token -> Located Text -> Located Token
forall a b. a -> Located b -> Located a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Located Text
t, Located Text
t)
failure :: String -> Action
failure :: [Char] -> Action
failure [Char]
err Located Text
t Context
_ = Located [Char] -> Outcome
LexerError ([Char]
err [Char] -> Located Text -> Located [Char]
forall a b. a -> Located b -> Located a
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Located Text
t)
mkError :: String -> String
mkError :: ShowS
mkError [Char]
"" = [Char]
"unexpected end-of-input"
mkError (Char
'\n':[Char]
_) = [Char]
"unexpected end-of-line"
mkError (Char
'\r':Char
'\n':[Char]
_) = [Char]
"unexpected end-of-line"
mkError (Char
x:[Char]
_)
| Char -> Bool
isControl Char
x = [Char]
"control characters prohibited"
| Bool
otherwise = [Char]
"unexpected " [Char] -> ShowS
forall a. [a] -> [a] -> [a]
++ Char -> [Char]
forall a. Show a => a -> [Char]
show Char
x