-- -----------------------------------------------------------------------------
--
-- ParseMonad.hs, part of Alex
--
-- (c) Simon Marlow 2003
--
-- ----------------------------------------------------------------------------}

module ParseMonad (
        AlexInput, alexInputPrevChar, alexGetChar, alexGetByte,
        AlexPosn(..), alexStartPos,
        Warning(..), warnIfNullable,
        P, runP, StartCode, failP, lookupSMac, lookupRMac, newSMac, newRMac,
        setStartCode, getStartCode, getInput, setInput,
 ) where

import Control.Monad       ( liftM, ap, when )
import Data.Map            ( Map )
import Data.List.NonEmpty  ( pattern (:|) )
import Data.Word           ( Word8 )
import qualified Data.Map as Map

import AbsSyn   hiding ( StartCode )
import CharSet  ( CharSet )
import UTF8

-- -----------------------------------------------------------------------------
-- The input type
--import Codec.Binary.UTF8.Light as UTF8

type Byte = Word8

type AlexInput = (AlexPosn,     -- current position,
                  Char,         -- previous char
                  [Byte],
                  String)       -- current input string

alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar (AlexPosn
_,Char
c,[Byte]
_,String
_) = Char
c


alexGetChar :: AlexInput -> Maybe (Char, AlexInput)
alexGetChar :: AlexInput -> Maybe (Char, AlexInput)
alexGetChar (AlexPosn
_, Char
_, [], [])  = Maybe (Char, AlexInput)
forall a. Maybe a
Nothing
alexGetChar (AlexPosn
p, Char
_, [], Char
c:String
s) = AlexPosn
p' AlexPosn -> Maybe (Char, AlexInput) -> Maybe (Char, AlexInput)
forall a b. a -> b -> b
`seq` (Char, AlexInput) -> Maybe (Char, AlexInput)
forall a. a -> Maybe a
Just (Char
c, (AlexPosn
p', Char
c, [], String
s))
  where
    p' :: AlexPosn
p' = AlexPosn -> Char -> AlexPosn
alexMove AlexPosn
p Char
c
alexGetChar (AlexPosn
_, Char
_ , Byte
_:[Byte]
_, String
_) = Maybe (Char, AlexInput)
forall a. HasCallStack => a
undefined -- hide compiler warning

alexGetByte :: AlexInput -> Maybe (Byte, AlexInput)
alexGetByte :: AlexInput -> Maybe (Byte, AlexInput)
alexGetByte (AlexPosn
p, Char
c, Byte
b:[Byte]
bs, String
s)   = (Byte, AlexInput) -> Maybe (Byte, AlexInput)
forall a. a -> Maybe a
Just (Byte
b, (AlexPosn
p, Char
c, [Byte]
bs, String
s))
alexGetByte (AlexPosn
_, Char
_, [],   [])  = Maybe (Byte, AlexInput)
forall a. Maybe a
Nothing
alexGetByte (AlexPosn
p, Char
_, [],   Char
c:String
s) = AlexPosn
p' AlexPosn -> Maybe (Byte, AlexInput) -> Maybe (Byte, AlexInput)
forall a b. a -> b -> b
`seq`  (Byte, AlexInput) -> Maybe (Byte, AlexInput)
forall a. a -> Maybe a
Just (Byte
b, (AlexPosn
p', Char
c, [Byte]
bs, String
s))
  where
    p' :: AlexPosn
p' = AlexPosn -> Char -> AlexPosn
alexMove AlexPosn
p Char
c
    Byte
b :| [Byte]
bs = Char -> NonEmpty Byte
UTF8.encode Char
c

-- -----------------------------------------------------------------------------
-- Token positions

-- `Posn' records the location of a token in the input text.  It has three
-- fields: the address (number of characters preceding the token), line number
-- and column of a token within the file. `start_pos' gives the position of the
-- start of the file and `eof_pos' a standard encoding for the end of file.
-- `move_pos' calculates the new position after traversing a given character,
-- assuming the usual eight character tab stops.

data AlexPosn = AlexPn !Int !Int !Int
        deriving (AlexPosn -> AlexPosn -> Bool
(AlexPosn -> AlexPosn -> Bool)
-> (AlexPosn -> AlexPosn -> Bool) -> Eq AlexPosn
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: AlexPosn -> AlexPosn -> Bool
== :: AlexPosn -> AlexPosn -> Bool
$c/= :: AlexPosn -> AlexPosn -> Bool
/= :: AlexPosn -> AlexPosn -> Bool
Eq, Int -> AlexPosn -> ShowS
[AlexPosn] -> ShowS
AlexPosn -> String
(Int -> AlexPosn -> ShowS)
-> (AlexPosn -> String) -> ([AlexPosn] -> ShowS) -> Show AlexPosn
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> AlexPosn -> ShowS
showsPrec :: Int -> AlexPosn -> ShowS
$cshow :: AlexPosn -> String
show :: AlexPosn -> String
$cshowList :: [AlexPosn] -> ShowS
showList :: [AlexPosn] -> ShowS
Show, Eq AlexPosn
Eq AlexPosn
-> (AlexPosn -> AlexPosn -> Ordering)
-> (AlexPosn -> AlexPosn -> Bool)
-> (AlexPosn -> AlexPosn -> Bool)
-> (AlexPosn -> AlexPosn -> Bool)
-> (AlexPosn -> AlexPosn -> Bool)
-> (AlexPosn -> AlexPosn -> AlexPosn)
-> (AlexPosn -> AlexPosn -> AlexPosn)
-> Ord AlexPosn
AlexPosn -> AlexPosn -> Bool
AlexPosn -> AlexPosn -> Ordering
AlexPosn -> AlexPosn -> AlexPosn
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: AlexPosn -> AlexPosn -> Ordering
compare :: AlexPosn -> AlexPosn -> Ordering
$c< :: AlexPosn -> AlexPosn -> Bool
< :: AlexPosn -> AlexPosn -> Bool
$c<= :: AlexPosn -> AlexPosn -> Bool
<= :: AlexPosn -> AlexPosn -> Bool
$c> :: AlexPosn -> AlexPosn -> Bool
> :: AlexPosn -> AlexPosn -> Bool
$c>= :: AlexPosn -> AlexPosn -> Bool
>= :: AlexPosn -> AlexPosn -> Bool
$cmax :: AlexPosn -> AlexPosn -> AlexPosn
max :: AlexPosn -> AlexPosn -> AlexPosn
$cmin :: AlexPosn -> AlexPosn -> AlexPosn
min :: AlexPosn -> AlexPosn -> AlexPosn
Ord)

alexStartPos :: AlexPosn
alexStartPos :: AlexPosn
alexStartPos = Int -> Int -> Int -> AlexPosn
AlexPn Int
0 Int
1 Int
1

alexMove :: AlexPosn -> Char -> AlexPosn
alexMove :: AlexPosn -> Char -> AlexPosn
alexMove (AlexPn Int
a Int
l Int
c) Char
'\t' = Int -> Int -> Int -> AlexPosn
AlexPn (Int
aInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)  Int
l     (((Int
cInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
7) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
8)Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
8Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
alexMove (AlexPn Int
a Int
l Int
_) Char
'\n' = Int -> Int -> Int -> AlexPosn
AlexPn (Int
aInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)   Int
1
alexMove (AlexPn Int
a Int
l Int
c) Char
_    = Int -> Int -> Int -> AlexPosn
AlexPn (Int
aInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)  Int
l     (Int
cInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)

-- -----------------------------------------------------------------------------
-- Alex lexing/parsing monad

data Warning
  = WarnNullableRExp
    { Warning -> AlexPosn
_warnPos  :: AlexPosn  -- ^ The position of the code following the regex.
    , Warning -> String
_warnText :: String    -- ^ Warning text.
    }

type ParseError = (Maybe AlexPosn, String)
type StartCode = Int

data PState = PState
  { PState -> [Warning]
warnings  :: [Warning]           -- ^ Stack of warnings, top = last warning.
  , PState -> Map String CharSet
smac_env  :: Map String CharSet
  , PState -> Map String RExp
rmac_env  :: Map String RExp
  , PState -> Int
startcode :: Int
  , PState -> AlexInput
input     :: AlexInput
  }

newtype P a = P { forall a. P a -> PState -> Either ParseError (PState, a)
unP :: PState -> Either ParseError (PState,a) }

instance Functor P where
  fmap :: forall a b. (a -> b) -> P a -> P b
fmap = (a -> b) -> P a -> P b
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM

instance Applicative P where
  pure :: forall a. a -> P a
pure a
a = (PState -> Either ParseError (PState, a)) -> P a
forall a. (PState -> Either ParseError (PState, a)) -> P a
P ((PState -> Either ParseError (PState, a)) -> P a)
-> (PState -> Either ParseError (PState, a)) -> P a
forall a b. (a -> b) -> a -> b
$ \PState
env -> (PState, a) -> Either ParseError (PState, a)
forall a b. b -> Either a b
Right (PState
env,a
a)
  <*> :: forall a b. P (a -> b) -> P a -> P b
(<*>) = P (a -> b) -> P a -> P b
forall (m :: * -> *) a b. Monad m => m (a -> b) -> m a -> m b
ap

instance Monad P where
 (P PState -> Either ParseError (PState, a)
m) >>= :: forall a b. P a -> (a -> P b) -> P b
>>= a -> P b
k = (PState -> Either ParseError (PState, b)) -> P b
forall a. (PState -> Either ParseError (PState, a)) -> P a
P ((PState -> Either ParseError (PState, b)) -> P b)
-> (PState -> Either ParseError (PState, b)) -> P b
forall a b. (a -> b) -> a -> b
$ \PState
env -> case PState -> Either ParseError (PState, a)
m PState
env of
                        Left ParseError
err -> ParseError -> Either ParseError (PState, b)
forall a b. a -> Either a b
Left ParseError
err
                        Right (PState
env',a
ok) -> P b -> PState -> Either ParseError (PState, b)
forall a. P a -> PState -> Either ParseError (PState, a)
unP (a -> P b
k a
ok) PState
env'
 return :: forall a. a -> P a
return = a -> P a
forall a. a -> P a
forall (f :: * -> *) a. Applicative f => a -> f a
pure

-- | Run the parser on given input.
runP :: String
          -- ^ Input string.
     -> (Map String CharSet, Map String RExp)
          -- ^ Character set and regex definitions.
     -> P a
          -- ^ Parsing computation.
     -> Either ParseError ([Warning], a)
          -- ^ List of warnings in first-to-last order, result.
runP :: forall a.
String
-> (Map String CharSet, Map String RExp)
-> P a
-> Either ParseError ([Warning], a)
runP String
str (Map String CharSet
senv,Map String RExp
renv) (P PState -> Either ParseError (PState, a)
p)
  = case PState -> Either ParseError (PState, a)
p PState
initial_state of
        Left ParseError
err -> ParseError -> Either ParseError ([Warning], a)
forall a b. a -> Either a b
Left ParseError
err
        Right (PState
s, a
a) -> ([Warning], a) -> Either ParseError ([Warning], a)
forall a b. b -> Either a b
Right ([Warning] -> [Warning]
forall a. [a] -> [a]
reverse (PState -> [Warning]
warnings PState
s), a
a)
  where
  initial_state :: PState
initial_state = PState
    { warnings :: [Warning]
warnings  = []
    , smac_env :: Map String CharSet
smac_env  = Map String CharSet
senv
    , rmac_env :: Map String RExp
rmac_env  = Map String RExp
renv
    , startcode :: Int
startcode = Int
0
    , input :: AlexInput
input     = (AlexPosn
alexStartPos, Char
'\n', [], String
str)
    }

failP :: String -> P a
failP :: forall a. String -> P a
failP String
str = (PState -> Either ParseError (PState, a)) -> P a
forall a. (PState -> Either ParseError (PState, a)) -> P a
P ((PState -> Either ParseError (PState, a)) -> P a)
-> (PState -> Either ParseError (PState, a)) -> P a
forall a b. (a -> b) -> a -> b
$ \PState{ input :: PState -> AlexInput
input = (AlexPosn
p,Char
_,[Byte]
_,String
_) } -> ParseError -> Either ParseError (PState, a)
forall a b. a -> Either a b
Left (AlexPosn -> Maybe AlexPosn
forall a. a -> Maybe a
Just AlexPosn
p,String
str)

-- Macros are expanded during parsing, to simplify the abstract
-- syntax.  The parsing monad passes around two environments mapping
-- macro names to sets and regexps respectively.

lookupSMac :: (AlexPosn,String) -> P CharSet
lookupSMac :: (AlexPosn, String) -> P CharSet
lookupSMac (AlexPosn
posn,String
smac)
 = (PState -> Either ParseError (PState, CharSet)) -> P CharSet
forall a. (PState -> Either ParseError (PState, a)) -> P a
P ((PState -> Either ParseError (PState, CharSet)) -> P CharSet)
-> (PState -> Either ParseError (PState, CharSet)) -> P CharSet
forall a b. (a -> b) -> a -> b
$ \s :: PState
s@PState{ smac_env :: PState -> Map String CharSet
smac_env = Map String CharSet
senv } ->
       case String -> Map String CharSet -> Maybe CharSet
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup String
smac Map String CharSet
senv of
        Just CharSet
ok -> (PState, CharSet) -> Either ParseError (PState, CharSet)
forall a b. b -> Either a b
Right (PState
s,CharSet
ok)
        Maybe CharSet
Nothing -> ParseError -> Either ParseError (PState, CharSet)
forall a b. a -> Either a b
Left (AlexPosn -> Maybe AlexPosn
forall a. a -> Maybe a
Just AlexPosn
posn, String
"unknown set macro: $" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
smac)

lookupRMac :: String -> P RExp
lookupRMac :: String -> P RExp
lookupRMac String
rmac
 = (PState -> Either ParseError (PState, RExp)) -> P RExp
forall a. (PState -> Either ParseError (PState, a)) -> P a
P ((PState -> Either ParseError (PState, RExp)) -> P RExp)
-> (PState -> Either ParseError (PState, RExp)) -> P RExp
forall a b. (a -> b) -> a -> b
$ \s :: PState
s@PState{ rmac_env :: PState -> Map String RExp
rmac_env = Map String RExp
renv } ->
       case String -> Map String RExp -> Maybe RExp
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup String
rmac Map String RExp
renv of
        Just RExp
ok -> (PState, RExp) -> Either ParseError (PState, RExp)
forall a b. b -> Either a b
Right (PState
s,RExp
ok)
        Maybe RExp
Nothing -> ParseError -> Either ParseError (PState, RExp)
forall a b. a -> Either a b
Left (Maybe AlexPosn
forall a. Maybe a
Nothing, String
"unknown regex macro: %" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
rmac)

newSMac :: String -> CharSet -> P ()
newSMac :: String -> CharSet -> P ()
newSMac String
smac CharSet
set
  = (PState -> Either ParseError (PState, ())) -> P ()
forall a. (PState -> Either ParseError (PState, a)) -> P a
P ((PState -> Either ParseError (PState, ())) -> P ())
-> (PState -> Either ParseError (PState, ())) -> P ()
forall a b. (a -> b) -> a -> b
$ \PState
s -> (PState, ()) -> Either ParseError (PState, ())
forall a b. b -> Either a b
Right (PState
s{smac_env :: Map String CharSet
smac_env = String -> CharSet -> Map String CharSet -> Map String CharSet
forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert String
smac CharSet
set (PState -> Map String CharSet
smac_env PState
s)}, ())

newRMac :: String -> RExp -> P ()
newRMac :: String -> RExp -> P ()
newRMac String
rmac RExp
rexp
  = (PState -> Either ParseError (PState, ())) -> P ()
forall a. (PState -> Either ParseError (PState, a)) -> P a
P ((PState -> Either ParseError (PState, ())) -> P ())
-> (PState -> Either ParseError (PState, ())) -> P ()
forall a b. (a -> b) -> a -> b
$ \PState
s -> (PState, ()) -> Either ParseError (PState, ())
forall a b. b -> Either a b
Right (PState
s{rmac_env :: Map String RExp
rmac_env = String -> RExp -> Map String RExp -> Map String RExp
forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert String
rmac RExp
rexp (PState -> Map String RExp
rmac_env PState
s)}, ())

setStartCode :: StartCode -> P ()
setStartCode :: Int -> P ()
setStartCode Int
sc = (PState -> Either ParseError (PState, ())) -> P ()
forall a. (PState -> Either ParseError (PState, a)) -> P a
P ((PState -> Either ParseError (PState, ())) -> P ())
-> (PState -> Either ParseError (PState, ())) -> P ()
forall a b. (a -> b) -> a -> b
$ \PState
s -> (PState, ()) -> Either ParseError (PState, ())
forall a b. b -> Either a b
Right (PState
s{ startcode :: Int
startcode = Int
sc }, ())

getStartCode :: P StartCode
getStartCode :: P Int
getStartCode = (PState -> Either ParseError (PState, Int)) -> P Int
forall a. (PState -> Either ParseError (PState, a)) -> P a
P ((PState -> Either ParseError (PState, Int)) -> P Int)
-> (PState -> Either ParseError (PState, Int)) -> P Int
forall a b. (a -> b) -> a -> b
$ \PState
s -> (PState, Int) -> Either ParseError (PState, Int)
forall a b. b -> Either a b
Right (PState
s, PState -> Int
startcode PState
s)

getInput :: P AlexInput
getInput :: P AlexInput
getInput = (PState -> Either ParseError (PState, AlexInput)) -> P AlexInput
forall a. (PState -> Either ParseError (PState, a)) -> P a
P ((PState -> Either ParseError (PState, AlexInput)) -> P AlexInput)
-> (PState -> Either ParseError (PState, AlexInput)) -> P AlexInput
forall a b. (a -> b) -> a -> b
$ \PState
s -> (PState, AlexInput) -> Either ParseError (PState, AlexInput)
forall a b. b -> Either a b
Right (PState
s, PState -> AlexInput
input PState
s)

setInput :: AlexInput -> P ()
setInput :: AlexInput -> P ()
setInput AlexInput
inp = (PState -> Either ParseError (PState, ())) -> P ()
forall a. (PState -> Either ParseError (PState, a)) -> P a
P ((PState -> Either ParseError (PState, ())) -> P ())
-> (PState -> Either ParseError (PState, ())) -> P ()
forall a b. (a -> b) -> a -> b
$ \PState
s -> (PState, ()) -> Either ParseError (PState, ())
forall a b. b -> Either a b
Right (PState
s{ input :: AlexInput
input = AlexInput
inp }, ())

-- | Add a warning if given regular expression is nullable
--   unless the user wrote the regex 'Eps'.
warnIfNullable
  :: RExp       -- ^ Regular expression.
  -> AlexPosn   -- ^ Position associated to regular expression.
  -> P ()
-- If the user wrote @()@, they wanted to match the empty sequence!
-- Thus, skip the warning then.
warnIfNullable :: RExp -> AlexPosn -> P ()
warnIfNullable RExp
Eps AlexPosn
_ = () -> P ()
forall a. a -> P a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
warnIfNullable RExp
r AlexPosn
pos = Bool -> P () -> P ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (RExp -> Bool
nullable RExp
r) (P () -> P ()) -> P () -> P ()
forall a b. (a -> b) -> a -> b
$ (PState -> Either ParseError (PState, ())) -> P ()
forall a. (PState -> Either ParseError (PState, a)) -> P a
P ((PState -> Either ParseError (PState, ())) -> P ())
-> (PState -> Either ParseError (PState, ())) -> P ()
forall a b. (a -> b) -> a -> b
$ \ PState
s ->
  (PState, ()) -> Either ParseError (PState, ())
forall a b. b -> Either a b
Right (PState
s{ warnings :: [Warning]
warnings = AlexPosn -> String -> Warning
WarnNullableRExp AlexPosn
pos String
w Warning -> [Warning] -> [Warning]
forall a. a -> [a] -> [a]
: PState -> [Warning]
warnings PState
s}, ())
  where
  w :: String
w = [String] -> String
unwords
      [ String
"Regular expression"
      , RExp -> String
forall a. Show a => a -> String
show RExp
r
      , String
"matches the empty string."
      ]