{-# LANGUAGE OverloadedLists #-}
module CharSet (
setSingleton,
Encoding(..),
Byte,
ByteSet,
byteSetSingleton,
byteRanges,
byteSetRange,
CharSet,
emptyCharSet,
charSetSingleton,
charSet,
charSetMinus,
charSetComplement,
charSetRange,
charSetUnion,
charSetQuote,
setUnions,
byteSetToArray,
byteSetElems,
byteSetElem
) where
import Data.Array ( Array, array )
import Data.Char ( chr, ord )
import Data.Maybe ( catMaybes )
import Data.Word ( Word8 )
import Data.List.NonEmpty ( pattern (:|), (<|) )
import qualified Data.List.NonEmpty as List1
import UTF8 ( List1, encode )
import Data.Ranged
( Boundary( BoundaryAbove, BoundaryAboveAll, BoundaryBelow, BoundaryBelowAll )
, DiscreteOrdered, Range( Range ), RSet
, makeRangedSet
, rSetDifference, rSetEmpty, rSetHas, rSetNegation, rSetRanges, rSetUnion, rSingleton
)
type Byte = Word8
type CharSet = RSet Char
type ByteSet = RSet Byte
type Utf8Range = Span (List1 Byte)
data Encoding = Latin1 | UTF8
deriving (Encoding -> Encoding -> Bool
(Encoding -> Encoding -> Bool)
-> (Encoding -> Encoding -> Bool) -> Eq Encoding
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Encoding -> Encoding -> Bool
== :: Encoding -> Encoding -> Bool
$c/= :: Encoding -> Encoding -> Bool
/= :: Encoding -> Encoding -> Bool
Eq, Int -> Encoding -> ShowS
[Encoding] -> ShowS
Encoding -> String
(Int -> Encoding -> ShowS)
-> (Encoding -> String) -> ([Encoding] -> ShowS) -> Show Encoding
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Encoding -> ShowS
showsPrec :: Int -> Encoding -> ShowS
$cshow :: Encoding -> String
show :: Encoding -> String
$cshowList :: [Encoding] -> ShowS
showList :: [Encoding] -> ShowS
Show)
emptyCharSet :: CharSet
emptyCharSet :: CharSet
emptyCharSet = CharSet
forall a. DiscreteOrdered a => RSet a
rSetEmpty
byteSetElem :: ByteSet -> Byte -> Bool
byteSetElem :: ByteSet -> Byte -> Bool
byteSetElem = ByteSet -> Byte -> Bool
forall v. DiscreteOrdered v => RSet v -> v -> Bool
rSetHas
charSetSingleton :: Char -> CharSet
charSetSingleton :: Char -> CharSet
charSetSingleton = Char -> CharSet
forall v. DiscreteOrdered v => v -> RSet v
rSingleton
setSingleton :: DiscreteOrdered a => a -> RSet a
setSingleton :: forall v. DiscreteOrdered v => v -> RSet v
setSingleton = a -> RSet a
forall v. DiscreteOrdered v => v -> RSet v
rSingleton
charSet :: [Char] -> CharSet
charSet :: String -> CharSet
charSet = [CharSet] -> CharSet
forall a. DiscreteOrdered a => [RSet a] -> RSet a
setUnions ([CharSet] -> CharSet)
-> (String -> [CharSet]) -> String -> CharSet
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> CharSet) -> String -> [CharSet]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Char -> CharSet
charSetSingleton
charSetMinus :: CharSet -> CharSet -> CharSet
charSetMinus :: CharSet -> CharSet -> CharSet
charSetMinus = CharSet -> CharSet -> CharSet
forall v. DiscreteOrdered v => RSet v -> RSet v -> RSet v
rSetDifference
charSetUnion :: CharSet -> CharSet -> CharSet
charSetUnion :: CharSet -> CharSet -> CharSet
charSetUnion = CharSet -> CharSet -> CharSet
forall v. DiscreteOrdered v => RSet v -> RSet v -> RSet v
rSetUnion
setUnions :: DiscreteOrdered a => [RSet a] -> RSet a
setUnions :: forall a. DiscreteOrdered a => [RSet a] -> RSet a
setUnions = (RSet a -> RSet a -> RSet a) -> RSet a -> [RSet a] -> RSet a
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr RSet a -> RSet a -> RSet a
forall v. DiscreteOrdered v => RSet v -> RSet v -> RSet v
rSetUnion RSet a
forall a. DiscreteOrdered a => RSet a
rSetEmpty
charSetComplement :: CharSet -> CharSet
charSetComplement :: CharSet -> CharSet
charSetComplement = CharSet -> CharSet
forall a. DiscreteOrdered a => RSet a -> RSet a
rSetNegation
charSetRange :: Char -> Char -> CharSet
charSetRange :: Char -> Char -> CharSet
charSetRange Char
c1 Char
c2 = [Range Char] -> CharSet
forall v. DiscreteOrdered v => [Range v] -> RSet v
makeRangedSet [Boundary Char -> Boundary Char -> Range Char
forall v. Boundary v -> Boundary v -> Range v
Range (Char -> Boundary Char
forall a. a -> Boundary a
BoundaryBelow Char
c1) (Char -> Boundary Char
forall a. a -> Boundary a
BoundaryAbove Char
c2)]
{-# INLINE bytes #-}
bytes :: [Byte]
bytes :: [Byte]
bytes = [Byte
Item [Byte]
forall a. Bounded a => a
minBound..Byte
Item [Byte]
forall a. Bounded a => a
maxBound]
byteSetToArray :: ByteSet -> Array Byte Bool
byteSetToArray :: ByteSet -> Array Byte Bool
byteSetToArray ByteSet
set = (Byte, Byte) -> [(Byte, Bool)] -> Array Byte Bool
forall i e. Ix i => (i, i) -> [(i, e)] -> Array i e
array (Byte
forall a. Bounded a => a
minBound, Byte
forall a. Bounded a => a
maxBound) [(Byte
c, ByteSet -> Byte -> Bool
forall v. DiscreteOrdered v => RSet v -> v -> Bool
rSetHas ByteSet
set Byte
c) | Byte
c <- [Byte]
bytes]
byteSetElems :: ByteSet -> [Byte]
byteSetElems :: ByteSet -> [Byte]
byteSetElems ByteSet
set = (Byte -> Bool) -> [Byte] -> [Byte]
forall a. (a -> Bool) -> [a] -> [a]
filter (ByteSet -> Byte -> Bool
forall v. DiscreteOrdered v => RSet v -> v -> Bool
rSetHas ByteSet
set) [Byte]
bytes
charToRanges :: Encoding -> CharSet -> [Utf8Range]
charToRanges :: Encoding -> CharSet -> [Span (List1 Byte)]
charToRanges Encoding
Latin1 =
(Span Char -> Span (List1 Byte))
-> [Span Char] -> [Span (List1 Byte)]
forall a b. (a -> b) -> [a] -> [b]
map ((Char -> List1 Byte) -> Span Char -> Span (List1 Byte)
forall a b. (a -> b) -> Span a -> Span b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Byte -> [Byte] -> List1 Byte
forall a. a -> [a] -> NonEmpty a
:| []) (Byte -> List1 Byte) -> (Char -> Byte) -> Char -> List1 Byte
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Byte
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Byte) -> (Char -> Int) -> Char -> Byte
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord))
([Span Char] -> [Span (List1 Byte)])
-> (CharSet -> [Span Char]) -> CharSet -> [Span (List1 Byte)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Maybe (Span Char)] -> [Span Char]
forall a. [Maybe a] -> [a]
catMaybes
([Maybe (Span Char)] -> [Span Char])
-> (CharSet -> [Maybe (Span Char)]) -> CharSet -> [Span Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Range Char -> Maybe (Span Char))
-> [Range Char] -> [Maybe (Span Char)]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Bool -> Range Char -> Maybe (Span Char)
charRangeToCharSpan Bool
False)
([Range Char] -> [Maybe (Span Char)])
-> (CharSet -> [Range Char]) -> CharSet -> [Maybe (Span Char)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CharSet -> [Range Char]
forall v. DiscreteOrdered v => RSet v -> [Range v]
rSetRanges
charToRanges Encoding
UTF8 =
[[Span (List1 Byte)]] -> [Span (List1 Byte)]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
([[Span (List1 Byte)]] -> [Span (List1 Byte)])
-> (CharSet -> [[Span (List1 Byte)]])
-> CharSet
-> [Span (List1 Byte)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Span (List1 Byte) -> [Span (List1 Byte)])
-> [Span (List1 Byte)] -> [[Span (List1 Byte)]]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Span (List1 Byte) -> [Span (List1 Byte)]
toUtfRange
([Span (List1 Byte)] -> [[Span (List1 Byte)]])
-> (CharSet -> [Span (List1 Byte)])
-> CharSet
-> [[Span (List1 Byte)]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Span Char -> Span (List1 Byte))
-> [Span Char] -> [Span (List1 Byte)]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Char -> List1 Byte) -> Span Char -> Span (List1 Byte)
forall a b. (a -> b) -> Span a -> Span b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Char -> List1 Byte
UTF8.encode)
([Span Char] -> [Span (List1 Byte)])
-> (CharSet -> [Span Char]) -> CharSet -> [Span (List1 Byte)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Maybe (Span Char)] -> [Span Char]
forall a. [Maybe a] -> [a]
catMaybes
([Maybe (Span Char)] -> [Span Char])
-> (CharSet -> [Maybe (Span Char)]) -> CharSet -> [Span Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Range Char -> Maybe (Span Char))
-> [Range Char] -> [Maybe (Span Char)]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Bool -> Range Char -> Maybe (Span Char)
charRangeToCharSpan Bool
True)
([Range Char] -> [Maybe (Span Char)])
-> (CharSet -> [Range Char]) -> CharSet -> [Maybe (Span Char)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CharSet -> [Range Char]
forall v. DiscreteOrdered v => RSet v -> [Range v]
rSetRanges
toUtfRange :: Span (List1 Byte) -> [Span (List1 Byte)]
toUtfRange :: Span (List1 Byte) -> [Span (List1 Byte)]
toUtfRange (Span List1 Byte
x List1 Byte
y) = List1 (Span (List1 Byte)) -> [Span (List1 Byte)]
forall a. NonEmpty a -> [a]
List1.toList (List1 (Span (List1 Byte)) -> [Span (List1 Byte)])
-> List1 (Span (List1 Byte)) -> [Span (List1 Byte)]
forall a b. (a -> b) -> a -> b
$ List1 Byte -> List1 Byte -> List1 (Span (List1 Byte))
fix List1 Byte
x List1 Byte
y
fix :: List1 Byte -> List1 Byte -> List1 (Span (List1 Byte))
fix :: List1 Byte -> List1 Byte -> List1 (Span (List1 Byte))
fix List1 Byte
x List1 Byte
y
| List1 Byte -> Int
forall a. NonEmpty a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length List1 Byte
x Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== List1 Byte -> Int
forall a. NonEmpty a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length List1 Byte
y = [List1 Byte -> List1 Byte -> Span (List1 Byte)
forall a. a -> a -> Span a
Span List1 Byte
x List1 Byte
y]
| List1 Byte -> Int
forall a. NonEmpty a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length List1 Byte
x Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = List1 Byte -> List1 Byte -> Span (List1 Byte)
forall a. a -> a -> Span a
Span List1 Byte
x [Byte
Item (List1 Byte)
0x7F] Span (List1 Byte)
-> List1 (Span (List1 Byte)) -> List1 (Span (List1 Byte))
forall a. a -> NonEmpty a -> NonEmpty a
<| List1 Byte -> List1 Byte -> List1 (Span (List1 Byte))
fix [Byte
Item (List1 Byte)
0xC2,Byte
Item (List1 Byte)
0x80] List1 Byte
y
| List1 Byte -> Int
forall a. NonEmpty a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length List1 Byte
x Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
2 = List1 Byte -> List1 Byte -> Span (List1 Byte)
forall a. a -> a -> Span a
Span List1 Byte
x [Byte
Item (List1 Byte)
0xDF,Byte
Item (List1 Byte)
0xBF] Span (List1 Byte)
-> List1 (Span (List1 Byte)) -> List1 (Span (List1 Byte))
forall a. a -> NonEmpty a -> NonEmpty a
<| List1 Byte -> List1 Byte -> List1 (Span (List1 Byte))
fix [Byte
Item (List1 Byte)
0xE0,Byte
Item (List1 Byte)
0x80,Byte
Item (List1 Byte)
0x80] List1 Byte
y
| List1 Byte -> Int
forall a. NonEmpty a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length List1 Byte
x Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
3 = List1 Byte -> List1 Byte -> Span (List1 Byte)
forall a. a -> a -> Span a
Span List1 Byte
x [Byte
Item (List1 Byte)
0xEF,Byte
Item (List1 Byte)
0xBF,Byte
Item (List1 Byte)
0xBF] Span (List1 Byte)
-> List1 (Span (List1 Byte)) -> List1 (Span (List1 Byte))
forall a. a -> NonEmpty a -> NonEmpty a
<| List1 Byte -> List1 Byte -> List1 (Span (List1 Byte))
fix [Byte
Item (List1 Byte)
0xF0,Byte
Item (List1 Byte)
0x80,Byte
Item (List1 Byte)
0x80,Byte
Item (List1 Byte)
0x80] List1 Byte
y
| Bool
otherwise = String -> List1 (Span (List1 Byte))
forall a. HasCallStack => String -> a
error String
"fix: incorrect input given"
byteRangeToBytePair :: Span a -> (a, a)
byteRangeToBytePair :: forall a. Span a -> (a, a)
byteRangeToBytePair (Span a
x a
y) = (a
x, a
y)
data Span a = Span a a
instance Functor Span where
fmap :: forall a b. (a -> b) -> Span a -> Span b
fmap a -> b
f (Span a
x a
y) = b -> b -> Span b
forall a. a -> a -> Span a
Span (a -> b
f a
x) (a -> b
f a
y)
charRangeToCharSpan :: Bool -> Range Char -> Maybe (Span Char)
charRangeToCharSpan :: Bool -> Range Char -> Maybe (Span Char)
charRangeToCharSpan Bool
_ (Range Boundary Char
BoundaryAboveAll Boundary Char
_) = Maybe (Span Char)
forall a. Maybe a
Nothing
charRangeToCharSpan Bool
_ (Range (BoundaryAbove Char
c) Boundary Char
_) | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
forall a. Bounded a => a
maxBound = Maybe (Span Char)
forall a. Maybe a
Nothing
charRangeToCharSpan Bool
_ (Range Boundary Char
_ Boundary Char
BoundaryBelowAll) = Maybe (Span Char)
forall a. Maybe a
Nothing
charRangeToCharSpan Bool
_ (Range Boundary Char
_ (BoundaryBelow Char
c)) | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
forall a. Bounded a => a
minBound = Maybe (Span Char)
forall a. Maybe a
Nothing
charRangeToCharSpan Bool
uni (Range Boundary Char
x Boundary Char
y) = Span Char -> Maybe (Span Char)
forall a. a -> Maybe a
Just (Char -> Char -> Span Char
forall a. a -> a -> Span a
Span (Boundary Char -> Char
l Boundary Char
x) (Boundary Char -> Char
h Boundary Char
y))
where l :: Boundary Char -> Char
l Boundary Char
b = case Boundary Char
b of
Boundary Char
BoundaryBelowAll -> Char
'\0'
BoundaryBelow Char
a -> Char
a
BoundaryAbove Char
a -> Char -> Char
forall a. Enum a => a -> a
succ Char
a
Boundary Char
BoundaryAboveAll -> String -> Char
forall a. HasCallStack => String -> a
error String
"panic: charRangeToCharSpan"
h :: Boundary Char -> Char
h Boundary Char
b = case Boundary Char
b of
Boundary Char
BoundaryBelowAll -> String -> Char
forall a. HasCallStack => String -> a
error String
"panic: charRangeToCharSpan"
BoundaryBelow Char
a -> Char -> Char
forall a. Enum a => a -> a
pred Char
a
BoundaryAbove Char
a -> Char
a
Boundary Char
BoundaryAboveAll | Bool
uni -> Int -> Char
chr Int
0x10ffff
| Bool
otherwise -> Int -> Char
chr Int
0xff
byteRanges :: Encoding -> CharSet -> [(List1 Byte, List1 Byte)]
byteRanges :: Encoding -> CharSet -> [(List1 Byte, List1 Byte)]
byteRanges Encoding
enc = (Span (List1 Byte) -> (List1 Byte, List1 Byte))
-> [Span (List1 Byte)] -> [(List1 Byte, List1 Byte)]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Span (List1 Byte) -> (List1 Byte, List1 Byte)
forall a. Span a -> (a, a)
byteRangeToBytePair ([Span (List1 Byte)] -> [(List1 Byte, List1 Byte)])
-> (CharSet -> [Span (List1 Byte)])
-> CharSet
-> [(List1 Byte, List1 Byte)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Encoding -> CharSet -> [Span (List1 Byte)]
charToRanges Encoding
enc
byteSetRange :: Byte -> Byte -> ByteSet
byteSetRange :: Byte -> Byte -> ByteSet
byteSetRange Byte
c1 Byte
c2 = [Range Byte] -> ByteSet
forall v. DiscreteOrdered v => [Range v] -> RSet v
makeRangedSet [Boundary Byte -> Boundary Byte -> Range Byte
forall v. Boundary v -> Boundary v -> Range v
Range (Byte -> Boundary Byte
forall a. a -> Boundary a
BoundaryBelow Byte
c1) (Byte -> Boundary Byte
forall a. a -> Boundary a
BoundaryAbove Byte
c2)]
byteSetSingleton :: Byte -> ByteSet
byteSetSingleton :: Byte -> ByteSet
byteSetSingleton = Byte -> ByteSet
forall v. DiscreteOrdered v => v -> RSet v
rSingleton
charSetQuote :: CharSet -> String
charSetQuote :: CharSet -> String
charSetQuote CharSet
s = String
"(\\c -> " String -> ShowS
forall a. [a] -> [a] -> [a]
++ (String -> ShowS) -> String -> [String] -> String
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\String
x String
y -> String
x String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" || " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
y) String
"False" ((Range Char -> String) -> [Range Char] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map Range Char -> String
forall {a}. Show a => Range a -> String
quoteRange (CharSet -> [Range Char]
forall v. DiscreteOrdered v => RSet v -> [Range v]
rSetRanges CharSet
s)) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
")"
where quoteRange :: Range a -> String
quoteRange (Range Boundary a
l Boundary a
h) = Boundary a -> String
forall {a}. Show a => Boundary a -> String
quoteL Boundary a
l String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
" && " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Boundary a -> String
forall {a}. Show a => Boundary a -> String
quoteH Boundary a
h
quoteL :: Boundary a -> String
quoteL (BoundaryAbove a
a) = String
"c > " String -> ShowS
forall a. [a] -> [a] -> [a]
++ a -> String
forall a. Show a => a -> String
show a
a
quoteL (BoundaryBelow a
a) = String
"c >= " String -> ShowS
forall a. [a] -> [a] -> [a]
++ a -> String
forall a. Show a => a -> String
show a
a
quoteL (Boundary a
BoundaryAboveAll) = String
"False"
quoteL (Boundary a
BoundaryBelowAll) = String
"True"
quoteH :: Boundary a -> String
quoteH (BoundaryAbove a
a) = String
"c <= " String -> ShowS
forall a. [a] -> [a] -> [a]
++ a -> String
forall a. Show a => a -> String
show a
a
quoteH (BoundaryBelow a
a) = String
"c < " String -> ShowS
forall a. [a] -> [a] -> [a]
++ a -> String
forall a. Show a => a -> String
show a
a
quoteH (Boundary a
BoundaryAboveAll) = String
"True"
quoteH (Boundary a
BoundaryBelowAll) = String
"False"