{-# LANGUAGE TupleSections     #-}
{-# LANGUAGE ViewPatterns      #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.Readers.EPUB
   Copyright   : Copyright (C) 2014-2020 Matthew Pickering
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Conversion of EPUB to 'Pandoc' document.
-}

module Text.Pandoc.Readers.EPUB
  (readEPUB)
  where

import Codec.Archive.Zip (Archive (..), Entry(..), findEntryByPath, fromEntry,
                          toArchiveOrFail)
import Control.DeepSeq (NFData, deepseq)
import Control.Monad (guard, liftM, liftM2, mplus)
import Control.Monad.Except (throwError)
import qualified Data.ByteString.Lazy as BL (ByteString)
import qualified Data.Text as T
import Data.Text (Text)
import qualified Data.Map as M (Map, elems, fromList, lookup)
import Data.Maybe (mapMaybe)
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import Network.URI (unEscapeString, parseRelativeReference, URI(..))
import System.FilePath (dropFileName, dropFileName, normalise, splitFileName,
                        takeFileName, (</>))
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class.PandocMonad (PandocMonad, insertMedia)
import Text.Pandoc.Definition hiding (Attr)
import Text.Pandoc.Error
import Text.Pandoc.Extensions (Extension (Ext_raw_html), enableExtension)
import Text.Pandoc.MIME (MimeType)
import Text.Pandoc.Options (ReaderOptions (..))
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Shared (addMetaField, collapseFilePath, tshow)
import Text.Pandoc.URI (escapeURI)
import qualified Text.Pandoc.UTF8 as UTF8 (toTextLazy)
import Text.Pandoc.Walk (query, walk)
import Text.Pandoc.XML.Light

type Items = M.Map Text (FilePath, MimeType)

readEPUB :: PandocMonad m => ReaderOptions -> BL.ByteString -> m Pandoc
readEPUB :: forall (m :: * -> *).
PandocMonad m =>
ReaderOptions -> ByteString -> m Pandoc
readEPUB ReaderOptions
opts ByteString
bytes = case ByteString -> Either [Char] Archive
toArchiveOrFail ByteString
bytes of
  Right Archive
archive -> ReaderOptions -> Archive -> m Pandoc
forall (m :: * -> *).
PandocMonad m =>
ReaderOptions -> Archive -> m Pandoc
archiveToEPUB ReaderOptions
opts Archive
archive
  Left  [Char]
e       -> PandocError -> m Pandoc
forall a. PandocError -> m a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (PandocError -> m Pandoc) -> PandocError -> m Pandoc
forall a b. (a -> b) -> a -> b
$ Text -> PandocError
PandocParseError (Text -> PandocError) -> Text -> PandocError
forall a b. (a -> b) -> a -> b
$
                     Text
"Couldn't extract ePub file: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> [Char] -> Text
T.pack [Char]
e

-- runEPUB :: Except PandocError a -> Either PandocError a
-- runEPUB = runExcept

-- Note that internal reference are aggressively normalised so that all ids
-- are of the form "filename#id"
--
archiveToEPUB :: (PandocMonad m) => ReaderOptions -> Archive -> m Pandoc
archiveToEPUB :: forall (m :: * -> *).
PandocMonad m =>
ReaderOptions -> Archive -> m Pandoc
archiveToEPUB ReaderOptions
os Archive
archive = do
  -- root is path to folder with manifest file in
  (root, content) <- Archive -> m ([Char], Element)
forall (m :: * -> *).
PandocMonad m =>
Archive -> m ([Char], Element)
getManifest Archive
archive
  (coverId, meta) <- parseMeta content
  (cover, items)  <- parseManifest content coverId
  -- No need to collapse here as the image path is from the manifest file
  let coverDoc = Pandoc -> ([Char] -> Pandoc) -> Maybe [Char] -> Pandoc
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Pandoc
forall a. Monoid a => a
mempty [Char] -> Pandoc
imageToPandoc Maybe [Char]
cover
  spine <- parseSpine items content
  let escapedSpine = (([Char], Text) -> Text) -> [([Char], Text)] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text -> Text
escapeURI (Text -> Text)
-> (([Char], Text) -> Text) -> ([Char], Text) -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> Text
T.pack ([Char] -> Text)
-> (([Char], Text) -> [Char]) -> ([Char], Text) -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> [Char]
takeFileName ([Char] -> [Char])
-> (([Char], Text) -> [Char]) -> ([Char], Text) -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Char], Text) -> [Char]
forall a b. (a, b) -> a
fst) [([Char], Text)]
spine
  Pandoc _ bs <-
      foldM' (\Pandoc
a ([Char], Text)
b -> ((Pandoc
a Pandoc -> Pandoc -> Pandoc
forall a. Semigroup a => a -> a -> a
<>) (Pandoc -> Pandoc) -> (Pandoc -> Pandoc) -> Pandoc -> Pandoc
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Inline -> Inline) -> Pandoc -> Pandoc
forall a b. Walkable a b => (a -> a) -> b -> b
walk ([Text] -> Inline -> Inline
prependHash [Text]
escapedSpine))
        (Pandoc -> Pandoc) -> m Pandoc -> m Pandoc
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` [Char] -> ([Char], Text) -> m Pandoc
forall (m :: * -> *).
PandocMonad m =>
[Char] -> ([Char], Text) -> m Pandoc
parseSpineElem [Char]
root ([Char], Text)
b) mempty spine
  let ast = Pandoc
coverDoc Pandoc -> Pandoc -> Pandoc
forall a. Semigroup a => a -> a -> a
<> Meta -> [Block] -> Pandoc
Pandoc Meta
meta [Block]
bs
  fetchImages (M.elems items) root archive ast
  return ast
  where
    os' :: ReaderOptions
os' = ReaderOptions
os {readerExtensions = enableExtension Ext_raw_html (readerExtensions os)}
    parseSpineElem :: PandocMonad m => FilePath -> (FilePath, MimeType) -> m Pandoc
    parseSpineElem :: forall (m :: * -> *).
PandocMonad m =>
[Char] -> ([Char], Text) -> m Pandoc
parseSpineElem ([Char] -> [Char]
normalise -> [Char]
r) ([Char] -> [Char]
normalise -> [Char]
path, Text
mime) = do
      doc <- Text -> [Char] -> [Char] -> m Pandoc
forall (m :: * -> *).
PandocMonad m =>
Text -> [Char] -> [Char] -> m Pandoc
mimeToReader Text
mime [Char]
r [Char]
path
      let docSpan = Blocks -> Pandoc
B.doc (Blocks -> Pandoc) -> Blocks -> Pandoc
forall a b. (a -> b) -> a -> b
$ Inlines -> Blocks
B.para (Inlines -> Blocks) -> Inlines -> Blocks
forall a b. (a -> b) -> a -> b
$ Attr -> Inlines -> Inlines
B.spanWith ([Char] -> Text
T.pack ([Char] -> Text) -> [Char] -> Text
forall a b. (a -> b) -> a -> b
$ [Char] -> [Char]
takeFileName [Char]
path, [], []) Inlines
forall a. Monoid a => a
mempty
      return $ docSpan <> doc
    mimeToReader :: PandocMonad m => MimeType -> FilePath -> FilePath -> m Pandoc
    mimeToReader :: forall (m :: * -> *).
PandocMonad m =>
Text -> [Char] -> [Char] -> m Pandoc
mimeToReader Text
"application/xhtml+xml" ([Char] -> [Char]
unEscapeString -> [Char]
root)
                                         ([Char] -> [Char]
unEscapeString -> [Char]
path) = do
      fname <- [Char] -> Archive -> m Entry
forall (m :: * -> *). PandocMonad m => [Char] -> Archive -> m Entry
findEntryByPathE ([Char]
root [Char] -> [Char] -> [Char]
</> [Char]
path) Archive
archive
      html <- readHtml os' . TL.toStrict . TL.decodeUtf8 $ fromEntry fname
      return $ fixInternalReferences path html
    mimeToReader Text
s [Char]
_ ([Char] -> [Char]
unEscapeString -> [Char]
path)
      | Text
s Text -> [Text] -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text]
imageMimes = Pandoc -> m Pandoc
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Pandoc -> m Pandoc) -> Pandoc -> m Pandoc
forall a b. (a -> b) -> a -> b
$ [Char] -> Pandoc
imageToPandoc [Char]
path
      | Bool
otherwise = Pandoc -> m Pandoc
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return Pandoc
forall a. Monoid a => a
mempty

-- paths should be absolute when this function is called
-- renameImages should do this
fetchImages :: PandocMonad m
            => [(FilePath, MimeType)]
            -> FilePath -- ^ Root
            -> Archive
            -> Pandoc
            -> m ()
fetchImages :: forall (m :: * -> *).
PandocMonad m =>
[([Char], Text)] -> [Char] -> Archive -> Pandoc -> m ()
fetchImages [([Char], Text)]
mimes [Char]
root Archive
arc ((Inline -> [[Char]]) -> Pandoc -> [[Char]]
forall c. Monoid c => (Inline -> c) -> Pandoc -> c
forall a b c. (Walkable a b, Monoid c) => (a -> c) -> b -> c
query Inline -> [[Char]]
iq -> [[Char]]
links) =
    (([Char], Maybe Text, ByteString) -> m ())
-> [([Char], Maybe Text, ByteString)] -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (([Char] -> Maybe Text -> ByteString -> m ())
-> ([Char], Maybe Text, ByteString) -> m ()
forall a b c d. (a -> b -> c -> d) -> (a, b, c) -> d
uncurry3 [Char] -> Maybe Text -> ByteString -> m ()
forall (m :: * -> *).
PandocMonad m =>
[Char] -> Maybe Text -> ByteString -> m ()
insertMedia) (([Char] -> Maybe ([Char], Maybe Text, ByteString))
-> [[Char]] -> [([Char], Maybe Text, ByteString)]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe [Char] -> Maybe ([Char], Maybe Text, ByteString)
getEntry [[Char]]
links)
  where
    getEntry :: [Char] -> Maybe ([Char], Maybe Text, ByteString)
getEntry [Char]
link =
        let abslink :: [Char]
abslink = [Char] -> [Char]
normalise ([Char] -> [Char]
unEscapeString ([Char]
root [Char] -> [Char] -> [Char]
</> [Char]
link)) in
        ([Char]
link , [Char] -> [([Char], Text)] -> Maybe Text
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup [Char]
link [([Char], Text)]
mimes, ) (ByteString -> ([Char], Maybe Text, ByteString))
-> (Entry -> ByteString)
-> Entry
-> ([Char], Maybe Text, ByteString)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Entry -> ByteString
fromEntry
          (Entry -> ([Char], Maybe Text, ByteString))
-> Maybe Entry -> Maybe ([Char], Maybe Text, ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Char] -> Archive -> Maybe Entry
findEntryByPath [Char]
abslink Archive
arc

iq :: Inline -> [FilePath]
iq :: Inline -> [[Char]]
iq (Image Attr
_ [Inline]
_ (Text
url, Text
_)) = [Text -> [Char]
T.unpack Text
url]
iq Inline
_                    = []

-- Remove relative paths
renameImages :: FilePath -> Inline -> Inline
renameImages :: [Char] -> Inline -> Inline
renameImages [Char]
root img :: Inline
img@(Image Attr
attr [Inline]
a (Text
url, Text
b))
  | Text
"data:" Text -> Text -> Bool
`T.isPrefixOf` Text
url = Inline
img
  | Bool
otherwise                  = Attr -> [Inline] -> (Text, Text) -> Inline
Image Attr
attr [Inline]
a ( [Char] -> Text
T.pack ([Char] -> Text) -> [Char] -> Text
forall a b. (a -> b) -> a -> b
$ [Char] -> [Char]
collapseFilePath ([Char]
root [Char] -> [Char] -> [Char]
</> Text -> [Char]
T.unpack Text
url)
                                              , Text
b)
renameImages [Char]
_ Inline
x = Inline
x

imageToPandoc :: FilePath -> Pandoc
imageToPandoc :: [Char] -> Pandoc
imageToPandoc [Char]
s = Blocks -> Pandoc
B.doc (Blocks -> Pandoc) -> (Inlines -> Blocks) -> Inlines -> Pandoc
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Inlines -> Blocks
B.para (Inlines -> Pandoc) -> Inlines -> Pandoc
forall a b. (a -> b) -> a -> b
$ Text -> Text -> Inlines -> Inlines
B.image ([Char] -> Text
T.pack [Char]
s) Text
"" Inlines
forall a. Monoid a => a
mempty

imageMimes :: [MimeType]
imageMimes :: [Text]
imageMimes = [Text
"image/gif", Text
"image/jpeg", Text
"image/png"]

type CoverId = Text

type CoverImage = FilePath

parseManifest :: (PandocMonad m)
              => Element -> Maybe CoverId -> m (Maybe CoverImage, Items)
parseManifest :: forall (m :: * -> *).
PandocMonad m =>
Element -> Maybe Text -> m (Maybe [Char], Items)
parseManifest Element
content Maybe Text
coverId = do
  manifest <- QName -> Element -> m Element
forall (m :: * -> *).
PandocMonad m =>
QName -> Element -> m Element
findElementE (Text -> QName
dfName Text
"manifest") Element
content
  let items = QName -> Element -> [Element]
findChildren (Text -> QName
dfName Text
"item") Element
manifest
  r <- mapM parseItem items
  let cover = QName -> Element -> Maybe Text
findAttr (Text -> QName
emptyName Text
"href") (Element -> Maybe Text) -> Maybe Element -> Maybe Text
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< (Element -> Bool) -> Element -> Maybe Element
filterChild Element -> Bool
findCover Element
manifest
  return (T.unpack <$> (cover `mplus` coverId), M.fromList r)
  where
    findCover :: Element -> Bool
findCover Element
e = Bool -> (Text -> Bool) -> Maybe Text -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
False (Text -> Text -> Bool
T.isInfixOf Text
"cover-image")
                  (QName -> Element -> Maybe Text
findAttr (Text -> QName
emptyName Text
"properties") Element
e)
               Bool -> Bool -> Bool
|| Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
True Maybe Bool -> Maybe Bool -> Bool
forall a. Eq a => a -> a -> Bool
== (Text -> Text -> Bool) -> Maybe Text -> Maybe Text -> Maybe Bool
forall (m :: * -> *) a1 a2 r.
Monad m =>
(a1 -> a2 -> r) -> m a1 -> m a2 -> m r
liftM2 Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
(==) Maybe Text
coverId (QName -> Element -> Maybe Text
findAttr (Text -> QName
emptyName Text
"id") Element
e)
    parseItem :: Element -> m (Text, ([Char], Text))
parseItem Element
e = do
      uid <- QName -> Element -> m Text
forall (m :: * -> *). PandocMonad m => QName -> Element -> m Text
findAttrE (Text -> QName
emptyName Text
"id") Element
e
      href <- findAttrE (emptyName "href") e
      mime <- findAttrE (emptyName "media-type") e
      return (uid, (T.unpack href, mime))

parseSpine :: PandocMonad m => Items -> Element -> m [(FilePath, MimeType)]
parseSpine :: forall (m :: * -> *).
PandocMonad m =>
Items -> Element -> m [([Char], Text)]
parseSpine Items
is Element
e = do
  spine <- QName -> Element -> m Element
forall (m :: * -> *).
PandocMonad m =>
QName -> Element -> m Element
findElementE (Text -> QName
dfName Text
"spine") Element
e
  let itemRefs = QName -> Element -> [Element]
findChildren (Text -> QName
dfName Text
"itemref") Element
spine
  mapM (mkE "parseSpine" . flip M.lookup is) $ mapMaybe parseItemRef itemRefs
  where
    parseItemRef :: Element -> Maybe Text
parseItemRef Element
ref = do
      let linear :: Bool
linear = Bool -> (Text -> Bool) -> Maybe Text -> Bool
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Bool
True (Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"yes") (QName -> Element -> Maybe Text
findAttr (Text -> QName
emptyName Text
"linear") Element
ref)
      Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard Bool
linear
      QName -> Element -> Maybe Text
findAttr (Text -> QName
emptyName Text
"idref") Element
ref

parseMeta :: PandocMonad m => Element -> m (Maybe CoverId, Meta)
parseMeta :: forall (m :: * -> *).
PandocMonad m =>
Element -> m (Maybe Text, Meta)
parseMeta Element
content = do
  meta <- QName -> Element -> m Element
forall (m :: * -> *).
PandocMonad m =>
QName -> Element -> m Element
findElementE (Text -> QName
dfName Text
"metadata") Element
content
  let dcspace (QName Text
_ (Just Text
"http://purl.org/dc/elements/1.1/") (Just Text
"dc")) = Bool
True
      dcspace QName
_ = Bool
False
  let dcs = (QName -> Bool) -> Element -> [Element]
filterChildrenName QName -> Bool
dcspace Element
meta
  let r = (Element -> Meta -> Meta) -> Meta -> [Element] -> Meta
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Element -> Meta -> Meta
parseMetaItem Meta
nullMeta [Element]
dcs
  let coverId = QName -> Element -> Maybe Text
findAttr (Text -> QName
emptyName Text
"content") (Element -> Maybe Text) -> Maybe Element -> Maybe Text
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< (Element -> Bool) -> Element -> Maybe Element
filterChild Element -> Bool
findCover Element
meta
  return (coverId, r)
  where
    findCover :: Element -> Bool
findCover Element
e = QName -> Element -> Maybe Text
findAttr (Text -> QName
emptyName Text
"name") Element
e Maybe Text -> Maybe Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"cover"

-- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-metadata-elem
parseMetaItem :: Element -> Meta -> Meta
parseMetaItem :: Element -> Meta -> Meta
parseMetaItem e :: Element
e@(QName -> Text
stripNamespace (QName -> Text) -> (Element -> QName) -> Element -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Element -> QName
elName -> Text
field) Meta
meta =
  Text -> Inlines -> Meta -> Meta
forall a. ToMetaValue a => Text -> a -> Meta -> Meta
addMetaField (Text -> Text
renameMeta Text
field) (Text -> Inlines
B.str (Text -> Inlines) -> Text -> Inlines
forall a b. (a -> b) -> a -> b
$ Element -> Text
strContent Element
e) Meta
meta

renameMeta :: Text -> Text
renameMeta :: Text -> Text
renameMeta Text
"creator" = Text
"author"
renameMeta Text
s         = Text
s

getManifest :: PandocMonad m => Archive -> m (String, Element)
getManifest :: forall (m :: * -> *).
PandocMonad m =>
Archive -> m ([Char], Element)
getManifest Archive
archive = do
  metaEntry <- [Char] -> Archive -> m Entry
forall (m :: * -> *). PandocMonad m => [Char] -> Archive -> m Entry
findEntryByPathE ([Char]
"META-INF" [Char] -> [Char] -> [Char]
</> [Char]
"container.xml") Archive
archive
  docElem <- parseXMLDocE metaEntry
  let namespaces = (Attr -> Maybe (Text, Text)) -> [Attr] -> [(Text, Text)]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Attr -> Maybe (Text, Text)
attrToNSPair (Element -> [Attr]
elAttribs Element
docElem)
  ns <- mkE "xmlns not in namespaces" (lookup "xmlns" namespaces)
  as <- fmap (map attrToPair . elAttribs)
    (findElementE (QName "rootfile" (Just ns) Nothing) docElem)
  manifestFile <- T.unpack <$> mkE "Root not found" (lookup "full-path" as)
  let rootdir = [Char] -> [Char]
dropFileName [Char]
manifestFile
  --mime <- lookup "media-type" as
  manifest <- findEntryByPathE manifestFile archive
  (rootdir,) <$> parseXMLDocE manifest

-- Fixup

fixInternalReferences :: FilePath -> Pandoc -> Pandoc
fixInternalReferences :: [Char] -> Pandoc -> Pandoc
fixInternalReferences [Char]
pathToFile =
   (Inline -> Inline) -> Pandoc -> Pandoc
forall a b. Walkable a b => (a -> a) -> b -> b
walk ([Char] -> Inline -> Inline
renameImages [Char]
root)
  (Pandoc -> Pandoc) -> (Pandoc -> Pandoc) -> Pandoc -> Pandoc
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Block -> Block) -> Pandoc -> Pandoc
forall a b. Walkable a b => (a -> a) -> b -> b
walk ([Char] -> Block -> Block
fixBlockIRs [Char]
filename)
  (Pandoc -> Pandoc) -> (Pandoc -> Pandoc) -> Pandoc -> Pandoc
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Inline -> Inline) -> Pandoc -> Pandoc
forall a b. Walkable a b => (a -> a) -> b -> b
walk ([Char] -> Inline -> Inline
fixInlineIRs [Char]
filename)
  where
    ([Char]
root, Text -> [Char]
T.unpack (Text -> [Char]) -> ([Char] -> Text) -> [Char] -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
escapeURI (Text -> Text) -> ([Char] -> Text) -> [Char] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> Text
T.pack -> [Char]
filename) =
      [Char] -> ([Char], [Char])
splitFileName [Char]
pathToFile

fixInlineIRs :: String -> Inline -> Inline
fixInlineIRs :: [Char] -> Inline -> Inline
fixInlineIRs [Char]
s (Span Attr
as [Inline]
v) =
  Attr -> [Inline] -> Inline
Span ([Char] -> Attr -> Attr
fixAttrs [Char]
s Attr
as) [Inline]
v
fixInlineIRs [Char]
s (Code Attr
as Text
code) =
  Attr -> Text -> Inline
Code ([Char] -> Attr -> Attr
fixAttrs [Char]
s Attr
as) Text
code
fixInlineIRs [Char]
s (Link Attr
as [Inline]
is (Text
url, Text
tit)) =
  case [Char] -> Maybe URI
parseRelativeReference (Text -> [Char]
T.unpack Text
url) of
    Just URI{ uriScheme :: URI -> [Char]
uriScheme = [Char]
""
            , uriAuthority :: URI -> Maybe URIAuth
uriAuthority = Maybe URIAuth
Nothing
            , uriPath :: URI -> [Char]
uriPath = [Char]
upath
            , uriQuery :: URI -> [Char]
uriQuery = [Char]
""
            , uriFragment :: URI -> [Char]
uriFragment = Char
'#':[Char]
ufrag } ->
         Attr -> [Inline] -> (Text, Text) -> Inline
Link ([Char] -> Attr -> Attr
fixAttrs [Char]
s Attr
as) [Inline]
is ([Char] -> Text -> Text
addHash (if [Char] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Char]
upath
                                              then [Char]
s
                                              else [Char]
upath) ([Char] -> Text
T.pack [Char]
ufrag), Text
tit)
    Maybe URI
_ -> Attr -> [Inline] -> (Text, Text) -> Inline
Link ([Char] -> Attr -> Attr
fixAttrs [Char]
s Attr
as) [Inline]
is (Text
url, Text
tit)
fixInlineIRs [Char]
_ Inline
v = Inline
v

prependHash :: [Text] -> Inline -> Inline
prependHash :: [Text] -> Inline -> Inline
prependHash [Text]
ps l :: Inline
l@(Link Attr
attr [Inline]
is (Text
url, Text
tit))
  | [Bool] -> Bool
forall (t :: * -> *). Foldable t => t Bool -> Bool
or [Text
s Text -> Text -> Bool
`T.isPrefixOf` Text
url | Text
s <- [Text]
ps] =
    Attr -> [Inline] -> (Text, Text) -> Inline
Link Attr
attr [Inline]
is (Text
"#" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
url, Text
tit)
  | Bool
otherwise = Inline
l
prependHash [Text]
_ Inline
i = Inline
i

fixBlockIRs :: String -> Block -> Block
fixBlockIRs :: [Char] -> Block -> Block
fixBlockIRs [Char]
s (Div Attr
as [Block]
b) =
  Attr -> [Block] -> Block
Div ([Char] -> Attr -> Attr
fixAttrs [Char]
s Attr
as) [Block]
b
fixBlockIRs [Char]
s (Header Int
i Attr
as [Inline]
b) =
  Int -> Attr -> [Inline] -> Block
Header Int
i ([Char] -> Attr -> Attr
fixAttrs [Char]
s Attr
as) [Inline]
b
fixBlockIRs [Char]
s (CodeBlock Attr
as Text
code) =
  Attr -> Text -> Block
CodeBlock ([Char] -> Attr -> Attr
fixAttrs [Char]
s Attr
as) Text
code
fixBlockIRs [Char]
_ Block
b = Block
b

fixAttrs :: FilePath -> B.Attr -> B.Attr
fixAttrs :: [Char] -> Attr -> Attr
fixAttrs [Char]
s (Text
ident, [Text]
cs, [(Text, Text)]
kvs) =
  ([Char] -> Text -> Text
addHash [Char]
s Text
ident, (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) [Text]
cs, [(Text, Text)] -> [(Text, Text)]
removeEPUBAttrs [(Text, Text)]
kvs)

addHash :: FilePath -> Text -> Text
addHash :: [Char] -> Text -> Text
addHash [Char]
_ Text
""    = Text
""
addHash [Char]
s Text
ident = [Char] -> Text
T.pack ([Char] -> [Char]
takeFileName [Char]
s) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"_" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
ident

removeEPUBAttrs :: [(Text, Text)] -> [(Text, Text)]
removeEPUBAttrs :: [(Text, Text)] -> [(Text, Text)]
removeEPUBAttrs [(Text, Text)]
kvs = ((Text, Text) -> Bool) -> [(Text, Text)] -> [(Text, Text)]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> ((Text, Text) -> Bool) -> (Text, Text) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text, Text) -> Bool
forall a. (Text, a) -> Bool
isEPUBAttr) [(Text, Text)]
kvs

isEPUBAttr :: (Text, a) -> Bool
isEPUBAttr :: forall a. (Text, a) -> Bool
isEPUBAttr (Text
k, a
_) = Text
"epub:" Text -> Text -> Bool
`T.isPrefixOf` Text
k

-- Library

-- Strict version of foldM
foldM' :: (Monad m, NFData a) => (a -> b -> m a) -> a -> [b] -> m a
foldM' :: forall (m :: * -> *) a b.
(Monad m, NFData a) =>
(a -> b -> m a) -> a -> [b] -> m a
foldM' a -> b -> m a
_ a
z [] = a -> m a
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return a
z
foldM' a -> b -> m a
f a
z (b
x:[b]
xs) = do
  z' <- a -> b -> m a
f a
z b
x
  z' `deepseq` foldM' f z' xs

uncurry3 :: (a -> b -> c -> d) -> (a, b, c) -> d
uncurry3 :: forall a b c d. (a -> b -> c -> d) -> (a, b, c) -> d
uncurry3 a -> b -> c -> d
f (a
a, b
b, c
c) = a -> b -> c -> d
f a
a b
b c
c

-- Utility

stripNamespace :: QName -> Text
stripNamespace :: QName -> Text
stripNamespace (QName Text
v Maybe Text
_ Maybe Text
_) = Text
v

attrToNSPair :: Attr -> Maybe (Text, Text)
attrToNSPair :: Attr -> Maybe (Text, Text)
attrToNSPair (Attr (QName Text
"xmlns" Maybe Text
_ Maybe Text
_) Text
val) = (Text, Text) -> Maybe (Text, Text)
forall a. a -> Maybe a
Just (Text
"xmlns", Text
val)
attrToNSPair Attr
_                              = Maybe (Text, Text)
forall a. Maybe a
Nothing

attrToPair :: Attr -> (Text, Text)
attrToPair :: Attr -> (Text, Text)
attrToPair (Attr (QName Text
name Maybe Text
_ Maybe Text
_) Text
val) = (Text
name, Text
val)

defaultNameSpace :: Maybe Text
defaultNameSpace :: Maybe Text
defaultNameSpace = Text -> Maybe Text
forall a. a -> Maybe a
Just Text
"http://www.idpf.org/2007/opf"

dfName :: Text -> QName
dfName :: Text -> QName
dfName Text
s = Text -> Maybe Text -> Maybe Text -> QName
QName Text
s Maybe Text
defaultNameSpace Maybe Text
forall a. Maybe a
Nothing

emptyName :: Text -> QName
emptyName :: Text -> QName
emptyName Text
s = Text -> Maybe Text -> Maybe Text -> QName
QName Text
s Maybe Text
forall a. Maybe a
Nothing Maybe Text
forall a. Maybe a
Nothing

-- Convert Maybe interface to Either

findAttrE :: PandocMonad m => QName -> Element -> m Text
findAttrE :: forall (m :: * -> *). PandocMonad m => QName -> Element -> m Text
findAttrE QName
q Element
e = Text -> Maybe Text -> m Text
forall (m :: * -> *) a. PandocMonad m => Text -> Maybe a -> m a
mkE Text
"findAttr" (Maybe Text -> m Text) -> Maybe Text -> m Text
forall a b. (a -> b) -> a -> b
$ QName -> Element -> Maybe Text
findAttr QName
q Element
e

findEntryByPathE :: PandocMonad m => FilePath -> Archive -> m Entry
findEntryByPathE :: forall (m :: * -> *). PandocMonad m => [Char] -> Archive -> m Entry
findEntryByPathE ([Char] -> [Char]
normalise ([Char] -> [Char]) -> ([Char] -> [Char]) -> [Char] -> [Char]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> [Char]
unEscapeString -> [Char]
path) Archive
a =
  Text -> Maybe Entry -> m Entry
forall (m :: * -> *) a. PandocMonad m => Text -> Maybe a -> m a
mkE (Text
"No entry on path: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> [Char] -> Text
T.pack [Char]
path) (Maybe Entry -> m Entry) -> Maybe Entry -> m Entry
forall a b. (a -> b) -> a -> b
$ [Char] -> Archive -> Maybe Entry
findEntryByPath [Char]
path Archive
a

parseXMLDocE :: PandocMonad m => Entry -> m Element
parseXMLDocE :: forall (m :: * -> *). PandocMonad m => Entry -> m Element
parseXMLDocE Entry
entry =
  (Text -> m Element)
-> (Element -> m Element) -> Either Text Element -> m Element
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (PandocError -> m Element
forall a. PandocError -> m a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (PandocError -> m Element)
-> (Text -> PandocError) -> Text -> m Element
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text -> PandocError
PandocXMLError Text
fp) Element -> m Element
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (Either Text Element -> m Element)
-> Either Text Element -> m Element
forall a b. (a -> b) -> a -> b
$ LazyText -> Either Text Element
parseXMLElement LazyText
doc
 where
  doc :: LazyText
doc = ByteString -> LazyText
UTF8.toTextLazy (ByteString -> LazyText)
-> (Entry -> ByteString) -> Entry -> LazyText
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Entry -> ByteString
fromEntry (Entry -> LazyText) -> Entry -> LazyText
forall a b. (a -> b) -> a -> b
$ Entry
entry
  fp :: Text
fp  = [Char] -> Text
T.pack ([Char] -> Text) -> [Char] -> Text
forall a b. (a -> b) -> a -> b
$ Entry -> [Char]
eRelativePath Entry
entry

findElementE :: PandocMonad m => QName -> Element -> m Element
findElementE :: forall (m :: * -> *).
PandocMonad m =>
QName -> Element -> m Element
findElementE QName
e Element
x =
  Text -> Maybe Element -> m Element
forall (m :: * -> *) a. PandocMonad m => Text -> Maybe a -> m a
mkE (Text
"Unable to find element: " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> QName -> Text
forall a. Show a => a -> Text
tshow QName
e) (Maybe Element -> m Element) -> Maybe Element -> m Element
forall a b. (a -> b) -> a -> b
$ QName -> Element -> Maybe Element
findElement QName
e Element
x

mkE :: PandocMonad m => Text -> Maybe a -> m a
mkE :: forall (m :: * -> *) a. PandocMonad m => Text -> Maybe a -> m a
mkE Text
s = m a -> (a -> m a) -> Maybe a -> m a
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (PandocError -> m a
forall a. PandocError -> m a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (PandocError -> m a) -> (Text -> PandocError) -> Text -> m a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> PandocError
PandocParseError (Text -> m a) -> Text -> m a
forall a b. (a -> b) -> a -> b
$ Text
s) a -> m a
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return