summaryrefslogtreecommitdiff
path: root/src/Text/Pandoc/Filter
diff options
context:
space:
mode:
authorLaurentRDC <>2019-03-27 22:15:00 (GMT)
committerhdiff <hdiff@hdiff.luite.com>2019-03-27 22:15:00 (GMT)
commit45a607a412483fde3e660e0bfea5cd0fbe9ac02b (patch)
tree3a5366aad454a01428433dba59894f87769942d1 /src/Text/Pandoc/Filter
parentddff8579481f7ce672d15886ab5a38574fc7255f (diff)
version 2.0.0.02.0.0.0
Diffstat (limited to 'src/Text/Pandoc/Filter')
-rw-r--r--src/Text/Pandoc/Filter/FigureSpec.hs107
-rw-r--r--src/Text/Pandoc/Filter/Pyplot.hs303
-rw-r--r--src/Text/Pandoc/Filter/Scripting.hs87
3 files changed, 335 insertions, 162 deletions
diff --git a/src/Text/Pandoc/Filter/FigureSpec.hs b/src/Text/Pandoc/Filter/FigureSpec.hs
new file mode 100644
index 0000000..ff835ec
--- /dev/null
+++ b/src/Text/Pandoc/Filter/FigureSpec.hs
@@ -0,0 +1,107 @@
+{-# LANGUAGE OverloadedStrings #-}
+
+{-|
+Module : Text.Pandoc.Filter.FigureSpec
+Copyright : (c) Laurent P René de Cotret, 2019
+License : MIT
+Maintainer : laurent.decotret@outlook.com
+Stability : internal
+Portability : portable
+
+This module defines types and functions that help
+with keeping track of figure specifications
+-}
+module Text.Pandoc.Filter.FigureSpec
+ ( FigureSpec(..)
+ , SaveFormat(..)
+ , saveFormatFromString
+ , figurePath
+ , hiresFigurePath
+ , addPlotCapture
+ -- for testing purposes
+ , extension
+ ) where
+
+import Data.Hashable (Hashable, hash, hashWithSalt)
+import qualified Data.Text as T
+
+import System.FilePath (FilePath, addExtension,
+ replaceExtension, (</>))
+
+import Text.Pandoc.Definition (Attr)
+import Text.Pandoc.Filter.Scripting (PythonScript)
+
+-- | Image file formats in which a figure can be saved.
+-- Each constructor is mapped to a file extension by 'extension'.
+data SaveFormat
+ = PNG
+ | PDF
+ | SVG
+ | JPG
+ | EPS
+
+-- | Parse an image save format string.
+--
+-- Matching is case-insensitive and tolerates a single leading dot, so
+-- @"png"@, @"PNG"@, @".png"@ and @".Png"@ all yield 'PNG'. Both @"jpg"@ and
+-- @"jpeg"@ yield 'JPG'. Unrecognised strings yield 'Nothing'.
+saveFormatFromString :: String -> Maybe SaveFormat
+saveFormatFromString s =
+    case normalized of
+        "png" -> Just PNG
+        "pdf" -> Just PDF
+        "svg" -> Just SVG
+        "jpg" -> Just JPG
+        "jpeg" -> Just JPG
+        "eps" -> Just EPS
+        _ -> Nothing
+  where
+    lowered = T.toLower (T.pack s)
+    -- Drop at most one leading dot so that ".png" is treated like "png".
+    normalized = maybe lowered id (T.stripPrefix "." lowered)
+
+-- | File extension, including the leading dot, for a given save format.
+extension :: SaveFormat -> String
+extension fmt =
+    case fmt of
+        PNG -> ".png"
+        PDF -> ".pdf"
+        SVG -> ".svg"
+        JPG -> ".jpg"
+        EPS -> ".eps"
+
+-- | Datatype containing all parameters required
+-- to run pandoc-pyplot on a single figure.
+data FigureSpec = FigureSpec
+ { caption :: String -- ^ Figure caption.
+ , script :: PythonScript -- ^ Python source code that draws the figure.
+ , saveFormat :: SaveFormat -- ^ Save format of the figure; determines the file extension.
+ , directory :: FilePath -- ^ Directory in which the figure file is saved.
+ , dpi :: Int -- ^ Dots-per-inch passed to @plt.savefig@ for the figure.
+ , blockAttrs :: Attr -- ^ Attributes not related to @pandoc-pyplot@ will be propagated.
+ }
+
+-- The hash covers the caption, script, directory, DPI and block attributes.
+-- The save format is not part of the hash; 'figurePath' appends the format's
+-- file extension to the hash-derived file name instead.
+instance Hashable FigureSpec where
+ hashWithSalt salt spec =
+ hashWithSalt salt (caption spec, script spec, directory spec, dpi spec, blockAttrs spec)
+
+-- | Path of the figure file for a specification: the hash of the
+-- specification, with the save format's extension, inside the
+-- specification's directory.
+figurePath :: FigureSpec -> FilePath
+figurePath spec = directory spec </> fileName
+  where
+    fileName = addExtension (show (hash spec)) (extension (saveFormat spec))
+
+-- | Path of the high-resolution copy of a figure: the same as 'figurePath',
+-- with @.hires@ inserted in front of the format's extension.
+hiresFigurePath :: FigureSpec -> FilePath
+hiresFigurePath spec = replaceExtension (figurePath spec) hiresExt
+  where
+    hiresExt = ".hires" <> extension (saveFormat spec)
+
+-- | Append figure-capturing statements to the Python script of a 'FigureSpec'.
+--
+-- Two @plt.savefig@ calls are appended: one writing to 'figurePath' at the
+-- requested DPI, and one writing a higher-resolution copy (in the same file
+-- format) to 'hiresFigurePath'.
+addPlotCapture ::
+       FigureSpec -- ^ Figure specification, including the script to extend
+    -> PythonScript -- ^ Script with the capture statements appended
+addPlotCapture spec =
+    mconcat
+        [ script spec
+        , "\nimport matplotlib.pyplot as plt" -- Just in case
+        , plotCapture (figurePath spec) (dpi spec)
+        , plotCapture (hiresFigurePath spec) hiresDpi
+        ]
+  where
+    -- Twice the requested DPI, capped at 200, but never below the requested
+    -- DPI: without the lower bound, a request above 200 DPI would make the
+    -- "high-resolution" copy coarser than the regular figure.
+    hiresDpi = max (dpi spec) (min 200 (2 * dpi spec))
+    plotCapture fname' dpi' =
+        mconcat
+            [ "\nplt.savefig("
+            , T.pack $ show fname' -- show is required for quotes
+            , ", dpi="
+            , T.pack $ show dpi'
+            , ")"
+            ]
diff --git a/src/Text/Pandoc/Filter/Pyplot.hs b/src/Text/Pandoc/Filter/Pyplot.hs
index 53152b4..f0f692d 100644
--- a/src/Text/Pandoc/Filter/Pyplot.hs
+++ b/src/Text/Pandoc/Filter/Pyplot.hs
@@ -1,35 +1,43 @@
-{-# LANGUAGE MultiWayIf #-}
-{-# LANGUAGE Unsafe #-}
+{-# LANGUAGE MultiWayIf #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE Unsafe #-}
+
{-|
Module : Text.Pandoc.Filter.Pyplot
Description : Pandoc filter to create Matplotlib figures from code blocks
-Copyright : (c) Laurent P René de Cotret, 2018
+Copyright : (c) Laurent P René de Cotret, 2019
License : MIT
Maintainer : laurent.decotret@outlook.com
Stability : stable
Portability : portable
-This module defines a Pandoc filter @makePlot@ that can be
+This module defines a Pandoc filter @makePlot@ that can be
used to walk over a Pandoc document and generate figures from
Python code blocks.
-The syntax for code blocks is simple, Code blocks with the @plot_target=...@
+The syntax for code blocks is simple: code blocks with the @.pyplot@
attribute will trigger the filter. The code block will be reworked into a Python
-script and the output figure will be captured.
+script and the output figure will be captured, along with a high-resolution version
+of the figure and the source code used to generate the figure.
+
+To trigger pandoc-pyplot, the following is __required__:
+
+ * @.pyplot@: Trigger pandoc-pyplot but let it decide on a filename
Here are the possible attributes that pandoc-pyplot understands:
- * @plot_target=...@ (_required_): Filepath where the resulting figure should be saved.
- * @plot_alt="..."@ (_optional_): Specify a plot caption (or alternate text).
- * @plot_include=...@ (_optional_): Path to a Python script to include before the code block.
- Ideal to avoid repetition over many figures.
+ * @target=...@: Filepath where the resulting figure should be saved.
+ * @directory=...@ : Directory where to save the figure.
+ * @caption="..."@: Specify a plot caption (or alternate text).
+ * @dpi=...@: Specify a value for figure resolution, or dots-per-inch. Default is 80DPI.
+ * @include=...@: Path to a Python script to include before the code block. Ideal to avoid repetition over many figures.
Here are some example blocks in Markdown:
@
This is a paragraph
-```{plot_target=my_figure.jpg plot_alt="This is a caption."}
+```{.pyplot caption="This is a caption."}
import matplotlib.pyplot as plt
plt.figure()
@@ -37,139 +45,192 @@ plt.plot([0,1,2,3,4], [1,2,3,4,5])
plt.title('This is an example figure')
```
@
+
+This filter was originally designed to be used with [Hakyll](https://jaspervdj.be/hakyll/).
+In case you want to use the filter with your own Hakyll setup, you can use a transform
+function that works on entire documents:
+
+@
+import Text.Pandoc.Filter.Pyplot (plotTransform)
+
+import Hakyll
+
+-- Unsafe compiler is required because of the interaction
+-- in IO (i.e. running an external Python script).
+makePlotPandocCompiler :: Compiler (Item String)
+makePlotPandocCompiler =
+ pandocCompilerWithTransformM
+ defaultHakyllReaderOptions
+ defaultHakyllWriterOptions
+ (unsafeCompiler . plotTransform)
+@
+
-}
-module Text.Pandoc.Filter.Pyplot (
- makePlot
- , makePlot' -- For testing
- , plotTransform
- , PandocPyplotError(..)
+module Text.Pandoc.Filter.Pyplot
+ ( makePlot
+ , plotTransform
+ , PandocPyplotError(..)
+ -- For testing purposes only
+ , makePlot'
+ , directoryKey
+ , captionKey
+ , dpiKey
+ , includePathKey
+ , saveFormatKey
) where
-import Control.Monad ((>=>))
-import qualified Data.Map.Strict as M
-import Data.Maybe (fromMaybe)
-import Data.Monoid ((<>))
-import System.Directory (doesDirectoryExist)
-import System.FilePath (isValid, replaceExtension, takeDirectory)
+import Control.Monad ((>=>))
+
+import Data.List (intersperse)
+
+import qualified Data.Map.Strict as Map
+import Data.Maybe (fromMaybe)
+import Data.Monoid ((<>))
+import qualified Data.Text as T
+import qualified Data.Text.IO as T
+import Data.Version (showVersion)
+
+import Paths_pandoc_pyplot (version)
+
+import System.Directory (createDirectoryIfMissing,
+ doesFileExist)
+import System.FilePath (isValid, makeValid,
+ replaceExtension, takeDirectory)
import Text.Pandoc.Definition
-import Text.Pandoc.Walk (walkM)
+import Text.Pandoc.Walk (walkM)
-import Text.Pandoc.Filter.Scripting
+import Text.Pandoc.Filter.FigureSpec (FigureSpec (..),
+ SaveFormat (..), addPlotCapture,
+ figurePath, hiresFigurePath,
+ saveFormatFromString)
+import Text.Pandoc.Filter.Scripting
-- | Possible errors returned by the filter
-data PandocPyplotError = ScriptError Int -- ^ Running Python script has yielded an error
- | InvalidTargetError FilePath -- ^ Invalid figure path
- | MissingDirectoryError FilePath -- ^ Directory where to save figure does not exist
- | BlockingCallError -- ^ Python script contains a block call to 'show()'
- deriving Eq
+data PandocPyplotError
+ = ScriptError Int -- ^ Running the Python script failed; carries the exit code
+ | InvalidTargetError FilePath -- ^ The figure path is not a valid file path
+ | BlockingCallError -- ^ Python script contains a blocking call to @show()@
+ deriving (Eq)
+-- | Translate filter error to an error message
instance Show PandocPyplotError where
- -- | Translate filter error to an error message
- show (ScriptError exitcode) = "Script error: plot could not be generated. Exit code " <> (show exitcode)
- show (InvalidTargetError fname) = "Target filename " <> fname <> " is not valid."
- show (MissingDirectoryError dirname) = "Target directory " <> dirname <> " does not exist."
- show BlockingCallError = "Script contains a blocking call to show, like 'plt.show()'"
-
-
--- | Datatype containing all parameters required
--- to run pandoc-pyplot
-data FigureSpec = FigureSpec
- { target :: FilePath -- ^ filepath where generated figure will be saved.
- , alt :: String -- ^ Alternate text for the figure (optional).
- , script :: PythonScript -- ^ Source code for the figure.
- , includePath :: Maybe FilePath -- ^ Path to a Python to be included before the script.
- , blockAttrs :: Attr -- ^ Attributes not related to @pandoc-pyplot@ will be propagated.
- }
-
--- | Use figure specification to render a full plot script, including everything except plot capture
-renderScript :: FigureSpec -> IO PythonScript
-renderScript spec = do
- includeScript <- fromMaybe (return "") $ readFile <$> (includePath spec)
- return $ mconcat [ "# Source code for ", target spec, "\n"
- , "# Generated by pandoc-pyplot\n"
- , includeScript, "\n", script spec]
-
--- Keys that pandoc-pyplot will look for in code blocks
-targetKey, altTextKey, includePathKey :: String
-targetKey = "plot_target"
-altTextKey = "plot_alt"
-includePathKey = "plot_include"
+ show (ScriptError exitcode) =
+ "Script error: plot could not be generated. Exit code " <> (show exitcode)
+ show (InvalidTargetError fname) = "Target filename " <> fname <> " is not valid."
+ show BlockingCallError = "Script contains a blocking call to show, like 'plt.show()'"
+
+-- Keys that pandoc-pyplot will look for in code blocks.
+-- These are only exported for testing purposes.
+
+-- | Attribute key for the directory in which figures are saved.
+directoryKey :: String
+directoryKey = "directory"
+
+-- | Attribute key for the figure caption.
+captionKey :: String
+captionKey = "caption"
+
+-- | Attribute key for the figure resolution, in dots-per-inch.
+dpiKey :: String
+dpiKey = "dpi"
+
+-- | Attribute key for the path of a script included before the code block.
+includePathKey :: String
+includePathKey = "include"
+
+-- | Attribute key for the save format of the figure.
+saveFormatKey :: String
+saveFormatKey = "format"
+
+-- | List of all keys related to pandoc-pyplot.
+inclusionKeys :: [String]
+inclusionKeys =
+    [ directoryKey
+    , captionKey
+    , dpiKey
+    , includePathKey
+    , saveFormatKey
+    ]
-- | Determine inclusion specifications from Block attributes.
-- Note that only the @.pyplot@ class is required; all other parameters are optional
-parseFigureSpec :: Block -> Maybe FigureSpec
-parseFigureSpec (CodeBlock (id', cls, attrs) content) =
- createInclusion <$> M.lookup targetKey attrs'
- where
- attrs' = M.fromList attrs
- inclusionKeys = [ targetKey, altTextKey ]
- filteredAttrs = filter (\(k,_) -> k `notElem` inclusionKeys) attrs
- createInclusion fname = FigureSpec
- { target = fname
- , alt = M.findWithDefault "Figure generated by pandoc-pyplot" altTextKey attrs'
- , script = content
- , includePath = M.lookup includePathKey attrs'
- -- Propagate attributes that are not related to pandoc-pyplot
- , blockAttrs = (id', cls, filteredAttrs)
- }
-parseFigureSpec _ = Nothing
+parseFigureSpec :: Block -> IO (Maybe FigureSpec)
+parseFigureSpec (CodeBlock (id', cls, attrs) content)
+    | "pyplot" `elem` cls = Just <$> figureSpec
+    | otherwise = return Nothing
+  where
+    attrs' = Map.fromList attrs
+    -- Attributes that are not pandoc-pyplot keys are propagated to the output.
+    filteredAttrs = filter (\(k, _) -> k `notElem` inclusionKeys) attrs
+    dir = makeValid $ Map.findWithDefault "generated" directoryKey attrs'
+    -- An unrecognised format string falls back to PNG.
+    format = fromMaybe PNG $ saveFormatFromString $ Map.findWithDefault "png" saveFormatKey attrs'
+    includePath = Map.lookup includePathKey attrs'
+    defaultDpi :: Int
+    defaultDpi = 80
+    -- Parse the DPI with 'reads' rather than the partial 'read': a value that
+    -- is not an integer (optionally followed by whitespace) falls back to the
+    -- default instead of raising an exception when the figure is rendered.
+    dpi' =
+        case reads (Map.findWithDefault (show defaultDpi) dpiKey attrs') of
+            [(n, rest)]
+                | null (words rest) -> n
+            _ -> defaultDpi
+    figureSpec :: IO FigureSpec
+    figureSpec = do
+        -- The optional include script is placed before the code block; when
+        -- no include path is given it contributes an empty script.
+        includeScript <- maybe (return "") T.readFile includePath
+        let header = "# Generated by pandoc-pyplot " <> ((T.pack . showVersion) version)
+            fullScript = mconcat $ intersperse "\n" [header, includeScript, T.pack content]
+        return $
+            FigureSpec
+                { caption = Map.findWithDefault mempty captionKey attrs'
+                , script = fullScript
+                , saveFormat = format
+                , directory = dir
+                , dpi = dpi'
+                  -- The triggering class is removed from the propagated classes.
+                , blockAttrs = (id', filter (/= "pyplot") cls, filteredAttrs)
+                }
+parseFigureSpec _ = return Nothing
+
+-- | Look for common mistakes in a figure specification: an invalid figure
+-- path, or a script containing a blocking call to show. Returns the first
+-- problem found, if any.
+validateSpec :: FigureSpec -> Maybe PandocPyplotError
+validateSpec spec
+    | invalidPath = Just (InvalidTargetError target)
+    | blocking = Just BlockingCallError
+    | otherwise = Nothing
+  where
+    target = figurePath spec
+    invalidPath = not (isValid target)
+    blocking = hasBlockingShowCall (script spec)
+
+-- | Render the figure by running its Python script, unless the figure file
+-- already exists, in which case the script is assumed not to need re-running.
+-- The directory of the figure is created first if it is missing.
+runScriptIfNecessary :: FigureSpec -> IO ScriptResult
+runScriptIfNecessary spec = do
+    createDirectoryIfMissing True (takeDirectory target)
+    alreadyRendered <- doesFileExist target
+    if alreadyRendered
+        then return ScriptSuccess
+        else runAndRecord
+  where
+    target = figurePath spec
+    runAndRecord = do
+        outcome <- runTempPythonScript (addPlotCapture spec)
+        case outcome of
+            ScriptSuccess -> do
+                -- Save the original script next to the figure so it can be
+                -- inspected. A .txt file can be viewed directly in the
+                -- browser, in the case of HTML output.
+                T.writeFile (replaceExtension target ".txt") (script spec)
+                return ScriptSuccess
+            failure@(ScriptFailure _) -> return failure
-- | Main routine to include Matplotlib plots.
--- Code blocks containing the attributes @plot_target@ are considered
+-- Code blocks containing the attributes @.pyplot@ are considered
-- Python plotting scripts. All other possible blocks are ignored.
--- The source code is also saved in another file, which can be access by
--- clicking the image
makePlot' :: Block -> IO (Either PandocPyplotError Block)
-makePlot' block =
- case parseFigureSpec block of
- -- Could not parse - leave code block unchanged
+makePlot' block = do
+ parsed <- parseFigureSpec block
+ case parsed of
Nothing -> return $ Right block
- -- Could parse : run the script and capture output
- Just spec -> do
-
- -- Rendered script, including possible inclusions and other additions
- -- except the plot capture.
- rendered <- renderScript spec
-
- let figurePath = target spec
- figureDir = takeDirectory figurePath
-
- -- Check that the directory in which to save the figure exists
- validDirectory <- doesDirectoryExist $ takeDirectory figurePath
-
- if | not (isValid figurePath) -> return $ Left $ InvalidTargetError figurePath
- | not validDirectory -> return $ Left $ MissingDirectoryError figureDir
- | hasBlockingShowCall rendered -> return $ Left $ BlockingCallError
- | otherwise -> do
-
- -- Running the script
- -- A plot capture (plt.savefig(...)) is added as well
- result <- runTempPythonScript $ addPlotCapture (target spec) rendered
-
- case result of
- ScriptFailure code -> return $ Left $ ScriptError code
- ScriptSuccess -> do
- -- Save the original script into a separate file
- -- so it can be inspected
- -- Note : using a .txt file allows to view source directly
- -- in the browser, in the case of HTML output
- let sourcePath = replaceExtension figurePath ".txt"
- writeFile sourcePath rendered
-
- -- Propagate attributes that are not related to pandoc-pyplot
- let relevantAttrs = blockAttrs spec
- srcTarget = Link nullAttr [Str "Source code"] (sourcePath, "")
- caption' = [Str $ alt spec, Space, Str "(", srcTarget, Str ")"]
+ Just spec ->
+ case validateSpec spec of
+ Just err -> return $ Left err
+ Nothing -> do
+ result <- runScriptIfNecessary spec
+ case result of
+ ScriptFailure code -> return $ Left $ ScriptError code
+ ScriptSuccess -> do
+ let relevantAttrs = blockAttrs spec
+ sourcePath = replaceExtension (figurePath spec) ".txt"
+ hiresPath = hiresFigurePath spec
+ srcTarget = Link nullAttr [Str "Source code"] (sourcePath, "")
+ hiresTarget = Link nullAttr [Str "high res."] (hiresPath, "")
+ -- TODO: use pandoc-types Builder module
+ caption' =
+ [ Str $ caption spec
+ , Space
+ , Str "("
+ , srcTarget
+ , Str ","
+ , Space
+ , hiresTarget
+ , Str ")"
+ ]
-- To render images as figures with captions, the target title
-- must be "fig:"
-- Janky? yes
- image = Image relevantAttrs caption' (figurePath, "fig:")
-
- return $ Right $ Para $ [image]
+ image = Image relevantAttrs caption' (figurePath spec, "fig:")
+ return $ Right $ Para $ [image]
-- | Highest-level function that can be walked over a Pandoc tree.
--- All code blocks that have the 'plot_target' parameter will be considered
+-- All code blocks that have the '.pyplot' parameter will be considered
-- figures.
makePlot :: Block -> IO Block
makePlot = makePlot' >=> either (fail . show) return
diff --git a/src/Text/Pandoc/Filter/Scripting.hs b/src/Text/Pandoc/Filter/Scripting.hs
index c8a4589..889db6d 100644
--- a/src/Text/Pandoc/Filter/Scripting.hs
+++ b/src/Text/Pandoc/Filter/Scripting.hs
@@ -1,7 +1,9 @@
-{-# LANGUAGE Unsafe #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE Unsafe #-}
+
{-|
Module : Text.Pandoc.Filter.Scripting
-Copyright : (c) Laurent P René de Cotret, 2018
+Copyright : (c) Laurent P René de Cotret, 2019
License : MIT
Maintainer : laurent.decotret@outlook.com
Stability : internal
@@ -10,59 +12,62 @@ Portability : portable
This module defines types and functions that help
with running Python scripts.
-}
-
-module Text.Pandoc.Filter.Scripting (
- runTempPythonScript
- , addPlotCapture
+module Text.Pandoc.Filter.Scripting
+ ( runTempPythonScript
, hasBlockingShowCall
, PythonScript
, ScriptResult(..)
-) where
+ ) where
+
+import Data.Text (Text)
+import qualified Data.Text as T
+import qualified Data.Text.IO as T
+
+import Data.Hashable (hash)
-import System.Exit (ExitCode(..))
-import System.FilePath ((</>))
-import System.IO.Temp (getCanonicalTemporaryDirectory)
-import System.Process.Typed (runProcess, shell)
+import System.Exit (ExitCode (..))
+import System.FilePath ((</>))
+import System.IO.Temp (getCanonicalTemporaryDirectory)
+import System.Process.Typed (proc, runProcess, shell)
-import Data.Monoid (Any(..), (<>))
+import Data.Monoid (Any (..), (<>))
-- | String representation of a Python script
-type PythonScript = String
+type PythonScript = Text
-- | Possible result of running a Python script
-data ScriptResult = ScriptSuccess
- | ScriptFailure Int
+data ScriptResult
+ = ScriptSuccess
+ | ScriptFailure Int
-- | Take a python script in string form, write it in a temporary directory,
--- then execute it.
-runTempPythonScript :: PythonScript -- ^ Content of the script
- -> IO ScriptResult -- ^ Result with exit code.
-runTempPythonScript script = do
- -- Write script to temporary directory
- scriptPath <- (</> "pandoc-pyplot.py") <$> getCanonicalTemporaryDirectory
- writeFile scriptPath script
- -- Execute script
- ec <- runProcess $ shell $ "python " <> (show scriptPath)
- case ec of
- ExitSuccess -> return ScriptSuccess
- ExitFailure code -> return $ ScriptFailure code
-
--- | Modify a Python plotting script to save the figure to a filename.
-addPlotCapture :: FilePath -- ^ Path where to save the figure
- -> PythonScript -- ^ Raw code block
- -> PythonScript -- ^ Code block with added capture
-addPlotCapture fname content =
- mconcat [ content
- , "\nimport matplotlib.pyplot as plt" -- Just in case
- , "\nplt.savefig(" <> show fname <> ")\n\n"
- ]
+-- then execute it.
+runTempPythonScript ::
+       PythonScript -- ^ Content of the script
+    -> IO ScriptResult -- ^ Result with exit code.
+runTempPythonScript script = do
+    -- The script's hash serves as the temporary file name, so that distinct
+    -- scripts are (barring hash collisions) written to distinct files.
+    scriptPath <- (</> hashedPath) <$> getCanonicalTemporaryDirectory
+    T.writeFile scriptPath script
+    -- Pass the path as a process argument instead of splicing @show path@
+    -- into a shell command line: Haskell's 'show' escaping is not shell
+    -- quoting (non-ASCII characters, for instance, become numeric escapes),
+    -- so such a path could reach Python altered.
+    ec <- runProcess $ proc "python" [scriptPath]
+    case ec of
+        ExitSuccess -> return ScriptSuccess
+        ExitFailure code -> return $ ScriptFailure code
+  where
+    hashedPath = show . hash $ script
-- | Detect the presence of a blocking show call, for example "plt.show()"
hasBlockingShowCall :: PythonScript -> Bool
-hasBlockingShowCall script = anyOf
+hasBlockingShowCall script =
+ anyOf
[ "plt.show()" `elem` scriptLines
+ , "pyplot.show()" `elem` scriptLines
, "matplotlib.pyplot.show()" `elem` scriptLines
]
- where
- scriptLines = lines script
- anyOf xs = getAny $ mconcat $ Any <$> xs
+ where
+ -- Strip surrounding whitespace from every line before comparing, so that
+ -- indented calls and lines with trailing whitespace (including a stray
+ -- carriage return) are detected as well.
+ scriptLines = T.strip <$> T.lines script
+ anyOf xs = getAny $ mconcat $ Any <$> xs