summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarekSuchanek <>2020-01-13 14:56:00 (GMT)
committerhdiff <hdiff@hdiff.luite.com>2020-01-13 14:56:00 (GMT)
commit4ad44c90f9b26f0141509509e9d8d8f6fae42588 (patch)
treeb00ed4280d7c8fbe0d70484031cc1549daf10da3
parentcc718dab57042b201fb8805db7663f59543299d4 (diff)
version 1.0.21.0.2
-rwxr-xr-x[-rw-r--r--]ChangeLog.md22
-rwxr-xr-x[-rw-r--r--]LICENSE42
-rwxr-xr-x[-rw-r--r--]README.md32
-rwxr-xr-x[-rw-r--r--]Setup.hs4
-rwxr-xr-x[-rw-r--r--]app/Main.hs48
-rw-r--r--fromhtml.cabal13
-rwxr-xr-x[-rw-r--r--]src/Text/FromHTML.hs170
7 files changed, 166 insertions, 165 deletions
diff --git a/ChangeLog.md b/ChangeLog.md
index a70302a..bf6bb07 100644..100755
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -1,11 +1,11 @@
-# Changelog for FromHTML
-
-## [1.0.1] - 2018-08-19
-
-### Changed
-- Getting rid of ugly unsafe performing IO
-- Using process-extra with ByteString to avoid encoding problems
-
-## [1.0.0] - 2018-08-09
-
-Initial release
+# Changelog for FromHTML
+
+## [1.0.1] - 2018-08-19
+
+### Changed
+- Getting rid of ugly unsafe performing IO
+- Using process-extra with ByteString to avoid encoding problems
+
+## [1.0.0] - 2018-08-09
+
+Initial release
diff --git a/LICENSE b/LICENSE
index 3a924e6..694a9a9 100644..100755
--- a/LICENSE
+++ b/LICENSE
@@ -1,21 +1,21 @@
-MIT License
-
-Copyright (c) 2017 Marek Suchánek (marek.suchanek@fit.cvut.cz)
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+MIT License
+
+Copyright (c) 2017 Marek Suchánek (marek.suchanek@fit.cvut.cz)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
index 4b16471..a124e20 100644..100755
--- a/README.md
+++ b/README.md
@@ -1,16 +1,16 @@
-# FromHTML
-
-[![License](https://img.shields.io/github/license/MarekSuchanek/FromHTML.svg)](LICENSE)
-[![Build Status](https://travis-ci.org/MarekSuchanek/FromHTML.svg?branch=master)](https://travis-ci.org/MarekSuchanek/FromHTML)
-[![Hackage](https://img.shields.io/hackage/v/fromhtml.svg)](https://hackage.haskell.org/package/fromhtml)
-[![Hackage-Deps](https://img.shields.io/hackage-deps/v/fromhtml.svg)](http://packdeps.haskellers.com/feed?needle=fromhtml)
-
-Simplified API for pure transformation of HTML to other formats with Pandoc in Haskell code.
-
-## Purpose
-
-Pandoc is awesome but using its Haskell API can be a little bit bothersome. This project aims to provide simpler API to transform documents without using monads or even some more advanced magic.
-
-## License
-
-This project is licensed under the MIT license - see the [LICENSE](LICENSE) file for more details.
+# FromHTML
+
+[![License](https://img.shields.io/github/license/MarekSuchanek/FromHTML.svg)](LICENSE)
+[![Build Status](https://travis-ci.org/MarekSuchanek/FromHTML.svg?branch=master)](https://travis-ci.org/MarekSuchanek/FromHTML)
+[![Hackage](https://img.shields.io/hackage/v/fromhtml.svg)](https://hackage.haskell.org/package/fromhtml)
+[![Hackage-Deps](https://img.shields.io/hackage-deps/v/fromhtml.svg)](http://packdeps.haskellers.com/feed?needle=fromhtml)
+
+Simplified API for pure transformation of HTML to other formats with Pandoc in Haskell code.
+
+## Purpose
+
+Pandoc is awesome but using its Haskell API can be a little bit bothersome. This project aims to provide a simpler API to transform documents without using monads or even some more advanced magic.
+
+## License
+
+This project is licensed under the MIT license - see the [LICENSE](LICENSE) file for more details.
diff --git a/Setup.hs b/Setup.hs
index 9a994af..833b4c6 100644..100755
--- a/Setup.hs
+++ b/Setup.hs
@@ -1,2 +1,2 @@
-import Distribution.Simple
-main = defaultMain
+import Distribution.Simple
+main = defaultMain
diff --git a/app/Main.hs b/app/Main.hs
index f153181..9a1a9df 100644..100755
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -1,24 +1,24 @@
-module Main where
-
-import Prelude
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Char8 as C
-import System.Environment
-import System.Exit
-
-import Text.FromHTML
-
-main :: IO ()
-main = do
- args <- getArgs
- let infile = head args
- let outfile = args !! 1
- let format = (read $ args !! 2) :: ExportType
- html <- readFile infile
- result <- fromHTML format html
- case result of
- Right res -> B.writeFile outfile res
- Left err -> do
- putStrLn "Couldn't transform that document:"
- C.putStrLn err
- exitFailure
+module Main where
+
+import Prelude
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Char8 as C
+import System.Environment
+import System.Exit
+
+import Text.FromHTML
+
+main :: IO ()
+main = do
+ args <- getArgs
+ let infile = head args
+ let outfile = args !! 1
+ let format = (read $ args !! 2) :: ExportType
+ html <- readFile infile
+ result <- fromHTML format html
+ case result of
+ Right res -> B.writeFile outfile res
+ Left err -> do
+ putStrLn "Couldn't transform that document:"
+ C.putStrLn err
+ exitFailure
diff --git a/fromhtml.cabal b/fromhtml.cabal
index 8315a2a..dc6afb7 100644
--- a/fromhtml.cabal
+++ b/fromhtml.cabal
@@ -1,11 +1,13 @@
--- This file has been generated from package.yaml by hpack version 0.28.2.
+cabal-version: 1.12
+
+-- This file has been generated from package.yaml by hpack version 0.31.2.
--
-- see: https://github.com/sol/hpack
--
--- hash: 597d2bb8a4387fffb12cb8c6659c2e7f725ef980e87580aeb755d4d2069fcfef
+-- hash: 8bab40642781a60f3d71a513789692466ac753726db91595bdc3a5af39472bb4
name: fromhtml
-version: 1.0.1
+version: 1.0.2
synopsis: Simple adapter for transformation of HTML to other formats
description: Please see the README on GitHub at <https://github.com/MarekSuchanek/FromHTML#readme>
category: Text
@@ -13,14 +15,13 @@ homepage: https://github.com/MarekSuchanek/FromHTML#readme
bug-reports: https://github.com/MarekSuchanek/FromHTML/issues
author: Marek Suchánek
maintainer: marek.suchanek@fit.cvut.cz
-copyright: 2018 Marek Suchánek
+copyright: 2020 Marek Suchánek
license: MIT
license-file: LICENSE
build-type: Simple
-cabal-version: >= 1.10
extra-source-files:
- ChangeLog.md
README.md
+ ChangeLog.md
source-repository head
type: git
diff --git a/src/Text/FromHTML.hs b/src/Text/FromHTML.hs
index 40611e8..1cfe29a 100644..100755
--- a/src/Text/FromHTML.hs
+++ b/src/Text/FromHTML.hs
@@ -1,85 +1,85 @@
-{-|
-Module : Text.FromHTML
-Description : Simple library for transformation of HTML to other formats
-Copyright : (c) Marek Suchánek, 2018
-License : MIT
-Maintainer : marek.suchanek@fit.cvut.cz
-Stability : experimental
-Portability : POSIX
-
-Simplified API for transformation of HTML to other formats with Pandoc
-and wkhtmltopdf in Haskell code. It requires @wkhtmltopdf@ and @pandoc@
-to be installed locally.
--}
-module Text.FromHTML
- ( fromHTML
- , ExportType(..)
- ) where
-
-import qualified Data.Char as C
-import qualified Data.Text as T
-import qualified Data.Text.Encoding as E
-import qualified Data.ByteString as B
-
-import Control.Exception
-import GHC.IO.Encoding
-import System.Exit
-import System.Process.ByteString
-
--- | Allowed export types
-data ExportType = HTML
- | LaTeX
- | RTF
- | RST
- | Markdown
- | AsciiDoc
- | Docx
- | ODT
- | DokuWiki
- | MediaWiki
- | EPUB2
- | EPUB3
- | PDF
- deriving (Show, Read, Enum, Bounded, Eq)
-
-type Input = B.ByteString
-type Output = B.ByteString
-type Command = Input -> IO (Either Output Output)
-
-str2BS :: String -> B.ByteString
-str2BS = E.encodeUtf8 . T.pack
-
--- | Transform given HTML as String to selected format
-fromHTML :: ExportType -> String -> IO (Either Output Output)
-fromHTML HTML html = return $ Right (str2BS html) -- HTML is already provided!
-fromHTML PDF html = wkhtmltopdf (str2BS html)
-fromHTML extp html = pandoc extp (str2BS html)
-
--- | Simple conversion of HTML to PDF using process wkhtmltopdf
-wkhtmltopdf :: Command
-wkhtmltopdf = perform "wkhtmltopdf" ["--quiet", "--encoding", "utf-8", "-", "-"]
-
--- | Simple conversion of HTML to some format using process pandoc
-pandoc :: ExportType -> Command
-pandoc expt = perform "pandoc" args
- where
- format = exportType2PD expt
- args = ["-s", "-f", "html", "-t", format, "-o", "-"]
-
--- | Perform process (catched IOException)
-perform :: String -> [String] -> Command
-perform cmd args input = catch (performUnsafe cmd args input)
- (\e -> do let err = show (e :: SomeException)
- return . Left $ "Exception: " <> str2BS err)
-
--- | Perform process (no caching exceptions)
-performUnsafe :: String -> [String] -> Command
-performUnsafe cmd args input = do
- setLocaleEncoding utf8 -- don't know what was locales are there...
- (exitCode, stdout, stderr) <- readProcessWithExitCode cmd args input
- case exitCode of
- ExitSuccess -> return $ Right stdout
- _ -> return . Left $ str2BS (show exitCode) <> ": " <> stderr
-
-exportType2PD :: ExportType -> String
-exportType2PD = map C.toLower . show
+{-|
+Module : Text.FromHTML
+Description : Simple library for transformation of HTML to other formats
+Copyright : (c) Marek Suchánek, 2018
+License : MIT
+Maintainer : marek.suchanek@fit.cvut.cz
+Stability : experimental
+Portability : POSIX
+
+Simplified API for transformation of HTML to other formats with Pandoc
+and wkhtmltopdf in Haskell code. It requires @wkhtmltopdf@ and @pandoc@
+to be installed locally.
+-}
+module Text.FromHTML
+ ( fromHTML
+ , ExportType(..)
+ ) where
+
+import qualified Data.Char as C
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as E
+import qualified Data.ByteString as B
+
+import Control.Exception
+import GHC.IO.Encoding
+import System.Exit
+import System.Process.ByteString
+
+-- | Allowed export types
+data ExportType = HTML
+ | LaTeX
+ | RTF
+ | RST
+ | Markdown
+ | AsciiDoc
+ | Docx
+ | ODT
+ | DokuWiki
+ | MediaWiki
+ | EPUB2
+ | EPUB3
+ | PDF
+ deriving (Show, Read, Enum, Bounded, Eq)
+
+type Input = B.ByteString
+type Output = B.ByteString
+type Command = Input -> IO (Either Output Output)
+
+str2BS :: String -> B.ByteString
+str2BS = E.encodeUtf8 . T.pack
+
+-- | Transform given HTML as String to selected format
+fromHTML :: ExportType -> String -> IO (Either Output Output)
+fromHTML HTML html = return $ Right (str2BS html) -- HTML is already provided!
+fromHTML PDF html = wkhtmltopdf (str2BS html)
+fromHTML extp html = pandoc extp (str2BS html)
+
+-- | Simple conversion of HTML to PDF using process wkhtmltopdf
+wkhtmltopdf :: Command
+wkhtmltopdf = perform "wkhtmltopdf" ["--quiet", "--disable-smart-shrinking", "--encoding", "utf-8", "-", "-"]
+
+-- | Simple conversion of HTML to some format using process pandoc
+pandoc :: ExportType -> Command
+pandoc expt = perform "pandoc" args
+ where
+ format = exportType2PD expt
+ args = ["-s", "-f", "html", "-t", format, "-o", "-"]
+
+-- | Perform process, catching any exception ('SomeException') and returning it as 'Left'
+perform :: String -> [String] -> Command
+perform cmd args input = catch (performUnsafe cmd args input)
+ (\e -> do let err = show (e :: SomeException)
+ return . Left $ "Exception: " <> str2BS err)
+
+-- | Perform process (no catching of exceptions)
+performUnsafe :: String -> [String] -> Command
+performUnsafe cmd args input = do
+ setLocaleEncoding utf8 -- force UTF-8, since the locale of the host system is unknown
+ (exitCode, stdout, stderr) <- readProcessWithExitCode cmd args input
+ case exitCode of
+ ExitSuccess -> return $ Right stdout
+ _ -> return . Left $ str2BS (show exitCode) <> ": " <> stderr
+
+exportType2PD :: ExportType -> String
+exportType2PD = map C.toLower . show