summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChrisDornan <>2019-03-14 17:59:00 (GMT)
committerhdiff <hdiff@hdiff.luite.com>2019-03-14 17:59:00 (GMT)
commitb71fb7d2fade94bfa37b26bff03a67e86e96b12d (patch)
treea56c7e4233075a2c0d480a8d65c10db98135562b
version 0.1.0.0HEAD0.1.0.0master
-rw-r--r--ChangeLog.md3
-rw-r--r--LICENSE30
-rw-r--r--README.md5
-rw-r--r--Setup.hs2
-rw-r--r--render-utf8.cabal46
-rw-r--r--src/Text/Utf8.hs176
-rw-r--r--src/Text/Utf8/Compat.hs41
7 files changed, 303 insertions, 0 deletions
diff --git a/ChangeLog.md b/ChangeLog.md
new file mode 100644
index 0000000..9e67e4f
--- /dev/null
+++ b/ChangeLog.md
@@ -0,0 +1,3 @@
+# 0.1.0.0
+
+ * first release
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..3f173ee
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,30 @@
+Copyright Chris Dornan (c) 2019
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following
+ disclaimer in the documentation and/or other materials provided
+ with the distribution.
+
+ * Neither the name of Chris Dornan nor the names of other
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..33f5603
--- /dev/null
+++ b/README.md
@@ -0,0 +1,5 @@
+# render-utf8
+
+A [rio](http://hackage.haskell.org/package/rio)-like [UTF-8](https://en.wikipedia.org/wiki/UTF-8)
+[ByteString Builder](http://hackage.haskell.org/package/bytestring-0.10.8.2/docs/Data-ByteString-Builder.html)
+wrapper with [fmt](http://hackage.haskell.org/package/fmt) integration.
diff --git a/Setup.hs b/Setup.hs
new file mode 100644
index 0000000..9a994af
--- /dev/null
+++ b/Setup.hs
@@ -0,0 +1,2 @@
+import Distribution.Simple
+-- Build-script entry point: hands control straight to 'defaultMain',
+-- brought into scope by the Distribution.Simple import above.
+main = defaultMain
diff --git a/render-utf8.cabal b/render-utf8.cabal
new file mode 100644
index 0000000..0ecce82
--- /dev/null
+++ b/render-utf8.cabal
@@ -0,0 +1,46 @@
+cabal-version: 1.12
+
+-- This file has been generated from package.yaml by hpack version 0.31.1.
+--
+-- see: https://github.com/sol/hpack
+--
+-- hash: 17f07980f5a8f4ae61cbf57f247148649e28f71ee96c2a1c00053483ef17c58b
+
+name: render-utf8
+version: 0.1.0.0
+synopsis: Simple Utf8 wrapper for ByteString Builder with conversion classes
+description: Simple Utf8 wrapper for ByteString Builder with conversion classes. Please see the README on GitHub at <https://github.com/cdornan/render-utf8#readme>
+category: Text
+homepage: https://github.com/cdornan/render-utf8#readme
+bug-reports: https://github.com/cdornan/render-utf8/issues
+author: Chris Dornan
+maintainer: chris@chrisdornan.com
+copyright: 2019 Chris Dornan
+license: BSD3
+license-file: LICENSE
+build-type: Simple
+extra-source-files:
+ README.md
+ ChangeLog.md
+
+source-repository head
+ type: git
+ location: https://github.com/cdornan/render-utf8
+
+library
+ exposed-modules:
+ Text.Utf8
+ other-modules:
+ Text.Utf8.Compat
+ Paths_render_utf8
+ hs-source-dirs:
+ src
+ ghc-options: -Wall
+ build-depends:
+ base >=4.8.2.0 && <10
+ , bytestring
+ , fmt >=0.6.0.0
+ , semigroups
+ , text
+ , transformers
+ default-language: Haskell2010
diff --git a/src/Text/Utf8.hs b/src/Text/Utf8.hs
new file mode 100644
index 0000000..eac79cd
--- /dev/null
+++ b/src/Text/Utf8.hs
@@ -0,0 +1,176 @@
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE GeneralizedNewtypeDeriving #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+{-------------------------------------------------------------------------------
+
+Acknowledgements
+~~~~~~~~~~~~~~~~
+
+ * Utf8 and Renderable were adapted from rio package:
+ http://hackage.haskell.org/package/rio-0.1.8.0/docs/src/RIO.Prelude.Display.html
+
+ * FromUtf8, cvt and cvtLn were modelled after the core types and classes of
+ the fmt package:
+ http://hackage.haskell.org/package/fmt-0.6.1.1/docs/Fmt-Internal-Core.html
+
+-------------------------------------------------------------------------------}
+
+module Text.Utf8
+ (
+ -- * Overview
+ -- $overview
+
+ -- * Utf8
+ Utf8(..)
+ -- * FromUtf8
+ , FromUtf8(..)
+ , cvtLn
+ -- * Renderable
+ , Renderable(..)
+ -- * Toolkit Functions
+ , renderShowable
+ , unsafeInjectByteStringIntoUtf8
+ , utf8ToText
+ , utf8ToLazyText
+ , writeFileUtf8
+ ) where
+
+import Control.Exception
+import Control.Monad.IO.Class
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString.Builder as BB
+import Data.Coerce
+import Data.Int
+import Data.String
+import qualified Data.Text as T
+import qualified Data.Text.Encoding as T
+import Data.Text.Encoding.Error
+import qualified Data.Text.Lazy as TL
+import qualified Data.Text.Lazy.Builder as TB
+import qualified Data.Text.Lazy.Encoding as TL
+import Data.Word
+import Fmt
+import Fmt.Internal.Core
+import System.IO
+import Text.Utf8.Compat
+
+
+{- $overview
+
+This module provides a standalone package for a wrapped 'B.ByteString'
+'BB.Builder' for working with UTF-8 encoded byte strings. It is adapted from
+the @Utf8Builder@ type and @Display@ class from the @rio@ package,
+and the @FromBuilder@ class of the @fmt@ package, while endeavouring to
+integrate the @fmt@ types and classes to allow easy interoperation
+between both systems.
+-}
+
+
+--
+-- Utf8
+--
+
+-- | A 'BB.Builder' wrapper intended to carry UTF-8 encoded text.
+--
+-- The 'Semigroup' and 'Monoid' instances are those of the underlying
+-- 'BB.Builder' (obtained through newtype deriving).
+newtype Utf8 = Utf8
+  { getUtf8 :: BB.Builder
+  }
+  deriving (Semigroup, Monoid)
+
+-- | String literals are encoded with 'BB.stringUtf8'.
+instance IsString Utf8 where
+  fromString str = Utf8 (BB.stringUtf8 str)
+
+-- | A 'Utf8' is turned into an @fmt@ builder through 'cvt'.
+instance Buildable Utf8 where
+  build u8 = cvt u8
+
+-- | An @fmt@ builder is turned into a 'Utf8' through 'render'.
+instance FromBuilder Utf8 where
+  fromBuilder bldr = render bldr
+
+
+--
+-- FromUtf8, cvt
+--
+
+-- | Class of text-like types that a 'Utf8' value can be converted into.
+-- The instances provided in this module cover 'Utf8' itself, byte-string
+-- and text builders, strict and lazy text and byte strings, 'String',
+-- and an @IO ()@ action.
+class FromUtf8 a where
+ -- | make a 'Utf8' into whatever text type you need it to be
+ cvt :: Utf8 -> a
+
+-- | Like 'cvt', except that a single newline is appended to the 'Utf8'
+-- before it is converted.
+cvtLn :: FromUtf8 a => Utf8 -> a
+cvtLn = cvt . (<> "\n")
+
+-- Conversions out of 'Utf8'. The builder and lazy targets unwrap the
+-- newtype directly; the strict targets go through their lazy counterparts.
+
+instance FromUtf8 Utf8 where
+  cvt = id
+
+instance FromUtf8 BB.Builder where
+  cvt = getUtf8
+
+instance FromUtf8 TB.Builder where
+  cvt = TB.fromText . cvt
+
+instance FromUtf8 T.Text where
+  cvt = TL.toStrict . cvt
+
+instance FromUtf8 TL.Text where
+  cvt = decUtf8TL . getUtf8
+
+instance FromUtf8 B.ByteString where
+  cvt = BL.toStrict . cvt
+
+instance FromUtf8 BL.ByteString where
+  cvt = BB.toLazyByteString . getUtf8
+
+-- | 'String' target, reached by unpacking the strict 'T.Text' conversion.
+instance a ~ Char => FromUtf8 [a] where
+  cvt = T.unpack . cvt
+
+-- | @IO ()@ target: the builder is handed to 'putBuilder'.
+instance a ~ () => FromUtf8 (IO a) where
+  cvt = putBuilder . getUtf8
+
+
+--
+-- Renderable
+--
+
+-- | for rendering things into Utf8
+--
+-- The two methods have default definitions written in terms of each
+-- other, so every instance must define at least one of them. The
+-- @MINIMAL@ pragma makes GHC warn about an instance that defines
+-- neither; such an instance would otherwise compile silently and loop
+-- forever as soon as either method is evaluated.
+class Renderable a where
+  {-# MINIMAL render | renderText #-}
+
+  -- | render @a@ into a 'Utf8'; by default this renders the result of
+  -- 'renderText'
+  render :: a -> Utf8
+  render = render . renderText
+
+  -- | render @a@ into a @Text@; by default this converts the result of
+  -- 'render' with 'utf8ToText'
+  renderText :: a -> T.Text
+  renderText = utf8ToText . render
+
+-- Each instance below defines 'render' only and inherits the default
+-- 'renderText'. Most of them wrap the matching "Data.ByteString.Builder"
+-- primitive in the 'Utf8' constructor.
+instance Renderable Utf8          where render = id
+instance Renderable T.Text        where render = Utf8 . T.encodeUtf8Builder
+instance Renderable TL.Text       where render = foldMap render . TL.toChunks
+instance Renderable TB.Builder    where render = Utf8 . encUtf8TB
+instance Renderable Char          where render = Utf8 . BB.charUtf8
+instance Renderable Integer       where render = Utf8 . BB.integerDec
+instance Renderable Float         where render = Utf8 . BB.floatDec
+instance Renderable Double        where render = Utf8 . BB.doubleDec
+instance Renderable Int           where render = Utf8 . BB.intDec
+instance Renderable Int8          where render = Utf8 . BB.int8Dec
+instance Renderable Int16         where render = Utf8 . BB.int16Dec
+instance Renderable Int32         where render = Utf8 . BB.int32Dec
+instance Renderable Int64         where render = Utf8 . BB.int64Dec
+instance Renderable Word          where render = Utf8 . BB.wordDec
+instance Renderable Word8         where render = Utf8 . BB.word8Dec
+instance Renderable Word16        where render = Utf8 . BB.word16Dec
+instance Renderable Word32        where render = Utf8 . BB.word32Dec
+instance Renderable Word64        where render = Utf8 . BB.word64Dec
+-- Exceptions are rendered from their 'displayException' text.
+instance Renderable SomeException where render = fromString . displayException
+instance Renderable IOException   where render = fromString . displayException
+
+
+--
+-- Utf8 toolkit functions
+--
+
+-- | Render any 'Show'able value into 'Utf8' by way of its 'show' string.
+renderShowable :: Show a => a -> Utf8
+renderShowable x = fromString (show x)
+
+-- | Wrap a strict 'B.ByteString' as a 'Utf8' without inspecting its
+-- contents. NB: no validation is performed here, so if the
+-- @ByteString@ holds text in a non-UTF-8 encoding then
+-- 'bad things' (TM) are liable to happen.
+unsafeInjectByteStringIntoUtf8 :: B.ByteString -> Utf8
+unsafeInjectByteStringIntoUtf8 bytes = Utf8 (BB.byteString bytes)
+
+-- | Run the builder inside a 'Utf8' and decode the resulting bytes into
+-- a strict 'T.Text', using 'lenientDecode' as the decoding error handler.
+utf8ToText :: Utf8 -> T.Text
+utf8ToText (Utf8 builder) = T.decodeUtf8With lenientDecode strictBytes
+  where
+    strictBytes = BL.toStrict (BB.toLazyByteString builder)
+
+-- | Run the builder inside a 'Utf8' and decode the resulting lazy bytes
+-- into a lazy 'TL.Text', using 'lenientDecode' as the decoding error
+-- handler.
+utf8ToLazyText :: Utf8 -> TL.Text
+utf8ToLazyText (Utf8 builder) =
+  TL.decodeUtf8With lenientDecode (BB.toLazyByteString builder)
+
+-- | Write a 'Utf8' to the file at the given path. The file is opened in
+-- binary mode with 'WriteMode' for the duration of the write.
+writeFileUtf8 :: MonadIO m => FilePath -> Utf8 -> m ()
+writeFileUtf8 fp u8 = liftIO (withBinaryFile fp WriteMode emit)
+  where
+    emit hdl = BB.hPutBuilder hdl (getUtf8 u8)
diff --git a/src/Text/Utf8/Compat.hs b/src/Text/Utf8/Compat.hs
new file mode 100644
index 0000000..a5bf803
--- /dev/null
+++ b/src/Text/Utf8/Compat.hs
@@ -0,0 +1,41 @@
+{-# LANGUAGE CPP #-}
+
+#if __GLASGOW_HASKELL__ > 802
+module Text.Utf8.Compat where
+
+import qualified Data.ByteString.Builder as BB
+import Data.Text.Encoding.Error
+import qualified Data.Text.Lazy as TL
+import qualified Data.Text.Lazy.Builder as TB
+import qualified Data.Text.Lazy.Encoding as TL
+import System.IO
+
+#else
+module Text.Utf8.Compat
+ ( Semigroup(..)
+ , module Text.Utf8.Compat
+ ) where
+
+import qualified Data.ByteString.Builder as BB
+import Data.Text.Encoding.Error
+import qualified Data.Text.Lazy as TL
+import qualified Data.Text.Lazy.Builder as TB
+import qualified Data.Text.Lazy.Encoding as TL
+import System.IO
+
+import Data.Semigroup
+#endif
+
+
+--
+-- internal helper functions
+--
+
+-- | Run a byte-string builder and decode its output as lazy text, using
+-- 'lenientDecode' as the decoding error handler.
+decUtf8TL :: BB.Builder -> TL.Text
+decUtf8TL builder =
+  TL.decodeUtf8With lenientDecode (BB.toLazyByteString builder)
+
+-- | Run a text builder and re-encode its lazy text as a UTF-8
+-- byte-string builder.
+encUtf8TB :: TB.Builder -> BB.Builder
+encUtf8TB textBuilder = TL.encodeUtf8Builder (TB.toLazyText textBuilder)
+
+-- | Write the output of a byte-string builder to 'stdout'.
+putBuilder :: BB.Builder -> IO ()
+putBuilder builder = BB.hPutBuilder stdout builder