summary refs log tree commit diff
diff options
context:
space:
mode:
author MaxBolingbroke <> 2017-04-03 23:02:00 (GMT)
committer hdiff <hdiff@hdiff.luite.com> 2017-04-03 23:02:00 (GMT)
commit 2dcd3b2ac7010daf10434e9b6775ab3decedb95e (patch)
tree 25f3d1dd5567889798d4e0dfca7fd2253ef5bdf0
parent dd5b18fb5a22a00cfe32bfaf2a783e96c6237b71 (diff)
version 1.1.0.2 (HEAD, 1.1.0.2, master)
-rw-r--r-- Codec/Text/Detect.hs 10
-rw-r--r-- cbits/dso_handle.c 1
-rw-r--r-- charsetdetect.cabal 31
-rw-r--r-- libcharsetdetect/nspr-emu/prcpucfg_win.h 2
4 files changed, 28 insertions, 16 deletions
diff --git a/Codec/Text/Detect.hs b/Codec/Text/Detect.hs
index 556b93f..8b9368a 100644
--- a/Codec/Text/Detect.hs
+++ b/Codec/Text/Detect.hs
@@ -7,6 +7,7 @@ import Control.Exception
import qualified Data.ByteString.Internal as SI
import qualified Data.ByteString.Lazy as L
import Data.Traversable (traverse)
+import Control.Applicative
import Foreign.C.Types
import Foreign.C.String
@@ -42,7 +43,6 @@ foreign import ccall unsafe "csd_close" c_csd_close :: Csd_t -> IO CString
-- > Big5
-- > EUC-JP
-- > EUC-KR
--- > GB18030
-- > gb18030
-- > HZ-GB-2312
-- > IBM855
@@ -71,9 +71,6 @@ foreign import ccall unsafe "csd_close" c_csd_close :: Csd_t -> IO CString
-- > X-ISO-10646-UCS-4-2143
-- > X-ISO-10646-UCS-4-3412
-- > x-mac-cyrillic
---
--- Note that there are two capitalisations of @gb18030@. For this reason (and to be future-proof against any future behaviour
--- like this for newly-added character sets) we recommend that you compare character set names case insensitively.
{-# NOINLINE detectEncodingName #-}
detectEncodingName :: L.ByteString -> Maybe String
detectEncodingName b = unsafePerformIO $ do
@@ -90,7 +87,10 @@ detectEncodingName b = unsafePerformIO $ do
c_encoding_ptr <- c_csd_close csd
if c_encoding_ptr == nullPtr
then return Nothing
- else fmap Just (peekCString c_encoding_ptr)
+ else Just . normalise <$> peekCString c_encoding_ptr
+ where
+ normalise "GB18030" = "gb18030"
+ normalise x = x
-- | Detect the encoding for a 'L.ByteString' and attempt to create a 'TextEncoding' suitable for decoding it.
detectEncoding :: L.ByteString -> IO (Maybe TextEncoding)
diff --git a/cbits/dso_handle.c b/cbits/dso_handle.c
new file mode 100644
index 0000000..1960a15
--- /dev/null
+++ b/cbits/dso_handle.c
@@ -0,0 +1 @@
+void *__dso_handle;
diff --git a/charsetdetect.cabal b/charsetdetect.cabal
index f1a7def..fbfae8b 100644
--- a/charsetdetect.cabal
+++ b/charsetdetect.cabal
@@ -1,5 +1,5 @@
Name: charsetdetect
-Version: 1.0
+Version: 1.1.0.2
Cabal-Version: >= 1.6
Category: Text
Synopsis: Character set detection using Mozilla's Universal Character Set Detector
@@ -62,16 +62,27 @@ Source-Repository head
Library
Exposed-Modules: Codec.Text.Detect
-
- Build-Depends: base >= 4.2.0.2 && < 5, bytestring >= 0.9.1.7 && < 0.10
-
- -- We really need to ensure that the *final program* links with g++
- -- It is useless to specify that the library links with it..
- --Ghc-Options: -pgml g++
-
+
+ Build-Depends: base >= 4.2.0.2 && < 5, bytestring >= 0.9.1.7
+
+ -- Needed to ensure correct build on GHC 7.6 when imported by a
+ -- library which uses Template Haskell.
+ --
+ -- See http://stackoverflow.com/a/26454930/615030
+ -- and issue #1.
+ if impl(ghc < 7.8)
+ cc-options: -fno-weak
+ c-sources: cbits/dso_handle.c
+
-- This is a bit dodgy since g++ might link in more stuff, but will probably work in practice:
- Extra-Libraries: stdc++
-
+ if os(windows)
+ if arch(x86_64)
+ extra-libraries: stdc++-6 gcc_s_seh-1
+ else
+ extra-libraries: stdc++-6 gcc_s_dw2-1
+ else
+ extra-libraries: stdc++
+
Include-Dirs: libcharsetdetect
libcharsetdetect/mozilla/extensions/universalchardet/src/base
libcharsetdetect/nspr-emu
diff --git a/libcharsetdetect/nspr-emu/prcpucfg_win.h b/libcharsetdetect/nspr-emu/prcpucfg_win.h
index 4ad5345..f599e48 100644
--- a/libcharsetdetect/nspr-emu/prcpucfg_win.h
+++ b/libcharsetdetect/nspr-emu/prcpucfg_win.h
@@ -100,7 +100,7 @@
#define PR_BYTES_PER_WORD_LOG2 2
#define PR_BYTES_PER_DWORD_LOG2 2
-#elif defined(_M_X64) || defined(_M_AMD64) || defined(_AMD64_)
+#elif defined(_M_X64) || defined(_M_AMD64) || defined(_AMD64_) || defined(__x86_64)
#define IS_LITTLE_ENDIAN 1
#undef IS_BIG_ENDIAN