summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBodigrim <>2020-03-25 21:53:00 (GMT)
committerhdiff <hdiff@hdiff.luite.com>2020-03-25 21:53:00 (GMT)
commit3c8862acb017153ed3440ffc7ce4ae4686d29bca (patch)
tree8ce21a3fe8adb2fe53df912bc7626284efe36787
parent2ec21b272d5c72979725d077d7e159c2518a2637 (diff)
version 0.3.1.0HEAD0.3.1.0master
-rw-r--r--Data/Chimera.hs86
-rw-r--r--Data/Chimera/FromIntegral.hs6
-rw-r--r--README.md39
-rw-r--r--bench/Bench.hs42
-rw-r--r--chimera.cabal28
5 files changed, 166 insertions, 35 deletions
diff --git a/Data/Chimera.hs b/Data/Chimera.hs
index 3c458c2..5e6e83d 100644
--- a/Data/Chimera.hs
+++ b/Data/Chimera.hs
@@ -6,14 +6,17 @@
--
-- Lazy infinite streams with O(1) indexing.
-{-# LANGUAGE CPP #-}
-{-# LANGUAGE DeriveFoldable #-}
-{-# LANGUAGE DeriveFunctor #-}
-{-# LANGUAGE DeriveTraversable #-}
-{-# LANGUAGE FlexibleContexts #-}
-{-# LANGUAGE FlexibleInstances #-}
-{-# LANGUAGE ScopedTypeVariables #-}
-{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE DeriveFoldable #-}
+{-# LANGUAGE DeriveFunctor #-}
+{-# LANGUAGE DeriveTraversable #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE TypeFamilies #-}
module Data.Chimera
( -- * Memoization
@@ -49,13 +52,20 @@ module Data.Chimera
import Prelude hiding ((^), (*), div, fromIntegral, not, and, or, cycle, iterate, drop)
import Control.Applicative
+import Control.Monad.Fix
+import Control.Monad.Zip
import Data.Bits
-import Data.Function (fix)
import Data.Functor.Identity
import qualified Data.Vector as V
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
+#if DefineRepresentable
+import Control.Monad.Reader
+import Data.Distributive
+import qualified Data.Functor.Rep as Rep
+#endif
+
import Data.Chimera.Compat
import Data.Chimera.FromIntegral
@@ -125,6 +135,33 @@ instance Applicative (Chimera V.Vector) where
liftA2 f = zipSubvectors (liftA2 f)
#endif
+instance Monad (Chimera V.Vector) where
+ m >>= f = tabulate $ \w -> index (f (index m w)) w
+
+instance MonadFix (Chimera V.Vector) where
+ mfix = tabulate . mfix . fmap index
+
+instance MonadZip (Chimera V.Vector) where
+ mzip as bs = tabulate (\w -> (index as w, index bs w))
+ mzipWith f as bs = tabulate $ \w -> f (index as w) (index bs w)
+
+#if DefineRepresentable
+
+instance MonadReader Word (Chimera V.Vector) where
+ ask = Rep.askRep
+ local = Rep.localRep
+
+instance Distributive (Chimera V.Vector) where
+ distribute = Rep.distributeRep
+ collect = Rep.collectRep
+
+instance Rep.Representable (Chimera V.Vector) where
+ type Rep (Chimera V.Vector) = Word
+ tabulate = tabulate
+ index = index
+
+#endif
+
bits :: Int
bits = fbs (0 :: Word)
@@ -145,15 +182,13 @@ tabulateM
(Monad m, G.Vector v a)
=> (Word -> m a)
-> m (Chimera v a)
-tabulateM f = do
- z <- f 0
- zs <- V.generateM bits tabulateSubVector
- pure $ Chimera $ G.singleton z `V.cons` zs
+tabulateM f = Chimera <$> V.generateM (bits + 1) tabulateSubVector
where
tabulateSubVector :: Int -> m (v a)
+ tabulateSubVector 0 = G.singleton <$> f 0
tabulateSubVector i = G.generateM ii (\j -> f (int2word (ii + j)))
where
- ii = 1 `shiftL` i
+ ii = 1 `unsafeShiftL` (i - 1)
{-# SPECIALIZE tabulateM :: G.Vector v a => (Word -> Identity a) -> Identity (Chimera v a) #-}
@@ -192,17 +227,15 @@ tabulateFixM
tabulateFixM f = result
where
result :: m (Chimera v a)
- result = do
- z <- fix f 0
- zs <- V.generateM bits tabulateSubVector
- pure $ Chimera $ G.singleton z `V.cons` zs
+ result = Chimera <$> V.generateM (bits + 1) tabulateSubVector
tabulateSubVector :: Int -> m (v a)
+ tabulateSubVector 0 = G.singleton <$> fix f 0
tabulateSubVector i = subResult
where
subResult = G.generateM ii (\j -> f fixF (int2word (ii + j)))
subResultBoxed = V.generateM ii (\j -> f fixF (int2word (ii + j)))
- ii = 1 `shiftL` i
+ ii = 1 `unsafeShiftL` (i - 1)
fixF :: Word -> m a
fixF k
@@ -245,11 +278,14 @@ iterateM f seed = do
-- >>> index ch 9
-- 81
index :: G.Vector v a => Chimera v a -> Word -> a
-index (Chimera vs) 0 = G.unsafeHead (V.unsafeHead vs)
-index (Chimera vs) i = G.unsafeIndex (vs `V.unsafeIndex` (sgm + 1)) (word2int $ i - 1 `shiftL` sgm)
+index (Chimera vs) i =
+ (vs `V.unsafeIndex` (fbs i - lz))
+ `G.unsafeIndex`
+ (word2int (i .&. complement ((1 `shiftL` (fbs i - 1)) `unsafeShiftR` lz)))
where
- sgm :: Int
- sgm = fbs i - 1 - word2int (clz i)
+ lz :: Int
+ !lz = word2int (clz i)
+{-# INLINE index #-}
-- | Convert a stream to an infinite list.
--
@@ -313,7 +349,7 @@ mapSubvectors
=> (u a -> v b)
-> Chimera u a
-> Chimera v b
-mapSubvectors f (Chimera bs) = Chimera (V.map f bs)
+mapSubvectors f (Chimera bs) = Chimera (fmap f bs)
-- | Zip subvectors from two streams, using a given length-preserving function.
zipSubvectors
@@ -322,4 +358,4 @@ zipSubvectors
-> Chimera u a
-> Chimera v b
-> Chimera w c
-zipSubvectors f (Chimera bs1) (Chimera bs2) = Chimera (V.zipWith f bs1 bs2)
+zipSubvectors f (Chimera bs1) (Chimera bs2) = Chimera (mzipWith f bs1 bs2)
diff --git a/Data/Chimera/FromIntegral.hs b/Data/Chimera/FromIntegral.hs
index 9650250..115a01e 100644
--- a/Data/Chimera/FromIntegral.hs
+++ b/Data/Chimera/FromIntegral.hs
@@ -12,10 +12,8 @@ module Data.Chimera.FromIntegral
, int2word
) where
-import Unsafe.Coerce
-
word2int :: Word -> Int
-word2int = unsafeCoerce
+word2int = fromIntegral
int2word :: Int -> Word
-int2word = unsafeCoerce
+int2word = fromIntegral
diff --git a/README.md b/README.md
index 14358a3..ffea8f8 100644
--- a/README.md
+++ b/README.md
@@ -63,7 +63,7 @@ isOddF :: (Word -> Bool) -> Word -> Bool
isOddF f n = if n == 0 then False else not (f (n - 1))
```
-and invoke `tabulateFix` to pass cache into recursive calls as well:
+and invoke `memoizeFix` to pass cache into recursive calls as well:
```haskell
isOdd' :: Word -> Bool
@@ -95,3 +95,40 @@ Now create its memoized version for rapid evaluation:
isPrime' :: Word -> Bool
isPrime' = memoizeFix isPrimeF
```
+
+## Magic and its exposure
+
+Internally `Chimera` is represented as a _boxed_ vector
+of growing (possibly, _unboxed_) vectors `v a`:
+
+```haskell
+newtype Chimera v a = Chimera (Data.Vector.Vector (v a))
+```
+
+Assuming 64-bit architecture, the outer vector consists of 65 inner vectors
+of sizes 1, 1, 2, 2<sup>2</sup>, ..., 2<sup>63</sup>. Since the outer vector
+is boxed, inner vectors are allocated on-demand only: quite fortunately,
+there is no need to allocate all 2<sup>64</sup> elements at once.
+
+To access an element by its index it is enough to find out to which inner
+vector it belongs, which, thanks to the doubling pattern of sizes,
+can be done instantly by [`ffs`](https://en.wikipedia.org/wiki/Find_first_set)
+instruction. The caveat here is
+that accessing an inner vector first time will cause its allocation,
+taking O(n) time. So to restore _amortized_ O(1) time we must assume
+a dense access. `Chimera` is no good for sparse access
+over a thin set of indices.
+
+One can argue that this structure is not infinite,
+because it cannot handle more than 2<sup>64</sup> elements.
+I believe that it is _infinite enough_ and no one would be able to exhaust
+its finiteness any time soon. Strictly speaking, to cope with indices out of
+`Word` range and `memoize`
+[Ackermann function](https://en.wikipedia.org/wiki/Ackermann_function),
+one could use more layers of indirection, raising access time
+to O([log<sup>*</sup>](https://en.wikipedia.org/wiki/Iterated_logarithm) n).
+I still think that it is morally correct to claim O(1) access,
+because all asymptotic estimates of data structures
+are usually made under an assumption that they contain
+less than `maxBound :: Word` elements
+(otherwise you can not even treat pointers as a fixed-size data).
diff --git a/bench/Bench.hs b/bench/Bench.hs
new file mode 100644
index 0000000..b07a667
--- /dev/null
+++ b/bench/Bench.hs
@@ -0,0 +1,42 @@
+module Main where
+
+import Control.Monad.State
+import Data.Chimera
+import Gauge.Main
+import System.Random
+
+main :: IO ()
+main = defaultMain
+ [ bgroup "read/Chimera" (map benchReadChimera [100, 200, 500, 1000])
+ , bgroup "read/List" (map benchReadList [100, 200, 500, 1000])
+ ]
+
+randomChimera :: UChimera Int
+randomChimera = flip evalState (mkStdGen 42) $ tabulateM $ const $ do
+ g <- get
+ let (x, g') = random g
+ put g'
+ pure x
+
+randomList :: [Int]
+randomList = randoms (mkStdGen 42)
+
+randomIndicesWord :: [Word]
+randomIndicesWord = randoms (mkStdGen 42)
+
+randomIndicesInt :: [Int]
+randomIndicesInt = randoms (mkStdGen 42)
+
+benchReadChimera :: Word -> Benchmark
+benchReadChimera n
+ = bench (show n)
+ $ nf (sum . map (index randomChimera))
+ $ map (`rem` n)
+ $ take (fromIntegral n) randomIndicesWord
+
+benchReadList :: Int -> Benchmark
+benchReadList n
+ = bench (show n)
+ $ nf (sum . map (randomList !!))
+ $ map (`mod` n)
+ $ take n randomIndicesInt
diff --git a/chimera.cabal b/chimera.cabal
index ceb5522..63782d8 100644
--- a/chimera.cabal
+++ b/chimera.cabal
@@ -1,5 +1,5 @@
name: chimera
-version: 0.3.0.0
+version: 0.3.1.0
cabal-version: >=1.10
build-type: Simple
license: BSD3
@@ -12,7 +12,7 @@ synopsis: Lazy infinite streams with O(1) indexing
author: Bodigrim
extra-source-files:
README.md
-tested-with: GHC==8.8.1, GHC==8.6.5, GHC==8.4.4, GHC==8.2.2, GHC==8.0.2
+tested-with: GHC==8.10.1, GHC==8.8.3, GHC==8.6.5, GHC==8.4.4, GHC==8.2.2, GHC==8.0.2
description:
There are plenty of memoizing libraries on Hackage, but they
usually fall into two categories:
@@ -43,10 +43,15 @@ source-repository head
type: git
location: https://github.com/Bodigrim/chimera
+flag representable
+ description: Define Representable instance from adjunctions package
+ default: True
+
library
- build-depends:
- base >=4.9 && <5,
- vector
+ build-depends: base >=4.9 && <5, vector
+ if flag(representable)
+ build-depends: adjunctions, distributive, mtl
+ cpp-options: -DDefineRepresentable
exposed-modules:
Data.Chimera
Data.Chimera.ContinuousMapping
@@ -72,3 +77,16 @@ test-suite test
default-language: Haskell2010
hs-source-dirs: test
ghc-options: -Wall
+
+benchmark bench
+ build-depends:
+ base,
+ chimera,
+ gauge,
+ mtl,
+ random
+ type: exitcode-stdio-1.0
+ main-is: Bench.hs
+ default-language: Haskell2010
+ hs-source-dirs: bench
+ ghc-options: -Wall