summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorranjitjhala <>2016-04-20 18:52:00 (GMT)
committerhdiff <hdiff@hdiff.luite.com>2016-04-20 18:52:00 (GMT)
commit77cdf2986b49ad60d203addedba4619688302c96 (patch)
tree497c1f63c71242716451ac76fd1deaf11de615a0
version 0.1.0.00.1.0.0
-rw-r--r--LICENSE20
-rw-r--r--README.md15
-rw-r--r--Setup.hs2
-rw-r--r--csv-table.cabal31
-rw-r--r--src/Data/CSV/Table.hs10
-rw-r--r--src/Data/CSV/Table/Email.hs46
-rw-r--r--src/Data/CSV/Table/Ops.hs204
-rw-r--r--src/Data/CSV/Table/Types.hs107
8 files changed, 435 insertions, 0 deletions
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..bb72a98
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,20 @@
+Copyright (c) 2015 Ranjit Jhala
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5551348
--- /dev/null
+++ b/README.md
@@ -0,0 +1,15 @@
+CSV Table Scripts
+=================
+
+A collection of scripts to manipulate CSV tables, including:
+
+* `fromFile` : load a CSV table from a file,
+* `toFile` : save a CSV table to a file,
+* `join`, `joinBy` : joining two tables by a particular column,
+* `sortBy` : sorting a table by a particular column,
+* `email` : emailing the contents of each row to an address in the row,
+* `project` : restricting a table to a particular set of columns
+
+
+
+
diff --git a/Setup.hs b/Setup.hs
new file mode 100644
index 0000000..9a994af
--- /dev/null
+++ b/Setup.hs
@@ -0,0 +1,2 @@
+import Distribution.Simple
+main = defaultMain
diff --git a/csv-table.cabal b/csv-table.cabal
new file mode 100644
index 0000000..c306747
--- /dev/null
+++ b/csv-table.cabal
@@ -0,0 +1,31 @@
+-- Initial csv-table.cabal generated by cabal init. For further
+-- documentation, see http://haskell.org/cabal/users-guide/
+
+name: csv-table
+version: 0.1.0.0
+synopsis: Scripts for manipulating tables stored as CSV files
+description: Scripts for manipulating tables, e.g. filter, join, etc. stored as CSV files
+homepage: https://github.com/ucsd-progsys/csv-table
+license: MIT
+license-file: LICENSE
+author: Ranjit Jhala
+maintainer: jhala@cs.ucsd.edu
+-- copyright:
+category: Data
+build-type: Simple
+extra-source-files: README.md
+cabal-version: >=1.10
+
+Library
+ hs-source-dirs: src
+ build-depends: base >= 4 && < 5,
+ csv,
+ containers,
+ filepath,
+ process
+
+ default-language: Haskell2010
+ Exposed-Modules: Data.CSV.Table,
+ Data.CSV.Table.Types,
+ Data.CSV.Table.Ops,
+ Data.CSV.Table.Email
diff --git a/src/Data/CSV/Table.hs b/src/Data/CSV/Table.hs
new file mode 100644
index 0000000..8436980
--- /dev/null
+++ b/src/Data/CSV/Table.hs
@@ -0,0 +1,10 @@
+module Data.CSV.Table (
+ module Data.CSV.Table.Types,
+ module Data.CSV.Table.Ops,
+ module Data.CSV.Table.Email
+ ) where
+
+import Data.CSV.Table.Types
+import Data.CSV.Table.Ops
+import Data.CSV.Table.Email
+
diff --git a/src/Data/CSV/Table/Email.hs b/src/Data/CSV/Table/Email.hs
new file mode 100644
index 0000000..a121b3c
--- /dev/null
+++ b/src/Data/CSV/Table/Email.hs
@@ -0,0 +1,46 @@
+module Data.CSV.Table.Email
+ ( -- * Email representation
+ Email (..)
+
+ -- * Send function
+ , sendMail
+
+ ) where
+
+import Text.Printf (printf)
+import System.Process
+import Control.Monad (forM_)
+import Data.List (intercalate)
+import Data.CSV.Table.Types
+import Data.CSV.Table.Ops
+
+
+data Email = E { uid :: String
+ , to :: String
+ , cc :: [String]
+ , sender :: String
+ , subject :: String
+ , text :: String
+ , send :: Bool
+ } deriving (Show)
+
+sendMail :: Table -> (RowInfo -> Email) -> IO ()
+sendMail t f = forM_ (mapRows f t) sendMail1
+
+sendMail1 :: Email -> IO ()
+sendMail1 e = do
+ let tmp = uid e
+ let cmd = mailCmd e
+ writeFile tmp (text e)
+ status <- if (send e) then show `fmap` system cmd else return "FAKESEND"
+ putStrLn $ printf "[exec: %s] Status[%s]: %s %s" cmd (status) (uid e) (to e)
+
+mailCmd :: Email -> String
+mailCmd e =
+ printf "mail -s \"%s\" -aFrom:%s %s %s < %s" (subject e) (sender e) (to e) (ccs $ cc e) (uid e)
+ where
+ ccs [] = ""
+ ccs xs = "-c " ++ intercalate "," xs
+
+
+
diff --git a/src/Data/CSV/Table/Ops.hs b/src/Data/CSV/Table/Ops.hs
new file mode 100644
index 0000000..c52f71d
--- /dev/null
+++ b/src/Data/CSV/Table/Ops.hs
@@ -0,0 +1,204 @@
+
+module Data.CSV.Table.Ops (
+
+ -- * Core Operators
+ join, joinBy
+
+ -- * Join using default row for missing keys
+ , padJoin, padJoinBy
+
+ -- * Difference of two tables
+ , diff, diffBy
+
+ -- * Index a table by a key
+ , index, indexBy
+
+ -- * Order by the values of a particular column
+ , sortBy
+
+ -- * Restrict table to a subset of columns
+ , project
+ , project1
+
+ -- * Move a column to first (leftmost) position
+ , moveColL
+
+ -- * Map a function over all rows
+ , mapRows
+ ) where
+
+import Text.CSV
+import System.FilePath
+import Control.Applicative ((<$>))
+import Data.Maybe
+import Data.List (sort, elemIndex)
+import qualified Data.Map.Strict as M
+import Data.CSV.Table.Types
+
+-------------------------------------------------------------------
+-- | Swap two columns of a table
+-------------------------------------------------------------------
+moveColL :: Table -> Col -> Table
+moveColL t@(T n cs b) c
+ | i == 0 = t
+ | otherwise = T n (pluck i cs) [R $ pluck i r | R r <- b]
+ where
+ i = columnIndex t c
+
+pluck n xs = go [] n xs
+ where
+ go ls 0 (r:rs) = r : (ls +++ rs)
+ go ls n (r:rs) = go (r:ls) (n-1) rs
+
+[] +++ ys = ys
+(x:xs) +++ ys = xs +++ (x:ys)
+
+-------------------------------------------------------------------
+-- | Join two tables by first column (which should be unique)
+-------------------------------------------------------------------
+
+join :: Table -> Table -> Table
+join t1 t2 = T n' cs b'
+ where
+ m1 = index t1
+ m2 = index t2
+ m' = M.intersectionWith (\r1 r2 -> r1 ++ tail r2) m1 m2
+ b' = R <$> M.elems m'
+ n' = dim t1 + dim t2 - 1
+ cs = (cols t1) ++ (tail $ cols t2)
+
+index :: Table -> M.Map Field [Field]
+index t = M.fromList [(head r, r) | r <- getRows t]
+
+
+-------------------------------------------------------------------
+-- | Join two tables by any unique column
+-------------------------------------------------------------------
+
+joinBy :: Col -> Table -> Table -> Table
+joinBy c t1 t2 = join t1' t2'
+ where
+ t1' = moveColL t1 c
+ t2' = moveColL t2 c
+
+--------------------------------------------------------------------
+-- | Differences of two tables by first column
+-------------------------------------------------------------------
+
+diff :: Table -> Table -> Table
+diff t1@(T n1 c1 b1) t2 = T n1 c1 b1'
+ where
+ m1 = index t1
+ m2 = index t2
+ m1' = M.difference m1 m2
+ b1' = R <$> M.elems m1'
+
+--------------------------------------------------------------------
+-- | Differences of two tables by any column
+-------------------------------------------------------------------
+
+diffBy :: Col -> Table -> Table -> Table
+diffBy c t1 t2 = diff t1' t2'
+ where
+ t1' = moveColL t1 c
+ t2' = moveColL t2 c
+
+
+--------------------------------------------------------------------
+-- | Join two tables by first column, using default row for missing keys
+-------------------------------------------------------------------
+
+padJoin :: Row -> Table -> Table -> Table
+padJoin (R xs) t1 t2 = T n' cs b'
+ where
+ m1 = index t1
+ m2 = index t2
+ m1' = M.difference m1 m2
+ m2' = M.mapWithKey (\k _ -> k:xs) m1
+ m' = M.intersectionWith (\r1 r2 -> r1 ++ tail r2) m1 (M.union m2 m2')
+ b' = R <$> M.elems m'
+ n' = dim t1 + dim t2 - 1
+ cs = (cols t1) ++ (tail $ cols t2)
+
+--------------------------------------------------------------------
+-- | Join two tables by any unique column, using default row for missing keys
+-------------------------------------------------------------------
+
+padJoinBy c r t1 t2 = padJoin r t1' t2'
+ where
+ t1' = moveColL t1 c
+ t2' = moveColL t2 c
+
+------------------------------------------------------------------
+--- | Index table by any column
+-------------------------------------------------------------------
+
+indexBy :: Table -> Col -> M.Map Field [Field]
+indexBy t c
+ | ok = M.fromList [ (r !! i, r) | r <- getRows t]
+ | otherwise = error $ "indexBy: " ++ show c ++ " is not a unique column!"
+ where
+ i = columnIndex t c
+ ok = isUnique t c
+
+-------------------------------------------------------------------
+-- | Is a given column unique
+-------------------------------------------------------------------
+
+isUnique :: Table -> Col -> Bool
+isUnique t c = not $ isDup $ project1 t c
+
+isDup xs = (length xs) /= (length xs') where xs' = nubOrd xs
+
+nubOrd = go . sort
+ where
+ go [] = []
+ go (x:xs) = x : go ys where (_,ys) = span (x ==) xs
+
+-------------------------------------------------------------------
+-- | Project to a particular column
+-------------------------------------------------------------------
+
+project1 :: Table -> Col -> [Field]
+project1 t c = [ r !! i | r <- getRows t]
+ where
+ i = columnIndex t c
+
+project :: Table -> [Col] -> Table
+project t cs = T n cs body'
+ where
+ body' = projRow is' <$> body t
+ is = (t `columnIndex`) <$> cs
+ n = length is
+ is' = sort is
+
+projRow is (R xs) = R [ xs !! i | i <- is ]
+
+
+-------------------------------------------------------------------
+-- | Sort table by a particular column
+-------------------------------------------------------------------
+
+sortBy :: Table -> Col -> Table
+sortBy t c = t { body = [ R fs | (_, fs) <- M.toList $ indexBy t c] }
+
+-------------------------------------------------------------------
+-- | Map a function over all rows of a table
+-------------------------------------------------------------------
+
+mapRows :: (RowInfo -> a) -> Table -> [a]
+mapRows f = map f . rowCols
+
+rowCols :: Table -> [RowInfo]
+rowCols t = [zip cs r | r <- rs]
+ where
+ cs = cols t
+ rs = getRows t
+
+-------------------------------------------------------------------
+-- | Helpers
+-------------------------------------------------------------------
+
+columnIndex :: Table -> Col -> Int
+columnIndex t c = fromJust $ elemIndex c $ cols t
+
diff --git a/src/Data/CSV/Table/Types.hs b/src/Data/CSV/Table/Types.hs
new file mode 100644
index 0000000..1ab45e1
--- /dev/null
+++ b/src/Data/CSV/Table/Types.hs
@@ -0,0 +1,107 @@
+
+module Data.CSV.Table.Types (
+
+ -- * Representation
+ Table (..)
+ , Row (..)
+ , Col (..)
+ , RowInfo
+
+ -- * Accessors
+ , getCols
+ , getRows
+ , lookupCol
+
+ -- * Parsing
+ , fromFile
+
+ -- * Saving
+ , toFile
+
+ ) where
+
+import Text.Printf
+import Text.CSV
+import System.FilePath
+import Control.Applicative ((<$>))
+import Data.Maybe
+import Data.List (sort, elemIndex)
+import qualified Data.Map.Strict as M
+
+newtype Col = C Field deriving (Eq, Ord, Show)
+newtype Row = R [Field] deriving (Eq, Ord, Show)
+type RowInfo = [(Col, Field)]
+
+-----------------------------------------------------------------------------------
+-- | Types
+-----------------------------------------------------------------------------------
+
+data Table = T { dim :: Int, cols :: [Col], body :: [Row]}
+
+{-@ measure width :: Row -> Int
+ width (R xs) = (len xs) @-}
+
+{-@ type ColsN N = {v:[Col] | (len v) = N} @-}
+{-@ type RowN N = {r:Row | (width r) = N} @-}
+{-@ data Table = T (dim :: Nat) (cols :: (ColsN dim)) (body :: [(RowN dim)]) @-}
+
+{-@ getCols :: t:Table -> ListN Field {(dim t)} @-}
+getCols t = [c | C c <- cols t]
+
+{-@ getRows :: t:Table -> ListN Field {(dim t)} @-}
+getRows t = [r | R r <- body t]
+
+lookupCol :: Col -> RowInfo -> Field
+lookupCol c cxs = fromMaybe err $ lookup c cxs
+ where
+ err = printf "lookupCol: cannot find %s in %s" (show c) (show cxs)
+
+----------------------------------------------------------------------
+-- | Converting to CSV
+----------------------------------------------------------------------
+
+fromCSV :: CSV -> Table
+fromCSV [] = error "fromCSV: Empty CSV with no rows!"
+fromCSV (r:rs) = T n cs b
+ where
+ n = length r
+ cs = [C x | x <- r]
+ b = mapMaybe (makeRow n) $ zip [0..] rs
+
+makeRow :: Int -> (Int, Record) -> Maybe Row
+makeRow n (i, xs)
+ | length xs == n = Just $ R xs
+ | empty xs = Nothing
+ | otherwise = error $ printf "Row %d does not have %d columns:\n%s" i n (show xs)
+
+empty :: Record -> Bool
+empty = null . unwords
+
+toCSV :: Table -> CSV
+toCSV t = [c | C c <- cols t] : [xs | R xs <- body t]
+
+-------------------------------------------------------------------
+-- | Parsing
+-------------------------------------------------------------------
+
+toFile :: FilePath -> Table -> IO ()
+toFile f = writeFile f . show
+
+fromFile :: FilePath -> IO Table
+fromFile f = fromString f <$> readFile f
+
+fromString :: FilePath -> String -> Table
+fromString fp s = fromCSV $ parseCSV' fp s
+
+parseCSV' fp s = case parseCSV fp s of
+ Right c -> c
+ Left e -> error $ printf "parseCSV': %s" (show e)
+
+-------------------------------------------------------------------
+-- | Printing
+-------------------------------------------------------------------
+
+instance Show Table where
+ show = printCSV . toCSV
+
+