summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorOleksandrZhabenko <>2020-10-17 21:19:00 (GMT)
committerhdiff <hdiff@hdiff.luite.com>2020-10-17 21:19:00 (GMT)
commit55d40946379ba74ccdaa34eff043aa4c276c18b5 (patch)
tree417ead31647bddc233daf1b56764c3be04d1f0a4
parentd74eab5e7f5baba7d84c7d21bfab8cc303038afc (diff)
version 0.13.0.00.13.0.0
-rw-r--r--ChangeLog.md11
-rw-r--r--Distribution/Main.hs105
-rw-r--r--GetInfo/Main.hs (renamed from Proportion/Main.hs)69
-rw-r--r--uniqueness-periods-vector-examples.cabal17
4 files changed, 154 insertions, 48 deletions
diff --git a/ChangeLog.md b/ChangeLog.md
index 110e14f..c3f1acd 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -141,3 +141,14 @@ dependency function uniquenessVariants2GNP that influenced all the related funct
## 0.12.3.1 -- 2020-10-14
* Twelfth version revised D. Some code improvements.
+
+## 0.13.0.0 -- 2020-10-17
+
+* Thirteenth version. Divided the processText executable into two separate executables: propertiesText and distributionText (the second one is intended to process
+the output of the first one). For example, on Linux shell :
+
+ propertiesText <cmdline args> | distributionText <first arg> 1
+
+will produce the similar behaviour to the single passing of the processText in the previous versions. This was made to improve performance of the functionality provided
+earlier by the processText executable.
+
diff --git a/Distribution/Main.hs b/Distribution/Main.hs
new file mode 100644
index 0000000..750b40b
--- /dev/null
+++ b/Distribution/Main.hs
@@ -0,0 +1,105 @@
+-- |
+-- Module : Main
+-- Copyright : (c) OleksandrZhabenko 2020
+-- License : MIT
+-- Stability : Experimental
+-- Maintainer : olexandr543@yahoo.com
+--
+-- Analyzes a poetic text in Ukrainian, for every line prints statistic data and
+-- then for the whole poem prints the hypothesis evaluation information. Since the 0.4.0.0 version
+-- the program tries to be more accurate in cases of the lines consisting entirely of the words
+-- which are unique in phonetic meaning alongside the line. Another hypothesis is for the seventh command line
+-- argument (since the 0.12.0.0 version) equal to \"y0\" that the distribution
+-- of the placement of the actual poetic text in Ukrainian is not one of the standard distributions.
+-- It can probably have approximately a form of and is different for different authors:
+--
+-- > -- -- --
+-- > / \_/ \_/ \
+--
+-- To enable parallel computations (potentially, they can speed up the work), please, run the @distributionText@ executable with
+-- @+RTS -threaded -RTS@ command line options with possibly @-N@ option inside.
+--
+
+{-# OPTIONS_GHC -threaded -rtsopts #-}
+
+{-# LANGUAGE CPP, BangPatterns #-}
+
+module Main where
+
+#ifdef __GLASGOW_HASKELL__
+#if __GLASGOW_HASKELL__>=710
+/* code that applies only to GHC 7.10.* and higher versions */
+import GHC.Base (mconcat)
+#endif
+#endif
+import Control.Parallel.Strategies
+import Data.Maybe (fromMaybe)
+import Text.Read (readMaybe)
+import System.Environment
+import Numeric (showFFloat)
+import Data.List (sort)
+import Numeric.Stats
+import qualified Data.ByteString.Char8 as B
+import Data.Lists.FLines
+import Data.Statistics.RulesIntervals
+#ifdef __GLASGOW_HASKELL__
+#if __GLASGOW_HASKELL__==708
+/* code that applies only to GHC 7.8.* */
+mconcat = concat
+#endif
+#endif
+
+main :: IO ()
+main = do
+ args <- getArgs
+ let !gzS = concat . take 1 $ args
+ !printInput = concat . drop 1 . take 2 $ args
+ contents <- B.getContents
+ innerProc gzS printInput contents
+
+innerProc :: String -> String -> B.ByteString -> IO ()
+innerProc gzS printInput contents = do
+ if printInput == "1" then B.putStr contents else B.putStr B.empty
+ (!data31,!wordsCnt0_data32) <- processContents contents
+ let !gz = getIntervalsN gzS data31 -- Obtained from the first command line argument except those ones that are for RTS
+ !data4 = filter (/= 0.0) data31
+ if null data4 then putStrLn (replicate 102 '-') >> putStrLn "1.000+-0.000\tALL!" >> putStrLn (replicate 102 '=') -- Well, this means that all the text consists of the unique (in phonetic meaning) words alongside every line. A rather rare occurrence.
+ else do
+ let (!mean1,!disp) = meanWithDisp data4
+ !pairs = sort . filter ((/= 0) . snd) $ wordsCnt0_data32
+ g !m !n = (length . takeWhile (\(_,v) -> v == n) . dropWhile (\(_,v) -> v /= n) . takeWhile (\(u,_) -> u == m) . dropWhile (\(u,_) -> u /= m) $ pairs) `using` rdeepseq
+ h !y !x = mconcat [mconcat . map (\m1 -> mconcat [mconcat . map (\n1 -> (if y then show (g m1 n1) else if g m1 n1 == 0 then "." else show (g m1 n1)) ++ "\t") $ [1..gz],newLineEnding]) $ [2..7],replicate 102 x]
+ putStrLn . generalInfo1 gz pairs (mean1, disp) $ data31
+ putStrLn (h False '~')
+ putStrLn (h True '=')
+
+getIntervalsN :: String -> [a] -> Int
+getIntervalsN xs yss
+ | xs == "s" = sturgesH (length yss)
+ | xs == "l" = levynskyiMod (length yss)
+ | otherwise = fromMaybe 9 (readMaybe xs::(Maybe Int))
+{-# INLINE getIntervalsN #-}
+
+processContents :: B.ByteString -> IO ([Float],[(Int,Int)])
+processContents contents = do
+ let !anlines = B.lines contents
+ !anStrs = map (drop 6 . take 9 . B.words) anlines
+ !ratioStrs = map (B.unpack . head) anStrs
+ !wordsNStrs = map (B.unpack . (!! 1)) anStrs
+ !intervalNStrs = map (B.unpack . last) anStrs
+ !ratios = map (\xs -> fromMaybe 1.0 (readMaybe xs::Maybe Float)) ratioStrs
+ !wordsNs = map (\xs -> fromMaybe 0 (readMaybe xs::Maybe Int)) wordsNStrs
+ !intervalNs = map (\xs -> fromMaybe 0 (readMaybe xs::Maybe Int)) intervalNStrs
+ return (ratios,zip wordsNs intervalNs)
+
+generalInfo1 :: Int -> [(Int,Int)] -> (Float,Float) -> [Float] -> String
+generalInfo1 gz pairs (mean1, disp) data31 =
+ let !ks = map (\r -> length . takeWhile (== r) . dropWhile (/= r) . sort . map snd $ pairs) [1..gz]
+ !s = sum ks in
+ mconcat [replicate 102 '-', newLineEnding, mconcat . map (\r -> show r ++ "\t") $ [1..gz], newLineEnding, mconcat . map (\r -> show r ++ "\t") $ ks,
+ newLineEnding, mconcat . map (\r -> showFFloat (Just 2) (fromIntegral (r * 100) / fromIntegral s) "%\t") $ ks,newLineEnding,
+ mconcat [showFFloat (Just 4) mean1 "+-", showFFloat (Just 4) (sqrt disp) "\t", show (length . filter (== 0.0) $ data31),
+ '\t':show (length data31)], newLineEnding, mconcat . map (\r -> show r ++ "\t") $ [2..7], newLineEnding, mconcat .
+ map (\r -> (show . length . takeWhile (== r) . dropWhile (/= r) . map fst $ pairs) ++ "\t") $ [2..7], newLineEnding, replicate 102 '*']
+{-# INLINE generalInfo1 #-}
+
diff --git a/Proportion/Main.hs b/GetInfo/Main.hs
index 36a4af1..a8831e0 100644
--- a/Proportion/Main.hs
+++ b/GetInfo/Main.hs
@@ -16,7 +16,7 @@
-- > -- -- --
-- > / \_/ \_/ \
--
--- To enable parallel computations (potentially, they can speed up the work), please, run the @uniqVec03@ executable with
+-- To enable parallel computations (potentially, they can speed up the work), please, run the @propertiesText@ executable with
-- @+RTS -threaded -RTS@ command line options with possibly @-N@ option inside.
--
@@ -32,28 +32,25 @@ module Main where
import GHC.Base (mconcat)
#endif
#endif
+import System.IO
+import Control.Concurrent
+import Control.Exception
import Control.Parallel.Strategies
import Data.Maybe (fromMaybe)
import Text.Read (readMaybe)
import qualified Data.Vector as V
-import String.Languages.UniquenessPeriods.Vector
import Languages.UniquenessPeriods.Vector.General.Debug hiding (newLineEnding)
import Languages.UniquenessPeriods.Vector.Properties
-import Languages.UniquenessPeriods.Vector.PropertiesFuncRep
import Melodics.Ukrainian
import System.Environment
import Languages.Phonetic.Ukrainian.PrepareText
import Languages.UniquenessPeriods.Vector.Data
import Languages.UniquenessPeriods.Vector.Auxiliary
-import Languages.UniquenessPeriods.Vector.StrictV hiding (mconcat)
+import Languages.UniquenessPeriods.Vector.StrictV
import Numeric (showFFloat)
import Languages.UniquenessPeriods.Vector.Filters
-import GHC.Float (int2Float)
-import GHC.Real (ceiling)
-import Data.List (sort)
import Data.Char (isAlpha)
-import Numeric.Stats
-import Data.Lists.FLines hiding (mconcat)
+import Data.Lists.FLines
import Data.Statistics.RulesIntervals
import Languages.UniquenessPeriods.Vector.FuncRepRelated
#ifdef __GLASGOW_HASKELL__
@@ -81,27 +78,14 @@ main = do
contents <- readFile file
if compareMode == 0 then do
let !flines = fLines toOneLine contents
- innerProc printLine choice flines gzS
- else let tau !k = (let !flines1 = fLines k contents in innerProc printLine choice flines1 gzS) in
+ innerProc gzS printLine choice flines
+ else let tau !k = (let !flines1 = fLines k contents in innerProc gzS printLine choice flines1) in
case twoInParallel of
1 -> ($|) sequence_ rpar (map tau [1, 0])
_ -> mapM_ tau [1, 0]
-innerProc :: Int -> String -> [String] -> String -> IO ()
-innerProc printLine choice flInes gzS = do
- let !gz = getIntervalsN gzS flInes -- Obtained from the second command line argument except those ones that are for RTS
- (!data31,!wordsCnt0_data32) = unzip . getData3 printLine gz choice $ flInes
- !data4 = filter (/= 0) . map fst $ data31
- if null data4 then putStrLn (replicate 102 '-') >> putStrLn "1.000+-0.000\tALL!" >> putStrLn (replicate 102 '=') -- Well, this means that all the text consists of the unique (in phonetic meaning) words alongside every line. A rather rare occurrence.
- else do
- let (!mean1,!disp) = meanWithDisp data4
- !pairs = sort . filter ((/= 0) . snd) $ wordsCnt0_data32
- g !m !n = (length . takeWhile (\(_,v) -> v == n) . dropWhile (\(_,v) -> v /= n) . takeWhile (\(u,_) -> u == m) . dropWhile (\(u,_) -> u /= m) $ pairs) `using` rdeepseq
- h !y !x = mconcat [mconcat . map (\m1 -> mconcat [mconcat . map (\n1 -> (if y then show (g m1 n1) else if g m1 n1 == 0 then "." else show (g m1 n1)) ++ "\t") $ [1..gz],newLineEnding]) $ [2..7],replicate 102 x]
- mapM_ (putStrLn . snd) data31
- putStrLn . generalInfo1 gz pairs (mean1, disp) $ data31
- putStrLn (h False '~')
- putStrLn (h True '=')
+innerProc :: String -> Int -> String -> [String] -> IO ()
+innerProc gzS printLine choice flInes = getData3 (getIntervalsN gzS flInes) printLine choice flInes
getIntervalsN :: String -> [String] -> Int
getIntervalsN xs yss
@@ -110,8 +94,9 @@ getIntervalsN xs yss
| otherwise = fromMaybe 9 (readMaybe xs::(Maybe Int))
{-# INLINE getIntervalsN #-}
-getData3 :: Int -> Int -> String -> [String] -> [((Float,String),(Int,Int))]
-getData3 printLine gz choice = parMap rseq (\ts ->
+getData3 :: Int -> Int -> String -> [String] -> IO ()
+getData3 gz printLine choice = mapM_ (\ts -> bracket (do
+ myThread <- forkIO (
let (!maxE,!minE,!data2) = runEval (parTuple3 rpar rpar rpar ((\k -> if k == 0.0 then 1.0 else k) . (\rs -> if null rs then 0.0 else head rs) . firstFrom3 .
maximumElBy 1 (V.singleton oneProperty) $ UL2 ([],uniquenessVariants2GN " 01-" (V.singleton oneProperty) (chooseMax choice) ts), (\k -> if k == 0.0 then 1.0 else k) . abs . (\rs -> if null rs then 0.0 else head rs) .
firstFrom3 . maximumElBy 1 (V.singleton oneProperty) $ UL2 ([],uniquenessVariants2GN " 01-" (V.singleton oneProperty)
@@ -120,24 +105,20 @@ getData3 printLine gz choice = parMap rseq (\ts ->
(!wordsN,!intervalN)
| maxE == 1 = (0, 0)
| otherwise = runEval (parTuple2 rpar rpar (length . words $ ts, intervalNRealFrac minE maxE gz data2))
- (!ratio,!printedLine) = (if maxE == 1.0 then 0.0 else 2.0 * data2 / (minE + maxE), mconcat [showFFloat (precChoice choice) minE "\t",
- showFFloat (precChoice choice) data2 "\t", showFFloat (precChoice choice) maxE "\t", showFFloat (Just 4) (data2 / minE) "\t",
- showFFloat (Just 4) (maxE / data2) "\t", showFFloat (Just 4) ratio "\t", '\t':show (wordsN::Int),
- '\t':show (intervalN::Int), if printLine == 1 then '\t':ts else ""]) in ((ratio,printedLine),(wordsN,intervalN)))
+ !ratio = if maxE == 1.0 then 0.0 else 2.0 * data2 / (minE + maxE) in do
+ hPutStr stdout . showFFloat (precChoice choice) minE $ "\t"
+ hPutStr stdout . showFFloat (precChoice choice) data2 $ "\t"
+ hPutStr stdout . showFFloat (precChoice choice) maxE $ "\t"
+ hPutStr stdout . showFFloat (Just 4) (data2 / minE) $ "\t"
+ hPutStr stdout . showFFloat (Just 4) (maxE / data2) $ "\t"
+ hPutStr stdout . showFFloat (Just 4) (maxE / data2) $ "\t"
+ hPutStr stdout . showFFloat Nothing ratio $ "\t"
+ hPutStr stdout ('\t':show (wordsN::Int))
+ hPutStr stdout ('\t':show (intervalN::Int))
+ hPutStrLn stdout (if printLine == 1 then '\t':ts else ""))
+ return myThread) (\myThread -> killThread myThread) (\_ -> hPutStr stdout ""))
{-# INLINABLE getData3 #-}
fLines :: Int -> String -> [String]
fLines !toOneLine = getTL (filter (any (\x -> isUkrainian x && isAlpha x))) . F1 . prepareText . (\z -> if toOneLine == 1 then unwords . words $ z else z)
{-# INLINE fLines #-}
-
-generalInfo1 :: Int -> [(Int,Int)] -> (Float,Float) -> [(Float,String)] -> String
-generalInfo1 gz pairs (mean1, disp) data31 =
- let !ks = map (\r -> length . takeWhile (== r) . dropWhile (/= r) . sort . map snd $ pairs) [1..gz]
- !s = sum ks in
- mconcat [replicate 102 '-', newLineEnding, mconcat . map (\r -> show r ++ "\t") $ [1..gz], newLineEnding, mconcat . map (\r -> show r ++ "\t") $ ks,
- newLineEnding, mconcat . map (\r -> showFFloat (Just 2) (fromIntegral (r * 100) / fromIntegral s) "%\t") $ ks,newLineEnding,
- mconcat [showFFloat (Just 4) mean1 "+-", showFFloat (Just 4) (sqrt disp) "\t", show (length . filter (== 0) . map fst $ data31),
- '\t':show (length data31)], newLineEnding, mconcat . map (\r -> show r ++ "\t") $ [2..7], newLineEnding, mconcat .
- map (\r -> (show . length . takeWhile (== r) . dropWhile (/= r) . map fst $ pairs) ++ "\t") $ [2..7], newLineEnding, replicate 102 '*']
-{-# INLINE generalInfo1 #-}
-
diff --git a/uniqueness-periods-vector-examples.cabal b/uniqueness-periods-vector-examples.cabal
index ac9a37e..76445c2 100644
--- a/uniqueness-periods-vector-examples.cabal
+++ b/uniqueness-periods-vector-examples.cabal
@@ -3,7 +3,7 @@
-- http://haskell.org/cabal/users-guide/
name: uniqueness-periods-vector-examples
-version: 0.12.3.1
+version: 0.13.0.0
synopsis: Usage examples for the uniqueness-periods-vector series of packages
description: Usage examples for the uniqueness-periods-vector series of packages. Several executables are planned to demonstrate the libraries work.
homepage: https://hackage.haskell.org/package/uniqueness-periods-vector-examples
@@ -43,11 +43,20 @@ executable rewritePoem
hs-source-dirs: ., Lines
default-language: Haskell2010
-executable processText
+executable propertiesText
main-is: Main.hs
other-modules: Languages.UniquenessPeriods.Vector.FuncRepRelated, Data.Statistics.RulesIntervals
other-extensions: CPP, BangPatterns, FlexibleInstances, MultiParamTypeClasses
- build-depends: base >=4.7 && <4.15, mmsyn6ukr >=0.8.1 && <1, vector >=0.11 && <0.14, uniqueness-periods-vector >=0.3.1.1 && <1, uniqueness-periods-vector-general >=0.5.1 && < 1, uniqueness-periods-vector-common >=0.5.1.1 && <1, uniqueness-periods-vector-properties >=0.5.5 && <1, print-info >=0.1.3 && <1, phonetic-languages-ukrainian >=0.2.3 && <1, uniqueness-periods-vector-filters >=0.3 && <1, uniqueness-periods-vector-stats >=0.1.2 && <1, parallel >=3.2.0.6 && <4, lists-flines >=0.1.1 && <1, vector >=0.11 && <0.14
+ build-depends: base >=4.7 && <4.15, mmsyn6ukr >=0.8.1 && <1, vector >=0.11 && <0.14, uniqueness-periods-vector >=0.3.1.1 && <1, uniqueness-periods-vector-general >=0.5.1 && < 1, uniqueness-periods-vector-common >=0.5.1.1 && <1, uniqueness-periods-vector-properties >=0.5.5 && <1, phonetic-languages-ukrainian >=0.2.3 && <1, uniqueness-periods-vector-filters >=0.3 && <1, uniqueness-periods-vector-stats >=0.1.2 && <1, parallel >=3.2.0.6 && <4, lists-flines >=0.1.1 && <1
ghc-options: -threaded -rtsopts
- hs-source-dirs: ., Proportion
+ hs-source-dirs: ., GetInfo
+ default-language: Haskell2010
+
+executable distributionText
+ main-is: Main.hs
+ other-modules: Data.Statistics.RulesIntervals
+ other-extensions: CPP, BangPatterns
+ build-depends: base >=4.7 && <4.15, bytestring >=0.10 && <0.13, parallel >=3.2.0.6 && <4, lists-flines >=0.1.1 && <1, uniqueness-periods-vector-stats >=0.1.2 && <1
+ ghc-options: -threaded -rtsopts
+ hs-source-dirs: ., Distribution
default-language: Haskell2010