summaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
Diffstat (limited to 'examples')
-rw-r--r--examples/example-radon/ExampleRadon.hs169
-rw-r--r--examples/example-radon/Plotting.hs79
-rw-r--r--examples/example0.1/Example0_1.hs36
-rw-r--r--examples/example0/Example0.hs30
-rw-r--r--examples/example1/Example1.hs75
-rw-r--r--examples/example2/Example2.hs78
6 files changed, 467 insertions, 0 deletions
diff --git a/examples/example-radon/ExampleRadon.hs b/examples/example-radon/ExampleRadon.hs
new file mode 100644
index 0000000..1695e6b
--- /dev/null
+++ b/examples/example-radon/ExampleRadon.hs
@@ -0,0 +1,169 @@
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE DeriveAnyClass #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE DuplicateRecordFields #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE OverloadedLabels #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE TupleSections #-}
+{-# LANGUAGE Arrows #-}
+{-# OPTIONS_GHC -Wwarn -Wno-missing-signatures -Wno-name-shadowing #-}
+
+-- This example is loosely based on the series of blog posts by Thomas Wiecki
+-- https://twiecki.io/blog/2014/03/17/bayesian-glms-3/ .
+
+import Control.Monad
+import Data.Aeson
+import qualified Data.Csv as Csv
+import Data.DocRecord
+import qualified Data.Text as T
+import qualified Data.Vector as V
+import Data.Functor
+import GHC.Generics
+import Porcupine
+import Prelude hiding (id, (.))
+import qualified Control.Foldl as L
+import Graphics.Vega.VegaLite as VL
+import Control.Monad.Bayes.Class
+import Control.Monad.Bayes.Sampler
+import Control.Monad.Bayes.Weighted
+import Control.Monad.Bayes.Traced
+import Numeric.Log
+
+import Plotting -- In the same folder
+
+
+data RadonObservation = RadonObservation
+ { state :: !T.Text
+ , county :: !T.Text
+ , basement :: !T.Text
+ , log_radon :: !Double }
+ deriving (Generic, FromJSON, ToJSON
+ ,Csv.FromNamedRecord, Csv.ToNamedRecord, Csv.DefaultOrdered)
+
+-- | We want to read each RadonObservation as a set of Records. This supports
+-- reading from CSV files with headers and from JSON files. The Vector cannot
+-- directly be read from the CSV, as we would not known whether the columns are
+-- positional or nominal. This is why we use the 'Records' wrapper here (for
+-- nominal columns). This requires our datatype to instantiate
+-- Csv.From/ToNamedRecord
+radonObsSerials :: BidirSerials (V.Vector RadonObservation)
+radonObsSerials = dimap Records fromRecords $ -- We wrap/unwrap the Records
+ someBidirSerial (CSVSerial "csv" True ',')
+ <>
+ someBidirSerial JSONSerial
+
+radonObsFile :: DataSource (V.Vector RadonObservation)
+radonObsFile = dataSource ["data", "radon"] radonObsSerials
+
+filteredCsvFile :: DataSink (V.Vector RadonObservation)
+filteredCsvFile = dataSink ["debug", "radon-filtered"] radonObsSerials
+
+vegaliteSerials :: PureSerials VegaLite
+vegaliteSerials =
+ lmap VL.toHtml (somePureSerial $ PlainTextSerial $ Just "html")
+ <> lmap VL.fromVL (somePureSerial JSONSerial)
+
+writeViz name = writeData (dataSink ["viz", name] vegaliteSerials)
+
+data Summary = Summary { numRows :: Int
+ , uniqueStates :: [T.Text]
+ , numUniqueCounties :: Int }
+ deriving (Show)
+
+foldSummary :: L.Fold RadonObservation Summary
+foldSummary = Summary <$> L.length
+ <*> L.premap state L.nub
+ <*> (L.premap county L.nub <&> length)
+
+data ModelParams = ModelParams
+ { rateWithB :: Double -- ^ ratio of houses with and without basement
+ , radonWithB :: Double -- ^ radon level in houses with basement
+ , radonWithoutB :: Double -- ^ radon level in houses without basement
+ , noiseWithB :: Double -- ^ variation around radonWithB
+ , noiseWithoutB :: Double -- ^ variation around radonWithoutB
+ } deriving (Eq, Show, Generic, ToJSON, FromJSON)
+
+priorModel :: MonadSample m => m ModelParams
+priorModel =
+ ModelParams <$> uniform 0 1
+ <*> uniform 0 10
+ <*> uniform 0 10
+ <*> uniform 0 10
+ <*> uniform 0 10
+
+likelihood :: ModelParams -> (Bool, Double) -> Log Double
+likelihood params (hasBasement, radonObserved) =
+ case hasBasement of
+ True -> let radonModel = radonWithB params
+ noiseModel = noiseWithB params
+ rate = realToFrac $ rateWithB params
+ in rate * normalPdf radonModel noiseModel radonObserved
+ False -> let radonModel = radonWithoutB params
+ noiseModel = noiseWithoutB params
+ rate = realToFrac $ 1 - rateWithB params
+ in rate * normalPdf radonModel noiseModel radonObserved
+
+model :: MonadInfer m => [(Bool, Double)] -> m ModelParams
+model observations = do
+ params <- priorModel
+ mapM_ (score . likelihood params) observations
+ return params
+
+posteriorForward :: MonadSample m => m ModelParams -> m (Bool, Double)
+posteriorForward model = do
+ params <- model
+ hasBasement <- bernoulli (rateWithB params)
+ value <- case hasBasement of
+ True -> normal (radonWithB params) (noiseWithB params)
+ False -> normal (radonWithoutB params) (noiseWithoutB params)
+ return (hasBasement, value)
+
+sampleFlatLinRegModel :: (LogThrow m) => PTask m () ()
+sampleFlatLinRegModel = proc () -> do
+ radonObs <- loadData radonObsFile -< ()
+ writeData filteredCsvFile -< radonObs
+ let (summary,xs,ys) = flip L.fold radonObs $
+ (,,) <$> foldSummary
+ <*> L.premap ((== "Y") . basement) L.list
+ <*> L.premap log_radon L.list
+ xLbl = "has basement"
+ yLbl = "log radon"
+ logInfo -< show summary
+
+ vizSize <- getOption ["viz", "options"]
+ (docField @"vizSize" (400,400) "Width & height of visualisations") -< ()
+ writeViz "1" -< plot vizSize
+ (S $ scatter2 xLbl yLbl (-3,5))
+ (Cols [(xLbl, VL.Booleans xs)
+ ,(yLbl, VL.Numbers ys)])
+ nsamples <- getOption ["sampling", "options"]
+ (docField @"nsamples" 5000 "Number of samples to draw") -< ()
+ samples <- ioTask -<
+ sampleIOfixed $ prior $ mh nsamples $ model (zip xs ys)
+ writeViz "2" -< plot vizSize
+ (H [[density2DPlot "radonWithB" "radonWithoutB" (0,2) (0,2)]
+ ,[density2DPlot "noiseWithB" "noiseWithoutB" (0,2) (0,2)]])
+ (J samples)
+
+ samples <- ioTask -<
+ sampleIOfixed $ prior $ mh nsamples $ posteriorForward $ model (zip xs ys)
+ let (xModel, yModel) = unzip samples
+ writeViz "3" -< plot vizSize
+ (S $ scatter2 xLbl yLbl (-3,5))
+ (Cols [(xLbl, VL.Booleans xModel)
+ ,(yLbl, VL.Numbers yModel)])
+
+
+runIn topdir = runPipelineTask
+ (FullConfig "example-radon" -- Name of the executable (for --help)
+ "example-radon.yaml" -- Default config file path
+ topdir -- Default root directory for mappings
+ ())
+ (baseContexts "")
+ sampleFlatLinRegModel ()
+
+main :: IO ()
+main = runIn "examples/example-radon"
diff --git a/examples/example-radon/Plotting.hs b/examples/example-radon/Plotting.hs
new file mode 100644
index 0000000..f4522da
--- /dev/null
+++ b/examples/example-radon/Plotting.hs
@@ -0,0 +1,79 @@
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ExistentialQuantification #-}
+
+module Plotting where
+
+import Data.Aeson
+import Graphics.Vega.VegaLite
+import Data.Text (Text, pack)
+
+
+barPlot :: Text -> VLSpec
+barPlot xName =
+ let enc = encoding
+ . position X [PName xName, PmType Nominal, PAxis [AxGrid True, AxTitle xName]]
+ . position Y [PName "binnedData", PAggregate Count, PmType Quantitative, PAxis [AxGrid False, AxTitle "count"]]
+ in asSpec $ [mark Bar [MOpacity 1.0, MColor "#a3c6de"], enc []]
+
+barPlot2 :: Text -> Text -> VLSpec
+barPlot2 xName yName =
+ let enc = encoding
+ . position X [PName xName, PmType Nominal, PAxis [AxGrid True, AxTitle xName]]
+ . position Y [PName yName, PmType Quantitative, PAxis [AxGrid False, AxTitle yName]]
+ in asSpec $ [mark Bar [MOpacity 1.0, MColor "#a3c6de"], enc []]
+
+linePlot :: Text -> Text -> VLSpec
+linePlot xName yName =
+ let enc = encoding
+ . position X [PName xName, PmType Quantitative, PAxis [AxGrid True, AxTitle xName]]
+ . position Y [PName yName, PmType Quantitative, PAxis [AxGrid False, AxTitle yName]]
+ in asSpec $ [mark Line [MColor "green"], enc []]
+
+density2DPlot :: Text -> Text -> (Double, Double) -> (Double, Double) -> VLSpec
+density2DPlot xName yName (xmin, xmax) (ymin, ymax) =
+ let enc = encoding
+ . position X [PName xName
+ ,PScale [SDomain (DNumbers [xmin, xmax])]
+ ,PBin [Step 0.1], PmType Quantitative, PAxis [AxGrid True, AxTitle xName]]
+ . position Y [PName yName
+ ,PScale [SDomain (DNumbers [ymin, ymax])]
+ ,PBin [Step 0.1]
+ ,PmType Quantitative
+ ,PAxis [AxGrid True, AxTitle yName]]
+ . color [ MAggregate Count, MName "col", MmType Quantitative
+ , MScale [{-SReverse False,-} SScheme "blues" [0.0, 1.0]]]
+ in asSpec $ [mark Rect [MClip True], enc []]
+
+scatter2 :: Text -> Text -> (Double, Double) -> VLSpec
+scatter2 xName yName (ymin, ymax) =
+ let enc = encoding
+ . position X [PName xName, PmType Nominal, PAxis [AxGrid True, AxTitle xName]]
+ . position Y [PName yName
+ , PScale [SDomain (DNumbers [ymin, ymax])]
+ , PmType Quantitative
+ , PAxis [AxGrid True, AxTitle yName]]
+ in asSpec $ [mark Tick [MClip True], enc []]
+
+data SpecGrid = H [[VLSpec]] | V [[VLSpec]] | L [VLSpec] | S VLSpec
+
+data InputData = Cols [(Text, DataValues)]
+ | forall j. (ToJSON j) => J j
+ | File FilePath
+
+plot :: (Double, Double) -> SpecGrid -> InputData -> VegaLite
+plot (figw,figh) gridOfLayers dat =
+ let desc = description "Plot"
+ dat' = case dat of
+ Cols cols -> foldl (.) (dataFromColumns []) (map (uncurry dataColumn) cols) []
+ J o -> dataFromJson (toJSON o) []
+ File fp -> dataFromSource (pack fp) []
+ conf = configure
+ -- . configuration (Axis [ DomainWidth 1 ])
+ . configuration (SelectionStyle [ ( Single, [ On "dblclick" ] ) ])
+ . configuration (View [ViewStroke (Just "transparent")])
+ spec = case gridOfLayers of
+ S l -> layer [l]
+ L ls -> layer ls
+ H lss -> hConcat (map (asSpec . (:[]) . layer) lss)
+ V lss -> vConcat (map (asSpec . (:[]) . layer) lss)
+ in toVegaLite [width figw, height figh, conf [], desc, dat', spec]
diff --git a/examples/example0.1/Example0_1.hs b/examples/example0.1/Example0_1.hs
new file mode 100644
index 0000000..517921f
--- /dev/null
+++ b/examples/example0.1/Example0_1.hs
@@ -0,0 +1,36 @@
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE Arrows #-}
+
+import Data.DocRecord
+import qualified Data.Text.Lazy as T
+import Porcupine
+import Prelude hiding (id, (.))
+
+
+yzCompress :: T.Text -> T.Text
+yzCompress = T.concat . map counts . T.group
+ where
+ counts s = T.pack (show (T.length s)) <> T.take 1 s <> ","
+
+resultFile :: DataSink T.Text
+resultFile = dataSink ["result"] $
+ somePureSerial (PlainTextSerial (Just "txt"))
+ <> lmap yzCompress
+ (somePureSerial (PlainTextSerial (Just "yz")))
+
+myTask :: (LogThrow m) => PTask m () ()
+myTask = proc () -> do
+ (FV chars :& FV nums :& _) <-
+ getOptions ["options"]
+ ( docField @"chars" "a" "The chars to repeat"
+ :& docField @"replications" [10::Int] "The numbers of replications"
+ :& RNil) -< ()
+ let txt = T.concat $
+ zipWith (\s n -> T.replicate (fromIntegral n) (T.singleton s)) chars nums
+ writeData resultFile -< txt
+
+main :: IO ()
+main = runLocalPipelineTask (FullConfig "example0.1" "example0_1.yaml" "." ()) myTask ()
diff --git a/examples/example0/Example0.hs b/examples/example0/Example0.hs
new file mode 100644
index 0000000..a8e74c3
--- /dev/null
+++ b/examples/example0/Example0.hs
@@ -0,0 +1,30 @@
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE TypeApplications #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE Arrows #-}
+
+import Data.DocRecord
+import qualified Data.Text.Lazy as T
+import Porcupine
+
+
+resultFile :: DataSink T.Text
+resultFile = dataSink ["result"] $
+ somePureSerial (PlainTextSerial (Just "txt"))
+
+myTask :: (LogThrow m) => PTask m () ()
+myTask = proc () -> do
+ (FV char :& FV num :& _) <- getMyOptions -< ()
+ let txt = T.replicate (fromIntegral num) (T.singleton char)
+ writeData resultFile -< txt
+ where
+ getMyOptions =
+ getOptions ["options"]
+ ( docField @"char" 'a' "The character to repeat"
+ :& docField @"replications" (10::Int) "The number of replications"
+ :& RNil)
+
+main :: IO ()
+-- main = simpleRunPTask myTask ()
+main = runLocalPipelineTask (FullConfig "example0" "example0.yaml" "." ()) myTask ()
diff --git a/examples/example1/Example1.hs b/examples/example1/Example1.hs
new file mode 100644
index 0000000..b81df38
--- /dev/null
+++ b/examples/example1/Example1.hs
@@ -0,0 +1,75 @@
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeApplications #-}
+
+import Data.Aeson
+import Data.DocRecord
+import qualified Data.HashMap.Strict as HM
+import qualified Data.Text as T
+import GHC.Generics
+import Porcupine
+
+
+data User = User { userName :: T.Text
+ , userSurname :: T.Text
+ , userAge :: Int }
+ deriving (Generic)
+instance FromJSON User
+
+newtype Analysis = Analysis { numLetters :: HM.HashMap Char Int }
+ deriving (Generic)
+instance ToJSON Analysis
+
+-- | How to load users
+userFile :: DataSource User
+userFile = dataSource ["Inputs", "User"]
+ (somePureDeserial JSONSerial)
+
+-- | How to write analysis
+analysisFile :: DataSink Analysis
+analysisFile = dataSink ["Outputs", "Analysis"]
+ (somePureSerial JSONSerial)
+
+-- | The simple computation we want to perform
+computeAnalysis :: User -> Analysis
+computeAnalysis (User name surname _) = Analysis $
+ HM.fromListWith (+) $ [(c,1) | c <- T.unpack name]
+ ++ [(c,1) | c <- T.unpack surname]
+
+-- | The task combining the three previous operations.
+--
+-- This task may look very opaque from the outside, having no parameters and no
+-- return value. But we will be able to reuse it over different users without
+-- having to change it at all.
+analyseOneUser :: (LogThrow m) => PTask m () ()
+analyseOneUser =
+ loadData userFile >>> arr computeAnalysis >>> writeData analysisFile
+
+mainTask :: (LogThrow m) => PTask m () ()
+mainTask =
+ -- First we get the ids of the users that we want to analyse. We need only one
+ -- field that will contain a range of values, see IndexRange. By default, this
+ -- range contains just one value, zero.
+ getOption ["Settings"] (docField @"users" (oneIndex (0::Int)) "The user ids to load")
+ -- We turn the range we read into a full lazy list:
+ >>> arr enumTRIndices
+ -- Then we just map over these ids and call analyseOneUser each time:
+ >>> parMapTask_ "userId" analyseOneUser
+
+main :: IO ()
+main = runPipelineTask (FullConfig "example1" "porcupine-example1.yaml" "porcupine-core/examples/example1/data" ())
+ -- The CLI/Yaml configuration to use (prog name,
+ -- default config file to create, and default root to
+ -- use for the porcupine tree)
+ (baseContexts "")
+ -- The contexts to use. 'baseContexts' is the
+ -- minimum. It gives out katip logging and local files
+ -- access (through ResourceT). The string param is the
+ -- top namespace for the logger. When we use
+ -- FullConfig (and therefore CLI), the progName for
+ -- the CLI given above ("example1") will be inherited
+ -- by the logger, so we can leave it blank
+ mainTask ()
diff --git a/examples/example2/Example2.hs b/examples/example2/Example2.hs
new file mode 100644
index 0000000..e9e043c
--- /dev/null
+++ b/examples/example2/Example2.hs
@@ -0,0 +1,78 @@
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeApplications #-}
+
+import Data.Aeson
+import qualified Data.Text as T
+import GHC.Generics
+import Porcupine
+
+import Prelude hiding (id, (.))
+
+
+-- This example uses the porcupine to read a data that represents the evloution of a given stock in given data and
+-- gives back the average and standard deviation of the stock on that date.
+
+data Stockdaily = Stockdaily {date :: String , close :: Double}
+ deriving (Generic)
+instance FromJSON Stockdaily
+
+newtype Stock = Stock { chart :: [Stockdaily] }
+ deriving (Generic)
+instance FromJSON Stock
+
+getCloseStock :: Stock -> [Double]
+getCloseStock s = map close (chart s)
+
+-- | How to load Stock prices
+stockFile :: DataSource Stock
+stockFile = dataSource ["Inputs", "Stock"]
+ (somePureDeserial JSONSerial)
+
+-- | How to write the smoothed stock prices
+globalMatrix :: DataSink (Tabular [[Double]])
+globalMatrix = dataSink ["Outputs" , "globalData"]
+ (somePureSerial (CSVSerial (T.pack "csv") False ','))
+
+avg :: [Double] -> Double
+avg list = let s = sum list
+ n = fromIntegral (length list)
+ in s/n
+
+msliding :: Int -> [a] -> [[a]]
+msliding n p = case p of
+ [] -> []
+ (_:xs) -> [take n p] ++ (msliding n xs)
+
+-- | The simple computation we want to perform
+computeSmoothedCurve :: Stock -> [Double]
+computeSmoothedCurve s = curve
+ where
+ price = getCloseStock s
+ curve = map avg (msliding 10 price)
+
+analyseStocks :: (LogThrow m) => PTask m () ()
+analyseStocks =
+ arr (const ["aapl"::TRIndex, "fb" , "googl"]) -- We want the stocks for some
+ -- fixed set of companies
+ >>> loadDataList "company" stockFile
+ >>> arr (Tabular Nothing . map (\(_idx,stock) -> computeSmoothedCurve stock))
+ >>> writeData globalMatrix
+
+main :: IO ()
+main = runPipelineTask (FullConfig "example2" "porcupine-example2.yaml" "porcupine-core/examples/example2/data" ())
+ -- The CLI/Yaml configuration to use (prog name,
+ -- default config file to create, and default root to
+ -- use for the porcupine tree)
+ (baseContexts "")
+ -- The contexts to use. 'baseContexts' is the
+ -- minimum. It gives out katip logging and local files
+ -- access (through ResourceT). The string param is the
+ -- top namespace for the logger. When we use
+ -- FullConfig (and therefore CLI), the progName for
+ -- the CLI given above ("example1") will be inherited
+ -- by the logger, so we can leave it blank
+ analyseStocks ()