1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
|
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedLabels #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE Arrows #-}
{-# OPTIONS_GHC -Wwarn -Wno-missing-signatures -Wno-name-shadowing #-}
-- This example is loosely based on the series of blog posts by Thomas Wiecki
-- https://twiecki.io/blog/2014/03/17/bayesian-glms-3/ .
import Control.Monad
import Data.Aeson
import qualified Data.Csv as Csv
import Data.DocRecord
import qualified Data.Text as T
import qualified Data.Vector as V
import Data.Functor
import GHC.Generics
import Porcupine
import Prelude hiding (id, (.))
import qualified Control.Foldl as L
import Graphics.Vega.VegaLite as VL
import Control.Monad.Bayes.Class
import Control.Monad.Bayes.Sampler
import Control.Monad.Bayes.Weighted
import Control.Monad.Bayes.Traced
import Numeric.Log
import Plotting -- In the same folder
data RadonObservation = RadonObservation
{ state :: !T.Text
, county :: !T.Text
, basement :: !T.Text
, log_radon :: !Double }
deriving (Generic, FromJSON, ToJSON
,Csv.FromNamedRecord, Csv.ToNamedRecord, Csv.DefaultOrdered)
-- | We want to read each RadonObservation as a set of Records. This supports
-- reading from CSV files with headers and from JSON files. The Vector cannot
-- directly be read from the CSV, as we would not known whether the columns are
-- positional or nominal. This is why we use the 'Records' wrapper here (for
-- nominal columns). This requires our datatype to instantiate
-- Csv.From/ToNamedRecord
radonObsSerials :: BidirSerials (V.Vector RadonObservation)
radonObsSerials = dimap Records fromRecords $ -- We wrap/unwrap the Records
someBidirSerial (CSVSerial "csv" True ',')
<>
someBidirSerial JSONSerial
radonObsFile :: DataSource (V.Vector RadonObservation)
radonObsFile = dataSource ["data", "radon"] radonObsSerials
filteredCsvFile :: DataSink (V.Vector RadonObservation)
filteredCsvFile = dataSink ["debug", "radon-filtered"] radonObsSerials
vegaliteSerials :: PureSerials VegaLite
vegaliteSerials =
lmap VL.toHtml (somePureSerial $ PlainTextSerial $ Just "html")
<> lmap VL.fromVL (somePureSerial JSONSerial)
writeViz name = writeData (dataSink ["viz", name] vegaliteSerials)
data Summary = Summary { numRows :: Int
, uniqueStates :: [T.Text]
, numUniqueCounties :: Int }
deriving (Show)
foldSummary :: L.Fold RadonObservation Summary
foldSummary = Summary <$> L.length
<*> L.premap state L.nub
<*> (L.premap county L.nub <&> length)
data ModelParams = ModelParams
{ rateWithB :: Double -- ^ ratio of houses with and without basement
, radonWithB :: Double -- ^ radon level in houses with basement
, radonWithoutB :: Double -- ^ radon level in houses without basement
, noiseWithB :: Double -- ^ variation around radonWithB
, noiseWithoutB :: Double -- ^ variation around radonWithoutB
} deriving (Eq, Show, Generic, ToJSON, FromJSON)
priorModel :: MonadSample m => m ModelParams
priorModel =
ModelParams <$> uniform 0 1
<*> uniform 0 10
<*> uniform 0 10
<*> uniform 0 10
<*> uniform 0 10
likelihood :: ModelParams -> (Bool, Double) -> Log Double
likelihood params (hasBasement, radonObserved) =
case hasBasement of
True -> let radonModel = radonWithB params
noiseModel = noiseWithB params
rate = realToFrac $ rateWithB params
in rate * normalPdf radonModel noiseModel radonObserved
False -> let radonModel = radonWithoutB params
noiseModel = noiseWithoutB params
rate = realToFrac $ 1 - rateWithB params
in rate * normalPdf radonModel noiseModel radonObserved
model :: MonadInfer m => [(Bool, Double)] -> m ModelParams
model observations = do
params <- priorModel
mapM_ (score . likelihood params) observations
return params
posteriorForward :: MonadSample m => m ModelParams -> m (Bool, Double)
posteriorForward model = do
params <- model
hasBasement <- bernoulli (rateWithB params)
value <- case hasBasement of
True -> normal (radonWithB params) (noiseWithB params)
False -> normal (radonWithoutB params) (noiseWithoutB params)
return (hasBasement, value)
sampleFlatLinRegModel :: (LogThrow m) => PTask m () ()
sampleFlatLinRegModel = proc () -> do
radonObs <- loadData radonObsFile -< ()
writeData filteredCsvFile -< radonObs
let (summary,xs,ys) = flip L.fold radonObs $
(,,) <$> foldSummary
<*> L.premap ((== "Y") . basement) L.list
<*> L.premap log_radon L.list
xLbl = "has basement"
yLbl = "log radon"
logInfo -< show summary
vizSize <- getOption ["viz", "options"]
(docField @"vizSize" (400,400) "Width & height of visualisations") -< ()
writeViz "1" -< plot vizSize
(S $ scatter2 xLbl yLbl (-3,5))
(Cols [(xLbl, VL.Booleans xs)
,(yLbl, VL.Numbers ys)])
nsamples <- getOption ["sampling", "options"]
(docField @"nsamples" 5000 "Number of samples to draw") -< ()
samples <- ioTask -<
sampleIOfixed $ prior $ mh nsamples $ model (zip xs ys)
writeViz "2" -< plot vizSize
(H [[density2DPlot "radonWithB" "radonWithoutB" (0,2) (0,2)]
,[density2DPlot "noiseWithB" "noiseWithoutB" (0,2) (0,2)]])
(J samples)
samples <- ioTask -<
sampleIOfixed $ prior $ mh nsamples $ posteriorForward $ model (zip xs ys)
let (xModel, yModel) = unzip samples
writeViz "3" -< plot vizSize
(S $ scatter2 xLbl yLbl (-3,5))
(Cols [(xLbl, VL.Booleans xModel)
,(yLbl, VL.Numbers yModel)])
runIn topdir = runPipelineTask
(FullConfig "example-radon" -- Name of the executable (for --help)
"example-radon.yaml" -- Default config file path
topdir -- Default root directory for mappings
())
(baseContexts "")
sampleFlatLinRegModel ()
main :: IO ()
main = runIn "examples/example-radon"
|