summaryrefslogtreecommitdiff
path: root/examples/example-radon/ExampleRadon.hs
blob: 1695e6bb15b53c390bae3e57f72957d476f4f884 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
{-# LANGUAGE DataKinds             #-}
{-# LANGUAGE DeriveAnyClass        #-}
{-# LANGUAGE DeriveGeneric         #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE FlexibleContexts      #-}
{-# LANGUAGE OverloadedLabels      #-}
{-# LANGUAGE OverloadedStrings     #-}
{-# LANGUAGE ScopedTypeVariables   #-}
{-# LANGUAGE TypeApplications      #-}
{-# LANGUAGE TupleSections         #-}
{-# LANGUAGE Arrows #-}
{-# OPTIONS_GHC -Wwarn -Wno-missing-signatures -Wno-name-shadowing #-}

-- This example is loosely based on the series of blog posts by Thomas Wiecki
-- https://twiecki.io/blog/2014/03/17/bayesian-glms-3/ .

import           Control.Monad
import           Data.Aeson
import qualified Data.Csv as Csv
import           Data.DocRecord
import qualified Data.Text                     as T
import qualified Data.Vector                   as V
import Data.Functor
import           GHC.Generics
import           Porcupine
import           Prelude                       hiding (id, (.))
import qualified Control.Foldl as L
import           Graphics.Vega.VegaLite        as VL
import Control.Monad.Bayes.Class
import Control.Monad.Bayes.Sampler
import Control.Monad.Bayes.Weighted
import Control.Monad.Bayes.Traced
import Numeric.Log

import Plotting  -- In the same folder


data RadonObservation = RadonObservation
  { state :: !T.Text
  , county :: !T.Text
  , basement :: !T.Text
  , log_radon :: !Double }
  deriving (Generic, FromJSON, ToJSON
           ,Csv.FromNamedRecord, Csv.ToNamedRecord, Csv.DefaultOrdered)

-- | We want to read each RadonObservation as a set of Records. This supports
-- reading from CSV files with headers and from JSON files. The Vector cannot
-- directly be read from the CSV, as we would not known whether the columns are
-- positional or nominal. This is why we use the 'Records' wrapper here (for
-- nominal columns). This requires our datatype to instantiate
-- Csv.From/ToNamedRecord
radonObsSerials :: BidirSerials (V.Vector RadonObservation)
radonObsSerials = dimap Records fromRecords $  -- We wrap/unwrap the Records
  someBidirSerial (CSVSerial "csv" True ',')
  <>
  someBidirSerial JSONSerial

radonObsFile :: DataSource (V.Vector RadonObservation)
radonObsFile = dataSource ["data", "radon"] radonObsSerials

filteredCsvFile :: DataSink (V.Vector RadonObservation)
filteredCsvFile = dataSink ["debug", "radon-filtered"] radonObsSerials

vegaliteSerials :: PureSerials VegaLite
vegaliteSerials =
  lmap VL.toHtml (somePureSerial $ PlainTextSerial $ Just "html")
  <> lmap VL.fromVL (somePureSerial JSONSerial)

writeViz name = writeData (dataSink ["viz", name] vegaliteSerials)

data Summary = Summary { numRows :: Int
                       , uniqueStates :: [T.Text]
                       , numUniqueCounties :: Int }
  deriving (Show)

foldSummary :: L.Fold RadonObservation Summary
foldSummary = Summary <$> L.length
                      <*> L.premap state L.nub
                      <*> (L.premap county L.nub <&> length)

data ModelParams = ModelParams
  { rateWithB :: Double -- ^ ratio of houses with and without basement
  , radonWithB :: Double -- ^ radon level in houses with basement
  , radonWithoutB :: Double -- ^ radon level in houses without basement
  , noiseWithB :: Double -- ^ variation around radonWithB
  , noiseWithoutB :: Double -- ^ variation around radonWithoutB
  } deriving (Eq, Show, Generic, ToJSON, FromJSON)

priorModel :: MonadSample m => m ModelParams
priorModel =
    ModelParams <$> uniform 0 1
                <*> uniform 0 10
                <*> uniform 0 10
                <*> uniform 0 10
                <*> uniform 0 10

likelihood :: ModelParams -> (Bool, Double) -> Log Double
likelihood params (hasBasement, radonObserved) = 
    case hasBasement of
        True -> let radonModel = radonWithB params
                    noiseModel = noiseWithB params
                    rate = realToFrac $ rateWithB params
                in rate * normalPdf radonModel noiseModel radonObserved
        False -> let radonModel = radonWithoutB params
                     noiseModel = noiseWithoutB params
                     rate = realToFrac $ 1 - rateWithB params
                in rate * normalPdf radonModel noiseModel radonObserved

model :: MonadInfer m => [(Bool, Double)] -> m ModelParams
model observations = do
    params <- priorModel
    mapM_ (score . likelihood params) observations
    return params

posteriorForward :: MonadSample m => m ModelParams -> m (Bool, Double)
posteriorForward model = do
    params <- model
    hasBasement <- bernoulli (rateWithB params)
    value <- case hasBasement of
        True -> normal (radonWithB params) (noiseWithB params)
        False -> normal (radonWithoutB params) (noiseWithoutB params)
    return (hasBasement, value)

sampleFlatLinRegModel :: (LogThrow m) => PTask m () ()
sampleFlatLinRegModel = proc () -> do
  radonObs <- loadData radonObsFile -< ()
  writeData filteredCsvFile -< radonObs
  let (summary,xs,ys) = flip L.fold radonObs $
        (,,) <$> foldSummary
             <*> L.premap ((== "Y") . basement) L.list
             <*> L.premap log_radon L.list
      xLbl = "has basement"
      yLbl = "log radon"
  logInfo -< show summary
  
  vizSize <- getOption ["viz", "options"]
             (docField @"vizSize" (400,400) "Width & height of visualisations") -< ()
  writeViz "1" -< plot vizSize
                       (S $ scatter2 xLbl yLbl (-3,5))
                       (Cols [(xLbl, VL.Booleans xs)
                             ,(yLbl, VL.Numbers ys)])
  nsamples <- getOption ["sampling", "options"]
              (docField @"nsamples" 5000 "Number of samples to draw") -< ()
  samples <- ioTask -<
    sampleIOfixed $ prior $ mh nsamples $ model (zip xs ys)
  writeViz "2" -< plot vizSize
                       (H [[density2DPlot "radonWithB" "radonWithoutB" (0,2) (0,2)]
                          ,[density2DPlot "noiseWithB" "noiseWithoutB" (0,2) (0,2)]])
                       (J samples)

  samples <- ioTask -<
    sampleIOfixed $ prior $ mh nsamples $ posteriorForward $ model (zip xs ys)
  let (xModel, yModel) = unzip samples
  writeViz "3" -< plot vizSize
    (S $ scatter2 xLbl yLbl (-3,5))
    (Cols [(xLbl, VL.Booleans xModel)
          ,(yLbl, VL.Numbers yModel)])
  

runIn topdir = runPipelineTask
  (FullConfig "example-radon"  -- Name of the executable (for --help)
              "example-radon.yaml" -- Default config file path
              topdir -- Default root directory for mappings
              ())
  (baseContexts "")
  sampleFlatLinRegModel ()

main :: IO ()
main = runIn "examples/example-radon"