Diffstat (limited to 'examples/example1/Example1.hs'):
 examples/example1/Example1.hs | 75 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+), 0 deletions(-)
diff --git a/examples/example1/Example1.hs b/examples/example1/Example1.hs
new file mode 100644
index 0000000..b81df38
--- /dev/null
+++ b/examples/example1/Example1.hs
@@ -0,0 +1,75 @@
+{-# LANGUAGE DataKinds #-}
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TypeApplications #-}
+
+import Data.Aeson
+import Data.DocRecord
+import qualified Data.HashMap.Strict as HM
+import qualified Data.Text as T
+import GHC.Generics
+import Porcupine
+
+
+data User = User { userName :: T.Text
+ , userSurname :: T.Text
+ , userAge :: Int }
+ deriving (Generic)
+instance FromJSON User
+
+newtype Analysis = Analysis { numLetters :: HM.HashMap Char Int }
+ deriving (Generic)
+instance ToJSON Analysis
+
+-- | How to load users
+userFile :: DataSource User
+userFile = dataSource ["Inputs", "User"]
+ (somePureDeserial JSONSerial)
+
+-- | How to write the analysis
+analysisFile :: DataSink Analysis
+analysisFile = dataSink ["Outputs", "Analysis"]
+ (somePureSerial JSONSerial)
+
+-- | The simple computation we want to perform
+computeAnalysis :: User -> Analysis
+computeAnalysis (User name surname _) = Analysis $
+ HM.fromListWith (+) $ [(c,1) | c <- T.unpack name]
+ ++ [(c,1) | c <- T.unpack surname]
+
+-- | The task combining the three previous operations.
+--
+-- This task may look very opaque from the outside, having no parameters and no
+-- return value. But we will be able to reuse it over different users without
+-- having to change it at all.
+analyseOneUser :: (LogThrow m) => PTask m () ()
+analyseOneUser =
+ loadData userFile >>> arr computeAnalysis >>> writeData analysisFile
+
+mainTask :: (LogThrow m) => PTask m () ()
+mainTask =
+  -- First we get the ids of the users we want to analyse. We need only one
+  -- field, which will contain a range of values (see IndexRange). By default,
+  -- this range contains just one value, zero.
+ getOption ["Settings"] (docField @"users" (oneIndex (0::Int)) "The user ids to load")
+ -- We turn the range we read into a full lazy list:
+ >>> arr enumTRIndices
+ -- Then we just map over these ids and call analyseOneUser each time:
+ >>> parMapTask_ "userId" analyseOneUser
+
+main :: IO ()
+main = runPipelineTask (FullConfig "example1" "porcupine-example1.yaml" "porcupine-core/examples/example1/data" ())
+ -- The CLI/Yaml configuration to use (prog name,
+ -- default config file to create, and default root to
+ -- use for the porcupine tree)
+ (baseContexts "")
+                 -- The contexts to use. 'baseContexts' is the
+                 -- minimum. It provides katip logging and local
+                 -- file access (through ResourceT). The string
+                 -- param is the top namespace for the logger.
+                 -- When we use FullConfig (and therefore the CLI),
+                 -- the progName given above ("example1") will be
+                 -- inherited by the logger, so we can leave it blank.
+ mainTask ()
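
Since the FromJSON instance for User is derived generically, the documents read
through userFile are expected to use the record field names as keys. A minimal
sketch of such a document (the values are made up for illustration; this is not
the example's actual data file):

    { "userName": "Ann", "userSurname": "Na", "userAge": 30 }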
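
computeAnalysis itself just counts letter occurrences across the two name
fields. A quick GHCi check (illustrative only; the user values are made up and
the key order of the HashMap's Show output is not guaranteed):

    ghci> numLetters (computeAnalysis (User (T.pack "Ann") (T.pack "Na") 30))
    fromList [('A',1),('n',2),('N',1),('a',1)]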