Skip to content

Commit

Permalink
Performance optimizations on Haskell simulation (#64)
Browse files Browse the repository at this point in the history
* sample: fix parsing of --output option

* runSampleModel: only print progress every 10k events

* add perf flag to ouroboros-leios-sim

includes some ghc-options we might want for more costly runs.

* simulation: forking threads directly for SimPraosP2P

io-sim's representation of ThreadId's is too costly with deeply nested
children, such as what Concurrently's sequenceA produces on a 1000
length list.

* moved to io-sim fork that uses IntPSQ for timers, 20x faster.
  • Loading branch information
Saizan authored Nov 1, 2024
1 parent 2eb3d2b commit 985329c
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 35 deletions.
2 changes: 1 addition & 1 deletion cabal.project
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ source-repository-package
source-repository-package
type: git
location: https://github.com/Saizan/io-sim.git
tag: 2ea49cd65ae82ec11826e2c966d77ffb877bca8c
tag: cb3926463e42d5eff98acdbdeadd4a05c7c81f90
subdir:
io-sim
io-classes
25 changes: 24 additions & 1 deletion simulation/ouroboros-leios-sim.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,17 @@ maintainer: [email protected]
build-type: Simple
extra-source-files: CHANGELOG.md

flag perf
description: Ghc options for improved performance, disables asserts.
default: False
manual: True

-- These give us another 10% improvement (on short runs at least)
common performance-opts
ghc-options:
-fignore-asserts -O2 -fexpose-all-unfoldings
-fspecialise-aggressively

library
exposed-modules:
Chan
Expand Down Expand Up @@ -97,9 +108,18 @@ library

hs-source-dirs: src
default-language: Haskell2010
ghc-options: -Wall -fno-ignore-asserts
ghc-options: -Wall

if flag(perf)
import: performance-opts

else
ghc-options: -fno-ignore-asserts

executable viz
if flag(perf)
import: performance-opts

main-is: src/Main.hs
build-depends:
, base
Expand All @@ -110,6 +130,9 @@ executable viz
ghc-options: -Wall

executable sample
if flag(perf)
import: performance-opts

main-is: src/SampleMain.hs
build-depends:
, base
Expand Down
25 changes: 13 additions & 12 deletions simulation/src/PraosProtocol/PraosNode.hs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import Control.Tracer
import Data.ByteString (ByteString)
import Data.Coerce (coerce)
import Data.Either (fromLeft, fromRight)
import Data.Foldable (sequenceA_)
import Data.Map (Map)
import qualified Data.Map as Map
import PraosProtocol.BlockFetch (BlockFetchControllerState, BlockFetchMessage, BlockFetchProducerState (..), PeerId, blockFetchController, initBlockFetchConsumerStateForPeerId, newBlockFetchControllerState, runBlockFetchConsumer, runBlockFetchProducer)
Expand Down Expand Up @@ -64,12 +63,11 @@ runPeer ::
PraosNodeState m ->
PeerId ->
Praos (Chan m) ->
Concurrently m ()
[Concurrently m ()]
runPeer tracer cfg st peerId chan = do
let chainConsumerState = st.chainSyncConsumerStates Map.! peerId
let blockFetchConsumerState = initBlockFetchConsumerStateForPeerId tracer peerId st.blockFetchControllerState
sequenceA_
[ Concurrently $ runChainConsumer (protocolChainSync chan) chainConsumerState
[ Concurrently $ runChainConsumer (protocolChainSync chan) chainConsumerState
, Concurrently $ runBlockFetchConsumer tracer cfg (protocolBlockFetch chan) blockFetchConsumerState
]

Expand All @@ -89,12 +87,11 @@ runFollower ::
PraosNodeState m ->
FollowerId ->
Praos (Chan m) ->
Concurrently m ()
[Concurrently m ()]
runFollower st followerId chan = do
let chainProducerStateVar = st.blockFetchControllerState.cpsVar
let blockFetchProducerState = BlockFetchProducerState $ asReadOnly st.blockFetchControllerState.blocksVar
sequenceA_
[ Concurrently $ runChainProducer (protocolChainSync chan) followerId chainProducerStateVar
[ Concurrently $ runChainProducer (protocolChainSync chan) followerId chainProducerStateVar
, Concurrently $ runBlockFetchProducer (protocolBlockFetch chan) blockFetchProducerState
]

Expand All @@ -115,7 +112,11 @@ runPraosNode ::
m ()
runPraosNode tracer cfg chain followers peers = do
st0 <- PraosNodeState <$> newBlockFetchControllerState chain <*> pure Map.empty
runConcurrently =<< setupPraosThreads tracer cfg st0 followers peers
concurrentlyMany . map runConcurrently =<< setupPraosThreads tracer cfg st0 followers peers
where
-- Nested children threads are slow with IOSim, this impl forks them all as direct children.
concurrentlyMany :: MonadAsync m => [m ()] -> m ()
concurrentlyMany xs = mapM_ wait =<< mapM async xs

setupPraosThreads ::
(MonadAsync m, MonadSTM m, MonadDelay m) =>
Expand All @@ -124,14 +125,14 @@ setupPraosThreads ::
PraosNodeState m ->
[Praos (Chan m)] ->
[Praos (Chan m)] ->
m (Concurrently m ())
m [Concurrently m ()]
setupPraosThreads tracer cfg st0 followers peers = do
(st1, followerIds) <- repeatM addFollower (length followers) st0
(st2, peerIds) <- repeatM addPeer (length peers) st1
let controllerThread = Concurrently $ blockFetchController tracer st2.blockFetchControllerState
let followerThreads = zipWith (runFollower st2) followerIds followers
let peerThreads = zipWith (runPeer tracer cfg st2) peerIds peers
return $ sequenceA_ (controllerThread : followerThreads <> peerThreads)
return (controllerThread : concat followerThreads <> concat peerThreads)

data PraosNodeConfig = PraosNodeConfig
{ praosConfig :: PraosConfig
Expand All @@ -147,7 +148,7 @@ praosNode ::
PraosNodeConfig ->
[Praos (Chan m)] ->
[Praos (Chan m)] ->
m ()
m ([m ()])
praosNode tracer cfg followers peers = do
st0 <- PraosNodeState <$> newBlockFetchControllerState cfg.chain <*> pure Map.empty
praosThreads <- setupPraosThreads tracer cfg.praosConfig st0 followers peers
Expand All @@ -159,4 +160,4 @@ praosNode tracer cfg followers peers = do
st0.blockFetchControllerState.cpsVar
(BlockFetch.addProducedBlock st0.blockFetchControllerState)
nextBlock
runConcurrently $ sequenceA_ [Concurrently generationThread, praosThreads]
return $ map runConcurrently $ Concurrently generationThread : praosThreads
36 changes: 18 additions & 18 deletions simulation/src/PraosProtocol/SimPraosP2P.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,20 @@

module PraosProtocol.SimPraosP2P where

import Control.Monad.Class.MonadAsync (
Concurrently (Concurrently, runConcurrently),
)
import Control.Monad.IOSim as IOSim (IOSim, runSimTrace)
import Control.Tracer as Tracer (
Contravariant (contramap),
Tracer (Tracer),
Tracer,
traceWith,
)
import Data.Foldable (sequenceA_)
import Data.List (unfoldr)
import qualified Data.Map.Strict as Map
import System.Random (StdGen, split)

import ChanMux (newConnectionBundleTCP)
import ChanTCP
import Control.Monad (forever)
import Control.Monad.Class.MonadFork (MonadFork (forkIO))
import P2P (P2PTopography (..))
import PraosProtocol.Common
import PraosProtocol.PraosNode
Expand Down Expand Up @@ -75,19 +73,21 @@ tracePraosP2P
]
-- Note that the incomming edges are the output ends of the
-- channels and vice versa. That's why it looks backwards.
runConcurrently $
sequenceA_
[ Concurrently $
praosNode
(nodeTracer nid)
(praosConfig slotConfig nid rng)
(Map.findWithDefault [] nid tcplinksInChan)
(Map.findWithDefault [] nid tcplinksOutChan)
| (nid, rng) <-
zip
(Map.keys p2pNodes)
(unfoldr (Just . split) rng0)
]

-- Nested children threads are slow with IOSim, this impl forks them all as direct children.
mapM_
(\m -> mapM_ forkIO =<< m)
[ praosNode
(nodeTracer nid)
(praosConfig slotConfig nid rng)
(Map.findWithDefault [] nid tcplinksInChan)
(Map.findWithDefault [] nid tcplinksOutChan)
| (nid, rng) <-
zip
(Map.keys p2pNodes)
(unfoldr (Just . split) rng0)
]
forever $ threadDelaySI 1000
where
tracer :: Tracer (IOSim s) PraosEvent
tracer = simTracer
Expand Down
2 changes: 1 addition & 1 deletion simulation/src/Sample.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ runSampleModel ::
IO ()
runSampleModel (SampleModel s0 accum render) stop = go . flip SimVizModel s0 . takeWhile (\(t, _) -> t <= stop)
where
go m = case stepSimViz 1000 m of
go m = case stepSimViz 10000 m of
m'@(SimVizModel ((now, _) : _) _) -> do
putStrLn $ "time reached: " ++ show now
hFlush stdout
Expand Down
3 changes: 1 addition & 2 deletions simulation/src/SampleMain.hs
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,7 @@ options =
)
)
<*> optional
( Opts.option
Opts.auto
( Opts.strOption
( Opts.long "output"
<> Opts.metavar "FILENAME"
<> Opts.help "output filename, (default SIMNAME.json)"
Expand Down

0 comments on commit 985329c

Please sign in to comment.