Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new execution code-path for unified scheduler #31239

Closed
wants to merge 56 commits into from
Closed
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
3597dfb
Add new execution code-path for unified scheduler
ryoqun Apr 18, 2023
a8d66fa
Adjust to use the installed prefix consistently
ryoqun Apr 20, 2023
16a1e9c
Reword comment for simplicity
ryoqun Apr 22, 2023
3a7810c
Make replay_stats locking more granular
ryoqun Apr 22, 2023
8fc00c7
Clean up the local cluster test a bit
ryoqun Apr 23, 2023
3ba582b
Remove extra blank line
ryoqun Apr 23, 2023
9769288
Assert there's no active scheduler before freezing
ryoqun Apr 23, 2023
eed7db0
Move to ClusterConfig
ryoqun Apr 23, 2023
26607d1
Rename replace_{scheduler,scheduling}_context()
ryoqun Apr 27, 2023
b74f316
Add mermaid charts for InstalledScheduler
ryoqun Apr 27, 2023
546596a
Apply recent rename...
ryoqun Apr 27, 2023
43bbe42
Fix ci...
ryoqun Apr 27, 2023
090610f
Fix Arc<Bank> in chart...
ryoqun Apr 27, 2023
21b7250
Rename: strip redundant prefix from trait methods
ryoqun Apr 28, 2023
305e75b
Add some traits for bench
ryoqun May 8, 2023
4c2df8d
Add benches for scheduler
ryoqun May 8, 2023
12b3cb5
Make scheduler more generic over tx type shape
ryoqun May 12, 2023
8bbfaf8
Avoid transaction.clone() by Cow pass-thru
ryoqun May 12, 2023
6a4101c
Add sanity-checks for ledger-tool verify
ryoqun May 12, 2023
f2051db
Run benches in the scheduler-pool crate
ryoqun May 12, 2023
f863c71
Simplify IntoCow helper trait a bit
ryoqun May 13, 2023
b44475b
Move out of too-crowded bank.rs to proper file
ryoqun May 13, 2023
81075ac
Move scheduler out of Bank
ryoqun May 22, 2023
b3db6a0
ci...
ryoqun May 22, 2023
3f3dbde
Fix test...
ryoqun May 22, 2023
5c2d4fe
Fix build failure due to feature unification...
ryoqun May 22, 2023
ab1ed84
Use register_unique_tick() where possible
ryoqun May 24, 2023
c4f44cc
Add benches to show bad frequent synchronization
ryoqun May 24, 2023
ac8193e
Simplify is_done
ryoqun Jun 8, 2023
3d7c60f
Define register_tick to remove with_scheduler_lock
ryoqun Jun 8, 2023
56fc5f5
Add messages to assert!
ryoqun Jun 8, 2023
51d8783
Fix typo
ryoqun Jun 8, 2023
00088ed
Remove needless explicit drop()
ryoqun Jun 8, 2023
b2aa017
Use vec![elm; len]
ryoqun Jun 8, 2023
74b9439
Remove .bank{,_cloned}() fully relying on Deref
ryoqun Jun 8, 2023
ed59ea6
Remove into_bank() as well
ryoqun Jun 9, 2023
9a209d8
Small fixes
ryoqun Jun 15, 2023
1c89b9a
Add race condition test of recent_blockhashes
ryoqun Jun 15, 2023
5158a2d
Fix ci....
ryoqun Jun 16, 2023
2e614f6
Simplify BankWithScheduler::new() arguments
ryoqun Jun 27, 2023
b16d201
Move const to its sole use-site
ryoqun Jun 27, 2023
9f32fac
Release lock earlier
ryoqun Jun 27, 2023
df692d7
Simplify ScheduledTransactionHandler::handle args
ryoqun Jun 27, 2023
e2d3d7c
add comment
ryoqun Jun 28, 2023
3843793
Properly document BankWithScheduler
ryoqun Jun 29, 2023
a241a8b
Add comment for weak_self
ryoqun Jun 29, 2023
7549b41
Give up maintaining rather meaningless comments
ryoqun Jun 29, 2023
f9df200
Fix comment
ryoqun Jun 29, 2023
1dc0c2c
Don't mix inlined and non-inlined
ryoqun Jun 29, 2023
1415748
Remove actually used SEA from PhantomData
ryoqun Jun 29, 2023
f2e6442
Name actual types which are causing dyn trait
ryoqun Jun 29, 2023
9f82921
Remove explicit spelling of dropped type
ryoqun Jun 29, 2023
7958435
Disable cargo-audit for now
ryoqun Jun 29, 2023
60edf6b
Disable downstream for now
ryoqun Jun 29, 2023
b39e2c9
Add doc comment to SchedulingContext
ryoqun Jun 29, 2023
465205e
Remove needless variable assignment
ryoqun Jun 30, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 98 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ members = [
"rpc-test",
"runtime",
"runtime/store-tool",
"scheduler",
"scheduler-pool",
"sdk",
"sdk/cargo-build-bpf",
"sdk/cargo-build-sbf",
Expand Down Expand Up @@ -121,6 +123,7 @@ license = "Apache-2.0"
edition = "2021"

[workspace.dependencies]
aquamarine = "0.3.1"
aes-gcm-siv = "0.10.3"
ahash = "0.8.3"
anyhow = "1.0.70"
Expand Down Expand Up @@ -231,6 +234,7 @@ memmap2 = "0.5.10"
memoffset = "0.8"
merlin = "3"
min-max-heap = "1.3.0"
mockall = "0.11.4"
modular-bitfield = "0.11.2"
nix = "0.25.1"
num-bigint = "0.4.3"
Expand Down Expand Up @@ -340,6 +344,8 @@ solana-rpc-client = { path = "rpc-client", version = "=1.16.0", default-features
solana-rpc-client-api = { path = "rpc-client-api", version = "=1.16.0" }
solana-rpc-client-nonce-utils = { path = "rpc-client-nonce-utils", version = "=1.16.0" }
solana-runtime = { path = "runtime", version = "=1.16.0" }
solana-scheduler = { path = "scheduler", version = "=1.16.0" }
solana-scheduler-pool = { path = "scheduler-pool", version = "=1.16.0" }
solana-sdk = { path = "sdk", version = "=1.16.0" }
solana-sdk-macro = { path = "sdk/macro", version = "=1.16.0" }
solana-send-transaction-service = { path = "send-transaction-service", version = "=1.16.0" }
Expand Down
3 changes: 2 additions & 1 deletion ci/run-sanity.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,5 @@ $solana_ledger_tool create-snapshot --ledger config/ledger "$snapshot_slot" conf
cp config/ledger/genesis.tar.bz2 config/snapshot-ledger
$solana_ledger_tool copy --ledger config/ledger \
--target-db config/snapshot-ledger --starting-slot "$snapshot_slot" --ending-slot "$latest_slot"
$solana_ledger_tool verify --ledger config/snapshot-ledger
$solana_ledger_tool verify --ledger config/snapshot-ledger --block-verification-method blockstore-processor
$solana_ledger_tool verify --ledger config/snapshot-ledger --block-verification-method unified-scheduler
4 changes: 4 additions & 0 deletions ci/test-bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ _ $cargoNightly bench --manifest-path gossip/Cargo.toml ${V:+--verbose} \
_ $cargoNightly bench --manifest-path poh/Cargo.toml ${V:+--verbose} \
-- -Z unstable-options --format=json | tee -a "$BENCH_FILE"

# Run scheduler-pool benches
_ $cargoNightly bench --manifest-path scheduler-pool/Cargo.toml ${V:+--verbose} \
-- -Z unstable-options --format=json | tee -a "$BENCH_FILE"

# Run core benches
_ $cargoNightly bench --manifest-path core/Cargo.toml ${V:+--verbose} \
-- -Z unstable-options --format=json | tee -a "$BENCH_FILE"
Expand Down
1 change: 1 addition & 0 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ solana-rayon-threadlimit = { workspace = true }
solana-rpc = { workspace = true }
solana-rpc-client-api = { workspace = true }
solana-runtime = { workspace = true }
solana-scheduler-pool = { workspace = true }
solana-sdk = { workspace = true }
solana-send-transaction-service = { workspace = true }
solana-streamer = { workspace = true }
Expand Down
41 changes: 37 additions & 4 deletions core/src/replay_stage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ use {
block_error::BlockError,
blockstore::Blockstore,
blockstore_processor::{
self, BlockstoreProcessorError, ConfirmationProgress, TransactionStatusSender,
self, BlockstoreProcessorError, ConfirmationProgress, ExecuteBatchesInternalMetrics,
TransactionStatusSender,
},
leader_schedule_cache::LeaderScheduleCache,
leader_schedule_utils::first_of_consecutive_leader_slots,
Expand Down Expand Up @@ -2609,7 +2610,6 @@ impl ReplayStage {
match replay_result {
Ok(replay_tx_count) => tx_count += replay_tx_count,
Err(err) => {
// Error means the slot needs to be marked as dead
Self::mark_dead_slot(
blockstore,
bank,
Expand All @@ -2625,8 +2625,7 @@ impl ReplayStage {
ancestor_hashes_replay_update_sender,
purge_repair_slot_counter,
);
// If the bank was corrupted, don't try to run the below logic to check if the
// bank is completed
// don't try to run the below logic to check if the bank is completed
continue;
}
}
Expand All @@ -2640,6 +2639,40 @@ impl ReplayStage {
.expect("Bank fork progress entry missing for completed bank");

let replay_stats = bank_progress.replay_stats.clone();

if let Some((result, complete_execute_timings)) =
bank.wait_for_completed_scheduler()
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2/2

{
let metrics = ExecuteBatchesInternalMetrics::new_with_timings_from_all_threads(
complete_execute_timings,
);
replay_stats
.write()
.unwrap()
.batch_execute
.accumulate(metrics);

if let Err(err) = result {
Self::mark_dead_slot(
blockstore,
bank,
bank_forks.read().unwrap().root(),
&BlockstoreProcessorError::InvalidTransaction(err),
rpc_subscriptions,
duplicate_slots_tracker,
gossip_duplicate_confirmed_slots,
epoch_slots_frozen_slots,
progress,
heaviest_subtree_fork_choice,
duplicate_slots_to_repair,
ancestor_hashes_replay_update_sender,
purge_repair_slot_counter,
);
// don't try to run the remaining normal processing for the completed bank
continue;
}
}

let r_replay_stats = replay_stats.read().unwrap();
let replay_progress = bank_progress.replay_progress.clone();
let r_replay_progress = replay_progress.read().unwrap();
Expand Down
31 changes: 26 additions & 5 deletions core/src/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ use {
snapshot_hash::StartingSnapshotHashes,
snapshot_utils::{self, clean_orphaned_account_snapshot_dirs, move_and_async_delete_path},
},
solana_scheduler_pool::SchedulerPool,
solana_sdk::{
clock::Slot,
epoch_schedule::MAX_LEADER_SCHEDULE_EPOCH_OFFSET,
Expand Down Expand Up @@ -129,6 +130,7 @@ const WAIT_FOR_SUPERMAJORITY_THRESHOLD_PERCENT: u64 = 80;
pub enum BlockVerificationMethod {
#[default]
BlockstoreProcessor,
UnifiedScheduler,
}

impl BlockVerificationMethod {
Expand Down Expand Up @@ -747,6 +749,30 @@ impl Validator {
config.block_verification_method, config.block_production_method
);

let (replay_vote_sender, replay_vote_receiver) = unbounded();

// block min prioritization fee cache should be readable by RPC, and writable by validator
// (by both replay stage and banking stage)
let prioritization_fee_cache = Arc::new(PrioritizationFeeCache::default());

match &config.block_verification_method {
BlockVerificationMethod::BlockstoreProcessor => {
info!("not installing scheduler pool...");
}
BlockVerificationMethod::UnifiedScheduler => {
let scheduler_pool = SchedulerPool::new_dyn(
config.runtime_config.log_messages_bytes_limit,
transaction_status_sender.clone(),
Some(replay_vote_sender.clone()),
prioritization_fee_cache.clone(),
);
bank_forks
.write()
.unwrap()
.install_scheduler_pool(scheduler_pool);
}
}

let leader_schedule_cache = Arc::new(leader_schedule_cache);
let mut process_blockstore = ProcessBlockStore::new(
&id,
Expand Down Expand Up @@ -866,10 +892,6 @@ impl Validator {
false => Arc::new(ConnectionCache::with_udp(tpu_connection_pool_size)),
};

// block min prioritization fee cache should be readable by RPC, and writable by validator
// (by both replay stage and banking stage)
let prioritization_fee_cache = Arc::new(PrioritizationFeeCache::default());

let rpc_override_health_check = Arc::new(AtomicBool::new(false));
let (
json_rpc_service,
Expand Down Expand Up @@ -1068,7 +1090,6 @@ impl Validator {
info!("Disabled banking tracer");
}

let (replay_vote_sender, replay_vote_receiver) = unbounded();
let tvu = Tvu::new(
vote_account,
authorized_voter_keypairs,
Expand Down
1 change: 1 addition & 0 deletions ledger-tool/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ solana-logger = { workspace = true }
solana-measure = { workspace = true }
solana-rpc = { workspace = true }
solana-runtime = { workspace = true }
solana-scheduler-pool = { workspace = true }
solana-sdk = { workspace = true }
solana-stake-program = { workspace = true }
solana-storage-bigtable = { workspace = true }
Expand Down
Loading