diff --git a/CHANGELOG.md b/CHANGELOG.md index e0b31c772..dc94db5b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ operated. - [BREAKING] `GetAccount` can now return all storage map entries with a single request ([#2121](https://github.com/0xMiden/node/issues/2121)). - Persisted attachments of private output notes when applying a block, so they are now returned by `GetNotesById` ([#2172](https://github.com/0xMiden/node/pull/2172)). - [BREAKING] Replaced `StoreStatus` with `chain_tip` field in `RpcStatus` ([#2187](https://github.com/0xMiden/node/pull/2187)). +- Added gRPC health check endpoint to node's RPC service ([#2188](https://github.com/0xMiden/node/pull/2188)). ### Docker diff --git a/Cargo.lock b/Cargo.lock index 4c2a64758..9afa35351 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3362,6 +3362,7 @@ dependencies = [ "tokio", "tokio-stream", "tonic", + "tonic-health", "tracing", "url", ] @@ -3460,6 +3461,7 @@ dependencies = [ "tokio", "tokio-stream", "tonic", + "tonic-health", "tonic-reflection", "tonic-web", "tower", diff --git a/bin/network-monitor/.env b/bin/network-monitor/.env deleted file mode 100644 index 3c9224881..000000000 --- a/bin/network-monitor/.env +++ /dev/null @@ -1,26 +0,0 @@ -# network identity -MIDEN_MONITOR_NETWORK_NAME=Devnet -# service configs -MIDEN_MONITOR_PORT=3001 -MIDEN_MONITOR_ENABLE_OTEL=true -MIDEN_MONITOR_REQUEST_TIMEOUT=10s -# rpc checks -MIDEN_MONITOR_RPC_URL=https://rpc.devnet.miden.io/ -MIDEN_MONITOR_STATUS_CHECK_INTERVAL=30s -MIDEN_MONITOR_STALE_CHAIN_TIP_THRESHOLD=1m -# remote prover checks -MIDEN_MONITOR_REMOTE_PROVER_URLS=https://tx-prover.devnet.miden.io/,https://batch-prover.devnet.miden.io/ -MIDEN_MONITOR_REMOTE_PROVER_TEST_INTERVAL=2m -# faucet checks -MIDEN_MONITOR_FAUCET_URL=https://faucet-api.devnet.miden.io/ -MIDEN_MONITOR_FAUCET_TEST_INTERVAL=2m -# network transaction checks -MIDEN_MONITOR_DISABLE_NTX_SERVICE=false -MIDEN_MONITOR_COUNTER_INCREMENT_INTERVAL=30s -MIDEN_MONITOR_COUNTER_LATENCY_TIMEOUT=2m -# explorer checks -MIDEN_MONITOR_EXPLORER_URL=https://scan-backend-devnet-miden.eu-central-8.gateway.fm/graphql -# note transport checks -MIDEN_MONITOR_NOTE_TRANSPORT_URL=https://transport.miden.io -# validator checks -MIDEN_MONITOR_VALIDATOR_URL= diff --git a/bin/node/.env b/bin/node/.env deleted file mode 100644 index 3ec5b4e82..000000000 --- a/bin/node/.env +++ /dev/null @@ -1,29 +0,0 @@ -# For more info use -h on the relevant commands: -# miden-node start -h - -# Common -MIDEN_NODE_ENABLE_OTEL=true -MIDEN_NODE_DATA_DIRECTORY= - -# Block Producer -MIDEN_NODE_BLOCK_PRODUCER_LISTEN= -MIDEN_NODE_BLOCK_PRODUCER_STORE_URL= -MIDEN_NODE_BLOCK_PRODUCER_VALIDATOR_URL= -MIDEN_NODE_BLOCK_PRODUCER_MAX_TXS_PER_BATCH= -MIDEN_NODE_BLOCK_PRODUCER_MAX_BATCHES_PER_BLOCK= -MIDEN_NODE_BLOCK_PRODUCER_MEMPOOL_TX_CAPACITY= -MIDEN_NODE_BLOCK_PRODUCER_BATCH_PROVER_URL= - -# Store -MIDEN_NODE_STORE_RPC_LISTEN= -MIDEN_NODE_STORE_UPSTREAM_RPC_URL= -MIDEN_NODE_STORE_NTX_BUILDER_LISTEN= -MIDEN_NODE_STORE_BLOCK_PRODUCER_LISTEN= -MIDEN_NODE_STORE_BLOCK_PROVER_URL= - -# RPC -MIDEN_NODE_RPC_LISTEN=0.0.0.0:57291 -MIDEN_NODE_RPC_STORE_URL= -MIDEN_NODE_RPC_BLOCK_PRODUCER_URL= -MIDEN_NODE_RPC_VALIDATOR_URL= -MIDEN_NODE_RPC_NTX_BUILDER_URL= diff --git a/bin/node/src/commands/modes.rs b/bin/node/src/commands/modes.rs index 27c2273ec..088693dd4 100644 --- a/bin/node/src/commands/modes.rs +++ b/bin/node/src/commands/modes.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use anyhow::Context; -use miden_node_block_producer::{RpcSync, Sequencer}; +use miden_node_block_producer::Sequencer; use miden_node_proto::clients::{Builder, NtxBuilderClient, RpcClient, ValidatorClient}; use miden_node_rpc::{Rpc, RpcMode}; use miden_node_store::State; @@ -125,21 +125,16 @@ impl FullNodeCommand { let network_tx_auth = self.runtime.rpc.network_tx_auth()?; let state = load_state(&runtime).await?; let _disk_monitor = state.spawn_disk_monitor(); - let sync = RpcSync { - state: Arc::clone(&state), - source_rpc: source_rpc.clone(), - }; let rpc = Rpc { listener: bind_rpc(runtime.rpc_listen).await?, store: state, - mode: RpcMode::full_node(source_rpc), + mode: RpcMode::full_node(source_rpc, self.sync.readiness_threshold), ntx_builder: None, grpc_options: runtime.external_grpc_options, network_tx_auth, }; let mut tasks = Tasks::new(); - tasks.spawn("RPC sync", sync.run()); tasks.spawn("RPC server", rpc.serve()); tasks.join_next_as_error().await diff --git a/bin/node/src/commands/rpc.rs b/bin/node/src/commands/rpc.rs index 9fc4e56a3..8508e180d 100644 --- a/bin/node/src/commands/rpc.rs +++ b/bin/node/src/commands/rpc.rs @@ -124,4 +124,14 @@ pub struct SyncOptions { value_name = "URL" )] pub block_source_url: Url, + + // Number of blocks that this RPC server must be within that of the sync source to be considered + // ready. + #[arg( + long = "sync.ready-threshold", + env = "MIDEN_NODE_SYNC_READY_THRESHOLD", + value_name = "NUM", + default_value_t = 10 + )] + pub readiness_threshold: u32, } diff --git a/bin/ntx-builder/.env b/bin/ntx-builder/.env deleted file mode 100644 index 72f228ded..000000000 --- a/bin/ntx-builder/.env +++ /dev/null @@ -1,12 +0,0 @@ -# For more info use -h on the relevant commands: -# miden-ntx-builder start -h - -MIDEN_NODE_ENABLE_OTEL=true -MIDEN_NODE_DATA_DIRECTORY= -MIDEN_NODE_NTX_BUILDER_LISTEN= -MIDEN_NODE_NTX_BUILDER_STORE_URL= -MIDEN_NODE_NTX_BUILDER_BLOCK_PRODUCER_URL= -MIDEN_NODE_NTX_BUILDER_VALIDATOR_URL= -MIDEN_NODE_NTX_BUILDER_NTX_PROVER_URL= -MIDEN_NODE_NTX_BUILDER_SCRIPT_CACHE_SIZE= -MIDEN_NODE_NTX_BUILDER_MAX_CYCLES= diff --git a/bin/remote-prover/.env b/bin/remote-prover/.env deleted file mode 100644 index b7191203d..000000000 --- a/bin/remote-prover/.env +++ /dev/null @@ -1,6 +0,0 @@ -# For more info consult the help output: `miden-remote-prover --help` - -MIDEN_PROVER_PORT=8082 -MIDEN_PROVER_KIND=transaction -MIDEN_PROVER_TIMEOUT=100s -MIDEN_PROVER_CAPACITY=10 diff --git a/bin/validator/.env b/bin/validator/.env deleted file mode 100644 index e9cafca7e..000000000 --- a/bin/validator/.env +++ /dev/null @@ -1,9 +0,0 @@ -# For more info use -h on the relevant commands: -# miden-validator start -h - -MIDEN_NODE_ENABLE_OTEL=true -MIDEN_NODE_DATA_DIRECTORY= -MIDEN_NODE_VALIDATOR_LISTEN= -MIDEN_NODE_VALIDATOR_GENESIS_CONFIG_FILE= -MIDEN_NODE_VALIDATOR_KEY= -MIDEN_NODE_VALIDATOR_KMS_KEY_ID= diff --git a/crates/block-producer/Cargo.toml b/crates/block-producer/Cargo.toml index a0c4b73da..ff2a02653 100644 --- a/crates/block-producer/Cargo.toml +++ b/crates/block-producer/Cargo.toml @@ -37,6 +37,7 @@ thiserror = { workspace = true } tokio = { features = ["macros", "net", "rt-multi-thread"], workspace = true } tokio-stream = { workspace = true } tonic = { default-features = true, features = ["transport"], workspace = true } +tonic-health = { workspace = true } tracing = { workspace = true } url = { workspace = true } diff --git a/crates/block-producer/src/lib.rs b/crates/block-producer/src/lib.rs index 35691a6e6..e35100236 100644 --- a/crates/block-producer/src/lib.rs +++ b/crates/block-producer/src/lib.rs @@ -23,7 +23,7 @@ mod errors; pub mod server; pub use errors::MempoolSubmissionError; pub use proof_scheduler::DEFAULT_MAX_CONCURRENT_PROOFS; -pub use rpc_sync::RpcSync; +pub use rpc_sync::{RpcReadiness, RpcSync}; pub use server::{ BlockProducerApi, BlockProducerApiConfig, diff --git a/crates/block-producer/src/rpc_sync.rs b/crates/block-producer/src/rpc_sync.rs index 4f69cf11b..4c913c2f7 100644 --- a/crates/block-producer/src/rpc_sync.rs +++ b/crates/block-producer/src/rpc_sync.rs @@ -10,10 +10,43 @@ use miden_node_utils::tasks::Tasks; use miden_protocol::block::{BlockNumber, SignedBlock}; use miden_protocol::utils::serde::Deserializable; use tokio_stream::StreamExt; +use tonic_health::ServingStatus; +use tonic_health::server::HealthReporter; use tracing::{info, warn}; pub(crate) const RECONNECT_DELAY: Duration = Duration::from_secs(5); +// RPC READINESS +// ================================================================================================ + +/// Tracks readiness of the RPC API service for a full-node. +/// +/// Holds the gRPC [`HealthReporter`] and the readiness threshold. Created by [`Rpc::serve`] +/// once the health pair is available and passed directly into [`BlockSync`]. +#[derive(Clone)] +pub struct RpcReadiness { + reporter: HealthReporter, + threshold: u32, +} + +impl RpcReadiness { + const SERVICE_NAME: &'static str = "rpc.Api"; + + pub fn new(reporter: HealthReporter, threshold: u32) -> Self { + Self { reporter, threshold } + } + + /// Updates the RPC service health status based on the upstream/local tip gap. + pub async fn update(&self, upstream_tip: BlockNumber, local_tip: BlockNumber) { + let status = if upstream_tip.as_u32().saturating_sub(local_tip.as_u32()) <= self.threshold { + ServingStatus::Serving + } else { + ServingStatus::NotServing + }; + self.reporter.set_service_status(Self::SERVICE_NAME, status).await; + } +} + // RPC SYNC // ================================================================================================ @@ -21,6 +54,7 @@ pub(crate) const RECONNECT_DELAY: Duration = Duration::from_secs(5); pub struct RpcSync { pub state: Arc, pub source_rpc: RpcClient, + pub readiness: RpcReadiness, } impl RpcSync { @@ -30,6 +64,7 @@ impl RpcSync { let block_sync = BlockSync { state: Arc::clone(&self.state), source_rpc: self.source_rpc.clone(), + readiness: self.readiness, }; let proof_sync = ProofSync { state: self.state, @@ -49,6 +84,7 @@ impl RpcSync { struct BlockSync { state: Arc, source_rpc: RpcClient, + readiness: RpcReadiness, } struct ProofSync { @@ -86,9 +122,13 @@ impl BlockSync { while let Some(result) = stream.next().await { let event = result?; + let upstream_tip = BlockNumber::from(event.committed_chain_tip); let block = SignedBlock::read_from_bytes(&event.block) .context("failed to deserialize block from upstream")?; self.state.apply_block(block).await?; + + let local_tip = self.state.chain_tip(Finality::Committed).await; + self.readiness.update(upstream_tip, local_tip).await; } Ok(()) diff --git a/crates/rpc/Cargo.toml b/crates/rpc/Cargo.toml index 3819f6cc5..90ef8442b 100644 --- a/crates/rpc/Cargo.toml +++ b/crates/rpc/Cargo.toml @@ -32,9 +32,10 @@ miden-tx = { features = ["concurrent"], workspace = true } miden-tx-batch-prover = { workspace = true } semver = { version = "1.0" } thiserror = { workspace = true } -tokio = { features = ["macros", "net", "rt-multi-thread"], workspace = true } +tokio = { features = ["macros", "net", "rt-multi-thread", "time"], workspace = true } tokio-stream = { features = ["net"], workspace = true } tonic = { default-features = true, features = ["tls-native-roots", "tls-ring"], workspace = true } +tonic-health = { workspace = true } tonic-reflection = { workspace = true } tonic-web = { workspace = true } tower = { workspace = true } diff --git a/crates/rpc/src/server/api.rs b/crates/rpc/src/server/api.rs index 45dc2a79c..5b8204362 100644 --- a/crates/rpc/src/server/api.rs +++ b/crates/rpc/src/server/api.rs @@ -798,7 +798,7 @@ impl api_server::Api for RpcService { RpcMode::Sequencer { block_producer, validator } => { (block_producer.as_ref(), validator.as_ref()) }, - RpcMode::FullNode { source_rpc } => { + RpcMode::FullNode { source_rpc, .. } => { return source_rpc.as_ref().clone().submit_proven_tx(request).await; }, }; @@ -915,7 +915,7 @@ impl api_server::Api for RpcService { RpcMode::Sequencer { block_producer, validator } => { (block_producer.as_ref(), validator.as_ref()) }, - RpcMode::FullNode { source_rpc } => { + RpcMode::FullNode { source_rpc, .. } => { return source_rpc.as_ref().clone().submit_proven_tx_batch(request).await; }, }; @@ -994,7 +994,7 @@ impl api_server::Api for RpcService { RpcMode::Sequencer { block_producer, .. } => { Some(block_producer_status_to_proto(block_producer.status().await)) }, - RpcMode::FullNode { source_rpc } => source_rpc + RpcMode::FullNode { source_rpc, .. } => source_rpc .as_ref() .clone() .status(Request::new(())) @@ -1041,7 +1041,7 @@ impl api_server::Api for RpcService { ntx_builder.clone().get_network_note_status(request).await?.into_inner() }, - RpcMode::FullNode { source_rpc } => { + RpcMode::FullNode { source_rpc, .. } => { source_rpc.as_ref().clone().get_network_note_status(request).await?.into_inner() }, }; diff --git a/crates/rpc/src/server/mod.rs b/crates/rpc/src/server/mod.rs index c6e002b96..125533854 100644 --- a/crates/rpc/src/server/mod.rs +++ b/crates/rpc/src/server/mod.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use accept::AcceptHeaderLayer; use anyhow::Context; -use miden_node_block_producer::BlockProducerApi; +use miden_node_block_producer::{BlockProducerApi, RpcReadiness, RpcSync}; use miden_node_proto::clients::{NtxBuilderClient, RpcClient as SourceRpcClient, ValidatorClient}; use miden_node_proto::generated::rpc::api_server; use miden_node_proto_build::rpc_api_descriptor; @@ -58,7 +58,10 @@ pub enum RpcMode { /// /// The caller is responsible for configuring this client with any request metadata the source /// RPC requires. - FullNode { source_rpc: Box }, + FullNode { + source_rpc: Box, + readiness_threshold: u32, + }, } impl RpcMode { @@ -69,14 +72,20 @@ impl RpcMode { } } - pub fn full_node(source_rpc: SourceRpcClient) -> Self { - Self::FullNode { source_rpc: Box::new(source_rpc) } + pub fn full_node(source_rpc: SourceRpcClient, readiness_threshold: u32) -> Self { + Self::FullNode { + source_rpc: Box::new(source_rpc), + readiness_threshold, + } } } impl Rpc { /// Serves the RPC API. /// + /// In full-node mode, also runs the block/proof sync loop concurrently. Either component + /// failing causes both to stop. + /// /// Note: Executes in place (i.e. not spawned) and will run indefinitely until /// a fatal error is encountered. pub async fn serve(self) -> anyhow::Result<()> { @@ -96,18 +105,40 @@ impl Rpc { api.set_genesis_commitment(genesis.commitment())?; let api_service = api_server::ApiServer::new(api); + + info!(target: COMPONENT, endpoint=?self.listener, mode=?self.mode, "Server initialized"); + + // Initialize health reporter and sync service based on the RPC mode. + let (health_reporter, health_service) = tonic_health::server::health_reporter(); + let maybe_sync = match self.mode { + RpcMode::Sequencer { .. } => { + health_reporter.set_serving::>().await; + None + }, + RpcMode::FullNode { source_rpc, readiness_threshold } => { + health_reporter + .set_not_serving::>() + .await; + let readiness = RpcReadiness::new(health_reporter, readiness_threshold); + Some(RpcSync { + state: Arc::clone(&self.store), + source_rpc: *source_rpc, + readiness, + }) + }, + }; + let reflection_service = server::Builder::configure() .register_file_descriptor_set(rpc_api_descriptor()) + .register_encoded_file_descriptor_set(tonic_health::pb::FILE_DESCRIPTOR_SET) .build_v1() .context("failed to build reflection service")?; - info!(target: COMPONENT, endpoint=?self.listener, mode=?self.mode, "Server initialized"); - let rpc_version = env!("CARGO_PKG_VERSION"); let rpc_version = semver::Version::parse(rpc_version).context("failed to parse crate version")?; - tonic::transport::Server::builder() + let serve = tonic::transport::Server::builder() .accept_http1(true) .max_connection_age(self.grpc_options.max_connection_age) .timeout(self.grpc_options.request_timeout) @@ -139,10 +170,19 @@ impl Rpc { .with_genesis_enforced_method("SubmitProvenTxBatch"), ) .add_service(api_service) + .add_service(health_service) // Enables gRPC reflection service. .add_service(reflection_service) - .serve_with_incoming(TcpListenerStream::new(self.listener)) - .await - .context("failed to serve RPC API") + .serve_with_incoming(TcpListenerStream::new(self.listener)); + + // Run RPC and (optional) sync service. + if let Some(sync) = maybe_sync { + tokio::select! { + result = serve => result.context("failed to serve RPC API"), + result = sync.run() => result, + } + } else { + serve.await.context("failed to serve RPC API") + } } } diff --git a/crates/rpc/src/tests.rs b/crates/rpc/src/tests.rs index 4fc8af544..2c742a5e0 100644 --- a/crates/rpc/src/tests.rs +++ b/crates/rpc/src/tests.rs @@ -440,7 +440,7 @@ async fn rpc_rejects_post_deployment_network_account_tx() { let service = RpcService::new( Arc::clone(&store.state), - RpcMode::full_node(source_rpc_client()), + RpcMode::full_node(source_rpc_client(), 100), None, NonZeroUsize::new(1_000_000).unwrap(), None, @@ -577,7 +577,7 @@ async fn full_node_forwards_get_network_note_status_to_source_rpc() { let local_store = TestStore::start().await; let full_node = RpcService::new( Arc::clone(&local_store.state), - RpcMode::full_node(source_rpc), + RpcMode::full_node(source_rpc, 100), None, NonZeroUsize::new(1_000).unwrap(), None,