From d325381f9ed674d9e9b22c7aa9cf83247b5764ed Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Tue, 3 Apr 2018 16:48:27 -0700 Subject: [PATCH 1/2] Replace CHECK() with CHECK_*() so that when a check is failing, more useful information is logged --- heron/common/src/cpp/basics/basics.cpp | 2 +- heron/stmgr/src/cpp/grouping/fields-grouping.cpp | 2 +- heron/tmaster/src/cpp/manager/stateful-restorer.cpp | 4 ++-- heron/tmaster/src/cpp/manager/stats-interface.cpp | 2 +- heron/tmaster/src/cpp/manager/stmgrstate.cpp | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/heron/common/src/cpp/basics/basics.cpp b/heron/common/src/cpp/basics/basics.cpp index 75d8438f9b1..4bad3e29d76 100644 --- a/heron/common/src/cpp/basics/basics.cpp +++ b/heron/common/src/cpp/basics/basics.cpp @@ -154,7 +154,7 @@ static void InitLogging() { * Instance id of the program calling initialize */ static void InitHelper(const char* argv0, const char* instance, bool istest) { - CHECK(signal(SIGPIPE, SIG_IGN) != SIG_ERR); + CHECK_NE(signal(SIGPIPE, SIG_IGN), SIG_ERR); // create execution meta data object SetMetadata(argv0, instance, istest); diff --git a/heron/stmgr/src/cpp/grouping/fields-grouping.cpp b/heron/stmgr/src/cpp/grouping/fields-grouping.cpp index 88747e30b94..1d660c52407 100644 --- a/heron/stmgr/src/cpp/grouping/fields-grouping.cpp +++ b/heron/stmgr/src/cpp/grouping/fields-grouping.cpp @@ -50,7 +50,7 @@ void FieldsGrouping::GetListToSend(const proto::system::HeronDataTuple& _tuple, size_t prime_num = 633910111UL; for (auto iter = fields_grouping_indices_.begin(); iter != fields_grouping_indices_.end(); ++iter) { - CHECK(_tuple.values_size() > *iter); + CHECK_GT(_tuple.values_size(), *iter); size_t h = str_hash_fn(_tuple.values(*iter)); task_index += (h % prime_num); } diff --git a/heron/tmaster/src/cpp/manager/stateful-restorer.cpp b/heron/tmaster/src/cpp/manager/stateful-restorer.cpp index 01cd7df36c3..97376e85b2f 100644 --- a/heron/tmaster/src/cpp/manager/stateful-restorer.cpp +++ b/heron/tmaster/src/cpp/manager/stateful-restorer.cpp @@ -61,8 +61,8 @@ void StatefulRestorer::HandleStMgrRestored(const std::string& _stmgr_id, int64_t _restore_txid, const StMgrMap& _stmgrs) { CHECK(in_progress_); - CHECK(_checkpoint_id == checkpoint_id_in_progress_); - CHECK(_restore_txid == restore_txid_); + CHECK_EQ(_checkpoint_id, checkpoint_id_in_progress_); + CHECK_EQ(_restore_txid, restore_txid_); unreplied_stmgrs_.erase(_stmgr_id); if (unreplied_stmgrs_.empty()) { Finish2PhaseCommit(_stmgrs); diff --git a/heron/tmaster/src/cpp/manager/stats-interface.cpp b/heron/tmaster/src/cpp/manager/stats-interface.cpp index 7cc87df456d..cdfeca56828 100644 --- a/heron/tmaster/src/cpp/manager/stats-interface.cpp +++ b/heron/tmaster/src/cpp/manager/stats-interface.cpp @@ -58,7 +58,7 @@ StatsInterface::StatsInterface(EventLoop* eventLoop, const NetworkOptions& _opti http_server_->InstallCallBack("/stmgrsregistrationsummary", std::move(cbHandleStmgrsRegistrationSummary)); http_server_->InstallGenericCallBack(std::move(cbHandleUnknown)); - CHECK(http_server_->Start() == SP_OK); + CHECK_EQ(http_server_->Start(), SP_OK); } StatsInterface::~StatsInterface() { delete http_server_; } diff --git a/heron/tmaster/src/cpp/manager/stmgrstate.cpp b/heron/tmaster/src/cpp/manager/stmgrstate.cpp index 448cc72b23a..c2555f45ac7 100644 --- a/heron/tmaster/src/cpp/manager/stmgrstate.cpp +++ b/heron/tmaster/src/cpp/manager/stmgrstate.cpp @@ -123,7 +123,7 @@ StMgrState::AddAssignment(const std::vector >& _assignmen proto::system::Assignment* _assignment) { // A vector of add_assignments(); val->set_nodemgr_id(info_->nodemgr_id()); From c1ecdf6af397b095706d504111a85742ce29e0d7 Mon Sep 17 00:00:00 2001 From: Ning Wang Date: Wed, 4 Apr 2018 12:46:50 -0700 Subject: [PATCH 2/2] add check messages --- .../src/cpp/metrics/metricsmgr-client.cpp | 4 ++-- heron/common/src/cpp/network/baseconnection.cpp | 5 +++-- heron/common/src/cpp/network/client.cpp | 3 ++- heron/common/src/cpp/network/client.h | 2 +- heron/common/src/cpp/network/httpserver.cpp | 2 +- heron/common/src/cpp/network/server.h | 2 +- heron/common/src/cpp/zookeeper/zkclient.cpp | 4 ++-- .../instance/src/cpp/boltimpl/bolt-instance.cpp | 6 +++--- .../src/cpp/slave/imetrics-registrar-impl.cpp | 3 ++- .../src/cpp/spoutimpl/spout-instance.cpp | 6 +++--- .../cpp/statemgr/heron-localfilestatemgr.cpp | 6 +++--- .../src/cpp/statemgr/heron-zkstatemgr.cpp | 16 ++++++++-------- heron/stmgr/src/cpp/manager/instance-server.cpp | 6 ++++-- .../stmgr/src/cpp/manager/stateful-restorer.cpp | 3 ++- heron/stmgr/src/cpp/manager/stmgr-clientmgr.cpp | 4 ++-- heron/stmgr/src/cpp/manager/stmgr-server.cpp | 9 ++++++--- heron/stmgr/src/cpp/manager/stmgr.cpp | 16 ++++++++-------- heron/stmgr/src/cpp/util/tuple-cache.cpp | 4 ++-- heron/stmgr/src/cpp/util/xor-manager.cpp | 6 +++--- .../src/cpp/manager/stateful-controller.cpp | 2 +- .../src/cpp/manager/stateful-restorer.cpp | 8 ++++---- .../tmaster/src/cpp/manager/stats-interface.cpp | 11 +++++++---- heron/tmaster/src/cpp/manager/tmaster.cpp | 17 +++++++++-------- 23 files changed, 79 insertions(+), 66 deletions(-) diff --git a/heron/common/src/cpp/metrics/metricsmgr-client.cpp b/heron/common/src/cpp/metrics/metricsmgr-client.cpp index a898c1f5cc6..52d5d86d736 100644 --- a/heron/common/src/cpp/metrics/metricsmgr-client.cpp +++ b/heron/common/src/cpp/metrics/metricsmgr-client.cpp @@ -142,7 +142,7 @@ void MetricsMgrClient::SendMetrics(proto::system::MetricPublisherPublishMessage* } void MetricsMgrClient::InternalSendTMasterLocation() { - CHECK(tmaster_location_); + CHECK(tmaster_location_) << "TMaster location is not available yet"; proto::system::TMasterLocationRefreshMessage* m = new proto::system::TMasterLocationRefreshMessage(); m->mutable_tmaster()->CopyFrom(*tmaster_location_); @@ -152,7 +152,7 @@ void MetricsMgrClient::InternalSendTMasterLocation() { } void MetricsMgrClient::InternalSendMetricsCacheLocation() { - CHECK(metricscache_location_); + CHECK(metricscache_location_) << "MetricsCache location is not available yet"; proto::system::MetricsCacheLocationRefreshMessage* m = new proto::system::MetricsCacheLocationRefreshMessage(); m->mutable_metricscache()->CopyFrom(*metricscache_location_); diff --git a/heron/common/src/cpp/network/baseconnection.cpp b/heron/common/src/cpp/network/baseconnection.cpp index e992cd99890..f3d75d962c5 100644 --- a/heron/common/src/cpp/network/baseconnection.cpp +++ b/heron/common/src/cpp/network/baseconnection.cpp @@ -56,9 +56,10 @@ BaseConnection::BaseConnection(ConnectionEndPoint* endpoint, ConnectionOptions* } BaseConnection::~BaseConnection() { - CHECK(mState == INIT || mState == DISCONNECTED); - bufferevent_free(buffer_); + CHECK(mState == INIT || mState == DISCONNECTED) + << "Deleting connection object while it is still connected"; disableRateLimit(); // To free the config object + bufferevent_free(buffer_); } sp_int32 BaseConnection::start() { diff --git a/heron/common/src/cpp/network/client.cpp b/heron/common/src/cpp/network/client.cpp index 5103b12c6be..d253bd861ec 100644 --- a/heron/common/src/cpp/network/client.cpp +++ b/heron/common/src/cpp/network/client.cpp @@ -92,7 +92,8 @@ void Client::Init() { message_rid_gen_ = new REQID_Generator(); } void Client::InternalSendRequest(google::protobuf::Message* _request, void* _ctx, sp_int64 _msecs) { auto iter = requestResponseMap_.find(_request->GetTypeName()); - CHECK(iter != requestResponseMap_.end()); + CHECK(iter != requestResponseMap_.end()) + << "Reponse type " << _request->GetTypeName() << " is not found"; const sp_string& _expected_response_type = iter->second; if (state_ != CONNECTED) { delete _request; diff --git a/heron/common/src/cpp/network/client.h b/heron/common/src/cpp/network/client.h index 7a74e31fc23..3faae44b1b9 100644 --- a/heron/common/src/cpp/network/client.h +++ b/heron/common/src/cpp/network/client.h @@ -245,7 +245,7 @@ class Client : public BaseClient { __global_protobuf_pool_release__(m); return; } - CHECK(m->IsInitialized()); + CHECK(m->IsInitialized()) << "Protobuf not initialized"; std::function cb = std::bind(method, _t, m); diff --git a/heron/common/src/cpp/network/httpserver.cpp b/heron/common/src/cpp/network/httpserver.cpp index 75de800ab60..ab089bc5218 100644 --- a/heron/common/src/cpp/network/httpserver.cpp +++ b/heron/common/src/cpp/network/httpserver.cpp @@ -87,7 +87,7 @@ void HTTPServer::HandleHTTPRequest(struct evhttp_request* _request) { void HTTPServer::SendReply(IncomingHTTPRequest* _request, sp_int32 _code, OutgoingHTTPResponse* _response) { - CHECK(_request->underlying_request() == _response->underlying_response()); + CHECK_EQ(_request->underlying_request(), _response->underlying_response()); evhttp_send_reply(_request->underlying_request(), _code, "", NULL); delete _response; } diff --git a/heron/common/src/cpp/network/server.h b/heron/common/src/cpp/network/server.h index 37d91819811..ebbda9c4aed 100644 --- a/heron/common/src/cpp/network/server.h +++ b/heron/common/src/cpp/network/server.h @@ -224,7 +224,7 @@ class Server : public BaseServer { CloseConnection(_conn); return; } - CHECK(m->IsInitialized()); + CHECK(m->IsInitialized()) << "Protobuf not initialized"; std::function cb = std::bind(method, _t, rid, _conn, m); diff --git a/heron/common/src/cpp/zookeeper/zkclient.cpp b/heron/common/src/cpp/zookeeper/zkclient.cpp index 663352017d8..769fd6e87ff 100644 --- a/heron/common/src/cpp/zookeeper/zkclient.cpp +++ b/heron/common/src/cpp/zookeeper/zkclient.cpp @@ -131,7 +131,7 @@ ZKClient::ZKClient(const std::string& hostportlist, EventLoop* eventLoop, : eventLoop_(eventLoop), hostportlist_(hostportlist), client_global_watcher_cb_(std::move(global_watcher_cb)) { - CHECK(client_global_watcher_cb_); + CHECK(client_global_watcher_cb_) << "Client global watcher is not provided"; Init(); } @@ -341,7 +341,7 @@ void ZKClient::ZkWatcherCb(VCallback<> cb) { } void ZKClient::SendWatchEvent(const ZkWatchEvent& event) { - CHECK(client_global_watcher_cb_); + CHECK(client_global_watcher_cb_) << "Client global watcher is not provided"; piper_->ExecuteInEventLoop(std::bind(&RunWatchEventCb, client_global_watcher_cb_, event)); } diff --git a/heron/instance/src/cpp/boltimpl/bolt-instance.cpp b/heron/instance/src/cpp/boltimpl/bolt-instance.cpp index 3f4b49852c8..93f1ee2ab80 100644 --- a/heron/instance/src/cpp/boltimpl/bolt-instance.cpp +++ b/heron/instance/src/cpp/boltimpl/bolt-instance.cpp @@ -70,7 +70,7 @@ BoltInstance::~BoltInstance() { } void BoltInstance::Start() { - CHECK(!active_); + CHECK(!active_) << "Bolt instance has been started already"; LOG(INFO) << "Starting bolt " << taskContext_->getThisComponentName(); bolt_->open(taskContext_->getConfig(), taskContext_, collector_); if (taskContext_->getConfig()->hasConfig(api::config::Config::TOPOLOGY_TICK_TUPLE_FREQ_SECS)) { @@ -86,13 +86,13 @@ void BoltInstance::Start() { void BoltInstance::Activate() { LOG(INFO) << "Not doing anything in Bolt Activate"; - CHECK(!active_); + CHECK(!active_) << "Bolt instance has been started already"; active_ = true; } void BoltInstance::Deactivate() { LOG(INFO) << "Not doing anything in Bolt Dacctivate"; - CHECK(active_); + CHECK(!active_) << "Bolt instance is not active"; active_ = false; } diff --git a/heron/instance/src/cpp/slave/imetrics-registrar-impl.cpp b/heron/instance/src/cpp/slave/imetrics-registrar-impl.cpp index 72408e10118..598aa5de9e3 100644 --- a/heron/instance/src/cpp/slave/imetrics-registrar-impl.cpp +++ b/heron/instance/src/cpp/slave/imetrics-registrar-impl.cpp @@ -76,7 +76,8 @@ void IMetricsRegistrarImpl::sendMetrics(int timeBucketSizeInSecs) { datum->set_name(metricName); datum->set_value(metricValue); } else { - CHECK(multiMetrics_.find(metricName) != multiMetrics_.end()); + CHECK(multiMetrics_.find(metricName) != multiMetrics_.end()) + << "Metric " << metricName << " is not found"; std::map mmap; multiMetrics_[metricName]->getValueAndReset(mmap); for (auto& kv : mmap) { diff --git a/heron/instance/src/cpp/spoutimpl/spout-instance.cpp b/heron/instance/src/cpp/spoutimpl/spout-instance.cpp index 422776a6b02..3d5da27378f 100644 --- a/heron/instance/src/cpp/spoutimpl/spout-instance.cpp +++ b/heron/instance/src/cpp/spoutimpl/spout-instance.cpp @@ -73,7 +73,7 @@ SpoutInstance::~SpoutInstance() { } void SpoutInstance::Start() { - CHECK(!active_); + CHECK(!active_) << "Spout instance has been started already"; LOG(INFO) << "Starting spout " << taskContext_->getThisComponentName() << " with ackingEnabled?: " << ackingEnabled_ << " with enableMessageTimeouts?: " << enableMessageTimeouts_; @@ -94,7 +94,7 @@ void SpoutInstance::Start() { void SpoutInstance::Activate() { LOG(INFO) << "Came in Activate of the spout"; - CHECK(!active_); + CHECK(!active_) << "Spout instance has been started already"; if (spout_) { spout_->activate(); } @@ -103,7 +103,7 @@ void SpoutInstance::Activate() { void SpoutInstance::Deactivate() { LOG(INFO) << "Came in Deactivate of the spout"; - CHECK(active_); + CHECK(active_) << "Spout instance is not active"; if (spout_) { spout_->deactivate(); } diff --git a/heron/statemgrs/src/cpp/statemgr/heron-localfilestatemgr.cpp b/heron/statemgrs/src/cpp/statemgr/heron-localfilestatemgr.cpp index 00c44caaac9..27e32ecbf46 100644 --- a/heron/statemgrs/src/cpp/statemgr/heron-localfilestatemgr.cpp +++ b/heron/statemgrs/src/cpp/statemgr/heron-localfilestatemgr.cpp @@ -65,7 +65,7 @@ void HeronLocalFileStateMgr::InitTree() { void HeronLocalFileStateMgr::SetTMasterLocationWatch(const std::string& topology_name, VCallback<> watcher) { - CHECK(watcher); + CHECK(watcher) << "Watcher callback is missing in SetTMasterLocationWatch()"; // We kind of cheat here. We check periodically time_t tmaster_last_change = FileUtils::getModifiedTime(GetTMasterLocationPath(topology_name)); @@ -78,7 +78,7 @@ void HeronLocalFileStateMgr::SetTMasterLocationWatch(const std::string& topology void HeronLocalFileStateMgr::SetMetricsCacheLocationWatch(const std::string& topology_name, VCallback<> watcher) { - CHECK(watcher); + CHECK(watcher) << "Watcher callback is missing in SetMetricsCacheLocationWatch()"; // We kind of cheat here. We check periodically time_t tmaster_last_change = FileUtils::getModifiedTime( GetMetricsCacheLocationPath(topology_name)); @@ -92,7 +92,7 @@ void HeronLocalFileStateMgr::SetMetricsCacheLocationWatch(const std::string& top void HeronLocalFileStateMgr::SetPackingPlanWatch(const std::string& topology_name, VCallback<> watcher) { - CHECK(watcher); + CHECK(watcher) << "Watcher callback is missing in SetPackingPlanWatch"; // We kind of cheat here. We check periodically time_t packingplan_last_change = FileUtils::getModifiedTime(GetPackingPlanPath(topology_name)); diff --git a/heron/statemgrs/src/cpp/statemgr/heron-zkstatemgr.cpp b/heron/statemgrs/src/cpp/statemgr/heron-zkstatemgr.cpp index b5d050fdca8..4e64f8aa314 100644 --- a/heron/statemgrs/src/cpp/statemgr/heron-zkstatemgr.cpp +++ b/heron/statemgrs/src/cpp/statemgr/heron-zkstatemgr.cpp @@ -74,22 +74,22 @@ HeronZKStateMgr::~HeronZKStateMgr() { void HeronZKStateMgr::InitTree() { // Needs to be implemented - CHECK(false); + CHECK(false) << "HeronZKStateMgr::InitTree() is not implemented yet"; } void HeronZKStateMgr::SetTMasterLocationWatch(const std::string& topology_name, VCallback<> watcher) { - CHECK(watcher); - CHECK(!topology_name.empty()); + CHECK(watcher) << "Watcher callback is missing in SetTMasterLocationWatch"; + CHECK(!topology_name.empty()) << "Topology name is missing"; tmaster_location_watcher_info_ = new TMasterLocationWatchInfo(std::move(watcher), topology_name); SetTMasterLocationWatchInternal(); } void HeronZKStateMgr::SetMetricsCacheLocationWatch(const std::string& topology_name, - VCallback<> watcher) { - CHECK(watcher); - CHECK(!topology_name.empty()); + VCallback<> watcher) { + CHECK(watcher) << "Watcher callback is missing in SetMetricsCacheLocationWatch"; + CHECK(!topology_name.empty()) << "Topology name is missing"; metricscache_location_watcher_info_ = new TMasterLocationWatchInfo( std::move(watcher), topology_name); @@ -97,8 +97,8 @@ void HeronZKStateMgr::SetMetricsCacheLocationWatch(const std::string& topology_n } void HeronZKStateMgr::SetPackingPlanWatch(const std::string& topology_name, VCallback<> watcher) { - CHECK(watcher); - CHECK(!topology_name.empty()); + CHECK(watcher) << "Watcher callback is missing in SetPackingPlanWatch"; + CHECK(!topology_name.empty()) << "Topology name is missing"; packing_plan_watcher_info_ = new TMasterLocationWatchInfo(std::move(watcher), topology_name); SetPackingPlanWatchInternal(); diff --git a/heron/stmgr/src/cpp/manager/instance-server.cpp b/heron/stmgr/src/cpp/manager/instance-server.cpp index 72887496ddb..c072b136c88 100644 --- a/heron/stmgr/src/cpp/manager/instance-server.cpp +++ b/heron/stmgr/src/cpp/manager/instance-server.cpp @@ -222,7 +222,8 @@ void InstanceServer::HandleConnectionClose(Connection* _conn, NetworkErrorCode) auto iiter = active_instances_.find(_conn); if (iiter != active_instances_.end()) { sp_int32 task_id = iiter->second; - CHECK(instance_info_.find(task_id) != instance_info_.end()); + CHECK(instance_info_.find(task_id) != instance_info_.end()) + << "Task " << task_id << " is not found"; sp_string instance_id = instance_info_[task_id]->instance_->instance_id(); LOG(INFO) << "Instance " << instance_id << " closed connection"; @@ -524,7 +525,8 @@ void InstanceServer::StopBackPressureConnectionCb(Connection* _connection) { } CHECK(remote_ends_who_caused_back_pressure_.find(instance_name) != - remote_ends_who_caused_back_pressure_.end()); + remote_ends_who_caused_back_pressure_.end()) + << "Instance " << instance_name << " is not found in the backpressure list"; remote_ends_who_caused_back_pressure_.erase(instance_name); // Indicate which instance component stopped back pressure diff --git a/heron/stmgr/src/cpp/manager/stateful-restorer.cpp b/heron/stmgr/src/cpp/manager/stateful-restorer.cpp index 1a329ac182e..4aeb7487d04 100644 --- a/heron/stmgr/src/cpp/manager/stateful-restorer.cpp +++ b/heron/stmgr/src/cpp/manager/stateful-restorer.cpp @@ -247,7 +247,8 @@ void StatefulRestorer::HandleAllInstancesConnected() { void StatefulRestorer::HandleDeadInstanceConnection(sp_int32 _task_id) { if (in_progress_) { instance_connections_pending_ = true; - CHECK(local_taskids_.find(_task_id) != local_taskids_.end()); + CHECK(local_taskids_.find(_task_id) != local_taskids_.end()) + << "Task " << _task_id << " is not found in local task list"; restore_pending_.insert(_task_id); get_ckpt_pending_.insert(_task_id); } diff --git a/heron/stmgr/src/cpp/manager/stmgr-clientmgr.cpp b/heron/stmgr/src/cpp/manager/stmgr-clientmgr.cpp index 51fba90b7fa..997dbfd97e2 100644 --- a/heron/stmgr/src/cpp/manager/stmgr-clientmgr.cpp +++ b/heron/stmgr/src/cpp/manager/stmgr-clientmgr.cpp @@ -131,7 +131,7 @@ StMgrClient* StMgrClientMgr::CreateClient(const sp_string& _other_stmgr_id, bool StMgrClientMgr::SendTupleStreamMessage(sp_int32 _task_id, const sp_string& _stmgr_id, const proto::system::HeronTupleSet2& _msg) { auto iter = clients_.find(_stmgr_id); - CHECK(iter != clients_.end()); + CHECK(iter != clients_.end()) << "Stmgr " << _stmgr_id << " is not found in client list"; // Acquire the message proto::stmgr::TupleStreamMessage* out = nullptr; @@ -151,7 +151,7 @@ bool StMgrClientMgr::SendTupleStreamMessage(sp_int32 _task_id, const sp_string& void StMgrClientMgr::SendDownstreamStatefulCheckpoint(const sp_string& _stmgr_id, proto::ckptmgr::DownstreamStatefulCheckpoint* _message) { auto iter = clients_.find(_stmgr_id); - CHECK(iter != clients_.end()); + CHECK(iter != clients_.end()) << "Stmgr " << _stmgr_id << " is not found in client list"; iter->second->SendDownstreamStatefulCheckpoint(_message); } diff --git a/heron/stmgr/src/cpp/manager/stmgr-server.cpp b/heron/stmgr/src/cpp/manager/stmgr-server.cpp index d7da9d48ae0..48bac368c0a 100644 --- a/heron/stmgr/src/cpp/manager/stmgr-server.cpp +++ b/heron/stmgr/src/cpp/manager/stmgr-server.cpp @@ -148,7 +148,8 @@ void StMgrServer::StartBackPressureClientCb(const sp_string& _other_stmgr_id) { void StMgrServer::StopBackPressureClientCb(const sp_string& _other_stmgr_id) { CHECK(remote_ends_who_caused_back_pressure_.find(_other_stmgr_id) != - remote_ends_who_caused_back_pressure_.end()); + remote_ends_who_caused_back_pressure_.end()) + << "Stmgr " << _other_stmgr_id << " is not found in backpressure list"; remote_ends_who_caused_back_pressure_.erase(_other_stmgr_id); if (!stmgr_->DidAnnounceBackPressure()) { @@ -175,7 +176,8 @@ void StMgrServer::HandleStartBackPressureMessage(Connection* _conn, return; } auto iter = rstmgrs_.find(_conn); - CHECK(iter != rstmgrs_.end()); + CHECK(iter != rstmgrs_.end()) + << "Connection " << _conn << " is not found in remote stmgr list when starting backpressure"; sp_string stmgr_id = iter->second; stmgrs_who_announced_back_pressure_.insert(stmgr_id); @@ -196,7 +198,8 @@ void StMgrServer::HandleStopBackPressureMessage(Connection* _conn, return; } auto iter = rstmgrs_.find(_conn); - CHECK(iter != rstmgrs_.end()); + CHECK(iter != rstmgrs_.end()) + << "Connection " << _conn << " is not found in remote stmgr list when stopping backpressure"; sp_string stmgr_id = iter->second; // Did we receive a start back pressure message from this stmgr to // begin with? We could have been dead at the time of the announcement diff --git a/heron/stmgr/src/cpp/manager/stmgr.cpp b/heron/stmgr/src/cpp/manager/stmgr.cpp index de12189880c..b8881b00960 100644 --- a/heron/stmgr/src/cpp/manager/stmgr.cpp +++ b/heron/stmgr/src/cpp/manager/stmgr.cpp @@ -282,7 +282,7 @@ void StMgr::FetchMetricsCacheLocation() { } void StMgr::StartStmgrServer() { - CHECK(!stmgr_server_); + CHECK(!stmgr_server_) << "Stmgr server is already running"; LOG(INFO) << "Creating StmgrServer"; NetworkOptions sops; sops.set_host(IpUtils::getHostName()); @@ -302,7 +302,7 @@ void StMgr::StartStmgrServer() { } void StMgr::StartInstanceServer() { - CHECK(!instance_server_); + CHECK(!instance_server_) << "InstanceServer server is already running"; LOG(INFO) << "Creating InstanceServer"; NetworkOptions sops; sops.set_host(IpUtils::getHostName()); @@ -348,7 +348,7 @@ void StMgr::CreateCheckpointMgrClient() { } void StMgr::CreateTMasterClient(proto::tmaster::TMasterLocation* tmasterLocation) { - CHECK(!tmaster_client_); + CHECK(!tmaster_client_) << "TMaster client has already existed"; LOG(INFO) << "Creating Tmaster Client at " << tmasterLocation->host() << ":" << tmasterLocation->master_port(); NetworkOptions master_options; @@ -384,7 +384,7 @@ void StMgr::CreateTMasterClient(proto::tmaster::TMasterLocation* tmasterLocation } void StMgr::CreateTupleCache() { - CHECK(!tuple_cache_); + CHECK(!tuple_cache_) << "Tuple cache has already existed"; LOG(INFO) << "Creating tuple cache "; sp_uint32 drain_threshold_bytes_ = config::HeronInternalsConfigReader::Instance()->GetHeronStreammgrCacheDrainSizeMb() * 1_MB; @@ -999,7 +999,7 @@ void StMgr::InitiateStatefulCheckpoint(sp_string _checkpoint_id) { void StMgr::HandleStoreInstanceStateCheckpoint( const proto::ckptmgr::InstanceStateCheckpoint& _message, const proto::system::Instance& _instance) { - CHECK(stateful_restorer_); + CHECK(stateful_restorer_) << "Stateful restorer doesn't exist when storing checkpoint"; int32_t task_id = _instance.info().task_id(); LOG(INFO) << "Got a checkpoint state message from " << task_id << " for checkpoint " << _message.checkpoint_id(); @@ -1072,7 +1072,7 @@ void StMgr::HandleDownStreamStatefulCheckpoint( void StMgr::RestoreTopologyState(sp_string _checkpoint_id, sp_int64 _restore_txid) { LOG(INFO) << "Got a Restore Topology State message from Tmaster for checkpoint " << _checkpoint_id << " and txid " << _restore_txid; - CHECK(stateful_restorer_); + CHECK(stateful_restorer_) << "Stateful restorer doesn't exist when restoring state"; // Start the restore process std::unordered_set local_taskids; @@ -1085,7 +1085,7 @@ void StMgr::RestoreTopologyState(sp_string _checkpoint_id, sp_int64 _restore_txi void StMgr::StartStatefulProcessing(sp_string _checkpoint_id) { LOG(INFO) << "Received StartProcessing message from tmaster for " << _checkpoint_id; - CHECK(stateful_restorer_); + CHECK(stateful_restorer_) << "Stateful restorer doesn't exist when starting process"; if (stateful_restorer_->InProgress()) { LOG(FATAL) << "StartProcessing received from Tmaster for " << _checkpoint_id << " when we are still in Restore"; @@ -1099,7 +1099,7 @@ void StMgr::HandleRestoreInstanceStateResponse(sp_int32 _task_id, // If we are stateful topology, we might want to see how the restore went // and if it was successful and all other local instances have recovered // send back a success response to tmaster saying that we have recovered - CHECK(stateful_restorer_); + CHECK(stateful_restorer_) << "Stateful restorer doesn't exist when querying restore state"; stateful_restorer_->HandleInstanceRestoredState(_task_id, _status.status(), _checkpoint_id); } diff --git a/heron/stmgr/src/cpp/util/tuple-cache.cpp b/heron/stmgr/src/cpp/util/tuple-cache.cpp index 2ac1d3c1e61..92b515a785c 100644 --- a/heron/stmgr/src/cpp/util/tuple-cache.cpp +++ b/heron/stmgr/src/cpp/util/tuple-cache.cpp @@ -125,8 +125,8 @@ TupleCache::TupleList::TupleList() { } TupleCache::TupleList::~TupleList() { - CHECK(tuples_.empty()); - CHECK(!current_); + CHECK(tuples_.empty()) << "Buffer is not empty when destructing TupleList"; + CHECK(!current_) << "Current tuple set is not empty when destructing TupleList"; } void TupleCache::TupleList::clear() { diff --git a/heron/stmgr/src/cpp/util/xor-manager.cpp b/heron/stmgr/src/cpp/util/xor-manager.cpp index 430290f78e1..cbf8a4f4f7b 100644 --- a/heron/stmgr/src/cpp/util/xor-manager.cpp +++ b/heron/stmgr/src/cpp/util/xor-manager.cpp @@ -59,17 +59,17 @@ void XorManager::rotate(EventLoopImpl::Status) { } void XorManager::create(sp_int32 _task_id, sp_int64 _key, sp_int64 _value) { - CHECK(tasks_.find(_task_id) != tasks_.end()); + CHECK(tasks_.find(_task_id) != tasks_.end()) << "Task " << _task_id << " is not found"; tasks_[_task_id]->create(_key, _value); } bool XorManager::anchor(sp_int32 _task_id, sp_int64 _key, sp_int64 _value) { - CHECK(tasks_.find(_task_id) != tasks_.end()); + CHECK(tasks_.find(_task_id) != tasks_.end()) << "Task " << _task_id << " is not found"; return tasks_[_task_id]->anchor(_key, _value); } bool XorManager::remove(sp_int32 _task_id, sp_int64 _key) { - CHECK(tasks_.find(_task_id) != tasks_.end()); + CHECK(tasks_.find(_task_id) != tasks_.end()) << "Task " << _task_id << " is not found"; return tasks_[_task_id]->remove(_key); } diff --git a/heron/tmaster/src/cpp/manager/stateful-controller.cpp b/heron/tmaster/src/cpp/manager/stateful-controller.cpp index 9130e83dc67..5a9210910f8 100644 --- a/heron/tmaster/src/cpp/manager/stateful-controller.cpp +++ b/heron/tmaster/src/cpp/manager/stateful-controller.cpp @@ -211,7 +211,7 @@ StatefulController::AddNewConsistentCheckpoint(const std::string& _new_checkpoin } bool StatefulController::GotRestoreResponse(const std::string& _stmgr) const { - CHECK(restorer_->IsInProgress()); + CHECK(restorer_->IsInProgress()) << "State restore is not in progress"; return restorer_->GotResponse(_stmgr); } diff --git a/heron/tmaster/src/cpp/manager/stateful-restorer.cpp b/heron/tmaster/src/cpp/manager/stateful-restorer.cpp index 97376e85b2f..b41375ea0d7 100644 --- a/heron/tmaster/src/cpp/manager/stateful-restorer.cpp +++ b/heron/tmaster/src/cpp/manager/stateful-restorer.cpp @@ -60,9 +60,9 @@ void StatefulRestorer::HandleStMgrRestored(const std::string& _stmgr_id, const std::string& _checkpoint_id, int64_t _restore_txid, const StMgrMap& _stmgrs) { - CHECK(in_progress_); - CHECK_EQ(_checkpoint_id, checkpoint_id_in_progress_); - CHECK_EQ(_restore_txid, restore_txid_); + CHECK(in_progress_) << "State restore is not in progress"; + CHECK_EQ(_checkpoint_id, checkpoint_id_in_progress_) << "Checkpoint id doesn't match"; + CHECK_EQ(_restore_txid, restore_txid_) << "Restore txid doesn't match"; unreplied_stmgrs_.erase(_stmgr_id); if (unreplied_stmgrs_.empty()) { Finish2PhaseCommit(_stmgrs); @@ -71,7 +71,7 @@ void StatefulRestorer::HandleStMgrRestored(const std::string& _stmgr_id, void StatefulRestorer::Finish2PhaseCommit(const StMgrMap& _stmgrs) { LOG(INFO) << "Finishing Stateful 2 Phase Commit since all stmgrs have replied back"; - CHECK(unreplied_stmgrs_.empty()); + CHECK(unreplied_stmgrs_.empty()) << "Unreplied stmgrs are found when finishing 2-phrase commit"; for (auto kv : _stmgrs) { kv.second->SendStartStatefulProcessingMessage(checkpoint_id_in_progress_); } diff --git a/heron/tmaster/src/cpp/manager/stats-interface.cpp b/heron/tmaster/src/cpp/manager/stats-interface.cpp index cdfeca56828..b906cc29633 100644 --- a/heron/tmaster/src/cpp/manager/stats-interface.cpp +++ b/heron/tmaster/src/cpp/manager/stats-interface.cpp @@ -77,7 +77,7 @@ void StatsInterface::HandleStatsRequest(IncomingHTTPRequest* _request) { proto::tmaster::MetricResponse* res = metrics_collector_->GetMetrics(req, tmaster_->getInitialTopology()); sp_string response_string; - CHECK(res->SerializeToString(&response_string)); + CHECK(res->SerializeToString(&response_string)) << "Failed to serialize MetricResponse"; OutgoingHTTPResponse* response = new OutgoingHTTPResponse(_request); response->AddHeader("Content-Type", "application/octet-stream"); response->AddHeader("Content-Length", std::to_string(response_string.size())); @@ -102,7 +102,8 @@ void StatsInterface::HandleExceptionRequest(IncomingHTTPRequest* _request) { heron::proto::tmaster::ExceptionLogResponse* exception_response = metrics_collector_->GetExceptions(exception_request); sp_string response_string; - CHECK(exception_response->SerializeToString(&response_string)); + CHECK(exception_response->SerializeToString(&response_string)) + << "Failed to serialize ExceptionLogResponse"; OutgoingHTTPResponse* http_response = new OutgoingHTTPResponse(_request); http_response->AddHeader("Content-Type", "application/octet-stream"); http_response->AddHeader("Content-Length", std::to_string(response_string.size())); @@ -126,7 +127,8 @@ void StatsInterface::HandleExceptionSummaryRequest(IncomingHTTPRequest* _request heron::proto::tmaster::ExceptionLogResponse* exception_response = metrics_collector_->GetExceptionsSummary(exception_request); sp_string response_string; - CHECK(exception_response->SerializeToString(&response_string)); + CHECK(exception_response->SerializeToString(&response_string)) + << "Failed to serialize ExceptionLogResponse"; OutgoingHTTPResponse* http_response = new OutgoingHTTPResponse(_request); http_response->AddHeader("Content-Type", "application/octet-stream"); http_response->AddHeader("Content-Length", std::to_string(response_string.size())); @@ -151,7 +153,8 @@ void StatsInterface::HandleStmgrsRegistrationSummaryRequest(IncomingHTTPRequest* } auto stmgrs_reg_summary_response = tmaster_->GetStmgrsRegSummary(); sp_string response_string; - CHECK(stmgrs_reg_summary_response->SerializeToString(&response_string)); + CHECK(stmgrs_reg_summary_response->SerializeToString(&response_string)) + << "Failed to serialize StmgrsRegSummary"; auto http_response = new OutgoingHTTPResponse(_request); http_response->AddHeader("Content-Type", "application/octet-stream"); http_response->AddHeader("Content-Length", std::to_string(response_string.size())); diff --git a/heron/tmaster/src/cpp/manager/tmaster.cpp b/heron/tmaster/src/cpp/manager/tmaster.cpp index ad2affe6cd8..4e53eef88da 100644 --- a/heron/tmaster/src/cpp/manager/tmaster.cpp +++ b/heron/tmaster/src/cpp/manager/tmaster.cpp @@ -116,7 +116,7 @@ TMaster::TMaster(const std::string& _zk_hostport, const std::string& _topology_n tmaster_location_->set_controller_port(tmaster_controller_port_); tmaster_location_->set_master_port(master_port_); tmaster_location_->set_stats_port(stats_port_); - DCHECK(tmaster_location_->IsInitialized()); + DCHECK(tmaster_location_->IsInitialized()) << "TMaster local is not initialized"; FetchPackingPlan(); // Send tmaster location to metrics mgr @@ -555,7 +555,7 @@ void TMaster::GetPhysicalPlanDone(proto::system::PhysicalPlan* _pplan, void TMaster::ActivateTopology(VCallback cb) { CHECK_EQ(current_pplan_->topology().state(), proto::api::PAUSED); - DCHECK(current_pplan_->topology().IsInitialized()); + DCHECK(current_pplan_->topology().IsInitialized()) << "Topology is not initialized"; // Set the status proto::system::PhysicalPlan* new_pplan = new proto::system::PhysicalPlan(); @@ -572,7 +572,7 @@ void TMaster::ActivateTopology(VCallback cb) { void TMaster::DeActivateTopology(VCallback cb) { CHECK_EQ(current_pplan_->topology().state(), proto::api::RUNNING); - DCHECK(current_pplan_->topology().IsInitialized()); + DCHECK(current_pplan_->topology().IsInitialized()) << "Topology is not initialized"; // Set the status proto::system::PhysicalPlan* new_pplan = new proto::system::PhysicalPlan(); @@ -589,7 +589,7 @@ void TMaster::DeActivateTopology(VCallback cb) { bool TMaster::UpdateRuntimeConfig(const ComponentConfigMap& _config, VCallback cb) { - DCHECK(current_pplan_->topology().IsInitialized()); + DCHECK(current_pplan_->topology().IsInitialized()) << "Topology is not initialized"; // Parse and set the new configs proto::system::PhysicalPlan* new_pplan = new proto::system::PhysicalPlan(); @@ -628,7 +628,7 @@ void TMaster::HandleCleanStatefulCheckpointResponse(proto::system::StatusCode _s // validated using ValidateRuntimeConig() function. bool TMaster::UpdateRuntimeConfigInTopology(proto::api::Topology* _topology, const ComponentConfigMap& _config) { - DCHECK(_topology->IsInitialized()); + DCHECK(_topology->IsInitialized()) << "Topology is not initialized"; ComponentConfigMap::const_iterator iter; for (iter = _config.begin(); iter != _config.end(); ++iter) { @@ -758,7 +758,7 @@ void TMaster::DoPhysicalPlan(EventLoop::Status) { // to use as many portions from it as possible proto::system::PhysicalPlan* pplan = MakePhysicalPlan(); CHECK_NOTNULL(pplan); - DCHECK(pplan->IsInitialized()); + DCHECK(pplan->IsInitialized()) << "Topology is not initialized"; if (!ValidateStMgrsWithPackingPlan()) { // TODO(kramasamy): Do Something better here @@ -854,7 +854,8 @@ proto::system::PhysicalPlan* TMaster::MakePhysicalPlan() { // we need to just adjust the stmgrs mapping // First lets verify that our original pplan and instances // all match up - CHECK(ValidateStMgrsWithPhysicalPlan(current_pplan_)); + CHECK(ValidateStMgrsWithPhysicalPlan(current_pplan_)) + << "Original pplan doesn't match the instances"; proto::system::PhysicalPlan* new_pplan = new proto::system::PhysicalPlan(); new_pplan->mutable_topology()->CopyFrom(current_pplan_->topology()); @@ -1016,7 +1017,7 @@ bool TMaster::ValidateStMgrsWithPhysicalPlan(proto::system::PhysicalPlan* _pplan bool TMaster::ValidateRuntimeConfigNames(const ComponentConfigMap& _config) const { LOG(INFO) << "Validating runtime configs."; const proto::api::Topology& topology = current_pplan_->topology(); - DCHECK(topology.IsInitialized()); + DCHECK(topology.IsInitialized()) << "Topology is not initialized"; std::unordered_set components; config::TopologyConfigHelper::GetAllComponentNames(topology, components);