From cbbeda037df66f943dd624fd4724925cba131c5b Mon Sep 17 00:00:00 2001 From: lilyeyes Date: Thu, 15 Aug 2024 15:41:20 +0800 Subject: [PATCH] Fix HanaSR sporadic failures on ssh cmd 124 error Fix Public Cloud HanaSR sporadic failure on "Stop_site_b-primary/Crash_replica": ssh timed out, returned 124 TEAM-9601 - [PC] HanaSR sporadically failed on "Stop_site_b-primary/Crash_replica": ssh timed out, returned 124 --- lib/sles4sap_publiccloud.pm | 2 +- tests/sles4sap/publiccloud/hana_sr_takeover.pm | 6 +++--- tests/sles4sap/publiccloud/hana_sr_test_secondary.pm | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/sles4sap_publiccloud.pm b/lib/sles4sap_publiccloud.pm index 883d9d1e353e..bf769df8616d 100644 --- a/lib/sles4sap_publiccloud.pm +++ b/lib/sles4sap_publiccloud.pm @@ -1286,7 +1286,7 @@ sub wait_for_idle { my $rc = $self->run_cmd(cmd => 'cs_wait_for_idle --sleep 5', timeout => $timeout, rc_only => 1, proceed_on_failure => 1); if ($rc == 124) { - record_info("cs_wait_for_idle", "cs_wait_for_idle timed out after $timeout. Gathering info and retrying"); + record_info("WARN cs_wait_for_idle", "cs_wait_for_idle timed out after $timeout. 
Gathering info and retrying"); $self->run_cmd(cmd => 'cs_clusterstate', proceed_on_failure => 1); $self->run_cmd(cmd => 'crm_mon -r -R -n -N -1', proceed_on_failure => 1); $self->run_cmd(cmd => 'SAPHanaSR-showAttr', proceed_on_failure => 1); diff --git a/tests/sles4sap/publiccloud/hana_sr_takeover.pm b/tests/sles4sap/publiccloud/hana_sr_takeover.pm index 49650d36763a..b46150a2a11f 100644 --- a/tests/sles4sap/publiccloud/hana_sr_takeover.pm +++ b/tests/sles4sap/publiccloud/hana_sr_takeover.pm @@ -63,9 +63,9 @@ sub run { $self->{my_instance}->wait_for_ssh(username => 'cloudadmin'); # SBD delay is active only after reboot - if (($takeover_action eq 'crash' and $sbd_delay != 0) || - # Add SBD delay for 'stop' to fix sporadic 'takeover failed to complete' issue on EC2 - ($takeover_action eq 'stop' and check_var('PUBLIC_CLOUD_PROVIDER', 'EC2'))) { + if ($takeover_action eq 'crash' || $takeover_action eq 'stop') { + # Add SBD delay to fix sporadic 'takeover failed to complete' issue on EC2 + # Also fix sporadic issues (ssh timed out) mentioned in TEAM-9601 record_info('SBD SLEEP', "Waiting $sbd_delay sec for SBD delay timeout."); # test needs to wait a little more than sbd delay sleep($sbd_delay + 30); diff --git a/tests/sles4sap/publiccloud/hana_sr_test_secondary.pm b/tests/sles4sap/publiccloud/hana_sr_test_secondary.pm index 46ad2d43217f..32e4c82d555d 100644 --- a/tests/sles4sap/publiccloud/hana_sr_test_secondary.pm +++ b/tests/sles4sap/publiccloud/hana_sr_test_secondary.pm @@ -67,7 +67,7 @@ sub run { $self->{my_instance}->wait_for_ssh(username => 'cloudadmin'); # SBD delay is active only after reboot - if ($db_action eq 'crash' and $sbd_delay != 0) { + if ($db_action eq 'crash' || $db_action eq 'stop') { record_info('SBD SLEEP', "Waiting $sbd_delay sec for SBD delay timeout."); # sleep needs to be a little longer than sbd start delay sleep($sbd_delay + 30);