Skip to content

Commit

Permalink
DAOS-7485 control: Implement system reint to act on all pools (#15551)
Browse files Browse the repository at this point in the history
Add the dmg system reintegrate command to reintegrate a set of storage
nodes or ranks into all the pools they belong to. Takes --ranks or
--rank-hosts in ranged format.

Shorten variable naming from Reintegrate to Reint in C code
Don't export variables unnecessarily in cmd/dmg
Improve reporting of protobuf unmarshal errors
Add system reintegrate command
Implement reint with system drain request flag
Add unit test coverage for new code

Signed-off-by: Tom Nabarro <[email protected]>
  • Loading branch information
tanabarr authored Jan 14, 2025
1 parent 7c0d665 commit a94ac88
Show file tree
Hide file tree
Showing 30 changed files with 1,578 additions and 1,332 deletions.
3 changes: 2 additions & 1 deletion src/control/cmd/dmg/command_test.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//
// (C) Copyright 2019-2024 Intel Corporation.
// (C) Copyright 2025 Hewlett Packard Enterprise Development LP
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
Expand Down Expand Up @@ -166,7 +167,7 @@ func (bci *bridgeConnInvoker) InvokeUnaryRPC(ctx context.Context, uReq control.U
case *control.PoolExtendReq:
resp = control.MockMSResponse("", nil, &mgmtpb.PoolExtendResp{})
case *control.PoolReintegrateReq:
resp = control.MockMSResponse("", nil, &mgmtpb.PoolReintegrateResp{})
resp = control.MockMSResponse("", nil, &mgmtpb.PoolReintResp{})
case *control.SystemCheckEnableReq:
resp = control.MockMSResponse("", nil, &mgmtpb.DaosResp{})
case *control.SystemCheckDisableReq:
Expand Down
4 changes: 3 additions & 1 deletion src/control/cmd/dmg/json_test.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//
// (C) Copyright 2020-2024 Intel Corporation.
// (C) Copyright 2025 Hewlett Packard Enterprise Development LP
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
Expand Down Expand Up @@ -113,7 +114,8 @@ func TestDmg_JsonOutput(t *testing.T) {
testArgs = append(testArgs, "foo:bar")
case "system del-attr":
testArgs = append(testArgs, "foo")
case "system exclude", "system clear-exclude", "system drain":
case "system exclude", "system clear-exclude", "system drain",
"system reintegrate":
testArgs = append(testArgs, "--ranks", "0")
}

Expand Down
37 changes: 19 additions & 18 deletions src/control/cmd/dmg/pool.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//
// (C) Copyright 2019-2024 Intel Corporation.
// (C) Copyright 2025 Hewlett Packard Enterprise Development LP
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
Expand Down Expand Up @@ -162,7 +163,7 @@ func (psf *poolSizeFlag) UnmarshalFlag(fv string) error {
return psf.ByteSizeFlag.UnmarshalFlag(fv)
}

// PoolCreateCmd is the struct representing the command to create a DAOS pool.
// poolCreateCmd is the struct representing the command to create a DAOS pool.
type poolCreateCmd struct {
baseCmd
cfgCmd
Expand Down Expand Up @@ -401,7 +402,7 @@ func (cmd *poolCreateCmd) Execute(args []string) error {
return nil
}

// PoolListCmd represents the command to fetch a list of all DAOS pools in the system.
// poolListCmd represents the command to fetch a list of all DAOS pools in the system.
type poolListCmd struct {
baseCmd
cfgCmd
Expand Down Expand Up @@ -481,7 +482,7 @@ func (cmd *poolCmd) PoolID() *PoolID {
return &cmd.Args.Pool
}

// PoolDestroyCmd is the struct representing the command to destroy a DAOS pool.
// poolDestroyCmd is the struct representing the command to destroy a DAOS pool.
type poolDestroyCmd struct {
poolCmd
Recursive bool `short:"r" long:"recursive" description:"Remove pool with existing containers"`
Expand Down Expand Up @@ -509,7 +510,7 @@ func (cmd *poolDestroyCmd) Execute(args []string) error {
return err
}

// PoolEvictCmd is the struct representing the command to evict a DAOS pool.
// poolEvictCmd is the struct representing the command to evict a DAOS pool.
type poolEvictCmd struct {
poolCmd
}
Expand All @@ -530,7 +531,7 @@ func (cmd *poolEvictCmd) Execute(args []string) error {
return err
}

// PoolExcludeCmd is the struct representing the command to exclude a DAOS target.
// poolExcludeCmd is the struct representing the command to exclude a DAOS target.
type poolExcludeCmd struct {
poolCmd
Rank uint32 `long:"rank" required:"1" description:"Engine rank of the targets to be excluded"`
Expand Down Expand Up @@ -558,7 +559,7 @@ func (cmd *poolExcludeCmd) Execute(args []string) error {
return err
}

// PoolDrainCmd is the struct representing the command to Drain a DAOS target.
// poolDrainCmd is the struct representing the command to Drain a DAOS target.
type poolDrainCmd struct {
poolCmd
Rank uint32 `long:"rank" required:"1" description:"Engine rank of the targets to be drained"`
Expand Down Expand Up @@ -591,7 +592,7 @@ func (cmd *poolDrainCmd) Execute(args []string) error {
return err
}

// PoolExtendCmd is the struct representing the command to Extend a DAOS pool.
// poolExtendCmd is the struct representing the command to Extend a DAOS pool.
type poolExtendCmd struct {
poolCmd
RankList ui.RankSetFlag `long:"ranks" required:"1" description:"Comma-separated list of ranks to add to the pool"`
Expand All @@ -616,14 +617,14 @@ func (cmd *poolExtendCmd) Execute(args []string) error {
return err
}

// PoolReintegrateCmd is the struct representing the command to Add a DAOS target.
// poolReintegrateCmd is the struct representing the command to reintegrate a DAOS target.
type poolReintegrateCmd struct {
poolCmd
Rank uint32 `long:"rank" required:"1" description:"Engine rank of the targets to be reintegrated"`
TargetIdx string `long:"target-idx" description:"Comma-separated list of target idx(s) to be reintegrated into the rank"`
}

// Execute is run when PoolReintegrateCmd subcommand is activated
// Execute is run when poolReintegrateCmd subcommand is activated
func (cmd *poolReintegrateCmd) Execute(args []string) error {
msg := "succeeded"

Expand All @@ -649,7 +650,7 @@ func (cmd *poolReintegrateCmd) Execute(args []string) error {
return err
}

// PoolQueryCmd is the struct representing the command to query a DAOS pool.
// poolQueryCmd is the struct representing the command to query a DAOS pool.
type poolQueryCmd struct {
poolCmd
ShowEnabledRanks bool `short:"e" long:"show-enabled" description:"Show engine unique identifiers (ranks) which are enabled"`
Expand Down Expand Up @@ -694,7 +695,7 @@ func (cmd *poolQueryCmd) Execute(args []string) error {
return nil
}

// PoolQueryTargetsCmd is the struct representing the command to query a DAOS pool engine's targets
// poolQueryTargetsCmd is the struct representing the command to query a DAOS pool engine's targets
type poolQueryTargetsCmd struct {
poolCmd

Expand Down Expand Up @@ -752,7 +753,7 @@ func (cmd *poolQueryTargetsCmd) Execute(args []string) error {
return nil
}

// PoolUpgradeCmd is the struct representing the command to update a DAOS pool.
// poolUpgradeCmd is the struct representing the command to update a DAOS pool.
type poolUpgradeCmd struct {
poolCmd
}
Expand All @@ -772,7 +773,7 @@ func (cmd *poolUpgradeCmd) Execute(args []string) error {
return nil
}

// PoolSetPropCmd represents the command to set a property on a pool.
// poolSetPropCmd represents the command to set a property on a pool.
type poolSetPropCmd struct {
poolCmd

Expand Down Expand Up @@ -816,7 +817,7 @@ func (cmd *poolSetPropCmd) Execute(_ []string) error {
return nil
}

// PoolGetPropCmd represents the command to set a property on a pool.
// poolGetPropCmd represents the command to get a property on a pool.
type poolGetPropCmd struct {
poolCmd
Args struct {
Expand Down Expand Up @@ -847,7 +848,7 @@ func (cmd *poolGetPropCmd) Execute(_ []string) error {
return nil
}

// PoolGetACLCmd represents the command to fetch an Access Control List of a
// poolGetACLCmd represents the command to fetch an Access Control List of a
// DAOS pool.
type poolGetACLCmd struct {
poolCmd
Expand Down Expand Up @@ -911,7 +912,7 @@ func (cmd *poolGetACLCmd) writeACLToFile(acl string) error {
return nil
}

// PoolOverwriteACLCmd represents the command to overwrite the Access Control
// poolOverwriteACLCmd represents the command to overwrite the Access Control
// List of a DAOS pool.
type poolOverwriteACLCmd struct {
poolCmd
Expand Down Expand Up @@ -946,7 +947,7 @@ func (cmd *poolOverwriteACLCmd) Execute(args []string) error {
return nil
}

// PoolUpdateACLCmd represents the command to update the Access Control List of
// poolUpdateACLCmd represents the command to update the Access Control List of
// a DAOS pool.
type poolUpdateACLCmd struct {
poolCmd
Expand Down Expand Up @@ -994,7 +995,7 @@ func (cmd *poolUpdateACLCmd) Execute(args []string) error {
return nil
}

// PoolDeleteACLCmd represents the command to delete an entry from the Access
// poolDeleteACLCmd represents the command to delete an entry from the Access
// Control List of a DAOS pool.
type poolDeleteACLCmd struct {
poolCmd
Expand Down
18 changes: 9 additions & 9 deletions src/control/cmd/dmg/pretty/system.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//
// (C) Copyright 2021-2024 Intel Corporation.
// (C) Copyright 2025 Hewlett Packard Enterprise Development LP
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
Expand Down Expand Up @@ -222,24 +223,23 @@ func PrintSystemCleanupResponse(out io.Writer, resp *control.SystemCleanupResp,
fmt.Fprintln(out, "System Cleanup Success")
}

// PrintSystemDrainResponse generates a human-readable representation of the supplied
// SystemDrainResp struct and writes it to the supplied io.Writer. Result related errors written to
// error io.Writer.
func PrintSystemDrainResponse(out io.Writer, resp *control.SystemDrainResp) {
if len(resp.Results) == 0 {
fmt.Fprintln(out, "No pool ranks drained")
// PrintPoolRankResults generates a table showing results of operations on pool ranks. Each row will
// indicate a result for a group of ranks on a pool.
func PrintPoolRankResults(out io.Writer, results []*control.PoolRankResult) {
if len(results) == 0 {
fmt.Fprintln(out, "No pool ranks processed")
return
}

titles := []string{"Pool", "Ranks", "Result", "Reason"}
formatter := txtfmt.NewTableFormatter(titles...)

var table []txtfmt.TableRow
for _, r := range resp.Results {
for _, r := range results {
result := "OK"
reason := "N/A"
reason := "-"
if r.Status != 0 {
result = "Failed"
result = "FAIL"
reason = r.Msg
}
row := txtfmt.TableRow{
Expand Down
59 changes: 24 additions & 35 deletions src/control/cmd/dmg/pretty/system_test.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//
// (C) Copyright 2021-2024 Intel Corporation.
// (C) Copyright 2025 Hewlett Packard Enterprise Development LP
//
// SPDX-License-Identifier: BSD-2-Clause-Patent
//
Expand Down Expand Up @@ -612,71 +613,59 @@ Unknown 3 hosts: foo[7-9]
}
}

func TestPretty_PrintSystemDrainResp(t *testing.T) {
func TestPretty_PrintPoolRankResults(t *testing.T) {
for name, tc := range map[string]struct {
resp *control.SystemDrainResp
expOut string
results []*control.PoolRankResult
expOut string
}{
"empty response": {
resp: &control.SystemDrainResp{},
expOut: `
No pool ranks drained
`,
},
"normal response": {
resp: &control.SystemDrainResp{
Results: []*control.DrainResult{
{PoolID: test.MockUUID(1), Ranks: "0-3"},
{PoolID: test.MockUUID(2), Ranks: "1-4"},
},
results: []*control.PoolRankResult{
{PoolID: test.MockUUID(1), Ranks: "0-3"},
{PoolID: test.MockUUID(2), Ranks: "1-4"},
},
expOut: `
Pool Ranks Result Reason
---- ----- ------ ------
00000001-0001-0001-0001-000000000001 0-3 OK N/A
00000002-0002-0002-0002-000000000002 1-4 OK N/A
00000001-0001-0001-0001-000000000001 0-3 OK -
00000002-0002-0002-0002-000000000002 1-4 OK -
`,
},
"normal response; use labels": {
resp: &control.SystemDrainResp{
Results: []*control.DrainResult{
{PoolID: "label1", Ranks: "0-3"},
{PoolID: "label2", Ranks: "1-4"},
},
results: []*control.PoolRankResult{
{PoolID: "label1", Ranks: "0-3"},
{PoolID: "label2", Ranks: "1-4"},
},
expOut: `
Pool Ranks Result Reason
---- ----- ------ ------
label1 0-3 OK N/A
label2 1-4 OK N/A
label1 0-3 OK -
label2 1-4 OK -
`,
},
"response with failures": {
resp: &control.SystemDrainResp{
Results: []*control.DrainResult{
{PoolID: test.MockUUID(1), Ranks: "1-2"},
{PoolID: test.MockUUID(2), Ranks: "0"},
{
PoolID: test.MockUUID(2), Ranks: "1-2",
Status: -1, Msg: "fail1",
},
results: []*control.PoolRankResult{
{PoolID: test.MockUUID(1), Ranks: "1-2"},
{PoolID: test.MockUUID(2), Ranks: "0"},
{
PoolID: test.MockUUID(2), Ranks: "1-2",
Status: -1, Msg: "fail1",
},
},
expOut: `
Pool Ranks Result Reason
---- ----- ------ ------
00000001-0001-0001-0001-000000000001 1-2 OK N/A
00000002-0002-0002-0002-000000000002 0 OK N/A
00000002-0002-0002-0002-000000000002 1-2 Failed fail1
00000001-0001-0001-0001-000000000001 1-2 OK -
00000002-0002-0002-0002-000000000002 0 OK -
00000002-0002-0002-0002-000000000002 1-2 FAIL fail1
`,
},
} {
t.Run(name, func(t *testing.T) {
var out strings.Builder
PrintSystemDrainResponse(&out, tc.resp)
PrintPoolRankResults(&out, tc.results)

if diff := cmp.Diff(strings.TrimLeft(tc.expOut, "\n"), out.String()); diff != "" {
t.Fatalf("unexpected stdout (-want, +got):\n%s\n", diff)
Expand Down
Loading

0 comments on commit a94ac88

Please sign in to comment.