Skip to content

Commit

Permalink
fix(node): add the node.Status.Condition ModulesLoaded
Browse files Browse the repository at this point in the history
Check whether the `dm_crypt` kernel module is enabled; it is the first module covered by this check.

ref: longhorn/longhorn 9153

Signed-off-by: James Lu <[email protected]>
  • Loading branch information
mantissahz committed Sep 5, 2024
1 parent 1cd29fd commit 3a9ddaa
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 38 deletions.
141 changes: 103 additions & 38 deletions controller/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
v1core "k8s.io/client-go/kubernetes/typed/core/v1"

lhexec "github.com/longhorn/go-common-libs/exec"
lhio "github.com/longhorn/go-common-libs/io"
lhns "github.com/longhorn/go-common-libs/ns"
lhtypes "github.com/longhorn/go-common-libs/types"

Expand All @@ -47,6 +48,8 @@ const (

unknownDiskID = "UNKNOWN_DISKID"

kernelConfigFilePathPrefix = "/host/boot/config-"

snapshotChangeEventQueueMax = 1048576
)

Expand Down Expand Up @@ -922,6 +925,7 @@ func (nc *NodeController) environmentCheck(kubeNode *corev1.Node, node *longhorn
namespaces := []lhtypes.Namespace{lhtypes.NamespaceMnt, lhtypes.NamespaceNet}
nc.syncPackagesInstalled(kubeNode, node, namespaces)
nc.syncMultipathd(node, namespaces)
nc.checkModulesLoaded(kubeNode, node, namespaces)
nc.syncNFSClientVersion(kubeNode, node, namespaces)
}

Expand Down Expand Up @@ -1027,60 +1031,87 @@ func (nc *NodeController) syncMultipathd(node *longhorn.Node, namespaces []lhtyp
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, "", "")
}

func (nc *NodeController) syncNFSClientVersion(kubeNode *corev1.Node, node *longhorn.Node, namespaces []lhtypes.Namespace) {
kernelVersion := kubeNode.Status.NodeInfo.KernelVersion
nfsClientVersions := []string{"CONFIG_NFS_V4_2", "CONFIG_NFS_V4_1", "CONFIG_NFS_V4"}
func (nc *NodeController) checkModulesLoaded(kubeNode *corev1.Node, node *longhorn.Node, namespaces []lhtypes.Namespace) {
checkingModules := map[string]string{
"CONFIG_DM_CRYPT": "dm_crypt",
}

nsexec, err := lhns.NewNamespaceExecutor(lhtypes.ProcessNone, lhtypes.HostProcDirectory, namespaces)
notFoundModulesUsingkmod := map[string]string{}
for module, kmodName := range checkingModules {
kmodResult, err := lhexec.NewExecutor().Execute(nil, "kmod", []string{"list"}, lhtypes.ExecuteDefaultTimeout)
if err != nil {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeModulesLoaded, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonNamespaceExecutorErr),
fmt.Sprintf("Failed to check kernel module %v: %v", module, err.Error()))
return
}
if !strings.Contains(kmodResult, kmodName) {
notFoundModulesUsingkmod[module] = kmodName
}
}

if len(notFoundModulesUsingkmod) == 0 {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeModulesLoaded, longhorn.ConditionStatusTrue, "", "")
return
}

kernelConfigPath := kernelConfigFilePathPrefix + kubeNode.Status.NodeInfo.KernelVersion
configContent, err := lhio.ReadFileContent(kernelConfigPath)
if err != nil {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonNamespaceExecutorErr),
fmt.Sprintf("Failed to get namespace executor: %v", err.Error()))
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeModulesLoaded, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonKernelConfigIsNotFound),
fmt.Sprintf("Unable to get %v content to check if %v are enabled: %v", kernelConfigPath, checkingModules, err.Error()))
return
}

notLoadedModules := []string{}
for module, kmodName := range notFoundModulesUsingkmod {
moduleEnabled, err := nc.checkKernelModuleEabled(configContent, module, kmodName)
if err != nil {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeModulesLoaded, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonNamespaceExecutorErr),
fmt.Sprintf("Failed to check kernel module %v: %v", module, err.Error()))
return
}
if !moduleEnabled {
notLoadedModules = append(notLoadedModules, kmodName)
}
}

if len(notLoadedModules) != 0 {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeModulesLoaded, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonModulesNotLoaded),
fmt.Sprintf("Modules %v are not loaded on node %v", notLoadedModules, node.Name))
return
}

kernelConfigPath := "/boot/config-" + kernelVersion
args := []string{kernelConfigPath}
if _, err := nsexec.Execute(nil, "ls", args, lhtypes.ExecuteDefaultTimeout); err != nil {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeModulesLoaded, longhorn.ConditionStatusTrue, "", "")
}

func (nc *NodeController) syncNFSClientVersion(kubeNode *corev1.Node, node *longhorn.Node, namespaces []lhtypes.Namespace) {
kernelVersion := kubeNode.Status.NodeInfo.KernelVersion
nfsClientVersions := []string{"CONFIG_NFS_V4_2", "CONFIG_NFS_V4_1", "CONFIG_NFS_V4"}

kernelConfigPath := kernelConfigFilePathPrefix + kernelVersion
configContent, err := lhio.ReadFileContent(kernelConfigPath)
if err != nil {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonKernelConfigIsNotFound),
fmt.Sprintf("Unable to find %v for checking %v: %v", kernelConfigPath, nfsClientVersions, err.Error()))
return
}

for _, ver := range nfsClientVersions {
args := []string{ver + "=", kernelConfigPath}
result, err := nsexec.Execute(nil, "grep", args, lhtypes.ExecuteDefaultTimeout)
moduleEnabled, err := nc.checkKernelModuleEabled(configContent, ver, "nfs")
if err != nil {
nc.logger.WithError(err).Debugf("Failed to find kernel config %v on node %v", ver, node.Name)
continue
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeModulesLoaded, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonNamespaceExecutorErr),
fmt.Sprintf("Failed to check kernel module %v: %v", ver, err.Error()))
return
}
enabled := strings.TrimSpace(strings.Split(result, "=")[1])
switch enabled {
case "y":
if moduleEnabled {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusTrue, "", "")
return
case "m":
kmodResult, err := lhexec.NewExecutor().Execute(nil, "kmod", []string{"list"}, lhtypes.ExecuteDefaultTimeout)
if err != nil {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonNFSClientIsNotFound),
fmt.Sprintf("Failed to execute command `kmod`: %v", err.Error()))
return
}
res, err := lhexec.NewExecutor().ExecuteWithStdinPipe("grep", []string{"nfs"}, kmodResult, lhtypes.ExecuteDefaultTimeout)
if err != nil {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonNFSClientIsNotFound),
fmt.Sprintf("Failed to execute command `grep`: %v", err.Error()))
return
}
if res != "" {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusTrue, "", "")
return
}
default:
nc.logger.Debugf("Unknown kernel config value for %v: %v", ver, enabled)
}
}

Expand All @@ -1089,6 +1120,40 @@ func (nc *NodeController) syncNFSClientVersion(kubeNode *corev1.Node, node *long
fmt.Sprintf("NFS clients %v not found. At least one should be enabled", nfsClientVersions))
}

// checkKernelModuleEabled reports whether the kernel option identified by the
// config key module (for example "CONFIG_DM_CRYPT") is usable, based on
// configContent, the text of a kernel config file.
//
//   - "<module>=y" returns true.
//   - "<module>=m" returns true only when the output of `kmod list` contains
//     kmodName (substring match, as before).
//   - A missing key, a "# <module> is not set" comment line, or any other
//     value returns false.
//
// A non-nil error is returned only when `kmod list` cannot be executed.
func (nc *NodeController) checkKernelModuleEabled(configContent, module, kmodName string) (bool, error) {
	enabled, found := findKernelConfigValue(configContent, module)
	if !found {
		nc.logger.Debugf("Kernel config %v not found", module)
		return false, nil
	}

	switch enabled {
	case "y":
		return true, nil
	case "m":
		kmodResult, err := lhexec.NewExecutor().Execute(nil, "kmod", []string{"list"}, lhtypes.ExecuteDefaultTimeout)
		if err != nil {
			return false, errors.Wrap(err, "Failed to execute command `kmod`")
		}
		if strings.Contains(kmodResult, kmodName) {
			return true, nil
		}
	default:
		nc.logger.Debugf("Unknown kernel config value for %v: %v", module, enabled)
	}

	return false, nil
}

// findKernelConfigValue returns the trimmed value assigned to the exact key
// module in configContent, and whether such an assignment line exists.
//
// Only lines that start with "<module>=" (after trimming surrounding
// whitespace) are considered. This deliberately skips:
//   - comment lines such as "# CONFIG_FOO is not set", which mention the key
//     but carry no "=" and previously caused an index-out-of-range panic;
//   - longer keys that merely share a prefix, e.g. "CONFIG_NFS_V4_1=y" when
//     looking up "CONFIG_NFS_V4".
func findKernelConfigValue(configContent, module string) (string, bool) {
	prefix := module + "="
	for _, line := range strings.Split(configContent, "\n") {
		line = strings.TrimSpace(line)
		if strings.HasPrefix(line, prefix) {
			return strings.TrimSpace(strings.TrimPrefix(line, prefix)), true
		}
	}
	return "", false
}

func (nc *NodeController) getImTypeDataEngines(node *longhorn.Node) map[longhorn.InstanceManagerType][]longhorn.DataEngineType {
log := getLoggerForNode(nc.logger, node)

Expand Down
10 changes: 10 additions & 0 deletions controller/node_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ func (s *NodeControllerSuite) TestManagerPodUp(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
},
},
Expand Down Expand Up @@ -273,6 +274,7 @@ func (s *NodeControllerSuite) TestManagerPodDown(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonNoMountPropagationSupport),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
},
},
Expand Down Expand Up @@ -360,6 +362,7 @@ func (s *NodeControllerSuite) TestKubeNodeDown(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
},
},
Expand Down Expand Up @@ -447,6 +450,7 @@ func (s *NodeControllerSuite) TestKubeNodePressure(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
},
},
Expand Down Expand Up @@ -569,6 +573,7 @@ func (s *NodeControllerSuite) TestUpdateDiskStatus(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
},
DiskStatus: map[string]*longhorn.DiskStatus{
Expand Down Expand Up @@ -720,6 +725,7 @@ func (s *NodeControllerSuite) TestCleanDiskStatus(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
},
DiskStatus: map[string]*longhorn.DiskStatus{
Expand Down Expand Up @@ -877,6 +883,7 @@ func (s *NodeControllerSuite) TestDisableDiskOnFilesystemChange(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
},
DiskStatus: map[string]*longhorn.DiskStatus{
Expand Down Expand Up @@ -1005,6 +1012,7 @@ func (s *NodeControllerSuite) TestCreateDefaultInstanceManager(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
},
DiskStatus: map[string]*longhorn.DiskStatus{
Expand Down Expand Up @@ -1150,6 +1158,7 @@ func (s *NodeControllerSuite) TestCleanupRedundantInstanceManagers(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
},
DiskStatus: map[string]*longhorn.DiskStatus{
Expand Down Expand Up @@ -1265,6 +1274,7 @@ func (s *NodeControllerSuite) TestCleanupAllInstanceManagers(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
},
DiskStatus: map[string]*longhorn.DiskStatus{},
Expand Down
2 changes: 2 additions & 0 deletions k8s/pkg/apis/longhorn/v1beta2/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const (
NodeConditionTypeReady = "Ready"
NodeConditionTypeMountPropagation = "MountPropagation"
NodeConditionTypeMultipathd = "Multipathd"
NodeConditionTypeModulesLoaded = "ModulesLoaded"
NodeConditionTypeRequiredPackages = "RequiredPackages"
NodeConditionTypeNFSClientInstalled = "NFSClientInstalled"
NodeConditionTypeSchedulable = "Schedulable"
Expand All @@ -22,6 +23,7 @@ const (
NodeConditionReasonMultipathdIsRunning = "MultipathdIsRunning"
NodeConditionReasonUnknownOS = "UnknownOS"
NodeConditionReasonNamespaceExecutorErr = "NamespaceExecutorErr"
NodeConditionReasonModulesNotLoaded = "ModulesNotLoaded"
NodeConditionReasonPackagesNotInstalled = "PackagesNotInstalled"
NodeConditionReasonKernelConfigIsNotFound = "KernelConfigIsNotFound"
NodeConditionReasonNFSClientIsNotFound = "NFSClientIsNotFound"
Expand Down

0 comments on commit 3a9ddaa

Please sign in to comment.