Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NMS-15776: Parallelize DNS lookups during audit phase #6276

Merged
merged 10 commits into from
Jul 17, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,27 @@ For a large number of devices, you may want to set the `scan-interval` to `0` to
NOTE: Even if `scan-interval` is set to `0`, new nodes added to a requisition are scanned during import.
You can also disable this scan by setting `scan-interval` to `-1`.

=== Tuning DNS reverse lookups

During the provisioning process hostnames are determined for each interface IP address by DNS reverse lookups.
A thread pool is used to allow concurrent DNS lookups.
You can use the following properties to configure this thread pool:

.Thread pool configuration parameters for DNS lookups
[options="header"]
[cols="1,3,2"]
|===
| Parameter
| Description
| Default value

| org.opennms.netmgt.provision.dns.client.rpc.maxThreadPoolSize
| Maximum thread pool size.
| 64
| org.opennms.netmgt.provision.dns.client.rpc.queueCapacity
| Maximum queue size for pending DNS lookups.
| 4096
|===

NOTE: To completely disable DNS reverse lookups, set the property `org.opennms.provisiond.reverseResolveRequisitionIpInterfaceHostnames` to `false`.

10 changes: 9 additions & 1 deletion opennms-base-assembly/src/main/filtered/etc/opennms.properties
Original file line number Diff line number Diff line change
Expand Up @@ -861,6 +861,14 @@ org.opennms.newts.nan_on_counter_wrap=true
# By default this property is not used and empty.
# org.opennms.netmgt.search.info=

# # ###### Mate Expressions ######
# ###### Mate Expressions ######
# Limit of recursion depth while evaluating mate expressions
# org.opennms.mate.maxRecursionDepth=8

# ###### DNS tuning options ######
# A thread pool is used to allow concurrent DNS lookups on OpenNMS instance.
# org.opennms.netmgt.provision.dns.client.rpc.maxThreadPoolSize=64
# org.opennms.netmgt.provision.dns.client.rpc.queueCapacity=4096

# By default hostnames are determined for a node's IP addresses during the provisioning's audit phase.
# org.opennms.provisiond.reverseResolveRequisitionIpInterfaceHostnames=true
5 changes: 5 additions & 0 deletions opennms-provision/opennms-detectorclient-rpc/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@
<artifactId>dnsjava-dependencies</artifactId>
<type>pom</type>
</dependency>
<dependency>
<groupId>io.github.resilience4j</groupId>
<artifactId>resilience4j-bulkhead</artifactId>
<version>${resilience4jVersion}</version>
</dependency>
<dependency>
<groupId>org.opennms.core.ipc.rpc</groupId>
<artifactId>org.opennms.core.ipc.rpc.jms-impl</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,28 @@
import org.slf4j.LoggerFactory;
import org.xbill.DNS.Address;

import io.github.resilience4j.bulkhead.ThreadPoolBulkhead;
import io.github.resilience4j.bulkhead.ThreadPoolBulkheadConfig;

public class DnsLookupClientRpcModule extends AbstractXmlRpcModule<DnsLookupRequestDTO, DnsLookupResponseDTO> {

private static final Logger LOG = LoggerFactory.getLogger(DnsLookupClientRpcModule.class);

public static final String RPC_MODULE_ID = "DNS";

public DnsLookupClientRpcModule() {
private final ThreadPoolBulkhead threadPoolBulkhead;

public DnsLookupClientRpcModule(final int maxThreadPoolSize, final int queueCapacity) {
super(DnsLookupRequestDTO.class, DnsLookupResponseDTO.class);

final ThreadPoolBulkheadConfig threadPoolBulkheadConfig = ThreadPoolBulkheadConfig.custom()
.queueCapacity(queueCapacity)
.maxThreadPoolSize(maxThreadPoolSize)
.build();

this.threadPoolBulkhead = ThreadPoolBulkhead.of("dns-lookup-rpc-module", threadPoolBulkheadConfig);

LOG.debug("Configuring ThreadPoolBulkhead: maxThreadPoolSize={}, queueSize={}", maxThreadPoolSize, queueCapacity);
}

@Override
Expand All @@ -60,36 +74,31 @@ public String getId() {

@Override
public CompletableFuture<DnsLookupResponseDTO> execute(DnsLookupRequestDTO request) {
final CompletableFuture<DnsLookupResponseDTO> future = new CompletableFuture<>();
try {
final InetAddress addr = InetAddressUtils.addr(request.getHostRequest());
final DnsLookupResponseDTO dto = new DnsLookupResponseDTO();
final QueryType queryType = request.getQueryType();
if (queryType.equals(QueryType.LOOKUP)) {
dto.setHostResponse(addr.getHostAddress());
} else if (queryType.equals(QueryType.REVERSE_LOOKUP)) {
// Attempt to retrieve the fully qualified domain name for this IP address
String hostName = addr.getCanonicalHostName();
if (InetAddressUtils.str(addr).equals(hostName)) {
// The given host name matches the textual representation of
// the IP address, which means that the reverse lookup failed
// NMS-9356: InetAddress#getCanonicalHostName requires PTR records
// to have a corresponding A record in order to succeed, so we
// try using dnsjava's implementation to work around this
try {
hostName = Address.getHostName(addr);
} catch (UnknownHostException e) {
LOG.warn("Failed to retrieve the fully qualified domain name for {}. "
+ "Using the textual representation of the IP address.", addr);
return this.threadPoolBulkhead.submit(() -> {
final InetAddress addr = InetAddressUtils.addr(request.getHostRequest());
final DnsLookupResponseDTO dto = new DnsLookupResponseDTO();
final QueryType queryType = request.getQueryType();
if (queryType.equals(QueryType.LOOKUP)) {
dto.setHostResponse(addr.getHostAddress());
} else if (queryType.equals(QueryType.REVERSE_LOOKUP)) {
// Attempt to retrieve the fully qualified domain name for this IP address
String hostName = addr.getCanonicalHostName();
if (InetAddressUtils.str(addr).equals(hostName)) {
// The given host name matches the textual representation of
// the IP address, which means that the reverse lookup failed
// NMS-9356: InetAddress#getCanonicalHostName requires PTR records
// to have a corresponding A record in order to succeed, so we
// try using dnsjava's implementation to work around this
try {
hostName = Address.getHostName(addr);
} catch (UnknownHostException e) {
LOG.warn("Failed to retrieve the fully qualified domain name for {}. "
+ "Using the textual representation of the IP address.", addr);
}
}
dto.setHostResponse(hostName);
}
dto.setHostResponse(hostName);
}
future.complete(dto);
} catch (Exception e) {
future.completeExceptionally(e);
}
return future;
return dto;
}).toCompletableFuture();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@

<context:annotation-config />

<bean id="dnsLookupClientRpcModule" class="org.opennms.netmgt.provision.dns.client.rpc.DnsLookupClientRpcModule" />
<bean id="dnsLookupClientRpcModule" class="org.opennms.netmgt.provision.dns.client.rpc.DnsLookupClientRpcModule" >
<constructor-arg value="${org.opennms.netmgt.provision.dns.client.rpc.maxThreadPoolSize:4096}"/>
<constructor-arg value="${org.opennms.netmgt.provision.dns.client.rpc.queueCapacity:64}"/>
</bean>

<bean id="locationAwareDnsClient" class="org.opennms.netmgt.provision.dns.client.rpc.LocationAwareDnsLookupClientRpcImpl" />
<onmsgi:service interface="org.opennms.netmgt.provision.LocationAwareDnsLookupClient" ref="locationAwareDnsClient"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
http://aries.apache.org/schemas/blueprint-ext/blueprint-ext-1.5.xsd
">

<cm:property-placeholder id="dnsLookupProperties" persistent-id="org.opennms.netmgt.provision.dns.client.rpc" update-strategy="reload">
<cm:default-properties>
<cm:property name="queueCapacity" value="4096"/>
<cm:property name="maxThreadPoolSize" value="64"/>
</cm:default-properties>
</cm:property-placeholder>

<reference id="serviceDetectorRegistry" interface="org.opennms.netmgt.provision.detector.registry.api.ServiceDetectorRegistry" availability="mandatory"/>
<bean id="detectorExecutor" class="java.util.concurrent.Executors" factory-method="newCachedThreadPool"/>
<bean id="detectorRpcModule" class="org.opennms.netmgt.provision.detector.client.rpc.DetectorClientRpcModule" >
Expand All @@ -20,7 +27,10 @@
<service ref="detectorRpcModule" interface="org.opennms.core.rpc.api.RpcModule" />

<service interface="org.opennms.core.rpc.api.RpcModule">
<bean class="org.opennms.netmgt.provision.dns.client.rpc.DnsLookupClientRpcModule" />
<bean class="org.opennms.netmgt.provision.dns.client.rpc.DnsLookupClientRpcModule">
<argument value="${maxThreadPoolSize}"/>
<argument value="${queueCapacity}"/>
</bean>
</service>

</blueprint>
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,7 @@
import java.net.InetAddress;
import java.util.Objects;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;

import org.opennms.core.utils.InetAddressUtils;
import org.opennms.netmgt.provision.LocationAwareDnsLookupClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -49,16 +47,8 @@ public DefaultHostnameResolver(LocationAwareDnsLookupClient locationAwareDnsLook
}

@Override
public String getHostname(final InetAddress addr, final String location) {
public CompletableFuture<String> getHostnameAsync(final InetAddress addr, final String location) {
LOG.debug("Performing reverse lookup on {} at location {}", addr, location);
final CompletableFuture<String> future = m_locationAwareDnsLookupClient.reverseLookup(addr, location);
try {
final String result = future.get();
LOG.debug("Reverse lookup returned {} for {} at location {}", result, addr, location);
return result;
} catch (InterruptedException | ExecutionException e) {
LOG.warn("Reverse lookup failed for {} at location {}. Using IP address as hostname.", addr, location);
return InetAddressUtils.str(addr);
}
return m_locationAwareDnsLookupClient.reverseLookup(addr, location);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,26 @@
package org.opennms.netmgt.provision.service;

import java.net.InetAddress;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;

import org.opennms.core.utils.InetAddressUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public interface HostnameResolver {
public String getHostname(final InetAddress addr, final String location);
Logger LOG = LoggerFactory.getLogger(HostnameResolver.class);

default String getHostname(final InetAddress addr, final String location) {
try {
final String result = getHostnameAsync(addr, location).get();
LOG.debug("Reverse lookup returned {} for {} at location {}", result, addr, location);
return result;
} catch (InterruptedException | ExecutionException e) {
LOG.warn("Reverse lookup failed for {} at location {}. Using IP address as hostname.", addr, location);
return InetAddressUtils.str(addr);
}
}

CompletableFuture<String> getHostnameAsync(final InetAddress addr, final String location);
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,35 @@

package org.opennms.netmgt.provision.service;

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.CompletableFuture;

import org.opennms.netmgt.provision.persist.AbstractRequisitionVisitor;
import org.opennms.netmgt.provision.persist.OnmsAssetRequisition;
import org.opennms.netmgt.provision.persist.OnmsInterfaceMetaDataRequisition;
import org.opennms.netmgt.provision.persist.OnmsIpInterfaceRequisition;
import org.opennms.netmgt.provision.persist.OnmsNodeMetaDataRequisition;
import org.opennms.netmgt.provision.persist.OnmsMonitoredServiceRequisition;
import org.opennms.netmgt.provision.persist.OnmsNodeCategoryRequisition;
import org.opennms.netmgt.provision.persist.OnmsNodeMetaDataRequisition;
import org.opennms.netmgt.provision.persist.OnmsNodeRequisition;
import org.opennms.netmgt.provision.persist.OnmsServiceMetaDataRequisition;
import org.opennms.netmgt.provision.persist.requisition.Requisition;
import org.opennms.netmgt.provision.service.operations.ImportOperationsManager;
import org.opennms.netmgt.provision.service.operations.SaveOrUpdateOperation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class RequisitionAccountant extends AbstractRequisitionVisitor {
private final ImportOperationsManager m_opsMgr;
private static final Logger LOG = LoggerFactory.getLogger(RequisitionAccountant.class);

private final ImportOperationsManager m_opsMgr;
private SaveOrUpdateOperation m_currentOp;
private String monitorKey;


private final Set<CompletableFuture<Void>> m_dnsLookups = Collections.synchronizedSet(new HashSet<>());

/**
* <p>Constructor for RequisitionAccountant.</p>
*
Expand All @@ -70,8 +82,8 @@ public void completeNode(OnmsNodeRequisition nodeReq) {
/** {@inheritDoc} */
@Override
public void visitInterface(OnmsIpInterfaceRequisition ifaceReq) {
m_currentOp.foundInterface(ifaceReq.getIpAddr(), ifaceReq.getDescr(), ifaceReq.getSnmpPrimary(), ifaceReq.getManaged(), ifaceReq.getStatus());

m_currentOp.foundInterface(ifaceReq.getIpAddr(), ifaceReq.getDescr(), ifaceReq.getSnmpPrimary(), ifaceReq.getManaged(), ifaceReq.getStatus(), m_dnsLookups);
LOG.debug("{} DNS lookups scheduled, {} DNS lookups completed", dnsLookupsTotal(), dnsLookupsCompleted());
}

/** {@inheritDoc} */
Expand Down Expand Up @@ -106,4 +118,23 @@ public void visitInterfaceMetaData(OnmsInterfaceMetaDataRequisition metaDataReq)
public void visitServiceMetaData(OnmsServiceMetaDataRequisition metaDataReq) {
m_currentOp.foundServiceMetaData(metaDataReq.getContext(), metaDataReq.getKey(), metaDataReq.getValue());
}

int dnsLookupsCompleted() {
return (int) m_dnsLookups.stream().filter(f -> f.isDone()).count();
}

int dnsLookupsPending() {
return dnsLookupsTotal() - dnsLookupsCompleted();
}

int dnsLookupsTotal() {
return m_dnsLookups.size();
}

@Override
public void completeModelImport(Requisition req) {
LOG.debug("Waiting for {} scheduled DNS lookups, {} DNS lookups pending", dnsLookupsTotal(), dnsLookupsPending());
CompletableFuture.allOf(m_dnsLookups.toArray(CompletableFuture[]::new)).join();
LOG.debug("All {} scheduled DNS lookups completed", dnsLookupsTotal());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@
package org.opennms.netmgt.provision.service.operations;

import java.net.InetAddress;
import java.util.Set;
import java.util.concurrent.CompletableFuture;

import org.opennms.core.utils.InetAddressUtils;
import org.opennms.netmgt.dao.api.MonitoringLocationDao;
import org.opennms.netmgt.model.OnmsCategory;
import org.opennms.netmgt.model.OnmsIpInterface;
Expand Down Expand Up @@ -99,8 +100,9 @@ public ScanManager getScanManager() {
* @param primaryType a {@link InterfaceSnmpPrimaryType} object.
* @param managed a boolean.
* @param status a int.
* @param dnsLookups the list of DNS lookup futures.
*/
public void foundInterface(InetAddress addr, Object descr, final PrimaryType primaryType, boolean managed, int status) {
public void foundInterface(InetAddress addr, Object descr, final PrimaryType primaryType, boolean managed, int status, final Set<CompletableFuture<Void>> dnsLookups) {

if (addr == null) {
LOG.error("Found interface on node {} with an empty/invalid ipaddr! Ignoring!", m_node.getLabel());
Expand All @@ -111,10 +113,6 @@ public void foundInterface(InetAddress addr, Object descr, final PrimaryType pri
m_currentInterface.setIsManaged(status == 3 ? "U" : "M");
m_currentInterface.setIsSnmpPrimary(primaryType);

if (addr != null && System.getProperty("org.opennms.provisiond.reverseResolveRequisitionIpInterfaceHostnames", "true").equalsIgnoreCase("true")) {
m_currentInterface.setIpHostName(getProvisionService().getHostnameResolver().getHostname(addr, m_node.getLocation().getLocationName()));
}

if (PrimaryType.PRIMARY.equals(primaryType)) {
if (addr != null) {
m_scanManager = new ScanManager(getProvisionService().getLocationAwareSnmpClient(), addr);
Expand All @@ -124,6 +122,10 @@ public void foundInterface(InetAddress addr, Object descr, final PrimaryType pri
//FIXME: verify this doesn't conflict with constructor. The constructor already adds this
//interface to the node.
m_node.addIpInterface(m_currentInterface);

if (System.getProperty("org.opennms.provisiond.reverseResolveRequisitionIpInterfaceHostnames", "true").equalsIgnoreCase("true")) {
dnsLookups.add(getProvisionService().getHostnameResolver().getHostnameAsync(addr, m_node.getLocation().getLocationName()).thenAccept(s -> m_node.getInterfaceWithAddress(addr).setIpHostName(s)));
}
}

/**
Expand Down
Loading