From dbd69a2a403547faad476b76df13279d6caaf684 Mon Sep 17 00:00:00 2001 From: iximeow Date: Tue, 29 Oct 2024 21:50:42 +0000 Subject: [PATCH] [dns-server] only return answers to the incoming question (#6308) As noted in https://github.com/oxidecomputer/omicron/issues/4051, queries to the internal DNS server would get any records we have for a name in response, rather than only records matching the query incoming query type. That behavior is confusing, but worse, wrong. Subtly, this is not actually a misbehavior I believe we can observe through `trust_dns_resolver`: the resolver from that crate includes its own `CachingClient`. As a side effect of upstream answers going through that caching client, incorrect Answers records are cached and only correct answers actually make it out to us as consumers of `trust_dns_resolver`. But as is plenty clear in #4051, `dig` and other DNS clients can get incoherent answers! Simple enough to fix: only return answers that are answers to the question we were asked. --- dns-server/src/dns_server.rs | 13 ++++ dns-server/tests/basic_test.rs | 116 ++++++++++++++++++++++++++++++++- 2 files changed, 126 insertions(+), 3 deletions(-) diff --git a/dns-server/src/dns_server.rs b/dns-server/src/dns_server.rs index 34750719c1..6259c4951e 100644 --- a/dns-server/src/dns_server.rs +++ b/dns-server/src/dns_server.rs @@ -274,6 +274,19 @@ async fn handle_dns_message( let mut additional_records = vec![]; let response_records = records .into_iter() + .filter(|record| { + let ty = query.query_type(); + if ty == RecordType::ANY { + return true; + } + + match (ty, record) { + (RecordType::A, DnsRecord::A(_)) => true, + (RecordType::AAAA, DnsRecord::Aaaa(_)) => true, + (RecordType::SRV, DnsRecord::Srv(_)) => true, + _ => false, + } + }) .map(|record| { let record = dns_record_to_record(&name, record)?; diff --git a/dns-server/tests/basic_test.rs b/dns-server/tests/basic_test.rs index b555b82a80..30026dc08c 100644 --- a/dns-server/tests/basic_test.rs +++ b/dns-server/tests/basic_test.rs @@ -6,11 +6,20 @@ use anyhow::{Context, Result}; use camino_tempfile::Utf8TempDir; use dns_service_client::Client; use dropshot::{test_util::LogContext, HandlerTaskMode}; +use hickory_client::{ + client::{AsyncClient, ClientHandle}, + error::ClientError, + udp::UdpClientStream, +}; use hickory_resolver::error::ResolveErrorKind; use hickory_resolver::TokioAsyncResolver; use hickory_resolver::{ config::{NameServerConfig, Protocol, ResolverConfig, ResolverOpts}, - proto::op::ResponseCode, + proto::{ + op::ResponseCode, + rr::{DNSClass, Name, RecordType}, + xfer::DnsResponse, + }, }; use internal_dns_types::config::{ DnsConfigParams, DnsConfigZone, DnsRecord, Srv, @@ -85,6 +94,59 @@ pub async fn aaaa_crud() -> Result<(), anyhow::Error> { Ok(()) } +#[tokio::test] +pub async fn answers_match_question() -> Result<(), anyhow::Error> { + let test_ctx = init_client_server("answers_match_question").await?; + let client = &test_ctx.client; + + // records should initially be empty + let records = dns_records_list(client, TEST_ZONE).await?; + assert!(records.is_empty()); + + // add an aaaa record + let name = "devron".to_string(); + let addr = Ipv6Addr::new(0xfd, 0, 0, 0, 0, 0, 0, 0x1); + let aaaa = DnsRecord::Aaaa(addr); + let input_records = HashMap::from([(name.clone(), vec![aaaa])]); + dns_records_create(client, TEST_ZONE, input_records.clone()).await?; + + // read back the aaaa record + let records = dns_records_list(client, TEST_ZONE).await?; + assert_eq!(input_records, records); + + let name = Name::from_ascii(&(name + "." + TEST_ZONE + ".")) + .expect("can construct name for query"); + + // If a server returns answers incorrectly, such as sending AAAA answers to + // an A query, it turns out `hickory-resolver`'s internal CachingClient + // transparently corrects the misbehavior. The caching client will cache the + // extra record, then see there are no A records matching the query, and + // finally send a correct response with no answers. + // + // `raw_dns_client_query` avoids using a hickory Resolver, so we can assert + // on the exact answer from our server. + let response = raw_dns_client_query( + test_ctx.dns_server.local_address(), + name, + RecordType::A, + ) + .await + .expect("test query is ok"); + + // The answer we expect is: + // * no error: the domain exists, so NXDOMAIN would be wrong + // * no answers: we ask specifically for a record type the server does not + // have + // * no additionals: the server could return AAAA records as additionals to + // an A query, but does not currently. + assert_eq!(response.header().response_code(), ResponseCode::NoError); + assert_eq!(response.answers(), &[]); + assert_eq!(response.additionals(), &[]); + + test_ctx.cleanup().await; + Ok(()) +} + #[tokio::test] pub async fn srv_crud() -> Result<(), anyhow::Error> { let test_ctx = init_client_server("srv_crud").await?; @@ -97,6 +159,7 @@ pub async fn srv_crud() -> Result<(), anyhow::Error> { // add a srv record let name = "hromi".to_string(); + let test_fqdn = name.clone() + "." + TEST_ZONE + "."; let target = "outpost47"; let srv = Srv { prio: 47, @@ -121,8 +184,13 @@ pub async fn srv_crud() -> Result<(), anyhow::Error> { )]); dns_records_create(client, TEST_ZONE, input_records.clone()).await?; - // resolve the srv - let response = resolver.srv_lookup(name + "." + TEST_ZONE + ".").await?; + // resolve the srv. we'll test this in two ways: + // * the srv record as seen through `hickory_resolver`, the higher-level + // interface we use in many places + // * the srv record as seen through `hickory_client`, to double-check that + // the exact DNS response has the answers/additionals sections as we'd + // expect it to be + let response = resolver.srv_lookup(&test_fqdn).await?; let srvr = response.iter().next().expect("no srv records returned!"); assert_eq!(srvr.priority(), srv.prio); assert_eq!(srvr.weight(), srv.weight); @@ -132,6 +200,27 @@ pub async fn srv_crud() -> Result<(), anyhow::Error> { aaaa_records.sort(); assert_eq!(aaaa_records, [IpAddr::from(addr1), IpAddr::from(addr2)]); + // OK, through `hickory_resolver` everything looks right. now double-check + // that the additional records really do come back in the "Additionals" + // section of the response. + + let name = hickory_client::rr::domain::Name::from_ascii(&test_fqdn) + .expect("can construct name for query"); + + let response = raw_dns_client_query( + test_ctx.dns_server.local_address(), + name, + RecordType::SRV, + ) + .await + .expect("test query is ok"); + assert_eq!(response.header().response_code(), ResponseCode::NoError); + assert_eq!(response.answers().len(), 1); + assert_eq!(response.answers()[0].record_type(), RecordType::SRV); + assert_eq!(response.additionals().len(), 2); + assert_eq!(response.additionals()[0].record_type(), RecordType::AAAA); + assert_eq!(response.additionals()[1].record_type(), RecordType::AAAA); + test_ctx.cleanup().await; Ok(()) } @@ -481,3 +570,24 @@ async fn dns_records_list( .map(|z| z.records) .unwrap_or_else(HashMap::new)) } + +/// Issue a DNS query of `record_ty` records for `name`. +/// +/// In most tests we just use `hickory-resolver` for a higher-level interface to +/// making DNS queries and handling responses. This is also the crate used +/// elsewhere to handle DNS responses in Omicron. However, it is slightly +/// higher-level than the actual wire response we produce from the DNS server in +/// this same crate; to assert on responses we send on the wire, this issues a +/// query using `hickory-client` and returns the corresponding `DnsResponse`. +async fn raw_dns_client_query( + resolver_addr: std::net::SocketAddr, + name: Name, + record_ty: RecordType, +) -> Result { + let stream = UdpClientStream::::new(resolver_addr); + let (mut trust_client, bg) = AsyncClient::connect(stream).await.unwrap(); + + tokio::spawn(bg); + + trust_client.query(name, DNSClass::IN, record_ty).await +}