Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ipvs: toa enhancements #928

Merged
merged 2 commits into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 22 additions & 25 deletions patch/dcdn-toa.patch
Original file line number Diff line number Diff line change
@@ -1,32 +1,31 @@
From 55e8e5da2b4b0893d36cb3f621bedf9833c4ea50 Mon Sep 17 00:00:00 2001
From cee6889685240558ebea795615539b7289070842 Mon Sep 17 00:00:00 2001
From: wangyetong <[email protected]>
Date: Thu, 14 Sep 2023 15:33:42 +0800
Subject: [PATCH] added dcdn toa

---
include/ipvs/conn.h | 5 +++++
include/ipvs/conn.h | 4 ++++
include/ipvs/proto_tcp.h | 2 ++
src/ipvs/ip_vs_proto_tcp.c | 54 +++++++++++++++++++++++++++++++++++++++++++++-
src/ipvs/ip_vs_proto_tcp.c | 55 +++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/include/ipvs/conn.h b/include/ipvs/conn.h
index fa0bdeb..88dcb44 100644
index 843721e..78fb0ba 100644
--- a/include/ipvs/conn.h
+++ b/include/ipvs/conn.h
@@ -166,6 +166,11 @@ struct dp_vs_conn {
/* flag for gfwip */
bool outwall;
@@ -167,6 +167,10 @@ struct dp_vs_conn {
/* connection redirect in fnat/snat/nat modes */
struct dp_vs_redirect *redirect;

+ /* dcdn toa found or not */
+ bool dcdn_found;
+ /* dcdn toa address */
+ struct in_addr dcdn_addr;
+
} __rte_cache_aligned;

/* for syn-proxy to save all ack packet in conn before rs's syn-ack arrives */
diff --git a/include/ipvs/proto_tcp.h b/include/ipvs/proto_tcp.h
index 9f5162a..41d5646 100644
index 3d1515a..f0cf50c 100644
--- a/include/ipvs/proto_tcp.h
+++ b/include/ipvs/proto_tcp.h
@@ -28,6 +28,7 @@ enum {
Expand All @@ -46,11 +45,11 @@ index 9f5162a..41d5646 100644
#define TCP_OLEN_TSTAMP_ALIGNED 12
#define TCP_OLEN_SACK_BASE 2
diff --git a/src/ipvs/ip_vs_proto_tcp.c b/src/ipvs/ip_vs_proto_tcp.c
index cbb7cb2..2cd889a 100644
index 6acbbca..5b185fa 100644
--- a/src/ipvs/ip_vs_proto_tcp.c
+++ b/src/ipvs/ip_vs_proto_tcp.c
@@ -305,6 +305,43 @@ static void tcp_in_remove_ts(struct tcphdr *tcph)
}
@@ -441,6 +441,43 @@ static int tcp_in_add_proxy_proto(struct dp_vs_conn *conn, struct rte_mbuf *mbuf
return proxy_proto_insert(&ppinfo, conn, mbuf, tcph, hdr_shift);
}

+/* check dcdn toa option */
Expand Down Expand Up @@ -90,10 +89,10 @@ index cbb7cb2..2cd889a 100644
+ return EDPVS_NOTEXIST;
+}
+
static inline int tcp_in_add_toa(struct dp_vs_conn *conn, struct rte_mbuf *mbuf,
static int tcp_in_add_toa(struct dp_vs_conn *conn, struct rte_mbuf *mbuf,
struct tcphdr *tcph)
{
@@ -382,7 +419,10 @@ static inline int tcp_in_add_toa(struct dp_vs_conn *conn, struct rte_mbuf *mbuf,
@@ -518,7 +555,10 @@ static int tcp_in_add_toa(struct dp_vs_conn *conn, struct rte_mbuf *mbuf,

if (conn->af == AF_INET) {
struct tcpopt_ip4_addr *toa_ip4 = (struct tcpopt_ip4_addr *)(tcph + 1);
Expand All @@ -105,21 +104,18 @@ index cbb7cb2..2cd889a 100644
}
else {
struct tcpopt_ip6_addr *toa_ip6 = (struct tcpopt_ip6_addr *)(tcph + 1);
@@ -694,9 +734,13 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto,
struct dp_vs_conn *conn, struct rte_mbuf *mbuf)
{
struct tcphdr *th;
@@ -842,6 +882,10 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto,
int af; /* outbound af */
int iphdrlen;
int err, pp_hdr_shift = 0;
+ struct in_addr dcdn_addr;
/* af/mbuf may be changed for nat64 which in af is ipv6 and out is ipv4 */
int af = tuplehash_out(conn).af;
int iphdrlen = ((AF_INET6 == af) ? ip6_hdrlen(mbuf): ip4_hdrlen(mbuf));
+#ifdef CONFIG_DPVS_IPVS_DEBUG
+ char dcdn_buf[64];
+#endif

if (mbuf_may_pull(mbuf, iphdrlen + sizeof(*th)) != 0)
return EDPVS_INVPKT;
@@ -720,6 +764,14 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto,
af = tuplehash_out(conn).af;
iphdrlen = ((AF_INET6 == af) ? ip6_hdrlen(mbuf): ip4_hdrlen(mbuf));
@@ -866,6 +910,15 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto,
if (th->syn && !th->ack) {
tcp_in_remove_ts(th);
tcp_in_init_seq(conn, mbuf, th);
Expand All @@ -131,9 +127,10 @@ index cbb7cb2..2cd889a 100644
+ RTE_LOG(DEBUG, IPVS, "get dcdn toa addr %s\n", dcdn_buf);
+#endif
+ }
tcp_in_add_toa(conn, mbuf, th);
+ tcp_in_add_toa(conn, mbuf, th);
}

/* Add toa/proxy_protocol to the first data packet */
--
1.8.3.1

184 changes: 136 additions & 48 deletions src/ipvs/ip_vs_proto_tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -306,46 +306,129 @@ static void tcp_in_remove_ts(struct tcphdr *tcph)
}
}

/* use NOP option to replace TCP_OLEN_IP4_ADDR and TCP_OLEN_IP6_ADDR opt */
static void tcp_in_remove_toa(struct tcphdr *tcph, int af)
/*
* Remove NOP and TOA options already present in the mbuf and compact the option space.
* If there is still not enough space, trim more options except for the protected ones.
*
* Returns the trimmed length on success, otherwise a dpvs error number on failure.
* */
static int tcp_in_prune_options(int af, int reqlen, struct rte_mbuf *mbuf, struct tcphdr *tcph)
{
unsigned char *ptr;
int len, i;
uint32_t tcp_opt_len = af == AF_INET ? TCP_OLEN_IP4_ADDR : TCP_OLEN_IP6_ADDR;
unsigned char *ptr, *fast, *slow;
const unsigned char *l3hdr, *payload;
int i, optlen;
unsigned int pruned;
uint8_t opcode, opsize;
uint64_t opts_protected;
const uint8_t opts_maxlen[64] = {
[2] = 4, [3] = 3, [4] = 2,
[8] = 10, [30] = 40, [34] = 18
};

ptr = (unsigned char *)(tcph + 1);
len = (tcph->doff << 2) - sizeof(struct tcphdr);
fast = slow = ptr;
optlen = (tcph->doff << 2) - sizeof(struct tcphdr);
payload = ptr + optlen;

while (len > 0) {
int opcode = *ptr++;
int opsize;
if (optlen < reqlen) /* make no sense to do anything */
return 0;

while (optlen > 0) {
opcode = *ptr++;
switch (opcode) {
case TCP_OPT_EOL:
return;
goto fini;
ywc689 marked this conversation as resolved.
Show resolved Hide resolved
case TCP_OPT_NOP:
len--;
fast++;
optlen--;
continue;
default:
opsize = *ptr++;
if (opsize < 2) /* silly options */
return;
if (opsize > len)
return; /* partial options */
if ((opcode == TCP_OPT_ADDR) && (opsize == tcp_opt_len)) {
for (i = 0; i < tcp_opt_len; i++) {
*(ptr - 2 + i) = TCP_OPT_NOP;
if (opsize < 2) /* silly options */
goto fini;
if (opsize > optlen) /* partial options */
goto fini;
if (opcode == TCP_OPT_ADDR) {
fast += opsize;
ywc689 marked this conversation as resolved.
Show resolved Hide resolved
} else {
for (i = 0; i < opsize; i++) {
if (slow != fast)
*slow = *fast;
slow++;
fast++;
}
/* DON'T RETURN
* keep search other TCP_OPT_ADDR ,and clear them.
* See https://github.com/iqiyi/dpvs/pull/925 for more detail. */
}

ptr += opsize - 2;
len -= opsize;
optlen -= opsize;
break;
}
}

fini:
pruned = payload - slow;
if (pruned < reqlen) {
/* Trim the options further; TCP functionality that relies on unprotected
* options may be impaired. Refer to:
* https://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml
* #tcp-parameters-1
* */
ptr = slow;
slow = fast = (unsigned char *)(tcph + 1);
if (tcph->syn)
ywc689 marked this conversation as resolved.
Show resolved Hide resolved
opts_protected = (1ULL << 2) | (1ULL << 3) | (1ULL << 4) /* MSS, WS, SACKP */
| (1ULL << 8) | (1ULL << 30) | (1ULL << 34); /* TS, MPTCP, TFO */
else
opts_protected = (1ULL << 8); /* TS, drop SACK, MPTCP DSS/REMOVE_ADDR */
ywc689 marked this conversation as resolved.
Show resolved Hide resolved
while (fast < ptr) {
opcode = *fast;
opsize = *(fast + 1);
if (opcode < 64 && ((1ULL << opcode) & opts_protected)
&& (opsize <= opts_maxlen[opcode])) {
for (i = 0; i < opsize; i++)
*slow++ = *fast++;
opts_protected ^= (1ULL << opcode);
} else {
fast += opsize;
pruned += opsize;
if (pruned >= reqlen) {
while (fast < ptr)
*slow++ = *fast++;
break;
}
}
}
pruned = payload - slow;
}
if (pruned > 0) {
while (pruned & 0x3) { /* 4-bytes alignment for tcp options */
*slow++ = 0;
pruned--;
}
if (!pruned)
return 0;
/* trim the packet */
l3hdr = rte_pktmbuf_mtod(mbuf, void *);
if (unlikely(mbuf_may_pull(mbuf, mbuf->pkt_len) != 0)) {
memset(slow, 0, pruned);
return EDPVS_INVPKT;
}
if (unlikely(payload - l3hdr > mbuf->pkt_len)) {
memset(slow, 0, pruned);
return EDPVS_INVPKT;
}
memmove(slow, payload, mbuf->pkt_len - (payload - l3hdr));
ywc689 marked this conversation as resolved.
Show resolved Hide resolved
rte_pktmbuf_trim(mbuf, pruned);
tcph->doff -= (pruned >> 2);
if (af == AF_INET)
((struct rte_ipv4_hdr *)l3hdr)->total_length =
htons(ntohs(((struct rte_ipv4_hdr *)l3hdr)->total_length) - pruned);
else
((struct rte_ipv6_hdr *)l3hdr)->payload_len =
htons(ntohs(((struct rte_ipv6_hdr *)l3hdr)->payload_len) - pruned);
return pruned;
}
return 0;
}

static int tcp_in_add_proxy_proto(struct dp_vs_conn *conn, struct rte_mbuf *mbuf,
Expand Down Expand Up @@ -797,15 +880,12 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto,
struct dp_vs_conn *conn, struct rte_mbuf *mbuf)
{
struct tcphdr *th;
/* af/mbuf may be changed for nat64 which in af is ipv6 and out is ipv4 */
int iaf, oaf;
int iphdrlen;
int af; /* outbound af */
int iphdrlen, toalen;
int err, pp_hdr_shift = 0;

iaf = tuplehash_in(conn).af;
oaf = tuplehash_out(conn).af;

iphdrlen = ((AF_INET6 == oaf) ? ip6_hdrlen(mbuf): ip4_hdrlen(mbuf));
af = tuplehash_out(conn).af;
iphdrlen = ((AF_INET6 == af) ? ip6_hdrlen(mbuf): ip4_hdrlen(mbuf));

if (mbuf_may_pull(mbuf, iphdrlen + sizeof(*th)) != 0)
return EDPVS_INVPKT;
Expand All @@ -819,41 +899,49 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto,

/*
* for SYN packet
* 1. remove tcp timestamp option
* laddress for different client have diff timestamp.
* 2. save original TCP sequence for seq-adjust later.
* since TCP option will be change.
* 3. add TOA option
* so that RS with TOA module can get real client IP.
* 1. remove tcp timestamp option,
* laddrs for different clients have diff timestamp.
* 2. save original TCP sequence for seq-adjust later
* since TCP option will be changed.
*/
if (th->syn && !th->ack) {
tcp_in_remove_ts(th);

tcp_in_init_seq(conn, mbuf, th);
if (PROXY_PROTOCOL_V1 != PROXY_PROTOCOL_VERSION(conn->pp_version)
&& PROXY_PROTOCOL_V2 != PROXY_PROTOCOL_VERSION(conn->pp_version)) {
if (unlikely(tcp_in_add_toa(conn, mbuf, th) != EDPVS_OK)) {
tcp_in_remove_toa(th, iaf);
}
}
}

/* add toa/proxy_proto to first data packet */
/* Add toa/proxy_protocol to the first data packet */
if (ntohl(th->ack_seq) == conn->fnat_seq.fdata_seq
&& !th->syn && !th->rst /*&& !th->fin*/) {
if (PROXY_PROTOCOL_V2 == PROXY_PROTOCOL_VERSION(conn->pp_version)
|| PROXY_PROTOCOL_V1 == PROXY_PROTOCOL_VERSION(conn->pp_version)) {
if (conn->fnat_seq.isn - conn->fnat_seq.delta + 1 == ntohl(th->seq)) {
/* avoid inserting repetitive ppdata when the first rs ack delayed */
/* avoid inserting repetitive proxy protocol data
* when the first rs ack is delayed */
err = tcp_in_add_proxy_proto(conn, mbuf, th, iphdrlen, &pp_hdr_shift);
if (unlikely(EDPVS_OK != err))
RTE_LOG(INFO, IPVS, "%s: insert proxy protocol fail -- %s\n",
__func__, dpvs_strerror(err));
th = ((void *)th) + pp_hdr_shift;
}
} else {
if (unlikely(tcp_in_add_toa(conn, mbuf, th) != EDPVS_OK)) {
tcp_in_remove_toa(th, iaf);
} else { /* use toa */
err = tcp_in_add_toa(conn, mbuf, th);
if (unlikely(EDPVS_OK != err)) {
toalen = tuplehash_in(conn).af == AF_INET ? TCP_OLEN_IP4_ADDR : TCP_OLEN_IP6_ADDR;
if (tcp_in_prune_options(af, toalen, mbuf, th) >= toalen
&& (EDPVS_NOROOM == err || EDPVS_FRAG == err)) {
err = tcp_in_add_toa(conn, mbuf, th);
}
if (EDPVS_OK != err) {
char caddrbuf[64], vaddrbuf[64], laddrbuf[64], daddrbuf[64];
const char *caddr, *vaddr, *laddr, *daddr;
caddr = inet_ntop(conn->af, &conn->caddr, caddrbuf, sizeof(caddrbuf)) ? caddrbuf : "::";
vaddr = inet_ntop(conn->af, &conn->vaddr, vaddrbuf, sizeof(vaddrbuf)) ? vaddrbuf : "::";
laddr = inet_ntop(af, &conn->laddr, laddrbuf, sizeof(laddrbuf)) ? laddrbuf : "::";
daddr = inet_ntop(af, &conn->daddr, daddrbuf, sizeof(daddrbuf)) ? daddrbuf : "::";
RTE_LOG(WARNING, IPVS, "TOA add failed(%s): [%s]:%d -> [%s]:%d; [%s]:%d -> [%s]:%d\n",
dpvs_strerror(err), caddr, htons(conn->cport), vaddr, htons(conn->vport),
laddr, htons(conn->lport), daddr, htons(conn->dport));
}
}
}
}
Expand All @@ -864,7 +952,7 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto,
th->source = conn->lport;
th->dest = conn->dport;

return tcp_send_csum(oaf, iphdrlen, th, conn, mbuf, conn->in_dev);
return tcp_send_csum(af, iphdrlen, th, conn, mbuf, conn->in_dev);
}

static int tcp_fnat_out_handler(struct dp_vs_proto *proto,
Expand Down