Skip to content

Commit

Permalink
Merge pull request #928 from ywc689/toa-enhancement
Browse files Browse the repository at this point in the history
ipvs: toa enhancements
  • Loading branch information
ywc689 authored Dec 19, 2023
2 parents 486ed1e + 58a0e75 commit e42c170
Show file tree
Hide file tree
Showing 2 changed files with 158 additions and 73 deletions.
47 changes: 22 additions & 25 deletions patch/dcdn-toa.patch
Original file line number Diff line number Diff line change
@@ -1,32 +1,31 @@
From 55e8e5da2b4b0893d36cb3f621bedf9833c4ea50 Mon Sep 17 00:00:00 2001
From cee6889685240558ebea795615539b7289070842 Mon Sep 17 00:00:00 2001
From: wangyetong <[email protected]>
Date: Thu, 14 Sep 2023 15:33:42 +0800
Subject: [PATCH] added dcdn toa

---
include/ipvs/conn.h | 5 +++++
include/ipvs/conn.h | 4 ++++
include/ipvs/proto_tcp.h | 2 ++
src/ipvs/ip_vs_proto_tcp.c | 54 +++++++++++++++++++++++++++++++++++++++++++++-
src/ipvs/ip_vs_proto_tcp.c | 55 +++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/include/ipvs/conn.h b/include/ipvs/conn.h
index fa0bdeb..88dcb44 100644
index 843721e..78fb0ba 100644
--- a/include/ipvs/conn.h
+++ b/include/ipvs/conn.h
@@ -166,6 +166,11 @@ struct dp_vs_conn {
/* flag for gfwip */
bool outwall;
@@ -167,6 +167,10 @@ struct dp_vs_conn {
/* connection redirect in fnat/snat/nat modes */
struct dp_vs_redirect *redirect;

+ /* dcdn toa found or not */
+ bool dcdn_found;
+ /* dcdn toa address */
+ struct in_addr dcdn_addr;
+
} __rte_cache_aligned;

/* for syn-proxy to save all ack packet in conn before rs's syn-ack arrives */
diff --git a/include/ipvs/proto_tcp.h b/include/ipvs/proto_tcp.h
index 9f5162a..41d5646 100644
index 3d1515a..f0cf50c 100644
--- a/include/ipvs/proto_tcp.h
+++ b/include/ipvs/proto_tcp.h
@@ -28,6 +28,7 @@ enum {
Expand All @@ -46,11 +45,11 @@ index 9f5162a..41d5646 100644
#define TCP_OLEN_TSTAMP_ALIGNED 12
#define TCP_OLEN_SACK_BASE 2
diff --git a/src/ipvs/ip_vs_proto_tcp.c b/src/ipvs/ip_vs_proto_tcp.c
index cbb7cb2..2cd889a 100644
index 6acbbca..5b185fa 100644
--- a/src/ipvs/ip_vs_proto_tcp.c
+++ b/src/ipvs/ip_vs_proto_tcp.c
@@ -305,6 +305,43 @@ static void tcp_in_remove_ts(struct tcphdr *tcph)
}
@@ -441,6 +441,43 @@ static int tcp_in_add_proxy_proto(struct dp_vs_conn *conn, struct rte_mbuf *mbuf
return proxy_proto_insert(&ppinfo, conn, mbuf, tcph, hdr_shift);
}

+/* check dcdn toa option */
Expand Down Expand Up @@ -90,10 +89,10 @@ index cbb7cb2..2cd889a 100644
+ return EDPVS_NOTEXIST;
+}
+
static inline int tcp_in_add_toa(struct dp_vs_conn *conn, struct rte_mbuf *mbuf,
static int tcp_in_add_toa(struct dp_vs_conn *conn, struct rte_mbuf *mbuf,
struct tcphdr *tcph)
{
@@ -382,7 +419,10 @@ static inline int tcp_in_add_toa(struct dp_vs_conn *conn, struct rte_mbuf *mbuf,
@@ -518,7 +555,10 @@ static int tcp_in_add_toa(struct dp_vs_conn *conn, struct rte_mbuf *mbuf,

if (conn->af == AF_INET) {
struct tcpopt_ip4_addr *toa_ip4 = (struct tcpopt_ip4_addr *)(tcph + 1);
Expand All @@ -105,21 +104,18 @@ index cbb7cb2..2cd889a 100644
}
else {
struct tcpopt_ip6_addr *toa_ip6 = (struct tcpopt_ip6_addr *)(tcph + 1);
@@ -694,9 +734,13 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto,
struct dp_vs_conn *conn, struct rte_mbuf *mbuf)
{
struct tcphdr *th;
@@ -842,6 +882,10 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto,
int af; /* outbound af */
int iphdrlen;
int err, pp_hdr_shift = 0;
+ struct in_addr dcdn_addr;
/* af/mbuf may be changed for nat64 which in af is ipv6 and out is ipv4 */
int af = tuplehash_out(conn).af;
int iphdrlen = ((AF_INET6 == af) ? ip6_hdrlen(mbuf): ip4_hdrlen(mbuf));
+#ifdef CONFIG_DPVS_IPVS_DEBUG
+ char dcdn_buf[64];
+#endif

if (mbuf_may_pull(mbuf, iphdrlen + sizeof(*th)) != 0)
return EDPVS_INVPKT;
@@ -720,6 +764,14 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto,
af = tuplehash_out(conn).af;
iphdrlen = ((AF_INET6 == af) ? ip6_hdrlen(mbuf): ip4_hdrlen(mbuf));
@@ -866,6 +910,15 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto,
if (th->syn && !th->ack) {
tcp_in_remove_ts(th);
tcp_in_init_seq(conn, mbuf, th);
Expand All @@ -131,9 +127,10 @@ index cbb7cb2..2cd889a 100644
+ RTE_LOG(DEBUG, IPVS, "get dcdn toa addr %s\n", dcdn_buf);
+#endif
+ }
tcp_in_add_toa(conn, mbuf, th);
+ tcp_in_add_toa(conn, mbuf, th);
}

/* Add toa/proxy_protocol to the first data packet */
--
1.8.3.1

184 changes: 136 additions & 48 deletions src/ipvs/ip_vs_proto_tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -306,46 +306,129 @@ static void tcp_in_remove_ts(struct tcphdr *tcph)
}
}

/* use NOP option to replace TCP_OLEN_IP4_ADDR and TCP_OLEN_IP6_ADDR opt */
static void tcp_in_remove_toa(struct tcphdr *tcph, int af)
/*
* Remove the NOP and TOA options present in the mbuf and compact the option space.
* If there is still not enough space, trim more options, except for the protected ones.
*
* Return the trimmed length on success, otherwise a dpvs error number on failure.
* */
static int tcp_in_prune_options(int af, int reqlen, struct rte_mbuf *mbuf, struct tcphdr *tcph)
{
unsigned char *ptr;
int len, i;
uint32_t tcp_opt_len = af == AF_INET ? TCP_OLEN_IP4_ADDR : TCP_OLEN_IP6_ADDR;
unsigned char *ptr, *fast, *slow;
const unsigned char *l3hdr, *payload;
int i, optlen;
unsigned int pruned;
uint8_t opcode, opsize;
uint64_t opts_protected;
const uint8_t opts_maxlen[64] = {
[2] = 4, [3] = 3, [4] = 2,
[8] = 10, [30] = 40, [34] = 18
};

ptr = (unsigned char *)(tcph + 1);
len = (tcph->doff << 2) - sizeof(struct tcphdr);
fast = slow = ptr;
optlen = (tcph->doff << 2) - sizeof(struct tcphdr);
payload = ptr + optlen;

while (len > 0) {
int opcode = *ptr++;
int opsize;
if (optlen < reqlen) /* make no sense to do anything */
return 0;

while (optlen > 0) {
opcode = *ptr++;
switch (opcode) {
case TCP_OPT_EOL:
return;
goto fini;
case TCP_OPT_NOP:
len--;
fast++;
optlen--;
continue;
default:
opsize = *ptr++;
if (opsize < 2) /* silly options */
return;
if (opsize > len)
return; /* partial options */
if ((opcode == TCP_OPT_ADDR) && (opsize == tcp_opt_len)) {
for (i = 0; i < tcp_opt_len; i++) {
*(ptr - 2 + i) = TCP_OPT_NOP;
if (opsize < 2) /* silly options */
goto fini;
if (opsize > optlen) /* partial options */
goto fini;
if (opcode == TCP_OPT_ADDR) {
fast += opsize;
} else {
for (i = 0; i < opsize; i++) {
if (slow != fast)
*slow = *fast;
slow++;
fast++;
}
/* DON'T RETURN
* Keep searching for other TCP_OPT_ADDR options and clear them too.
* See https://github.com/iqiyi/dpvs/pull/925 for more details. */
}

ptr += opsize - 2;
len -= opsize;
optlen -= opsize;
break;
}
}

fini:
pruned = payload - slow;
if (pruned < reqlen) {
/* Trim the options further; TCP functionality that relies on the
* unprotected options may be impaired. Refer to:
* https://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml
* #tcp-parameters-1
* */
ptr = slow;
slow = fast = (unsigned char *)(tcph + 1);
if (tcph->syn)
opts_protected = (1ULL << 2) | (1ULL << 3) | (1ULL << 4) /* MSS, WS, SACKP */
| (1ULL << 8) | (1ULL << 30) | (1ULL << 34); /* TS, MPTCP, TFO */
else
opts_protected = (1ULL << 8); /* TS, drop SACK, MPTCP DSS/REMOVE_ADDR */
while (fast < ptr) {
opcode = *fast;
opsize = *(fast + 1);
if (opcode < 64 && ((1ULL << opcode) & opts_protected)
&& (opsize <= opts_maxlen[opcode])) {
for (i = 0; i < opsize; i++)
*slow++ = *fast++;
opts_protected ^= (1ULL << opcode);
} else {
fast += opsize;
pruned += opsize;
if (pruned >= reqlen) {
while (fast < ptr)
*slow++ = *fast++;
break;
}
}
}
pruned = payload - slow;
}
if (pruned > 0) {
while (pruned & 0x3) { /* 4-bytes alignment for tcp options */
*slow++ = 0;
pruned--;
}
if (!pruned)
return 0;
/* trim the packet */
l3hdr = rte_pktmbuf_mtod(mbuf, void *);
if (unlikely(mbuf_may_pull(mbuf, mbuf->pkt_len) != 0)) {
memset(slow, 0, pruned);
return EDPVS_INVPKT;
}
if (unlikely(payload - l3hdr > mbuf->pkt_len)) {
memset(slow, 0, pruned);
return EDPVS_INVPKT;
}
memmove(slow, payload, mbuf->pkt_len - (payload - l3hdr));
rte_pktmbuf_trim(mbuf, pruned);
tcph->doff -= (pruned >> 2);
if (af == AF_INET)
((struct rte_ipv4_hdr *)l3hdr)->total_length =
htons(ntohs(((struct rte_ipv4_hdr *)l3hdr)->total_length) - pruned);
else
((struct rte_ipv6_hdr *)l3hdr)->payload_len =
htons(ntohs(((struct rte_ipv6_hdr *)l3hdr)->payload_len) - pruned);
return pruned;
}
return 0;
}

static int tcp_in_add_proxy_proto(struct dp_vs_conn *conn, struct rte_mbuf *mbuf,
Expand Down Expand Up @@ -797,15 +880,12 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto,
struct dp_vs_conn *conn, struct rte_mbuf *mbuf)
{
struct tcphdr *th;
/* af/mbuf may be changed for nat64 which in af is ipv6 and out is ipv4 */
int iaf, oaf;
int iphdrlen;
int af; /* outbound af */
int iphdrlen, toalen;
int err, pp_hdr_shift = 0;

iaf = tuplehash_in(conn).af;
oaf = tuplehash_out(conn).af;

iphdrlen = ((AF_INET6 == oaf) ? ip6_hdrlen(mbuf): ip4_hdrlen(mbuf));
af = tuplehash_out(conn).af;
iphdrlen = ((AF_INET6 == af) ? ip6_hdrlen(mbuf): ip4_hdrlen(mbuf));

if (mbuf_may_pull(mbuf, iphdrlen + sizeof(*th)) != 0)
return EDPVS_INVPKT;
Expand All @@ -819,41 +899,49 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto,

/*
* for SYN packet
* 1. remove tcp timestamp option
* laddress for different client have diff timestamp.
* 2. save original TCP sequence for seq-adjust later.
* since TCP option will be change.
* 3. add TOA option
* so that RS with TOA module can get real client IP.
* 1. remove tcp timestamp option,
* laddrs for different clients have diff timestamp.
* 2. save original TCP sequence for seq-adjust later
* since TCP option will be changed.
*/
if (th->syn && !th->ack) {
tcp_in_remove_ts(th);

tcp_in_init_seq(conn, mbuf, th);
if (PROXY_PROTOCOL_V1 != PROXY_PROTOCOL_VERSION(conn->pp_version)
&& PROXY_PROTOCOL_V2 != PROXY_PROTOCOL_VERSION(conn->pp_version)) {
if (unlikely(tcp_in_add_toa(conn, mbuf, th) != EDPVS_OK)) {
tcp_in_remove_toa(th, iaf);
}
}
}

/* add toa/proxy_proto to first data packet */
/* Add toa/proxy_protocol to the first data packet */
if (ntohl(th->ack_seq) == conn->fnat_seq.fdata_seq
&& !th->syn && !th->rst /*&& !th->fin*/) {
if (PROXY_PROTOCOL_V2 == PROXY_PROTOCOL_VERSION(conn->pp_version)
|| PROXY_PROTOCOL_V1 == PROXY_PROTOCOL_VERSION(conn->pp_version)) {
if (conn->fnat_seq.isn - conn->fnat_seq.delta + 1 == ntohl(th->seq)) {
/* avoid inserting repetitive ppdata when the first rs ack delayed */
/* avoid inserting repetitive proxy protocol data
* when the first rs ack is delayed */
err = tcp_in_add_proxy_proto(conn, mbuf, th, iphdrlen, &pp_hdr_shift);
if (unlikely(EDPVS_OK != err))
RTE_LOG(INFO, IPVS, "%s: insert proxy protocol fail -- %s\n",
__func__, dpvs_strerror(err));
th = ((void *)th) + pp_hdr_shift;
}
} else {
if (unlikely(tcp_in_add_toa(conn, mbuf, th) != EDPVS_OK)) {
tcp_in_remove_toa(th, iaf);
} else { /* use toa */
err = tcp_in_add_toa(conn, mbuf, th);
if (unlikely(EDPVS_OK != err)) {
toalen = tuplehash_in(conn).af == AF_INET ? TCP_OLEN_IP4_ADDR : TCP_OLEN_IP6_ADDR;
if (tcp_in_prune_options(af, toalen, mbuf, th) >= toalen
&& (EDPVS_NOROOM == err || EDPVS_FRAG == err)) {
err = tcp_in_add_toa(conn, mbuf, th);
}
if (EDPVS_OK != err) {
char caddrbuf[64], vaddrbuf[64], laddrbuf[64], daddrbuf[64];
const char *caddr, *vaddr, *laddr, *daddr;
caddr = inet_ntop(conn->af, &conn->caddr, caddrbuf, sizeof(caddrbuf)) ? caddrbuf : "::";
vaddr = inet_ntop(conn->af, &conn->vaddr, vaddrbuf, sizeof(vaddrbuf)) ? vaddrbuf : "::";
laddr = inet_ntop(af, &conn->laddr, laddrbuf, sizeof(laddrbuf)) ? laddrbuf : "::";
daddr = inet_ntop(af, &conn->daddr, daddrbuf, sizeof(daddrbuf)) ? daddrbuf : "::";
RTE_LOG(WARNING, IPVS, "TOA add failed(%s): [%s]:%d -> [%s]:%d; [%s]:%d -> [%s]:%d\n",
dpvs_strerror(err), caddr, htons(conn->cport), vaddr, htons(conn->vport),
laddr, htons(conn->lport), daddr, htons(conn->dport));
}
}
}
}
Expand All @@ -864,7 +952,7 @@ static int tcp_fnat_in_handler(struct dp_vs_proto *proto,
th->source = conn->lport;
th->dest = conn->dport;

return tcp_send_csum(oaf, iphdrlen, th, conn, mbuf, conn->in_dev);
return tcp_send_csum(af, iphdrlen, th, conn, mbuf, conn->in_dev);
}

static int tcp_fnat_out_handler(struct dp_vs_proto *proto,
Expand Down

0 comments on commit e42c170

Please sign in to comment.