diff --git a/conf/dpvs.conf.items b/conf/dpvs.conf.items index 448b40044..80f2e123a 100644 --- a/conf/dpvs.conf.items +++ b/conf/dpvs.conf.items @@ -194,6 +194,9 @@ ipv4_defs { ipv6_defs { disable off forwarding off + addr_gen_mode eui64 + stable_secret "" <128-bit hexadecimal string, used in stable-privacy mode > + route6 { method "hlist" <"hlist"/"lpm"> recycle_time 10 <10, 1-36000> diff --git a/include/conf/inetaddr.h b/include/conf/inetaddr.h index 7a82b0d94..252e6f3ee 100644 --- a/include/conf/inetaddr.h +++ b/include/conf/inetaddr.h @@ -34,6 +34,7 @@ enum { /* leverage IFA_F_XXX in linux/if_addr.h*/ #define IFA_F_SAPOOL 0x10000 /* if address with sockaddr pool */ +#define IFA_F_LINKLOCAL 0x20000 /* ipv6 link-local address */ /* ifa command flags */ #define IFA_F_OPS_VERBOSE 0x0001 diff --git a/include/ctrl.h b/include/ctrl.h index 754e38b9d..1df3d94f3 100644 --- a/include/ctrl.h +++ b/include/ctrl.h @@ -214,6 +214,7 @@ int msg_dump(const struct dpvs_msg *msg, char *buf, int len); #define MSG_TYPE_IPSET_SET 40 #define MSG_TYPE_DEST_CHECK_NOTIFY_MASTER 41 #define MSG_TYPE_DEST_CHECK_NOTIFY_SLAVES 42 +#define MSG_TYPE_IFA_IDEVINIT 43 #define MSG_TYPE_IPVS_RANGE_START 100 /* for svc per_core, refer to service.h*/ diff --git a/include/inetaddr.h b/include/inetaddr.h index 44276659f..a96b62518 100644 --- a/include/inetaddr.h +++ b/include/inetaddr.h @@ -26,6 +26,12 @@ #include "dpdk.h" #include "list.h" + +enum { + IDEV_F_NO_IPV6 = 0x00000001, + IDEV_F_NO_ROUTE = 0x00000002, +}; + struct inet_device { struct netif_port *dev; struct list_head ifa_list[DPVS_MAX_LCORE]; /* inet_ifaddr list */ @@ -33,6 +39,7 @@ struct inet_device { uint32_t ifa_cnt[DPVS_MAX_LCORE]; uint32_t ifm_cnt[DPVS_MAX_LCORE]; rte_atomic32_t refcnt; /* not used yet */ + uint32_t flags; /* IDEV_F_XXX */ #define this_ifa_list ifa_list[rte_lcore_id()] #define this_ifm_list ifm_list[rte_lcore_id()] #define this_ifa_cnt ifa_cnt[rte_lcore_id()] @@ -119,7 +126,12 @@ bool inet_chk_mcast_addr(int af, 
struct netif_port *dev, void inet_ifaddr_dad_failure(struct inet_ifaddr *ifa); -int idev_add_mcast_init(struct netif_port *dev); +struct inet_device *dev_get_idev(const struct netif_port *dev); + +void idev_put(struct inet_device *idev); + +int idev_addr_init(struct inet_device *idev); + int inet_addr_init(void); int inet_addr_term(void); diff --git a/include/ip_tunnel.h b/include/ip_tunnel.h index 499aefb69..9a9f366e6 100644 --- a/include/ip_tunnel.h +++ b/include/ip_tunnel.h @@ -98,6 +98,8 @@ int ip_tunnel_xmit(struct rte_mbuf *mbuf, struct netif_port *dev, int ip_tunnel_pull_header(struct rte_mbuf *mbuf, int hlen, __be16 in_proto); +int ip_tunnel_dev_init(struct netif_port *dev); +int ip_tunnel_set_mc_list(struct netif_port *dev); int ip_tunnel_get_link(struct netif_port *dev, struct rte_eth_link *link); int ip_tunnel_get_stats(struct netif_port *dev, struct rte_eth_stats *stats); int ip_tunnel_get_promisc(struct netif_port *dev, bool *promisc); diff --git a/include/ipv6.h b/include/ipv6.h index edb562d00..7a95736f0 100644 --- a/include/ipv6.h +++ b/include/ipv6.h @@ -33,6 +33,29 @@ #define IPV6 #define RTE_LOGTYPE_IPV6 RTE_LOGTYPE_USER1 +enum ip6_addr_gen_mode { + IP6_ADDR_GEN_MODE_EUI64 = 1, + IP6_ADDR_GEN_MODE_NONE, + IP6_ADDR_GEN_MODE_STABLE_PRIVACY, + IP6_ADDR_GEN_MODE_RANDOM, + IP6_ADDR_GFN_MODE_MAX = 64, +}; + +struct ipv6_stable_secret { + bool initialized; + struct in6_addr secret; +}; + +struct ipv6_config { + unsigned disable:1; + unsigned forwarding:1; + unsigned addr_gen_mode:6; + struct ipv6_stable_secret secret_stable; + struct ipv6_stable_secret secret_random; +}; + +const struct ipv6_config *ip6_config_get(void); + /* * helper functions */ diff --git a/include/linux_ipv6.h b/include/linux_ipv6.h index 05bc4ae3b..6d19b1b36 100644 --- a/include/linux_ipv6.h +++ b/include/linux_ipv6.h @@ -492,6 +492,19 @@ static inline int ipv6_saddr_preferred(int type) return 0; } +static inline bool ipv6_reserved_interfaceid(const struct in6_addr *addr) +{ + if 
((addr->s6_addr32[2] | addr->s6_addr32[3]) == 0) + return true; + if (addr->s6_addr32[2] == htonl(0x02005eff) && + ((addr->s6_addr32[3] & htonl(0xfe000000)) == htonl(0xfe000000))) + return true; + if (addr->s6_addr32[2] == htonl(0xfdffffff) && + ((addr->s6_addr32[3] & htonl(0xffffff80)) == htonl(0xffffff80))) + return true; + return false; +} + #ifdef __DPVS__ /*functions below were edited from addrconf.c*/ diff --git a/include/netif.h b/include/netif.h index af3adeb7a..274f4b3d4 100644 --- a/include/netif.h +++ b/include/netif.h @@ -72,7 +72,7 @@ enum { #define NETIF_ALIGN 32 -#define NETIF_PORT_ID_INVALID 0xFF +#define NETIF_PORT_ID_INVALID NETIF_MAX_PORTS #define NETIF_PORT_ID_ALL NETIF_PORT_ID_INVALID #define NETIF_LCORE_ID_INVALID 0xFF @@ -283,7 +283,7 @@ int netif_get_promisc(struct netif_port *dev, bool *promisc); int netif_get_allmulticast(struct netif_port *dev, bool *allmulticast); int netif_get_stats(struct netif_port *dev, struct rte_eth_stats *stats); int netif_get_xstats(struct netif_port *dev, netif_nic_xstats_get_t **xstats); -struct netif_port *netif_alloc(size_t priv_size, const char *namefmt, +struct netif_port *netif_alloc(portid_t id, size_t priv_size, const char *namefmt, unsigned int nrxq, unsigned int ntxq, void (*setup)(struct netif_port *)); portid_t netif_port_count(void); diff --git a/src/inetaddr.c b/src/inetaddr.c index 077f9d844..fb833436a 100644 --- a/src/inetaddr.c +++ b/src/inetaddr.c @@ -16,6 +16,7 @@ * */ #include +#include #include "dpdk.h" #include "ctrl.h" #include "netif.h" @@ -24,6 +25,7 @@ #include "sa_pool.h" #include "ndisc.h" #include "route.h" +#include "ipv6.h" #include "route6.h" #include "inetaddr.h" #include "conf/inetaddr.h" @@ -75,14 +77,14 @@ static uint32_t ifa_msg_seq(void) return counter++; } -static inline struct inet_device *dev_get_idev(const struct netif_port *dev) +struct inet_device *dev_get_idev(const struct netif_port *dev) { assert(dev && dev->in_ptr); rte_atomic32_inc(&dev->in_ptr->refcnt); return 
dev->in_ptr; } -static inline void idev_put(struct inet_device *idev) +void idev_put(struct inet_device *idev) { rte_atomic32_dec(&idev->refcnt); } @@ -232,18 +234,13 @@ static int ifa_add_del_mcast(struct inet_ifaddr *ifa, bool add, bool is_master) } /* add ipv6 multicast address after port start */ -static int __idev_add_mcast_init(void *args) +static int __idev_inet6_mcast_init(struct inet_device *idev) { int err; - struct inet_device *idev; union inet_addr all_nodes, all_routers; struct rte_ether_addr eaddr_nodes, eaddr_routers; bool is_master = (rte_lcore_id() == g_master_lcore_id); - struct netif_port *dev = (struct netif_port *) args; - - idev = dev_get_idev(dev); - memset(&eaddr_nodes, 0, sizeof(eaddr_nodes)); memset(&eaddr_routers, 0, sizeof(eaddr_routers)); @@ -273,7 +270,6 @@ static int __idev_add_mcast_init(void *args) goto free_idev_routers; } - idev_put(idev); return EDPVS_OK; free_idev_routers: @@ -284,20 +280,171 @@ static int __idev_add_mcast_init(void *args) free_idev_nodes: idev_mc_del(AF_INET6, idev, &all_nodes); errout: - idev_put(idev); return err; } -int idev_add_mcast_init(struct netif_port *dev) +static int inet6_addr_gen_eui64(struct inet_device *idev, struct in6_addr *addr) +{ + unsigned char hwaddr[6]; + unsigned char *eui = &addr->s6_addr[8]; + + rte_memcpy(hwaddr, &idev->dev->addr, 6); + eui[0] = hwaddr[0] ^ 0x02; + eui[1] = hwaddr[1]; + eui[2] = hwaddr[2]; + eui[3] = 0xFF; + eui[4] = 0xFE; + eui[5] = hwaddr[3]; + eui[6] = hwaddr[4]; + eui[7] = hwaddr[5]; + + return EDPVS_OK; +} + +static int inet6_addr_gen_stable(struct in6_addr secret, struct inet_device *idev, struct in6_addr *addr) +{ +#define MAX_RETRY 8 + struct in6_addr temp; + union { + unsigned char data[SHA256_DIGEST_LENGTH]; + uint32_t data_word[2]; + } md; + struct { + struct in6_addr secret; + uint32_t prefix[2]; + struct rte_ether_addr hwaddr; + uint8_t dad_count; + } __rte_packed data; + uint8_t dad_count = 0; + + memset(&data, 0, sizeof(data)); + data.secret = secret; + 
data.prefix[0] = addr->s6_addr32[0]; + data.prefix[1] = addr->s6_addr32[1]; + data.hwaddr = idev->dev->addr; + while (1) { + data.dad_count = dad_count++; + memset(&md, 0, sizeof(md)); + SHA256((const unsigned char*)&data, sizeof(data), md.data); /* digest must be SHA256_DIGEST_LENGTH bytes to fit md.data; a SHA-512 digest (64 bytes) would overrun it */ + temp = *addr; + temp.s6_addr32[2] = md.data_word[0]; + temp.s6_addr32[3] = md.data_word[1]; + if (!ipv6_reserved_interfaceid(&temp)) + break; + if (dad_count >= MAX_RETRY) + return EDPVS_RESOURCE; + } + + *addr = temp; + return EDPVS_OK; +} + +static int inet6_link_local_addr_gen(struct inet_device *idev, struct in6_addr *addr) +{ + const struct ipv6_config *ip6cfg = ip6_config_get(); + + ipv6_addr_set(addr, htonl(0xFE800000), 0, 0, 0); + switch (ip6cfg->addr_gen_mode) { + case IP6_ADDR_GEN_MODE_EUI64: + return inet6_addr_gen_eui64(idev, addr); + case IP6_ADDR_GEN_MODE_NONE: + return EDPVS_DISABLED; + case IP6_ADDR_GEN_MODE_STABLE_PRIVACY: + if (ip6cfg->secret_stable.initialized) + return inet6_addr_gen_stable(ip6cfg->secret_stable.secret, idev, addr); + // fallthrough + case IP6_ADDR_GEN_MODE_RANDOM: + return inet6_addr_gen_stable(ip6cfg->secret_random.secret, idev, addr); + default: + return EDPVS_NOTSUPP; + } + + return EDPVS_OK; +} + +static int ifa_entry_add(const struct ifaddr_action *param); +static int __inet6_link_local_addr_config(struct inet_device *idev, const struct in6_addr *addr) +{ + struct ifaddr_action param; + + memset(&param, 0, sizeof(param)); + fill_ifaddr_action(AF_INET6, idev->dev, (union inet_addr *)addr, 64, NULL, + 0, 0, IFA_SCOPE_LINK, IFA_F_LINKLOCAL, INET_ADDR_ADD, &param); + return ifa_entry_add(&param); +} + +struct idev_addr_init_args { + struct inet_device *idev; + struct in6_addr link_local_addr; +}; + +static int __idev_addr_init(void *args) { int err; - lcoreid_t cid; + struct idev_addr_init_args *param = args; + + assert(param && param->idev && param->idev->dev); + + if (rte_lcore_id() >= DPVS_MAX_LCORE) + return EDPVS_OK; + + err = __inet6_link_local_addr_config(param->idev, 
¶m->link_local_addr); + if (err != EDPVS_OK) + return err; + + return __idev_inet6_mcast_init(param->idev); +} + +int idev_addr_init(struct inet_device *idev) +{ + int err; + lcoreid_t cid, tcid; + struct dpvs_msg *msg; + struct idev_addr_init_args args; + + // only ipv6 needs address initialization now + if (ip6_config_get()->disable || (idev->flags & IDEV_F_NO_IPV6)) + return EDPVS_OK; + + if (idev->flags & IDEV_F_NO_ROUTE) + return EDPVS_OK; + + cid = rte_lcore_id(); + if (cid != rte_get_main_lcore()) + return EDPVS_NOTSUPP; + + args.idev = idev; + err = inet6_link_local_addr_gen(idev, &args.link_local_addr); + if (err != EDPVS_OK) { + if (EDPVS_DISABLED == err) + return EDPVS_OK; + return err; + } - rte_eal_mp_remote_launch(__idev_add_mcast_init, dev, CALL_MAIN); - RTE_LCORE_FOREACH_WORKER(cid) { - err = rte_eal_wait_lcore(cid); - if (unlikely(err < 0)) + // do it on master lcore + err = __idev_addr_init(&args); + if (err != EDPVS_OK) + return err; + + // do it on slave lcores + if (dpvs_state_get() == DPVS_STATE_NORMAL) { + msg = msg_make(MSG_TYPE_IFA_IDEVINIT, ifa_msg_seq(), DPVS_MSG_MULTICAST, + cid, sizeof(args), &args); + if (unlikely(!msg)) + return EDPVS_NOMEM; + err = multicast_msg_send(msg, DPVS_MSG_F_ASYNC, NULL); + if (err != EDPVS_OK) { + msg_destroy(&msg); return err; + } + msg_destroy(&msg); + } else { + rte_eal_mp_remote_launch(__idev_addr_init, &args, SKIP_MAIN); + RTE_LCORE_FOREACH_WORKER(tcid) { + err = rte_eal_wait_lcore(tcid); + if (unlikely(err < 0)) + return err; + } } return EDPVS_OK; @@ -1344,6 +1491,17 @@ static int ifa_msg_sync_cb(struct dpvs_msg *msg) return EDPVS_OK; } +static int ifa_msg_idevinit_cb(struct dpvs_msg *msg) +{ + struct idev_addr_init_args *param; + + if (unlikely(!msg || msg->len != sizeof(*param))) + return EDPVS_INVAL; + param = (struct idev_addr_init_args *)(msg->data); + + return __idev_addr_init(param); +} + static int __inet_addr_add(const struct ifaddr_action *param) { int err; @@ -1952,7 +2110,7 @@ static int 
ifa_sockopt_agent_get(sockoptid_t opt, const void *conf, size_t size, struct inet_device *idev = NULL; struct inet_addr_front *array = NULL; const struct inet_addr_entry *entry = conf; - int len; + int len = 0; int err; if (entry->af != AF_INET && entry->af != AF_INET6 && entry->af != AF_UNSPEC) { @@ -2113,6 +2271,13 @@ static struct dpvs_msg_type ifa_msg_types[] = { //.cid = rte_get_main_lcore(), .unicast_msg_cb = ifa_msg_sync_cb, .multicast_msg_cb = NULL + }, + { + .type = MSG_TYPE_IFA_IDEVINIT, + .prio = MSG_PRIO_NORM, + .mode = DPVS_MSG_MULTICAST, + .unicast_msg_cb = ifa_msg_idevinit_cb, + .multicast_msg_cb = NULL, } }; diff --git a/src/ip_gre.c b/src/ip_gre.c index 43fd31b8b..dbfdad7c7 100644 --- a/src/ip_gre.c +++ b/src/ip_gre.c @@ -233,12 +233,13 @@ static int gre_dev_init(struct netif_port *dev) tnl->hlen = gre_calc_hlen(tnl->params.o_flags); - return EDPVS_OK; + return ip_tunnel_dev_init(dev); } static struct netif_ops gre_dev_ops = { .op_init = gre_dev_init, .op_xmit = gre_xmit, + .op_set_mc_list = ip_tunnel_set_mc_list, .op_get_link = ip_tunnel_get_link, .op_get_stats = ip_tunnel_get_stats, .op_get_promisc = ip_tunnel_get_promisc, diff --git a/src/ip_tunnel.c b/src/ip_tunnel.c index 3e1e3483b..e26f3eee7 100644 --- a/src/ip_tunnel.c +++ b/src/ip_tunnel.c @@ -171,7 +171,8 @@ static struct netif_port *tunnel_create(struct ip_tunnel_tab *tab, if (!strlen(params.ifname)) snprintf(params.ifname, IFNAMSIZ, "%s%%d", ops->kind); - dev = netif_alloc(ops->priv_size, params.ifname, 1, 1, ops->setup); + dev = netif_alloc(NETIF_PORT_ID_INVALID, ops->priv_size, params.ifname, + 1, 1, ops->setup); if (!dev) return NULL; @@ -206,6 +207,8 @@ static struct netif_port *tunnel_create(struct ip_tunnel_tab *tab, dev->flag &= ~NETIF_PORT_FLAG_TX_UDP_CSUM_OFFLOAD; dev->flag &= ~NETIF_PORT_FLAG_LLDP; + dev->in_ptr->flags |= IDEV_F_NO_IPV6; + err = netif_port_register(dev); if (err != EDPVS_OK) { netif_free(dev); @@ -899,6 +902,30 @@ int ip_tunnel_pull_header(struct rte_mbuf 
*mbuf, int hlen, __be16 in_proto) return EDPVS_OK; } +int ip_tunnel_dev_init(struct netif_port *dev) +{ + int err; + struct ip_tunnel *tnl = netif_priv(dev); + struct inet_device *idev = dev_get_idev(tnl->dev); + + err = idev_addr_init(idev); + if (err != EDPVS_OK) { + idev_put(idev); + return err; + } + + idev_put(idev); + return EDPVS_OK; +} + +int ip_tunnel_set_mc_list(struct netif_port *dev) +{ + // IP tunnel devices need no hw multicast address, + // and should always return success + + return EDPVS_OK; +} + int ip_tunnel_get_link(struct netif_port *dev, struct rte_eth_link *link) { struct ip_tunnel *tnl = netif_priv(dev); diff --git a/src/ipip.c b/src/ipip.c index 8b25f0c8e..8a7ffe100 100644 --- a/src/ipip.c +++ b/src/ipip.c @@ -46,7 +46,9 @@ static int ipip_xmit(struct rte_mbuf *mbuf, struct netif_port *dev) } static struct netif_ops ipip_dev_ops = { + .op_init = ip_tunnel_dev_init, .op_xmit = ipip_xmit, + .op_set_mc_list = ip_tunnel_set_mc_list, .op_get_link = ip_tunnel_get_link, .op_get_stats = ip_tunnel_get_stats, .op_get_promisc = ip_tunnel_get_promisc, diff --git a/src/ipset/ipset_core.c b/src/ipset/ipset_core.c index 2d752be41..85d4325ff 100644 --- a/src/ipset/ipset_core.c +++ b/src/ipset/ipset_core.c @@ -227,6 +227,9 @@ ipset_flush_lcore(void *arg) int i; struct ipset *set; + if (rte_lcore_id() >= DPVS_MAX_LCORE) + return EDPVS_OK; + for (i = 0; i < IPSETS_TBL_SIZE; i++) { list_for_each_entry(set, &this_ipsets_tbl[i], list) set->type->destroy(set); @@ -244,8 +247,12 @@ static int ipset_lcore_init(void *arg) { int i; + lcoreid_t cid = rte_lcore_id(); + + if (cid >= DPVS_MAX_LCORE) + return EDPVS_OK; - if (!rte_lcore_is_enabled(rte_lcore_id())) + if (!rte_lcore_is_enabled(cid)) return EDPVS_DISABLED; this_ipsets_tbl = rte_zmalloc(NULL, diff --git a/src/ipv6/ipv6.c b/src/ipv6/ipv6.c index 1fa712110..37da08fb6 100644 --- a/src/ipv6/ipv6.c +++ b/src/ipv6/ipv6.c @@ -46,8 +46,12 @@ static rte_rwlock_t inet6_prot_lock; /* * IPv6 configures with default 
values. */ -static bool conf_ipv6_forwarding = false; -static bool conf_ipv6_disable = false; +static struct ipv6_config ip6_configs; + +const struct ipv6_config *ip6_config_get(void) +{ + return &ip6_configs; +}; /* * IPv6 statistics @@ -108,42 +112,109 @@ static void ip6_prot_init(void) rte_rwlock_write_unlock(&inet6_prot_lock); } -static void ip6_conf_forward(vector_t tokens) +static void ip6_forwarding_handler(vector_t tokens) { char *str = set_value(tokens); assert(str); if (strcasecmp(str, "on") == 0) - conf_ipv6_forwarding = true; + ip6_configs.forwarding = 1; else if (strcasecmp(str, "off") == 0) - conf_ipv6_forwarding = false; + ip6_configs.forwarding = 0; else RTE_LOG(WARNING, IPV6, "invalid ipv6:forwarding %s\n", str); - RTE_LOG(INFO, IPV6, "ipv6:forwarding = %s\n", conf_ipv6_forwarding ? "on" : "off"); + RTE_LOG(INFO, IPV6, "ipv6:forwarding = %s\n", ip6_configs.forwarding ? "on" : "off"); FREE_PTR(str); } -static void ip6_conf_disable(vector_t tokens) +static void ip6_disable_handler(vector_t tokens) { char *str = set_value(tokens); assert(str); if (strcasecmp(str, "on") == 0) - conf_ipv6_disable = true; + ip6_configs.disable = 1; else if (strcasecmp(str, "off") == 0) - conf_ipv6_disable = false; + ip6_configs.disable = 0; else RTE_LOG(WARNING, IPV6, "invalid ipv6:disable %s\n", str); - RTE_LOG(INFO, IPV6, "ipv6:disable = %s\n", conf_ipv6_disable ? "on" : "off"); + RTE_LOG(INFO, IPV6, "ipv6:disable=%s\n", ip6_configs.disable ? 
"disabled" : "enabled"); + + FREE_PTR(str); +} + +static void ip6_addr_gen_mode_handler(vector_t tokens) +{ + char *str = set_value(tokens); + + assert(str); + + if (!strcasecmp(str, "eui64")) + ip6_configs.addr_gen_mode = IP6_ADDR_GEN_MODE_EUI64; + else if (!strcasecmp(str, "none")) + ip6_configs.addr_gen_mode = IP6_ADDR_GEN_MODE_NONE; + else if (!strcasecmp(str, "stable-privacy")) + ip6_configs.addr_gen_mode = IP6_ADDR_GEN_MODE_STABLE_PRIVACY; + else if (!strcasecmp(str, "random")) + ip6_configs.addr_gen_mode = IP6_ADDR_GEN_MODE_RANDOM; + else + RTE_LOG(WARNING, IPV6, "invalid ipv6:addr_gen_mode:%s\n", str); + + RTE_LOG(INFO, IPV6, "ipv6:addr_gen_mode=%s\n", str); + + FREE_PTR(str); +} + +static void ip6_stable_secret_handler(vector_t tokens) +{ + bool valid = true; + size_t i, len; + char *str = set_value(tokens); + + assert(str); + len = strlen(str); + if (len < 32) { + valid = false; + } else { + for (i = 0; i < 32; i++) { + if (!isxdigit(str[i])) { + valid = false; + break; + } + } + } + if (!valid) { + RTE_LOG(WARNING, IPV6, "invalid ipv6:stable_secret %s, " + "a 128-bit hexadecimal string required\n", str); + FREE_PTR(str); + return; + } + + if (hexstr2binary(str, 32, (uint8_t *)(&ip6_configs.secret_stable.secret), 16) == 16) + ip6_configs.secret_stable.initialized = true; + else + RTE_LOG(WARNING, IPV6, "fail to tranlate ipv6:stable_secret %s into binary\n", str); + RTE_LOG(INFO, IPV6, "ipv6:stable_secret configured"); FREE_PTR(str); } +static inline void ip6_gen_mode_random_init(void) +{ + const char hex_chars[] = "0123456789abcdef"; + char *buf = (char *)(&ip6_configs.secret_random.secret); + int i; + + for (i = 0; i < 16; i++) + buf[i] = hex_chars[random() % 16]; + ip6_configs.secret_random.initialized = true; +} + /* refer linux:ip6_input_finish() */ static int ip6_local_in_fin(struct rte_mbuf *mbuf) { @@ -371,7 +442,7 @@ int ip6_output(struct rte_mbuf *mbuf) mbuf->port = dev->id; iftraf_pkt_out(AF_INET6, mbuf, dev); - if (unlikely(conf_ipv6_disable)) 
{ + if (unlikely(ip6_configs.disable)) { IP6_INC_STATS(outdiscards); if (rt) route6_put(rt); @@ -411,7 +482,7 @@ static int ip6_forward(struct rte_mbuf *mbuf) int addrtype; uint32_t mtu; - if (!conf_ipv6_forwarding) + if (!ip6_configs.forwarding) goto error; if (mbuf->packet_type != ETH_PKT_HOST) @@ -539,7 +610,7 @@ static int ip6_rcv(struct rte_mbuf *mbuf, struct netif_port *dev) IP6_UPD_PO_STATS(in, mbuf->pkt_len); iftraf_pkt_in(AF_INET6, mbuf, dev); - if (unlikely(conf_ipv6_disable)) { + if (unlikely(ip6_configs.disable)) { IP6_INC_STATS(indiscards); goto drop; } @@ -656,6 +727,8 @@ int ipv6_init(void) /* htons, cpu_to_be16 not work when struct initialization :( */ ip6_pkt_type.type = htons(RTE_ETHER_TYPE_IPV6); + ip6_gen_mode_random_init(); + err = netif_register_pkt(&ip6_pkt_type); if (err) goto reg_pkt_err; @@ -815,8 +888,10 @@ void ipv6_keyword_value_init(void) /* KW_TYPE_INIT keyword */ } /* KW_TYPE NORMAL keyword */ - conf_ipv6_forwarding = false; - conf_ipv6_disable = false; + ip6_configs.forwarding = 0; + ip6_configs.disable = 0; + ip6_configs.addr_gen_mode = IP6_ADDR_GEN_MODE_EUI64; + ip6_configs.secret_stable.initialized = false; route6_keyword_value_init(); } @@ -824,8 +899,10 @@ void ipv6_keyword_value_init(void) void install_ipv6_keywords(void) { install_keyword_root("ipv6_defs", NULL); - install_keyword("forwarding", ip6_conf_forward, KW_TYPE_NORMAL); - install_keyword("disable", ip6_conf_disable, KW_TYPE_NORMAL); + install_keyword("forwarding", ip6_forwarding_handler, KW_TYPE_NORMAL); + install_keyword("disable", ip6_disable_handler, KW_TYPE_NORMAL); + install_keyword("addr_gen_mode", ip6_addr_gen_mode_handler, KW_TYPE_NORMAL); + install_keyword("stable_secret", ip6_stable_secret_handler, KW_TYPE_NORMAL); install_route6_keywords(); } diff --git a/src/ipv6/route6.c b/src/ipv6/route6.c index 0c3d06d79..311e9f64c 100644 --- a/src/ipv6/route6.c +++ b/src/ipv6/route6.c @@ -135,10 +135,14 @@ static int rt6_setup_lcore(void *arg) int err; bool global; 
struct timeval tv; + lcoreid_t cid = rte_lcore_id(); + + if (cid >= DPVS_MAX_LCORE) + return EDPVS_OK; tv.tv_sec = g_rt6_recycle_time, tv.tv_usec = 0, - global = (rte_lcore_id() == rte_get_main_lcore()); + global = (cid == rte_get_main_lcore()); INIT_LIST_HEAD(&this_rt6_dustbin.routes); err = dpvs_timer_sched_period(&this_rt6_dustbin.tm, &tv, rt6_recycle, NULL, global); @@ -152,6 +156,9 @@ static int rt6_destroy_lcore(void *arg) { struct route6 *rt6, *next; + if (rte_lcore_id() >= DPVS_MAX_LCORE) + return EDPVS_OK; + list_for_each_entry_safe(rt6, next, &this_rt6_dustbin.routes, hnode) { if (rte_atomic32_read(&rt6->refcnt) <= 1) { /* need judge refcnt here? */ list_del(&rt6->hnode); diff --git a/src/ipvs/ip_vs_blklst.c b/src/ipvs/ip_vs_blklst.c index 25491a076..a608cd0c5 100644 --- a/src/ipvs/ip_vs_blklst.c +++ b/src/ipvs/ip_vs_blklst.c @@ -459,15 +459,20 @@ static struct dpvs_sockopts blklst_sockopts = { static int blklst_lcore_init(void *args) { int i; + lcoreid_t cid = rte_lcore_id(); + + if (cid >= DPVS_MAX_LCORE) + return EDPVS_OK; - if (!rte_lcore_is_enabled(rte_lcore_id())) + if (!rte_lcore_is_enabled(cid)) return EDPVS_DISABLED; this_num_blklsts = 0; this_num_blklsts_ipset = 0; - this_blklst_tab = rte_malloc(NULL, sizeof(struct list_head) * - DPVS_BLKLST_TAB_SIZE, RTE_CACHE_LINE_SIZE); + this_blklst_tab = rte_malloc(NULL, + sizeof(struct list_head) * DPVS_BLKLST_TAB_SIZE, + RTE_CACHE_LINE_SIZE); if (!this_blklst_tab) return EDPVS_NOMEM; for (i = 0; i < DPVS_BLKLST_TAB_SIZE; i++) @@ -487,7 +492,12 @@ static int blklst_lcore_init(void *args) static int blklst_lcore_term(void *args) { - if (!rte_lcore_is_enabled(rte_lcore_id())) + lcoreid_t cid = rte_lcore_id(); + + if (cid >= DPVS_MAX_LCORE) + return EDPVS_OK; + + if (!rte_lcore_is_enabled(cid)) return EDPVS_DISABLED; dp_vs_blklst_flush_all(); diff --git a/src/ipvs/ip_vs_conn.c b/src/ipvs/ip_vs_conn.c index 33268760c..4179e68ad 100644 --- a/src/ipvs/ip_vs_conn.c +++ b/src/ipvs/ip_vs_conn.c @@ -1192,11 +1192,15 
@@ static void dp_vs_conn_put_nolock(struct dp_vs_conn *conn) static int conn_init_lcore(void *arg) { int i; + lcoreid_t cid = rte_lcore_id(); - if (!rte_lcore_is_enabled(rte_lcore_id())) + if (cid >= DPVS_MAX_LCORE) + return EDPVS_IDLE; + + if (!rte_lcore_is_enabled(cid)) return EDPVS_DISABLED; - if (!netif_lcore_is_fwd_worker(rte_lcore_id())) + if (!netif_lcore_is_fwd_worker(cid)) return EDPVS_IDLE; this_conn_tbl = rte_malloc(NULL, @@ -1218,7 +1222,12 @@ static int conn_init_lcore(void *arg) static int conn_term_lcore(void *arg) { - if (!rte_lcore_is_enabled(rte_lcore_id())) + lcoreid_t cid = rte_lcore_id(); + + if (cid >= DPVS_MAX_LCORE) + return EDPVS_IDLE; + + if (!rte_lcore_is_enabled(cid)) return EDPVS_DISABLED; if (this_conn_tbl) { diff --git a/src/ipvs/ip_vs_whtlst.c b/src/ipvs/ip_vs_whtlst.c index 672077456..36023e6ca 100644 --- a/src/ipvs/ip_vs_whtlst.c +++ b/src/ipvs/ip_vs_whtlst.c @@ -522,15 +522,20 @@ static struct dpvs_sockopts whtlst_sockopts = { static int whtlst_lcore_init(void *args) { int i; + lcoreid_t cid = rte_lcore_id(); + + if (cid >= DPVS_MAX_LCORE) + return EDPVS_OK; - if (!rte_lcore_is_enabled(rte_lcore_id())) + if (!rte_lcore_is_enabled(cid)) return EDPVS_DISABLED; this_num_whtlsts = 0; this_num_whtlsts_ipset = 0; - this_whtlst_tab = rte_malloc(NULL, sizeof(struct list_head) * - DPVS_WHTLST_TAB_SIZE, RTE_CACHE_LINE_SIZE); + this_whtlst_tab = rte_malloc(NULL, + sizeof(struct list_head) * DPVS_WHTLST_TAB_SIZE, + RTE_CACHE_LINE_SIZE); if (!this_whtlst_tab) return EDPVS_NOMEM; for (i = 0; i < DPVS_WHTLST_TAB_SIZE; i++) @@ -550,7 +555,12 @@ static int whtlst_lcore_init(void *args) static int whtlst_lcore_term(void *args) { - if (!rte_lcore_is_enabled(rte_lcore_id())) + lcoreid_t cid = rte_lcore_id(); + + if (cid >= DPVS_MAX_LCORE) + return EDPVS_OK; + + if (!rte_lcore_is_enabled(cid)) return EDPVS_DISABLED; dp_vs_whtlst_flush_all(); diff --git a/src/netif.c b/src/netif.c index 3fd24aa0b..5d8fe0b3d 100644 --- a/src/netif.c +++ b/src/netif.c 
@@ -2766,8 +2766,6 @@ static inline void netif_lcore_cleanup(void) } } -/********************************************** kni *************************************************/ - /* always update bond port macaddr and its KNI macaddr together */ static int update_bond_macaddr(struct netif_port *port) { @@ -2799,6 +2797,8 @@ static inline void free_mbufs(struct rte_mbuf **pkts, unsigned num) } } +/********************************************** kni *************************************************/ + void kni_ingress(struct rte_mbuf *mbuf, struct netif_port *dev) { if (!kni_dev_exist(dev)) @@ -3224,7 +3224,7 @@ portid_t netif_port_count(void) return port_id_end; } -struct netif_port *netif_alloc(size_t priv_size, const char *namefmt, +struct netif_port *netif_alloc(portid_t id, size_t priv_size, const char *namefmt, unsigned int nrxq, unsigned int ntxq, void (*setup)(struct netif_port *)) { @@ -3247,13 +3247,17 @@ struct netif_port *netif_alloc(size_t priv_size, const char *namefmt, return NULL; } - dev->id = netif_port_id_alloc(); + if (id != NETIF_PORT_ID_INVALID && !netif_port_get(id)) + dev->id = id; + else + dev->id = netif_port_id_alloc(); if (strstr(namefmt, "%d")) snprintf(dev->name, sizeof(dev->name), namefmt, dev->id); else snprintf(dev->name, sizeof(dev->name), "%s", namefmt); + rte_rwlock_init(&dev->dev_lock); dev->socket = SOCKET_ID_ANY; dev->hw_header_len = sizeof(struct rte_ether_hdr); /* default */ @@ -3277,7 +3281,6 @@ struct netif_port *netif_alloc(size_t priv_size, const char *namefmt, if (dev->mtu == 0) dev->mtu = ETH_DATA_LEN; - rte_rwlock_init(&dev->dev_lock); netif_mc_init(dev); dev->in_ptr = rte_zmalloc(NULL, sizeof(struct inet_device), RTE_CACHE_LINE_SIZE); @@ -3474,79 +3477,6 @@ static inline void setup_dev_of_flags(struct netif_port *port) port->flag |= NETIF_PORT_FLAG_LLDP; } -/* TODO: refactor it with netif_alloc */ -static struct netif_port* netif_rte_port_alloc(portid_t id, int nrxq, - int ntxq, const struct rte_eth_conf *conf) -{ - int 
ii; - struct netif_port *port; - - port = rte_zmalloc("port", sizeof(struct netif_port) + - sizeof(union netif_bond), RTE_CACHE_LINE_SIZE); - if (!port) { - RTE_LOG(ERR, NETIF, "%s: no memory\n", __func__); - return NULL; - } - - port->id = id; - port->bond = (union netif_bond *)(port + 1); - if (is_physical_port(id)) { - port->type = PORT_TYPE_GENERAL; /* update later in netif_rte_port_alloc */ - port->netif_ops = &dpdk_netif_ops; - } else if (is_bond_port(id)) { - port->type = PORT_TYPE_BOND_MASTER; - port->netif_ops = &bond_netif_ops; - } else { - RTE_LOG(ERR, NETIF, "%s: invalid port id: %d\n", __func__, id); - rte_free(port); - return NULL; - } - - if (port_name_alloc(id, port->name, sizeof(port->name)) != EDPVS_OK) { - RTE_LOG(ERR, NETIF, "%s: fail to get port name for port%d\n", - __func__, id); - rte_free(port); - return NULL; - } - - port->nrxq = nrxq; // update after port_rx_queues_get(); - port->ntxq = ntxq; // update after port_tx_queues_get(); - port->socket = rte_eth_dev_socket_id(id); - port->hw_header_len = sizeof(struct rte_ether_hdr); - if (port->socket == SOCKET_ID_ANY) - port->socket = rte_socket_id(); - port->mbuf_pool = pktmbuf_pool[port->socket]; - rte_eth_macaddr_get((uint8_t)id, &port->addr); // bonding mac is zero here - rte_eth_dev_get_mtu((uint8_t)id, &port->mtu); - rte_eth_dev_info_get((uint8_t)id, &port->dev_info); - port->dev_conf = *conf; - rte_rwlock_init(&port->dev_lock); - netif_mc_init(port); - - setup_dev_of_flags(port); - - port->in_ptr = rte_zmalloc(NULL, sizeof(struct inet_device), RTE_CACHE_LINE_SIZE); - if (!port->in_ptr) { - RTE_LOG(ERR, NETIF, "%s: no memory\n", __func__); - rte_free(port); - return NULL; - } - port->in_ptr->dev = port; - - for (ii = 0; ii < DPVS_MAX_LCORE; ii++) { - INIT_LIST_HEAD(&port->in_ptr->ifa_list[ii]); - INIT_LIST_HEAD(&port->in_ptr->ifm_list[ii]); - } - - if (tc_init_dev(port) != EDPVS_OK) { - RTE_LOG(ERR, NETIF, "%s: fail to init TC\n", __func__); - rte_free(port); - return NULL; - } - - return 
port; -} - struct netif_port* netif_port_get(portid_t id) { int hash = port_tab_hashkey(id); @@ -4083,9 +4013,9 @@ int netif_port_start(struct netif_port *port) if (port->netif_ops->op_update_addr) port->netif_ops->op_update_addr(port); - /* add in6_addr multicast address */ - if ((ret = idev_add_mcast_init(port)) != EDPVS_OK) { - RTE_LOG(WARNING, NETIF, "%s: idev_add_mcast_init failed -- %d(%s)\n", + /* ipv6 default addresses initialization */ + if ((ret = idev_addr_init(port->in_ptr)) != EDPVS_OK) { + RTE_LOG(WARNING, NETIF, "%s: idev_addr_init failed -- %d(%s)\n", __func__, ret, dpvs_strerror(ret)); return ret; } @@ -4123,7 +4053,7 @@ int netif_port_register(struct netif_port *port) { struct netif_port *cur; int hash, nhash; - int err = EDPVS_OK; + int err; if (unlikely(NULL == port)) return EDPVS_INVAL; @@ -4146,10 +4076,15 @@ int netif_port_register(struct netif_port *port) list_add_tail(&port->nlist, &port_ntab[nhash]); g_nports++; - if (port->netif_ops->op_init) + if (port->netif_ops->op_init) { err = port->netif_ops->op_init(port); + if (err != EDPVS_OK) { + netif_port_unregister(port); + return err; + } + } - return err; + return EDPVS_OK; } int netif_port_unregister(struct netif_port *port) @@ -4229,6 +4164,7 @@ static int relate_bonding_device(void) } sport->type = PORT_TYPE_BOND_SLAVE; sport->bond->slave.master = mport; + sport->in_ptr->flags |= IDEV_F_NO_ROUTE; } mport->bond->master.slave_nb = i; } @@ -4336,14 +4272,42 @@ static char *find_conf_kni_name(portid_t id) return NULL; } +static void dpdk_port_setup(struct netif_port *dev) +{ + dev->type = PORT_TYPE_GENERAL; + dev->netif_ops = &dpdk_netif_ops; + dev->socket = rte_eth_dev_socket_id(dev->id); + dev->dev_conf = default_port_conf; + dev->bond = (union netif_bond *)(dev + 1); + + rte_eth_macaddr_get(dev->id, &dev->addr); + rte_eth_dev_get_mtu(dev->id, &dev->mtu); + rte_eth_dev_info_get(dev->id, &dev->dev_info); + setup_dev_of_flags(dev); +} + +static void bond_port_setup(struct netif_port *dev) +{ 
+ dev->type = PORT_TYPE_BOND_MASTER; + dev->netif_ops = &bond_netif_ops; + dev->socket = rte_eth_dev_socket_id(dev->id); + dev->dev_conf = default_port_conf; + dev->bond = (union netif_bond *)(dev + 1); + + rte_eth_macaddr_get(dev->id, &dev->addr); + rte_eth_dev_get_mtu(dev->id, &dev->mtu); + rte_eth_dev_info_get(dev->id, &dev->dev_info); + setup_dev_of_flags(dev); +} + /* Allocate and register all DPDK ports available */ static void netif_port_init(void) { int nports, nports_cfg; portid_t pid; struct netif_port *port; - struct rte_eth_conf this_eth_conf; char *kni_name; + char ifname[IFNAMSIZ]; nports = dpvs_rte_eth_dev_count(); if (nports <= 0) @@ -4358,17 +4322,23 @@ static void netif_port_init(void) port_tab_init(); port_ntab_init(); - this_eth_conf = default_port_conf; - kni_init(); for (pid = 0; pid < nports; pid++) { + if (port_name_alloc(pid, ifname, sizeof(ifname)) != EDPVS_OK) + rte_exit(EXIT_FAILURE, "Port name allocation failed, exiting...\n"); + /* queue number will be filled on device start */ - port = netif_rte_port_alloc(pid, 0, 0, &this_eth_conf); + port = NULL; + if (is_physical_port(pid)) + port = netif_alloc(pid, sizeof(union netif_bond), ifname, 0, 0, dpdk_port_setup); + else if (is_bond_port(pid)) + port = netif_alloc(pid, sizeof(union netif_bond), ifname, 0, 0, bond_port_setup); if (!port) - rte_exit(EXIT_FAILURE, "Port allocate fail, exiting...\n"); + rte_exit(EXIT_FAILURE, "Port allocation failed, exiting...\n"); + if (netif_port_register(port) < 0) - rte_exit(EXIT_FAILURE, "Port register fail, exiting...\n"); + rte_exit(EXIT_FAILURE, "Port registration failed, exiting...\n"); } if (relate_bonding_device() < 0) @@ -4490,7 +4460,7 @@ int netif_vdevs_add(void) __func__, bond_cfg->name, bond_cfg->mode, bond_cfg->numa_node); return EDPVS_CALLBACKFAIL; } - bond_cfg->port_id = ret; /* relate port_id with port_name, used by netif_rte_port_alloc */ + bond_cfg->port_id = ret; /* relate port_id with port_name, used by port_name_alloc */ RTE_LOG(INFO, 
NETIF, "created bondig device %s: mode=%d, primary=%s, numa_node=%d\n", bond_cfg->name, bond_cfg->mode, bond_cfg->primary, bond_cfg->numa_node); diff --git a/src/route.c b/src/route.c index 00354f7c8..f48d82fa2 100644 --- a/src/route.c +++ b/src/route.c @@ -671,8 +671,12 @@ static struct dpvs_sockopts route_sockopts = { static int route_lcore_init(void *arg) { int i; + lcoreid_t cid = rte_lcore_id(); + + if (cid >= DPVS_MAX_LCORE) + return EDPVS_OK; - if (!rte_lcore_is_enabled(rte_lcore_id())) + if (!rte_lcore_is_enabled(cid)) return EDPVS_DISABLED; for (i = 0; i < LOCAL_ROUTE_TAB_SIZE; i++) @@ -684,7 +688,12 @@ static int route_lcore_init(void *arg) static int route_lcore_term(void *arg) { - if (!rte_lcore_is_enabled(rte_lcore_id())) + lcoreid_t cid = rte_lcore_id(); + + if (cid >= DPVS_MAX_LCORE) + return EDPVS_OK; + + if (!rte_lcore_is_enabled(cid)) return EDPVS_DISABLED; return route_lcore_flush(); diff --git a/src/scheduler.c b/src/scheduler.c index d446f378b..338cfa238 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -188,6 +188,9 @@ static int dpvs_job_loop(void *arg) thres_time = BIG_LOOP_THRESH_MASTER; #endif + if (cid >= DPVS_MAX_LCORE) + return EDPVS_OK; + /* skip irrelative job loops */ if (role == LCORE_ROLE_MAX) return EDPVS_INVAL; diff --git a/src/timer.c b/src/timer.c index dd3485375..b3ae9d505 100644 --- a/src/timer.c +++ b/src/timer.c @@ -412,7 +412,12 @@ static int timer_term_schedler(struct timer_scheduler *sched) static int timer_lcore_init(void *arg) { - if (!rte_lcore_is_enabled(rte_lcore_id())) + lcoreid_t cid = rte_lcore_id(); + + if (cid >= DPVS_MAX_LCORE) + return EDPVS_OK; + + if (!rte_lcore_is_enabled(cid)) return EDPVS_DISABLED; return timer_init_schedler(&RTE_PER_LCORE(timer_sched), rte_lcore_id()); @@ -420,7 +425,12 @@ static int timer_lcore_init(void *arg) static int timer_lcore_term(void *arg) { - if (!rte_lcore_is_enabled(rte_lcore_id())) + lcoreid_t cid = rte_lcore_id(); + + if (cid >= DPVS_MAX_LCORE) + return EDPVS_OK; + + 
if (!rte_lcore_is_enabled(cid)) return EDPVS_DISABLED; return timer_term_schedler(&RTE_PER_LCORE(timer_sched)); diff --git a/src/vlan.c b/src/vlan.c index 6d657e449..955b0b840 100644 --- a/src/vlan.c +++ b/src/vlan.c @@ -75,6 +75,21 @@ static int alloc_vlan_info(struct netif_port *dev) return EDPVS_OK; } +static int vlan_dev_init(struct netif_port *dev) +{ + int err; + struct inet_device *idev = dev_get_idev(dev); + + err = idev_addr_init(idev); + if (err != EDPVS_OK) { + idev_put(idev); + return err; + } + + idev_put(idev); + return EDPVS_OK; +} + static int vlan_xmit(struct rte_mbuf *mbuf, struct netif_port *dev) { struct vlan_dev_priv *vlan = netif_priv(dev); @@ -171,6 +186,7 @@ static int vlan_get_stats(struct netif_port *dev, struct rte_eth_stats *stats) } static struct netif_ops vlan_netif_ops = { + .op_init = vlan_dev_init, .op_xmit = vlan_xmit, .op_set_mc_list = vlan_set_mc_list, .op_get_queue = vlan_get_queue, @@ -228,8 +244,8 @@ int vlan_add_dev(struct netif_port *real_dev, const char *ifname, } /* allocate and register netif device */ - dev = netif_alloc(sizeof(struct vlan_dev_priv), name_buf, - real_dev->nrxq, real_dev->ntxq, vlan_setup); + dev = netif_alloc(NETIF_PORT_ID_INVALID, sizeof(struct vlan_dev_priv), + name_buf, real_dev->nrxq, real_dev->ntxq, vlan_setup); if (!dev) { err = EDPVS_NOMEM; goto out;