This removes the hacky route merging/splitting code needed to support older kernel versions. Consequently the required Linux version is raised to 4.11 for reliable operation. --- sysdep/linux/netlink.c | 218 +++++++---------------------------------- 1 file changed, 33 insertions(+), 185 deletions(-) diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index 29b744cb..eca681f9 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -78,49 +78,6 @@ const int rt_default_ecmp = 16; -/* - * Structure nl_parse_state keeps state of received route processing. Ideally, - * we could just independently parse received Netlink messages and immediately - * propagate received routes to the rest of BIRD, but older Linux kernel (before - * version 4.11) represents and announces IPv6 ECMP routes not as one route with - * multiple next hops (like RTA_MULTIPATH in IPv4 ECMP), but as a sequence of - * routes with the same prefix. More recent kernels work as with IPv4. - * - * Therefore, BIRD keeps currently processed route in nl_parse_state structure - * and postpones its propagation until we expect it to be final; i.e., when - * non-matching route is received or when the scan ends. When another matching - * route is received, it is merged with the already processed route to form an - * ECMP route. Note that merging is done only for IPv6 (merge == 1), but the - * postponing is done in both cases (for simplicity). All IPv4 routes or IPv6 - * routes with RTA_MULTIPATH set are just considered non-matching. - * - * This is ignored for asynchronous notifications (every notification is handled - * as a separate route). It is not an issue for our routes, as we ignore such - * notifications anyways. But importing alien IPv6 ECMP routes does not work - * properly with older kernels. - * - * Whatever the kernel version is, IPv6 ECMP routes are sent as multiple routes - * for the same prefix. 
- */ - -struct nl_parse_state -{ - struct linpool *pool; - int scan; - int merge; - - net *net; - rta *attrs; - struct krt_proto *proto; - s8 new; - s8 krt_src; - u8 krt_type; - u8 krt_proto; - u32 krt_metric; - - u32 rta_flow; /* Used during parsing */ -}; - /* * Synchronous Netlink interface */ @@ -761,7 +718,7 @@ nl_add_multipath(struct nlmsghdr *h, uint bufsize, struct nexthop *nh, int af, e } static struct nexthop * -nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, const net_addr *n, struct rtattr *ra, int af, int krt_src) +nl_parse_multipath(struct krt_proto *p, const net_addr *n, struct rtattr *ra, int af, int krt_src, u32 *rta_flow) { struct rtattr *a[BIRD_RTA_MAX]; struct rtnexthop *nh = RTA_DATA(ra); @@ -780,7 +737,7 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, const net_addr if ((nh->rtnh_flags & RTNH_F_DEAD) && (krt_src != KRT_SRC_BIRD)) goto next; - *last = rv = lp_allocz(s->pool, NEXTHOP_MAX_SIZE); + *last = rv = lp_allocz(nl_linpool, NEXTHOP_MAX_SIZE); last = &(rv->next); rv->weight = nh->rtnh_hops; @@ -824,7 +781,7 @@ nl_parse_multipath(struct nl_parse_state *s, struct krt_proto *p, const net_addr rv->gw = rta_get_ipa(a[RTA_GATEWAY]); if (a[RTA_FLOW]) - s->rta_flow = rta_get_u32(a[RTA_FLOW]); + *rta_flow = rta_get_u32(a[RTA_FLOW]); #ifdef HAVE_MPLS_KERNEL if (a[RTA_VIA]) @@ -1480,36 +1437,13 @@ static inline int nl_add_rte(struct krt_proto *p, rte *e) { rta *a = e->attrs; - int err = 0; - - if (krt_ecmp6(p) && a->nh.next) - { - struct nexthop *nh = &(a->nh); - - err = nl_send_route(p, e, NL_OP_ADD, RTD_UNICAST, nh); - if (err < 0) - return err; - - for (nh = nh->next; nh; nh = nh->next) - err += nl_send_route(p, e, NL_OP_APPEND, RTD_UNICAST, nh); - - return err; - } - return nl_send_route(p, e, NL_OP_ADD, a->dest, &(a->nh)); } static inline int nl_delete_rte(struct krt_proto *p, rte *e) { - int err = 0; - - /* For IPv6, we just repeatedly request DELETE until we get error */ - do - err = nl_send_route(p, e, 
NL_OP_DELETE, RTD_NONE, NULL); - while (krt_ecmp6(p) && !err); - - return err; + return nl_send_route(p, e, NL_OP_DELETE, RTD_NONE, NULL); } static inline int @@ -1559,67 +1493,11 @@ krt_replace_rte(struct krt_proto *p, net *n UNUSED, rte *new, rte *old) } } -static int -nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type, uint rtm_family) -{ - /* Route merging is used for IPv6 scans */ - if (!s->scan || (rtm_family != AF_INET6)) - return 0; - - /* Saved and new route must have same network, proto/table, and priority */ - if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority)) - return 0; - - /* Both must be regular unicast routes */ - if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST)) - return 0; - - return 1; -} - -static void -nl_announce_route(struct nl_parse_state *s) -{ - rte *e = rte_get_temp(s->attrs); - e->net = s->net; - e->u.krt.src = s->krt_src; - e->u.krt.proto = s->krt_proto; - e->u.krt.seen = 0; - e->u.krt.best = 0; - e->u.krt.metric = s->krt_metric; - - if (s->scan) - krt_got_route(s->proto, e); - else - krt_got_route_async(s->proto, e, s->new); - - s->net = NULL; - s->attrs = NULL; - s->proto = NULL; - lp_flush(s->pool); -} - -static inline void -nl_parse_begin(struct nl_parse_state *s, int scan) -{ - memset(s, 0, sizeof (struct nl_parse_state)); - s->pool = nl_linpool; - s->scan = scan; -} - -static inline void -nl_parse_end(struct nl_parse_state *s) -{ - if (s->net) - nl_announce_route(s); -} - - #define SKIP0(ARG, ...) do { DBG("KRT: Ignoring route - " ARG, ##__VA_ARGS__); return; } while(0) #define SKIP(ARG, ...) 
do { DBG("KRT: Ignoring route %N - " ARG, &dst, ##__VA_ARGS__); return; } while(0) static void -nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) +nl_parse_route(struct nlmsghdr *h, int scan) { struct krt_proto *p; struct rtmsg *i; @@ -1632,6 +1510,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) u32 priority = 0; u32 def_scope = RT_SCOPE_UNIVERSE; int krt_src; + u32 rta_flow; if (!(i = nl_checkin(h, sizeof(*i)))) return; @@ -1707,7 +1586,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) if (i->rtm_tos != 0) /* We don't support TOS */ SKIP("TOS %02x\n", i->rtm_tos); - if (s->scan && !new) + if (scan && !new) SKIP("RTM_DELROUTE in scan\n"); if (a[RTA_PRIORITY]) @@ -1731,7 +1610,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) return; case RTPROT_BIRD: - if (!s->scan) + if (!scan) SKIP("echo\n"); krt_src = KRT_SRC_BIRD; break; @@ -1751,18 +1630,15 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) net *net = net_get(p->p.main_channel->table, n); - if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type, i->rtm_family)) - nl_announce_route(s); - - rta *ra = lp_allocz(s->pool, RTA_MAX_SIZE); + rta *ra = lp_allocz(nl_linpool, RTA_MAX_SIZE); ra->src = p->p.main_source; ra->source = RTS_INHERIT; ra->scope = SCOPE_UNIVERSE; if (a[RTA_FLOW]) - s->rta_flow = rta_get_u32(a[RTA_FLOW]); + rta_flow = rta_get_u32(a[RTA_FLOW]); else - s->rta_flow = 0; + rta_flow = 0; switch (i->rtm_type) { @@ -1771,7 +1647,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) if (a[RTA_MULTIPATH]) { - struct nexthop *nh = nl_parse_multipath(s, p, n, a[RTA_MULTIPATH], i->rtm_family, krt_src); + struct nexthop *nh = nl_parse_multipath(p, n, a[RTA_MULTIPATH], i->rtm_family, krt_src, &rta_flow); if (!nh) SKIP("strange RTA_MULTIPATH\n"); @@ -1859,7 +1735,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) if (i->rtm_scope != def_scope) { - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + 
sizeof(eattr)); + ea_list *ea = lp_alloc(nl_linpool, sizeof(ea_list) + sizeof(eattr)); ea->next = ra->eattrs; ra->eattrs = ea; ea->flags = EALF_SORTED; @@ -1874,7 +1750,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) { ip_addr ps = rta_get_ipa(a[RTA_PREFSRC]); - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); + ea_list *ea = lp_alloc(nl_linpool, sizeof(ea_list) + sizeof(eattr)); ea->next = ra->eattrs; ra->eattrs = ea; ea->flags = EALF_SORTED; @@ -1883,7 +1759,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) ea->attrs[0].flags = 0; ea->attrs[0].type = EAF_TYPE_IP_ADDRESS; - struct adata *ad = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps)); + struct adata *ad = lp_alloc(nl_linpool, sizeof(struct adata) + sizeof(ps)); ad->length = sizeof(ps); memcpy(ad->data, &ps, sizeof(ps)); @@ -1891,9 +1767,9 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) } /* Can be set per-route or per-nexthop */ - if (s->rta_flow) + if (rta_flow) { - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr)); + ea_list *ea = lp_alloc(nl_linpool, sizeof(ea_list) + sizeof(eattr)); ea->next = ra->eattrs; ra->eattrs = ea; ea->flags = EALF_SORTED; @@ -1901,13 +1777,13 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) ea->attrs[0].id = EA_KRT_REALM; ea->attrs[0].flags = 0; ea->attrs[0].type = EAF_TYPE_INT; - ea->attrs[0].u.data = s->rta_flow; + ea->attrs[0].u.data = rta_flow; } if (a[RTA_METRICS]) { u32 metrics[KRT_METRICS_MAX]; - ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr)); + ea_list *ea = lp_alloc(nl_linpool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr)); int t, n = 0; if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0) @@ -1936,59 +1812,35 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h) } /* - * Ideally, now we would send the received route to the rest of kernel code. 
- * But IPv6 ECMP routes before 4.11 are sent as a sequence of routes, so we - * postpone it and merge next hops until the end of the sequence. Note that - * when doing merging of next hops, we expect the new route to be unipath. - * Otherwise, we ignore additional next hops in nexthop_insert(). + * Send the received route to the rest of kernel code. */ + rte *e = rte_get_temp(ra); + e->net = net; + e->u.krt.src = krt_src; + e->u.krt.proto = i->rtm_protocol; + e->u.krt.seen = 0; + e->u.krt.best = 0; + e->u.krt.metric = priority; - if (!s->net) - { - /* Store the new route */ - s->net = net; - s->attrs = ra; - s->proto = p; - s->new = new; - s->krt_src = krt_src; - s->krt_type = i->rtm_type; - s->krt_proto = i->rtm_protocol; - s->krt_metric = priority; - } + if (scan) + krt_got_route(p, e); else - { - /* Merge next hops with the stored route */ - rta *oa = s->attrs; - - struct nexthop *nhs = &oa->nh; - nexthop_insert(&nhs, &ra->nh); - - /* Perhaps new nexthop is inserted at the first position */ - if (nhs == &ra->nh) - { - /* Swap rtas */ - s->attrs = ra; + krt_got_route_async(p, e, new); - /* Keep old eattrs */ - ra->eattrs = oa->eattrs; - } - } + lp_flush(nl_linpool); } void krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */ { struct nlmsghdr *h; - struct nl_parse_state s; - nl_parse_begin(&s, 1); nl_request_dump_route(AF_UNSPEC); while (h = nl_get_scan()) if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) - nl_parse_route(&s, h); + nl_parse_route(h, /*scan=*/1); else log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type); - nl_parse_end(&s); } /* @@ -2003,16 +1855,12 @@ static struct config *nl_last_config; /* For tracking changes to nl_async_bufsiz static void nl_async_msg(struct nlmsghdr *h) { - struct nl_parse_state s; - switch (h->nlmsg_type) { case RTM_NEWROUTE: case RTM_DELROUTE: DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type); - nl_parse_begin(&s, 0); - 
nl_parse_route(&s, h); - nl_parse_end(&s); + nl_parse_route(h, /*scan=*/0); break; case RTM_NEWLINK: case RTM_DELLINK: -- 2.30.2