<div dir="ltr"><div><div><div><div>Hi Ondrej,<br><br></div>Any update/feedback regarding this latest patch?<br></div>I'm open to further discussion and adjustments if you feel they are needed.<br><br></div>Thanks,<br></div>Mikhail<br></div><div class="gmail_extra"><br><div class="gmail_quote">2016-02-22 13:01 GMT+01:00 Mikhail Sennikovskii <span dir="ltr"><<a href="mailto:mikhail.sennikovskii@profitbricks.com" target="_blank">mikhail.sennikovskii@profitbricks.com</a>></span>:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">The API for configuring ECMP for IPv6 on Linux is not symmetrical.<br>
Routes can be set via the multipath structures, but Linux kernel<br>
splits this up into separate routes internally.<br>
As a result, ECMP routes are returned as separate independent<br>
routes when queried.<br>
This patch works around this issue by making bird collect<br>
individual routes for the same destination in one multipath route.<br>
It also implements deletion of multipath routes as a set of<br>
delete operations for each route entry.<br>
Asynchronous notifications are still not supported for now.<br>
<br>
Signed-off-by: Mikhail Sennikovskii <<a href="mailto:mikhail.sennikovskii@profitbricks.com">mikhail.sennikovskii@profitbricks.com</a>><br>
---<br>
nest/route.h | 2 +<br>
nest/rt-attr.c | 145 +++++++++++++++++++<br>
nest/rt-table.c | 41 +++++-<br>
sysdep/linux/netlink.c | 371 +++++++++++++++++++++++++++++++++++++++++++------<br>
4 files changed, 512 insertions(+), 47 deletions(-)<br>
<br>
diff --git a/nest/route.h b/nest/route.h<br>
index c435b9e..3b87a0e 100644<br>
--- a/nest/route.h<br>
+++ b/nest/route.h<br>
@@ -498,6 +498,8 @@ int mpnh__same(struct mpnh *x, struct mpnh *y); /* Compare multipath nexthops */<br>
static inline int mpnh_same(struct mpnh *x, struct mpnh *y)<br>
{ return (x == y) || mpnh__same(x, y); }<br>
struct mpnh *mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp);<br>
+struct mpnh *mpnh_sub(struct mpnh *x, struct mpnh *y, linpool *lp);<br>
+struct mpnh *mpnh_sort(struct mpnh *x, linpool *lp);<br>
<br>
void rta_init(void);<br>
rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */<br>
diff --git a/nest/rt-attr.c b/nest/rt-attr.c<br>
index 7fa05d6..335c96e 100644<br>
--- a/nest/rt-attr.c<br>
+++ b/nest/rt-attr.c<br>
@@ -302,6 +302,151 @@ mpnh_merge(struct mpnh *x, struct mpnh *y, int rx, int ry, int max, linpool *lp)<br>
return root;<br>
}<br>
<br>
+/**<br>
+ * mpnh_sub - subtract one nexthop list from another.<br>
+ * I.e. returns a list of entries, that existed in list1, but did not<br>
+ * exist in list 2.<br>
+ * The input lists must be sorted and the<br>
+ * result is sorted too.<br>
+ *<br>
+ * @x: list 1<br>
+ * @y: list 2<br>
+ * @lp: linpool if not NULL list 1 is not reusable,<br>
+ * new entries are to be allocated using this pool.<br>
+ * list 2 is never modified.<br>
+ *<br>
+ * The linpool argument determines whether list 1 is<br>
+ * consumed by the function (i.e. its nodes are reused in the resulting list).<br>
+ * If NULL, the list1 is reused, otherwise the resulting list<br>
+ * is populated with the new entries, allocated using the linpool.<br>
+ * To eliminate issues with deallocation of this list,<br>
+ * the caller should use some form of bulk deallocation<br>
+ * (e.g. stack or linpool) to free these nodes when the<br>
+ * resulting list is no longer needed.<br>
+ */<br>
+struct mpnh *<br>
+mpnh_sub(struct mpnh *x, struct mpnh *y, linpool *lp)<br>
+{<br>
+ struct mpnh *root = NULL;<br>
+ struct mpnh **n = &root;<br>
+<br>
+ while (x || y)<br>
+ {<br>
+ int cmp = mpnh_compare_node(x, y);<br>
+ if (cmp < 0)<br>
+ {<br>
+ *n = !lp ? x : mpnh_copy_node(x, lp);<br>
+ x = x->next;<br>
+ n = &((*n)->next);<br>
+ }<br>
+ else if (cmp > 0)<br>
+ y = y->next;<br>
+ else<br>
+ {<br>
+ x = x->next;<br>
+ y = y->next;<br>
+ }<br>
+ }<br>
+<br>
+ *n = NULL;<br>
+<br>
+ return root;<br>
+}<br>
+<br>
+/**<br>
+ * mpnh_copy_lp copies nexthop list using given linpool<br>
+ * (unlike mpnh_copy, which uses sl_alloc)<br>
+ */<br>
+static struct mpnh *<br>
+mpnh_copy_lp(struct mpnh *o, linpool *lp)<br>
+{<br>
+ struct mpnh *first = NULL;<br>
+ struct mpnh **last = &first;<br>
+<br>
+ for (; o; o = o->next)<br>
+ {<br>
+ struct mpnh *n = mpnh_copy_node(o, lp);<br>
+ *last = n;<br>
+ last = &(n->next);<br>
+ }<br>
+<br>
+ return first;<br>
+}<br>
+<br>
+/*<br>
+ * mpnh_sort - sort the nexthop list<br>
+ * @x: the list to be sorted<br>
+ * @lp: if not NULL - the list will be copied in case it needs to be reordered,<br>
+ * in this case the given list always remains unchanged.<br>
+ * If however the list is ordered, the given list is just returned,<br>
+ * and no copy of the list is created.<br>
+ * If lp is NULL, the given list will be reordered directly<br>
+ */<br>
+struct mpnh *<br>
+mpnh_sort(struct mpnh *x, linpool *lp)<br>
+{<br>
+ struct mpnh *ret = x;<br>
+ struct mpnh *cur;<br>
+ struct mpnh *prev;<br>
+ int copy_on_change = !!lp;<br>
+<br>
+ for (cur = ret->next, prev = ret; cur; prev = cur, cur = cur->next)<br>
+ {<br>
+ int cmp = mpnh_compare_node(prev, cur);<br>
+ if (cmp <= 0)<br>
+ continue;<br>
+<br>
+ if (copy_on_change)<br>
+ {<br>
+ /* the list needs to be copied, and prev and cur need to be made<br>
+ * pointing to the new list entries */<br>
+<br>
+ struct mpnh *old_prev, *new_prev;<br>
+<br>
+ ret = mpnh_copy_lp(x, lp);<br>
+<br>
+ for (old_prev = x, new_prev = ret;<br>
+ old_prev != prev;<br>
+ old_prev = old_prev->next, new_prev = new_prev->next);<br>
+<br>
+ prev = new_prev;<br>
+ cur = new_prev->next;<br>
+<br>
+ copy_on_change = 0;<br>
+ }<br>
+<br>
+ /* promote the entry */<br>
+ struct mpnh *cur2;<br>
+ struct mpnh **next2_ptr;<br>
+<br>
+ for (cur2 = ret, next2_ptr = &ret; ; next2_ptr = &cur2->next, cur2 = cur2->next)<br>
+ {<br>
+ cmp = mpnh_compare_node(cur2, cur);<br>
+ if (cmp <= 0)<br>
+ continue;<br>
+<br>
+ /*<br>
+ * found the place, where to insert the entry<br>
+ * do the entry move<br>
+ */<br>
+<br>
+ /* 1. remove entry from the list */<br>
+ prev->next = cur->next;<br>
+<br>
+ /* 2. now insert entry to the new place */<br>
+ *next2_ptr = cur;<br>
+ cur->next = cur2;<br>
+<br>
+ break;<br>
+ }<br>
+<br>
+ /* now we have everything sorted up to prev,<br>
+ * set cur to prev and proceed with the cur->next loop */<br>
+ cur = prev;<br>
+ }<br>
+<br>
+ return ret;<br>
+}<br>
<br>
static struct mpnh *<br>
mpnh_copy(struct mpnh *o)<br>
diff --git a/nest/rt-table.c b/nest/rt-table.c<br>
index 57c8b8e..0a90633 100644<br>
--- a/nest/rt-table.c<br>
+++ b/nest/rt-table.c<br>
@@ -592,8 +592,27 @@ static struct mpnh *<br>
mpnh_merge_rta(struct mpnh *nhs, rta *a, int max)<br>
{<br>
struct mpnh nh = { .gw = a->gw, .iface = a->iface };<br>
- struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;<br>
- return mpnh_merge(nhs, nh2, 1, 0, max, rte_update_pool);<br>
+ struct mpnh *nh2;<br>
+ int r2 = 0;<br>
+<br>
+ if (a->dest == RTD_MULTIPATH)<br>
+ {<br>
+ /*<br>
+ * mpnh_merge expects the nexthops list to be sorted,<br>
+ * while the nexthops returned by the protocols,<br>
+ * e.g. the "static" one, are actually not.<br>
+ * Ensures the nh2 is sorted.<br>
+ */<br>
+ nh2 = mpnh_sort(a->nexthops, rte_update_pool);<br>
+ /*<br>
+ * If the sort was actually done, nh2 is already a copy,<br>
+ * so no need to copy it once again, set r2 to 1 in this case.<br>
+ */<br>
+ r2 = (nh2 != a->nexthops);<br>
+ }<br>
+ else<br>
+ nh2 = &nh;<br>
+ return mpnh_merge(nhs, nh2, 1, r2, max, rte_update_pool);<br>
}<br>
<br>
rte *<br>
@@ -642,6 +661,24 @@ rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tm<br>
best->attrs->nexthops = nhs;<br>
}<br>
}<br>
+ else if (best->attrs->dest == RTD_MULTIPATH)<br>
+ {<br>
+ /*<br>
+ * mpnh_merge, mpnh_same and mpnh_sub expect the nexthops list<br>
+ * to be sorted, while the nexthops returned by the protocols,<br>
+ * e.g. the "static" one, are actually not.<br>
+ * This ensures the resulting entry has nexthops sorted,<br>
+ * and makes the behavior consistent and agnostic to<br>
+ * the number of elements in the best0 entries list<br>
+ * (i.e. best0->next processing above)<br>
+ */<br>
+ nhs = mpnh_sort(best->attrs->nexthops, rte_update_pool);<br>
+ if (nhs != best->attrs->nexthops)<br>
+ {<br>
+ best = rte_cow_rta(best, rte_update_pool);<br>
+ best->attrs->nexthops = nhs;<br>
+ }<br>
+ }<br>
<br>
if (best != best0)<br>
*rt_free = best;<br>
diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c<br>
index 640d187..ca2648b 100644<br>
--- a/sysdep/linux/netlink.c<br>
+++ b/sysdep/linux/netlink.c<br>
@@ -19,7 +19,6 @@<br>
#include "nest/route.h"<br>
#include "nest/protocol.h"<br>
#include "nest/iface.h"<br>
-#include "lib/alloca.h"<br>
#include "lib/timer.h"<br>
#include "lib/unix.h"<br>
#include "lib/krt.h"<br>
@@ -46,6 +45,32 @@<br>
#define RTA_TABLE 15<br>
#endif<br>
<br>
+/*<br>
+ * nl parse route context<br>
+ * its duty is<br>
+ * 1. To maintain the entry collect state -<br>
+ * for IPv6 ECMP the nl parsing logic needs to collect<br>
+ * separate individual entries, representing the multipath<br>
+ * into one multipath entry<br>
+ * 2. To hold some temporary data used while parsing<br>
+ * (like non-cached rta) on the stack.<br>
+ *<br>
+ * Implementation note: the context actually maintains two rta entries:<br>
+ * one to be used for the current rte being processed<br>
+ * (i.e. being created as a result of the nl data parsing),<br>
+ * another is used for the current rte being collected,<br>
+ * (i.e. stored in collect_rte, and for which multipath entries are being collected).<br>
+ * process_attrs holds the index of the attrs, being used for rte being processed.<br>
+ * Once the rte being processed becomes the one being collected,<br>
+ * the attrs used with it become "being collected", and another attrs become "being processed".<br>
+ */<br>
+typedef struct nl_parsectx<br>
+{<br>
+ struct krt_proto *collect_p; /* Protocol, for which entries are currently being processed */<br>
+ rte *collect_rte; /* Entry, for which multipath entries are currently being collected */<br>
+ int process_attrs; /* index in the attrs array for the entry to be used for the "processed" entry */<br>
+ rta attrs[2];<br>
+} nl_parsectx;<br>
<br>
/*<br>
* Synchronous Netlink interface<br>
@@ -62,6 +87,8 @@ struct nl_sock<br>
<br>
#define NL_RX_SIZE 8192<br>
<br>
+static linpool *netlink_lp;<br>
+<br>
static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */<br>
static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */<br>
<br>
@@ -803,7 +830,7 @@ nh_bufsize(struct mpnh *nh)<br>
}<br>
<br>
static int<br>
-nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)<br>
+nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new, int mp)<br>
{<br>
eattr *ea;<br>
net *net = e->net;<br>
@@ -820,7 +847,8 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)<br>
bzero(&r.r, sizeof(r.r));<br>
r.h.nlmsg_type = new ? RTM_NEWROUTE : RTM_DELROUTE;<br>
r.h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));<br>
- r.h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | (new ? NLM_F_CREATE|NLM_F_EXCL : 0);<br>
+ r.h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK<br>
+ | (new ? NLM_F_CREATE | (!mp ? NLM_F_EXCL : 0) : 0);<br>
<br>
r.r.rtm_family = BIRD_AF;<br>
r.r.rtm_dst_len = net->n.pxlen;<br>
@@ -835,8 +863,12 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)<br>
<br>
/* For route delete, we do not specify route attributes */<br>
if (!new)<br>
- return nl_exchange(&r.h);<br>
-<br>
+ {<br>
+ if (mp)<br>
+ goto set_dest;<br>
+ else<br>
+ goto submit;<br>
+ }<br>
<br>
if (ea = ea_find(eattrs, EA_KRT_METRIC))<br>
nl_add_attr_u32(&r.h, sizeof(r), RTA_PRIORITY, ea->u.data);<br>
@@ -864,7 +896,7 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)<br>
<br>
<br>
/* a->iface != NULL checked in krt_capable() for router and device routes */<br>
-<br>
+set_dest:<br>
switch (a->dest)<br>
{<br>
case RTD_ROUTER:<br>
@@ -892,10 +924,104 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)<br>
default:<br>
bug("krt_capable inconsistent with nl_send_route");<br>
}<br>
-<br>
+submit:<br>
return nl_exchange(&r.h);<br>
}<br>
<br>
+/*<br>
+ * this is just to unify the code for bird1.x and bird2<br>
+ * for bird1.x it is just a define, resolving to 1<br>
+ * for IPV6 and 0 for IPV4<br>
+ *<br>
+ * for bird2 it is a function, making a decision based<br>
+ * on the p->p.table->addr_type<br>
+ *<br>
+ * static int<br>
+ * trk_is_use_collect_mode(struct krt_proto *p);<br>
+ */<br>
+#ifdef IPV6<br>
+#define trk_is_use_collect_mode(_p) 1<br>
+#else<br>
+#define trk_is_use_collect_mode(_p) 0<br>
+#endif<br>
+<br>
+static struct mpnh *<br>
+krt_mp_merge_rta(struct mpnh *nhs, rta *a, int max)<br>
+{<br>
+ struct mpnh nh = { .gw = a->gw, .iface = a->iface };<br>
+ struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;<br>
+ return mpnh_merge(nhs, nh2, 1, 0, max, netlink_lp);<br>
+}<br>
+<br>
+static struct mpnh *<br>
+krt_mp_sub_rte_rta(rta *ax, rta *ay)<br>
+{<br>
+ struct mpnh nhx = { .gw = ax->gw, .iface = ax->iface };<br>
+ struct mpnh nhy = { .gw = ay->gw, .iface = ay->iface };<br>
+ struct mpnh *nhpx = (ax->dest == RTD_MULTIPATH) ? ax->nexthops : &nhx;<br>
+ struct mpnh *nhpy = (ay->dest == RTD_MULTIPATH) ? ay->nexthops : &nhy;<br>
+ return mpnh_sub(nhpx, nhpy, netlink_lp);<br>
+}<br>
+<br>
+static int<br>
+krt_send_nh_multipath(struct krt_proto *p, rte *base, struct mpnh *nh, struct ea_list *eattrs, int new)<br>
+{<br>
+ rte *e;<br>
+ int err = 0;<br>
+ rta ra = {<br>
+ .src= p->p.main_source,<br>
+ .source = RTS_INHERIT,<br>
+ .scope = SCOPE_UNIVERSE,<br>
+ .cast = RTC_UNICAST<br>
+ };<br>
+<br>
+ e = rte_get_temp(&ra);<br>
+ e->net = base->net;<br>
+ e->u.krt = base->u.krt;<br>
+<br>
+ for (; nh; nh = nh->next)<br>
+ {<br>
+ <a href="http://ra.gw" rel="noreferrer" target="_blank">ra.gw</a> = nh->gw;<br>
+ ra.iface = nh->iface;<br>
+<br>
+ err = nl_send_route(p, e, eattrs, new, 1);<br>
+ if (err < 0)<br>
+ DBG("deleting route failed %d\n", err);<br>
+ }<br>
+<br>
+ rte_free(e);<br>
+<br>
+ return err;<br>
+}<br>
+<br>
+static int<br>
+krt_adjust_rte_multipath(struct krt_proto *p, rte *new, rte *old, struct ea_list *eattrs)<br>
+{<br>
+ struct mpnh *nhold, *nhnew;<br>
+ int err = 0;<br>
+<br>
+ nhold = krt_mp_sub_rte_rta(old->attrs, new->attrs);<br>
+ nhnew = krt_mp_sub_rte_rta(new->attrs, old->attrs);<br>
+<br>
+ if (nhold)<br>
+ {<br>
+ if (old->attrs->dest == RTD_MULTIPATH)<br>
+ err = krt_send_nh_multipath(p, old, nhold, NULL, 0);<br>
+ else<br>
+ err = nl_send_route(p, old, NULL, 0, 1);<br>
+ }<br>
+<br>
+ if (nhnew)<br>
+ {<br>
+ if (new->attrs->dest == RTD_MULTIPATH)<br>
+ err |= krt_send_nh_multipath(p, new, nhnew, eattrs, 1);<br>
+ else<br>
+ err |= nl_send_route(p, new, eattrs, 1, 1);<br>
+ }<br>
+<br>
+ return err;<br>
+}<br>
+<br>
void<br>
krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list *eattrs)<br>
{<br>
@@ -909,10 +1035,27 @@ krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list<br>
*/<br>
<br>
if (old)<br>
- nl_send_route(p, old, NULL, 0);<br>
+ {<br>
+ if (trk_is_use_collect_mode(p))<br>
+ {<br>
+ if (new && ( new->attrs->dest == RTD_MULTIPATH<br>
+ || old->attrs->dest == RTD_MULTIPATH))<br>
+ {<br>
+ err = krt_adjust_rte_multipath(p, new, old, eattrs);<br>
+ /* zero up "new" to ensure the below "if (new)" branch is not triggered */<br>
+ new = NULL;<br>
+ }<br>
+ else if (old->attrs->dest == RTD_MULTIPATH)<br>
+ krt_send_nh_multipath(p, old, old->attrs->nexthops, NULL, 0);<br>
+ else<br>
+ nl_send_route(p, old, NULL, 0, 0);<br>
+ }<br>
+ else<br>
+ nl_send_route(p, old, NULL, 0, 0);<br>
+ }<br>
<br>
if (new)<br>
- err = nl_send_route(p, new, eattrs, 1);<br>
+ err = nl_send_route(p, new, eattrs, 1, 0);<br>
<br>
if (err < 0)<br>
n->n.flags |= KRF_SYNC_ERROR;<br>
@@ -920,11 +1063,138 @@ krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list<br>
n->n.flags &= ~KRF_SYNC_ERROR;<br>
}<br>
<br>
+static int<br>
+krt_mp_is_collectable(struct krt_proto *p, rte *e)<br>
+{<br>
+ if (!trk_is_use_collect_mode(p))<br>
+ return 0;<br>
+<br>
+ struct rta *a = e->attrs;<br>
+<br>
+ if (a->dest != RTD_ROUTER && a->dest != RTD_DEVICE)<br>
+ return 0;<br>
+<br>
+ return 1;<br>
+}<br>
+<br>
+static int<br>
+krt_mp_is_mergable(struct krt_proto *p, rte *e1, rte *e2)<br>
+{<br>
+ if (e1->net != e2->net)<br>
+ return 0;<br>
+<br>
+ if (!rte_is_valid(e1) || !rte_is_valid(e2))<br>
+ return 0;<br>
+<br>
+ if (e1->pref != e2->pref)<br>
+ return 0;<br>
+<br>
+ if (e1->attrs->src->proto->proto != e2->attrs->src->proto->proto)<br>
+ return 0;<br>
+<br>
+ return 1;<br>
+}<br>
+<br>
+static rte *<br>
+krt_mp_collect_do_add(struct krt_proto *p, rte *mp_collect_rte, rte *e)<br>
+{<br>
+ struct rta *attrs = mp_collect_rte->attrs;<br>
+<br>
+ ASSERT(!rta_is_cached(attrs));<br>
+<br>
+ /* sanity to check our tmp attrs selection logic works correctly */<br>
+ ASSERT(attrs != e->attrs);<br>
+<br>
+ if (attrs->dest != RTD_MULTIPATH)<br>
+ {<br>
+ attrs->nexthops = krt_mp_merge_rta(NULL, attrs, p->p.merge_limit);<br>
+ attrs->dest = RTD_MULTIPATH;<br>
+ }<br>
+<br>
+ attrs->nexthops = krt_mp_merge_rta(attrs->nexthops, e->attrs, p->p.merge_limit);<br>
+<br>
+ return mp_collect_rte;<br>
+}<br>
+<br>
+static int<br>
+krt_mp_can_collect(struct krt_proto *p, rte *mp_collect_rte, rte *e)<br>
+{<br>
+ if (!krt_mp_is_collectable(p, e))<br>
+ return 0;<br>
+<br>
+ if (!krt_mp_is_mergable(p, mp_collect_rte, e))<br>
+ return 0;<br>
+<br>
+ return 1;<br>
+}<br>
+<br>
+static rta* nl_parse_get_tmp_rta(nl_parsectx *ctx)<br>
+{<br>
+ rta *a = &ctx->attrs[ctx->process_attrs];<br>
+<br>
+ memset(a, 0, sizeof(*a));<br>
+ return a;<br>
+}<br>
+<br>
+static void nl_parse_collect_complete(nl_parsectx *ctx)<br>
+{<br>
+ if (ctx->collect_p)<br>
+ {<br>
+ DBG("KRT: collected\n");<br>
+ krt_got_route(ctx->collect_p, ctx->collect_rte);<br>
+ ctx->collect_p = NULL;<br>
+ ctx->collect_rte = NULL;<br>
+ lp_flush(netlink_lp);<br>
+ }<br>
+}<br>
+<br>
+static void<br>
+nl_parse_collect_rte(nl_parsectx *ctx, struct krt_proto *p, rte *e)<br>
+{<br>
+ if (ctx->collect_p)<br>
+ {<br>
+ ASSERT(ctx->collect_rte);<br>
+ if (ctx->collect_p == p && krt_mp_can_collect(p, ctx->collect_rte, e))<br>
+ {<br>
+ ctx->collect_rte = krt_mp_collect_do_add(p, ctx->collect_rte, e);<br>
+ DBG("KRT: collecting[add]\n");<br>
+ return;<br>
+ }<br>
+<br>
+ nl_parse_collect_complete(ctx);<br>
+ }<br>
+<br>
+ ASSERT(!ctx->collect_p);<br>
+ ASSERT(!ctx->collect_rte);<br>
+<br>
+ if (krt_mp_is_collectable(p, e))<br>
+ {<br>
+ ASSERT(e->attrs == &ctx->attrs[ctx->process_attrs]);<br>
+ ASSERT(!rta_is_cached(e->attrs));<br>
+ ctx->collect_p = p;<br>
+ ctx->collect_rte = e;<br>
+ ctx->process_attrs = (ctx->process_attrs + 1) % 2;<br>
+ DBG("KRT: collecting\n");<br>
+ return;<br>
+ }<br>
+<br>
+ krt_got_route(p, e);<br>
+}<br>
+<br>
+static void nl_parse_begin(nl_parsectx *ctx)<br>
+{<br>
+ memset(ctx, 0, sizeof (*ctx));<br>
+}<br>
+<br>
+static void nl_parse_end(nl_parsectx *ctx)<br>
+{<br>
+ nl_parse_collect_complete(ctx);<br>
+}<br>
<br>
#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)<br>
<br>
static void<br>
-nl_parse_route(struct nlmsghdr *h, int scan)<br>
+nl_parse_route(nl_parsectx *ctx, struct nlmsghdr *h, int scan)<br>
{<br>
struct krt_proto *p;<br>
struct rtmsg *i;<br>
@@ -1022,12 +1292,12 @@ nl_parse_route(struct nlmsghdr *h, int scan)<br>
<br>
net *net = net_get(p->p.table, dst, i->rtm_dst_len);<br>
<br>
- rta ra = {<br>
- .src= p->p.main_source,<br>
- .source = RTS_INHERIT,<br>
- .scope = SCOPE_UNIVERSE,<br>
- .cast = RTC_UNICAST<br>
- };<br>
+ rta *ra = nl_parse_get_tmp_rta(ctx);<br>
+<br>
+ ra->src= p->p.main_source,<br>
+ ra->source = RTS_INHERIT,<br>
+ ra->scope = SCOPE_UNIVERSE,<br>
+ ra->cast = RTC_UNICAST;<br>
<br>
switch (i->rtm_type)<br>
{<br>
@@ -1035,9 +1305,9 @@ nl_parse_route(struct nlmsghdr *h, int scan)<br>
<br>
if (a[RTA_MULTIPATH] && (i->rtm_family == AF_INET))<br>
{<br>
- ra.dest = RTD_MULTIPATH;<br>
- ra.nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]);<br>
- if (!ra.nexthops)<br>
+ ra->dest = RTD_MULTIPATH;<br>
+ ra->nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]);<br>
+ if (!ra->nexthops)<br>
{<br>
log(L_ERR "KRT: Received strange multipath route %I/%d",<br>
net->n.prefix, net->n.pxlen);<br>
@@ -1047,8 +1317,8 @@ nl_parse_route(struct nlmsghdr *h, int scan)<br>
break;<br>
}<br>
<br>
- ra.iface = if_find_by_index(oif);<br>
- if (!ra.iface)<br>
+ ra->iface = if_find_by_index(oif);<br>
+ if (!ra->iface)<br>
{<br>
log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u",<br>
net->n.prefix, net->n.pxlen, oif);<br>
@@ -1058,39 +1328,39 @@ nl_parse_route(struct nlmsghdr *h, int scan)<br>
if (a[RTA_GATEWAY])<br>
{<br>
neighbor *ng;<br>
- ra.dest = RTD_ROUTER;<br>
- memcpy(&<a href="http://ra.gw" rel="noreferrer" target="_blank">ra.gw</a>, RTA_DATA(a[RTA_GATEWAY]), sizeof(<a href="http://ra.gw" rel="noreferrer" target="_blank">ra.gw</a>));<br>
- ipa_ntoh(<a href="http://ra.gw" rel="noreferrer" target="_blank">ra.gw</a>);<br>
+ ra->dest = RTD_ROUTER;<br>
+ memcpy(&ra->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra->gw));<br>
+ ipa_ntoh(ra->gw);<br>
<br>
#ifdef IPV6<br>
/* Silently skip strange 6to4 routes */<br>
- if (ipa_in_net(<a href="http://ra.gw" rel="noreferrer" target="_blank">ra.gw</a>, IPA_NONE, 96))<br>
+ if (ipa_in_net(ra->gw, IPA_NONE, 96))<br>
return;<br>
#endif<br>
<br>
- ng = neigh_find2(&p->p, &<a href="http://ra.gw" rel="noreferrer" target="_blank">ra.gw</a>, ra.iface,<br>
+ ng = neigh_find2(&p->p, &ra->gw, ra->iface,<br>
(i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);<br>
if (!ng || (ng->scope == SCOPE_HOST))<br>
{<br>
log(L_ERR "KRT: Received route %I/%d with strange next-hop %I",<br>
- net->n.prefix, net->n.pxlen, <a href="http://ra.gw" rel="noreferrer" target="_blank">ra.gw</a>);<br>
+ net->n.prefix, net->n.pxlen, ra->gw);<br>
return;<br>
}<br>
}<br>
else<br>
{<br>
- ra.dest = RTD_DEVICE;<br>
+ ra->dest = RTD_DEVICE;<br>
}<br>
<br>
break;<br>
case RTN_BLACKHOLE:<br>
- ra.dest = RTD_BLACKHOLE;<br>
+ ra->dest = RTD_BLACKHOLE;<br>
break;<br>
case RTN_UNREACHABLE:<br>
- ra.dest = RTD_UNREACHABLE;<br>
+ ra->dest = RTD_UNREACHABLE;<br>
break;<br>
case RTN_PROHIBIT:<br>
- ra.dest = RTD_PROHIBIT;<br>
+ ra->dest = RTD_PROHIBIT;<br>
break;<br>
/* FIXME: What about RTN_THROW? */<br>
default:<br>
@@ -1098,7 +1368,7 @@ nl_parse_route(struct nlmsghdr *h, int scan)<br>
return;<br>
}<br>
<br>
- rte *e = rte_get_temp(&ra);<br>
+ rte *e = rte_get_temp(ra);<br>
e->net = net;<br>
e->u.krt.src = src;<br>
e->u.krt.proto = i->rtm_protocol;<br>
@@ -1114,24 +1384,24 @@ nl_parse_route(struct nlmsghdr *h, int scan)<br>
memcpy(&ps, RTA_DATA(a[RTA_PREFSRC]), sizeof(ps));<br>
ipa_ntoh(ps);<br>
<br>
- ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr));<br>
- ea->next = ra.eattrs;<br>
- ra.eattrs = ea;<br>
+ ea_list *ea = lp_alloc(netlink_lp, sizeof(ea_list) + sizeof(eattr));<br>
+ ea->next = ra->eattrs;<br>
+ ra->eattrs = ea;<br>
ea->flags = EALF_SORTED;<br>
ea->count = 1;<br>
ea->attrs[0].id = EA_KRT_PREFSRC;<br>
ea->attrs[0].flags = 0;<br>
ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;<br>
- ea->attrs[0].u.ptr = alloca(sizeof(struct adata) + sizeof(ps));<br>
+ ea->attrs[0].u.ptr = lp_alloc(netlink_lp, sizeof(struct adata) + sizeof(ps));<br>
ea->attrs[0].u.ptr->length = sizeof(ps);<br>
memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps));<br>
}<br>
<br>
if (a[RTA_FLOW])<br>
{<br>
- ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr));<br>
- ea->next = ra.eattrs;<br>
- ra.eattrs = ea;<br>
+ ea_list *ea = lp_alloc(netlink_lp, sizeof(ea_list) + sizeof(eattr));<br>
+ ea->next = ra->eattrs;<br>
+ ra->eattrs = ea;<br>
ea->flags = EALF_SORTED;<br>
ea->count = 1;<br>
ea->attrs[0].id = EA_KRT_REALM;<br>
@@ -1143,7 +1413,7 @@ nl_parse_route(struct nlmsghdr *h, int scan)<br>
if (a[RTA_METRICS])<br>
{<br>
u32 metrics[KRT_METRICS_MAX];<br>
- ea_list *ea = alloca(sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));<br>
+ ea_list *ea = lp_alloc(netlink_lp, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));<br>
int t, n = 0;<br>
<br>
if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)<br>
@@ -1165,15 +1435,15 @@ nl_parse_route(struct nlmsghdr *h, int scan)<br>
<br>
if (n > 0)<br>
{<br>
- ea->next = ra.eattrs;<br>
+ ea->next = ra->eattrs;<br>
ea->flags = EALF_SORTED;<br>
ea->count = n;<br>
- ra.eattrs = ea;<br>
+ ra->eattrs = ea;<br>
}<br>
}<br>
<br>
if (scan)<br>
- krt_got_route(p, e);<br>
+ nl_parse_collect_rte(ctx, p, e);<br>
else<br>
krt_got_route_async(p, e, new);<br>
}<br>
@@ -1182,13 +1452,19 @@ void<br>
krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */<br>
{<br>
struct nlmsghdr *h;<br>
+ nl_parsectx ctx;<br>
<br>
nl_request_dump(BIRD_AF, RTM_GETROUTE);<br>
+<br>
+ nl_parse_begin(&ctx);<br>
+<br>
while (h = nl_get_scan())<br>
if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)<br>
- nl_parse_route(h, 1);<br>
+ nl_parse_route(&ctx, h, 1);<br>
else<br>
log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);<br>
+<br>
+ nl_parse_end(&ctx);<br>
}<br>
<br>
/*<br>
@@ -1201,12 +1477,16 @@ static byte *nl_async_rx_buffer; /* Receive buffer */<br>
static void<br>
nl_async_msg(struct nlmsghdr *h)<br>
{<br>
+ nl_parsectx ctx;<br>
+<br>
switch (h->nlmsg_type)<br>
{<br>
case RTM_NEWROUTE:<br>
case RTM_DELROUTE:<br>
DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);<br>
- nl_parse_route(h, 0);<br>
+ nl_parse_begin(&ctx);<br>
+ nl_parse_route(&ctx, h, 0);<br>
+ nl_parse_end(&ctx);<br>
break;<br>
case RTM_NEWLINK:<br>
case RTM_DELLINK:<br>
@@ -1325,6 +1605,7 @@ void<br>
krt_sys_io_init(void)<br>
{<br>
HASH_INIT(nl_table_map, krt_pool, 6);<br>
+ netlink_lp = lp_new(krt_pool, 4080);<br>
}<br>
<br>
int<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.5.0<br>
<br>
</font></span></blockquote></div><br></div>