[PATCH v4] Babel: Replace internal route selection by bird's nest

Daniel Gröber dxld at darkboxed.org
Sun Feb 26 21:05:24 CET 2023


This introduces the ability to filter routes from specific interfaces and
neighbours. With the current internal route selection proto babel exports
only up to one route and an admin cannot do fine-grained filtering.

To fix this we rip out the internal route selection entirely and export
evey babel route as a distinct route into the bird's nest instead.

In the presence of esoteric route filters this change may be observable by
users however we feel the additional filtering power is more important. A
release note entry for this change is recommended.
---
Changes in v4:
 - We've seen the light, use FIB_ITERATE as intended, thanks Ondrej.
 - Reinstate seqno request logic on loss of all feasible routes, thanks
   Toke. Discussion of behaviour still ongoing, see[1]
 - Try to prepare for async rt_notify in BIRD 3, thanks Maria.

[1]: http://trubka.network.cz/pipermail/bird-users/2023-February/016689.html


 proto/babel/babel.c | 278 ++++++++++++++++++++------------------------
 proto/babel/babel.h |   3 +
 2 files changed, 128 insertions(+), 153 deletions(-)

diff --git a/proto/babel/babel.c b/proto/babel/babel.c
index ff8b6b52..eb347a85 100644
--- a/proto/babel/babel.c
+++ b/proto/babel/babel.c
@@ -29,10 +29,12 @@
  * possible routes for the prefix are tracked as &babel_route entries and the
  * feasibility distance is maintained through &babel_source structures.
  *
- * The main route selection is done in babel_select_route(). This is called when
- * an entry is updated by receiving updates from the network or when modified by
- * internal timers. The function selects from feasible and reachable routes the
- * one with the lowest metric to be announced to the core.
+ * The main route selection is done by bird's nest (heh). For each prefix we
+ * export all feasible routes to the core with a distinct source (rte_src) per
+ * neihbour so bird handles them as distinct routes. Nest will then notify us of
+ * one best route for each prefix, either our own (internal) or from another
+ * protocol, by calling babel_rt_notify. For internal best routes we remember
+ * which babel_route it selected in babel_entry.selected.
  */
 
 #include <stdlib.h>
@@ -50,8 +52,8 @@ static inline int ge_mod64k(uint a, uint b)
 { return (u16)(a - b) < 0x8000; }
 
 static void babel_expire_requests(struct babel_proto *p, struct babel_entry *e);
-static void babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_route *mod);
-static inline void babel_announce_retraction(struct babel_proto *p, struct babel_entry *e);
+static void babel_announce_rte(struct babel_proto *p, struct babel_entry *e, struct babel_route *r);
+static void babel_rte_update_unreachable(struct babel_proto *p, struct babel_entry *e, u8 announce);
 static void babel_send_route_request(struct babel_proto *p, struct babel_entry *e, struct babel_neighbor *n);
 static void babel_send_seqno_request(struct babel_proto *p, struct babel_entry *e, struct babel_seqno_request *sr, struct babel_neighbor *n);
 static void babel_update_cost(struct babel_neighbor *n);
@@ -168,9 +170,7 @@ static inline void
 babel_retract_route(struct babel_proto *p, struct babel_route *r)
 {
   r->metric = r->advert_metric = BABEL_INFINITY;
-
-  if (r == r->e->selected)
-    babel_select_route(p, r->e, r);
+  babel_announce_rte(p, r->e, r);
 }
 
 static void
@@ -182,9 +182,6 @@ babel_flush_route(struct babel_proto *p UNUSED, struct babel_route *r)
   rem_node(NODE r);
   rem_node(&r->neigh_route);
 
-  if (r->e->selected == r)
-    r->e->selected = NULL;
-
   sl_free(r);
 }
 
@@ -200,6 +197,7 @@ babel_expire_route(struct babel_proto *p, struct babel_route *r)
   {
     r->metric = r->advert_metric = BABEL_INFINITY;
     r->expires = current_time() + cf->hold_time;
+    babel_announce_rte(p, r->e, r);
   }
   else
   {
@@ -229,8 +227,6 @@ babel_expire_routes_(struct babel_proto *p, struct fib *rtable)
 loop:
   FIB_ITERATE_START(rtable, &fit, struct babel_entry, e)
   {
-    int changed = 0;
-
     WALK_LIST_DELSAFE(r, rx, e->routes)
     {
       if (r->refresh_time && r->refresh_time <= now_)
@@ -238,23 +234,12 @@ loop:
 
       if (r->expires && r->expires <= now_)
       {
-	changed = changed || (r == e->selected);
+	FIB_ITERATE_PUT(&fit);
 	babel_expire_route(p, r);
+	goto loop;
       }
     }
 
-    if (changed)
-    {
-      /*
-       * We have to restart the iteration because there may be a cascade of
-       * synchronous events babel_select_route() -> nest table change ->
-       * babel_rt_notify() -> rtable change, invalidating hidden variables.
-       */
-      FIB_ITERATE_PUT(&fit);
-      babel_select_route(p, e, NULL);
-      goto loop;
-    }
-
     /* Clean up stale entries */
     if ((e->valid == BABEL_ENTRY_STALE) && ((e->updated + cf->hold_time) <= now_))
       e->valid = BABEL_ENTRY_DUMMY;
@@ -263,7 +248,7 @@ loop:
     if (e->unreachable && (!e->valid || (e->router_id == p->router_id)))
     {
       FIB_ITERATE_PUT(&fit);
-      babel_announce_retraction(p, e);
+      babel_rte_update_unreachable(p, e, 0);
       goto loop;
     }
 
@@ -447,6 +432,7 @@ babel_get_neighbor(struct babel_iface *ifa, ip_addr addr)
 
   nbr = mb_allocz(ifa->pool, sizeof(struct babel_neighbor));
   nbr->ifa = ifa;
+  nbr->src = rt_get_source(&p->p, idm_alloc(&p->src_ids));
   nbr->addr = addr;
   nbr->rxcost = BABEL_INFINITY;
   nbr->txcost = BABEL_INFINITY;
@@ -474,6 +460,8 @@ babel_flush_neighbor(struct babel_proto *p, struct babel_neighbor *nbr)
   }
 
   nbr->ifa = NULL;
+
+  idm_free(&p->src_ids, nbr->src->private_id);
   rem_node(NODE nbr);
   mb_free(nbr);
 }
@@ -634,11 +622,47 @@ done:
     WALK_LIST2(r, n, nbr->routes, neigh_route)
     {
       r->metric = babel_compute_metric(nbr, r->advert_metric);
-      babel_select_route(p, r->e, r);
+      babel_announce_rte(p, r->e, r);
     }
   }
 }
 
+/**
+ * This function handles announcing the special unreachable route we insert for
+ * a prefix whenever we have no more feasible routes available as per RFC8966
+ * section 3.5.4 as well as retracting it when such routes are available
+ * again.
+ *
+ * We also remember if we inserted an unreachable route in e->unreachable in
+ * order to clean it up later in babel_expire_routes_.
+ */
+static void
+babel_rte_update_unreachable(struct babel_proto *p, struct babel_entry *e, u8 announce)
+{
+  struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : p->ip6_channel;
+  rte *rte = NULL;
+
+  if (announce) {
+    rta a0 = {
+      .source = RTS_BABEL,
+      .scope = SCOPE_UNIVERSE,
+      .dest = RTD_UNREACHABLE,
+      .pref = 1,
+    };
+
+    rta *a = rta_lookup(&a0);
+    rte = rte_get_temp(a, p->p.main_source);
+  }
+
+  e->unreachable = announce;
+
+  /* Unlike the regular per-neighbour routes we only want one unreachable route
+   * for each prefix. This is mainly due to the lifetime of the unreachable rte
+   * exceeding that of the neighbour's rte_src ID as the babal neighbour may be
+   * flushed before the unreachable route is retracted. */
+  rte_update2(c, e->n.addr, rte, p->p.main_source);
+}
+
 /**
  * babel_announce_rte - announce selected route to the core
  * @p: Babel protocol instance
@@ -649,12 +673,11 @@ done:
  * the entry is valid and ours, the unreachable route is announced instead.
  */
 static void
-babel_announce_rte(struct babel_proto *p, struct babel_entry *e)
+babel_announce_rte(struct babel_proto *p, struct babel_entry *e, struct babel_route *r)
 {
-  struct babel_route *r = e->selected;
   struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : p->ip6_channel;
 
-  if (r)
+  if (r->metric != BABEL_INFINITY && r->feasible)
   {
     rta a0 = {
       .source = RTS_BABEL,
@@ -698,122 +721,23 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e)
       a0.nh.flags = RNF_ONLINK;
 
     rta *a = rta_lookup(&a0);
-    rte *rte = rte_get_temp(a, p->p.main_source);
-
-    e->unreachable = 0;
-    rte_update2(c, e->n.addr, rte, p->p.main_source);
-  }
-  else if (e->valid && (e->router_id != p->router_id))
-  {
-    /* Unreachable */
-    rta a0 = {
-      .source = RTS_BABEL,
-      .scope = SCOPE_UNIVERSE,
-      .dest = RTD_UNREACHABLE,
-      .pref = 1,
-    };
-
-    rta *a = rta_lookup(&a0);
-    rte *rte = rte_get_temp(a, p->p.main_source);
-
-    e->unreachable = 1;
-    rte_update2(c, e->n.addr, rte, p->p.main_source);
-  }
-  else
-  {
-    /* Retraction */
-    e->unreachable = 0;
-    rte_update2(c, e->n.addr, NULL, p->p.main_source);
-  }
-}
-
-/* Special case of babel_announce_rte() just for retraction */
-static inline void
-babel_announce_retraction(struct babel_proto *p, struct babel_entry *e)
-{
-  struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : p->ip6_channel;
-  e->unreachable = 0;
-  rte_update2(c, e->n.addr, NULL, p->p.main_source);
-}
-
-
-/**
- * babel_select_route - select best route for given route entry
- * @p: Babel protocol instance
- * @e: Babel entry to select the best route for
- * @mod: Babel route that was modified or NULL if unspecified
- *
- * Select the best reachable and feasible route for a given prefix among the
- * routes received from peers, and propagate it to the nest. This just selects
- * the reachable and feasible route with the lowest metric, but keeps selected
- * the old one in case of tie.
- *
- * If no feasible route is available for a prefix that previously had a route
- * selected, a seqno request is sent to try to get a valid route. If the entry
- * is valid and not owned by us, the unreachable route is announced to the nest
- * (to blackhole packets going to it, as per section 2.8). It is later removed
- * by babel_expire_routes(). Otherwise, the route is just removed from the nest.
- *
- * Argument @mod is used to optimize best route calculation. When specified, the
- * function can assume that only the @mod route was modified to avoid full best
- * route selection and announcement when non-best route was modified in minor
- * way. The caller is advised to not call babel_select_route() when no change is
- * done (e.g. periodic route updates) to avoid unnecessary announcements of the
- * same best route. The caller is not required to call the function in case of a
- * retraction of a non-best route.
- *
- * Note that the function does not active triggered updates. That is done by
- * babel_rt_notify() when the change is propagated back to Babel.
- */
-static void
-babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_route *mod)
-{
-  struct babel_route *r, *best = e->selected;
+    rte *rte = rte_get_temp(a, r->neigh->src);
 
-  /* Shortcut if only non-best was modified */
-  if (mod && (mod != best))
-  {
-    /* Either select modified route, or keep old best route */
-    if ((mod->metric < (best ? best->metric : BABEL_INFINITY)) && mod->feasible)
-      best = mod;
-    else
-      return;
+    rte_update2(c, e->n.addr, rte, r->neigh->src);
+    if (e->unreachable)
+      babel_rte_update_unreachable(p, e, 0);
   }
-  else
-  {
-    /* Selected route may be modified and no longer admissible */
-    if (!best || (best->metric == BABEL_INFINITY) || !best->feasible)
-      best = NULL;
+  else {
+    if (e->selected == r)
+      /* We NULL e->selected here rather than wait for babel_rt_notify in order
+       * to stay in a sync context. This is to be prepared for async rt_notify
+       * in BIRD 3. This is critical as RFC8966 demands unfeasible or infinite
+       * metric routes never being selected (see section 3.6 Route
+       * Selection). */
+      e->selected = NULL;
 
-    /* Find the best feasible route from all routes */
-    WALK_LIST(r, e->routes)
-      if ((r->metric < (best ? best->metric : BABEL_INFINITY)) && r->feasible)
-	best = r;
+    rte_update2(c, e->n.addr, NULL, r->neigh->src);
   }
-
-  if (best)
-  {
-    if (best != e->selected)
-      TRACE(D_EVENTS, "Picked new route for prefix %N: router-id %lR metric %d",
-	    e->n.addr, best->router_id, best->metric);
-  }
-  else if (e->selected)
-  {
-    /*
-     * We have lost all feasible routes. We have to broadcast seqno request
-     * (Section 3.8.2.1) and keep unreachable route for a while (section 2.8).
-     * The later is done automatically by babel_announce_rte().
-     */
-
-    TRACE(D_EVENTS, "Lost feasible route for prefix %N", e->n.addr);
-    if (e->valid && (e->selected->router_id == e->router_id))
-      babel_add_seqno_request(p, e, e->selected->router_id, e->selected->seqno + 1, 0, NULL);
-  }
-  else
-    return;
-
-  e->selected = best;
-  babel_announce_rte(p, e);
 }
 
 /*
@@ -1338,7 +1262,7 @@ babel_handle_update(union babel_msg *m, struct babel_iface *ifa)
     e->updated = current_time();
   }
 
-  babel_select_route(p, e, r);
+  babel_announce_rte(p, e, r);
 }
 
 void
@@ -2006,7 +1930,7 @@ static void
 babel_dump_entry(struct babel_entry *e)
 {
   struct babel_source *s;
-  struct babel_route *r;
+  struct babel_route *r, *best = e->selected;
 
   debug("Babel: Entry %N:\n", e->n.addr);
 
@@ -2016,7 +1940,7 @@ babel_dump_entry(struct babel_entry *e)
   WALK_LIST(r,e->routes)
   {
     debug(" ");
-    if (r == e->selected) debug("*");
+    if (r == best) debug("*");
     babel_dump_route(r);
   }
 }
@@ -2186,7 +2110,7 @@ babel_show_entries_(struct babel_proto *p, struct fib *rtable)
 
   FIB_WALK(rtable, struct babel_entry, e)
   {
-    struct babel_route *r = NULL;
+    struct babel_route *r = NULL, *best = e->selected;
     uint rts = 0, srcs = 0;
     node *n;
 
@@ -2199,7 +2123,7 @@ babel_show_entries_(struct babel_proto *p, struct fib *rtable)
     if (e->valid)
       cli_msg(-1025, "%-*N %-23lR %6u %5u %7u %7u", width,
 	      e->n.addr, e->router_id, e->metric, e->seqno, rts, srcs);
-    else if (r = e->selected)
+    else if (r = best)
       cli_msg(-1025, "%-*N %-23lR %6u %5u %7u %7u", width,
 	      e->n.addr, r->router_id, r->metric, r->seqno, rts, srcs);
     else
@@ -2236,10 +2160,10 @@ babel_show_routes_(struct babel_proto *p, struct fib *rtable)
 
   FIB_WALK(rtable, struct babel_entry, e)
   {
-    struct babel_route *r;
+    struct babel_route *r, *best = e->selected;
     WALK_LIST(r, e->routes)
     {
-      char c = (r == e->selected) ? '*' : (r->feasible ? '+' : ' ');
+      char c = (r == best) ? '*' : (r->feasible ? '+' : ' ');
       btime time = r->expires ? r->expires - current_time() : 0;
       cli_msg(-1025, "%-*N %-25I %-10s %5u %c %5u %7t", width,
 	      e->n.addr, r->next_hop, r->neigh->ifa->ifname,
@@ -2316,19 +2240,23 @@ babel_preexport(struct channel *C, struct rte *new)
  */
 static void
 babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net,
-		struct rte *new, struct rte *old UNUSED)
+		struct rte *new, struct rte *old)
 {
   struct babel_proto *p = (void *) P;
   struct babel_entry *e;
+  struct babel_iface *ifa;
+  struct babel_neighbor *n;
 
   if (new)
   {
     /* Update */
+    uint internal = new->src->proto == P;
     uint rt_seqno;
     uint rt_metric = ea_get_int(new->attrs->eattrs, EA_BABEL_METRIC, 0);
     u64 rt_router_id = 0;
+    struct babel_route *best;
 
-    if (new->src->proto == P)
+    if (internal)
     {
       rt_seqno = ea_find(new->attrs->eattrs, EA_BABEL_SEQNO)->u.data;
       eattr *e = ea_find(new->attrs->eattrs, EA_BABEL_ROUTER_ID);
@@ -2350,6 +2278,25 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net,
 
     e = babel_get_entry(p, net->n.addr);
 
+    best = e->selected;
+    if (internal) {
+      ifa = babel_find_iface(p, new->attrs->nh.iface);
+      n = ifa ? babel_find_neighbor(ifa, new->attrs->from) : NULL;
+      best = n ? babel_get_route(p, e, n) : NULL;
+      ASSERT(best); /* Note: We think this can't happen but the lifetimes of the
+		     * involved objects are complicated so we're not yet
+		     * completely convinced. */
+    } else {
+      best = NULL;
+    }
+
+    if (best && best != e->selected)
+      TRACE(D_EVENTS,
+	    "Picked new route for prefix %N: router-id %lR metric %d",
+	    e->n.addr, best->router_id, best->metric);
+
+    e->selected = best;
+
     /* Activate triggered updates */
     if ((e->valid != BABEL_ENTRY_VALID) ||
 	(e->router_id != rt_router_id))
@@ -2365,14 +2312,38 @@ babel_rt_notify(struct proto *P, struct channel *c UNUSED, struct network *net,
   }
   else
   {
+    uint internal = old->src->proto == P;
+
     /* Withdraw */
     e = babel_find_entry(p, net->n.addr);
 
     if (!e || e->valid != BABEL_ENTRY_VALID)
       return;
 
+    if (e->selected || !internal) {
+      TRACE(D_EVENTS, "Lost all feasible routes for prefix %N", e->n.addr);
+      /* We had a feasible route, but now it's gone. We must send a seqno
+       * request (Section 3.8.2.1). As allowed by RFC8966 we choose to always do
+       * so even if we don't have any more unfeasible routes and we use the
+       * simple unconditional multicast strategy.
+       *
+       * Note we interpret "feasible route" to include external routes from
+       * other protocols. You can think of these as being routes announced by
+       * neighbours internal to the bird implementation.
+       */
+      if (e->valid && (e->selected->router_id == e->router_id))
+	babel_add_seqno_request(
+	  p, e, e->selected->router_id, e->selected->seqno + 1, 0, NULL);
+    }
+
+
     e->valid = BABEL_ENTRY_STALE;
     e->metric = BABEL_INFINITY;
+    e->selected = NULL;
+
+    /* Install an unreachable route for prefix hold time as per RFC8966 section
+     * 3.5.4. */
+    babel_rte_update_unreachable(p, e, 1);
 
     babel_trigger_update(p);
     e->updated = current_time();
@@ -2464,6 +2435,7 @@ babel_start(struct proto *P)
   p->source_slab = sl_new(P->pool, sizeof(struct babel_source));
   p->msg_slab = sl_new(P->pool, sizeof(struct babel_msg_node));
   p->seqno_slab = sl_new(P->pool, sizeof(struct babel_seqno_request));
+  idm_init(&p->src_ids, P->pool, 8);
 
   p->log_pkt_tbf = (struct tbf){ .rate = 1, .burst = 5 };
 
diff --git a/proto/babel/babel.h b/proto/babel/babel.h
index da8386b3..16d9c7d0 100644
--- a/proto/babel/babel.h
+++ b/proto/babel/babel.h
@@ -25,6 +25,7 @@
 #include "lib/socket.h"
 #include "lib/string.h"
 #include "lib/timer.h"
+#include "lib/idm.h"
 
 #define EA_BABEL_METRIC		EA_CODE(PROTOCOL_BABEL, 0)
 #define EA_BABEL_ROUTER_ID	EA_CODE(PROTOCOL_BABEL, 1)
@@ -174,6 +175,7 @@ struct babel_proto {
   slab *source_slab;
   slab *msg_slab;
   slab *seqno_slab;
+  struct idm src_ids;
 
   struct tbf log_pkt_tbf;		/* TBF for packet messages */
 };
@@ -216,6 +218,7 @@ struct babel_iface {
 struct babel_neighbor {
   node n;
   struct babel_iface *ifa;
+  struct rte_src *src;
 
   ip_addr addr;
   u16 rxcost;				/* Sent in last IHU */
-- 
2.30.2



More information about the Bird-users mailing list