[PATCH 1/2] Babel: Add option to support ecmp routes
Toke Høiland-Jørgensen
toke at toke.dk
Sun May 8 21:06:43 CEST 2022
(+ Juliusz)
Daniel Gröber <dxld at darkboxed.org> writes:
> ---
> The FIB walks in babel_reconfigure_iface are a bit ugly, I tried to
> loop over ifa->neigh_list instead but couldn't get it to work. I'm
> open to suggestions :)
>
> doc/bird.sgml | 8 +++++
> proto/babel/babel.c | 70 +++++++++++++++++++++++++++++++++++++-------
> proto/babel/babel.h | 3 ++
> proto/babel/config.Y | 3 ++
> 4 files changed, 73 insertions(+), 11 deletions(-)
>
> diff --git a/doc/bird.sgml b/doc/bird.sgml
> index 1580facd..5e85d8ec 100644
> --- a/doc/bird.sgml
> +++ b/doc/bird.sgml
> @@ -1879,6 +1879,7 @@ protocol babel [<name>] {
> check link <switch>;
> next hop ipv4 <address>;
> next hop ipv6 <address>;
> + ecmp <switch> [limit <num>];
> authentication none|mac [permissive];
> password "<text>";
> password "<text>" {
> @@ -1983,6 +1984,13 @@ protocol babel [<name>] {
> source for Babel packets will be used. In normal operation, it should not
> be necessary to set this option.
>
> + <tag><label id="babel-ecmp">ecmp <m>switch</m> [limit <m>number</m>]</tag>
> +
> + Determines whether babel will emit ECMP (equal-cost multipath)
> + routes, allowing to load-balancing traffic across multiple paths. If
> + enabled the maximum number of next-hops to allow can be specified,
> + defaulting to 16.
This should probably explain the constraints: i.e., that only
equal-weight routes are added (which in practice limits it to interfaces
of type 'wired', or wireless interfaces with no packet loss, I suppose?)
and that the routes all have to be feasible (though I'm not sure if we
explain feasibility in the docs anywhere).
> <tag><label id="babel-authentication">authentication none|mac [permissive]</tag>
> Selects authentication method to be used. <cf/none/ means that packets
> are not authenticated at all, <cf/mac/ means MAC authentication is
> diff --git a/proto/babel/babel.c b/proto/babel/babel.c
> index 4a7d550f..4d9c657c 100644
> --- a/proto/babel/babel.c
> +++ b/proto/babel/babel.c
> @@ -623,6 +623,34 @@ done:
> }
> }
>
> +/**
> + * babel_nexthop_insert - add next_hop of route to nexthop list
> + * @p: Babel protocol instance
> + * @r: Babel route
> + * @nhs: nexthop list head to append onto
> + * @nh: freshly allocated buffer to fill
> + */
> +static void
> +babel_nexthop_insert(
> + struct babel_proto *p,
> + struct babel_route *r,
> + struct nexthop **nhs,
> + struct nexthop *nh)
> +{
> + nh->gw = r->next_hop;
> + nh->iface = r->neigh->ifa->iface;
> +
> + /*
> + * If we cannot find a reachable neighbour, set the entry to be onlink. This
> + * makes it possible to, e.g., assign /32 addresses on a mesh interface and
> + * have routing work.
> + */
> + if (!neigh_find(&p->p, r->next_hop, r->neigh->ifa->iface, 0))
> + nh->flags = RNF_ONLINK;
> +
> + nexthop_insert(nhs, nh);
> +}
> +
> /**
> * babel_announce_rte - announce selected route to the core
> * @p: Babel protocol instance
> @@ -635,6 +663,7 @@ done:
> static void
> babel_announce_rte(struct babel_proto *p, struct babel_entry *e)
> {
> + struct babel_config *cf = (void *) p->p.cf;
> struct babel_route *r = e->selected;
> struct channel *c = (e->n.addr->type == NET_IP4) ? p->ip4_channel : p->ip6_channel;
>
> @@ -645,18 +674,24 @@ babel_announce_rte(struct babel_proto *p, struct babel_entry *e)
> .source = RTS_BABEL,
> .scope = SCOPE_UNIVERSE,
> .dest = RTD_UNICAST,
> - .from = r->neigh->addr,
> - .nh.gw = r->next_hop,
> - .nh.iface = r->neigh->ifa->iface,
> };
>
> - /*
> - * If we cannot find a reachable neighbour, set the entry to be onlink. This
> - * makes it possible to, e.g., assign /32 addresses on a mesh interface and
> - * have routing work.
> - */
> - if (!neigh_find(&p->p, r->next_hop, r->neigh->ifa->iface, 0))
> - a0.nh.flags = RNF_ONLINK;
> + struct nexthop *nhs = NULL;
> + babel_nexthop_insert(p, r, &nhs, allocz(sizeof(struct nexthop)));
> + int num_nexthops = 1;
> +
> + struct babel_route *cr;
> + WALK_LIST(cr, e->routes) {
> + if (num_nexthops++ >= cf->ecmp)
> + break;
> +
> + if (cr == r || !cr->feasible || cr->metric != r->metric)
> + continue;
You should probably switch the order of those two checks? You're
increasing the counter even if the route is subsequently discarded...
> +
> + babel_nexthop_insert(p, cr, &nhs, allocz(sizeof(struct nexthop)));
> + }
> +
> + a0.nh = *nhs;
>
> rta *a = rta_lookup(&a0);
> rte *rte = rte_get_temp(a);
> @@ -736,6 +771,7 @@ babel_announce_retraction(struct babel_proto *p, struct babel_entry *e)
> static void
> babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_route *mod)
> {
> + struct babel_config *cf = (void *) p->p.cf;
> struct babel_route *r, *best = e->selected;
>
> /* Shortcut if only non-best was modified */
> @@ -744,8 +780,10 @@ babel_select_route(struct babel_proto *p, struct babel_entry *e, struct babel_ro
> /* Either select modified route, or keep old best route */
> if ((mod->metric < (best ? best->metric : BABEL_INFINITY)) && mod->feasible)
> best = mod;
> - else
> + else if (cf->ecmp==1)
This looks like you're returning if ECMP is enabled; I think finding a
better name for that config variable would be good; maybe just
"max_nexthops"?
> return;
> + /* With ecmp one of the non-selected but equal metric routes might have
> + * changed so contine on with the announcement in that case. */
> }
> else
> {
> @@ -1879,6 +1917,16 @@ babel_reconfigure_iface(struct babel_proto *p, struct babel_iface *ifa, struct b
> if (ifa->up)
> babel_iface_kick_timer(ifa);
>
> + /* Update all routes to refresh ecmp settings. */
> + FIB_WALK(&p->ip6_rtable, struct babel_entry, e) {
> + babel_select_route(p, e, NULL);
> + }
> + FIB_WALK_END;
> + FIB_WALK(&p->ip4_rtable, struct babel_entry, e) {
> + babel_select_route(p, e, NULL);
> + }
> + FIB_WALK_END;
> +
I don't think this is the right thing to do; you should probably just
refuse the reconfiguration if ECMP settings changed (see the 0-return at
the top when TOS changes)?
> return 1;
> }
>
> diff --git a/proto/babel/babel.h b/proto/babel/babel.h
> index 84feb085..3868dcb2 100644
> --- a/proto/babel/babel.h
> +++ b/proto/babel/babel.h
> @@ -62,6 +62,8 @@
> #define BABEL_OVERHEAD (IP6_HEADER_LENGTH+UDP_HEADER_LENGTH)
> #define BABEL_MIN_MTU (512 + BABEL_OVERHEAD)
>
> +#define BABEL_DEFAULT_ECMP_LIMIT 16
> +
> #define BABEL_AUTH_NONE 0
> #define BABEL_AUTH_MAC 1
>
> @@ -120,6 +122,7 @@ struct babel_config {
> list iface_list; /* List of iface configs (struct babel_iface_config) */
> uint hold_time; /* Time to hold stale entries and unreachable routes */
> u8 randomize_router_id;
> + u8 ecmp; /* Maximum number of nexthops to install */
See above re: the naming of this; 'max_nexthops'?
> struct channel_config *ip4_channel;
> struct channel_config *ip6_channel;
> diff --git a/proto/babel/config.Y b/proto/babel/config.Y
> index 05210fa4..8f18c790 100644
> --- a/proto/babel/config.Y
> +++ b/proto/babel/config.Y
> @@ -36,6 +36,7 @@ babel_proto_start: proto_start BABEL
> this_proto = proto_config_new(&proto_babel, $1);
> init_list(&BABEL_CFG->iface_list);
> BABEL_CFG->hold_time = 1 S_;
> + BABEL_CFG->ecmp = 1;
> };
>
> babel_proto_item:
> @@ -43,6 +44,8 @@ babel_proto_item:
> | proto_channel
> | INTERFACE babel_iface
> | RANDOMIZE ROUTER ID bool { BABEL_CFG->randomize_router_id = $4; }
> + | ECMP bool { BABEL_CFG->ecmp = $2 ? BABEL_DEFAULT_ECMP_LIMIT : 1; }
> + | ECMP bool LIMIT expr { BABEL_CFG->ecmp = $2 ? $4 : 1; }
> ;
>
> babel_proto_opts:
> --
> 2.30.2
More information about the Bird-users
mailing list