This adds the Babel routing protocol (RFC6126) to Bird. It is a complete implementation of the IPv6 subset of RFC6126, but does not implement any of the extensions. Compared to the RFC patch posted earlier, this patch implements several more SHOULD parts of the protocol, has updated interactions with the Bird core, and uses fewer timers and resource pools. In addition, several other tweaks and fixes have been made following interoperability testing with the official babeld. The implementation of the protocol is now, to the best of my knowledge, complete. The only exception is the propagation of IPv4 routes which has deliberately been left out due to Bird's lack of support for a dual-stack operation mode. Juliusz Chroboczek, the author of the Babel spec, has expressed a preference to not have an IPv4-only implementation of Babel, so as to avoid fragmenting the community. I have followed that preference, and so this implementation ignores IPv4 route TLVs entirely. As far as interactions with Bird core is concerned, the main thing missing is the reconfiguration support; I still haven't decided exactly what is the right thing to do here. But probably some variant of the OSPF protocol's logic is needed. And of course documentation of the configuration options. But since I expect to have to do at least another iteration on this, I decided to defer that in the interest of keeping the ball rolling. Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk> Cc: babel-users@lists.alioth.debian.org --- configure.in | 3 + lib/ip.h | 1 + nest/proto.c | 4 + nest/protocol.h | 2 +- nest/route.h | 11 +- proto/Doc | 1 + proto/babel/Doc | 2 + proto/babel/Makefile | 5 + proto/babel/babel.c | 1558 ++++++++++++++++++++++++++++++++++++++++++++++++++ proto/babel/babel.h | 434 ++++++++++++++ proto/babel/config.Y | 84 +++ proto/babel/packet.c | 636 +++++++++++++++++++++ sysdep/autoconf.h.in | 1 + 13 files changed, 2740 insertions(+), 2 deletions(-) create mode 100644 proto/babel/Doc create mode 100644 proto/babel/Makefile create mode 100644 proto/babel/babel.c create mode 100644 proto/babel/babel.h create mode 100644 proto/babel/config.Y create mode 100644 proto/babel/packet.c diff --git a/configure.in b/configure.in index c81709e..1265565 100644 --- a/configure.in +++ b/configure.in @@ -206,6 +206,9 @@ fi AC_SUBST(iproutedir) all_protocols="$proto_bfd bgp ospf pipe $proto_radv rip static" +if test "$ip" = ipv6 ; then + all_protocols="$all_protocols babel" +fi all_protocols=`echo $all_protocols | sed 's/ /,/g'` if test "$with_protocols" = all ; then diff --git a/lib/ip.h b/lib/ip.h index 90bb7f8..fa18da1 100644 --- a/lib/ip.h +++ b/lib/ip.h @@ -23,6 +23,7 @@ #define IP6_OSPF_ALL_ROUTERS ipa_build6(0xFF020000, 0, 0, 5) #define IP6_OSPF_DES_ROUTERS ipa_build6(0xFF020000, 0, 0, 6) #define IP6_RIP_ROUTERS ipa_build6(0xFF020000, 0, 0, 9) +#define IP6_BABEL_ROUTERS ipa_build6(0xFF020000, 0, 0, 0x00010006) #define IP4_NONE _MI4(0) #define IP6_NONE _MI6(0,0,0,0) diff --git a/nest/proto.c b/nest/proto.c index 6531083..5804b73 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -919,6 +919,10 @@ protos_build(void) proto_build(&proto_bfd); bfd_init_all(); #endif +#ifdef CONFIG_BABEL + proto_build(&proto_babel); + bfd_init_all(); +#endif proto_pool = rp_new(&root_pool, "Protocols"); proto_flush_event = ev_new(proto_pool); diff --git a/nest/protocol.h b/nest/protocol.h index 8c49154..ec78735 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -76,7 +76,7 @@ void protos_dump_all(void); extern struct protocol proto_device, proto_radv, proto_rip, proto_static, - proto_ospf, proto_pipe, proto_bgp, proto_bfd; + proto_ospf, proto_pipe, proto_bgp, proto_bfd, proto_babel; /* * Routing Protocol Instance diff --git a/nest/route.h b/nest/route.h index 6067526..a5d71a8 100644 --- a/nest/route.h +++ b/nest/route.h @@ -214,6 +214,12 @@ typedef struct rte { u8 suppressed; /* Used for deterministic MED comparison */ } bgp; #endif +#ifdef CONFIG_BABEL + struct { + u16 metric; /* Babel metric */ + u64 router_id; /* Babel router id */ + } babel; +#endif struct { /* Routes generated by krt sync (both temporary and inherited ones) */ s8 src; /* Alleged route source (see krt.h) */ u8 proto; /* Kernel source protocol ID */ @@ -368,6 +374,7 @@ typedef struct rta { #define RTS_OSPF_EXT2 10 /* OSPF external route type 2 */ #define RTS_BGP 11 /* BGP route */ #define RTS_PIPE 12 /* Inter-table wormhole */ +#define RTS_BABEL 13 /* Babel route */ #define RTC_UNICAST 0 #define RTC_BROADCAST 1 @@ -416,7 +423,8 @@ typedef struct eattr { #define EAP_RIP 2 /* RIP */ #define EAP_OSPF 3 /* OSPF */ #define EAP_KRT 4 /* Kernel route attributes */ -#define EAP_MAX 5 +#define EAP_BABEL 5 /* Babel attributes */ +#define EAP_MAX 6 #define EA_CODE(proto,id) (((proto) << 8) | (id)) #define EA_PROTO(ea) ((ea) >> 8) @@ -538,6 +546,7 @@ extern struct protocol *attr_class_to_protocol[EAP_MAX]; #define DEF_PREF_RIP 120 /* RIP */ #define DEF_PREF_BGP 100 /* BGP */ #define DEF_PREF_PIPE 70 /* Routes piped from other tables */ +#define DEF_PREF_BABEL 42 /* Babel */ #define DEF_PREF_INHERITED 10 /* Routes inherited from other routing daemons */ diff --git a/proto/Doc b/proto/Doc index 7863472..04c25bc 100644 --- a/proto/Doc +++ b/proto/Doc @@ -1,4 +1,5 @@ H Protocols +C babel C bfd C bgp C ospf diff --git a/proto/babel/Doc b/proto/babel/Doc new file mode 100644 index 0000000..2480239 --- /dev/null +++ b/proto/babel/Doc @@ -0,0 +1,2 @@ +S babel.c +S packet.c diff --git a/proto/babel/Makefile b/proto/babel/Makefile new file mode 100644 index 0000000..a27b010 --- /dev/null +++ b/proto/babel/Makefile @@ -0,0 +1,5 @@ +source=babel.c packet.c +root-rel=../../ +dir-name=proto/babel + +include ../../Rules diff --git a/proto/babel/babel.c b/proto/babel/babel.c new file mode 100644 index 0000000..d991b35 --- /dev/null +++ b/proto/babel/babel.c @@ -0,0 +1,1558 @@ +/* -*- c-file-style: "gnu"; indent-tabs-mode: nil; -*- + * + * The Babel protocol + * + * Copyright (c) 2015 Toke Høiland-Jørgensen + * + * Can be freely distributed and used under the terms of the GNU GPL. + * + * This file contains the main routines for handling and sending TLVs, as + * well as timers and interaction with the nest. + */ + +/** + * DOC: The Babel protocol + * + * Babel (RFC6126) is a loop-avoiding distance-vector routing protocol that is + * robust and efficient both in ordinary wired networks and in wireless mesh + * networks. + * + * The Babel protocol keeps state for each neighbour in a &babel_neighbor + * struct, tracking received hellos and I Heard You (IHU) messages. A + * &babel_interface struct keeps hello and update timers for each interface, and + * a separate hello seqno is maintained for each interface. + * + * For each prefix, Babel keeps track of both the possible routes + * (with next hop and router IDs), as well as the feasibility distance for each + * prefix and router id. The prefix itself is tracked in a &babel_entry struct, + * while the possible routes for the prefix are tracked as &babel_route entries + * and the feasibility distance is maintained through &babel_source structures. + * + * The main route selection is done in babel_select_route(). This is called when + * an update for a prefix is received, when a new prefix is received from the + * nest, and when a prefix expiry timer fires. It performs feasibility checks on + * the available routes for the prefix and selects the one with the lowest + * metric. + */ + +#undef LOCAL_DEBUG +#define LOCAL_DEBUG 1 + +#include "babel.h" + + +#undef TRACE +#define TRACE(level, msg, args...) do { if (p->p.debug & level) { log(L_TRACE "%s: " msg, p->p.name , ## args); } } while(0) +#define BAD( x ) { log( L_REMOTE "%s: " x, p->p.name ); return 1; } + +/* computes a-b % 65535 for u16 datatypes */ +static inline u16 +diff_mod64k(u16 a, u16 b) +{ + return a >= b ? a-b : 0xffff-b+a; +} +/* Is one number larger than another mod 65535? Since diff_mod64k is always >= + 0, just use a simple cutoff value to determine if the difference is small + enough that one is really larger. Since these comparisons are only made for + values that should not differ by more than a few numbers, this should be + safe.*/ +static inline u16 ge_mod64k(u16 a, u16 b) +{ + return diff_mod64k(a,b) < 0xfff0; +} + +static void babel_new_interface(struct babel_proto *p, struct iface *new, + unsigned long flags, struct iface_patt *patt); +static void expire_hello(struct babel_neighbor *bn); +static void expire_ihu(struct babel_neighbor *bn); +static void expire_sources(struct babel_entry *e); +static void expire_route(struct babel_route *r); +static void refresh_route(struct babel_route *r); +static void babel_dump_entry(struct babel_entry *e); +static void babel_dump_route(struct babel_route *r); +static void babel_select_route(struct babel_entry *e); +static void babel_send_route_request(struct babel_entry *e, struct babel_neighbor *n); +static int cache_seqno_request(struct babel_proto *p, ip_addr prefix, u8 plen, + u64 router_id, u16 seqno); + + +static void +babel_init_entry(struct fib_node *n) +{ + struct babel_entry *e = (struct babel_entry *)n; + e->proto = NULL; + e->selected = NULL; + init_list(&e->sources); + init_list(&e->routes); +} + + +static inline struct babel_entry * +babel_find_entry(struct babel_proto *p, ip_addr prefix, u8 plen) +{ + return fib_find(&p->rtable, &prefix, plen); +} + +static struct babel_entry * +babel_get_entry(struct babel_proto *p, ip_addr prefix, u8 plen) +{ + struct babel_entry *e = babel_find_entry(p, prefix, plen); + if(e) return e; + e = fib_get(&p->rtable, &prefix, plen); + e->proto = p; + return e; +} + +void +babel_flush_entry(struct babel_entry *e) +{ + struct babel_proto *p = e->proto; + TRACE(D_EVENTS, "Flushing entry %I/%d", e->n.prefix, e->n.pxlen); + rem_node(&e->garbage_node); + if(p) fib_delete(&p->rtable, e); +} + +static struct babel_source * +babel_find_source(struct babel_entry *e, u64 router_id) +{ + struct babel_source *s; + WALK_LIST(s, e->sources) + if(s->router_id == router_id) + return s; + return NULL; +} + +static struct babel_source * +babel_get_source(struct babel_entry *e, u64 router_id) +{ + struct babel_proto *p = e->proto; + struct babel_source *s = babel_find_source(e, router_id); + if(s) return s; + s = sl_alloc(p->source_slab); + s->router_id = router_id; + s->expires = now + BABEL_GARBAGE_INTERVAL; + s->e = e; + s->seqno = 0; + s->metric = BABEL_INFINITY; + add_tail(&e->sources, NODE s); + return s; +} + +static void +expire_sources(struct babel_entry *e) +{ + struct babel_proto *p = e->proto; + struct babel_source *n, *nx; + WALK_LIST_DELSAFE(n, nx, e->sources) { + if(n->expires && n->expires <= now) { + rem_node(NODE n); + sl_free(p->source_slab, n); + } + } + if(EMPTY_LIST(e->sources) && EMPTY_LIST(e->routes)) + add_tail(&p->garbage, &e->garbage_node); /* to be removed later */ +} + +static struct babel_route * +babel_find_route(struct babel_entry *e, struct babel_neighbor *n) +{ + struct babel_route *r; + WALK_LIST(r, e->routes) + if(r->neigh == n) + return r; + return NULL; +} + +static struct babel_route * +babel_get_route(struct babel_entry *e, struct babel_neighbor *n) +{ + struct babel_proto *p = e->proto; + struct babel_route *r = babel_find_route(e,n); + if(r) return r; + r = sl_alloc(p->route_slab); + memset(r, 0, sizeof(*r)); + r->neigh = n; + r->e = e; + if(n) + r->expires = now + BABEL_GARBAGE_INTERVAL; /* default until we get updates to set expiry time */ + add_tail(&e->routes, NODE r); + if(n) add_tail(&n->routes, NODE &r->neigh_route); + return r; +} + +static void +babel_flush_route(struct babel_route *r) +{ + struct babel_proto *p = r->e->proto; + DBG("Flush route %I/%d router_id %0lx neigh %I\n", + r->e->n.prefix, r->e->n.pxlen, r->router_id, r->neigh ? r->neigh->addr : IPA_NONE); + rem_node(NODE r); + if(r->neigh) rem_node(&r->neigh_route); + if(r->e->selected == r) r->e->selected = NULL; + sl_free(p->route_slab, r); +} + +static void +expire_route(struct babel_route *r) +{ + struct babel_entry *e = r->e; + struct babel_proto *p = r->e->proto; + TRACE(D_EVENTS, "Route expiry timer for %I/%d router_id %0lx fired", + r->e->n.prefix, r->e->n.pxlen, r->router_id); + if(r->metric < BABEL_INFINITY) { + r->metric = BABEL_INFINITY; + r->expires = now + r->expiry_interval; + } else { + babel_flush_route(r); + } + + babel_select_route(e); +} + +static void +refresh_route(struct babel_route *r) +{ + if(!r->neigh || r != r->e->selected) return; + babel_send_route_request(r->e, r->neigh); +} + +static void +babel_expire_routes(struct babel_proto *p) +{ + struct babel_entry *e; + struct babel_route *r, *rx; + node *n; + FIB_WALK(&p->rtable, n) { + e = (struct babel_entry *)n; + WALK_LIST_DELSAFE(r, rx, e->routes) { + if(r->refresh_time && r->refresh_time <= now) { + refresh_route(r); + r->refresh_time = 0; + } + if(r->expires && r->expires <= now) + expire_route(r); + } + expire_sources(e); + } FIB_WALK_END; + WALK_LIST_FIRST(n, p->garbage) { + e = SKIP_BACK(struct babel_entry, garbage_node, n); + babel_flush_entry(e); + } +} + +static struct babel_neighbor * +babel_find_neighbor(struct babel_iface *bif, ip_addr addr) +{ + struct babel_neighbor *bn; + WALK_LIST(bn, bif->neigh_list) + if(ipa_equal(bn->addr, addr)) + return bn; + return NULL; +} + +static struct babel_neighbor * +babel_get_neighbor(struct babel_iface *bif, ip_addr addr) +{ + struct babel_neighbor *bn = babel_find_neighbor(bif, addr); + if(bn) return bn; + bn = mb_allocz(bif->pool, sizeof(struct babel_neighbor)); + bn->bif = bif; + bn->addr = addr; + bn->txcost = BABEL_INFINITY; + init_list(&bn->routes); + add_tail(&bif->neigh_list, NODE bn); + return bn; +} + +static void +babel_expire_neighbors(struct babel_proto *p) +{ + struct babel_iface *bif; + struct babel_neighbor *bn, *bnx; + WALK_LIST(bif, p->interfaces) { + WALK_LIST_DELSAFE(bn, bnx, bif->neigh_list) { + if(bn->hello_expiry && bn->hello_expiry <= now) + expire_hello(bn); + if(bn->ihu_expiry && bn->ihu_expiry <= now) + expire_ihu(bn); + } + } +} + + +/** + From the RFC (section 3.5.1): + + a route advertisement carrying the quintuple (prefix, plen, router-id, seqno, + metric) is feasible if one of the following conditions holds: + + - metric is infinite; or + + - no entry exists in the source table indexed by (id, prefix, plen); or + + - an entry (prefix, plen, router-id, seqno', metric') exists in the source + table, and either + + - seqno' < seqno or + - seqno = seqno' and metric < metric'. +*/ +static inline int +is_feasible(struct babel_source *s, u16 seqno, u16 metric) +{ + if(!s || metric == BABEL_INFINITY) return 1; + return (seqno > s->seqno + || (seqno == s->seqno && metric < s->metric)); +} + +static u16 +babel_compute_rxcost(struct babel_neighbor *bn) +{ + struct babel_iface *bif = bn->bif; + struct babel_proto *p = bif->proto; + u8 n, missed; + u16 map=bn->hello_map; + + if(!map) return BABEL_INFINITY; + n = __builtin_popcount(map); // number of bits set + missed = bn->hello_n-n; + + if(bif->cf->type == BABEL_IFACE_TYPE_WIRED) { + /* k-out-of-j selection - Appendix 2.1 in the RFC. */ + DBG("Missed %d hellos from %I\n", missed, bn->addr); + /* Link is bad if more than half the expected hellos were lost */ + return (missed > 0 && n/missed < 2) ? BABEL_INFINITY : bif->cf->rxcost; + } else if(bif->cf->type == BABEL_IFACE_TYPE_WIRELESS) { + /* ETX - Appendix 2.2 in the RFC. + + beta = prob. of successful transmission. + rxcost = BABEL_RXCOST_WIRELESS/beta + + Since: beta = 1-missed/bn->hello_n = n/bn->hello_n + Then: rxcost = BABEL_RXCOST_WIRELESS * bn->hello_n / n + */ + if(!n) return BABEL_INFINITY; + return BABEL_RXCOST_WIRELESS * bn->hello_n / n; + } else { + BAD("Unknown interface type!"); + } +} + + +static u16 +compute_cost(struct babel_neighbor *bn) +{ + struct babel_iface *bif = bn->bif; + struct babel_proto *p = bif->proto; + u16 rxcost = babel_compute_rxcost(bn); + if(rxcost == BABEL_INFINITY) return rxcost; + else if(bif->cf->type == BABEL_IFACE_TYPE_WIRED) { + /* k-out-of-j selection - Appendix 2.1 in the RFC. */ + return bn->txcost; + } else if(bif->cf->type == BABEL_IFACE_TYPE_WIRELESS) { + /* ETX - Appendix 2.2 in the RFC */ + return (MAX(bn->txcost, BABEL_RXCOST_WIRELESS) * rxcost)/BABEL_RXCOST_WIRELESS; + } else { + BAD("Unknown interface type!"); + } +} + +/* Simple additive metric - Appendix 3.1 in the RFC */ +static u16 +compute_metric(struct babel_neighbor *bn, u16 metric) +{ + u16 cost = compute_cost(bn); + return (cost == BABEL_INFINITY) ? cost : cost+metric; +} + +static rte * +babel_build_rte(struct babel_proto *p, net *n, struct babel_route *r) +{ + rta *a; + rte *rte; + + rta A = { + .src = p->p.main_source, + .source = RTS_BABEL, + .scope = SCOPE_UNIVERSE, + .cast = RTC_UNICAST, + .dest = r->metric == BABEL_INFINITY ? RTD_UNREACHABLE : RTD_ROUTER, + .flags = 0, + .gw = r->next_hop, + }; + + if(r->neigh) { + A.from = r->neigh->addr; + A.iface = r->neigh->bif->iface; + } + + a = rta_lookup(&A); + rte = rte_get_temp(a); + rte->u.babel.metric = r->metric; + rte->u.babel.router_id = r->router_id; + rte->net = n; + rte->pflags = 0; + return rte; +} + +static void +babel_send_seqno_request(struct babel_entry *e) +{ + struct babel_proto *p = e->proto; + struct babel_route *r = e->selected; + struct babel_source *s = babel_find_source(e, r->router_id); + struct babel_iface *bif; + struct babel_tlv_seqno_request *tlv; + + if(s && cache_seqno_request(p, e->n.prefix, e->n.pxlen, r->router_id, s->seqno+1)) { + TRACE(D_EVENTS, "Sending seqno request for %I/%d router_id %0lx", + e->n.prefix, e->n.pxlen, r->router_id); + + WALK_LIST(bif, p->interfaces) { + tlv = babel_add_tlv_seqno_request(bif); + tlv->plen = e->n.pxlen; + tlv->seqno = s->seqno + 1; + tlv->hop_count = BABEL_INITIAL_HOP_COUNT; + tlv->router_id = r->router_id; + babel_put_addr(&tlv->header, e->n.prefix); + } + } +} + +static void +babel_unicast_seqno_request(struct babel_route *r) +{ + struct babel_entry *e = r->e; + struct babel_proto *p = e->proto; + struct babel_source *s = babel_find_source(e, r->router_id); + struct babel_iface *bif = r->neigh->bif; + struct babel_tlv_seqno_request *tlv; + if(s && cache_seqno_request(p, e->n.prefix, e->n.pxlen, r->router_id, s->seqno+1)) { + TRACE(D_EVENTS, "Sending seqno request for %I/%d router_id %0lx", + e->n.prefix, e->n.pxlen, r->router_id); + babel_new_unicast(bif); + tlv = babel_add_tlv_seqno_request(bif); + tlv->plen = e->n.pxlen; + tlv->seqno = s->seqno + 1; + tlv->hop_count = BABEL_INITIAL_HOP_COUNT; + tlv->router_id = r->router_id; + babel_put_addr(&tlv->header, e->n.prefix); + babel_send_unicast(bif, r->neigh->addr); + } +} + +static void +babel_send_route_request(struct babel_entry *e, struct babel_neighbor *n) +{ + struct babel_iface *bif = n->bif; + struct babel_proto *p = e->proto; + struct babel_tlv_route_request *tlv; + TRACE(D_PACKETS, "Babel: Sending route request for %I/%d to %I\n", + e->n.prefix, e->n.pxlen, n->addr); + babel_new_unicast(bif); + tlv = babel_add_tlv_route_request(bif); + babel_put_addr(&tlv->header, e->n.prefix); + tlv->plen = e->n.pxlen; + babel_send_unicast(bif, n->addr); +} + + +/** + * babel_select_route: + * @e: Babel entry to select the best route for. + * + * Select the best feasible route for a given prefix. This just selects the + * feasible route with the lowest metric. If this results in switching upstream + * router (identified by router id), the nest is notified of the new route. + * + * If no feasible route is available for a prefix that previously had a route + * selected, a seqno request is sent to try to get a valid route. In the + * meantime, the route is marked as infeasible in the nest (to blackhole packets + * going to it, as per the RFC). + * + * If no feasible route is available, and no previous route is selected, the + * route is removed from the nest entirely. + */ +static void +babel_select_route(struct babel_entry *e) +{ + struct babel_proto *p = e->proto; + net *n = net_get(p->p.table, e->n.prefix, e->n.pxlen); + struct babel_route *r, *cur = e->selected; + + /* try to find the best feasible route */ + WALK_LIST(r, e->routes) + if((!cur || r->metric < cur->metric) + && is_feasible(babel_find_source(e, r->router_id), + r->seqno, r->advert_metric)) + cur = r; + + if(cur && cur->neigh && ((!e->selected && cur->metric < BABEL_INFINITY) + || (e->selected && cur->metric < e->selected->metric))) { + TRACE(D_EVENTS, "Picked new route for prefix %I/%d: router id %0lx metric %d", + e->n.prefix, e->n.pxlen, cur->router_id, cur->metric); + /* Notify the nest of the update. If we change router ID, we also trigger + a global update. */ + if(!e->selected || + e->selected->metric == BABEL_INFINITY || + e->selected->router_id != cur->router_id) + + ev_schedule(p->update_event); + + e->selected = cur; + rte_update(&p->p, n, babel_build_rte(p, n, cur)); + } else if(!cur || cur->metric == BABEL_INFINITY) { + /* Couldn't find a feasible route. If we have a selected route, that means + it just became infeasible; so set it's metric to infinite and install it + (as unreachable), then send a seqno request. + + babel_build_rte() will set the unreachable flag if the metric is BABEL_INFINITY.*/ + if(e->selected) { + TRACE(D_EVENTS, "Lost feasible route for prefix %I/%d: sending update and seqno request", + e->n.prefix, e->n.pxlen); + e->selected->metric = BABEL_INFINITY; + rte_update(&p->p, n, babel_build_rte(p, n, e->selected)); + + ev_schedule(p->update_event); + babel_send_seqno_request(e); + } else { + /* No route currently selected, and no new one selected; this means we + don't have a route to this destination anymore (and were probably + called from an expiry timer). Remove the route from the nest. */ + TRACE(D_EVENTS, "Flushing route for prefix %I/%d", e->n.prefix, e->n.pxlen); + e->selected = NULL; + rte_update(&p->p, n, NULL); + } + } +} + +static void +babel_send_ack(struct babel_iface *bif, ip_addr dest, u16 nonce) +{ + struct babel_proto *p = bif->proto; + struct babel_tlv_ack *tlv; + TRACE(D_PACKETS, "Babel: Sending ACK to %I with nonce %d\n", dest, nonce); + babel_new_unicast(bif); + tlv = babel_add_tlv_ack(bif); + tlv->nonce = nonce; + babel_send_unicast(bif, dest); +} + +static void +babel_add_ihu(struct babel_iface *bif, struct babel_neighbor *bn) +{ + struct babel_tlv_ihu *tlv = babel_add_tlv_ihu(bif); + babel_put_addr_ihu(&tlv->header, bn->addr); + tlv->rxcost = babel_compute_rxcost(bn); + tlv->interval = bif->cf->ihu_interval*100; +} + +static void +babel_add_ihus(struct babel_iface *bif) +{ + struct babel_neighbor *bn; + WALK_LIST(bn, bif->neigh_list) + babel_add_ihu(bif,bn); +} + +static void +babel_send_ihu(struct babel_iface *bif, struct babel_neighbor *bn) +{ + struct babel_proto *p = bif->proto; + TRACE(D_PACKETS, "Babel: Sending IHUs"); + babel_new_unicast(bif); + babel_add_ihu(bif, bn); + babel_send_unicast(bif, bn->addr); +} + +void +babel_send_hello(struct babel_iface *bif, u8 send_ihu) +{ + struct babel_proto *p = bif->proto; + struct babel_tlv_hello *tlv; + TRACE(D_PACKETS, "Babel: Sending hello on interface %s", bif->ifname); + tlv = babel_add_tlv_hello(bif); + tlv->seqno = bif->hello_seqno++; + tlv->interval = bif->cf->hello_interval*100; + + if(send_ihu) babel_add_ihus(bif); +} + +static void +babel_hello_timer(timer *t) +{ + struct babel_iface *bif = t->data; + babel_send_hello(bif, (bif->cf->type == BABEL_IFACE_TYPE_WIRED && + bif->hello_seqno % BABEL_IHU_INTERVAL_FACTOR == 0)); +} + +/* Function to add router_id -- a router id TLV is always followed by an update + TLV, so add both atomically (which may send the queue), then fill in the + router ID and return the update TLV position. + + This prevents a full queue causing a packet to be sent with a router id TLV + as the last TLV (and so the update TLV in the next packet missing a router + id).*/ +static struct babel_tlv_update * +babel_add_router_id(struct babel_iface *bif, u64 router_id) +{ + struct babel_tlv_router_id *rid; + struct babel_tlv_update *upd; + rid = (struct babel_tlv_router_id *) babel_add_tlv_size(bif, + BABEL_TYPE_ROUTER_ID, + (sizeof(struct babel_tlv_router_id) + + sizeof(struct babel_tlv_update))); + rid->router_id = router_id; + upd = (struct babel_tlv_update *)(rid+1); + upd->header.type = BABEL_TYPE_UPDATE; + upd->header.length = sizeof(struct babel_tlv_update) - sizeof(struct babel_tlv_header); + return upd; +} + +void +babel_send_update(struct babel_iface *bif) +{ + struct babel_proto *p = bif->proto; + struct babel_tlv_update *upd; + struct babel_entry *e; + struct babel_route *r; + struct babel_source *s; + u64 router_id = 0; + TRACE(D_PACKETS, "Sending update on %s", bif->ifname); + FIB_WALK(&p->rtable, n) { + e = (struct babel_entry *)n; + r = e->selected; + if(!r) continue; + + if(r->router_id != router_id) { + upd = babel_add_router_id(bif, r->router_id); + router_id = r->router_id; + } else { + upd = babel_add_tlv_update(bif); + } + + /* Our own seqno might have changed, in which case we update the routes we + originate. */ + if(r->router_id == p->router_id && r->seqno < p->update_seqno) + r->seqno = p->update_seqno; + upd->plen = e->n.pxlen; + upd->interval = bif->cf->update_interval*100; + upd->seqno = r->seqno; + upd->metric = r->metric; + babel_put_addr(&upd->header, e->n.prefix); + + /* Update feasibility distance. */ + s = babel_get_source(e, r->router_id); + s->expires = now + BABEL_GARBAGE_INTERVAL; + if(upd->seqno > s->seqno + || (upd->seqno == s->seqno && upd->metric < s->metric)) { + s->seqno = upd->seqno; + s->metric = upd->metric; + } + } FIB_WALK_END; +} + +/* Sends and update on all interfaces. */ +static void +babel_global_update(void *arg) +{ + struct babel_proto *p = arg; + struct babel_iface *bif; + TRACE(D_EVENTS, "Sending global update. Seqno %d", p->update_seqno); + WALK_LIST(bif, p->interfaces) + bif->update_triggered = 1; +} + +static void +babel_update_timer(timer *t) +{ + struct babel_iface *bif = t->data; + struct babel_proto *p = bif->proto; + TRACE(D_EVENTS, "Update timer firing"); + bif->update_triggered = 1; +} + + +int +babel_handle_ack_req(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + struct babel_tlv_ack_req *tlv = (struct babel_tlv_ack_req *)hdr; + struct babel_proto *p = state->proto; + TRACE(D_PACKETS, "Received ACK req nonce %d interval %d", tlv->nonce, tlv->interval); + if(tlv->interval) { + babel_send_ack(state->bif, state->saddr, tlv->nonce); + } + return 1; +} + +int +babel_handle_ack(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + struct babel_tlv_ack *tlv = (struct babel_tlv_ack *)hdr; + struct babel_proto *p = state->proto; + TRACE(D_PACKETS, "Received ACK nonce %d", tlv->nonce); + /* We don't send any ACK requests, so no need to do anything with ACKs. */ + return 1; +} + +static void +babel_flush_neighbor(struct babel_neighbor *bn) +{ + struct babel_proto *p = bn->bif->proto; + struct babel_route *r; + node *n; + TRACE(D_EVENTS, "Flushing neighbor %I", bn->addr); + rem_node(NODE bn); + WALK_LIST_FIRST(n, bn->routes) { + r = SKIP_BACK(struct babel_route, neigh_route, n); + babel_flush_route(r); + } + mb_free(bn); +} + +static void +expire_hello(struct babel_neighbor *bn) +{ + bn->hello_map <<= 1; + if(bn->hello_n < 16) bn->hello_n++; + if(!bn->hello_map) { + babel_flush_neighbor(bn); + } +} + +static void +expire_ihu(struct babel_neighbor *bn) +{ + bn->txcost = BABEL_INFINITY; +} + + +/* update hello history according to Appendix A1 of the RFC */ +static void +update_hello_history(struct babel_neighbor *bn, u16 seqno, u16 interval) +{ + u8 diff; + if(seqno == bn->next_hello_seqno) {/* do nothing */} + /* if the expected and seen seqnos are within 16 of each other (mod 65535), + the modular difference is going to be less than 16 for one of the + directions. Otherwise, the values differ too much, so just reset. */ + else if(diff_mod64k(seqno, bn->next_hello_seqno) > 16 && + diff_mod64k(bn->next_hello_seqno,seqno) > 16) { + /* note state reset - flush entries */ + bn->hello_map = bn->hello_n = 0; + } else if((diff = diff_mod64k(bn->next_hello_seqno,seqno)) <= 16) { + /* sending node increased interval; reverse history */ + bn->hello_map >>= diff; + bn->hello_n = (diff < bn->hello_n) ? bn->hello_n - diff : 0; + } else if((diff = diff_mod64k(seqno,bn->next_hello_seqno)) <= 16) { + /* sending node decreased interval; fast-forward */ + bn->hello_map <<= diff; + bn->hello_n = MIN(bn->hello_n + diff, 16); + } + /* current entry */ + bn->hello_map = (bn->hello_map << 1) | 1; + bn->next_hello_seqno = seqno+1; + if(bn->hello_n < 16) bn->hello_n++; + bn->hello_expiry = now + (BABEL_HELLO_EXPIRY_FACTOR*interval)/100; +} + + +int +babel_handle_hello(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + struct babel_tlv_hello *tlv = (struct babel_tlv_hello *)hdr; + struct babel_proto *p = state->proto; + struct babel_iface *bif = state->bif; + struct babel_neighbor *bn = babel_get_neighbor(bif, state->saddr); + TRACE(D_PACKETS, "Handling hello seqno %d interval %d", tlv->seqno, + tlv->interval, state->saddr); + update_hello_history(bn, tlv->seqno, tlv->interval); + if(bif->cf->type == BABEL_IFACE_TYPE_WIRELESS) + babel_send_ihu(bif, bn); + return 0; +} + +int +babel_handle_ihu(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + struct babel_tlv_ihu *tlv = (struct babel_tlv_ihu *)hdr; + struct babel_proto *p = state->proto; + struct babel_iface *bif = state->bif; + ip_addr addr = babel_get_addr(hdr, state); + + if(!ipa_equal(addr, bif->addr)) return 1; // not for us + TRACE(D_PACKETS, "Handling IHU rxcost %d interval %d", tlv->rxcost, + tlv->interval); + struct babel_neighbor *bn = babel_get_neighbor(bif, state->saddr); + bn->txcost = tlv->rxcost; + bn->ihu_expiry = now + 1.5*(tlv->interval/100); + return 0; +} + +int +babel_handle_router_id(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + struct babel_tlv_router_id *tlv = (struct babel_tlv_router_id *)hdr; + struct babel_proto *p = state->proto; + state->router_id = tlv->router_id; + TRACE(D_PACKETS, "Handling router ID %016lx", state->router_id); + return 0; +} + +int +babel_handle_next_hop(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + state->next_hop = babel_get_addr(hdr, state); + return 0; +} + +int +babel_handle_update(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + struct babel_tlv_update *tlv = (struct babel_tlv_update *)hdr; + struct babel_iface *bif = state->bif; + struct babel_proto *p = state->proto; + struct babel_neighbor *n; + struct babel_entry *e; + struct babel_source *s; + struct babel_route *r; + ip_addr prefix = babel_get_addr(hdr, state); + int feasible; + TRACE(D_PACKETS, "Handling update for %I/%d with seqno %d metric %d", + prefix, tlv->plen, tlv->seqno, tlv->metric); + if(tlv->flags & BABEL_FLAG_DEF_PREFIX) { + state->prefix = prefix; + } + if(tlv->flags & BABEL_FLAG_ROUTER_ID) { + u64 *buf = (u64*)&prefix; + memcpy(&state->router_id, buf+1, sizeof(u64)); + } + if(!state->router_id) + log(L_WARN "%s: Received update on %s with no preceding router id", p->p.name, bif->ifname); + + n = babel_find_neighbor(bif, state->saddr); + if(!n) { + DBG("Haven't heard from neighbor %I; ignoring update.\n", state->saddr); + return 1; + } + + if(state->router_id == p->router_id) { + DBG("Ignoring update for our own router ID.\n"); + return 1; + } + + /* RFC section 3.5.4: + + When a Babel node receives an update (id, prefix, seqno, metric) from a + neighbour neigh with a link cost value equal to cost, it checks whether it + already has a routing table entry indexed by (neigh, id, prefix). + + If no such entry exists: + + o if the update is unfeasible, it is ignored; + + o if the metric is infinite (the update is a retraction), the update is + ignored; + + o otherwise, a new route table entry is created, indexed by (neigh, id, + prefix), with seqno equal to seqno and an advertised metric equal to the + metric carried by the update. + + If such an entry exists: + + o if the entry is currently installed and the update is unfeasible, then + the behaviour depends on whether the router-ids of the two entries match. + If the router-ids are different, the update is treated as though it were + a retraction (i.e., as though the metric were FFFF hexadecimal). If the + router-ids are equal, the update is ignored; + + o otherwise (i.e., if either the update is feasible or the entry is not + currently installed), then the entry's sequence number, advertised + metric, metric, and router-id are updated and, unless the advertised + metric is infinite, the route's expiry timer is reset to a small multiple + of the Interval value included in the update. + +*/ + e = babel_find_entry(p, prefix, tlv->plen); + if(!e && tlv->metric == BABEL_INFINITY) + return 1; + + if(!e) e = babel_get_entry(p, prefix, tlv->plen); + + s = babel_find_source(e, state->router_id); /* for feasibility */ + r = babel_find_route(e, n); /* the route entry indexed by neighbour */ + feasible = is_feasible(s, tlv->seqno, tlv->metric); + + if(!r) { + + if(!feasible || tlv->metric == BABEL_INFINITY) + return 1; + + r = babel_get_route(e, n); + r->advert_metric = tlv->metric; + r->router_id = state->router_id; + r->metric = compute_metric(n, tlv->metric); + r->next_hop = state->next_hop; + r->seqno = tlv->seqno; + } else if(r == r->e->selected + && !feasible) { + + /* route is installed and update is infeasible - we may lose the route, so + send a unicast seqno request (section 3.8.2.2 second paragraph). */ + babel_unicast_seqno_request(r); + + if(state->router_id == s->router_id) return 1; + r->metric = BABEL_INFINITY; /* retraction */ + } else { + /* last point above - update entry */ + r->advert_metric = tlv->metric; + r->metric = compute_metric(n, tlv->metric); + r->router_id = state->router_id; + r->next_hop = state->next_hop; + r->seqno = tlv->seqno; + if(tlv->metric != BABEL_INFINITY) { + r->expiry_interval = (BABEL_ROUTE_EXPIRY_FACTOR*tlv->interval)/100; + r->expires = now + r->expiry_interval; + if(r->expiry_interval > BABEL_ROUTE_REFRESH_INTERVAL) + r->refresh_time = now + r->expiry_interval - BABEL_ROUTE_REFRESH_INTERVAL; + } + /* If the route is not feasible at this point, it means it is from another + neighbour than the one currently selected; so send a unicast seqno + request to try to get a better route (section 3.8.2.2 last paragraph). */ + if(!feasible) + babel_unicast_seqno_request(r); + } + babel_select_route(e); + return 0; +} + +/* A retraction is an update with an infinite metric. */ +static void babel_send_retraction(struct babel_iface *bif, ip_addr prefix, int plen) +{ + struct babel_proto *p = bif->proto; + struct babel_tlv_update *upd; + upd = babel_add_router_id(bif, p->router_id); + upd->plen = plen; + upd->interval = bif->cf->update_interval*100; + upd->seqno = p->update_seqno; + upd->metric = BABEL_INFINITY; + babel_put_addr(&upd->header, prefix); +} + +int +babel_handle_route_request(struct babel_tlv_header *hdr, + struct babel_parse_state *state) +{ + struct babel_tlv_route_request *tlv = (struct babel_tlv_route_request *)hdr; + struct babel_iface *bif = state->bif; + struct babel_proto *p = state->proto; + ip_addr prefix = babel_get_addr(hdr, state); + struct babel_entry *e; + + TRACE(D_PACKETS, "Handling route request for %I/%d on interface %s", + prefix, tlv->plen, bif->ifname); + + /* Wildcard request - full update on the interface */ + if(ipa_equal(prefix,IPA_NONE)) { + state->needs_update = 1; + return 0; + } + /* Non-wildcard request - see if we have an entry for the route. If not, send + a retraction, otherwise send an update. */ + e = babel_find_entry(p, prefix, tlv->plen); + if(!e) { + babel_send_retraction(bif, prefix, tlv->plen); + } else { + state->needs_update = 1; + } + return 0; +} + +static void +expire_seqno_requests(struct babel_seqno_request_cache *c) { + struct babel_seqno_request *n, *nx; + WALK_LIST_DELSAFE(n, nx, c->entries) { + if(n->updated < now-BABEL_SEQNO_REQUEST_EXPIRY) { + rem_node(NODE n); + sl_free(c->slab, n); + } + } +} + +/* Checks the seqno request cache for a matching request and returns failure if + found. Otherwise, a new entry is stored in the cache. */ +static int +cache_seqno_request(struct babel_proto *p, ip_addr prefix, u8 plen, + u64 router_id, u16 seqno) +{ + struct babel_seqno_request_cache *c = p->seqno_cache; + struct babel_seqno_request *r; + WALK_LIST(r, c->entries) { + if(ipa_equal(r->prefix, prefix) && r->plen == plen && + r->router_id == router_id && r->seqno == seqno) + return 0; + } + + /* no entries found */ + r = sl_alloc(c->slab); + r->prefix = prefix; + r->plen = plen; + r->router_id = router_id; + r->seqno = seqno; + r->updated = now; + add_tail(&c->entries, NODE r); + return 1; +} + +void +babel_forward_seqno_request(struct babel_entry *e, + struct babel_tlv_seqno_request *in, + ip_addr sender) +{ + struct babel_proto *p = e->proto; + struct babel_route *r; + struct babel_iface *bif; + struct babel_tlv_seqno_request *out; + TRACE(D_PACKETS, "Forwarding seqno request for %I/%d router_id %0lx", + e->n.prefix, e->n.pxlen, in->router_id); + WALK_LIST(r, e->routes) { + if(r->router_id == in->router_id && r->neigh + && !ipa_equal(r->neigh->addr,sender)) { + if(!cache_seqno_request(p, e->n.prefix, e->n.pxlen, in->router_id, in->seqno)) + return; + bif = r->neigh->bif; + babel_new_unicast(bif); + out = babel_add_tlv_seqno_request(bif); + out->plen = in->plen; + out->seqno = in->seqno; + out->hop_count = in->hop_count-1; + out->router_id = in->router_id; + babel_put_addr(&out->header, e->n.prefix); + babel_send_unicast(bif, r->neigh->addr); + return; + } + } +} + +/* The RFC section 3.8.1.2 on seqno requests: + + When a node receives a seqno request for a given router-id and sequence + number, it checks whether its routing table contains a selected entry for + that prefix; if no such entry exists, or the entry has infinite metric, it + ignores the request. + + If a selected route for the given prefix exists, and either the router-ids + are different or the router-ids are equal and the entry's sequence number is + no smaller than the requested sequence number, it MUST send an update for the + given prefix. + + If the router-ids match but the requested seqno is larger than the route + entry's, the node compares the router-id against its own router-id. If the + router-id is its own, then it increases its sequence number by 1 and sends an + update. A node MUST NOT increase its sequence number by more than 1 in + response to a route request. + + If the requested router-id is not its own, the received request's hop count + is 2 or more, and the node has a route (not necessarily a feasible one) for + the requested prefix that does not use the requestor as a next hop, the node + SHOULD forward the request. It does so by decreasing the hop count and + sending the request in a unicast packet destined to a neighbour that + advertises the given prefix (not necessarily the selected neighbour) and that + is distinct from the neighbour from which the request was received. + + A node SHOULD maintain a list of recently forwarded requests and forward the + reply in a timely manner. A node SHOULD compare every incoming request + against its list of recently forwarded requests and avoid forwarding it if it + is redundant. +*/ +int +babel_handle_seqno_request(struct babel_tlv_header *hdr, + struct babel_parse_state *state) +{ + struct babel_tlv_seqno_request *tlv = (struct babel_tlv_seqno_request *)hdr; + struct babel_proto *p = state->proto; + ip_addr prefix = babel_get_addr(hdr, state); + struct babel_entry *e; + struct babel_route *r; + + TRACE(D_PACKETS, "Handling seqno request for %I/%d router_id %0lx seqno %d hop count %d", + prefix, tlv->plen, tlv->router_id, tlv->seqno, tlv->hop_count); + + e = babel_find_entry(p, prefix, tlv->plen); + if(!e || !e->selected || e->selected->metric == BABEL_INFINITY) return 1; + + r = e->selected; + if(r->router_id != tlv->router_id || ge_mod64k(r->seqno, tlv->seqno)) { + state->needs_update = 1; + return 0; + } + + /* seqno is larger; check if we own the router id */ + if(tlv->router_id == p->router_id) { + p->update_seqno++; + ev_schedule(p->update_event); + return 0; + } + + if(tlv->hop_count > 1) { + babel_forward_seqno_request(e, tlv, state->saddr); + } + + return 1; + +} + +static void +babel_dump_source(struct babel_source *s) +{ + debug("Source router_id %0lx seqno %d metric %d expires %d\n", + s->router_id, s->seqno, s->metric, s->expires ? s->expires-now : 0); +} + +static void +babel_dump_route(struct babel_route *r) +{ + debug("Route neigh %I if %s seqno %d metric %d/%d router_id %0lx expires %d\n", + r->neigh ? r->neigh->addr : IPA_NONE, + r->neigh ? r->neigh->bif->ifname : "(none)", + r->seqno, r->advert_metric, + r->metric, r->router_id, r->expires ? r->expires-now : 0); +} + +static void +babel_dump_entry(struct babel_entry *e) +{ + debug("Babel: Entry %I/%d:\n", e->n.prefix, e->n.pxlen); + struct babel_source *s; struct babel_route *r; + WALK_LIST(s,e->sources) { debug(" "); babel_dump_source(s); } + WALK_LIST(r,e->routes) { debug(r==e->selected?" * " : " "); babel_dump_route(r); } +} + +static void +babel_dump_neighbor(struct babel_neighbor *bn) +{ + debug("Neighbor %I txcost %d hello_map %x next seqno %d expires %d/%d\n", + bn->addr, bn->txcost, bn->hello_map, bn->next_hello_seqno, + bn->hello_expiry ? bn->hello_expiry - now : 0, + bn->ihu_expiry ? bn->ihu_expiry - now : 0); +} + +static void +babel_dump_interface(struct babel_iface *bif) +{ + struct babel_neighbor *bn; + debug("Babel: Interface %s addr %I rxcost %d type %d hello seqno %d intervals %d %d\n", + bif->ifname, bif->addr, bif->cf->rxcost, bif->cf->type, bif->hello_seqno, + bif->cf->hello_interval, bif->cf->update_interval); + WALK_LIST(bn,bif->neigh_list) { debug(" "); babel_dump_neighbor(bn); } + +} + +static void +babel_dump(struct proto *P) +{ + struct babel_proto *p = (struct babel_proto *) P; + struct babel_entry *e; + struct babel_iface *bif; + debug("Babel: router id %0lx update seqno %d\n", p->router_id, p->update_seqno); + WALK_LIST(bif, p->interfaces) {babel_dump_interface(bif);} + FIB_WALK(&p->rtable, n) { + e = (struct babel_entry *)n; + babel_dump_entry(e); + } FIB_WALK_END; +} + + +static struct babel_iface* +babel_find_interface(struct babel_proto *p, struct iface *what) +{ + struct babel_iface *bif; + + WALK_LIST (bif, p->interfaces) + if (bif->iface == what) + return bif; + return NULL; +} + +static void +kill_iface(struct babel_iface *bif) +{ + DBG( "Babel: Interface %s disappeared\n", bif->iface->name); + struct babel_neighbor *bn; + WALK_LIST_FIRST(bn, bif->neigh_list) + babel_flush_neighbor(bn); + rfree(bif->pool); +} + +static void +babel_iface_linkdown(struct babel_iface *bif) +{ + struct babel_neighbor *bn; + struct babel_route *r; + node *n; + WALK_LIST(bn, bif->neigh_list) { + WALK_LIST(n, bn->routes) { + r = SKIP_BACK(struct babel_route, neigh_route, n); + r->metric = BABEL_INFINITY; + r->expires = now + r->expiry_interval; + babel_select_route(r->e); + } + } + +} + + + +static void +babel_open_interface(struct object_lock *lock) +{ + struct babel_iface *bif = lock->data; + struct babel_proto *p = bif->proto; + + if(!babel_open_socket(bif)) { + log(L_ERR "%s: Cannot open socket for %s", p->p.name, bif->iface->name); + } +} + + + +static void +babel_if_notify(struct proto *P, unsigned c, struct iface *iface) +{ + struct babel_proto *p = (struct babel_proto *) P; + struct babel_config *cf = (struct babel_config *) P->cf; + DBG("Babel: if notify: %s flags %x\n", iface->name, iface->flags); + if (iface->flags & IF_IGNORE) + return; + if (c & IF_CHANGE_UP) { + struct iface_patt *k = iface_patt_find(&cf->iface_list, iface, iface->addr); + + /* we only speak multicast */ + if(!(iface->flags & IF_MULTICAST)) return; + + if (!k) return; /* We are not interested in this interface */ + + babel_new_interface(p, iface, iface->flags, k); + + } + struct babel_iface *bif = babel_find_interface(p, iface); + + if(!bif) + return; + + if(!(iface->flags & IF_CHANGE_LINK)) { + TRACE(D_EVENTS, "Interface %s lost link", iface->name); + babel_iface_linkdown(bif); + } + + if (c & IF_CHANGE_DOWN) { + rem_node(NODE bif); + rfree(bif->lock); + kill_iface(bif); + } +} + +void +babel_queue_timer(timer *t) +{ + struct babel_iface *bif = t->data; + if(bif->update_triggered) { + babel_send_update(bif); + bif->update_triggered = 0; + } + ev_schedule(bif->send_event); +} + +static void +babel_new_interface(struct babel_proto *p, struct iface *new, + unsigned long flags, struct iface_patt *patt) +{ + struct babel_config *cf = (struct babel_config *) p->p.cf; + struct babel_iface * bif; + struct babel_iface_config *iface_cf = (struct babel_iface_config *) patt; + struct object_lock *lock; + pool *pool; + DBG("New interface %s\n", new->name); + + if(!new) return NULL; + + pool = rp_new(p->p.pool, new->name); + bif = mb_allocz(pool, sizeof( struct babel_iface )); + add_tail(&p->interfaces, NODE bif); + bif->pool = pool; + bif->iface = new; + bif->ifname = new->name; + bif->proto = p; + struct ifa* ifa; + WALK_LIST(ifa, new->addrs) + if(ipa_is_link_local(ifa->ip)) + bif->addr = ifa->ip; + if (iface_cf) { + bif->cf = iface_cf; + + if(bif->cf->type == BABEL_IFACE_TYPE_WIRED) { + if(bif->cf->hello_interval == BABEL_INFINITY) + bif->cf->hello_interval = BABEL_HELLO_INTERVAL_WIRED; + if(bif->cf->rxcost == BABEL_INFINITY) + bif->cf->rxcost = BABEL_RXCOST_WIRED; + } else if(bif->cf->type == BABEL_IFACE_TYPE_WIRELESS) { + if(bif->cf->hello_interval == BABEL_INFINITY) + bif->cf->hello_interval = BABEL_HELLO_INTERVAL_WIRELESS; + if(bif->cf->rxcost == BABEL_INFINITY) + bif->cf->rxcost = BABEL_RXCOST_WIRELESS; + } + if(bif->cf->update_interval == BABEL_INFINITY) { + bif->cf->update_interval = bif->cf->hello_interval*BABEL_UPDATE_INTERVAL_FACTOR; + } + bif->cf->ihu_interval = bif->cf->hello_interval*BABEL_IHU_INTERVAL_FACTOR; + } + init_list(&bif->neigh_list); + bif->hello_seqno = 1; + bif->max_pkt_len = new->mtu - BABEL_OVERHEAD; + + bif->hello_timer = tm_new_set(bif->pool, babel_hello_timer, bif, 0, bif->cf->hello_interval); + bif->update_timer = tm_new_set(bif->pool, babel_update_timer, bif, 0, bif->cf->update_interval); + bif->packet_timer = tm_new_set(bif->pool, babel_queue_timer, bif, BABEL_MAX_SEND_INTERVAL, 1); + + + bif->tlv_buf = bif->current_buf = mb_alloc(bif->pool, new->mtu); + babel_init_packet(bif->tlv_buf); + bif->send_event = ev_new(bif->pool); + bif->send_event->hook = babel_send_queue; + bif->send_event->data = bif; + + lock = olock_new( bif->pool ); + lock->addr = IP6_BABEL_ROUTERS; + lock->port = cf->port; + lock->iface = bif->iface; + lock->hook = babel_open_interface; + lock->data = bif; + lock->type = OBJLOCK_UDP; + olock_acquire(lock); +} + + +static struct ea_list * +babel_gen_attrs(struct linpool *pool, int metric, u64 router_id) +{ + struct ea_list *l = lp_alloc(pool, sizeof(struct ea_list) + 2*sizeof(eattr)); + struct adata *rid = lp_alloc(pool, sizeof(struct adata) + sizeof(u64)); + rid->length = sizeof(u64); + memcpy(&rid->data, &router_id, sizeof(u64)); + + l->next = NULL; + l->flags = EALF_SORTED; + l->count = 2; + l->attrs[0].id = EA_BABEL_METRIC; + l->attrs[0].flags = 0; + l->attrs[0].type = EAF_TYPE_INT | EAF_TEMP; + l->attrs[0].u.data = metric; + l->attrs[1].id = EA_BABEL_ROUTER_ID; + l->attrs[1].flags = 0; + l->attrs[1].type = EAF_TYPE_OPAQUE | EAF_TEMP; + l->attrs[1].u.ptr = rid; + return l; +} + +static void +babel_timer(timer *t) +{ + struct babel_proto *p = t->data; + babel_expire_routes(p); + expire_seqno_requests(p->seqno_cache); + babel_expire_neighbors(p); +} + + +static int +babel_import_control(struct proto *P, struct rte **rt, struct ea_list **attrs, struct linpool *pool) +{ + struct babel_proto *p = (struct babel_proto *)P; + + if ((*rt)->attrs->source != RTS_BABEL) { + struct ea_list *new = babel_gen_attrs(pool, 1, p->router_id); + new->next = *attrs; + *attrs = new; + } + return 0; +} + +static struct ea_list * +babel_make_tmp_attrs(struct rte *rt, struct linpool *pool) +{ + return babel_gen_attrs(pool, rt->u.babel.metric, rt->u.babel.router_id); +} + +static void +babel_store_tmp_attrs(struct rte *rt, struct ea_list *attrs) +{ + eattr *rid = ea_find(attrs, EA_BABEL_ROUTER_ID); + rt->u.babel.router_id = rid ? *((u64*) rid->u.ptr->data) : 0; + rt->u.babel.metric = ea_get_int(attrs, EA_BABEL_METRIC, 0); +} + +/* + * babel_rt_notify - core tells us about new route (possibly our + * own), so store it into our data structures. + */ +static void +babel_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net, + struct rte *new, struct rte *old, struct ea_list *attrs) +{ + struct babel_proto *p = (struct babel_proto *)P; + struct babel_entry *e; + struct babel_route *r; + + TRACE(D_EVENTS, "Got route from nest: %I/%d", net->n.prefix, net->n.pxlen); + if(new) { + e = babel_get_entry(p, net->n.prefix, net->n.pxlen); + r = (e->selected) ? e->selected : babel_get_route(e, NULL); + + if(!r->neigh) { + r->seqno = p->update_seqno; + r->router_id = p->router_id; + r->metric = 0; + e->selected = r; + } + } else if(old) { + /* route has gone away; send retraction */ + e = babel_find_entry(p, net->n.prefix, net->n.pxlen); + if(e && e->selected && !e->selected->neigh) { + /* no neighbour, so our route */ + e->selected->metric = BABEL_INFINITY; + e->selected->expires = now + BABEL_HOLD_TIME; + babel_select_route(e); + } + } else { + return; + } + ev_schedule(p->update_event); +} + +static void babel_neigh_notify(neighbor *n) +{ + struct babel_proto *p = (struct babel_proto *)n->proto; + struct babel_neighbor *bn = n->data; + DBG("Neighbor: bn %d scope %d flags %d\n", bn, n->scope, n->flags); + if(n->scope <= 0) { + TRACE(D_EVENTS, "Babel: Neighbor lost"); + } else { + TRACE(D_EVENTS, "Babel: Neighbor ready"); + } +} + +static int +babel_rte_same(struct rte *new, struct rte *old) +{ + return ((new->u.babel.router_id == old->u.babel.router_id) && + (new->u.babel.metric == old->u.babel.metric)); +} + + +static int +babel_rte_better(struct rte *new, struct rte *old) +{ + return new->u.babel.metric < old->u.babel.metric; +} + + +static struct proto * +babel_init(struct proto_config *cfg) +{ + struct proto *p = proto_new(cfg, sizeof(struct babel_proto)); + + p->accept_ra_types = RA_OPTIMAL; + p->if_notify = babel_if_notify; + p->rt_notify = babel_rt_notify; + p->neigh_notify = babel_neigh_notify; + p->import_control = babel_import_control; + p->make_tmp_attrs = babel_make_tmp_attrs; + p->store_tmp_attrs = babel_store_tmp_attrs; + p->rte_better = babel_rte_better; + p->rte_same = babel_rte_same; + + return p; +} + +void +babel_init_config(struct babel_config *c) +{ + init_list(&c->iface_list); + c->port = BABEL_PORT; +} + +static void +babel_get_route_info(rte *rte, byte *buf, ea_list *attrs) +{ + buf += bsprintf(buf, " (%d/%0lx)", rte->u.babel.metric, rte->u.babel.router_id); +} + +static int +babel_get_attr(eattr *a, byte *buf, int buflen UNUSED) +{ + switch (a->id) { + case EA_BABEL_METRIC: bsprintf( buf, "metric: %d", a->u.data ); return GA_FULL; + default: return GA_UNKNOWN; + } +} + +static int +babel_reconfigure(struct proto *p, struct proto_config *c) +{ + return 0; +} + +static void +babel_copy_config(struct proto_config *dest, struct proto_config *src) +{ + struct babel_config *b = (struct babel_config *) dest; + /* Shallow copy of everything */ + proto_copy_rest(dest, src, sizeof(struct babel_config)); + + /* We clean up iface_list, ifaces are non-sharable */ + init_list(&b->iface_list); +} + +static int +babel_start(struct proto *P) +{ + struct babel_proto *p = (struct babel_proto *) P; + struct babel_config *cf = (struct babel_config *) P->cf; + DBG( "Babel: starting instance...\n" ); + fib_init( &p->rtable, P->pool, sizeof( struct babel_entry ), 0, babel_init_entry ); + init_list( &p->interfaces ); + init_list( &p->garbage ); + p->timer = tm_new_set(P->pool, babel_timer, p, 0, 1); + tm_start( p->timer, 2 ); + p->update_seqno = 1; + p->router_id = proto_get_router_id(&cf->c); + p->update_event = ev_new(P->pool); + p->update_event->hook = babel_global_update; + p->update_event->data = p; + + p->entry_slab = sl_new(P->pool, sizeof(struct babel_entry)); + p->route_slab = sl_new(P->pool, sizeof(struct babel_route)); + p->source_slab = sl_new(P->pool, sizeof(struct babel_source)); + + p->seqno_cache = mb_allocz(P->pool, sizeof(struct babel_seqno_request_cache)); + p->seqno_cache->slab = sl_new(P->pool, sizeof(struct babel_seqno_request)); + init_list(&p->seqno_cache->entries); + DBG( "Babel: ...done\n"); + return PS_UP; +} + + + +struct protocol proto_babel = { + .name = "Babel", + .template = "babel%d", + .attr_class = EAP_BABEL, + .preference = DEF_PREF_BABEL, + .config_size = sizeof(struct babel_config), + .init = babel_init, + .dump = babel_dump, + .start = babel_start, + .reconfigure = babel_reconfigure, + .copy_config = babel_copy_config, + .get_route_info = babel_get_route_info, + .get_attr = babel_get_attr +}; diff --git a/proto/babel/babel.h b/proto/babel/babel.h new file mode 100644 index 0000000..d8cbeaa --- /dev/null +++ b/proto/babel/babel.h @@ -0,0 +1,434 @@ +/* -*- c-file-style: "gnu"; indent-tabs-mode: nil; -*- + * + * The Babel protocol + * + * Copyright (c) 2015 Toke Høiland-Jørgensen + * + * Can be freely distributed and used under the terms of the GNU GPL. + * + * This file contains the data structures used by Babel. + */ + +#include "nest/bird.h" +#include "nest/iface.h" +#include "nest/route.h" +#include "nest/protocol.h" +#include "nest/locks.h" +#include "lib/resource.h" +#include "lib/lists.h" +#include "lib/socket.h" +#include "lib/string.h" +#include "lib/timer.h" + +#ifndef IPV6 +#error "The Babel protocol only speaks IPv6" +#endif + +#define EA_BABEL_METRIC EA_CODE(EAP_BABEL, 0) +#define EA_BABEL_ROUTER_ID EA_CODE(EAP_BABEL, 1) + +#define BABEL_MAGIC 42 +#define BABEL_VERSION 2 +#define BABEL_PORT 6696 +#define BABEL_INFINITY 0xFFFF + + /* default hello intervals in seconds */ +#define BABEL_HELLO_INTERVAL_WIRED 20 +#define BABEL_HELLO_INTERVAL_WIRELESS 4 +#define BABEL_UPDATE_INTERVAL_FACTOR 4 +#define BABEL_IHU_INTERVAL_FACTOR 3 +#define BABEL_HELLO_EXPIRY_FACTOR 1.5 +#define BABEL_ROUTE_EXPIRY_FACTOR 3.5 +#define BABEL_ROUTE_REFRESH_INTERVAL 2 /* seconds before route expiry to send route request */ +#define BABEL_HOLD_TIME 10 /* expiry time for our own routes */ +#define BABEL_RXCOST_WIRED 96 +#define BABEL_RXCOST_WIRELESS 256 +#define BABEL_INITIAL_HOP_COUNT 255 +#define BABEL_MAX_SEND_INTERVAL 5 + +#define BABEL_SEQNO_REQUEST_EXPIRY 60 +#define BABEL_GARBAGE_INTERVAL 300 + +/* ip header + udp header + babel header */ +#define BABEL_OVERHEAD (SIZE_OF_IP_HEADER+8+sizeof(struct babel_header)) + +struct babel_header { + u8 magic; + u8 version; + u16 length; +}; + +enum babel_tlv_type_t { + BABEL_TYPE_PAD0 = 0, + BABEL_TYPE_PADN = 1, + BABEL_TYPE_ACK_REQ = 2, + BABEL_TYPE_ACK = 3, + BABEL_TYPE_HELLO = 4, + BABEL_TYPE_IHU = 5, + BABEL_TYPE_ROUTER_ID = 6, + BABEL_TYPE_NEXT_HOP = 7, + BABEL_TYPE_UPDATE = 8, + BABEL_TYPE_ROUTE_REQUEST = 9, + BABEL_TYPE_SEQNO_REQUEST = 10, + /* extensions - not implemented + BABEL_TYPE_TS_PC = 11, + BABEL_TYPE_HMAC = 12, + BABEL_TYPE_SS_UPDATE = 13, + BABEL_TYPE_SS_REQUEST = 14, + BABEL_TYPE_SS_SEQNO_REQUEST = 15, + */ + BABEL_TYPE_MAX +}; + +enum babel_iface_type_t { + BABEL_IFACE_TYPE_WIRED, + BABEL_IFACE_TYPE_WIRELESS, + BABEL_IFACE_TYPE_MAX +}; + +enum babel_ae_type_t { + BABEL_AE_WILDCARD = 0, + BABEL_AE_IP4 = 1, + BABEL_AE_IP6 = 2, + BABEL_AE_IP6_LL = 3, + BABEL_AE_MAX +}; + + +struct babel_parse_state { + struct babel_proto *proto; + struct babel_iface *bif; + ip_addr saddr; + u64 router_id; + ip_addr prefix; + ip_addr next_hop; + u8 needs_update; +}; + + + + +struct babel_tlv_header { + u8 type; + u8 length; +}; + +struct babel_tlv_ack_req { + struct babel_tlv_header header; + u16 reserved; + u16 nonce; + u16 interval; +}; + +void babel_hton_ack_req(struct babel_tlv_header *tlv); +void babel_ntoh_ack_req(struct babel_tlv_header *tlv); + +struct babel_tlv_ack { + struct babel_tlv_header header; + u16 nonce; +}; + +struct babel_tlv_hello { + struct babel_tlv_header header; + u16 reserved; + u16 seqno; + u16 interval; +}; + +void babel_hton_hello(struct babel_tlv_header *tlv); +void babel_ntoh_hello(struct babel_tlv_header *tlv); + + +struct babel_tlv_ihu { + struct babel_tlv_header header; + u8 ae; + u8 reserved; + u16 rxcost; + u16 interval; + u32 addr[2]; +}; +void babel_hton_ihu(struct babel_tlv_header *tlv); +void babel_ntoh_ihu(struct babel_tlv_header *tlv); +ip_addr babel_get_addr_ihu(struct babel_tlv_header *tlv, struct babel_parse_state *state); +void babel_put_addr_ihu(struct babel_tlv_header *tlv, ip_addr addr); + + +struct babel_tlv_router_id { + struct babel_tlv_header header; + u16 reserved; + u64 router_id __attribute__((packed)); +}; +void babel_hton_router_id(struct babel_tlv_header *tlv); +void babel_ntoh_router_id(struct babel_tlv_header *tlv); + +struct babel_tlv_next_hop { + struct babel_tlv_header header; + u8 ae; + u8 reserved; + u32 addr[2]; +}; +ip_addr babel_get_addr_next_hop(struct babel_tlv_header *tlv, struct babel_parse_state *state); +void babel_put_addr_next_hop(struct babel_tlv_header *tlv, ip_addr addr); + +struct babel_tlv_update { + struct babel_tlv_header header; + u8 ae; +#define BABEL_FLAG_DEF_PREFIX 0x80 +#define BABEL_FLAG_ROUTER_ID 0x40 + u8 flags; + u8 plen; + u8 omitted; + u16 interval; + u16 seqno; + u16 metric; + u32 addr[4]; +}; +void babel_hton_update(struct babel_tlv_header *tlv); +void babel_ntoh_update(struct babel_tlv_header *tlv); +ip_addr babel_get_addr_update(struct babel_tlv_header *tlv, struct babel_parse_state *state); +void babel_put_addr_update(struct babel_tlv_header *tlv, ip_addr addr); + +struct babel_tlv_route_request { + struct babel_tlv_header header; + u8 ae; + u8 plen; + u32 addr[4]; +}; +/* works for both types of request */ +ip_addr babel_get_addr_request(struct babel_tlv_header *tlv, + struct babel_parse_state *state); +void babel_put_addr_request(struct babel_tlv_header *tlv, ip_addr addr); + +struct babel_tlv_seqno_request { + struct babel_tlv_header header; + u8 ae; + u8 plen; + u16 seqno; + u8 hop_count; + u8 reserved; + u64 router_id __attribute__((packed)); + u32 addr[4]; +}; +void babel_hton_seqno_request(struct babel_tlv_header *tlv); +void babel_ntoh_seqno_request(struct babel_tlv_header *tlv); + + +/* Handlers */ + +int babel_validate_length(struct babel_tlv_header *tlv, struct babel_parse_state *state); +int babel_handle_ack_req(struct babel_tlv_header *tlv, struct babel_parse_state *state); +int babel_handle_ack(struct babel_tlv_header *tlv, struct babel_parse_state *state); +int babel_handle_hello(struct babel_tlv_header *tlv, struct babel_parse_state *state); +int babel_handle_ihu(struct babel_tlv_header *tlv, struct babel_parse_state *state); +int babel_validate_ihu(struct babel_tlv_header *hdr, struct babel_parse_state *state); +int babel_handle_router_id(struct babel_tlv_header *tlv, struct babel_parse_state *state); +int babel_handle_next_hop(struct babel_tlv_header *tlv, struct babel_parse_state *state); +int babel_validate_next_hop(struct babel_tlv_header *hdr, struct babel_parse_state *state); +int babel_handle_update(struct babel_tlv_header *tlv, struct babel_parse_state *state); +int babel_validate_update(struct babel_tlv_header *hdr, struct babel_parse_state *state); +int babel_handle_route_request(struct babel_tlv_header *tlv, + struct babel_parse_state *state); +int babel_validate_request(struct babel_tlv_header *hdr, struct babel_parse_state *state); +int babel_handle_seqno_request(struct babel_tlv_header *tlv, + struct babel_parse_state *state); + +/* Stores forwarded seqno requests for duplicate suppression. */ +struct babel_seqno_request { + node n; + ip_addr prefix; + u8 plen; + u64 router_id; + u16 seqno; + bird_clock_t updated; +}; + +struct babel_seqno_request_cache { + slab *slab; + list entries; /* Seqno requests in the cache (struct babel_seqno_request) */ +}; + + +struct babel_iface { + node n; + + struct babel_proto *proto; + struct iface *iface; + struct object_lock *lock; + + struct babel_iface_config *cf; + + pool *pool; + char *ifname; + sock *sock; + ip_addr addr; + int max_pkt_len; + list neigh_list; /* List of neighbors seen on this iface (struct babel_neighbor) */ + + void *tlv_buf; + void *current_buf; + int update_triggered; + + u16 hello_seqno; /* To be increased on each hello */ + + timer *hello_timer; + timer *update_timer; + timer *packet_timer; + event *send_event; + +}; + +struct babel_iface_config { + struct iface_patt i; + + u16 rxcost; + int type; + int tx_tos; + int tx_priority; + u16 hello_interval; + u16 ihu_interval; + u16 update_interval; +}; + +struct babel_neighbor { + node n; + struct babel_iface *bif; + + ip_addr addr; + u16 txcost; + u8 hello_n; + u16 hello_map; + u16 next_hello_seqno; + /* expiry timers */ + bird_clock_t hello_expiry; + bird_clock_t ihu_expiry; + + list routes; /* Routes this neighbour has sent us (struct babel_route) */ +}; + +struct babel_entry; + +struct babel_source { + node n; + struct babel_entry *e; + + u64 router_id; + u16 seqno; + u16 metric; + bird_clock_t expires; +}; + +struct babel_route { + node n; + node neigh_route; + struct babel_entry *e; + struct babel_neighbor *neigh; + + u16 seqno; + u16 advert_metric; + u16 metric; + u64 router_id; + ip_addr next_hop; + bird_clock_t refresh_time; + bird_clock_t expires; + u16 expiry_interval; +}; + + +struct babel_entry { + struct fib_node n; + node garbage_node; + struct babel_proto *proto; + struct babel_route *selected; + + list sources; /* Source table entries for this prefix (struct babel_source). */ + list routes; /* Routes for this prefix (struct babel_route). */ +}; + + + +struct babel_config { + struct proto_config c; + + list iface_list; /* Patterns configured -- keep it first; see babel_reconfigure why */ + int port; +}; + +struct babel_proto { + struct proto p; + timer *timer; + struct fib rtable; + list garbage; /* Entries to be garbage collected (struct babel_entry) */ + list interfaces; /* Interfaces we really know about (struct babel_iface) */ + u16 update_seqno; /* To be increased on request */ + u64 router_id; + event *update_event; /* For triggering global updates */ + + slab *entry_slab; + slab *route_slab; + slab *source_slab; + + struct babel_seqno_request_cache *seqno_cache; +}; + + + +void babel_init_config(struct babel_config *c); + +/* Packet mangling code - packet.c */ +void babel_send_hello(struct babel_iface *bif, u8 send_ihu); +void babel_send_unicast( struct babel_iface *bif, ip_addr dest ); +void babel_send_queue(void *arg); +void babel_send_update(struct babel_iface *bif); +void babel_init_packet(void *buf); +int babel_open_socket(struct babel_iface *bif); +int babel_process_packet(struct babel_header *pkt, int size, + ip_addr saddr, int port, struct babel_iface *bif); +ip_addr babel_get_addr(struct babel_tlv_header *hdr, struct babel_parse_state *state); +void babel_put_addr(struct babel_tlv_header *hdr, ip_addr addr); +void babel_new_unicast(struct babel_iface *bif); +struct babel_tlv_header * babel_add_tlv_size(struct babel_iface *bif, u16 type, int size); +struct babel_tlv_header * babel_add_tlv(struct babel_iface *bif, u16 len); +#define BABEL_ADD_TLV_SEND(tlv,bif,func,addr) do { \ + tlv=func(bif); \ + if(!tlv) { \ + babel_send_to(bif,addr); \ + babel_new_packet(bif); \ + tlv=func(bif); \ + }} while (0); + +inline struct babel_tlv_ack_req * babel_add_tlv_ack_req(struct babel_iface *bif) +{ + return (struct babel_tlv_ack_req *) babel_add_tlv(bif, BABEL_TYPE_ACK_REQ); +} +inline struct babel_tlv_ack * babel_add_tlv_ack(struct babel_iface *bif) +{ + return (struct babel_tlv_ack *) babel_add_tlv(bif, BABEL_TYPE_ACK); +} +inline struct babel_tlv_hello * babel_add_tlv_hello(struct babel_iface *bif) +{ + return (struct babel_tlv_hello *) babel_add_tlv(bif, BABEL_TYPE_HELLO); +} +inline struct babel_tlv_ihu * babel_add_tlv_ihu(struct babel_iface *bif) +{ + return (struct babel_tlv_ihu *) babel_add_tlv(bif, BABEL_TYPE_IHU); +} +inline struct babel_tlv_router_id * babel_add_tlv_router_id(struct babel_iface *bif) +{ + return (struct babel_tlv_router_id *) babel_add_tlv(bif, BABEL_TYPE_ROUTER_ID); +} +inline struct babel_tlv_next_hop * babel_add_tlv_next_hop(struct babel_iface *bif) +{ + return (struct babel_tlv_next_hop *) babel_add_tlv(bif, BABEL_TYPE_NEXT_HOP); +} +inline struct babel_tlv_update * babel_add_tlv_update(struct babel_iface *bif) +{ + return (struct babel_tlv_update *) babel_add_tlv(bif, BABEL_TYPE_UPDATE); +} +inline struct babel_tlv_route_request * babel_add_tlv_route_request(struct babel_iface *bif) +{ + return (struct babel_tlv_route_request *) babel_add_tlv(bif, BABEL_TYPE_ROUTE_REQUEST); +} +inline struct babel_tlv_seqno_request * babel_add_tlv_seqno_request(struct babel_iface *bif) +{ + return (struct babel_tlv_seqno_request *) babel_add_tlv(bif, BABEL_TYPE_SEQNO_REQUEST); +} diff --git a/proto/babel/config.Y b/proto/babel/config.Y new file mode 100644 index 0000000..b66705a --- /dev/null +++ b/proto/babel/config.Y @@ -0,0 +1,84 @@ +/* + * BIRD -- Babel Configuration + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + + + +CF_HDR + +#include "proto/babel/babel.h" +#include "nest/iface.h" + +CF_DEFINES + +#define BABEL_CFG ((struct babel_config *) this_proto) +#define BABEL_IFACE_CFG ((struct babel_iface_config *) this_ipatt) + +CF_DECLS + +CF_KEYWORDS(BABEL, METRIC, RXCOST, HELLO_INTERVAL, UPDATE_INTERVAL, PORT, WIRED, +WIRELESS, BABEL_METRIC) + +CF_GRAMMAR + +CF_ADDTO(proto, babel_cfg '}' { } ) + +babel_cfg_start: proto_start BABEL { + this_proto = proto_config_new(&proto_babel, $1); + babel_init_config(BABEL_CFG); + } + ; + +babel_cfg: + babel_cfg_start proto_name '{' + | babel_cfg proto_item ';' + | babel_cfg PORT expr ';' { BABEL_CFG->port = $3; } + | babel_cfg INTERFACE babel_iface ';' + ; + + +babel_iface_item: + | RXCOST expr { BABEL_IFACE_CFG->rxcost = $2; } + | TX tos { BABEL_IFACE_CFG->tx_tos = $2; } + | TX PRIORITY expr { BABEL_IFACE_CFG->tx_priority = $3; } + | TYPE WIRED { BABEL_IFACE_CFG->type = BABEL_IFACE_TYPE_WIRED; } + | TYPE WIRELESS { BABEL_IFACE_CFG->type = BABEL_IFACE_TYPE_WIRELESS; } + | HELLO_INTERVAL expr { BABEL_IFACE_CFG->hello_interval = $2; } + | UPDATE_INTERVAL expr { BABEL_IFACE_CFG->update_interval = $2; } + ; + +babel_iface_opts: + /* empty */ + | babel_iface_opts babel_iface_item ';' + ; + +babel_iface_opt_list: + /* empty */ + | '{' babel_iface_opts '}' + ; + +babel_iface_init: + /* EMPTY */ { + this_ipatt = cfg_allocz(sizeof(struct babel_iface_config)); + add_tail(&BABEL_CFG->iface_list, NODE this_ipatt); + init_list(&this_ipatt->ipn_list); + BABEL_IFACE_CFG->rxcost = BABEL_INFINITY; + BABEL_IFACE_CFG->type = BABEL_IFACE_TYPE_WIRED; + BABEL_IFACE_CFG->tx_tos = IP_PREC_INTERNET_CONTROL; + BABEL_IFACE_CFG->tx_priority = sk_priority_control; + BABEL_IFACE_CFG->hello_interval = BABEL_INFINITY; + BABEL_IFACE_CFG->update_interval = BABEL_INFINITY; + } + ; + +babel_iface: /* TODO: switch to iface_patt_list_nopx */ + babel_iface_init iface_patt_list babel_iface_opt_list + ; + +CF_ADDTO(dynamic_attr, BABEL_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT | EAF_TEMP, T_INT, EA_BABEL_METRIC); }) + +CF_CODE + +CF_END diff --git a/proto/babel/packet.c b/proto/babel/packet.c new file mode 100644 index 0000000..54f256b --- /dev/null +++ b/proto/babel/packet.c @@ -0,0 +1,636 @@ +/** -*- c-file-style: "gnu"; indent-tabs-mode: nil; -*- + * + * The Babel protocol + * + * Copyright (c) 2015 Toke Høiland-Jørgensen + * + * Can be freely distributed and used under the terms of the GNU GPL. + * + * This files contains the packet and TLV handling routines for the protocol. + */ + +#undef LOCAL_DEBUG +#define LOCAL_DEBUG 1 + +#include "babel.h" + +#undef TRACE +#define TRACE(level, msg, args...) do { if (p->p.debug & level) { log(L_TRACE "%s: " msg, p->p.name , ## args); } } while(0) +#define BAD( x ) { log( L_REMOTE "%s: " x, p->p.name ); return 1; } + +#define FIRST_TLV(p) ((struct babel_tlv_header *)(((struct babel_header *) p) + 1)) +#define NEXT_TLV(t) (t = (void *)((char *)t) + TLV_SIZE(t)) +#define TLV_SIZE(t) (t->type == BABEL_TYPE_PAD0 ? 1 : t->length + sizeof(struct babel_tlv_header)) +#define TLV_LENGTH(t) (tlv_data[t].struct_length-sizeof(struct babel_tlv_header)) + +static void babel_send_to(struct babel_iface *bif, ip_addr dest); + +static inline ip_addr +get_ip6_ll(u32 *addr) +{ + return ip6_or(ipa_build6(0xfe800000,0,0,0), + ipa_build6(0,0,ntohl(addr[0]),ntohl(addr[1]))); +} + + +struct babel_tlv_data { + int struct_length; + int (*handle)(struct babel_tlv_header *tlv, + struct babel_parse_state *state); + int (*validate)(struct babel_tlv_header *tlv, + struct babel_parse_state *state); + void (*hton)(struct babel_tlv_header *tlv); + void (*ntoh)(struct babel_tlv_header *tlv); + ip_addr (*get_addr)(struct babel_tlv_header *tlv, + struct babel_parse_state *state); + void (*put_addr)(struct babel_tlv_header *tlv, ip_addr addr); +}; + +static struct babel_tlv_data tlv_data[BABEL_TYPE_MAX] = { + {1, NULL,NULL,NULL,NULL,NULL}, + {3, NULL,NULL,NULL,NULL,NULL}, + {sizeof(struct babel_tlv_ack_req), + babel_handle_ack_req, babel_validate_length, + babel_hton_ack_req, babel_ntoh_ack_req, + NULL,NULL}, + {sizeof(struct babel_tlv_ack), + babel_handle_ack, babel_validate_length, + NULL, NULL, + NULL, NULL}, + {sizeof(struct babel_tlv_hello), + babel_handle_hello, babel_validate_length, + babel_hton_hello, babel_ntoh_hello, + NULL, NULL}, + {sizeof(struct babel_tlv_ihu), + babel_handle_ihu, babel_validate_ihu, + babel_hton_ihu, babel_ntoh_ihu, + babel_get_addr_ihu, babel_put_addr_ihu}, + {sizeof(struct babel_tlv_router_id), + babel_handle_router_id, babel_validate_length, + babel_hton_router_id, babel_ntoh_router_id, + NULL, NULL}, + {sizeof(struct babel_tlv_next_hop), + babel_handle_next_hop, babel_validate_next_hop, + NULL, NULL, + babel_get_addr_next_hop, NULL}, + {sizeof(struct babel_tlv_update), + babel_handle_update, babel_validate_update, + babel_hton_update, babel_ntoh_update, + babel_get_addr_update, babel_put_addr_update}, + {sizeof(struct babel_tlv_route_request), + babel_handle_route_request, babel_validate_request, + NULL, NULL, + babel_get_addr_request, babel_put_addr_request}, + {sizeof(struct babel_tlv_seqno_request), + babel_handle_seqno_request, babel_validate_request, + babel_hton_seqno_request, babel_ntoh_seqno_request, + babel_get_addr_request, babel_put_addr_request}, +}; + +static inline int +validate_tlv(struct babel_tlv_header *tlv, struct babel_parse_state *state) +{ + return (tlv_data[tlv->type].validate != NULL && tlv_data[tlv->type].validate(tlv, state)); +} + +void +babel_hton_ack_req(struct babel_tlv_header *hdr) +{ + struct babel_tlv_ack_req *tlv = (struct babel_tlv_ack_req *)hdr; + tlv->interval = htons(tlv->interval); +} + +void +babel_ntoh_ack_req(struct babel_tlv_header *hdr) +{ + struct babel_tlv_ack_req *tlv = (struct babel_tlv_ack_req *)hdr; + tlv->interval = ntohs(tlv->interval); +} + +void +babel_hton_hello(struct babel_tlv_header *hdr) +{ + struct babel_tlv_hello *tlv = (struct babel_tlv_hello *)hdr; + tlv->seqno = htons(tlv->seqno); + tlv->interval = htons(tlv->interval); +} + +void +babel_ntoh_hello(struct babel_tlv_header *hdr) +{ + struct babel_tlv_hello *tlv = (struct babel_tlv_hello *)hdr; + tlv->seqno = ntohs(tlv->seqno); + tlv->interval = ntohs(tlv->interval); +} + +int +babel_validate_ihu(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + struct babel_tlv_ihu *tlv = (struct babel_tlv_ihu *)hdr; + if(hdr->length < TLV_LENGTH(BABEL_TYPE_IHU)-sizeof(tlv->addr)) return 0; + return (tlv->ae == BABEL_AE_WILDCARD + || (tlv->ae == BABEL_AE_IP6_LL && hdr->length >= TLV_LENGTH(BABEL_TYPE_IHU))); +} + +void +babel_hton_ihu(struct babel_tlv_header *hdr) +{ + struct babel_tlv_ihu *tlv = (struct babel_tlv_ihu *)hdr; + tlv->rxcost = htons(tlv->rxcost); + tlv->interval = htons(tlv->interval); +} + +void +babel_ntoh_ihu(struct babel_tlv_header *hdr) +{ + struct babel_tlv_ihu *tlv = (struct babel_tlv_ihu *)hdr; + tlv->rxcost = ntohs(tlv->rxcost); + tlv->interval = ntohs(tlv->interval); +} + +ip_addr +babel_get_addr_ihu(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + struct babel_tlv_ihu *tlv = (struct babel_tlv_ihu *)hdr; + struct babel_iface *bif = state->bif; + if(tlv->ae == BABEL_AE_WILDCARD) { + return bif->iface->addr->ip; /* FIXME: Correct? */ + } else if(tlv->ae == BABEL_AE_IP6_LL) { + return get_ip6_ll(tlv->addr); + } + return IPA_NONE; +} + +void +babel_put_addr_ihu(struct babel_tlv_header *hdr, ip_addr addr) +{ + char buf[16]; + struct babel_tlv_ihu *tlv = (struct babel_tlv_ihu *)hdr; + if(!ipa_is_link_local(addr)) { + tlv->ae = BABEL_AE_WILDCARD; + return; + } + put_ip6(buf,addr); + memcpy(tlv->addr, buf+8, 8); + tlv->ae = BABEL_AE_IP6_LL; +} + +void +babel_hton_router_id(struct babel_tlv_header *hdr) +{ + struct babel_tlv_router_id *tlv = (struct babel_tlv_router_id *)hdr; + tlv->router_id = htobe64(tlv->router_id); +} + +void +babel_ntoh_router_id(struct babel_tlv_header *hdr) +{ + struct babel_tlv_router_id *tlv = (struct babel_tlv_router_id *)hdr; + tlv->router_id = be64toh(tlv->router_id); +} + +int +babel_validate_next_hop(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + struct babel_tlv_next_hop *tlv = (struct babel_tlv_next_hop *)hdr; + /* We don't speak IPv4, so only recognise IP6 LL next hops */ + if(tlv->ae != BABEL_AE_IP6_LL) return 0; + return babel_validate_length(hdr, state); +} + +ip_addr +babel_get_addr_next_hop(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + struct babel_tlv_next_hop *tlv = (struct babel_tlv_next_hop *)hdr; + return get_ip6_ll(tlv->addr); +} + +int +babel_validate_update(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + struct babel_tlv_update *tlv = (struct babel_tlv_update *)hdr; + int min_length = TLV_LENGTH(BABEL_TYPE_UPDATE)-sizeof(tlv->addr); + u8 len = tlv->plen/8; + if(tlv->plen % 8) len++; + + if(tlv->plen > MAX_PREFIX_LENGTH) + return 0; + + if(hdr->length < min_length) return 0; + if(tlv->ae == BABEL_AE_IP4 /* we don't speak IPv4 */ + || tlv->ae >= BABEL_AE_MAX) /* invalid */ + return 0; + /* Can only omit bits if a previous update defined a prefix to take them from */ + if(tlv->omitted && ipa_equal(state->prefix, IPA_NONE)) + return 0; + + /* TLV should be large enough to old the entire prefix */ + if(hdr->length < min_length + len-tlv->omitted) + return 0; + + return 1; +} + +void +babel_hton_update(struct babel_tlv_header *hdr) +{ + struct babel_tlv_update *tlv = (struct babel_tlv_update *)hdr; + tlv->interval = htons(tlv->interval); + tlv->seqno = htons(tlv->seqno); + tlv->metric = htons(tlv->metric); +} + +void +babel_ntoh_update(struct babel_tlv_header *hdr) +{ + struct babel_tlv_update *tlv = (struct babel_tlv_update *)hdr; + tlv->interval = ntohs(tlv->interval); + tlv->seqno = ntohs(tlv->seqno); + tlv->metric = ntohs(tlv->metric); +} + +ip_addr +babel_get_addr_update(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + struct babel_tlv_update *tlv = (struct babel_tlv_update *)hdr; + char buf[16] = {0}; + u8 len = tlv->plen/8; + if(tlv->plen % 8) len++; + + /* fixed encodings */ + if(tlv->ae == BABEL_AE_WILDCARD) return IPA_NONE; + if(tlv->ae == BABEL_AE_IP6_LL) return get_ip6_ll(tlv->addr); + + /* if we have omitted bytes, get them from previous prefix */ + if(tlv->omitted) put_ipa(buf, state->prefix); + /* if the prefix is longer than the omitted octets, copy the rest */ + if(tlv->omitted < len) memcpy(buf+tlv->omitted, tlv->addr, len-tlv->omitted); + /* make sure the tail is zeroed */ + if(len < 16) memset(buf+len, 0, 16-len); + return get_ipa(buf); +} + +void babel_put_addr_update(struct babel_tlv_header *hdr, ip_addr addr) +{ + struct babel_tlv_update *tlv = (struct babel_tlv_update *)hdr; + tlv->ae = BABEL_AE_IP6; + put_ipa(&tlv->addr, addr); +} + +int +babel_validate_request(struct babel_tlv_header *hdr, + struct babel_parse_state *state) +{ + /* Validates both seqno and route_request. Works because ae and plen fields + are in the same place. */ + struct babel_tlv_route_request *tlv = (struct babel_tlv_route_request *)hdr; + u8 len = tlv->plen/8; + if(tlv->plen % 8) len++; + + if(tlv->plen > MAX_PREFIX_LENGTH) + return 0; + + /* enough space to hold the prefix */ + if(hdr->length < TLV_LENGTH(hdr->type) - sizeof(tlv->addr) + len) + return 0; + /* wildcard requests must have plen 0 */ + if(tlv->ae == BABEL_AE_WILDCARD && tlv->plen > 0) + return 0; + + /* We don't speak IPv4, and prefixes cannot be link-local addresses. */ + if(tlv->ae != BABEL_AE_IP6 && tlv->ae != BABEL_AE_WILDCARD) + return 0; + + return 1; +} + +ip_addr +babel_get_addr_request(struct babel_tlv_header *hdr, + struct babel_parse_state *state) +{ + struct babel_tlv_route_request *tlv = (struct babel_tlv_route_request *)hdr; + char buf[16] = {0}; + u8 len = tlv->plen/8; + if(tlv->plen % 8) len++; + + /* fixed encoding */ + if(tlv->ae == BABEL_AE_WILDCARD) return IPA_NONE; + if(hdr->type == BABEL_TYPE_SEQNO_REQUEST) + memcpy(buf, ((struct babel_tlv_seqno_request *)tlv)->addr, len); + else + memcpy(buf, tlv->addr, len); + return get_ipa(buf); +} + +void +babel_put_addr_request(struct babel_tlv_header *hdr, ip_addr addr) +{ + struct babel_tlv_route_request *tlv = (struct babel_tlv_route_request *)hdr; + char buf[16] = {0}; + u8 len = tlv->plen/8; + if(tlv->plen % 8) len++; + put_ipa(buf, addr); + memcpy(tlv->addr, buf, len); +} + +void +babel_hton_seqno_request(struct babel_tlv_header *hdr) +{ + struct babel_tlv_seqno_request *tlv = (struct babel_tlv_seqno_request *)hdr; + tlv->seqno = htons(tlv->seqno); + tlv->router_id = htobe64(tlv->router_id); +} + +void +babel_ntoh_seqno_request(struct babel_tlv_header *hdr) +{ + struct babel_tlv_seqno_request *tlv = (struct babel_tlv_seqno_request *)hdr; + tlv->seqno = ntohs(tlv->seqno); + tlv->router_id = be64toh(tlv->router_id); +} + +static void +babel_tlv_hton(struct babel_tlv_header *hdr) +{ + if(tlv_data[hdr->type].hton) { + tlv_data[hdr->type].hton(hdr); + } +} + +static void +babel_tlv_ntoh(struct babel_tlv_header *hdr) +{ + if(tlv_data[hdr->type].ntoh) { + tlv_data[hdr->type].ntoh(hdr); + } +} + +static void +babel_packet_hton(struct babel_header *hdr) +{ + struct babel_tlv_header *tlv = FIRST_TLV(hdr); + int len = hdr->length+sizeof(struct babel_header); + char *p = (char *)hdr; + while((char *)tlv < p+len) { + babel_tlv_hton(tlv); + NEXT_TLV(tlv); + } + hdr->length = htons(hdr->length); +} + +ip_addr +babel_get_addr(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + if(tlv_data[hdr->type].get_addr) { + return tlv_data[hdr->type].get_addr(hdr, state); + } + return IPA_NONE; +} + +void +babel_put_addr(struct babel_tlv_header *hdr, ip_addr addr) +{ + if(tlv_data[hdr->type].put_addr) { + tlv_data[hdr->type].put_addr(hdr, addr); + } +} + + +void +babel_init_packet(void *buf) +{ + struct babel_header *hdr = buf; + memset(hdr, 0, sizeof(struct babel_header)); + hdr->magic = BABEL_MAGIC; + hdr->version = BABEL_VERSION; +} + +void +babel_new_unicast(struct babel_iface *bif) +{ + babel_init_packet(bif->sock->tbuf); + bif->current_buf = bif->sock->tbuf; +} + +void +babel_send_unicast(struct babel_iface *bif, ip_addr dest) +{ + babel_send_to(bif, dest); + bif->current_buf = bif->tlv_buf; +} + + +struct babel_tlv_header * +babel_add_tlv_size(struct babel_iface *bif, u16 type, int len) +{ + struct babel_header *hdr = bif->current_buf; + struct babel_tlv_header *tlv; + int pktlen = sizeof(struct babel_header)+hdr->length; + if(pktlen+len > bif->max_pkt_len) { + babel_send_queue(bif); + pktlen = sizeof(struct babel_header)+hdr->length; + } + hdr->length+=len; + tlv = (struct babel_tlv_header *)((char*)hdr+pktlen); + memset(tlv, 0, len); + tlv->type = type; + tlv->length = TLV_LENGTH(type); + return tlv; +} + +struct babel_tlv_header * +babel_add_tlv(struct babel_iface *bif, u16 type) +{ + return babel_add_tlv_size(bif, type, tlv_data[type].struct_length); +} + + +static int +babel_copy_tlv(void *buf, struct babel_tlv_header *src, int max_len) +{ + struct babel_header *dst = buf; + int pktlen = sizeof(struct babel_header)+dst->length; + int len = tlv_data[src->type].struct_length; + if(pktlen+len > max_len) + return 0; + + memcpy((char *)dst + pktlen, src, len); + dst->length += len; + return 1; +} + + +static void +babel_send_to(struct babel_iface *bif, ip_addr dest) +{ + sock *s = bif->sock; + struct babel_header *hdr = (void *) s->tbuf; + int len = hdr->length+sizeof(struct babel_header); + int done; + + babel_packet_hton(hdr); + + DBG( "Sending %d bytes to %I\n", len, dest); + done = sk_send_to(s, len, dest, 0); + if(!done) + log(L_WARN "Babel: TX queue full on %s", bif->ifname); +} + +static void +babel_send(struct babel_iface *bif) +{ + babel_send_to(bif, IP6_BABEL_ROUTERS); +} + +void +babel_send_queue(void *arg) +{ + struct babel_iface *bif = arg; + struct babel_header *dst = (void *)bif->sock->tbuf; + struct babel_header *src = (void *)bif->tlv_buf; + struct babel_tlv_header *hdr; + char *p; + int moved; + if(!src->length) return; + + babel_init_packet(dst); + hdr = FIRST_TLV(bif->tlv_buf); + p = (char *) hdr; + while((char *)hdr < p + src->length && babel_copy_tlv(dst, hdr, bif->max_pkt_len)) { + NEXT_TLV(hdr); + } + moved = (char *)hdr - p; + if(moved && moved < src->length) { + memmove(p, hdr, src->length - moved); + } + src->length -= moved; + babel_send(bif); + + /* re-schedule if we still have data to send */ + if(src->length) + ev_schedule(bif->send_event); +} + + +int +babel_process_packet(struct babel_header *pkt, int size, + ip_addr saddr, int port, struct babel_iface *bif) +{ + struct babel_tlv_header *tlv = FIRST_TLV(pkt); + struct babel_proto *proto = bif->proto; + struct babel_parse_state state = { + .proto = proto, + .bif = bif, + .saddr = saddr, + .prefix = IPA_NONE, + .next_hop = saddr, + }; + char *p = (char *)pkt; + int res = 0; + + pkt->length = ntohs(pkt->length); + if(pkt->magic != BABEL_MAGIC + || pkt->version != BABEL_VERSION + || pkt->length > size - sizeof(struct babel_header)) { + DBG("Invalid packet: magic %d version %d length %d size %d\n", + pkt->magic, pkt->version, pkt->length, size); + return 1; + } + + while((char *)tlv < p+size) { + if(tlv->type > BABEL_TYPE_PADN + && tlv->type < BABEL_TYPE_MAX + && validate_tlv(tlv, &state)) { + babel_tlv_ntoh(tlv); + res &= tlv_data[tlv->type].handle(tlv, &state); + } else { + DBG("Unknown or invalid TLV of type %d\n",tlv->type); + } + NEXT_TLV(tlv); + } + if(state.needs_update) + bif->update_triggered = 1; + return res; +} + +int +babel_validate_length(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + /*DBG("Validate type: %d length: %d needed: %d\n", hdr->type, hdr->length, + tlv_data[hdr->type].struct_length - sizeof(struct babel_tlv_header));*/ + return (hdr->length >= tlv_data[hdr->type].struct_length - sizeof(struct babel_tlv_header)); +} + +static void +babel_tx_err( sock *s, int err ) +{ + log( L_ERR ": Unexpected error at Babel transmit: %M", err ); +} + + +static int +babel_rx(sock *s, int size) +{ + struct babel_iface *bif = s->data; + struct babel_proto *p = bif->proto; + if (! bif->iface || s->lifindex != bif->iface->index) + return 1; + + DBG("Babel: incoming packet: %d bytes from %I via %s\n", size, s->faddr, bif->iface->name); + if (size < sizeof(struct babel_header)) BAD( "Too small packet" ); + + if (ipa_equal(bif->iface->addr->ip, s->faddr)) { + DBG("My own packet\n"); + return 1; + } + + if (!ipa_is_link_local(s->faddr)) { BAD("Non-link local sender"); } + + babel_process_packet((struct babel_header *) s->rbuf, size, s->faddr, s->fport, bif ); + return 1; +} + +int +babel_open_socket(struct babel_iface *bif) +{ + struct babel_proto *p = bif->proto; + struct babel_config *cf = (struct babel_config *) p->p.cf; + bif->sock = sk_new( bif->pool ); + bif->sock->type = SK_UDP; + bif->sock->sport = cf->port; + bif->sock->rx_hook = babel_rx; + bif->sock->data = bif; + bif->sock->rbsize = 10240; + bif->sock->iface = bif->iface; + bif->sock->tbuf = mb_alloc( bif->pool, bif->iface->mtu); + bif->sock->err_hook = babel_tx_err; + bif->sock->dport = cf->port; + bif->sock->daddr = IP6_BABEL_ROUTERS; + + bif->sock->tos = bif->cf->tx_tos; + bif->sock->priority = bif->cf->tx_priority; + bif->sock->flags = SKF_LADDR_RX; + if (sk_open( bif->sock) < 0) + goto err; + if (sk_setup_multicast( bif->sock) < 0) + goto err; + if (sk_join_group( bif->sock, bif->sock->daddr) < 0) + goto err; + TRACE(D_EVENTS, "Listening on %s, port %d, mode multicast (%I)", bif->iface->name, cf->port, bif->sock->daddr ); + + tm_start(bif->hello_timer, bif->cf->hello_interval); + tm_start(bif->update_timer, bif->cf->update_interval); + tm_start(bif->packet_timer, 1); + + babel_send_hello(bif,0); + babel_send_queue(bif); + + return 1; + + err: + sk_log_error(bif->sock, p->p.name); + log(L_ERR "%s: Cannot open socket for %s", p->p.name, bif->iface->name); + rfree(bif->sock); + return 0; + +} diff --git a/sysdep/autoconf.h.in b/sysdep/autoconf.h.in index a9e46e2..c73270c 100644 --- a/sysdep/autoconf.h.in +++ b/sysdep/autoconf.h.in @@ -43,6 +43,7 @@ #undef CONFIG_BGP #undef CONFIG_OSPF #undef CONFIG_PIPE +#undef CONFIG_BABEL /* We use multithreading */ #undef USE_PTHREADS -- 2.5.1
On Mon, Sep 07, 2015 at 11:10:34PM +0200, Toke Høiland-Jørgensen wrote:
This adds the Babel routing protocol (RFC6126) to Bird. It is a complete implementation of the IPv6 subset of RFC6126, but does not implement any of the extensions.
Thanks, i will review the patch ASAP.
Compared to the RFC patch posted earlier, this patch implements several more SHOULD parts of the protocol, has updated interactions with the Bird core, and uses fewer timers and resource pools. In addition, several other tweaks and fixes have been made following interoperability testing with the official babeld.
The implementation of the protocol is now, to the best of my knowledge, complete. The only exception is the propagation of IPv4 routes which has deliberately been left out due to Bird's lack of support for a dual-stack operation mode. Juliusz Chroboczek, the author of the Babel spec, has expressed a preference to not have an IPv4-only implementation of Babel, so as to avoid fragmenting the community. I have followed that preference, and so this implementation ignores IPv4 route TLVs entirely.
As far as interactions with Bird core is concerned, the main thing missing is the reconfiguration support; I still haven't decided exactly what is the right thing to do here. But probably some variant of the OSPF protocol's logic is needed. And of course documentation of the configuration options.
Yes, OSPF-style reconfiguration is a proper way to go. -- Elen sila lumenn' omentielvo Ondrej 'Santiago' Zajicek (email: santiago@crfreenet.org) OpenPGP encrypted e-mails preferred (KeyID 0x11DEADC3, wwwkeys.pgp.net) "To err is human -- to blame it on a computer is even more so."
On Mon, Sep 07, 2015 at 11:10:34PM +0200, Toke Høiland-Jørgensen wrote:
This adds the Babel routing protocol (RFC6126) to Bird. It is a complete implementation of the IPv6 subset of RFC6126, but does not implement any of the extensions.
Hello I am sorry i didn't got around to review it sooner, but doing a proper review for a whole new (and unfamiliar) protocol takes nontrivial amount of time. Generally, the code looks good and i am determined to finally merge it, but it definitely needs more refinement before that. The review consist of general points followed by comments specific to lines of the patch. Occasionally i reference the new RIP code, which is not yet merged to BIRD but is available at https://gitlab.labs.nic.cz/labs/bird/blob/rip-new/proto/rip/ . I am glad to see your answers. First, there is an issue with route selection. Proper route selection in BIRD should work this way: 1) Babel chooses the best route from received routes, say selected_in 2) If selected_in changes, it is propagated to the core by rte_update() 3) Core compares routes from multiple protocols and one is exported to the Babel 4) This route is either Babel own route or external route 5) In babel_rt_notify(), Babel receives that route, finds appropriate internal route (or create a new one) and stores it in say selected_out. 6) Even if the received selected_out is an external route, Babel do not stop propagating selected_in to the core 7) selected_out is propagated to the network in periodic updates. If selected_out changes, it is an event for triggered updates. The current code clearly does not work this way: babel_rt_notify(): r = (e->selected) ? e->selected : babel_get_route(e, NULL); Which IMHO means that if there is an internal selected route, the new received external route will be ignored. E.g. in cases when at first some internal routes were recived, one of them was selected, and after a while a new external route appears. The proper way (for babel_rt_notify()) would be to check whether the received route (new) is local and based on that use either the selected_in or a route with NULL neighbor: r = (new->attrs->src->proto == p) ? e->selected_in : babel_get_route(e, NULL); Note that generally babel_rt_notify() should not need to call babel_select_route(), as changing the selected_out route does not influence the selected_in route. There are also other places in the code where the behavior is different the scheme above (like triggering update directly in babel_select_route(). Although it is not necessary to explicitly keep both selected_in and selected_out as fields in struct babel_entry, but it is simple and comprehensible. Second, can we really expect that TLVs in received packets are aligned? Although all TLVs looks like suited to 32-bit alignment, i did not found mentioned it in Babel RFC and availability of Pad1, PadN TLVs and per-byte compressible addresses suggest that they generally are not aligned. In that case, we have to use get_uXX()/put_uXX() functions instead of directly accessing babel_tlv_* structure fields. Or we could copy each TLV to a local buffer before calling ntoh+handle callbacka, as each TLV internally is aligned, and enforce alignment for outgoing packets. I would generally prefer the first approach, but the second approach is simpler way to fix it. Third, although i am OK with accepting IPv6-only implementation, it should be ready for future expansion. TLVs with AE 1 could be skipped, but there should not be hidden hardwired IPv6 assumptions in the code. Mainly, TLV structures should use addr[0] at the end instead of addr[2] and addr[4]. Address space requirements should be handled explictly based on AE. Fourth, it seems like triggered updates are not really implemented as they should. Route change triggers propagation, but the whole routing table is always propagated instead of just the changed entries. This could be easily fixed by having timestamp on each babel_entry updated everytime when a significant change of selected_out route happened. For regular updates all routes are propagated, for triggered updates only the entries with timestamps >= time when the trigger update was requested. See want_triggered and tx_changed fields in the new RIP code. Fifth, for protocols like RIP or Babel that do not use explicit acknowledgments it is a very good idea to properly handle TX failures (which usually means that kernel buffers are full) at least when sending updates. When sk_send_to() fails during sending updates, the update process should be postponed until TX hook of the socket is called, then the update process could continue in the TX hook. Which means FIB_ITERATE with persistent iterator should be used instead of FIB_WALK. See rip_send_table() and rip_send_response() in the new RIP code. In the current Babel code this is complicated by the implicit call to babel_send_queue() inside of babel_add_tlv_size() instead of explicit call from update process. Sixth, the code handling the external representation should be in packet.c Esp. all babel_handle_* functions and most of babel_send_* functions. In most cases it is just a move, in some cases it makes sense to separate external-representation-bound parts to internal-structures-bound parts (e.g. for babel_handle_update() separating the part updating routing table to a separate function, see rip_receive_response() and rip_update_rte() in the new RIP code for the example) Also babel_header and TLV structures could be moved at the beginning of babel.c, Unnecessary function declarations (babel_hton_*) could be removed from babel.h, Trivial inline functions babel_add_tlv_* could be just inlined to appropriate babel_send_* function. Seventh, babel_get_addr_* and babel_put_addr_* functions are strange - IMHO these should not be named based on TLV, but based on their real functions and explicitly called from appropriate babel_handle* or babel_send_* functions instead using dispatch through tlv_data. Finally, the coding style is much more consistent with the rest of the BIRD than in the previous patch, just some minor issues: "if(" -> "if (", opening braces of code blocks should be on separate lines, we don't use aligned field names in structure definitions. Local var for struct babel_iface * should be generally named 'ifa' instead of 'bif', for struct babel_neighbor * just 'n' instead of 'bn'. Rest are comments specific to lines in the patch:
diff --git a/nest/proto.c b/nest/proto.c .. + proto_build(&proto_babel); + bfd_init_all(); +#endif
Calling bfd_init_all() seems like copy-paste error.
diff --git a/nest/route.h b/nest/route.h ... @@ -538,6 +546,7 @@ extern struct protocol *attr_class_to_protocol[EAP_MAX]; #define DEF_PREF_RIP 120 /* RIP */ #define DEF_PREF_BGP 100 /* BGP */ #define DEF_PREF_PIPE 70 /* Routes piped from other tables */ +#define DEF_PREF_BABEL 42 /* Babel */
Default could be higher, e.g. 130
diff --git a/proto/babel/babel.h b/proto/babel/babel.h ... +enum babel_tlv_type_t {
Enum names (like struct names) are not type names, so do not use _t suffix.
+enum babel_tlv_type_t { + BABEL_TYPE_PAD0 = 0, + BABEL_TYPE_PADN = 1,
Perhaps BABEL_TLV_* would be better?
+enum babel_iface_type_t { + BABEL_IFACE_TYPE_WIRED, + BABEL_IFACE_TYPE_WIRELESS, + BABEL_IFACE_TYPE_MAX +};
I would prefer to have explicit values here. Perhaps also reserve 0 for undefined?
+struct babel_seqno_request_cache { + slab *slab; + list entries; /* Seqno requests in the cache (struct babel_seqno_request) */ +};
Perhaps this should be merged into babel_proto?
+struct babel_source { + node n; + struct babel_entry *e;
This ptr seems completely unused.
+struct babel_config { + struct proto_config c; + + list iface_list; /* Patterns configured -- keep it first; see babel_reconfigure why */ + int port;
Port option should be per-interface
+#define BABEL_ADD_TLV_SEND(tlv,bif,func,addr) do { \
Seems unused
diff --git a/proto/babel/babel.c b/proto/babel/babel.c ... +#undef TRACE +#define TRACE(level, msg, args...) do { if (p->p.debug & level) { log(L_TRACE "%s: " msg, p->p.name , ## args); } } while(0)
Unnecessary. Just use TRACE() defined in nest/protocol.h
+/* computes a-b % 65535 for u16 datatypes */ +static inline u16 +diff_mod64k(u16 a, u16 b) +{ + return a >= b ? a-b : 0xffff-b+a; +}
Babel uses modulo 2^16 (65536), comment says a-b % 65535 The diff_mod64k() is neither: a=0xffff b=0 -> ret=0xffff (ok for modulo 2^16, not for 65535) a=0 b=1 -> ret=0xfffe (ok for modulo 65535, not for 2^16) If it should be just a-b % 2^16, then there is no need for this function ((u16) a-b) should be enough.
+static inline struct babel_entry * +babel_find_entry(struct babel_proto *p, ip_addr prefix, u8 plen) +{ + return fib_find(&p->rtable, &prefix, plen); +} + +static struct babel_entry * +babel_get_entry(struct babel_proto *p, ip_addr prefix, u8 plen) +{ + struct babel_entry *e = babel_find_entry(p, prefix, plen); + if(e) return e; + e = fib_get(&p->rtable, &prefix, plen);
babel_find_entry() is unnecessary, just use fib_get(), as it uses fib_find() internally anyways).
+void +babel_flush_entry(struct babel_entry *e) +{ + struct babel_proto *p = e->proto; + TRACE(D_EVENTS, "Flushing entry %I/%d", e->n.prefix, e->n.pxlen); + rem_node(&e->garbage_node); + if(p) fib_delete(&p->rtable, e);
It is really possible that (p == NULL)? In that case TRACE() would crash.
+static void +babel_expire_routes(struct babel_proto *p) +{ + struct babel_entry *e; + struct babel_route *r, *rx; + node *n; + FIB_WALK(&p->rtable, n) { + e = (struct babel_entry *)n; + WALK_LIST_DELSAFE(r, rx, e->routes) { + if(r->refresh_time && r->refresh_time <= now) { + refresh_route(r); + r->refresh_time = 0; + } + if(r->expires && r->expires <= now) + expire_route(r); + } + expire_sources(e); + } FIB_WALK_END; + WALK_LIST_FIRST(n, p->garbage) { + e = SKIP_BACK(struct babel_entry, garbage_node, n); + babel_flush_entry(e); + } +}
Instead of using p->garbage and garbage_node, you could simply use FIB_ITERATE instead of FIB_WALK and remove babel_entry inside the cycle: FIB_ITERATE_INIT(&fit, &p->rtable); loop: FIB_ITERATE_START(&p->rtable, &fit, node) { struct babel_entry *e = (void *) node; ... if (EMPTY_LIST(e->sources) && EMPTY_LIST(e->routes)) { FIB_ITERATE_PUT(&fit, node); babel_flush_entry(e) goto loop; } } FIB_ITERATE_END(node); Therefore whole garbage list could be removed.
+static void +babel_expire_neighbors(struct babel_proto *p) +{ + struct babel_iface *bif; + struct babel_neighbor *bn, *bnx; + WALK_LIST(bif, p->interfaces) { + WALK_LIST_DELSAFE(bn, bnx, bif->neigh_list) { + if(bn->hello_expiry && bn->hello_expiry <= now) + expire_hello(bn); + if(bn->ihu_expiry && bn->ihu_expiry <= now) + expire_ihu(bn); + }
expire_hello() -> babel_flush_neighbor() -> neighbor is freed but on the next line it is accessed.
+static u16 +babel_compute_rxcost(struct babel_neighbor *bn) +{ + struct babel_iface *bif = bn->bif; + struct babel_proto *p = bif->proto; + u8 n, missed; + u16 map=bn->hello_map; + + if(!map) return BABEL_INFINITY; + n = __builtin_popcount(map); // number of bits set
This should be defined as our inline function in lib/bitopts.h, then used here.
+ missed = bn->hello_n-n; + + if(bif->cf->type == BABEL_IFACE_TYPE_WIRED) { + /* k-out-of-j selection - Appendix 2.1 in the RFC. */ + DBG("Missed %d hellos from %I\n", missed, bn->addr); + /* Link is bad if more than half the expected hellos were lost */ + return (missed > 0 && n/missed < 2) ? BABEL_INFINITY : bif->cf->rxcost;
Shouldn't there be bn->hello_n instead of n? Also the next variant (with bn->hello_n instead of n) is faster and simpler: return (missed > n/2) ? BABEL_INFINITY : bif->cf->rxcost;
+/* Simple additive metric - Appendix 3.1 in the RFC */ +static u16 +compute_metric(struct babel_neighbor *bn, u16 metric) +{ + u16 cost = compute_cost(bn); + return (cost == BABEL_INFINITY) ? cost : cost+metric; +}
Well, you could have both metric and cost < BABEL_INFINITY, but metric+cost > BABEL_INFINITY. Just use: static u16 compute_metric(struct babel_neighbor *bn, uint metric) { metric += compute_cost(bn); return MIN(metric, BABEL_INFINITY); }
+static rte * +babel_build_rte(struct babel_proto *p, net *n, struct babel_route *r) +{ + rta *a; + rte *rte; + + rta A = { + .src = p->p.main_source, + .source = RTS_BABEL, + .scope = SCOPE_UNIVERSE, + .cast = RTC_UNICAST, + .dest = r->metric == BABEL_INFINITY ? RTD_UNREACHABLE : RTD_ROUTER, + .flags = 0, + .gw = r->next_hop,
Keep .gw zeroed for RTD_UNREACHABLE.
+ }; + + if(r->neigh) { + A.from = r->neigh->addr; + A.iface = r->neigh->bif->iface; + }
When r->neigh is NULL? If 'r' is external route (one exported to Babel from other protocols), then it should not be propagated back to core. See the first comment in this mail.
+ a = rta_lookup(&A); + rte = rte_get_temp(a); + rte->u.babel.metric = r->metric; + rte->u.babel.router_id = r->router_id; + rte->net = n; + rte->pflags = 0; + return rte; +}
I would suggest to integrate rte_update() here and name the function babel_announce_rte().
+ +static void +babel_send_seqno_request(struct babel_entry *e) +{ + struct babel_proto *p = e->proto; + struct babel_route *r = e->selected; + struct babel_source *s = babel_find_source(e, r->router_id); + struct babel_iface *bif; + struct babel_tlv_seqno_request *tlv; + + if(s && cache_seqno_request(p, e->n.prefix, e->n.pxlen, r->router_id, s->seqno+1)) { + TRACE(D_EVENTS, "Sending seqno request for %I/%d router_id %0lx",
"%0lx" is incorrect, as long is 32-bit on 32-bit machines, while router_id is u64. Perhaps bsprintf have to be extended. Or perhabs we just should have function to print 64-bit router id to local string buffer using some string representation. I don't know what is the preffered string representation for this 64-bit router ID in Babel. My suggestion is to use a half of IPv6 (e.g. 1aa9:5ff:fe1a:61e3).
+static void +babel_unicast_seqno_request(struct babel_route *r) +{ + struct babel_entry *e = r->e; + struct babel_proto *p = e->proto; + struct babel_source *s = babel_find_source(e, r->router_id); + struct babel_iface *bif = r->neigh->bif; + struct babel_tlv_seqno_request *tlv; + if(s && cache_seqno_request(p, e->n.prefix, e->n.pxlen, r->router_id, s->seqno+1)) { + TRACE(D_EVENTS, "Sending seqno request for %I/%d router_id %0lx",
Ditto. There are more such cases in the sources.
+/** + * babel_select_route: + * @e: Babel entry to select the best route for. + * + * Select the best feasible route for a given prefix. This just selects the + * feasible route with the lowest metric. If this results in switching upstream + * router (identified by router id), the nest is notified of the new route. + * + * If no feasible route is available for a prefix that previously had a route + * selected, a seqno request is sent to try to get a valid route. In the + * meantime, the route is marked as infeasible in the nest (to blackhole packets + * going to it, as per the RFC). + * + * If no feasible route is available, and no previous route is selected, the + * route is removed from the nest entirely. + */ +static void +babel_select_route(struct babel_entry *e) +{ + struct babel_proto *p = e->proto; + net *n = net_get(p->p.table, e->n.prefix, e->n.pxlen); + struct babel_route *r, *cur = e->selected; + + /* try to find the best feasible route */ + WALK_LIST(r, e->routes) + if((!cur || r->metric < cur->metric) + && is_feasible(babel_find_source(e, r->router_id), + r->seqno, r->advert_metric)) + cur = r;
Why feasibility is checked here? I would expect that feasibility is checked when the route is received, but all accepted to e->routes stays feasible. Also we should not process external routes here (see the first comment of the mail).
+ if(cur && cur->neigh && ((!e->selected && cur->metric < BABEL_INFINITY) + || (e->selected && cur->metric < e->selected->metric))) { + TRACE(D_EVENTS, "Picked new route for prefix %I/%d: router id %0lx metric %d", + e->n.prefix, e->n.pxlen, cur->router_id, cur->metric); + /* Notify the nest of the update. If we change router ID, we also trigger + a global update. */ + if(!e->selected || + e->selected->metric == BABEL_INFINITY || + e->selected->router_id != cur->router_id) + + ev_schedule(p->update_event);
Generally, we should not triger update event here, that should be triggered as a result of babel_rt_notify (see the first comment of the mail).
+static void +babel_send_ack(struct babel_iface *bif, ip_addr dest, u16 nonce) +{ + struct babel_proto *p = bif->proto; + struct babel_tlv_ack *tlv; + TRACE(D_PACKETS, "Babel: Sending ACK to %I with nonce %d\n", dest, nonce);
Starting with "Babel: " is unncesessary in TRACE(). There are more of these in the sources.
+static void +babel_flush_neighbor(struct babel_neighbor *bn) +{ + struct babel_proto *p = bn->bif->proto; + struct babel_route *r; + node *n; + TRACE(D_EVENTS, "Flushing neighbor %I", bn->addr); + rem_node(NODE bn); + WALK_LIST_FIRST(n, bn->routes) { + r = SKIP_BACK(struct babel_route, neigh_route, n); + babel_flush_route(r); + } + mb_free(bn); +}
Perhaps this should be together with other babel_*_neighbor() functions?
+/* update hello history according to Appendix A1 of the RFC */ +static void +update_hello_history(struct babel_neighbor *bn, u16 seqno, u16 interval) +{ + u8 diff; + if(seqno == bn->next_hello_seqno) {/* do nothing */} + /* if the expected and seen seqnos are within 16 of each other (mod 65535), + the modular difference is going to be less than 16 for one of the + directions. Otherwise, the values differ too much, so just reset. */ + else if(diff_mod64k(seqno, bn->next_hello_seqno) > 16 && + diff_mod64k(bn->next_hello_seqno,seqno) > 16) { + /* note state reset - flush entries */ + bn->hello_map = bn->hello_n = 0; + } else if((diff = diff_mod64k(bn->next_hello_seqno,seqno)) <= 16) { + /* sending node increased interval; reverse history */ + bn->hello_map >>= diff; + bn->hello_n = (diff < bn->hello_n) ? bn->hello_n - diff : 0; + } else if((diff = diff_mod64k(seqno,bn->next_hello_seqno)) <= 16) { + /* sending node decreased interval; fast-forward */ + bn->hello_map <<= diff; + bn->hello_n = MIN(bn->hello_n + diff, 16); + }
This is a mess. What about just: u16 delta = diff_mod64k(seqno,bn->next_hello_seqno); if (delta == 0) { /* do nothing */ } else if (delta <= 16) { /* sending node decreased interval; fast-forward */ } else if (delta >= 0xfff0) { /* sending node increased interval; reverse history */ } else { /* note state reset - flush entries */ }
+int +babel_handle_ihu(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ ... + struct babel_neighbor *bn = babel_get_neighbor(bif, state->saddr); + bn->txcost = tlv->rxcost; + bn->ihu_expiry = now + 1.5*(tlv->interval/100);
Please avoid floating point ops.
+int +babel_handle_update(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ ... + if(tlv->flags & BABEL_FLAG_ROUTER_ID) { + u64 *buf = (u64*)&prefix; + memcpy(&state->router_id, buf+1, sizeof(u64));
I don't think is correct. BIRD stores ip6_addr as four u32 integers in host order, also router_id is u64 in host order, therefore such memcpy will swap meaning of lower and upper half of router ID on little endian machines. This is more clean and comprehensible: state->router_id = (((u64) _I2(prefix)) << 32) | _I3(prefix); I wonder what this flag is supposed to mean when AE is 1, RFC does not mention this.
+ } + if(!state->router_id) + log(L_WARN "%s: Received update on %s with no preceding router id", p->p.name, bif->ifname);
BTW, Babel RFC does not explictly specify that zero Router ID is invalid. But that is likely an ommision in the RFC.
+static void +expire_seqno_requests(struct babel_seqno_request_cache *c) { + struct babel_seqno_request *n, *nx; + WALK_LIST_DELSAFE(n, nx, c->entries) { + if(n->updated < now-BABEL_SEQNO_REQUEST_EXPIRY) {
Perhaps better would be (n->updated + BABEL_SEQNO_REQUEST_EXPIRY < now). now is not real time, but monotonic timer, so it can be very small. Although bird_clock_t is likely to be signed, it is safer to stay in positive range. Also perhaps <= instead of < ?
+/* The RFC section 3.8.1.2 on seqno requests: + + When a node receives a seqno request for a given router-id and sequence + number, it checks whether its routing table contains a selected entry for + that prefix; if no such entry exists, or the entry has infinite metric, it + ignores the request.
There is no need to paste 5 paragraphs from RFC, just reference the section.
+static void +babel_dump(struct proto *P) +{ + struct babel_proto *p = (struct babel_proto *) P; + struct babel_entry *e; + struct babel_iface *bif; + debug("Babel: router id %0lx update seqno %d\n", p->router_id, p->update_seqno); + WALK_LIST(bif, p->interfaces) {babel_dump_interface(bif);} + FIB_WALK(&p->rtable, n) { + e = (struct babel_entry *)n; + babel_dump_entry(e); + } FIB_WALK_END; +}
Although dump functions may be useful, i would suggest also to implement regular 'show babel interfaces' and 'show babel neighbors' user commands. See rip_show_interfaces(), rip_show_neighbors() in the new RIP code.
+static void +kill_iface(struct babel_iface *bif) +{ + DBG( "Babel: Interface %s disappeared\n", bif->iface->name); + struct babel_neighbor *bn; + WALK_LIST_FIRST(bn, bif->neigh_list) + babel_flush_neighbor(bn); + rfree(bif->pool); +}
It would make sense to integrate rem_node() from babel_if_notify() (explicit free of lock is unnecessary, that is handled by freeing of pool), and perhaps rename the function to babel_remove_iface().
+static void +babel_iface_linkdown(struct babel_iface *bif) +{ + struct babel_neighbor *bn; + struct babel_route *r; + node *n; + WALK_LIST(bn, bif->neigh_list) { + WALK_LIST(n, bn->routes) { + r = SKIP_BACK(struct babel_route, neigh_route, n); + r->metric = BABEL_INFINITY; + r->expires = now + r->expiry_interval; + babel_select_route(r->e); + } + } +}
Is there any reason why route handing is different in this case and in kill_iface() case?
+static void +babel_if_notify(struct proto *P, unsigned c, struct iface *iface) +{ + struct babel_proto *p = (struct babel_proto *) P; + struct babel_config *cf = (struct babel_config *) P->cf; + DBG("Babel: if notify: %s flags %x\n", iface->name, iface->flags); + if (iface->flags & IF_IGNORE) + return; + if (c & IF_CHANGE_UP) { + struct iface_patt *k = iface_patt_find(&cf->iface_list, iface, iface->addr); + + /* we only speak multicast */ + if(!(iface->flags & IF_MULTICAST)) return; + + if (!k) return; /* We are not interested in this interface */ + + babel_new_interface(p, iface, iface->flags, k); + + } + struct babel_iface *bif = babel_find_interface(p, iface); + + if(!bif) + return; + + if(!(iface->flags & IF_CHANGE_LINK)) { + TRACE(D_EVENTS, "Interface %s lost link", iface->name); + babel_iface_linkdown(bif); + }
The iface->flags contain IF_LINK_UP, while the 'c' variable contain IF_CHANGE_LINK it should be: if ((c & IF_CHANGE_LINK) && !(iface->flags & IF_LINK_UP)) ... I would suggest to see the new rip_if_notify() for an example.
+static void +babel_store_tmp_attrs(struct rte *rt, struct ea_list *attrs) +{ + eattr *rid = ea_find(attrs, EA_BABEL_ROUTER_ID); + rt->u.babel.router_id = rid ? *((u64*) rid->u.ptr->data) : 0;
This may be an unaligned access. I would suggest to ignore the router_id here, and just store the metric. It is not a problem as EA_BABEL_ROUTER_ID is unmodifiable due to being EAF_TYPE_OPAQUE. See krt_store_tmp_attrs() in sysdep/unix/krt.c for similar case.
+/* + * babel_rt_notify - core tells us about new route (possibly our + * own), so store it into our data structures. + */ +static void +babel_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net, + struct rte *new, struct rte *old, struct ea_list *attrs) +{ + struct babel_proto *p = (struct babel_proto *)P; + struct babel_entry *e; + struct babel_route *r; + + TRACE(D_EVENTS, "Got route from nest: %I/%d", net->n.prefix, net->n.pxlen); + if(new) { + e = babel_get_entry(p, net->n.prefix, net->n.pxlen); + r = (e->selected) ? e->selected : babel_get_route(e, NULL); + + if(!r->neigh) { + r->seqno = p->update_seqno; + r->router_id = p->router_id; + r->metric = 0; + e->selected = r; + } + } else if(old) { + /* route has gone away; send retraction */ + e = babel_find_entry(p, net->n.prefix, net->n.pxlen); + if(e && e->selected && !e->selected->neigh) { + /* no neighbour, so our route */ + e->selected->metric = BABEL_INFINITY; + e->selected->expires = now + BABEL_HOLD_TIME; + babel_select_route(e); + } + } else { + return; + } + ev_schedule(p->update_event); +}
+static void babel_neigh_notify(neighbor *n)
You could just remove this function.
+void +babel_init_config(struct babel_config *c) +{ + init_list(&c->iface_list); + c->port = BABEL_PORT; +}
Just move that to config.Y, see config.Y of other protocols (e.g. ospf_proto_start).
+ +static void +babel_get_route_info(rte *rte, byte *buf, ea_list *attrs) +{ + buf += bsprintf(buf, " (%d/%0lx)", rte->u.babel.metric, rte->u.babel.router_id); +}
First number in the output should be rte->pref. To be consistent with OSPF, router id should be separate, see ospf_get_route_info(): "(%d/%d) [%x]", rte->pref, rte->u.babel.metric, rte->u.babel.router_id. OTOH, i would suggest to just ignore router_id here completely, as available line length is limited.
+ +static int +babel_get_attr(eattr *a, byte *buf, int buflen UNUSED) +{ + switch (a->id) { + case EA_BABEL_METRIC: bsprintf( buf, "metric: %d", a->u.data ); return GA_FULL; + default: return GA_UNKNOWN; + } +}
As opposed to babel_store_tmp_attrs(), it makes sense to handle EA_BABEL_ROUTER_ID here.
+static void +babel_copy_config(struct proto_config *dest, struct proto_config *src)
You could just remove this function.
diff --git a/proto/babel/config.Y b/proto/babel/config.Y ... +babel_cfg: + babel_cfg_start proto_name '{' + | babel_cfg proto_item ';' + | babel_cfg PORT expr ';' { BABEL_CFG->port = $3; }
Port should be per-interface option.
+ | babel_cfg INTERFACE babel_iface ';' + ; + + +babel_iface_item: + | RXCOST expr { BABEL_IFACE_CFG->rxcost = $2; } + | TX tos { BABEL_IFACE_CFG->tx_tos = $2; } + | TX PRIORITY expr { BABEL_IFACE_CFG->tx_priority = $3; } + | TYPE WIRED { BABEL_IFACE_CFG->type = BABEL_IFACE_TYPE_WIRED; } + | TYPE WIRELESS { BABEL_IFACE_CFG->type = BABEL_IFACE_TYPE_WIRELESS; } + | HELLO_INTERVAL expr { BABEL_IFACE_CFG->hello_interval = $2; } + | UPDATE_INTERVAL expr { BABEL_IFACE_CFG->update_interval = $2; }
Use HELLO INTERVAL instead of HELLO_INTERVAL, or perhaps just HELLO TIME.
+babel_iface_init: + /* EMPTY */ { + this_ipatt = cfg_allocz(sizeof(struct babel_iface_config)); + add_tail(&BABEL_CFG->iface_list, NODE this_ipatt); + init_list(&this_ipatt->ipn_list); + BABEL_IFACE_CFG->rxcost = BABEL_INFINITY; + BABEL_IFACE_CFG->type = BABEL_IFACE_TYPE_WIRED; + BABEL_IFACE_CFG->tx_tos = IP_PREC_INTERNET_CONTROL; + BABEL_IFACE_CFG->tx_priority = sk_priority_control; + BABEL_IFACE_CFG->hello_interval = BABEL_INFINITY; + BABEL_IFACE_CFG->update_interval = BABEL_INFINITY;
Just use zero for undefined (i.e. just not set them) instead of BABEL_INFINIT.
diff --git a/proto/babel/packet.c b/proto/babel/packet.c new file mode 100644 index 0000000..54f256b --- /dev/null +++ b/proto/babel/packet.c
+#undef TRACE +#define TRACE(level, msg, args...) do { if (p->p.debug & level) { log(L_TRACE "%s: " msg, p->p.name , ## args); } } while(0)
Unnecessary. Just use TRACE() defined in nest/protocol.h
+#define TLV_SIZE(t) (t->type == BABEL_TYPE_PAD0 ? 1 : t->length + sizeof(struct babel_tlv_header)) +#define TLV_LENGTH(t) (tlv_data[t].struct_length-sizeof(struct babel_tlv_header))
TLV_SIZE vs TLV_LENGTH is confusing, perhaps TLV_LENGTH vs TLV_DATA_LENGTH ?
+static inline ip_addr +get_ip6_ll(u32 *addr) +{ + return ip6_or(ipa_build6(0xfe800000,0,0,0), + ipa_build6(0,0,ntohl(addr[0]),ntohl(addr[1]))); +}
You should use ipa_from_ip6(). Also why ip6_or()? Just use: ipa_build6(0xfe800000,0,ntohl(addr[0]),ntohl(addr[1])).
+static struct babel_tlv_data tlv_data[BABEL_TYPE_MAX] = { + {1, NULL,NULL,NULL,NULL,NULL}, + {3, NULL,NULL,NULL,NULL,NULL}, + {sizeof(struct babel_tlv_ack_req), + babel_handle_ack_req, babel_validate_length, + babel_hton_ack_req, babel_ntoh_ack_req, + NULL,NULL},
Please use: [BABEL_*] = {xxx, yyy, ...},
+ip_addr +babel_get_addr_ihu(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + struct babel_tlv_ihu *tlv = (struct babel_tlv_ihu *)hdr; + struct babel_iface *bif = state->bif; + if(tlv->ae == BABEL_AE_WILDCARD) { + return bif->iface->addr->ip; /* FIXME: Correct? */
Well, you check it against bif->addr in babel_handle_ihu() so you should use that.
+int +babel_validate_update(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + struct babel_tlv_update *tlv = (struct babel_tlv_update *)hdr; + int min_length = TLV_LENGTH(BABEL_TYPE_UPDATE)-sizeof(tlv->addr); + u8 len = tlv->plen/8; + if(tlv->plen % 8) len++;
u8 len = (tlv->plen + 7) / 8;
+ /* Can only omit bits if a previous update defined a prefix to take them from */ + if(tlv->omitted && ipa_equal(state->prefix, IPA_NONE)) + return 0;
According to RFC: The next-hop address for this update is taken from the last preceding Next Hop TLV with a matching address family in the same packet; if no such TLV exists, it is taken from the network-layer source address of this packet. So this case is not necessary.
+ip_addr +babel_get_addr_update(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ + struct babel_tlv_update *tlv = (struct babel_tlv_update *)hdr; + char buf[16] = {0};
Just: char buf[16] = {};
+void +babel_hton_seqno_request(struct babel_tlv_header *hdr) +{ + struct babel_tlv_seqno_request *tlv = (struct babel_tlv_seqno_request *)hdr; + tlv->seqno = htons(tlv->seqno); + tlv->router_id = htobe64(tlv->router_id); +}
There are some issues with portability of names and including appropriate header for htobe64() / be64toh(). Perhaps this should be handled somewhere in lib/.
+void +babel_ntoh_seqno_request(struct babel_tlv_header *hdr) +{ + struct babel_tlv_seqno_request *tlv = (struct babel_tlv_seqno_request *)hdr; + tlv->seqno = ntohs(tlv->seqno); + tlv->router_id = be64toh(tlv->router_id); +}
Ditto.
+struct babel_tlv_header * +babel_add_tlv_size(struct babel_iface *bif, u16 type, int len) +{ + struct babel_header *hdr = bif->current_buf; + struct babel_tlv_header *tlv; + int pktlen = sizeof(struct babel_header)+hdr->length; + if(pktlen+len > bif->max_pkt_len) { + babel_send_queue(bif); + pktlen = sizeof(struct babel_header)+hdr->length; + }
This seems broken - TLV is added to current_buf, which may be either tlv_buf or just sock->tbuf based on multicast vs unicast. But if there si not enough space, then babel_send_queue() is called, which assumes that data are in tlv_buf, and copies it to sock->tbuf.
+int +babel_process_packet(struct babel_header *pkt, int size, + ip_addr saddr, int port, struct babel_iface *bif) +{ + struct babel_tlv_header *tlv = FIRST_TLV(pkt); + struct babel_proto *proto = bif->proto; + struct babel_parse_state state = { + .proto = proto, + .bif = bif, + .saddr = saddr, + .prefix = IPA_NONE,
It is unnecessary to initialize addresses to IPA_NONE.
+ .next_hop = saddr, + }; + char *p = (char *)pkt;
p should be always struct babel_proto ptr. At least for TRACE().
+ int res = 0; + + pkt->length = ntohs(pkt->length); + if(pkt->magic != BABEL_MAGIC + || pkt->version != BABEL_VERSION + || pkt->length > size - sizeof(struct babel_header)) { + DBG("Invalid packet: magic %d version %d length %d size %d\n", + pkt->magic, pkt->version, pkt->length, size); + return 1; + } + + while((char *)tlv < p+size) { + if(tlv->type > BABEL_TYPE_PADN + && tlv->type < BABEL_TYPE_MAX + && validate_tlv(tlv, &state)) { + babel_tlv_ntoh(tlv); + res &= tlv_data[tlv->type].handle(tlv, &state); + } else { + DBG("Unknown or invalid TLV of type %d\n",tlv->type); + } + NEXT_TLV(tlv); + }
First, the validation IMHO does not check if the current TLV does not overlap available packet length. Second, we should use pkt->length instead of size for validation and processing. Framing errors, like pkt->length larger than size (- hdr), or TLV overlapping pkt->length should be logged as error. IMHO, there should be first iteration doing just validation and if any TLV failed then whole packet should be dropped (and perhaps logged). Or at least remaining TLVs. Otherwise, e.g. if one Router-ID or Next hop TLV is invalid, then remaining TLVs would be processed with a different router id / next hop. The IPv4 (and unknown) AE should not fail validation, instead they should be ignored in handling code. There should be some general babel_validate_address(), which would be shared by other validate functions to check address based on ae and remaining length. Also, the return value (res) is meaningless?
+ if(state.needs_update) + bif->update_triggered = 1; + return res; +} + + +static void +babel_tx_err( sock *s, int err ) +{ + log( L_ERR ": Unexpected error at Babel transmit: %M", err ); +}
It would make sense to report proto and iface: static void rip_err_hook(sock *sk, int err) { struct rip_iface *ifa = sk->data; struct rip_proto *p = ifa->rip; log(L_ERR "%s: Socket error on %s: %M", p->p.name, ifa->iface->name, err); rip_reset_tx_session(p, ifa); }
+static int +babel_rx(sock *s, int size) +{ + struct babel_iface *bif = s->data; + struct babel_proto *p = bif->proto; + if (! bif->iface || s->lifindex != bif->iface->index) + return 1; + + DBG("Babel: incoming packet: %d bytes from %I via %s\n", size, s->faddr, bif->iface->name); + if (size < sizeof(struct babel_header)) BAD( "Too small packet" ); + + if (ipa_equal(bif->iface->addr->ip, s->faddr)) { + DBG("My own packet\n"); + return 1; + } + + if (!ipa_is_link_local(s->faddr)) { BAD("Non-link local sender"); } + + babel_process_packet((struct babel_header *) s->rbuf, size, s->faddr, s->fport, bif ); + return 1; +}
At least fport should also be checked. I would suggest to upgrade babel_rx() based on rip_rx_hook() code from the new RIP.
+int +babel_open_socket(struct babel_iface *bif) +{ + struct babel_proto *p = bif->proto; + struct babel_config *cf = (struct babel_config *) p->p.cf; + bif->sock = sk_new( bif->pool ); + bif->sock->type = SK_UDP; + bif->sock->sport = cf->port; + bif->sock->rx_hook = babel_rx; + bif->sock->data = bif;
+ bif->sock->rbsize = 10240; + bif->sock->iface = bif->iface; + bif->sock->tbuf = mb_alloc( bif->pool, bif->iface->mtu);
Why not just use: bif->sock->rbsize = MIN(512, bif->iface->mtu); bif->sock->tbsize = bif->sock->rbsize; This ensures rbuf, tbuf allocation. Also note that buffer sizes should be updated in case of MTU is changed, handle IF_CHANGE_MTU in babel_if_notify().
+ bif->sock->err_hook = babel_tx_err; + bif->sock->dport = cf->port; + bif->sock->daddr = IP6_BABEL_ROUTERS; ... + + return 1; + + err: + sk_log_error(bif->sock, p->p.name); + log(L_ERR "%s: Cannot open socket for %s", p->p.name, bif->iface->name);
sk_log_error() already generated the proper log message.
+ rfree(bif->sock);
bif->sock should be set to NULL, to be sure. Or perhaps you can use a local variable for socket and assign to bif->sock at the end, like other protocols do. -- Elen sila lumenn' omentielvo Ondrej 'Santiago' Zajicek (email: santiago@crfreenet.org) OpenPGP encrypted e-mails preferred (KeyID 0x11DEADC3, wwwkeys.pgp.net) "To err is human -- to blame it on a computer is even more so."
Ondrej Zajicek <santiago@crfreenet.org> writes:
I am sorry i didn't got around to review it sooner, but doing a proper review for a whole new (and unfamiliar) protocol takes nontrivial amount of time.
Generally, the code looks good and i am determined to finally merge it, but it definitely needs more refinement before that.
Hi Ondrej Awesome! Many thanks for your detailed comments, and thank you for going easy on my (lack of) C skills :) I'll go through and do another revision and resubmit. Some answers / additional questions inline below.
First, there is an issue with route selection. Proper route selection in BIRD should work this way:
1) Babel chooses the best route from received routes, say selected_in 2) If selected_in changes, it is propagated to the core by rte_update() 3) Core compares routes from multiple protocols and one is exported to the Babel 4) This route is either Babel own route or external route 5) In babel_rt_notify(), Babel receives that route, finds appropriate internal route (or create a new one) and stores it in say selected_out. 6) Even if the received selected_out is an external route, Babel do not stop propagating selected_in to the core 7) selected_out is propagated to the network in periodic updates. If selected_out changes, it is an event for triggered updates.
Ah, thank you! I struggled a great deal with figuring out how this was supposed to work. I'll fix that, and also try to write this up in a form that is suitable for the documentation and include that in the next version of the patch. Speaking of documentation: What kind of documentation do you expect in the patch? Obviously I'll include user's documentation on the configuration options, but some of the other protocols also has API documentation of (some of) their function hooks, and it's not immediately obvious to me what the purpose of those are? Should I add that for Babel as well?
Second, can we really expect that TLVs in received packets are aligned? Although all TLVs looks like suited to 32-bit alignment, i did not found mentioned it in Babel RFC and availability of Pad1, PadN TLVs and per-byte compressible addresses suggest that they generally are not aligned.
In that case, we have to use get_uXX()/put_uXX() functions instead of directly accessing babel_tlv_* structure fields. Or we could copy each TLV to a local buffer before calling ntoh+handle callbacka, as each TLV internally is aligned, and enforce alignment for outgoing packets. I would generally prefer the first approach, but the second approach is simpler way to fix it.
No, I don't think they are necessarily 32-bit aligned. I struggled with alignment issues for the 64-bit values (and got it to work by using the __attribute__((packed)) attribute in the structure definitions). I'll try to figure out how to do this properly in general.
Third, although i am OK with accepting IPv6-only implementation, it should be ready for future expansion. TLVs with AE 1 could be skipped, but there should not be hidden hardwired IPv6 assumptions in the code.
Mainly, TLV structures should use addr[0] at the end instead of addr[2] and addr[4]. Address space requirements should be handled explictly based on AE.
Noted.
Fourth, it seems like triggered updates are not really implemented as they should. Route change triggers propagation, but the whole routing table is always propagated instead of just the changed entries. This could be easily fixed by having timestamp on each babel_entry updated everytime when a significant change of selected_out route happened. For regular updates all routes are propagated, for triggered updates only the entries with timestamps >= time when the trigger update was requested. See want_triggered and tx_changed fields in the new RIP code.
Hmm, I'm not sure just dumping everything in a triggered update is strictly a violation of the RFC. But I suppose it does have obvious impacts on efficiency. Will try to be smarter about that.
Fifth, ... (snip)
Sixth, ... (snip)
Seventh ... (snip)
Finally, the coding style ... (snip)
Will fix these as well.
Rest are comments specific to lines in the patch:
Most of these seems obvious how to fix; one or two questions below:
diff --git a/nest/route.h b/nest/route.h ... @@ -538,6 +546,7 @@ extern struct protocol *attr_class_to_protocol[EAP_MAX]; #define DEF_PREF_RIP 120 /* RIP */ #define DEF_PREF_BGP 100 /* BGP */ #define DEF_PREF_PIPE 70 /* Routes piped from other tables */ +#define DEF_PREF_BABEL 42 /* Babel */
Default could be higher, e.g. 130
OK. What do these actually mean? Is it simply that a higher priority wins if two protocols have the same route?
+enum babel_iface_type_t { + BABEL_IFACE_TYPE_WIRED, + BABEL_IFACE_TYPE_WIRELESS, + BABEL_IFACE_TYPE_MAX +};
I would prefer to have explicit values here. Perhaps also reserve 0 for undefined?
Well, undefined == wired in this case. I.e. use the simple metric computation mechanism unless explicitly told to treat the interface as wireless. I don't suppose Bird has a way of telling automatically what type of interface we are dealing with?
+static void +babel_expire_routes(struct babel_proto *p) +{ + struct babel_entry *e; + struct babel_route *r, *rx; + node *n; + FIB_WALK(&p->rtable, n) { + e = (struct babel_entry *)n; + WALK_LIST_DELSAFE(r, rx, e->routes) { + if(r->refresh_time && r->refresh_time <= now) { + refresh_route(r); + r->refresh_time = 0; + } + if(r->expires && r->expires <= now) + expire_route(r); + } + expire_sources(e); + } FIB_WALK_END; + WALK_LIST_FIRST(n, p->garbage) { + e = SKIP_BACK(struct babel_entry, garbage_node, n); + babel_flush_entry(e); + } +}
Instead of using p->garbage and garbage_node, you could simply use FIB_ITERATE instead of FIB_WALK and remove babel_entry inside the cycle:
I tried and failed to get this to work; the whole thing would just crash, which is why I came up with the garbage list approach. Probably did it wrong, though (wasn't obvious to me how to use the FIB_* macros), so will give it another shot; thanks for the pointer :)
+static u16 +babel_compute_rxcost(struct babel_neighbor *bn) +{ + struct babel_iface *bif = bn->bif; + struct babel_proto *p = bif->proto; + u8 n, missed; + u16 map=bn->hello_map; + + if(!map) return BABEL_INFINITY; + n = __builtin_popcount(map); // number of bits set
This should be defined as our inline function in lib/bitopts.h, then used here.
So just alias __builtin_popcount to something in bitopts.h? Not entirely sure what compilers you are targeting; that macro seems to be GCC-specific, but is also supported in LLVM since 1.5: http://llvm.org/releases/1.5/docs/ReleaseNotes.html
+int +babel_handle_ihu(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ ... + struct babel_neighbor *bn = babel_get_neighbor(bif, state->saddr); + bn->txcost = tlv->rxcost; + bn->ihu_expiry = now + 1.5*(tlv->interval/100);
Please avoid floating point ops.
So, substitute 3/2 for 1.5?
+int +babel_handle_update(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ ... + if(tlv->flags & BABEL_FLAG_ROUTER_ID) { + u64 *buf = (u64*)&prefix; + memcpy(&state->router_id, buf+1, sizeof(u64));
I don't think is correct. BIRD stores ip6_addr as four u32 integers in host order, also router_id is u64 in host order, therefore such memcpy will swap meaning of lower and upper half of router ID on little endian machines.
This is more clean and comprehensible:
state->router_id = (((u64) _I2(prefix)) << 32) | _I3(prefix);
I wonder what this flag is supposed to mean when AE is 1, RFC does not mention this.
Noted. You are right, that is not specified in the RFC. However, I think setting this flag with an AE of 1 should be considered an error, and will treat it as such.
+ } + if(!state->router_id) + log(L_WARN "%s: Received update on %s with no preceding router id", p->p.name, bif->ifname);
BTW, Babel RFC does not explictly specify that zero Router ID is invalid. But that is likely an ommision in the RFC.
Ah yes, that is true. Will try to get that clarified.
+static void +babel_iface_linkdown(struct babel_iface *bif) +{ + struct babel_neighbor *bn; + struct babel_route *r; + node *n; + WALK_LIST(bn, bif->neigh_list) { + WALK_LIST(n, bn->routes) { + r = SKIP_BACK(struct babel_route, neigh_route, n); + r->metric = BABEL_INFINITY; + r->expires = now + r->expiry_interval; + babel_select_route(r->e); + } + } +}
Is there any reason why route handing is different in this case and in kill_iface() case?
My assumption was that linkdown means that the interface is likely to come back up, so I wanted to keep the routes and be able to refresh them when it does (and keep the hello seqno etc so the other hosts on the link don't refresh their metrics completely if the interface goes down for a short time). This came about while playing around with a laptop that was connected by wifi and ethernet to the same network (and running babel on both), then repeatedly unplugging and re-plugging the ethernet cable.
+static void +babel_copy_config(struct proto_config *dest, struct proto_config *src)
You could just remove this function.
Yeah, well, was planning to actually make it do something useful instead (i.e. support proper reconfiguring).
+void +babel_hton_seqno_request(struct babel_tlv_header *hdr) +{ + struct babel_tlv_seqno_request *tlv = (struct babel_tlv_seqno_request *)hdr; + tlv->seqno = htons(tlv->seqno); + tlv->router_id = htobe64(tlv->router_id); +}
There are some issues with portability of names and including appropriate header for htobe64() / be64toh(). Perhaps this should be handled somewhere in lib/.
Not sure what you mean here? Are you referring to htobe64() being glibc-specific? An how do you suggest to handle it? :)
+struct babel_tlv_header * +babel_add_tlv_size(struct babel_iface *bif, u16 type, int len) +{ + struct babel_header *hdr = bif->current_buf; + struct babel_tlv_header *tlv; + int pktlen = sizeof(struct babel_header)+hdr->length; + if(pktlen+len > bif->max_pkt_len) { + babel_send_queue(bif); + pktlen = sizeof(struct babel_header)+hdr->length; + }
This seems broken - TLV is added to current_buf, which may be either tlv_buf or just sock->tbuf based on multicast vs unicast. But if there si not enough space, then babel_send_queue() is called, which assumes that data are in tlv_buf, and copies it to sock->tbuf.
Ah, was trying to be careful with this; obviously didn't work. Will give it another go.
+int +babel_open_socket(struct babel_iface *bif) +{ + struct babel_proto *p = bif->proto; + struct babel_config *cf = (struct babel_config *) p->p.cf; + bif->sock = sk_new( bif->pool ); + bif->sock->type = SK_UDP; + bif->sock->sport = cf->port; + bif->sock->rx_hook = babel_rx; + bif->sock->data = bif;
+ bif->sock->rbsize = 10240; + bif->sock->iface = bif->iface; + bif->sock->tbuf = mb_alloc( bif->pool, bif->iface->mtu);
Why not just use:
bif->sock->rbsize = MIN(512, bif->iface->mtu);
You mean MAX?
bif->sock->tbsize = bif->sock->rbsize;
This ensures rbuf, tbuf allocation. Also note that buffer sizes should be updated in case of MTU is changed, handle IF_CHANGE_MTU in babel_if_notify().
Noted. I'll get back to you with an updated patch, hopefully before too long :) -Toke
On Thu, Oct 08, 2015 at 05:12:36PM +0200, Toke Høiland-Jørgensen wrote:
Ondrej Zajicek <santiago@crfreenet.org> writes:
I am sorry i didn't got around to review it sooner, but doing a proper review for a whole new (and unfamiliar) protocol takes nontrivial amount of time.
Generally, the code looks good and i am determined to finally merge it, but it definitely needs more refinement before that.
Hi Ondrej
Awesome! Many thanks for your detailed comments, and thank you for going easy on my (lack of) C skills :)
I'll go through and do another revision and resubmit. Some answers / additional questions inline below.
First, there is an issue with route selection. Proper route selection in BIRD should work this way:
1) Babel chooses the best route from received routes, say selected_in 2) If selected_in changes, it is propagated to the core by rte_update() 3) Core compares routes from multiple protocols and one is exported to the Babel 4) This route is either Babel own route or external route 5) In babel_rt_notify(), Babel receives that route, finds appropriate internal route (or create a new one) and stores it in say selected_out. 6) Even if the received selected_out is an external route, Babel do not stop propagating selected_in to the core 7) selected_out is propagated to the network in periodic updates. If selected_out changes, it is an event for triggered updates.
Ah, thank you! I struggled a great deal with figuring out how this was supposed to work. I'll fix that, and also try to write this up in a form that is suitable for the documentation and include that in the next version of the patch.
The new RIP code uses this approach for route propagation (For OSPF it is not directly applicable as it is link-state protocol, and BGP does not use internal routing table so it is much simpler in this regard). You could check rip_rte_update(), rip_rte_announce() and rip_rt_notify(). There, selected_in is stored implicitly as the first position in the list of routes, while selected_out is directly the content of rip_entry. Also one thing i forgot to notice. For generated external routes, you initialize metric to 0 (r->metric = 0 in babel_rt_notify()). You should use ea_get_int(attrs, EA_BABEL_METRIC, 0) to allow user set it in export filters. Default metric probably should be more than zero (zero makes sense for /128 local address, while more than zero for locally reachable network prefix).
Speaking of documentation: What kind of documentation do you expect in the patch? Obviously I'll include user's documentation on the configuration options, but some of the other protocols also has API documentation of (some of) their function hooks, and it's not immediately obvious to me what the purpose of those are? Should I add that for Babel as well?
There are three things in devel documentation: 1) Initial overall description. This is IMHO most important part, should be expanded and should describe overall interactions in the code. 2) (mostly) one-line description for structure fields (and constants) in header files (aligned usually to 40. column by tabs). Although these are not used to generate developer documentation, i generally consider them more useful than function description. 3) Documentation of functions. It is true that for protocols this is less important than for public lib or nest functions, but in the case of non-trivial / non-obvious functions it is a good idea. I would suggest to document esp. functions manipulating with internal structures, implementing some protocol processes and timer/event hooks. It is usually useless to document the usual protocol glue (e.g. babel_start, babel_make_tmp_attrs, babel_get_route_info), unless it does some nontrivial things (like babel_rt_notify()).
Second, can we really expect that TLVs in received packets are aligned? Although all TLVs looks like suited to 32-bit alignment, i did not found mentioned it in Babel RFC and availability of Pad1, PadN TLVs and per-byte compressible addresses suggest that they generally are not aligned.
In that case, we have to use get_uXX()/put_uXX() functions instead of directly accessing babel_tlv_* structure fields. Or we could copy each TLV to a local buffer before calling ntoh+handle callbacka, as each TLV internally is aligned, and enforce alignment for outgoing packets. I would generally prefer the first approach, but the second approach is simpler way to fix it.
No, I don't think they are necessarily 32-bit aligned. I struggled with alignment issues for the 64-bit values (and got it to work by using the __attribute__((packed)) attribute in the structure definitions). I'll try to figure out how to do this properly in general.
There are at least three options: 1) Use __attribute__((packed)) or some other GCC magic for the whole structure. Although it should reduce alignment of all internal fields to 1, i am not 100% sure whether it also guarantees that the whole structure may be unaligned. Also each access would use more generic code on some archs. 2) Just copy each TLV before processing to a local buffer, which makes it aligned. 3) Use get_uXX()/put_uXX() in handle/send functions. That would also eliminate a need for separate ntoh/hton functions. You could use something like: #define GET_U32(p,f) get_u32(((char *) p) + OFFSETOF(typeof(*p),f)) #define GET_U16(p,f) get_u16(((char *) p) + OFFSETOF(typeof(*p),f)) to access fields by GET_U16(tlv, seqno) instead of tlv->seqno. BTW, using babel_tlv_header is also a bit problematic, as some architectures enforce minimal structure alignment ant therefore size (to e.g. 4). Probably declaring it packed should be enough.
Rest are comments specific to lines in the patch:
Most of these seems obvious how to fix; one or two questions below:
diff --git a/nest/route.h b/nest/route.h ... @@ -538,6 +546,7 @@ extern struct protocol *attr_class_to_protocol[EAP_MAX]; #define DEF_PREF_RIP 120 /* RIP */ #define DEF_PREF_BGP 100 /* BGP */ #define DEF_PREF_PIPE 70 /* Routes piped from other tables */ +#define DEF_PREF_BABEL 42 /* Babel */
Default could be higher, e.g. 130
OK. What do these actually mean? Is it simply that a higher priority wins if two protocols have the same route?
Yes.
+enum babel_iface_type_t { + BABEL_IFACE_TYPE_WIRED, + BABEL_IFACE_TYPE_WIRELESS, + BABEL_IFACE_TYPE_MAX +};
I would prefer to have explicit values here. Perhaps also reserve 0 for undefined?
Well, undefined == wired in this case. I.e. use the simple metric computation mechanism unless explicitly told to treat the interface as wireless.
I don't suppose Bird has a way of telling automatically what type of interface we are dealing with?
Well, definitely not now.
+static u16 +babel_compute_rxcost(struct babel_neighbor *bn) +{ + struct babel_iface *bif = bn->bif; + struct babel_proto *p = bif->proto; + u8 n, missed; + u16 map=bn->hello_map; + + if(!map) return BABEL_INFINITY; + n = __builtin_popcount(map); // number of bits set
This should be defined as our inline function in lib/bitopts.h, then used here.
So just alias __builtin_popcount to something in bitopts.h?
Yes, may be u32_popcount().
Not entirely sure what compilers you are targeting; that macro seems to be GCC-specific, but is also supported in LLVM since 1.5: http://llvm.org/releases/1.5/docs/ReleaseNotes.html
We are mainly targetting GCC, but i guess we could support LLVM with some minor tweaks. Sane GCC extensions could be used, things like __buildin_* should be used through some define.
+int +babel_handle_ihu(struct babel_tlv_header *hdr, struct babel_parse_state *state) +{ ... + struct babel_neighbor *bn = babel_get_neighbor(bif, state->saddr); + bn->txcost = tlv->rxcost; + bn->ihu_expiry = now + 1.5*(tlv->interval/100);
Please avoid floating point ops.
So, substitute 3/2 for 1.5?
Well, obviously not (3/2)*(tlv->interval/100), but (tlv->interval*3/2)/100
+static void +babel_iface_linkdown(struct babel_iface *bif) +{ + struct babel_neighbor *bn; + struct babel_route *r; + node *n; + WALK_LIST(bn, bif->neigh_list) { + WALK_LIST(n, bn->routes) { + r = SKIP_BACK(struct babel_route, neigh_route, n); + r->metric = BABEL_INFINITY; + r->expires = now + r->expiry_interval; + babel_select_route(r->e); + } + } +}
Is there any reason why route handing is different in this case and in kill_iface() case?
My assumption was that linkdown means that the interface is likely to come back up, so I wanted to keep the routes and be able to refresh them when it does (and keep the hello seqno etc so the other hosts on the link don't refresh their metrics completely if the interface goes down for a short time). This came about while playing around with a laptop that was connected by wifi and ethernet to the same network (and running babel on both), then repeatedly unplugging and re-plugging the ethernet cable.
I will look at that.
+static void +babel_copy_config(struct proto_config *dest, struct proto_config *src)
You could just remove this function.
Yeah, well, was planning to actually make it do something useful instead (i.e. support proper reconfiguring).
This function is not related to reconfiguration, it is related to protocol templates, these do not make much sense for non-BGP protocols.
+void +babel_hton_seqno_request(struct babel_tlv_header *hdr) +{ + struct babel_tlv_seqno_request *tlv = (struct babel_tlv_seqno_request *)hdr; + tlv->seqno = htons(tlv->seqno); + tlv->router_id = htobe64(tlv->router_id); +}
There are some issues with portability of names and including appropriate header for htobe64() / be64toh(). Perhaps this should be handled somewhere in lib/.
Not sure what you mean here? Are you referring to htobe64() being glibc-specific? An how do you suggest to handle it? :)
Well, according to man page, these functions are not standardized (although widely available), be64toh is named betoh64 on OpenBSD, and different header file should be included on Linux and BSDs. We could have lib/endian.h, which can include appropriate headers and define aliases.
+struct babel_tlv_header * +babel_add_tlv_size(struct babel_iface *bif, u16 type, int len) +{ + struct babel_header *hdr = bif->current_buf; + struct babel_tlv_header *tlv; + int pktlen = sizeof(struct babel_header)+hdr->length; + if(pktlen+len > bif->max_pkt_len) { + babel_send_queue(bif); + pktlen = sizeof(struct babel_header)+hdr->length; + }
This seems broken - TLV is added to current_buf, which may be either tlv_buf or just sock->tbuf based on multicast vs unicast. But if there si not enough space, then babel_send_queue() is called, which assumes that data are in tlv_buf, and copies it to sock->tbuf.
Ah, was trying to be careful with this; obviously didn't work. Will give it another go.
BTW, why is babel_send_queue() / babel_copy_tlv() so complicated? If i understand it correctly, you have one TLV buffer (used for accumulation of TLVs) which has the same size as socket TX buffer, therefore you can (if TX buffer is empty) just copy whole TLV buffer to TX buffer and send the packet.
+int
+ bif->sock->rbsize = 10240; + bif->sock->iface = bif->iface; + bif->sock->tbuf = mb_alloc( bif->pool, bif->iface->mtu);
Why not just use:
bif->sock->rbsize = MIN(512, bif->iface->mtu);
You mean MAX?
Yes, of course :-) BTW, i missed one answer - in:
+static void +babel_select_route(struct babel_entry *e) +{ + struct babel_proto *p = e->proto; + net *n = net_get(p->p.table, e->n.prefix, e->n.pxlen); + struct babel_route *r, *cur = e->selected; + + /* try to find the best feasible route */ + WALK_LIST(r, e->routes) + if((!cur || r->metric < cur->metric) + && is_feasible(babel_find_source(e, r->router_id), + r->seqno, r->advert_metric)) + cur = r;
Why feasibility is checked here? I would expect that feasibility is checked when the route is received, but all accepted to e->routes stays feasible. Is that true or the routes in e->routes may later turn infeasible? -- Elen sila lumenn' omentielvo Ondrej 'Santiago' Zajicek (email: santiago@crfreenet.org) OpenPGP encrypted e-mails preferred (KeyID 0x11DEADC3, wwwkeys.pgp.net) "To err is human -- to blame it on a computer is even more so."
Ondrej Zajicek <santiago@crfreenet.org> writes:
Also one thing i forgot to notice. For generated external routes, you initialize metric to 0 (r->metric = 0 in babel_rt_notify()). You should use ea_get_int(attrs, EA_BABEL_METRIC, 0) to allow user set it in export filters.
Ah, so that's what that is for. Noted.
Default metric probably should be more than zero (zero makes sense for /128 local address, while more than zero for locally reachable network prefix).
Hmm, but if it's a prefix locally reachable prefix, no more routing is needed, so the *routing* metric is surely 0? There will still be path cost added at the neighbours... What else would you suggest it should be?
3) Documentation of functions. It is true that for protocols this is less important than for public lib or nest functions, but in the case of non-trivial / non-obvious functions it is a good idea. I would suggest to document esp. functions manipulating with internal structures, implementing some protocol processes and timer/event hooks. It is usually useless to document the usual protocol glue (e.g. babel_start, babel_make_tmp_attrs, babel_get_route_info), unless it does some nontrivial things (like babel_rt_notify()).
Right, will make sure there are appropriate comments in the code, of course. But should these be formatted so as to be picked up by the documentation extractor?
3) Use get_uXX()/put_uXX() in handle/send functions. That would also eliminate a need for separate ntoh/hton functions. You could use something like:
#define GET_U32(p,f) get_u32(((char *) p) + OFFSETOF(typeof(*p),f)) #define GET_U16(p,f) get_u16(((char *) p) + OFFSETOF(typeof(*p),f))
to access fields by GET_U16(tlv, seqno) instead of tlv->seqno.
Right, this makes sense, will try that.
BTW, using babel_tlv_header is also a bit problematic, as some architectures enforce minimal structure alignment ant therefore size (to e.g. 4). Probably declaring it packed should be enough.
Ah, bugger. Well, guess I'll have to change to copying the TLVs around in any case, then. Might be cleaner to change the architecture to do away with most of the pointer arithmetic entirely, then...
Not entirely sure what compilers you are targeting; that macro seems to be GCC-specific, but is also supported in LLVM since 1.5: http://llvm.org/releases/1.5/docs/ReleaseNotes.html
We are mainly targetting GCC, but i guess we could support LLVM with some minor tweaks. Sane GCC extensions could be used, things like __buildin_* should be used through some define.
Okidoki, noted.
Yeah, well, was planning to actually make it do something useful instead (i.e. support proper reconfiguring).
This function is not related to reconfiguration, it is related to protocol templates, these do not make much sense for non-BGP protocols.
Ah, I see. Cool, will nuke it.
Not sure what you mean here? Are you referring to htobe64() being glibc-specific? An how do you suggest to handle it? :)
Well, according to man page, these functions are not standardized (although widely available), be64toh is named betoh64 on OpenBSD, and different header file should be included on Linux and BSDs.
We could have lib/endian.h, which can include appropriate headers and define aliases.
Okay, I'll give it a shot. You may have to tweak it for other systems, though... :)
Ah, was trying to be careful with this; obviously didn't work. Will give it another go.
BTW, why is babel_send_queue() / babel_copy_tlv() so complicated?
Partly because the code evolved that way (added in the TLV buffering at a later stage).
If i understand it correctly, you have one TLV buffer (used for accumulation of TLVs) which has the same size as socket TX buffer, therefore you can (if TX buffer is empty) just copy whole TLV buffer to TX buffer and send the packet.
Hmm, that might be. I think the issue was that I wanted to support the case where the tx buffer wasn't empty, and so TLVs should be added to the end of an already existing packet. Not sure this really makes sense, though, will look over it again.
BTW, i missed one answer - in:
+static void +babel_select_route(struct babel_entry *e) +{ + struct babel_proto *p = e->proto; + net *n = net_get(p->p.table, e->n.prefix, e->n.pxlen); + struct babel_route *r, *cur = e->selected; + + /* try to find the best feasible route */ + WALK_LIST(r, e->routes) + if((!cur || r->metric < cur->metric) + && is_feasible(babel_find_source(e, r->router_id), + r->seqno, r->advert_metric)) + cur = r;
Why feasibility is checked here? I would expect that feasibility is checked when the route is received, but all accepted to e->routes stays feasible.
Is that true or the routes in e->routes may later turn infeasible?
Yes, because the feasibility distance can change. For example in this scenario: We receive route X from routers A and B with metrics 50 and 100, respectively. We then add the path cost of 20 to each of those, so we'll be announcing it with metric 70, making the route towards B infeasible (feasibility distance is 70 < 100). Now, if router B gets a better route and re-announces with metric 20 (this could also be a from a new router C), that route will turn feasible. We will then pick that, and re-announce with metric 40, turning that into our new feasibility distance. This makes the route towards A infeasible even though nothing changed in A's announcement. -Toke
On Fri, Oct 09, 2015 at 11:57:37AM +0200, Toke Høiland-Jørgensen wrote:
Default metric probably should be more than zero (zero makes sense for /128 local address, while more than zero for locally reachable network prefix).
Hmm, but if it's a prefix locally reachable prefix, no more routing is needed, so the *routing* metric is surely 0?
Generally, routing metric represents distance to the final destination (hosts in the network prefix) not just to the last router, so it should also contain the cost of the last hop from the router to the host. Usually this is not much important, but there are cases where it is. E.g. WLAN in infrastructure mode with two attached routers, one is AP, the other is a regular station, both connected by ethernet links to the rest of the network. In such case the station router should propagate the prefix of the WLAN with higher cost (say two times) than the AP router, so the AP router is a preferred way to reach the hosts on the WLAN for the rest of the network.
There will still be path cost added at the neighbours... What else would you suggest it should be?
Well, for RIP it is 1, for OSPF it is the same as the configured cost of the interface. The simple answer would be the same as the default cost of a regular link (BABEL_RXCOST_WIRED). Perhaps a better heuristic would be to try to find the iface associated with the route in list of babel_ifaces and then use the configured cost of that iface, BABEL_RXCOST_WIRED if not found.
3) Documentation of functions. It is true that for protocols this is less important than for public lib or nest functions, but in the case of non-trivial / non-obvious functions it is a good idea. I would suggest to document esp. functions manipulating with internal structures, implementing some protocol processes and timer/event hooks. It is usually useless to document the usual protocol glue (e.g. babel_start, babel_make_tmp_attrs, babel_get_route_info), unless it does some nontrivial things (like babel_rt_notify()).
Right, will make sure there are appropriate comments in the code, of course. But should these be formatted so as to be picked up by the documentation extractor?
Not for regular comments inside functions, but the initial 'block' documentation of function should use the format, i.e.: /** * rip_announce_rte - announce route from RIP routing table to the core * @p: RIP instance * @en: related network * * The function takes a list of incoming routes from @en, prepare appropriate * &rte for the core and propagate it by rte_update(). */ static void rip_announce_rte(struct rip_proto *p, struct rip_entry *en) { struct rip_rte *rt = en->routes; ...
Not sure what you mean here? Are you referring to htobe64() being glibc-specific? An how do you suggest to handle it? :)
Well, according to man page, these functions are not standardized (although widely available), be64toh is named betoh64 on OpenBSD, and different header file should be included on Linux and BSDs.
We could have lib/endian.h, which can include appropriate headers and define aliases.
Okay, I'll give it a shot. You may have to tweak it for other systems, though... :)
Well, if you use get_uXX, put_uXX, you would not need it. Just use these: static inline u64 get_u64(void *p) { u32 xh, xl; memcpy(&xh, p, 4); memcpy(&xl, p+4, 4); return (((u64) ntohl(xh)) << 32) | ntohl(xl); } static inline void put_u64(void *p, u64 x) { u32 xh, xl; xh = htonl(x >> 32); xl = htonl((u32) x); memcpy(p, &xh, 4); memcpy(p+4, &xl, 4); } (they are not part of lib/unaligned.h in the master branch, but they are from one of my branches)
Is that true or the routes in e->routes may later turn infeasible?
Yes, because the feasibility distance can change. For example in this scenario:
We receive route X from routers A and B with metrics 50 and 100, respectively. We then add the path cost of 20 to each of those, so we'll be announcing it with metric 70, making the route towards B infeasible (feasibility distance is 70 < 100). Now, if router B gets a better route and re-announces with metric 20 (this could also be a from a new router C), that route will turn feasible. We will then pick that, and re-announce with metric 40, turning that into our new feasibility distance. This makes the route towards A infeasible even though nothing changed in A's announcement.
Thanks. -- Elen sila lumenn' omentielvo Ondrej 'Santiago' Zajicek (email: santiago@crfreenet.org) OpenPGP encrypted e-mails preferred (KeyID 0x11DEADC3, wwwkeys.pgp.net) "To err is human -- to blame it on a computer is even more so."
participants (2)
-
Ondrej Zajicek -
Toke Høiland-Jørgensen