One possible way of supporting MPLS labels in bird 1.6

Lennert Buytenhek buytenh at wantstofly.org
Sat Aug 12 18:41:54 CEST 2017


Hi!

Included below is the patch we're using on top of bird 1.6 for kernel
MPLS label injection support.  This is not a very generic solution, it
shouldn't be merged, bird 2.0 probably does this in a better way (though
I haven't looked), and it's somewhat ugly, etc. etc. -- but it works for
us in production for its intended purpose.  #include <disclaimer.h>

The basic idea is that we add a route attribute 'mpls_label' which, when
set on a route, indicates that the given MPLS label should be used when
sending traffic for this prefix to the given gateway.

Example using static routes:

	protocol static {
		route 200.0.0.0/24 via 172.17.76.1 { mpls_label 666; };
	}

	protocol static {
		route 200.0.0.0/24 via 172.17.76.2 { mpls_label 667; };
	}

	protocol static {
		route 200.0.0.0/24 via 172.17.76.3 { mpls_label 668; };
	}

	protocol kernel {
		import filter { reject; };
		export filter { if net = 200.0.0.0/24 then accept; else reject; };
		merge paths yes;
	}

This will give you:

	# birdc show route
	BIRD 1.6.3 ready.
	200.0.0.0/24       via 172.17.76.1 (MPLS label 666) on br0 [static1 04:05:18] * (200)
			   via 172.17.76.3 (MPLS label 668) on br0 [static3 04:05:18] (200)
			   via 172.17.76.2 (MPLS label 667) on br0 [static2 04:05:18] (200)
	#

And:

	$ ip route show 200.0.0.0/24
	200.0.0.0/24 proto bird 
		nexthop encap mpls  666  via 172.17.76.1  dev br0 weight 1
		nexthop encap mpls  667  via 172.17.76.2  dev br0 weight 1
		nexthop encap mpls  668  via 172.17.76.3  dev br0 weight 1
	$


Learning routes back from the kernel also works, e.g. with this config:

	protocol kernel {
		import filter { print "Importing ", net; accept; };
		export filter { print "Exporting ", net; reject; };
		merge paths yes;
		learn yes;
	}

And:

	# ip route add 200.0.0.0/24 \
		nexthop encap mpls 667 via 172.17.76.2 \
		nexthop encap mpls 668 via 172.17.76.3

We get:

	# birdc show route 200.0.0.0/24
	BIRD 1.6.3 ready.
	200.0.0.0/24       multipath [kernel1 04:09:51] * (10)
		via 172.17.76.2 (MPLS label 667) on br0 weight 1
		via 172.17.76.3 (MPLS label 668) on br0 weight 1
	#


Finally, an example involving BGP.  Configuration for router "A":

	router id 172.17.76.1;

	log stderr all;
	debug protocols all;

	protocol device {
		scan time 3600;
	}

	protocol static {
		route 200.0.0.0/24 via 172.17.76.1;
	}

	protocol static {
		route 200.0.0.0/24 via 172.17.76.2;
	}

	protocol static {
		route 200.0.0.0/24 via 172.17.76.3;
	}

	protocol bgp b {
		local 172.17.76.1 as 1;
		neighbor 172.17.76.10 as 10;
		direct;
		export filter {
			if source = RTS_STATIC then accept;
			reject;
		};
		import filter { reject; };
		add paths tx;
	}

And the configuration for router "B":

	router id 172.17.76.10;

	log stderr all;
	debug protocols all;

	protocol device {
		scan time 3600;
	}

	protocol kernel {
		import filter { reject; };
		export filter {
			if source = RTS_BGP then {
				if gw = 172.17.76.1 then { mpls_label = 666; accept; }
				if gw = 172.17.76.2 then { mpls_label = 667; accept; }
				if gw = 172.17.76.3 then { mpls_label = 668; accept; }
			}
			reject;
		};
		merge paths yes;
	}

	protocol bgp a {
		local 172.17.76.10 as 10;
		neighbor 172.17.76.1 as 1;
		direct;
		export filter { reject; };
		import filter { accept; };
		add paths rx;
	}

This gives me on router "B", as expected:

	# ip route show 200.0.0.0/24
	200.0.0.0/24 proto bird 
		nexthop encap mpls  666  via 172.17.76.1  dev eth0 weight 1
		nexthop encap mpls  667  via 172.17.76.2  dev eth0 weight 1
		nexthop encap mpls  668  via 172.17.76.3  dev eth0 weight 1
	#


Notes:

* The mpls_label attribute is a member of struct rta rather than a
  sysdep/linux-specific attribute that lives on the eattr list.  I had
  to do it this way because the multipath route merging code isn't set
  up to deal nicely with platform-specific (here: Linux-specific)
  attributes.

* mpls_label is treated differently from all the other route attributes
  (except RTA_GATEWAY): none of those other attributes can differ
  between the nexthops of a multipath route, while mpls_label can.

* The netlink serialization code in bird 1.6 has a bug that shows up if
  you try to serialize an attribute that is not a multiple of 4 bytes
  long (such as RTA_ENCAP_TYPE, which is needed for MPLS and has a
  payload of only 2 bytes).  First, it will round up the size in the
  netlink header, which is wrong -- that field should reflect the length
  _without_ padding.  Second, if this was the last attribute in the
  message, it will then send out a netlink message without the padding
  appended, which is also wrong.  Third, between 1 and 3 bytes of
  uninitialized bird stack memory get leaked over netlink.  (The netlink
  attributes that upstream bird cares about are all multiples of 4 bytes
  long, so this won't actually surface in current bird upstream.)  The
  sysdep/linux/netlink.c changes in the patch below address this.

* This patch doesn't implement BGP Labeled Unicast or anything like that
  -- we currently signal MPLS labels using BGP extended communities.


Again, I don't see this being merged in any shape or form, but feedback
is always appreciated.


Cheers,
Lennert



diff --git a/filter/config.Y b/filter/config.Y
index 5ea83f81..56ef598b 100644
--- a/filter/config.Y
+++ b/filter/config.Y
@@ -335,7 +335,7 @@ CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN,
 	SET, STRING, BGPMASK, BGPPATH, CLIST, ECLIST, LCLIST,
 	IF, THEN, ELSE, CASE,
 	TRUE, FALSE, RT, RO, UNKNOWN, GENERIC,
-	FROM, GW, NET, MASK, PROTO, SOURCE, SCOPE, CAST, DEST, IFNAME, IFINDEX,
+	FROM, GW, NET, MASK, PROTO, SOURCE, SCOPE, CAST, DEST, IFNAME, IFINDEX, MPLS_LABEL,
 	PREFERENCE,
 	LEN,
 	DEFINED,
@@ -786,6 +786,7 @@ static_attr:
  | DEST    { $$ = f_new_inst(); $$->aux = T_ENUM_RTD;   $$->a2.i = SA_DEST;	$$->a1.i = 1; }
  | IFNAME  { $$ = f_new_inst(); $$->aux = T_STRING;     $$->a2.i = SA_IFNAME; }
  | IFINDEX { $$ = f_new_inst(); $$->aux = T_INT;        $$->a2.i = SA_IFINDEX; }
+ | MPLS_LABEL { $$ = f_new_inst(); $$->aux = T_INT;     $$->a2.i = SA_MPLS_LABEL;	$$->a1.i = 1; }
  ;
 
 term:
diff --git a/filter/filter.c b/filter/filter.c
index f18970e0..773bf61d 100644
--- a/filter/filter.c
+++ b/filter/filter.c
@@ -928,6 +928,7 @@ interpret(struct f_inst *what)
       case SA_DEST:	res.val.i = rta->dest; break;
       case SA_IFNAME:	res.val.s = rta->iface ? rta->iface->name : ""; break;
       case SA_IFINDEX:	res.val.i = rta->iface ? rta->iface->index : 0; break;
+      case SA_MPLS_LABEL:	res.val.i = rta->mpls_label; break;
 
       default:
 	bug("Invalid static attribute access (%x)", res.type);
@@ -959,6 +960,7 @@ interpret(struct f_inst *what)
 
 	  rta->dest = RTD_ROUTER;
 	  rta->gw = ip;
+	  rta->mpls_label = 0;
 	  rta->iface = n->iface;
 	  rta->nexthops = NULL;
 	  rta->hostentry = NULL;
@@ -976,11 +978,16 @@ interpret(struct f_inst *what)
 
 	rta->dest = i;
 	rta->gw = IPA_NONE;
+	rta->mpls_label = 0;
 	rta->iface = NULL;
 	rta->nexthops = NULL;
 	rta->hostentry = NULL;
 	break;
 
+      case SA_MPLS_LABEL:
+	rta->mpls_label = v1.val.i;
+	break;
+
       default:
 	bug("Invalid static attribute access (%x)", res.type);
       }
diff --git a/filter/filter.h b/filter/filter.h
index 049ceb76..dfcb5b7a 100644
--- a/filter/filter.h
+++ b/filter/filter.h
@@ -200,6 +200,7 @@ void val_format(struct f_val v, buffer *buf);
 #define SA_DEST		 8
 #define SA_IFNAME	 9
 #define SA_IFINDEX	10
+#define SA_MPLS_LABEL	11
 
 
 struct f_tree {
diff --git a/nest/route.h b/nest/route.h
index 383f4def..2defb29f 100644
--- a/nest/route.h
+++ b/nest/route.h
@@ -332,6 +332,7 @@ void rt_show(struct rt_show_data *);
 /* Multipath next-hop */
 struct mpnh {
   ip_addr gw;				/* Next hop */
+  u32 mpls_label;
   struct iface *iface;			/* Outgoing interface */
   struct mpnh *next;
   byte weight;
@@ -359,6 +360,7 @@ typedef struct rta {
   u16 hash_key;				/* Hash over important fields */
   u32 igp_metric;			/* IGP metric to next hop (for iBGP routes) */
   ip_addr gw;				/* Next hop */
+  u32 mpls_label;
   ip_addr from;				/* Advertising router */
   struct hostentry *hostentry;		/* Hostentry for recursive next-hops */
   struct iface *iface;			/* Outgoing interface */
diff --git a/nest/rt-attr.c b/nest/rt-attr.c
index edf27d44..8d2e517f 100644
--- a/nest/rt-attr.c
+++ b/nest/rt-attr.c
@@ -200,7 +200,7 @@ mpnh_hash(struct mpnh *x)
 {
   uint h = 0;
   for (; x; x = x->next)
-    h ^= ipa_hash(x->gw);
+    h ^= ipa_hash(x->gw) ^ x->mpls_label;
 
   return h;
 }
@@ -209,7 +209,7 @@ int
 mpnh__same(struct mpnh *x, struct mpnh *y)
 {
   for (; x && y; x = x->next, y = y->next)
-    if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || (x->weight != y->weight))
+    if (!ipa_equal(x->gw, y->gw) || (x->mpls_label != y->mpls_label) || (x->iface != y->iface) || (x->weight != y->weight))
       return 0;
 
   return x == y;
@@ -230,6 +230,11 @@ mpnh_compare_node(struct mpnh *x, struct mpnh *y)
   if (r)
     return r;
 
+  if (x->mpls_label < y->mpls_label)
+    return -1;
+  else if (x->mpls_label > y->mpls_label)
+    return 1;
+
   r = ipa_compare(x->gw, y->gw);
   if (r)
     return r;
@@ -242,6 +247,7 @@ mpnh_copy_node(const struct mpnh *src, linpool *lp)
 {
   struct mpnh *n = lp_alloc(lp, sizeof(struct mpnh));
   n->gw = src->gw;
+  n->mpls_label = src->mpls_label;
   n->iface = src->iface;
   n->next = NULL;
   n->weight = src->weight;
@@ -341,6 +347,7 @@ mpnh_copy(struct mpnh *o)
     {
       struct mpnh *n = sl_alloc(mpnh_slab);
       n->gw = o->gw;
+      n->mpls_label = o->mpls_label;
       n->iface = o->iface;
       n->next = NULL;
       n->weight = o->weight;
@@ -1051,7 +1058,7 @@ rta_alloc_hash(void)
 static inline uint
 rta_hash(rta *a)
 {
-  return (((uint) (uintptr_t) a->src) ^ ipa_hash(a->gw) ^
+  return (((uint) (uintptr_t) a->src) ^ ipa_hash(a->gw) ^ a->mpls_label ^
 	  mpnh_hash(a->nexthops) ^ ea_hash(a->eattrs)) & 0xffff;
 }
 
@@ -1066,6 +1073,7 @@ rta_same(rta *x, rta *y)
 	  x->flags == y->flags &&
 	  x->igp_metric == y->igp_metric &&
 	  ipa_equal(x->gw, y->gw) &&
+	  x->mpls_label == y->mpls_label &&
 	  ipa_equal(x->from, y->from) &&
 	  x->iface == y->iface &&
 	  x->hostentry == y->hostentry &&
@@ -1214,7 +1222,11 @@ rta_dump(rta *a)
     debug(" !CACHED");
   debug(" <-%I", a->from);
   if (a->dest == RTD_ROUTER)
-    debug(" ->%I", a->gw);
+    {
+      debug(" ->%I", a->gw);
+      if (a->mpls_label)
+	debug(" (MPLS label %d)", a->mpls_label);
+    }
   if (a->dest == RTD_DEVICE || a->dest == RTD_ROUTER)
     debug(" [%s]", a->iface ? a->iface->name : "???" );
   if (a->eattrs)
diff --git a/nest/rt-table.c b/nest/rt-table.c
index c6e48c38..4e6954f0 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -594,7 +594,7 @@ rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *ol
 static struct mpnh *
 mpnh_merge_rta(struct mpnh *nhs, rta *a, linpool *pool, int max)
 {
-  struct mpnh nh = { .gw = a->gw, .iface = a->iface };
+  struct mpnh nh = { .gw = a->gw, .mpls_label = a->mpls_label, .iface = a->iface };
   struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;
   return mpnh_merge(nhs, nh2, 1, 0, max, pool);
 }
@@ -1696,6 +1696,7 @@ rta_next_hop_outdated(rta *a)
     return a->dest != RTD_UNREACHABLE;
 
   return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) ||
+    (a->mpls_label != he->src->mpls_label) ||
     (a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
     !mpnh_same(a->nexthops, he->src->nexthops);
 }
@@ -1706,6 +1707,7 @@ rta_apply_hostentry(rta *a, struct hostentry *he)
   a->hostentry = he;
   a->iface = he->src ? he->src->iface : NULL;
   a->gw = he->gw;
+  a->mpls_label = he->src ? he->src->mpls_label : 0;
   a->dest = he->dest;
   a->igp_metric = he->igp_metric;
   a->nexthops = he->src ? he->src->nexthops : NULL;
@@ -2302,6 +2304,7 @@ rt_update_hostentry(rtable *tab, struct hostentry *he)
 	{
 	  /* The host is reachable through some route entry */
 	  he->gw = a->gw;
+	  // @@@ a->mpls_label;
 	  he->dest = a->dest;
 	}
 
@@ -2379,12 +2382,19 @@ rt_format_via(rte *e)
 {
   rta *a = e->attrs;
 
+  byte info[256];
+
+  if (a->mpls_label)
+    bsprintf(info, " (MPLS label %d)", a->mpls_label);
+  else
+    info[0] = 0;
+
   /* Max text length w/o IP addr and interface name is 16 */
   static byte via[STD_ADDRESS_P_LENGTH+sizeof(a->iface->name)+16];
 
   switch (a->dest)
     {
-    case RTD_ROUTER:	bsprintf(via, "via %I on %s", a->gw, a->iface->name); break;
+    case RTD_ROUTER:	bsprintf(via, "via %I%s on %s", a->gw, info, a->iface->name); break;
     case RTD_DEVICE:	bsprintf(via, "dev %s", a->iface->name); break;
     case RTD_BLACKHOLE:	bsprintf(via, "blackhole"); break;
     case RTD_UNREACHABLE:	bsprintf(via, "unreachable"); break;
@@ -2429,7 +2439,13 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tm
   cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, rt_format_via(e), a->src->proto->name,
 	     tm, from, primary ? (sync_error ? " !" : " *") : "", info);
   for (nh = a->nexthops; nh; nh = nh->next)
-    cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1);
+    {
+      if (nh->mpls_label)
+	bsprintf(info, " (MPLS label %d)", nh->mpls_label);
+      else
+	info[0] = 0;
+      cli_printf(c, -1007, "\tvia %I%s on %s weight %d", nh->gw, info, nh->iface->name, nh->weight + 1);
+    }
   if (d->verbose)
     rta_show(c, a, tmpa);
 }
diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c
index cd3f3f4f..a23b081c 100644
--- a/proto/bgp/packets.c
+++ b/proto/bgp/packets.c
@@ -1179,6 +1179,7 @@ bgp_set_next_hop(struct bgp_proto *p, rta *a)
 
       a->dest = RTD_ROUTER;
       a->gw = ng->addr;
+      a->mpls_label = 0;
       a->iface = ng->iface;
       a->hostentry = NULL;
       a->igp_metric = 0;
diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c
index 368e3d05..73ed474e 100644
--- a/proto/ospf/rt.c
+++ b/proto/ospf/rt.c
@@ -47,6 +47,7 @@ new_nexthop(struct ospf_proto *p, ip_addr gw, struct iface *iface, byte weight)
 {
   struct mpnh *nh = lp_alloc(p->nhpool, sizeof(struct mpnh));
   nh->gw = gw;
+  nh->mpls_label = 0;
   nh->iface = iface;
   nh->next = NULL;
   nh->weight = weight;
@@ -1893,6 +1894,7 @@ ort_changed(ort *nf, rta *nr)
     (nf->n.tag != nf->old_tag) || (nf->n.rid != nf->old_rid) ||
     (nr->source != or->source) || (nr->dest != or->dest) ||
     (nr->iface != or->iface) || !ipa_equal(nr->gw, or->gw) ||
+    (nr->mpls_label != or->mpls_label) ||
     !mpnh_same(nr->nexthops, or->nexthops);
 }
 
@@ -1956,6 +1958,7 @@ again1:
 	a0.dest = RTD_ROUTER;
 	a0.iface = nf->n.nhs->iface;
 	a0.gw = nf->n.nhs->gw;
+	a0.mpls_label = 0;
       }
       else
       {
diff --git a/proto/rip/rip.c b/proto/rip/rip.c
index 7b380097..81821dcd 100644
--- a/proto/rip/rip.c
+++ b/proto/rip/rip.c
@@ -182,6 +182,7 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en)
 
 	struct mpnh *nh = alloca(sizeof(struct mpnh));
 	nh->gw = rt->next_hop;
+	nh->mpls_label = 0;
 	nh->iface = rt->from->nbr->iface;
 	nh->weight = rt->from->ifa->cf->ecmp_weight;
 	mpnh_insert(&nhs, nh);
@@ -199,6 +200,7 @@ rip_announce_rte(struct rip_proto *p, struct rip_entry *en)
       /* Unipath route */
       a0.dest = RTD_ROUTER;
       a0.gw = rt->next_hop;
+      a0.mpls_label = 0;
       a0.iface = rt->from->nbr->iface;
       a0.from = rt->from->nbr->addr;
     }
diff --git a/proto/static/config.Y b/proto/static/config.Y
index 182721b3..9d2a9205 100644
--- a/proto/static/config.Y
+++ b/proto/static/config.Y
@@ -78,6 +78,9 @@ stat_multipath1:
  | stat_multipath1 BFD bool {
      this_srt_nh->use_bfd = $3; cf_check_bfd($3);
    }
+ | stat_multipath1 MPLS_LABEL expr {
+     this_srt_nh->mpls_label = $3;
+   }
  ;
 
 stat_multipath:
@@ -115,6 +118,7 @@ stat_route:
 stat_route_item:
    cmd { *this_srt_last_cmd = $1; this_srt_last_cmd = &($1->next); }
  | BFD bool ';' { this_srt->use_bfd = $2; cf_check_bfd($2); }
+ | MPLS_LABEL expr ';' { this_srt->mpls_label = $2; }
  ;
 
 stat_route_opts:
diff --git a/proto/static/static.c b/proto/static/static.c
index 849067b9..36a0bd60 100644
--- a/proto/static/static.c
+++ b/proto/static/static.c
@@ -75,6 +75,7 @@ static_install(struct proto *p, struct static_route *r, struct iface *ifa)
   a.cast = RTC_UNICAST;
   a.dest = r->dest;
   a.gw = r->via;
+  a.mpls_label = r->mpls_label;
   a.iface = ifa;
 
   if (r->dest == RTD_MULTIPATH)
@@ -87,6 +88,7 @@ static_install(struct proto *p, struct static_route *r, struct iface *ifa)
 	  {
 	    struct mpnh *nh = alloca(sizeof(struct mpnh));
 	    nh->gw = r2->via;
+	    nh->mpls_label = r2->mpls_label;
 	    nh->iface = r2->neigh->iface;
 	    nh->weight = r2->masklen; /* really */
 	    mpnh_insert(&nhs, nh);
@@ -98,6 +100,7 @@ static_install(struct proto *p, struct static_route *r, struct iface *ifa)
 	  /* Fallback to unipath route for exactly one nexthop */
 	  a.dest = RTD_ROUTER;
 	  a.gw = nhs->gw;
+	  a.mpls_label = nhs->mpls_label;
 	  a.iface = nhs->iface;
 	}
       else
@@ -128,11 +131,17 @@ static void
 static_remove(struct proto *p, struct static_route *r)
 {
   net *n;
+  byte info[256];
 
   if (!r->installed)
     return;
 
-  DBG("Removing static route %I/%d via %I\n", r->net, r->masklen, r->via);
+  if (r->mpls_label)
+    bsprintf(info, " (MPLS label %d)", r->mpls_label);
+  else
+    info[0] = 0;
+
+  DBG("Removing static route %I/%d via %I%s\n", r->net, r->masklen, r->via, info);
   n = net_find(p->table, r->net, r->masklen);
   rte_update(p, n, NULL);
   r->installed = 0;
@@ -389,7 +398,10 @@ static_dump_rt(struct static_route *r)
   switch (r->dest)
     {
     case RTD_ROUTER:
-      debug("via %I\n", r->via);
+      debug("via %I", r->via);
+      if (r->mpls_label)
+	debug(" (MPLS label %d)", r->mpls_label);
+      debug("\n", r->via);
       break;
     case RTD_DEVICE:
       debug("dev %s\n", r->if_name);
@@ -474,7 +486,7 @@ static_same_dest(struct static_route *x, struct static_route *y)
   switch (x->dest)
     {
     case RTD_ROUTER:
-      return ipa_equal(x->via, y->via) && (x->via_if == y->via_if);
+      return ipa_equal(x->via, y->via) && (x->mpls_label == y->mpls_label) && (x->via_if == y->via_if);
 
     case RTD_DEVICE:
       return !strcmp(x->if_name, y->if_name);
@@ -483,7 +495,7 @@ static_same_dest(struct static_route *x, struct static_route *y)
       for (x = x->mp_next, y = y->mp_next;
 	   x && y;
 	   x = x->mp_next, y = y->mp_next)
-	if (!ipa_equal(x->via, y->via) || (x->via_if != y->via_if) || (x->use_bfd != y->use_bfd))
+	if (!ipa_equal(x->via, y->via) || (x->mpls_label != y->mpls_label) || (x->via_if != y->via_if) || (x->use_bfd != y->use_bfd))
 	  return 0;
       return !x && !y;
 
@@ -645,6 +657,7 @@ static void
 static_show_rt(struct static_route *r)
 {
   byte via[STD_ADDRESS_P_LENGTH + 16];
+  byte info[256];
 
   switch (r->dest)
     {
@@ -657,14 +670,24 @@ static_show_rt(struct static_route *r)
     case RTDX_RECURSIVE: bsprintf(via, "recursive %I", r->via); break;
     default:		bsprintf(via, "???");
     }
-  cli_msg(-1009, "%I/%d %s%s%s", r->net, r->masklen, via,
+  if (r->dest == RTD_ROUTER && r->mpls_label)
+    bsprintf(info, " (MPLS label %d)", r->mpls_label);
+  else
+    info[0] = 0;
+  cli_msg(-1009, "%I/%d %s%s%s%s", r->net, r->masklen, via, info,
 	  r->bfd_req ? " (bfd)" : "", r->installed ? "" : " (dormant)");
 
   struct static_route *r2;
   if (r->dest == RTD_MULTIPATH)
     for (r2 = r->mp_next; r2; r2 = r2->mp_next)
-      cli_msg(-1009, "\tvia %I%J weight %d%s%s", r2->via, r2->via_if, r2->masklen + 1, /* really */
-	      r2->bfd_req ? " (bfd)" : "", r2->installed ? "" : " (dormant)");
+      {
+	if (r->dest == RTD_ROUTER && r->mpls_label)
+	  bsprintf(info, " (MPLS label %d)", r->mpls_label);
+	else
+	  info[0] = 0;
+	cli_msg(-1009, "\tvia %I%J%s weight %d%s%s", r2->via, r2->via_if, info, r2->masklen + 1, /* really */
+	        r2->bfd_req ? " (bfd)" : "", r2->installed ? "" : " (dormant)");
+      }
 }
 
 void
diff --git a/proto/static/static.h b/proto/static/static.h
index 6b047234..5811a888 100644
--- a/proto/static/static.h
+++ b/proto/static/static.h
@@ -30,6 +30,7 @@ struct static_route {
   int masklen;				/* Mask length */
   int dest;				/* Destination type (RTD_*) */
   ip_addr via;				/* Destination router */
+  u32 mpls_label;
   struct iface *via_if;			/* Destination iface, for link-local vias */
   struct neighbor *neigh;
   byte *if_name;			/* Name for RTD_DEVICE routes */
diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c
index 22313f43..16e8605f 100644
--- a/sysdep/linux/netlink.c
+++ b/sysdep/linux/netlink.c
@@ -49,6 +49,22 @@
 #define RTA_TABLE  15
 #endif
 
+#ifndef RTA_ENCAP_TYPE
+#define RTA_ENCAP_TYPE 21
+#endif
+
+#ifndef RTA_ENCAP
+#define RTA_ENCAP 22
+#endif
+
+#ifndef MPLS_IPTUNNEL_DST
+#define MPLS_IPTUNNEL_DST 1
+#endif
+
+#ifndef LWTUNNEL_ENCAP_MPLS
+#define LWTUNNEL_ENCAP_MPLS 1
+#endif
+
 
 #ifdef IPV6
 #define krt_ecmp6(X) 1
@@ -318,10 +334,12 @@ static struct nl_want_attrs ifa_attr_want6[BIRD_IFA_MAX] = {
 #endif
 
 
-#define BIRD_RTA_MAX  (RTA_TABLE+1)
+#define BIRD_RTA_MAX  (RTA_ENCAP + 1)
 
 static struct nl_want_attrs mpnh_attr_want4[BIRD_RTA_MAX] = {
   [RTA_GATEWAY]	  = { 1, 1, sizeof(ip4_addr) },
+  [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
+  [RTA_ENCAP]     = { 1, 1, RTA_LENGTH(4) },
 };
 
 #ifndef IPV6
@@ -335,6 +353,8 @@ static struct nl_want_attrs rtm_attr_want4[BIRD_RTA_MAX] = {
   [RTA_MULTIPATH] = { 1, 0, 0 },
   [RTA_FLOW]	  = { 1, 1, sizeof(u32) },
   [RTA_TABLE]	  = { 1, 1, sizeof(u32) },
+  [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
+  [RTA_ENCAP]     = { 1, 1, RTA_LENGTH(4) },
 };
 #else
 static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
@@ -347,6 +367,8 @@ static struct nl_want_attrs rtm_attr_want6[BIRD_RTA_MAX] = {
   [RTA_METRICS]	  = { 1, 0, 0 },
   [RTA_FLOW]	  = { 1, 1, sizeof(u32) },
   [RTA_TABLE]	  = { 1, 1, sizeof(u32) },
+  [RTA_ENCAP_TYPE]= { 1, 1, sizeof(u16) },
+  [RTA_ENCAP]     = { 1, 1, RTA_LENGTH(4) },
 };
 #endif
 
@@ -380,6 +402,9 @@ nl_parse_attrs(struct rtattr *a, struct nl_want_attrs *want, struct rtattr **k,
   return 1;
 }
 
+static inline u16 rta_get_u16(struct rtattr *a)
+{ return *(u16 *) RTA_DATA(a); }
+
 static inline u32 rta_get_u32(struct rtattr *a)
 { return *(u32 *) RTA_DATA(a); }
 
@@ -396,17 +421,23 @@ nl_add_attr(struct nlmsghdr *h, uint bufsize, uint code, const void *data, uint
   uint pos = NLMSG_ALIGN(h->nlmsg_len);
   uint len = RTA_LENGTH(dlen);
 
+  if (pos != h->nlmsg_len)
+    bug("nl_add_attr: packet buffer misaligned");
+
   if (pos + len > bufsize)
     bug("nl_add_attr: packet buffer overflow");
 
   struct rtattr *a = (struct rtattr *)((char *)h + pos);
   a->rta_type = code;
   a->rta_len = len;
-  h->nlmsg_len = pos + len;
+  h->nlmsg_len = NLMSG_ALIGN(pos + len);
 
   if (dlen > 0)
     memcpy(RTA_DATA(a), data, dlen);
 
+  if (h->nlmsg_len != pos + len)
+    memset(RTA_DATA(a) + len, 0, h->nlmsg_len - (pos + len));
+
   return a;
 }
 
@@ -432,7 +463,16 @@ nl_open_attr(struct nlmsghdr *h, uint bufsize, uint code)
 static inline void
 nl_close_attr(struct nlmsghdr *h, struct rtattr *a)
 {
-  a->rta_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)a;
+  uint pos;
+
+  a->rta_len = (void *)h + h->nlmsg_len - (void *)a;
+
+  pos = NLMSG_ALIGN(h->nlmsg_len);
+  if (pos != h->nlmsg_len)
+    {
+      memset((void *)h + h->nlmsg_len, 0, pos - h->nlmsg_len);
+      h->nlmsg_len = pos;
+    }
 }
 
 static inline struct rtnexthop *
@@ -441,10 +481,13 @@ nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
   uint pos = NLMSG_ALIGN(h->nlmsg_len);
   uint len = RTNH_LENGTH(0);
 
+  if (pos != h->nlmsg_len)
+    bug("nl_open_nexthop: packet buffer misaligned");
+
   if (pos + len > bufsize)
     bug("nl_open_nexthop: packet buffer overflow");
 
-  h->nlmsg_len = pos + len;
+  h->nlmsg_len = NLMSG_ALIGN(pos + len);
 
   return (void *)h + pos;
 }
@@ -452,7 +495,16 @@ nl_open_nexthop(struct nlmsghdr *h, uint bufsize)
 static inline void
 nl_close_nexthop(struct nlmsghdr *h, struct rtnexthop *nh)
 {
-  nh->rtnh_len = (void *)h + NLMSG_ALIGN(h->nlmsg_len) - (void *)nh;
+  uint pos;
+
+  nh->rtnh_len = (void *)h + h->nlmsg_len - (void *)nh;
+
+  pos = NLMSG_ALIGN(h->nlmsg_len);
+  if (pos != h->nlmsg_len)
+    {
+      memset((void *)h + h->nlmsg_len, 0, pos - h->nlmsg_len);
+      h->nlmsg_len = pos;
+    }
 }
 
 static void
@@ -470,6 +522,23 @@ nl_add_multipath(struct nlmsghdr *h, unsigned bufsize, struct mpnh *nh)
 
     nl_add_attr_ipa(h, bufsize, RTA_GATEWAY, nh->gw);
 
+    if (nh->mpls_label)
+      {
+	struct rta_encap {
+	  struct rtattr header;
+	  u32 mpls_label;
+	} encap;
+	u16 et;
+
+	encap.header.rta_len = sizeof(encap);
+	encap.header.rta_type = MPLS_IPTUNNEL_DST;
+	encap.mpls_label = htonl(((nh->mpls_label & 0xfffff) << 12) | 0x100);
+	nl_add_attr(h, bufsize, RTA_ENCAP, &encap, sizeof(encap));
+
+	et = LWTUNNEL_ENCAP_MPLS;
+	nl_add_attr(h, bufsize, RTA_ENCAP_TYPE, &et, sizeof(et));
+      }
+
     nl_close_nexthop(h, rtnh);
   }
 
@@ -529,6 +598,19 @@ nl_parse_multipath(struct krt_proto *p, struct rtattr *ra)
       else
 	return NULL;
 
+      rv->mpls_label = 0;
+      if (a[RTA_ENCAP_TYPE] && rta_get_u16(a[RTA_ENCAP_TYPE]) == LWTUNNEL_ENCAP_MPLS)
+	{
+	  struct rtattr *encap = a[RTA_ENCAP];
+
+	  if (encap != NULL)
+	    {
+	      struct rtattr *enc2 = RTA_DATA(encap);
+	      if (enc2->rta_type == MPLS_IPTUNNEL_DST)
+		rv->mpls_label = (ntohl(rta_get_u32(enc2)) >> 12) & 0xfffff;
+	    }
+	}
+
       len -= NLMSG_ALIGN(nh->rtnh_len);
       nh = RTNH_NEXT(nh);
     }
@@ -870,12 +952,12 @@ nh_bufsize(struct mpnh *nh)
 {
   int rv = 0;
   for (; nh != NULL; nh = nh->next)
-    rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)));
+    rv += RTNH_LENGTH(RTA_LENGTH(sizeof(ip_addr)) + RTA_LENGTH(RTA_LENGTH(4)) + RTA_LENGTH(2));
   return rv;
 }
 
 static int
-nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, ip_addr gw, struct iface *iface)
+nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, ip_addr gw, u32 mpls_label, struct iface *iface)
 {
   eattr *ea;
   net *net = e->net;
@@ -893,7 +975,7 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int d
   bzero(&r.h, sizeof(r.h));
   bzero(&r.r, sizeof(r.r));
   r.h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
-  r.h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
+  r.h.nlmsg_len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(struct rtmsg)));
   r.h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
 
   r.r.rtm_family = BIRD_AF;
@@ -964,6 +1046,22 @@ dest:
       r.r.rtm_type = RTN_UNICAST;
       nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, iface->index);
       nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, gw);
+      if (mpls_label)
+	{
+	  struct rta_encap {
+	    struct rtattr header;
+	    u32 mpls_label;
+	  } encap;
+	  u16 et;
+
+	  encap.header.rta_len = sizeof(encap);
+	  encap.header.rta_type = MPLS_IPTUNNEL_DST;
+	  encap.mpls_label = htonl(((mpls_label & 0xfffff) << 12) | 0x100);
+	  nl_add_attr(&r.h, sizeof(r), RTA_ENCAP, &encap, sizeof(encap));
+
+	  et = LWTUNNEL_ENCAP_MPLS;
+	  nl_add_attr(&r.h, sizeof(r), RTA_ENCAP_TYPE, &et, sizeof(et));
+	}
       break;
     case RTD_DEVICE:
       r.r.rtm_type = RTN_UNICAST;
@@ -1002,17 +1100,17 @@ nl_add_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
   {
     struct mpnh *nh = a->nexthops;
 
-    err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_ROUTER, nh->gw, nh->iface);
+    err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_ROUTER, nh->gw, nh->mpls_label, nh->iface);
     if (err < 0)
       return err;
 
     for (nh = nh->next; nh; nh = nh->next)
-      err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_ROUTER, nh->gw, nh->iface);
+      err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_ROUTER, nh->gw, nh->mpls_label, nh->iface);
 
     return err;
   }
 
-  return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, a->gw, a->iface);
+  return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, a->gw, a->mpls_label, a->iface);
 }
 
 static inline int
@@ -1022,7 +1120,7 @@ nl_delete_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
 
   /* For IPv6, we just repeatedly request DELETE until we get error */
   do
-    err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, IPA_NONE, NULL);
+    err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, IPA_NONE, 0, NULL);
   while (krt_ecmp6(p) && !err);
 
   return err;
@@ -1058,11 +1156,12 @@ krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list
 
 
 static inline struct mpnh *
-nl_alloc_mpnh(struct nl_parse_state *s, ip_addr gw, struct iface *iface, byte weight)
+nl_alloc_mpnh(struct nl_parse_state *s, ip_addr gw, u32 mpls_label, struct iface *iface, byte weight)
 {
   struct mpnh *nh = lp_alloc(s->pool, sizeof(struct mpnh));
 
   nh->gw = gw;
+  nh->mpls_label = mpls_label;
   nh->iface = iface;
   nh->next = NULL;
   nh->weight = weight;
@@ -1268,6 +1367,7 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
 	  ra->dest = RTD_ROUTER;
 	  memcpy(&ra->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra->gw));
 	  ipa_ntoh(ra->gw);
+	  ra->mpls_label = 0;
 
 #ifdef IPV6
 	  /* Silently skip strange 6to4 routes */
@@ -1290,6 +1390,18 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
 	  def_scope = RT_SCOPE_LINK;
 	}
 
+      if (a[RTA_ENCAP_TYPE] && rta_get_u16(a[RTA_ENCAP_TYPE]) == LWTUNNEL_ENCAP_MPLS)
+	{
+	  struct rtattr *encap = a[RTA_ENCAP];
+
+	  if (encap != NULL)
+	    {
+	      struct rtattr *enc2 = RTA_DATA(encap);
+	      if (enc2->rta_type == MPLS_IPTUNNEL_DST)
+		ra->mpls_label = (ntohl(rta_get_u32(enc2)) >> 12) & 0xfffff;
+	    }
+	}
+
       break;
     case RTN_BLACKHOLE:
       ra->dest = RTD_BLACKHOLE;
@@ -1409,10 +1521,10 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
     if (a->dest != RTD_MULTIPATH)
     {
       a->dest = RTD_MULTIPATH;
-      a->nexthops = nl_alloc_mpnh(s, a->gw, a->iface, 0);
+      a->nexthops = nl_alloc_mpnh(s, a->gw, a->mpls_label, a->iface, 0);
     }
 
-    mpnh_insert(&a->nexthops, nl_alloc_mpnh(s, ra->gw, ra->iface, 0));
+    mpnh_insert(&a->nexthops, nl_alloc_mpnh(s, ra->gw, ra->mpls_label, ra->iface, 0));
   }
 }
 
diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c
index 07a55c0d..2d4c4029 100644
--- a/sysdep/unix/krt.c
+++ b/sysdep/unix/krt.c
@@ -657,7 +657,7 @@ krt_same_dest(rte *k, rte *e)
   switch (ka->dest)
     {
     case RTD_ROUTER:
-      return ipa_equal(ka->gw, ea->gw);
+      return ipa_equal(ka->gw, ea->gw) && (ka->mpls_label == ea->mpls_label);
     case RTD_DEVICE:
       return !strcmp(ka->iface->name, ea->iface->name);
     case RTD_MULTIPATH:


More information about the Bird-users mailing list