[PATCH 1/1] * Implement correct RFC4271 MED comparison

Alexander V. Chernikov melifaro at ipfw.ru
Fri Nov 18 02:53:09 CET 2011


---
 filter/filter.c    |    6 +
 nest/proto-hooks.c |    3 +-
 nest/protocol.h    |    6 +-
 nest/route.h       |    8 ++
 nest/rt-table.c    |   58 +++++++++--
 proto/bgp/attrs.c  |  289 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 proto/bgp/bgp.c    |    4 +-
 proto/bgp/bgp.h    |    9 ++-
 proto/ospf/ospf.c  |    4 +-
 proto/rip/rip.c    |    4 +-
 10 files changed, 374 insertions(+), 17 deletions(-)

diff --git a/filter/filter.c b/filter/filter.c
index b3b17dc..77af39a 100644
--- a/filter/filter.c
+++ b/filter/filter.c
@@ -935,6 +935,12 @@ interpret(struct f_inst *what)
 	if (v1.type != T_PATH)
 	  runtime( "Setting path attribute to non-path value" );
 	l->attrs[0].u.ptr = v1.val.ad;
+	/*
+	 * Zero cache on AS-PATH change.
+	 * XXX: This breaks COW concept but
+	 * removing this flag is harmless
+	 */
+	(*f_rte)->pflags &= ~0x01;
 	break;
       case EAF_TYPE_INT_SET:
 	if (v1.type != T_CLIST)
diff --git a/nest/proto-hooks.c b/nest/proto-hooks.c
index f026192..1dec95c 100644
--- a/nest/proto-hooks.c
+++ b/nest/proto-hooks.c
@@ -271,6 +271,7 @@ int import_control(struct proto *p, rte **e, ea_list **attrs, struct linpool *po
  * rte_better - compare metrics of two routes
  * @new: the new route
  * @old: the original route
+ * @flags: protocol-specific comparison flags
  *
  * This hook gets called when the routing table contains two routes
  * for the same network which have originated from different instances
@@ -279,7 +280,7 @@ int import_control(struct proto *p, rte **e, ea_list **attrs, struct linpool *po
  *
  * Result: 1 if @new is better (more preferred) than @old, 0 otherwise.
  */
-int rte_better(rte *new, rte *old)
+int rte_better(rte *new, rte *old, u32 flags)
 { DUMMY; }
 
 /**
diff --git a/nest/protocol.h b/nest/protocol.h
index a7518c2..02defb4 100644
--- a/nest/protocol.h
+++ b/nest/protocol.h
@@ -178,13 +178,15 @@ struct proto {
   /*
    *	Routing entry hooks (called only for rte's belonging to this protocol):
    *
-   *	   rte_better	Compare two rte's and decide which one is better (1=first, 0=second).
+   *	   rte_best	Find best rte starting from given.
+   *	   rte_better	Compare two rte's and decide if better (1=first, 0=second).
    *       rte_same	Compare two rte's and decide whether they are identical (1=yes, 0=no).
    *	   rte_insert	Called whenever a rte is inserted to a routing table.
    *	   rte_remove	Called whenever a rte is removed from the routing table.
    */
 
-  int (*rte_better)(struct rte *, struct rte *);
+  struct rte *(*rte_best)(struct rte *);
+  int (*rte_better)(struct rte *, struct rte *, u32 flags);
   int (*rte_same)(struct rte *, struct rte *);
   void (*rte_insert)(struct network *, struct rte *);
   void (*rte_remove)(struct network *, struct rte *);
diff --git a/nest/route.h b/nest/route.h
index a4c0154..31a7a56 100644
--- a/nest/route.h
+++ b/nest/route.h
@@ -201,6 +201,12 @@ typedef struct rte {
       u32 router_id;			/* Router that originated this route */
     } ospf;
 #endif
+#ifdef CONFIG_BGP
+    struct {
+      u32 first_as;			/* First as number in AS-PATH or local AS */
+      struct rte *next_item;		/* Used in bgp_get_best */
+    } bgp;
+#endif
     struct {				/* Routes generated by krt sync (both temporary and inherited ones) */
       s8 src;				/* Alleged route source (see krt.h) */
       u8 proto;				/* Kernel source protocol ID */
@@ -212,6 +218,8 @@ typedef struct rte {
 } rte;
 
 #define REF_COW 1			/* Copy this rte on write */
+/* Protocol-specific flags */
+#define PFR_FLAG1	0x01		/* Definition of first possible flag */
 
 /* Types of route announcement, also used as flags */
 #define RA_OPTIMAL 1			/* Announcement of optimal route change */
diff --git a/nest/rt-table.c b/nest/rt-table.c
index e20d2f6..574711a 100644
--- a/nest/rt-table.c
+++ b/nest/rt-table.c
@@ -135,10 +135,23 @@ rte_do_cow(rte *r)
   return e;
 }
 
-static int				/* Actually better or at least as good as */
+/**
+ * rte_better - returns 1 if new rte is better
+ * @new: new rte
+ * @old: old BEST rte
+ *
+ * Functions compares old and new rte returning 1 IFF new rte is better.
+ * Best rte in @old->net is required to be specified in @old if @old and @new are
+ * originated from same protocol type. Specifying different rte can produce
+ * incorrect calculations in some protocols (see bgp_rte_better)
+ *
+ * Returns 1 if @new is better, 0 overwise
+ */
+static int
 rte_better(rte *new, rte *old)
 {
-  int (*better)(rte *, rte *);
+  int (*better)(rte *, rte *, u32);
+  struct proto *p_old, *p_new;
 
   if (!old)
     return 1;
@@ -146,17 +159,19 @@ rte_better(rte *new, rte *old)
     return 1;
   if (new->pref < old->pref)
     return 0;
-  if (new->attrs->proto->proto != old->attrs->proto->proto)
+  p_old = old->attrs->proto;
+  p_new = new->attrs->proto;
+  if (p_new->proto != p_old->proto)
     {
       /*
        *  If the user has configured protocol preferences, so that two different protocols
        *  have the same preference, try to break the tie by comparing addresses. Not too
        *  useful, but keeps the ordering of routes unambiguous.
        */
-      return new->attrs->proto->proto > old->attrs->proto->proto;
+      return p_new->proto > p_old->proto;
     }
-  if (better = new->attrs->proto->rte_better)
-    return better(new, old);
+  if (better = p_new->rte_better)
+    return better(new, old, 0);
   return 0;
 }
 
@@ -426,9 +441,11 @@ static void
 rte_recalculate(rtable *table, net *net, struct proto *p, struct proto *src, rte *new, ea_list *tmpa)
 {
   struct proto_stats *stats = &p->stats;
+  struct proto *rte_proto;
   rte *old_best = net->routes;
   rte *old = NULL;
-  rte **k, *r, *s;
+  rte **k, *r, *s, *t;
+  u32 proto_mask, attr_class;
 
 #ifdef CONFIG_PIPE
   if (proto_is_pipe(p) && (p->table == table))
@@ -526,9 +543,36 @@ rte_recalculate(rtable *table, net *net, struct proto *p, struct proto *src, rte
 
       /* Find new optimal route */
       r = NULL;
+      proto_mask = 0;
       for (s=net->routes; s; s=s->next)
+      {
+	rte_proto = s->attrs->proto;
+	attr_class = rte_proto->proto->attr_class;
+	/* Protocol has non-zero attr_class and rte_best hook */
+	if (attr_class && rte_proto->rte_best)
+	{
+	  /* Check if this is first hook invocation */
+	  if (!((2 << attr_class) & proto_mask))
+	  {
+	    proto_mask |= 2 << attr_class;
+	    /*
+	     * Find best route from protocol point of view
+	     * and compare it with current best route if exists
+	     */
+	    t = rte_proto->rte_best(s);
+	    if (!r || rte_better(r, t))
+	      r = t;
+	  }
+	  /*
+	   * Protocol has already supplied its best route.
+	   * Skip this rte
+	   */
+	  continue;
+	}
+
 	if (rte_better(s, r))
 	  r = s;
+      }
 
       /* Announce optimal route */
       rte_announce(table, RA_OPTIMAL, net, r, old_best, tmpa);
diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c
index 2832f42..0b417df 100644
--- a/proto/bgp/attrs.c
+++ b/proto/bgp/attrs.c
@@ -1118,7 +1118,7 @@ rte_resolvable(rte *rt)
 }
 
 int
-bgp_rte_better(rte *new, rte *old)
+bgp_rte_better(rte *new, rte *old, u32 flags UNUSED)
 {
   struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->proto;
   struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->proto;
@@ -1236,6 +1236,293 @@ bgp_rte_better(rte *new, rte *old)
   return (ipa_compare(new_bgp->cf->remote_ip, old_bgp->cf->remote_ip) < 0);
 }
 
+
+
+static inline u32
+bgp_get_neighbor_as(rte *s)
+{
+  u32 first_as;
+
+  if (s->flags & PFR_BGP_ASCACHE)
+    return s->u.bgp.first_as;
+
+  first_as = bgp_get_neighbor(s);
+  s->u.bgp.first_as = first_as;
+  s->pflags |= PFR_BGP_ASCACHE;
+
+  return first_as;
+}
+
+/**
+ * bgp_rte_better_med - Determine best route
+ * @new: new rte
+ * @old: old BEST rte in @old->net
+ *
+ * Function implements RFC 4271 best route selection
+ * with so-called 'deterministic' MED comparison.
+ * If @new and @old are announced from different neighbours
+ * @old is required to be the best route.
+ *
+ * Returns: 1 if @new is better, 0 overwise
+ */
+int
+bgp_rte_better_med(rte *new, rte *old, u32 flags)
+{
+  struct bgp_proto *new_bgp = (struct bgp_proto *) new->attrs->proto;
+  struct bgp_proto *old_bgp = (struct bgp_proto *) old->attrs->proto;
+  struct bgp_config *new_config, *old_config;
+  ea_list *new_eattrs = new->attrs->eattrs;
+  ea_list *old_eattrs = old->attrs->eattrs;
+  eattr *x, *y;
+  u32 n, o;
+  u32 new_as, old_as, tmp_as;
+  rte *s, *r;
+
+  /* RFC 4271 9.1.2.1. Route resolvability test */
+  n = rte_resolvable(new);
+  o = rte_resolvable(old);
+  if (n > o)
+    return 1;
+  if (n < o)
+    return 0;
+
+  /* Start with local preferences */
+  x = ea_find(new_eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
+  y = ea_find(old_eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF));
+  n = x ? x->u.data : new_bgp->cf->default_local_pref;
+  o = y ? y->u.data : old_bgp->cf->default_local_pref;
+  if (n > o)
+    return 1;
+  if (n < o)
+    return 0;
+
+  /* RFC 4271 9.1.2.2. a)  Use AS path lengths */
+  if (new_bgp->cf->compare_path_lengths || old_bgp->cf->compare_path_lengths)
+    {
+      x = ea_find(new_eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+      y = ea_find(old_eattrs, EA_CODE(EAP_BGP, BA_AS_PATH));
+      n = x ? as_path_getlen(x->u.ptr) : AS_PATH_MAXLEN;
+      o = y ? as_path_getlen(y->u.ptr) : AS_PATH_MAXLEN;
+      if (n < o)
+	return 1;
+      if (n > o)
+	return 0;
+    }
+
+  /* RFC 4271 9.1.2.2. b) Use origins */
+  x = ea_find(new_eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
+  y = ea_find(old_eattrs, EA_CODE(EAP_BGP, BA_ORIGIN));
+  n = x ? x->u.data : ORIGIN_INCOMPLETE;
+  o = y ? y->u.data : ORIGIN_INCOMPLETE;
+  if (n < o)
+    return 1;
+  if (n > o)
+    return 0;
+
+  /* RFC 4271 9.1.2.2. c) Compare MED's */
+  /*
+   * This is a bit tricky.
+   *
+   * First we retrieve/update neighbor ASNs from rte cache.
+   * If @old and @new ASNs are the same, we can immediately
+   * compare MED. If not, we need to compare best routes
+   * from both ASNs. Since @old is the best route already nothing
+   * needs to be done with @old. However, we need to scan all
+   * routes for given network (another place where @old is required
+   * to be the best (and the first) route) to get best rte in
+   * @new neighbor ASN. This changes complexity from O(1) to O(N).
+   * If found rte is not @new, we simply returns 0
+   * since @old is the best rte of all routes. Overwise, we
+   * continue to compare @old and @new skippind MED comparison.
+   */
+  /* Get neighbor info first as && update cache if nesessary */
+  old_as = bgp_get_neighbor_as(old);
+  new_as = bgp_get_neighbor_as(new);
+
+  new_config = new_bgp->cf;
+  old_config = old_bgp->cf;
+
+  if (new_as == old_as)
+  {
+    /* We can compare MED */
+    x = ea_find(new_eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
+    y = ea_find(old_eattrs, EA_CODE(EAP_BGP, BA_MULTI_EXIT_DISC));
+    n = x ? x->u.data : new_config->default_med;
+    o = y ? y->u.data : old_config->default_med;
+    if (n < o)
+      return 1;
+    if (n > o)
+      return 0;
+  }
+  else
+  {
+    /*
+     * routes came from different neighbor ASses.
+     * We need to find best route within routes
+     * from new_as
+     */
+    r = new;
+    for (s = old; s; s = s->next)
+    {
+      /* Get neighbor ASN */
+      tmp_as = bgp_get_neighbor_as(s);
+      if (tmp_as != new_as)
+	continue;
+
+      /* 
+       * Okay, we've found another rte from the same neighbor.
+       * Let's check it.
+       */
+      if (bgp_rte_better_med(s, r, 0))
+	r = s;
+    }
+
+    /* 
+     * Best route for neighbor routes is saved in r.
+     * If new is not the best route in the group this
+     * means that current best route remains best.
+     */
+    if (r != new)
+      return 0;
+  }
+
+  /* RFC 4271 9.1.2.2. d) Prefer external peers */
+  if (new_bgp->is_internal > old_bgp->is_internal)
+    return 0;
+  if (new_bgp->is_internal < old_bgp->is_internal)
+    return 1;
+
+  /* RFC 4271 9.1.2.2. e) Compare IGP metrics */
+  n = new_config->igp_metric ? new->attrs->igp_metric : 0;
+  o = old_config->igp_metric ? old->attrs->igp_metric : 0;
+  if (n < o)
+    return 1;
+  if (n > o)
+    return 0;
+
+  /* RFC 4271 9.1.2.2. f) Compare BGP identifiers */
+  /* RFC 4456 9. a) Use ORIGINATOR_ID instead of local neighor ID */
+  x = ea_find(new_eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
+  y = ea_find(old_eattrs, EA_CODE(EAP_BGP, BA_ORIGINATOR_ID));
+  n = x ? x->u.data : new_bgp->remote_id;
+  o = y ? y->u.data : old_bgp->remote_id;
+
+  /* RFC 5004 - prefer older routes */
+  /* (if both are external and from different peer) */
+  if ((new_config->prefer_older || old_config->prefer_older) &&
+      !new_bgp->is_internal && n != o)
+    return 0;
+
+  /* rest of RFC 4271 9.1.2.2. f) */
+  if (n < o)
+    return 1;
+  if (n > o)
+    return 0;
+
+  /* RFC 4456 9. b) Compare cluster list lengths */
+  x = ea_find(new_eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
+  y = ea_find(old_eattrs, EA_CODE(EAP_BGP, BA_CLUSTER_LIST));
+  n = x ? int_set_get_size(x->u.ptr) : 0;
+  o = y ? int_set_get_size(y->u.ptr) : 0;
+  if (n < o)
+    return 1;
+  if (n > o)
+    return 0;
+
+  /* RFC 4271 9.1.2.2. g) Compare peer IP adresses */
+  return (ipa_compare(new_config->remote_ip, old_config->remote_ip) < 0);
+}
+
+#define LOCAL_HASH	16
+/**
+ * bgp_rte_best - returns the best rte in chain originated by BGP
+ *
+ */
+rte *
+bgp_rte_best(rte *old)
+{
+  rte **rgroups, *s, *ss, *best = NULL;
+  u32 local_as, best_as;
+  int i;
+
+  if (!old->next)
+    return old;
+
+  /* Allocate hash for ASn groups */
+  rgroups = alloca(LOCAL_HASH * sizeof(rte *));
+  memset(rgroups, 0, LOCAL_HASH * sizeof(rte *));
+
+  for (s = old; s; s = s->next)
+  {
+    local_as = bgp_get_neighbor_as(s);
+    best = rgroups[local_as & (LOCAL_HASH - 1)];
+    if (!best)
+    {
+      /* New group */
+      rgroups[local_as & (LOCAL_HASH - 1)] = s;
+      s->u.bgp.next_item = NULL;
+      continue;
+    }
+
+    ss = best;
+    best_as = bgp_get_neighbor_as(ss);
+    while (ss->u.bgp.next_item)
+    {
+      if ((best_as = bgp_get_neighbor_as(ss)) == local_as)
+	break;
+      /* Hash collision */
+      ss = ss->u.bgp.next_item;
+    }
+
+    /* List ended */
+    if (best_as == local_as)
+    {
+      /* Both rte are from the same group, let's compare */
+      if (bgp_rte_better_med(s, best, 0))
+      {
+	/* New best rte in group */
+	rgroups[local_as & (LOCAL_HASH - 1)] = s;
+	s->u.bgp.next_item = best->u.bgp.next_item;
+      }
+
+      continue;
+    }
+
+    /* New group */
+    ss->u.bgp.next_item = s;
+    s->u.bgp.next_item = NULL;
+  }
+
+  /* Cycle ended, now we have to compare all per-group best rtes */
+  for (i = 0, best = NULL; i < LOCAL_HASH; i++)
+  {
+    for (s = rgroups[i]; s; s = s->u.bgp.next_item)
+    {
+      if (!best)
+      {
+	best = s;
+	continue;
+      }
+
+      /* Do general preference checking */
+      if (best->pref > s->pref)
+	continue;
+      if (s->pref > best->pref)
+      {
+	best = s;
+	continue;
+      }
+
+      /* Do full bgp-specific checking without MED comparison */
+      if (bgp_rte_better_med(s, best, BGP_CFLAG_SKIPMED))
+	best = s;
+    }
+  }
+
+  return best;
+}
+#undef LOCAL_HASH
+
 static struct adata *
 bgp_aggregator_convert_to_new(struct adata *old, struct linpool *pool)
 {
diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c
index 675342d..a277680 100644
--- a/proto/bgp/bgp.c
+++ b/proto/bgp/bgp.c
@@ -904,7 +904,9 @@ bgp_init(struct proto_config *C)
 
   P->accept_ra_types = RA_OPTIMAL;
   P->rt_notify = bgp_rt_notify;
-  P->rte_better = bgp_rte_better;
+  //P->rte_better = bgp_rte_better;
+  P->rte_better = bgp_rte_better_med;
+  P->rte_best = bgp_rte_best;
   P->import_control = bgp_import_control;
   P->neigh_notify = bgp_neigh_notify;
   P->reload_routes = bgp_reload_routes;
diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h
index 437ba33..70e1ff9 100644
--- a/proto/bgp/bgp.h
+++ b/proto/bgp/bgp.h
@@ -184,7 +184,9 @@ void bgp_attach_attr(struct ea_list **to, struct linpool *pool, unsigned attr, u
 byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned attr, unsigned len);
 struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, unsigned int len, struct linpool *pool, int mandatory);
 int bgp_get_attr(struct eattr *e, byte *buf, int buflen);
-int bgp_rte_better(struct rte *, struct rte *);
+int bgp_rte_better(struct rte *, struct rte *, u32);
+int bgp_rte_better_med(struct rte *, struct rte *, u32);
+rte *bgp_rte_best(rte *old);
 void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs);
 int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *);
 void bgp_attr_init(struct bgp_proto *);
@@ -241,6 +243,11 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi
 #define BA_AS4_PATH             0x11    /* [RFC4893] */
 #define BA_AS4_AGGREGATOR       0x12
 
+/* Protocol-specififc rte flags */
+#define PFR_BGP_ASCACHE		PFR_FLAG1	/* 0x01 Set if bgp.first_as rte field is valid */
+/* Protocol-specific rte comparison flags */
+#define BGP_CFLAG_SKIPMED	0x01		/* Skip MED when comparing rte from different neigbour ASNs */
+
 /* BGP connection states */
 
 #define BS_IDLE			0
diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c
index ce7ad37..12b209f 100644
--- a/proto/ospf/ospf.c
+++ b/proto/ospf/ospf.c
@@ -105,7 +105,7 @@
 
 static int ospf_reload_routes(struct proto *p);
 static void ospf_rt_notify(struct proto *p, struct rtable *table UNUSED, net * n, rte * new, rte * old UNUSED, ea_list * attrs);
-static int ospf_rte_better(struct rte *new, struct rte *old);
+static int ospf_rte_better(struct rte *new, struct rte *old, u32 flags UNUSED);
 static int ospf_rte_same(struct rte *new, struct rte *old);
 static void ospf_disp(timer *timer);
 
@@ -314,7 +314,7 @@ ospf_init(struct proto_config *c)
 
 /* If new is better return 1 */
 static int
-ospf_rte_better(struct rte *new, struct rte *old)
+ospf_rte_better(struct rte *new, struct rte *old, u32 flags UNUSED)
 {
   if (new->u.ospf.metric1 == LSINFINITY)
     return 0;
diff --git a/proto/rip/rip.c b/proto/rip/rip.c
index 543aa30..6967c68 100644
--- a/proto/rip/rip.c
+++ b/proto/rip/rip.c
@@ -265,7 +265,7 @@ rip_rte_update_if_better(rtable *tab, net *net, struct proto *p, rte *new)
   rte *old;
 
   old = rte_find(net, p);
-  if (!old || p->rte_better(new, old) ||
+  if (!old || p->rte_better(new, old, 0) ||
       (ipa_equal(old->attrs->from, new->attrs->from) &&
       (old->u.rip.metric != new->u.rip.metric)) )
     rte_update(tab, net, p, p, new);
@@ -909,7 +909,7 @@ rip_rte_same(struct rte *new, struct rte *old)
 
 
 static int
-rip_rte_better(struct rte *new, struct rte *old)
+rip_rte_better(struct rte *new, struct rte *old, u32 flags UNUSED)
 {
   struct proto *p = new->attrs->proto;
 
-- 
1.7.3.2


--------------070507060406060400060500--



More information about the Bird-users mailing list