[PATCH RFC v2] Preserve BGP attributes from non-BGP protocols
There is no reason to overwrite attributes like AS path, next hop, etc. when the route source is not BGP. Preserving them has several advantages: * no attributes are lost by using opaque pipes * BGP attributes can by pre-populated in other protocols' import filters As a side effect of moving code from bgp_create_attrs() to bgp_update_attrs(), the BGP protocol now uses the same logic to determine if the "gw" attribute should be used as next hop for routes from BGP sources as for those from other sources. Previously, the "gw" attribute was never for routes from BGP. Limitations: * The handling of routes imported from/exported to route reflectors isn't preserved through opaque pipes --- Hi, I found a small issue with my first patch (origin and local_pref attributes not being preserved), so here is a v2. Regards, Matthias proto/bgp/attrs.c | 101 ++++++++++++++++++++++++------------------------------ 1 file changed, 45 insertions(+), 56 deletions(-) diff --git a/proto/bgp/attrs.c b/proto/bgp/attrs.c index c27a498..7f3b739 100644 --- a/proto/bgp/attrs.c +++ b/proto/bgp/attrs.c @@ -903,47 +903,17 @@ bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UN bgp_schedule_packet(p->conn, PKT_UPDATE); } -static int +static void bgp_create_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool) { - ea_list *ea = lp_alloc(pool, sizeof(ea_list) + 4*sizeof(eattr)); rta *rta = e->attrs; - byte *z; - ea->next = *attrs; - *attrs = ea; - ea->flags = EALF_SORTED; - ea->count = 4; - - bgp_set_attr(ea->attrs, BA_ORIGIN, + if (!ea_find(rta->eattrs, EA_CODE(EAP_BGP, BA_ORIGIN))) + bgp_attach_attr(attrs, pool, BA_ORIGIN, ((rta->source == RTS_OSPF_EXT1) || (rta->source == RTS_OSPF_EXT2)) ? ORIGIN_INCOMPLETE : ORIGIN_IGP); - if (p->is_internal) - bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 0); - else - { - z = bgp_set_attr_wa(ea->attrs+1, pool, BA_AS_PATH, 6); - z[0] = AS_PATH_SEQUENCE; - z[1] = 1; /* 1 AS */ - put_u32(z+2, p->local_as); - } - - /* iBGP -> use gw, eBGP multi-hop -> use source_addr, - eBGP single-hop -> use gw if on the same iface */ - z = bgp_set_attr_wa(ea->attrs+2, pool, BA_NEXT_HOP, NEXT_HOP_LENGTH); - if (p->cf->next_hop_self || - rta->dest != RTD_ROUTER || - ipa_equal(rta->gw, IPA_NONE) || - ipa_has_link_scope(rta->gw) || - (!p->is_internal && !p->cf->next_hop_keep && - (!p->neigh || (rta->iface != p->neigh->iface)))) - set_next_hop(z, p->source_addr); - else - set_next_hop(z, rta->gw); - - bgp_set_attr(ea->attrs+3, BA_LOCAL_PREF, p->cf->default_local_pref); - - return 0; /* Leave decision to the filters */ + if (!ea_find(rta->eattrs, EA_CODE(EAP_BGP, BA_LOCAL_PREF))) + bgp_attach_attr(attrs, pool, BA_LOCAL_PREF, p->cf->default_local_pref); } @@ -973,7 +943,16 @@ static inline void bgp_path_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 as) { eattr *a = ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH)); - bgp_attach_attr(attrs, pool, BA_AS_PATH, (uintptr_t) as_path_prepend(pool, a->u.ptr, as)); + if (a) + bgp_attach_attr(attrs, pool, BA_AS_PATH, (uintptr_t) as_path_prepend(pool, a->u.ptr, as)); + else + { + byte *z; + z = bgp_attach_attr_wa(attrs, pool, BA_AS_PATH, 6); + z[0] = AS_PATH_SEQUENCE; + z[1] = 1; /* 1 AS */ + put_u32(z+2, as); + } } static inline void @@ -986,6 +965,7 @@ bgp_cluster_list_prepend(rte *e, ea_list **attrs, struct linpool *pool, u32 cid) static int bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *pool, int rr) { + rta *rta = e->attrs; eattr *a; if (!p->is_internal && !p->rs_client) @@ -1000,6 +980,8 @@ bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p if (a) bgp_attach_attr(attrs, pool, BA_MULTI_EXIT_DISC, 0); } + else if (!ea_find(e->attrs->eattrs, EA_CODE(EAP_BGP, BA_AS_PATH))) + bgp_attach_attr_wa(attrs, pool, BA_AS_PATH, 0); /* iBGP -> keep next_hop, eBGP multi-hop -> use source_addr, * eBGP single-hop -> keep next_hop if on the same iface. @@ -1019,7 +1001,15 @@ bgp_update_attrs(struct bgp_proto *p, rte *e, ea_list **attrs, struct linpool *p { /* Need to create new one */ byte *b = bgp_attach_attr_wa(attrs, pool, BA_NEXT_HOP, NEXT_HOP_LENGTH); - set_next_hop(b, p->source_addr); + if (p->cf->next_hop_self || + rta->dest != RTD_ROUTER || + ipa_equal(rta->gw, IPA_NONE) || + ipa_has_link_scope(rta->gw) || + (!p->is_internal && !p->cf->next_hop_keep && + (!p->neigh || (rta->iface != p->neigh->iface)))) + set_next_hop(b, p->source_addr); + else + set_next_hop(b, rta->gw); } if (rr) @@ -1079,30 +1069,29 @@ bgp_import_control(struct proto *P, rte **new, ea_list **attrs, struct linpool * if (p == new_bgp) /* Poison reverse updates */ return -1; - if (new_bgp) - { - /* We should check here for cluster list loop, because the receiving BGP instance - might have different cluster ID */ - if (bgp_cluster_list_loopy(p, e->attrs)) - return -1; - if (p->cf->interpret_communities && bgp_community_filter(p, e)) - return -1; + if (!new_bgp) + bgp_create_attrs(p, e, attrs, pool); - if (p->local_as == new_bgp->local_as && p->is_internal && new_bgp->is_internal) - { - /* Redistribution of internal routes with IBGP */ - if (p->rr_client || new_bgp->rr_client) - /* Route reflection, RFC 4456 */ - return bgp_update_attrs(p, e, attrs, pool, 1); - else - return -1; - } + /* We should check here for cluster list loop, because the receiving BGP instance + might have different cluster ID */ + if (bgp_cluster_list_loopy(p, e->attrs)) + return -1; + + if (p->cf->interpret_communities && bgp_community_filter(p, e)) + return -1; + + if (new_bgp && p->local_as == new_bgp->local_as && p->is_internal && new_bgp->is_internal) + { + /* Redistribution of internal routes with IBGP */ + if (p->rr_client || new_bgp->rr_client) + /* Route reflection, RFC 4456 */ + return bgp_update_attrs(p, e, attrs, pool, 1); else - return bgp_update_attrs(p, e, attrs, pool, 0); + return -1; } else - return bgp_create_attrs(p, e, attrs, pool); + return bgp_update_attrs(p, e, attrs, pool, 0); } static inline u32 -- 1.8.2.3
On Fri, May 17, 2013 at 04:31:07PM +0200, Matthias Schiffer wrote:
There is no reason to overwrite attributes like AS path, next hop, etc. when the route source is not BGP. Preserving them has several advantages:
* no attributes are lost by using opaque pipes * BGP attributes can by pre-populated in other protocols' import filters
I agree with the point that BGP should not overwrite BGP attributes from foreign routes exported to it. But the current behavior is both established and consistent with other protocols (like OSPF, where the situation is ever worse), so NACK for current minor releases. I plan to do some major changes to attribute behavior to make it more consistent and such change will be a part of it. There is also a question of how BGP attribute on non-BGP route should be interpreted by BGP protocol - either like BGP routes, or like non-BGP routes with the attribute assigned by BGP export filter. I would prefer the second behavior - the difference is in implicit modifications of current attributes (like bgp_next_hop), in the first variant it would be modified as it is from BGP route, in the second variant it is not modified as all (because it is set as a local policy). Therefore if you, e.g., originate a static route and export it to a BGP protocol, it wouldn't make a difference if you modify BGP attributes in the import filter of the static protocol or in the export filter of the BGP protocol. BTW, do you need this because you use opaque pipes? One of my long-term plans is to remove support for opaque pipes and replace them with transparent pipes (i.e. pipes retaining type and attributes) configured to propagate just the best route. Could this
As a side effect of moving code from bgp_create_attrs() to bgp_update_attrs(), the BGP protocol now uses the same logic to determine if the "gw" attribute should be used as next hop for routes from BGP sources as for those from other sources. Previously, the "gw" attribute was never for routes from BGP.
Well, as a side effect the code for NEXT_HOP attribute modification becames incomprehensible (altought the original code is also horrible from this point of view). BTW, is there any non-broken case in which 'gw' attribute is used for routes from BGP in your code? I cannot find any. -- Elen sila lumenn' omentielvo Ondrej 'SanTiago' Zajicek (email: santiago@crfreenet.org) OpenPGP encrypted e-mails preferred (KeyID 0x11DEADC3, wwwkeys.pgp.net) "To err is human -- to blame it on a computer is even more so."
On 05/22/2013 12:36 PM, Ondrej Zajicek wrote:
On Fri, May 17, 2013 at 04:31:07PM +0200, Matthias Schiffer wrote:
There is no reason to overwrite attributes like AS path, next hop, etc. when the route source is not BGP. Preserving them has several advantages:
* no attributes are lost by using opaque pipes * BGP attributes can by pre-populated in other protocols' import filters
I agree with the point that BGP should not overwrite BGP attributes from foreign routes exported to it. But the current behavior is both established and consistent with other protocols (like OSPF, where the situation is ever worse), so NACK for current minor releases. I plan to do some major changes to attribute behavior to make it more consistent and such change will be a part of it. Yes, I saw that issue as well... so I guess I'll have to use patched Bird versions till a major release includes this or something similar.
There is also a question of how BGP attribute on non-BGP route should be interpreted by BGP protocol - either like BGP routes, or like non-BGP routes with the attribute assigned by BGP export filter. I would prefer the second behavior - the difference is in implicit modifications of current attributes (like bgp_next_hop), in the first variant it would be modified as it is from BGP route, in the second variant it is not modified as all (because it is set as a local policy). Therefore if you, e.g., originate a static route and export it to a BGP protocol, it wouldn't make a difference if you modify BGP attributes in the import filter of the static protocol or in the export filter of the BGP protocol.
I think all routes should be handled the same way, without regard for the source protocol. I'd like the BGP protocol to handle route attributes which come from other protocols the same way attributes from BGP protocol instances are handled, my use case is the following config snippet, which works just like a eBGP peering (okay, I don't handle things like MED and other attributes that shouldn't be distributed over eBPG yet, but of course that can be added easily). Without my patch it works only for routes that come from BGP, but not those from static protocols and similar. table ffhl; table ffhh; function bgp_peer(int import_as; int export_as) { if (!defined(bgp_path)) then bgp_path = +empty+; if (bgp_path ~ [= * import_as * =]) then return false; bgp_path.prepend(export_as); return true; }; protocol pipe pipe_ffhl_ffhh { table ffhl; peer table ffhh; export where bgp_peer(65044, 65052); import where bgp_peer(65054, 65044); };
BTW, do you need this because you use opaque pipes? One of my long-term plans is to remove support for opaque pipes and replace them with transparent pipes (i.e. pipes retaining type and attributes) configured to propagate just the best route. Could this
Nope, I don't use opaque pipes at the moment.
As a side effect of moving code from bgp_create_attrs() to bgp_update_attrs(), the BGP protocol now uses the same logic to determine if the "gw" attribute should be used as next hop for routes from BGP sources as for those from other sources. Previously, the "gw" attribute was never for routes from BGP.
Well, as a side effect the code for NEXT_HOP attribute modification becames incomprehensible (altought the original code is also horrible from this point of view).
BTW, is there any non-broken case in which 'gw' attribute is used for routes from BGP in your code? I cannot find any.
Me neither, so that case could probably just be removed - I just kept that else-branch to change as little of the curent behaviour as possible. I hope my config snippet has made my use case a bit clearer, and I'd love to have this use case supported by Bird in some way :) Regards, Matthias
On Thu, May 23, 2013 at 07:35:29PM +0200, Matthias Schiffer wrote:
There is also a question of how BGP attribute on non-BGP route should be interpreted by BGP protocol - either like BGP routes, or like non-BGP routes with the attribute assigned by BGP export filter. I would prefer the second behavior - the difference is in implicit modifications of current attributes (like bgp_next_hop), in the first variant it would be modified as it is from BGP route, in the second variant it is not modified as all (because it is set as a local policy). Therefore if you, e.g., originate a static route and export it to a BGP protocol, it wouldn't make a difference if you modify BGP attributes in the import filter of the static protocol or in the export filter of the BGP protocol.
I think all routes should be handled the same way, without regard for the source protocol.
Well, this is nice idea, but BGP standard (RFC 4271) specifies for route propagation different behavior based on whether the route was received originally from iBGP or from eBGP. So even if you want that non-BGP routes should be handled like BGP ones, you would have to specify whether like iBGP or like eBGP. -- Elen sila lumenn' omentielvo Ondrej 'SanTiago' Zajicek (email: santiago@crfreenet.org) OpenPGP encrypted e-mails preferred (KeyID 0x11DEADC3, wwwkeys.pgp.net) "To err is human -- to blame it on a computer is even more so."
On 05/23/2013 11:36 PM, Ondrej Zajicek wrote:
On Thu, May 23, 2013 at 07:35:29PM +0200, Matthias Schiffer wrote:
I think all routes should be handled the same way, without regard for the source protocol.
Well, this is nice idea, but BGP standard (RFC 4271) specifies for route propagation different behavior based on whether the route was received originally from iBGP or from eBGP. So even if you want that non-BGP routes should be handled like BGP ones, you would have to specify whether like iBGP or like eBGP.
Hmm, this is true, but the distinction becomes hard to define as soon as you have BGP instances with different local AS# and propagate routes between then... maybe it would be beneficial to add an (filter readable/writable) attribute holding the remote AS# the route was received from and use this to differentiate between iBGP and eBGP? Matthias
participants (2)
-
Matthias Schiffer -
Ondrej Zajicek