[PATCH 3.3] Proto: Allow channel start() to refuse DOWN->START
From: Zhang Maiyun <me@maiyun.me> Signed-off-by: Zhang Maiyun <me@maiyun.me> --- nest/proto.c | 27 ++++++++++++++++++++------- proto/bgp/bgp.c | 2 +- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/nest/proto.c b/nest/proto.c index 4472fe6f..2edbe142 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -1086,7 +1086,7 @@ channel_setup_in_table(struct channel *c) } -static void +static int channel_do_start(struct channel *c) { c->proto->active_channels++; @@ -1094,9 +1094,12 @@ channel_do_start(struct channel *c) if ((c->in_keep & RIK_PREFILTER) == RIK_PREFILTER) channel_setup_in_table(c); - CALL(c->class->start, c); + if (c->class->start) + if (c->class->start(c) != 0) + return -1; channel_start_import(c); + return 0; } static void @@ -1176,7 +1179,11 @@ channel_set_state(struct channel *c, uint state) ASSERT(cs == CS_DOWN || cs == CS_PAUSE); if (cs == CS_DOWN) - channel_do_start(c); + if (channel_do_start(c) != 0) { + // Reject the channel start and return to down state + c->channel_state = CS_DOWN; + c->last_state_change = current_time(); + } break; @@ -1184,12 +1191,18 @@ channel_set_state(struct channel *c, uint state) ASSERT(cs == CS_DOWN || cs == CS_START || cs == CS_PAUSE); if (cs == CS_DOWN) - channel_do_start(c); + if (channel_do_start(c) != 0) { + // Reject the channel start and return to down state + c->channel_state = CS_DOWN; + c->last_state_change = current_time(); + } - if (!c->gr_wait && c->proto->rt_notify) - channel_start_export(c); + if (c->channel_state != CS_DOWN) { + if (!c->gr_wait && c->proto->rt_notify) + channel_start_export(c); - channel_do_up(c); + channel_do_up(c); + } break; case CS_PAUSE: diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 8d04a05d..1923a80d 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -3257,7 +3257,7 @@ bgp_channel_start(struct channel *C) if (ipa_zero(c->next_hop_addr)) { log(L_WARN "%s: Missing next hop address", p->p.name); - return 0; + return -1; } /* Set link-local address for IPv6 single-hop BGP */ -- 2.54.0
Hello All, I was hoping to get some feedback on this change. For example, bgp's channel start function gives up when it cannot find a proper next hop, but the channel is still treated as successfully initialized. This then leads to lots of errors in `bgp_decode_nlri` about bad NH. I think it might be a better idea to leave the channel in the DOWN state when this happens, and this patch allows the channel start function to reject the DOWN->START transition. Best, Maiyun On Thu, Jun 18, 2026 at 10:12:32AM -0400, me@maiyun.me wrote:
From: Zhang Maiyun <me@maiyun.me>
Signed-off-by: Zhang Maiyun <me@maiyun.me> --- nest/proto.c | 27 ++++++++++++++++++++------- proto/bgp/bgp.c | 2 +- 2 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/nest/proto.c b/nest/proto.c index 4472fe6f..2edbe142 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -1086,7 +1086,7 @@ channel_setup_in_table(struct channel *c) }
-static void +static int channel_do_start(struct channel *c) { c->proto->active_channels++; @@ -1094,9 +1094,12 @@ channel_do_start(struct channel *c) if ((c->in_keep & RIK_PREFILTER) == RIK_PREFILTER) channel_setup_in_table(c);
- CALL(c->class->start, c); + if (c->class->start) + if (c->class->start(c) != 0) + return -1;
channel_start_import(c); + return 0; }
static void @@ -1176,7 +1179,11 @@ channel_set_state(struct channel *c, uint state) ASSERT(cs == CS_DOWN || cs == CS_PAUSE);
if (cs == CS_DOWN) - channel_do_start(c); + if (channel_do_start(c) != 0) { + // Reject the channel start and return to down state + c->channel_state = CS_DOWN; + c->last_state_change = current_time(); + }
break;
@@ -1184,12 +1191,18 @@ channel_set_state(struct channel *c, uint state) ASSERT(cs == CS_DOWN || cs == CS_START || cs == CS_PAUSE);
if (cs == CS_DOWN) - channel_do_start(c); + if (channel_do_start(c) != 0) { + // Reject the channel start and return to down state + c->channel_state = CS_DOWN; + c->last_state_change = current_time(); + }
- if (!c->gr_wait && c->proto->rt_notify) - channel_start_export(c); + if (c->channel_state != CS_DOWN) { + if (!c->gr_wait && c->proto->rt_notify) + channel_start_export(c);
- channel_do_up(c); + channel_do_up(c); + } break;
case CS_PAUSE: diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 8d04a05d..1923a80d 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -3257,7 +3257,7 @@ bgp_channel_start(struct channel *C) if (ipa_zero(c->next_hop_addr)) { log(L_WARN "%s: Missing next hop address", p->p.name); - return 0; + return -1; }
/* Set link-local address for IPv6 single-hop BGP */ -- 2.54.0
Howdy! Thank you for your patch. This is a real issue which we'd like to handle differently because your approach needs internal API changes. We'll instead refactor the BGP channel startup code to not start the channel at all; we need this anyway to allow future development. Thank you again and have a nice day! Maria Internal reference: #447 On Thu, Jun 18, 2026 at 10:18:32AM -0400, Zhang Maiyun via Bird-users wrote:
Hello All,
I was hoping to get some feedback on this change. For example, bgp's channel start function gives up when it cannot find a proper next hop, but the channel is still treated as successfully initialized. This then leads to lots of errors in `bgp_decode_nlri` about bad NH.
I think it might be a better idea to leave the channel in the DOWN state when this happens, and this patch allows the channel start function to reject the DOWN->START transition.
Best, Maiyun
On Thu, Jun 18, 2026 at 10:12:32AM -0400, me@maiyun.me wrote:
From: Zhang Maiyun <me@maiyun.me>
Signed-off-by: Zhang Maiyun <me@maiyun.me> --- nest/proto.c | 27 ++++++++++++++++++++------- proto/bgp/bgp.c | 2 +- 2 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/nest/proto.c b/nest/proto.c index 4472fe6f..2edbe142 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -1086,7 +1086,7 @@ channel_setup_in_table(struct channel *c) }
-static void +static int channel_do_start(struct channel *c) { c->proto->active_channels++; @@ -1094,9 +1094,12 @@ channel_do_start(struct channel *c) if ((c->in_keep & RIK_PREFILTER) == RIK_PREFILTER) channel_setup_in_table(c);
- CALL(c->class->start, c); + if (c->class->start) + if (c->class->start(c) != 0) + return -1;
channel_start_import(c); + return 0; }
static void @@ -1176,7 +1179,11 @@ channel_set_state(struct channel *c, uint state) ASSERT(cs == CS_DOWN || cs == CS_PAUSE);
if (cs == CS_DOWN) - channel_do_start(c); + if (channel_do_start(c) != 0) { + // Reject the channel start and return to down state + c->channel_state = CS_DOWN; + c->last_state_change = current_time(); + }
break;
@@ -1184,12 +1191,18 @@ channel_set_state(struct channel *c, uint state) ASSERT(cs == CS_DOWN || cs == CS_START || cs == CS_PAUSE);
if (cs == CS_DOWN) - channel_do_start(c); + if (channel_do_start(c) != 0) { + // Reject the channel start and return to down state + c->channel_state = CS_DOWN; + c->last_state_change = current_time(); + }
- if (!c->gr_wait && c->proto->rt_notify) - channel_start_export(c); + if (c->channel_state != CS_DOWN) { + if (!c->gr_wait && c->proto->rt_notify) + channel_start_export(c);
- channel_do_up(c); + channel_do_up(c); + } break;
case CS_PAUSE: diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 8d04a05d..1923a80d 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -3257,7 +3257,7 @@ bgp_channel_start(struct channel *C) if (ipa_zero(c->next_hop_addr)) { log(L_WARN "%s: Missing next hop address", p->p.name); - return 0; + return -1; }
/* Set link-local address for IPv6 single-hop BGP */ -- 2.54.0
-- Maria Matejka (she/her) | BIRD Team Leader | CZ.NIC, z.s.p.o.
Hello Maria, Thanks for the reply. It makes sense and I will wait for the team's refactoring. In the meantime, I did find a different approach that does not involve API changes. It marks the channel as disabled if a next hop cannot be identified. The change is here if it might be interesting: https://github.com/myzhang1029/bird/commit/8348b5dab4fa0ab0c51248ba3277997d4... Best, Maiyun On Thu, Jun 18, 2026 at 05:39:50PM +0200, Maria Matejka wrote:
Howdy!
Thank you for your patch. This is a real issue which we'd like to handle differently because your approach needs internal API changes.
We'll instead refactor the BGP channel startup code to not start the channel at all; we need this anyway to allow future development.
Thank you again and have a nice day!
Maria
Internal reference: #447
On Thu, Jun 18, 2026 at 10:18:32AM -0400, Zhang Maiyun via Bird-users wrote:
Hello All,
I was hoping to get some feedback on this change. For example, bgp's channel start function gives up when it cannot find a proper next hop, but the channel is still treated as successfully initialized. This then leads to lots of errors in `bgp_decode_nlri` about bad NH.
I think it might be a better idea to leave the channel in the DOWN state when this happens, and this patch allows the channel start function to reject the DOWN->START transition.
Best, Maiyun
On Thu, Jun 18, 2026 at 10:12:32AM -0400, me@maiyun.me wrote:
From: Zhang Maiyun <me@maiyun.me>
Signed-off-by: Zhang Maiyun <me@maiyun.me> --- nest/proto.c | 27 ++++++++++++++++++++------- proto/bgp/bgp.c | 2 +- 2 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/nest/proto.c b/nest/proto.c index 4472fe6f..2edbe142 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -1086,7 +1086,7 @@ channel_setup_in_table(struct channel *c) }
-static void +static int channel_do_start(struct channel *c) { c->proto->active_channels++; @@ -1094,9 +1094,12 @@ channel_do_start(struct channel *c) if ((c->in_keep & RIK_PREFILTER) == RIK_PREFILTER) channel_setup_in_table(c);
- CALL(c->class->start, c); + if (c->class->start) + if (c->class->start(c) != 0) + return -1;
channel_start_import(c); + return 0; }
static void @@ -1176,7 +1179,11 @@ channel_set_state(struct channel *c, uint state) ASSERT(cs == CS_DOWN || cs == CS_PAUSE);
if (cs == CS_DOWN) - channel_do_start(c); + if (channel_do_start(c) != 0) { + // Reject the channel start and return to down state + c->channel_state = CS_DOWN; + c->last_state_change = current_time(); + }
break;
@@ -1184,12 +1191,18 @@ channel_set_state(struct channel *c, uint state) ASSERT(cs == CS_DOWN || cs == CS_START || cs == CS_PAUSE);
if (cs == CS_DOWN) - channel_do_start(c); + if (channel_do_start(c) != 0) { + // Reject the channel start and return to down state + c->channel_state = CS_DOWN; + c->last_state_change = current_time(); + }
- if (!c->gr_wait && c->proto->rt_notify) - channel_start_export(c); + if (c->channel_state != CS_DOWN) { + if (!c->gr_wait && c->proto->rt_notify) + channel_start_export(c);
- channel_do_up(c); + channel_do_up(c); + } break;
case CS_PAUSE: diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 8d04a05d..1923a80d 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -3257,7 +3257,7 @@ bgp_channel_start(struct channel *C) if (ipa_zero(c->next_hop_addr)) { log(L_WARN "%s: Missing next hop address", p->p.name); - return 0; + return -1; }
/* Set link-local address for IPv6 single-hop BGP */ -- 2.54.0
-- Maria Matejka (she/her) | BIRD Team Leader | CZ.NIC, z.s.p.o.
participants (3)
-
Maria Matejka -
me@maiyun.me -
Zhang Maiyun