I think this would be a very good addition to the program. I'm the one who originally had infinite loops in Bird due to heavy netlink traffic; in my case it was probably conntrackd, keeping track of ~1M flows. The fix by Ondrej and Jan solved the immediate problem, of infinite loops and hangs, but I still worry if the message drops might cause undefined behavior. Increasing the buffer seems logical. On 12/30/2016 03:53 PM, Michal wrote:
On 2016-12-30 13:30, Michal wrote:
On 2016-12-30 13:26, Ondrej Zajicek wrote:
On Fri, Dec 30, 2016 at 01:13:59PM +0100, Michal wrote:
Stupid... increasing read buffer helped of course
sysctl -w net.core.rmem_default=1064960
I see, the bird is not increasing the read buffer on it's netlink socket. Would it be bad idea to create an configuration option for that?
Yes, that is probably a good idea.
Ok, let's see if can pull up some C skills.
So here comes the ugly patch. I can confirm that rising rcvbuf size to something bigger fixes my problem.
conf/conf.c | 1 + conf/conf.h | 1 + sysdep/linux/netlink.Y | 7 ++++++- sysdep/linux/netlink.c | 18 ++++++++++++++---- 4 files changed, 22 insertions(+), 5 deletions(-)
diff --git a/conf/conf.c b/conf/conf.c index 0a4e3f8c..b8d77bdd 100644 --- a/conf/conf.c +++ b/conf/conf.c @@ -104,6 +104,7 @@ config_alloc(const byte *name) c->tf_route = c->tf_proto = (struct timeformat){"%T", "%F", 20*3600}; c->tf_base = c->tf_log = (struct timeformat){"%F %T", NULL, 0}; c->gr_wait = DEFAULT_GR_WAIT; + c->netlink_rcvbuf = 0;
return c; } diff --git a/conf/conf.h b/conf/conf.h index 41cb434f..cf0bfc67 100644 --- a/conf/conf.h +++ b/conf/conf.h @@ -55,6 +55,7 @@ struct config { int obstacle_count; /* Number of items blocking freeing of this config */ int shutdown; /* This is a pseudo-config for daemon shutdown */ bird_clock_t load_time; /* When we've got this configuration */ + size_t netlink_rcvbuf; /* netlink receive buffer size */ };
/* Please don't use these variables in protocols. Use proto_config->global instead. */ diff --git a/sysdep/linux/netlink.Y b/sysdep/linux/netlink.Y index f577244d..d7dc3347 100644 --- a/sysdep/linux/netlink.Y +++ b/sysdep/linux/netlink.Y @@ -10,7 +10,7 @@ CF_HDR
CF_DECLS
-CF_KEYWORDS(KERNEL, TABLE, METRIC, KRT_PREFSRC, KRT_REALM, KRT_SCOPE, KRT_MTU, KRT_WINDOW, +CF_KEYWORDS(NETLINK, RCVBUF, KERNEL, TABLE, METRIC, KRT_PREFSRC, KRT_REALM, KRT_SCOPE, KRT_MTU, KRT_WINDOW, KRT_RTT, KRT_RTTVAR, KRT_SSTRESH, KRT_CWND, KRT_ADVMSS, KRT_REORDERING, KRT_HOPLIMIT, KRT_INITCWND, KRT_RTO_MIN, KRT_INITRWND, KRT_QUICKACK, KRT_LOCK_MTU, KRT_LOCK_WINDOW, KRT_LOCK_RTT, KRT_LOCK_RTTVAR, @@ -19,6 +19,11 @@ CF_KEYWORDS(KERNEL, TABLE, METRIC, KRT_PREFSRC, KRT_REALM, KRT_SCOPE, KRT_MTU, K
CF_GRAMMAR
+CF_ADDTO(conf, netlink) + +netlink: + NETLINK RCVBUF expr ';' { new_config->netlink_rcvbuf = $3; } + CF_ADDTO(kern_proto, kern_proto kern_sys_item ';')
kern_sys_item: diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index 22313f43..28fd26f3 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -1533,11 +1533,12 @@ nl_async_err_hook(sock *sk, int e UNUSED) }
static void -nl_open_async(void) +nl_open_async(struct proto *p) { sock *sk; struct sockaddr_nl sa; int fd; + size_t rcvbuf;
if (nl_async_sk) return; @@ -1551,6 +1552,15 @@ nl_open_async(void) return; }
+ rcvbuf = p->cf->global->netlink_rcvbuf; + if (rcvbuf > 0) + { + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) < 0) + { + log(L_ERR "Unable to set netlink socket receive buffer size: %m"); + } + } + bzero(&sa, sizeof(sa)); sa.nl_family = AF_NETLINK; #ifdef IPV6 @@ -1603,7 +1613,7 @@ krt_sys_start(struct krt_proto *p) HASH_INSERT2(nl_table_map, RTH, krt_pool, p);
nl_open(); - nl_open_async(); + nl_open_async(&(p->p));
return 1; } @@ -1685,10 +1695,10 @@ krt_sys_get_attr(eattr *a, byte *buf, int buflen UNUSED)
void -kif_sys_start(struct kif_proto *p UNUSED) +kif_sys_start(struct kif_proto *p) { nl_open(); - nl_open_async(); + nl_open_async(&(p->p)); }
void