Merge branch 'netlink-buf'

Fixes an infinite loop if e.g. Netlink event sockets get too many
packets queued and poll() just returns POLLERR for the socket.  Also
increases the default receive buffer size for Netlink sockets to better
support systems with lots of route updates.

Closes strongswan/strongswan#1757
This commit is contained in:
Tobias Brunner 2023-07-26 15:15:24 +02:00
commit 006839b06a
5 changed files with 67 additions and 89 deletions

View File

@ -1,14 +1,6 @@
charon.plugins.kernel-netlink.buflen = <min(PAGE_SIZE, 8192)>
Buffer size for received Netlink messages.
charon.plugins.kernel-netlink.force_receive_buffer_size = no
Force maximum Netlink receive buffer on Netlink socket.
If the maximum Netlink socket receive buffer in bytes set by
_receive_buffer_size_ exceeds the system-wide maximum from
/proc/sys/net/core/rmem_max, this option can be used to override the limit.
Enabling this option requires special privileges (CAP_NET_ADMIN).
charon.plugins.kernel-netlink.fwmark =
Firewall mark to set on the routing rule that directs traffic to our routing
table.
@ -74,14 +66,16 @@ charon.plugins.kernel-netlink.process_rules = no
currently only useful if the kernel based route lookup is used (i.e. if
route installation is disabled or an inverted fwmark match is configured).
charon.plugins.kernel-netlink.receive_buffer_size = 0
charon.plugins.kernel-netlink.receive_buffer_size = 8388608
Maximum Netlink socket receive buffer in bytes.
Maximum Netlink socket receive buffer in bytes. This value controls how many
bytes of Netlink messages can be received on a Netlink socket. The default
value is set by /proc/sys/net/core/rmem_default. The specified value cannot
exceed the system-wide maximum from /proc/sys/net/core/rmem_max, unless
_force_receive_buffer_size_ is enabled.
bytes of Netlink messages can be queued to a Netlink socket. If set to 0,
the default from /proc/sys/net/core/rmem_default will apply. Note that the
kernel doubles the configured value to account for overhead. To exceed the
system-wide maximum from /proc/sys/net/core/rmem_max, special privileges
(CAP_NET_ADMIN) are necessary, otherwise, the kernel silently caps the
value.
charon.plugins.kernel-netlink.roam_events = yes
Whether to trigger roam events when interfaces, addresses or routes change.

View File

@ -57,6 +57,13 @@
#define SOL_NETLINK 270
#endif
/**
 * Default receive buffer size in bytes (8 MiB), used as fallback when the
 * receive_buffer_size setting is not configured. Can be overridden at compile
 * time by predefining NETLINK_RCVBUF_DEFAULT.
 */
#ifndef NETLINK_RCVBUF_DEFAULT
#define NETLINK_RCVBUF_DEFAULT (8 * 1024 * 1024)
#endif
typedef struct private_netlink_socket_t private_netlink_socket_t;
typedef struct private_netlink_event_socket_t private_netlink_event_socket_t;
@ -646,6 +653,29 @@ u_int netlink_get_buflen()
return buflen;
}
/**
 * Apply the configured receive buffer size to the given socket.
 *
 * The size is read from the kernel-netlink.receive_buffer_size setting and
 * defaults to NETLINK_RCVBUF_DEFAULT. A value of 0 leaves the socket
 * untouched. SO_RCVBUFFORCE is tried first; only if that fails, SO_RCVBUF is
 * used as fallback. A failure of both is logged but otherwise ignored.
 *
 * @param socket	socket to configure
 */
static void set_rcvbuf_size(int socket)
{
	int size;

	size = lib->settings->get_int(lib->settings,
							"%s.plugins.kernel-netlink.receive_buffer_size",
							NETLINK_RCVBUF_DEFAULT, lib->ns);
	if (!size)
	{	/* nothing configured, keep whatever the socket currently uses */
		return;
	}
	if (setsockopt(socket, SOL_SOCKET, SO_RCVBUFFORCE, &size,
				   sizeof(size)) != -1)
	{	/* forcing the size worked, no fallback required */
		return;
	}
	if (setsockopt(socket, SOL_SOCKET, SO_RCVBUF, &size,
				   sizeof(size)) == -1)
	{
		DBG1(DBG_KNL, "failed to set receive buffer size to %d: %s",
			 size, strerror(errno));
	}
}
/*
* Described in header
*/
@ -656,8 +686,7 @@ netlink_socket_t *netlink_socket_create(int protocol, enum_name_t *names,
struct sockaddr_nl addr = {
.nl_family = AF_NETLINK,
};
bool force_buf = FALSE;
int on = 1, rcvbuf_size = 0;
int on = 1;
INIT(this,
.public = {
@ -705,25 +734,8 @@ netlink_socket_t *netlink_socket_create(int protocol, enum_name_t *names,
ignore_result(setsockopt(this->socket, SOL_NETLINK, NETLINK_EXT_ACK, &on,
sizeof(on)));
rcvbuf_size = lib->settings->get_int(lib->settings,
"%s.plugins.kernel-netlink.receive_buffer_size",
rcvbuf_size, lib->ns);
if (rcvbuf_size)
{
int optname;
set_rcvbuf_size(this->socket);
force_buf = lib->settings->get_bool(lib->settings,
"%s.plugins.kernel-netlink.force_receive_buffer_size",
force_buf, lib->ns);
optname = force_buf ? SO_RCVBUFFORCE : SO_RCVBUF;
if (setsockopt(this->socket, SOL_SOCKET, optname, &rcvbuf_size,
sizeof(rcvbuf_size)) == -1)
{
DBG1(DBG_KNL, "failed to %supdate receive buffer size to %d: %s",
force_buf ? "forcibly " : "", rcvbuf_size, strerror(errno));
}
}
if (this->parallel)
{
lib->watcher->add(lib->watcher, this->socket, WATCHER_READ, watch, this);
@ -805,6 +817,8 @@ netlink_event_socket_t *netlink_event_socket_create(int protocol, uint32_t group
return NULL;
}
set_rcvbuf_size(this->socket);
if (bind(this->socket, (struct sockaddr*)&addr, sizeof(addr)))
{
DBG1(DBG_KNL, "unable to bind netlink event socket: %s (%d)",

View File

@ -188,8 +188,6 @@ static bool watch(private_stream_t *this, int fd, watcher_event_t event)
this->write_cb = cb;
}
break;
case WATCHER_EXCEPT:
break;
}
return keep;
}

View File

@ -251,20 +251,17 @@ static void notify_end(notify_data_t *data)
if (removed)
{
DBG3(DBG_JOB, "removed fd %d[%s%s%s] from watcher after callback", data->fd,
DBG3(DBG_JOB, "removed fd %d[%s%s] from watcher after callback", data->fd,
data->event & WATCHER_READ ? "r" : "",
data->event & WATCHER_WRITE ? "w" : "",
data->event & WATCHER_EXCEPT ? "e" : "");
data->event & WATCHER_WRITE ? "w" : "");
}
else if (updated)
{
DBG3(DBG_JOB, "updated fd %d[%s%s%s] to %d[%s%s%s] after callback", data->fd,
DBG3(DBG_JOB, "updated fd %d[%s%s] to %d[%s%s] after callback", data->fd,
(updated | data->event) & WATCHER_READ ? "r" : "",
(updated | data->event) & WATCHER_WRITE ? "w" : "",
(updated | data->event) & WATCHER_EXCEPT ? "e" : "", data->fd,
(updated | data->event) & WATCHER_WRITE ? "w" : "", data->fd,
updated & WATCHER_READ ? "r" : "",
updated & WATCHER_WRITE ? "w" : "",
updated & WATCHER_EXCEPT ? "e" : "");
updated & WATCHER_WRITE ? "w" : "");
}
free(data);
}
@ -335,27 +332,6 @@ static inline int find_revents(struct pollfd *pfd, int count, int fd)
return 0;
}
/**
 * Determine whether an entry is registered for the given event and whether
 * the events returned by poll() signal it.
 *
 * POLLHUP and POLLNVAL count as a signal for every event type, in addition to
 * the type-specific flag (POLLIN, POLLOUT or POLLERR).
 *
 * @param entry		entry to check
 * @param event		single event type to check for
 * @param revents	events returned by poll() for the entry's fd
 * @return			TRUE if the entry waits for the event and it got signaled
 */
static inline bool entry_ready(entry_t *entry, watcher_event_t event,
							   int revents)
{
	int signaled = POLLHUP | POLLNVAL;

	if (!(entry->events & event))
	{
		return FALSE;
	}
	switch (event)
	{
		case WATCHER_READ:
			signaled |= POLLIN;
			break;
		case WATCHER_WRITE:
			signaled |= POLLOUT;
			break;
		case WATCHER_EXCEPT:
			signaled |= POLLERR;
			break;
		default:
			/* not a single known event type */
			return FALSE;
	}
	return (revents & signaled) != 0;
}
#if DEBUG_LEVEL >= 2
#define reset_log(buf, pos, len) ({ buf[0] = '\0'; pos = buf; len = sizeof(buf); })
#define reset_event_log(buf, pos) ({ pos = buf; })
@ -431,11 +407,6 @@ static job_requeue_t watch(private_watcher_t *this)
log_event(eventpos, 'w');
pfd[count].events |= POLLOUT;
}
if (entry->events & WATCHER_EXCEPT)
{
log_event(eventpos, 'e');
pfd[count].events |= POLLERR;
}
end_event_log(eventpos);
log_fd(logpos, loglen, entry->fd, eventbuf);
count++;
@ -505,23 +476,27 @@ static job_requeue_t watch(private_watcher_t *this)
}
reset_event_log(eventbuf, eventpos);
revents = find_revents(pfd, count, entry->fd);
if (entry_ready(entry, WATCHER_EXCEPT, revents))
if (revents & POLLERR)
{
log_event(eventpos, 'e');
notify(this, entry, WATCHER_EXCEPT);
}
else
{
if (entry_ready(entry, WATCHER_READ, revents))
if (revents & POLLIN)
{
log_event(eventpos, 'r');
notify(this, entry, WATCHER_READ);
}
if (entry_ready(entry, WATCHER_WRITE, revents))
if (revents & POLLOUT)
{
log_event(eventpos, 'w');
notify(this, entry, WATCHER_WRITE);
}
if (entry->events & WATCHER_READ &&
revents & (POLLIN | POLLERR | POLLHUP | POLLNVAL))
{
notify(this, entry, WATCHER_READ);
}
if (entry->events & WATCHER_WRITE &&
revents & (POLLOUT | POLLERR | POLLHUP | POLLNVAL))
{
notify(this, entry, WATCHER_WRITE);
}
end_event_log(eventpos);
log_fd(logpos, loglen, entry->fd, eventbuf);
@ -571,10 +546,9 @@ METHOD(watcher_t, add, void,
.data = data,
);
DBG3(DBG_JOB, "adding fd %d[%s%s%s] to watcher", fd,
DBG3(DBG_JOB, "adding fd %d[%s%s] to watcher", fd,
events & WATCHER_READ ? "r" : "",
events & WATCHER_WRITE ? "w" : "",
events & WATCHER_EXCEPT ? "e" : "");
events & WATCHER_WRITE ? "w" : "");
this->mutex->lock(this->mutex);
add_entry(this, entry);
@ -631,10 +605,9 @@ METHOD(watcher_t, remove_, void,
{
update_and_unlock(this);
DBG3(DBG_JOB, "removed fd %d[%s%s%s] from watcher", fd,
DBG3(DBG_JOB, "removed fd %d[%s%s] from watcher", fd,
found & WATCHER_READ ? "r" : "",
found & WATCHER_WRITE ? "w" : "",
found & WATCHER_EXCEPT ? "e" : "");
found & WATCHER_WRITE ? "w" : "");
}
else
{

View File

@ -55,7 +55,6 @@ typedef bool (*watcher_cb_t)(void *data, int fd, watcher_event_t event);
enum watcher_event_t {
WATCHER_READ = (1<<0),
WATCHER_WRITE = (1<<1),
WATCHER_EXCEPT = (1<<2),
};
/**