aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2016-03-18 22:25:46 -0400
committer: David S. Miller <davem@davemloft.net> 2016-03-18 22:25:46 -0400
commit: 7fa7728d23e7928dfb6e9f5540a97fb2d562b989 (patch)
tree: 4b57cd88bdb39ff9025cafb3f9b8fcbe22660b6c
parent: 79d3b59a93ba25f3b2c72eb4099c189d41d30204 (diff)
parent: a3382e408b645b4f68ec01f9c048e356c62598fb (diff)
Merge branch 'rds-buffer-tuning'
Sowmini Varadhan says: ==================== RDS: TCP: tunable socket buffer parameters Patch 1 uses sysctl to create tunable socket buffer size parameters. Patch 2 removes an unused constant. v2: use sysctl v3: review comments from Santosh Shilimkar, Eric Dumazet v4: review comments from Hannes Sowa ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/rds/tcp.c145
1 files changed, 134 insertions, 11 deletions
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index ad60299b088b..61ed2a8764ba 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -52,7 +52,34 @@ static LIST_HEAD(rds_tcp_conn_list);
52 52
53static struct kmem_cache *rds_tcp_conn_slab; 53static struct kmem_cache *rds_tcp_conn_slab;
54 54
55#define RDS_TCP_DEFAULT_BUFSIZE (128 * 1024) 55static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
56 void __user *buffer, size_t *lenp,
57 loff_t *fpos);
58
59int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF;
60int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF;
61
62static struct ctl_table rds_tcp_sysctl_table[] = {
63#define RDS_TCP_SNDBUF 0
64 {
65 .procname = "rds_tcp_sndbuf",
66 /* data is per-net pointer */
67 .maxlen = sizeof(int),
68 .mode = 0644,
69 .proc_handler = rds_tcp_skbuf_handler,
70 .extra1 = &rds_tcp_min_sndbuf,
71 },
72#define RDS_TCP_RCVBUF 1
73 {
74 .procname = "rds_tcp_rcvbuf",
75 /* data is per-net pointer */
76 .maxlen = sizeof(int),
77 .mode = 0644,
78 .proc_handler = rds_tcp_skbuf_handler,
79 .extra1 = &rds_tcp_min_rcvbuf,
80 },
81 { }
82};
56 83
57/* doing it this way avoids calling tcp_sk() */ 84/* doing it this way avoids calling tcp_sk() */
58void rds_tcp_nonagle(struct socket *sock) 85void rds_tcp_nonagle(struct socket *sock)
@@ -66,15 +93,6 @@ void rds_tcp_nonagle(struct socket *sock)
66 set_fs(oldfs); 93 set_fs(oldfs);
67} 94}
68 95
69/* All module specific customizations to the RDS-TCP socket should be done in
70 * rds_tcp_tune() and applied after socket creation. In general these
71 * customizations should be tunable via module_param()
72 */
73void rds_tcp_tune(struct socket *sock)
74{
75 rds_tcp_nonagle(sock);
76}
77
78u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc) 96u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)
79{ 97{
80 return tcp_sk(tc->t_sock->sk)->snd_nxt; 98 return tcp_sk(tc->t_sock->sk)->snd_nxt;
@@ -272,8 +290,34 @@ static int rds_tcp_netid;
272struct rds_tcp_net { 290struct rds_tcp_net {
273 struct socket *rds_tcp_listen_sock; 291 struct socket *rds_tcp_listen_sock;
274 struct work_struct rds_tcp_accept_w; 292 struct work_struct rds_tcp_accept_w;
293 struct ctl_table_header *rds_tcp_sysctl;
294 struct ctl_table *ctl_table;
295 int sndbuf_size;
296 int rcvbuf_size;
275}; 297};
276 298
299/* All module specific customizations to the RDS-TCP socket should be done in
300 * rds_tcp_tune() and applied after socket creation.
301 */
302void rds_tcp_tune(struct socket *sock)
303{
304 struct sock *sk = sock->sk;
305 struct net *net = sock_net(sk);
306 struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
307
308 rds_tcp_nonagle(sock);
309 lock_sock(sk);
310 if (rtn->sndbuf_size > 0) {
311 sk->sk_sndbuf = rtn->sndbuf_size;
312 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
313 }
314 if (rtn->rcvbuf_size > 0) {
315 sk->sk_sndbuf = rtn->rcvbuf_size;
316 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
317 }
318 release_sock(sk);
319}
320
277static void rds_tcp_accept_worker(struct work_struct *work) 321static void rds_tcp_accept_worker(struct work_struct *work)
278{ 322{
279 struct rds_tcp_net *rtn = container_of(work, 323 struct rds_tcp_net *rtn = container_of(work,
@@ -295,20 +339,60 @@ void rds_tcp_accept_work(struct sock *sk)
295static __net_init int rds_tcp_init_net(struct net *net) 339static __net_init int rds_tcp_init_net(struct net *net)
296{ 340{
297 struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); 341 struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
342 struct ctl_table *tbl;
343 int err = 0;
344
345 memset(rtn, 0, sizeof(*rtn));
298 346
347 /* {snd, rcv}buf_size default to 0, which implies we let the
348 * stack pick the value, and permit auto-tuning of buffer size.
349 */
350 if (net == &init_net) {
351 tbl = rds_tcp_sysctl_table;
352 } else {
353 tbl = kmemdup(rds_tcp_sysctl_table,
354 sizeof(rds_tcp_sysctl_table), GFP_KERNEL);
355 if (!tbl) {
356 pr_warn("could not set allocate syctl table\n");
357 return -ENOMEM;
358 }
359 rtn->ctl_table = tbl;
360 }
361 tbl[RDS_TCP_SNDBUF].data = &rtn->sndbuf_size;
362 tbl[RDS_TCP_RCVBUF].data = &rtn->rcvbuf_size;
363 rtn->rds_tcp_sysctl = register_net_sysctl(net, "net/rds/tcp", tbl);
364 if (!rtn->rds_tcp_sysctl) {
365 pr_warn("could not register sysctl\n");
366 err = -ENOMEM;
367 goto fail;
368 }
299 rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net); 369 rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net);
300 if (!rtn->rds_tcp_listen_sock) { 370 if (!rtn->rds_tcp_listen_sock) {
301 pr_warn("could not set up listen sock\n"); 371 pr_warn("could not set up listen sock\n");
302 return -EAFNOSUPPORT; 372 unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
373 rtn->rds_tcp_sysctl = NULL;
374 err = -EAFNOSUPPORT;
375 goto fail;
303 } 376 }
304 INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker); 377 INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker);
305 return 0; 378 return 0;
379
380fail:
381 if (net != &init_net)
382 kfree(tbl);
383 return err;
306} 384}
307 385
308static void __net_exit rds_tcp_exit_net(struct net *net) 386static void __net_exit rds_tcp_exit_net(struct net *net)
309{ 387{
310 struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); 388 struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
311 389
390 if (rtn->rds_tcp_sysctl)
391 unregister_net_sysctl_table(rtn->rds_tcp_sysctl);
392
393 if (net != &init_net && rtn->ctl_table)
394 kfree(rtn->ctl_table);
395
312 /* If rds_tcp_exit_net() is called as a result of netns deletion, 396 /* If rds_tcp_exit_net() is called as a result of netns deletion,
313 * the rds_tcp_kill_sock() device notifier would already have cleaned 397 * the rds_tcp_kill_sock() device notifier would already have cleaned
314 * up the listen socket, thus there is no work to do in this function. 398 * up the listen socket, thus there is no work to do in this function.
@@ -383,6 +467,45 @@ static struct notifier_block rds_tcp_dev_notifier = {
383 .priority = -10, /* must be called after other network notifiers */ 467 .priority = -10, /* must be called after other network notifiers */
384}; 468};
385 469
470/* when sysctl is used to modify some kernel socket parameters,this
471 * function resets the RDS connections in that netns so that we can
472 * restart with new parameters. The assumption is that such reset
473 * events are few and far-between.
474 */
475static void rds_tcp_sysctl_reset(struct net *net)
476{
477 struct rds_tcp_connection *tc, *_tc;
478
479 spin_lock_irq(&rds_tcp_conn_lock);
480 list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
481 struct net *c_net = read_pnet(&tc->conn->c_net);
482
483 if (net != c_net || !tc->t_sock)
484 continue;
485
486 rds_conn_drop(tc->conn); /* reconnect with new parameters */
487 }
488 spin_unlock_irq(&rds_tcp_conn_lock);
489}
490
491static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
492 void __user *buffer, size_t *lenp,
493 loff_t *fpos)
494{
495 struct net *net = current->nsproxy->net_ns;
496 int err;
497
498 err = proc_dointvec_minmax(ctl, write, buffer, lenp, fpos);
499 if (err < 0) {
500 pr_warn("Invalid input. Must be >= %d\n",
501 *(int *)(ctl->extra1));
502 return err;
503 }
504 if (write)
505 rds_tcp_sysctl_reset(net);
506 return 0;
507}
508
386static void rds_tcp_exit(void) 509static void rds_tcp_exit(void)
387{ 510{
388 rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); 511 rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);