author     Jonathan Herman <hermanjl@cs.unc.edu>    2013-01-17 16:15:55 -0500
committer  Jonathan Herman <hermanjl@cs.unc.edu>    2013-01-17 16:15:55 -0500
commit     8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
tree       a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /net/rds
parent     406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'net/rds')
-rw-r--r--  net/rds/cong.c              1
-rw-r--r--  net/rds/connection.c        1
-rw-r--r--  net/rds/ib.c                1
-rw-r--r--  net/rds/ib.h                5
-rw-r--r--  net/rds/ib_cm.c            19
-rw-r--r--  net/rds/ib_rdma.c         112
-rw-r--r--  net/rds/ib_recv.c          42
-rw-r--r--  net/rds/ib_sysctl.c        11
-rw-r--r--  net/rds/info.c              7
-rw-r--r--  net/rds/iw.c                1
-rw-r--r--  net/rds/iw_cm.c             2
-rw-r--r--  net/rds/iw_rdma.c          15
-rw-r--r--  net/rds/iw_recv.c           9
-rw-r--r--  net/rds/iw_sysctl.c        11
-rw-r--r--  net/rds/loop.c              4
-rw-r--r--  net/rds/message.c           1
-rw-r--r--  net/rds/page.c             10
-rw-r--r--  net/rds/rdma_transport.c    1
-rw-r--r--  net/rds/rds.h              10
-rw-r--r--  net/rds/recv.c              6
-rw-r--r--  net/rds/send.c              5
-rw-r--r--  net/rds/stats.c             1
-rw-r--r--  net/rds/sysctl.c           11
-rw-r--r--  net/rds/tcp.c               1
-rw-r--r--  net/rds/tcp_connect.c       4
-rw-r--r--  net/rds/tcp_listen.c        6
-rw-r--r--  net/rds/tcp_recv.c         15
-rw-r--r--  net/rds/tcp_send.c          4
-rw-r--r--  net/rds/threads.c           1
29 files changed, 174 insertions, 143 deletions
diff --git a/net/rds/cong.c b/net/rds/cong.c
index e5b65acd650..6daaa49d133 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -34,7 +34,6 @@
 #include <linux/types.h>
 #include <linux/rbtree.h>
 #include <linux/bitops.h>
-#include <linux/export.h>
 
 #include "rds.h"
 
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 9e07c756d1f..9334d892366 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -33,7 +33,6 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/slab.h>
-#include <linux/export.h>
 #include <net/inet_hashtables.h>
 
 #include "rds.h"
diff --git a/net/rds/ib.c b/net/rds/ib.c
index b4c8b0022fe..3b83086bcc3 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -38,7 +38,6 @@
 #include <linux/if_arp.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
-#include <linux/module.h>
 
 #include "rds.h"
 #include "ib.h"
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 7280ab8810c..edfaaaf164e 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -50,7 +50,7 @@ struct rds_ib_cache_head {
 };
 
 struct rds_ib_refill_cache {
-	struct rds_ib_cache_head __percpu *percpu;
+	struct rds_ib_cache_head *percpu;
 	struct list_head	*xfer;
 	struct list_head	*ready;
 };
@@ -186,7 +186,8 @@ struct rds_ib_device {
 	struct work_struct	free_work;
 };
 
-#define ibdev_to_node(ibdev) dev_to_node(ibdev->dma_device)
+#define pcidev_to_node(pcidev) pcibus_to_node(pcidev->bus)
+#define ibdev_to_node(ibdev) pcidev_to_node(to_pci_dev(ibdev->dma_device))
 #define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev)
 
 /* bits for i_ack_flags */
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 31b74f5e61a..cd67026be2d 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -375,21 +375,23 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
 		goto out;
 	}
 
-	ic->i_sends = vzalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work),
+	ic->i_sends = vmalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work),
 				   ibdev_to_node(dev));
 	if (!ic->i_sends) {
 		ret = -ENOMEM;
 		rdsdebug("send allocation failed\n");
 		goto out;
 	}
+	memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work));
 
-	ic->i_recvs = vzalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work),
+	ic->i_recvs = vmalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work),
 				   ibdev_to_node(dev));
 	if (!ic->i_recvs) {
 		ret = -ENOMEM;
 		rdsdebug("recv allocation failed\n");
 		goto out;
 	}
+	memset(ic->i_recvs, 0, ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work));
 
 	rds_ib_recv_init_ack(ic);
 
@@ -434,11 +436,12 @@ static u32 rds_ib_protocol_compatible(struct rdma_cm_event *event)
 		version = RDS_PROTOCOL_3_0;
 		while ((common >>= 1) != 0)
 			version++;
-	} else
-		printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using incompatible protocol version %u.%u\n",
-				&dp->dp_saddr,
-				dp->dp_protocol_major,
-				dp->dp_protocol_minor);
+	}
+	printk_ratelimited(KERN_NOTICE "RDS: Connection from %pI4 using "
+			"incompatible protocol version %u.%u\n",
+			&dp->dp_saddr,
+			dp->dp_protocol_major,
+			dp->dp_protocol_minor);
 	return version;
 }
 
@@ -748,7 +751,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 	int ret;
 
 	/* XXX too lazy? */
-	ic = kzalloc(sizeof(struct rds_ib_connection), gfp);
+	ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL);
 	if (!ic)
 		return -ENOMEM;
 
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index e8fdb172adb..819c35a0d9c 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -33,10 +33,10 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/rculist.h>
-#include <linux/llist.h>
 
 #include "rds.h"
 #include "ib.h"
+#include "xlist.h"
 
 static DEFINE_PER_CPU(unsigned long, clean_list_grace);
 #define CLEAN_LIST_BUSY_BIT 0
@@ -49,7 +49,7 @@ struct rds_ib_mr {
 	struct rds_ib_mr_pool	*pool;
 	struct ib_fmr		*fmr;
 
-	struct llist_node	llnode;
+	struct xlist_head	xlist;
 
 	/* unmap_list is for freeing */
 	struct list_head	unmap_list;
@@ -71,9 +71,9 @@ struct rds_ib_mr_pool {
 	atomic_t		item_count;	/* total # of MRs */
 	atomic_t		dirty_count;	/* # dirty of MRs */
 
-	struct llist_head	drop_list;	/* MRs that have reached their max_maps limit */
-	struct llist_head	free_list;	/* unused MRs */
-	struct llist_head	clean_list;	/* global unused & unamapped MRs */
+	struct xlist_head	drop_list;	/* MRs that have reached their max_maps limit */
+	struct xlist_head	free_list;	/* unused MRs */
+	struct xlist_head	clean_list;	/* global unused & unamapped MRs */
 	wait_queue_head_t	flush_wait;
 
 	atomic_t		free_pinned;	/* memory pinned by free MRs */
@@ -220,9 +220,9 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
 	if (!pool)
 		return ERR_PTR(-ENOMEM);
 
-	init_llist_head(&pool->free_list);
-	init_llist_head(&pool->drop_list);
-	init_llist_head(&pool->clean_list);
+	INIT_XLIST_HEAD(&pool->free_list);
+	INIT_XLIST_HEAD(&pool->drop_list);
+	INIT_XLIST_HEAD(&pool->clean_list);
 	mutex_init(&pool->flush_lock);
 	init_waitqueue_head(&pool->flush_wait);
 	INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
@@ -260,18 +260,26 @@ void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
 	kfree(pool);
 }
 
+static void refill_local(struct rds_ib_mr_pool *pool, struct xlist_head *xl,
+			 struct rds_ib_mr **ibmr_ret)
+{
+	struct xlist_head *ibmr_xl;
+	ibmr_xl = xlist_del_head_fast(xl);
+	*ibmr_ret = list_entry(ibmr_xl, struct rds_ib_mr, xlist);
+}
+
 static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool)
 {
 	struct rds_ib_mr *ibmr = NULL;
-	struct llist_node *ret;
+	struct xlist_head *ret;
 	unsigned long *flag;
 
 	preempt_disable();
 	flag = &__get_cpu_var(clean_list_grace);
 	set_bit(CLEAN_LIST_BUSY_BIT, flag);
-	ret = llist_del_first(&pool->clean_list);
+	ret = xlist_del_head(&pool->clean_list);
 	if (ret)
-		ibmr = llist_entry(ret, struct rds_ib_mr, llnode);
+		ibmr = list_entry(ret, struct rds_ib_mr, xlist);
 
 	clear_bit(CLEAN_LIST_BUSY_BIT, flag);
 	preempt_enable();
@@ -521,44 +529,46 @@ static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int fr
 }
 
 /*
- * given an llist of mrs, put them all into the list_head for more processing
+ * given an xlist of mrs, put them all into the list_head for more processing
  */
-static void llist_append_to_list(struct llist_head *llist, struct list_head *list)
+static void xlist_append_to_list(struct xlist_head *xlist, struct list_head *list)
 {
 	struct rds_ib_mr *ibmr;
-	struct llist_node *node;
-	struct llist_node *next;
-
-	node = llist_del_all(llist);
-	while (node) {
-		next = node->next;
-		ibmr = llist_entry(node, struct rds_ib_mr, llnode);
+	struct xlist_head splice;
+	struct xlist_head *cur;
+	struct xlist_head *next;
+
+	splice.next = NULL;
+	xlist_splice(xlist, &splice);
+	cur = splice.next;
+	while (cur) {
+		next = cur->next;
+		ibmr = list_entry(cur, struct rds_ib_mr, xlist);
 		list_add_tail(&ibmr->unmap_list, list);
-		node = next;
+		cur = next;
 	}
 }
 
 /*
- * this takes a list head of mrs and turns it into linked llist nodes
- * of clusters. Each cluster has linked llist nodes of
- * MR_CLUSTER_SIZE mrs that are ready for reuse.
+ * this takes a list head of mrs and turns it into an xlist of clusters.
+ * each cluster has an xlist of MR_CLUSTER_SIZE mrs that are ready for
+ * reuse.
  */
-static void list_to_llist_nodes(struct rds_ib_mr_pool *pool,
-				struct list_head *list,
-				struct llist_node **nodes_head,
-				struct llist_node **nodes_tail)
+static void list_append_to_xlist(struct rds_ib_mr_pool *pool,
+				struct list_head *list, struct xlist_head *xlist,
+				struct xlist_head **tail_ret)
 {
 	struct rds_ib_mr *ibmr;
-	struct llist_node *cur = NULL;
-	struct llist_node **next = nodes_head;
+	struct xlist_head *cur_mr = xlist;
+	struct xlist_head *tail_mr = NULL;
 
 	list_for_each_entry(ibmr, list, unmap_list) {
-		cur = &ibmr->llnode;
-		*next = cur;
-		next = &cur->next;
+		tail_mr = &ibmr->xlist;
+		tail_mr->next = NULL;
+		cur_mr->next = tail_mr;
+		cur_mr = tail_mr;
 	}
-	*next = NULL;
-	*nodes_tail = cur;
+	*tail_ret = tail_mr;
 }
 
 /*
@@ -571,8 +581,8 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 				int free_all, struct rds_ib_mr **ibmr_ret)
 {
 	struct rds_ib_mr *ibmr, *next;
-	struct llist_node *clean_nodes;
-	struct llist_node *clean_tail;
+	struct xlist_head clean_xlist;
+	struct xlist_head *clean_tail;
 	LIST_HEAD(unmap_list);
 	LIST_HEAD(fmr_list);
 	unsigned long unpinned = 0;
@@ -593,7 +603,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 
 			prepare_to_wait(&pool->flush_wait, &wait,
 					TASK_UNINTERRUPTIBLE);
-			if (llist_empty(&pool->clean_list))
+			if (xlist_empty(&pool->clean_list))
 				schedule();
 
 			ibmr = rds_ib_reuse_fmr(pool);
@@ -618,10 +628,10 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 	/* Get the list of all MRs to be dropped. Ordering matters -
 	 * we want to put drop_list ahead of free_list.
 	 */
-	llist_append_to_list(&pool->drop_list, &unmap_list);
-	llist_append_to_list(&pool->free_list, &unmap_list);
+	xlist_append_to_list(&pool->drop_list, &unmap_list);
+	xlist_append_to_list(&pool->free_list, &unmap_list);
 	if (free_all)
-		llist_append_to_list(&pool->clean_list, &unmap_list);
+		xlist_append_to_list(&pool->clean_list, &unmap_list);
 
 	free_goal = rds_ib_flush_goal(pool, free_all);
 
@@ -653,22 +663,22 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 	if (!list_empty(&unmap_list)) {
 		/* we have to make sure that none of the things we're about
 		 * to put on the clean list would race with other cpus trying
-		 * to pull items off. The llist would explode if we managed to
+		 * to pull items off. The xlist would explode if we managed to
 		 * remove something from the clean list and then add it back again
-		 * while another CPU was spinning on that same item in llist_del_first.
+		 * while another CPU was spinning on that same item in xlist_del_head.
 		 *
-		 * This is pretty unlikely, but just in case wait for an llist grace period
+		 * This is pretty unlikely, but just in case wait for an xlist grace period
 		 * here before adding anything back into the clean list.
 		 */
 		wait_clean_list_grace();
 
-		list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail);
+		list_append_to_xlist(pool, &unmap_list, &clean_xlist, &clean_tail);
 		if (ibmr_ret)
-			*ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode);
+			refill_local(pool, &clean_xlist, ibmr_ret);
 
-		/* more than one entry in llist nodes */
-		if (clean_nodes->next)
-			llist_add_batch(clean_nodes->next, clean_tail, &pool->clean_list);
+		/* refill_local may have emptied our list */
+		if (!xlist_empty(&clean_xlist))
+			xlist_add(clean_xlist.next, clean_tail, &pool->clean_list);
 
 	}
 
@@ -701,9 +711,9 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
 
 	/* Return it to the pool's free list */
 	if (ibmr->remap_count >= pool->fmr_attr.max_maps)
-		llist_add(&ibmr->llnode, &pool->drop_list);
+		xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->drop_list);
 	else
-		llist_add(&ibmr->llnode, &pool->free_list);
+		xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->free_list);
 
 	atomic_add(ibmr->sg_len, &pool->free_pinned);
 	atomic_inc(&pool->dirty_count);
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 8eb9501e3d6..e29e0ca32f7 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -339,8 +339,8 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
 	sge->length = sizeof(struct rds_header);
 
 	sge = &recv->r_sge[1];
-	sge->addr = ib_sg_dma_address(ic->i_cm_id->device, &recv->r_frag->f_sg);
-	sge->length = ib_sg_dma_len(ic->i_cm_id->device, &recv->r_frag->f_sg);
+	sge->addr = sg_dma_address(&recv->r_frag->f_sg);
+	sge->length = sg_dma_len(&recv->r_frag->f_sg);
 
 	ret = 0;
 out:
@@ -381,10 +381,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill)
 		ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
 		rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
 			 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
-			 (long) ib_sg_dma_address(
-				ic->i_cm_id->device,
-				&recv->r_frag->f_sg),
-			 ret);
+			 (long) sg_dma_address(&recv->r_frag->f_sg), ret);
 		if (ret) {
 			rds_ib_conn_error(conn, "recv post on "
 			       "%pI4 returned %d, disconnecting and "
@@ -421,21 +418,20 @@ static void rds_ib_recv_cache_put(struct list_head *new_item,
 				 struct rds_ib_refill_cache *cache)
 {
 	unsigned long flags;
+	struct rds_ib_cache_head *chp;
 	struct list_head *old;
-	struct list_head __percpu *chpfirst;
 
 	local_irq_save(flags);
 
-	chpfirst = __this_cpu_read(cache->percpu->first);
-	if (!chpfirst)
+	chp = per_cpu_ptr(cache->percpu, smp_processor_id());
+	if (!chp->first)
 		INIT_LIST_HEAD(new_item);
 	else /* put on front */
-		list_add_tail(new_item, chpfirst);
+		list_add_tail(new_item, chp->first);
+	chp->first = new_item;
+	chp->count++;
 
-	__this_cpu_write(chpfirst, new_item);
-	__this_cpu_inc(cache->percpu->count);
-
-	if (__this_cpu_read(cache->percpu->count) < RDS_IB_RECYCLE_BATCH_COUNT)
+	if (chp->count < RDS_IB_RECYCLE_BATCH_COUNT)
 		goto end;
 
 	/*
@@ -447,13 +443,12 @@ static void rds_ib_recv_cache_put(struct list_head *new_item,
 	do {
 		old = xchg(&cache->xfer, NULL);
 		if (old)
-			list_splice_entire_tail(old, chpfirst);
-		old = cmpxchg(&cache->xfer, NULL, chpfirst);
+			list_splice_entire_tail(old, chp->first);
+		old = cmpxchg(&cache->xfer, NULL, chp->first);
 	} while (old);
 
-
-	__this_cpu_write(chpfirst, NULL);
-	__this_cpu_write(cache->percpu->count, 0);
+	chp->first = NULL;
+	chp->count = 0;
 end:
 	local_irq_restore(flags);
 }
@@ -768,7 +763,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
 		to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
 		BUG_ON(to_copy & 7); /* Must be 64bit aligned. */
 
-		addr = kmap_atomic(sg_page(&frag->f_sg));
+		addr = kmap_atomic(sg_page(&frag->f_sg), KM_SOFTIRQ0);
 
 		src = addr + frag_off;
 		dst = (void *)map->m_page_addrs[map_page] + map_off;
@@ -778,7 +773,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
 			uncongested |= ~(*src) & *dst;
 			*dst++ = *src++;
 		}
-		kunmap_atomic(addr);
+		kunmap_atomic(addr, KM_SOFTIRQ0);
 
 		copied += to_copy;
 
@@ -831,7 +826,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
 
 	if (data_len < sizeof(struct rds_header)) {
 		rds_ib_conn_error(conn, "incoming message "
-		       "from %pI4 didn't include a "
+		       "from %pI4 didn't inclue a "
 		       "header, disconnecting and "
 		       "reconnecting\n",
 		       &conn->c_faddr);
@@ -924,7 +919,8 @@ static void rds_ib_process_recv(struct rds_connection *conn,
 		rds_ib_cong_recv(conn, ibinc);
 	else {
 		rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
-				  &ibinc->ii_inc, GFP_ATOMIC);
+				  &ibinc->ii_inc, GFP_ATOMIC,
+				  KM_SOFTIRQ0);
 		state->ack_next = be64_to_cpu(hdr->h_sequence);
 		state->ack_next_valid = 1;
 	}
diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c
index 7e643bafb4a..1253b006efd 100644
--- a/net/rds/ib_sysctl.c
+++ b/net/rds/ib_sysctl.c
@@ -106,15 +106,22 @@ static ctl_table rds_ib_sysctl_table[] = {
 	{ }
 };
 
+static struct ctl_path rds_ib_sysctl_path[] = {
+	{ .procname = "net", },
+	{ .procname = "rds", },
+	{ .procname = "ib", },
+	{ }
+};
+
 void rds_ib_sysctl_exit(void)
 {
 	if (rds_ib_sysctl_hdr)
-		unregister_net_sysctl_table(rds_ib_sysctl_hdr);
+		unregister_sysctl_table(rds_ib_sysctl_hdr);
 }
 
 int rds_ib_sysctl_init(void)
 {
-	rds_ib_sysctl_hdr = register_net_sysctl(&init_net, "net/rds/ib", rds_ib_sysctl_table);
+	rds_ib_sysctl_hdr = register_sysctl_paths(rds_ib_sysctl_path, rds_ib_sysctl_table);
 	if (!rds_ib_sysctl_hdr)
 		return -ENOMEM;
 	return 0;
diff --git a/net/rds/info.c b/net/rds/info.c
index 9a6b4f66187..4fdf1b6e84f 100644
--- a/net/rds/info.c
+++ b/net/rds/info.c
@@ -34,7 +34,6 @@
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
-#include <linux/export.h>
 
 #include "rds.h"
 
@@ -104,7 +103,7 @@ EXPORT_SYMBOL_GPL(rds_info_deregister_func);
 void rds_info_iter_unmap(struct rds_info_iterator *iter)
 {
 	if (iter->addr) {
-		kunmap_atomic(iter->addr);
+		kunmap_atomic(iter->addr, KM_USER0);
 		iter->addr = NULL;
 	}
 }
@@ -119,7 +118,7 @@ void rds_info_copy(struct rds_info_iterator *iter, void *data,
 
 	while (bytes) {
 		if (!iter->addr)
-			iter->addr = kmap_atomic(*iter->pages);
+			iter->addr = kmap_atomic(*iter->pages, KM_USER0);
 
 		this = min(bytes, PAGE_SIZE - iter->offset);
 
@@ -134,7 +133,7 @@ void rds_info_copy(struct rds_info_iterator *iter, void *data,
 		iter->offset += this;
 
 		if (iter->offset == PAGE_SIZE) {
-			kunmap_atomic(iter->addr);
+			kunmap_atomic(iter->addr, KM_USER0);
 			iter->addr = NULL;
 			iter->offset = 0;
 			iter->pages++;
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 7826d46baa7..f7474844f09 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -38,7 +38,6 @@
 #include <linux/if_arp.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
-#include <linux/module.h>
 
 #include "rds.h"
 #include "iw.h"
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index a91e1db62ee..9556d2895f7 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -694,7 +694,7 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 	unsigned long flags;
 
 	/* XXX too lazy? */
-	ic = kzalloc(sizeof(struct rds_iw_connection), gfp);
+	ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL);
 	if (!ic)
 		return -ENOMEM;
 
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index a817705ce2d..4e1de171866 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -477,6 +477,17 @@ void rds_iw_sync_mr(void *trans_private, int direction)
 	}
 }
 
+static inline unsigned int rds_iw_flush_goal(struct rds_iw_mr_pool *pool, int free_all)
+{
+	unsigned int item_count;
+
+	item_count = atomic_read(&pool->item_count);
+	if (free_all)
+		return item_count;
+
+	return 0;
+}
+
 /*
  * Flush our pool of MRs.
  * At a minimum, all currently unused MRs are unmapped.
@@ -489,7 +500,7 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
 	LIST_HEAD(unmap_list);
 	LIST_HEAD(kill_list);
 	unsigned long flags;
-	unsigned int nfreed = 0, ncleaned = 0, unpinned = 0;
+	unsigned int nfreed = 0, ncleaned = 0, unpinned = 0, free_goal;
 	int ret = 0;
 
 	rds_iw_stats_inc(s_iw_rdma_mr_pool_flush);
@@ -503,6 +514,8 @@ static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
 	list_splice_init(&pool->clean_list, &kill_list);
 	spin_unlock_irqrestore(&pool->list_lock, flags);
 
+	free_goal = rds_iw_flush_goal(pool, free_all);
+
 	/* Batched invalidate of dirty MRs.
 	 * For FMR based MRs, the mappings on the unmap list are
 	 * actually members of an ibmr (ibmr->mapping). They either
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index 45033358358..5e57347f49f 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -598,7 +598,7 @@ static void rds_iw_cong_recv(struct rds_connection *conn,
 		to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
 		BUG_ON(to_copy & 7); /* Must be 64bit aligned. */
 
-		addr = kmap_atomic(frag->f_page);
+		addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0);
 
 		src = addr + frag_off;
 		dst = (void *)map->m_page_addrs[map_page] + map_off;
@@ -608,7 +608,7 @@ static void rds_iw_cong_recv(struct rds_connection *conn,
 			uncongested |= ~(*src) & *dst;
 			*dst++ = *src++;
 		}
-		kunmap_atomic(addr);
+		kunmap_atomic(addr, KM_SOFTIRQ0);
 
 		copied += to_copy;
 
@@ -661,7 +661,7 @@ static void rds_iw_process_recv(struct rds_connection *conn,
 
 	if (byte_len < sizeof(struct rds_header)) {
 		rds_iw_conn_error(conn, "incoming message "
-		       "from %pI4 didn't include a "
+		       "from %pI4 didn't inclue a "
 		       "header, disconnecting and "
 		       "reconnecting\n",
 		       &conn->c_faddr);
@@ -754,7 +754,8 @@ static void rds_iw_process_recv(struct rds_connection *conn,
 		rds_iw_cong_recv(conn, iwinc);
 	else {
 		rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
-				  &iwinc->ii_inc, GFP_ATOMIC);
+				  &iwinc->ii_inc, GFP_ATOMIC,
+				  KM_SOFTIRQ0);
 		state->ack_next = be64_to_cpu(hdr->h_sequence);
 		state->ack_next_valid = 1;
 	}
diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c
index 5d5ebd576f3..e2e47176e72 100644
--- a/net/rds/iw_sysctl.c
+++ b/net/rds/iw_sysctl.c
@@ -109,15 +109,22 @@ static ctl_table rds_iw_sysctl_table[] = {
 	{ }
 };
 
+static struct ctl_path rds_iw_sysctl_path[] = {
+	{ .procname = "net", },
+	{ .procname = "rds", },
+	{ .procname = "iw", },
+	{ }
+};
+
 void rds_iw_sysctl_exit(void)
 {
 	if (rds_iw_sysctl_hdr)
-		unregister_net_sysctl_table(rds_iw_sysctl_hdr);
+		unregister_sysctl_table(rds_iw_sysctl_hdr);
 }
 
 int rds_iw_sysctl_init(void)
 {
-	rds_iw_sysctl_hdr = register_net_sysctl(&init_net, "net/rds/iw", rds_iw_sysctl_table);
+	rds_iw_sysctl_hdr = register_sysctl_paths(rds_iw_sysctl_path, rds_iw_sysctl_table);
 	if (!rds_iw_sysctl_hdr)
 		return -ENOMEM;
 	return 0;
diff --git a/net/rds/loop.c b/net/rds/loop.c
index 6b12b68541a..bca6761a3ca 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -79,7 +79,7 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
 	rds_message_addref(rm);
 
 	rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc,
-			  GFP_KERNEL);
+			  GFP_KERNEL, KM_USER0);
 
 	rds_send_drop_acked(conn, be64_to_cpu(rm->m_inc.i_hdr.h_sequence),
 			    NULL);
@@ -121,7 +121,7 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 	struct rds_loop_connection *lc;
 	unsigned long flags;
 
-	lc = kzalloc(sizeof(struct rds_loop_connection), gfp);
+	lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL);
 	if (!lc)
 		return -ENOMEM;
 
diff --git a/net/rds/message.c b/net/rds/message.c
index f0a4658f327..1fd3d29023d 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -32,7 +32,6 @@
  */
 #include <linux/kernel.h>
 #include <linux/slab.h>
-#include <linux/export.h>
 
 #include "rds.h"
 
diff --git a/net/rds/page.c b/net/rds/page.c
index 9005a2c920e..b82d63e77b0 100644
--- a/net/rds/page.c
+++ b/net/rds/page.c
@@ -33,7 +33,6 @@
 #include <linux/highmem.h>
 #include <linux/gfp.h>
 #include <linux/cpu.h>
-#include <linux/export.h>
 
 #include "rds.h"
 
@@ -74,12 +73,11 @@ int rds_page_copy_user(struct page *page, unsigned long offset,
 }
 EXPORT_SYMBOL_GPL(rds_page_copy_user);
 
-/**
- * rds_page_remainder_alloc - build up regions of a message.
+/*
+ * Message allocation uses this to build up regions of a message.
  *
- * @scat: Scatter list for message
- * @bytes: the number of bytes needed.
- * @gfp: the waiting behaviour of the allocation
+ * @bytes - the number of bytes needed.
+ * @gfp - the waiting behaviour of the allocation
  *
  * @gfp is always ored with __GFP_HIGHMEM.  Callers must be prepared to
  * kmap the pages, etc.
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index c2be901d19e..f8760e1b668 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -30,7 +30,6 @@
  * SOFTWARE.
  *
  */
-#include <linux/module.h>
 #include <rdma/rdma_cm.h>
 
 #include "rdma_transport.h"
diff --git a/net/rds/rds.h b/net/rds/rds.h
index ec1d731ecff..da8adac2bf0 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -36,8 +36,8 @@
 #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args)
 #else
 /* sigh, pr_debug() causes unused variable warnings */
-static inline __printf(1, 2)
-void rdsdebug(char *fmt, ...)
+static inline void __attribute__ ((format (printf, 1, 2)))
+rdsdebug(char *fmt, ...)
 {
 }
 #endif
@@ -625,8 +625,8 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
 			  struct rds_info_lengths *lens,
 			  int (*visitor)(struct rds_connection *, void *),
 			  size_t item_len);
-__printf(2, 3)
-void __rds_conn_error(struct rds_connection *conn, const char *, ...);
+void __rds_conn_error(struct rds_connection *conn, const char *, ...)
+				__attribute__ ((format (printf, 2, 3)));
 #define rds_conn_error(conn, fmt...) \
 	__rds_conn_error(conn, KERN_WARNING "RDS: " fmt)
 
@@ -704,7 +704,7 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
 		  __be32 saddr);
 void rds_inc_put(struct rds_incoming *inc);
 void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
-		       struct rds_incoming *inc, gfp_t gfp);
+		       struct rds_incoming *inc, gfp_t gfp, enum km_type km);
 int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 		size_t size, int msg_flags);
 void rds_clear_recv_queue(struct rds_sock *rs);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 9f0f17cf6bf..596689e5927 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -34,7 +34,6 @@
 #include <linux/slab.h>
 #include <net/sock.h>
 #include <linux/in.h>
-#include <linux/export.h>
 
 #include "rds.h"
 
@@ -155,7 +154,7 @@ static void rds_recv_incoming_exthdrs(struct rds_incoming *inc, struct rds_sock
  * tell us which roles the addrs in the conn are playing for this message.
  */
 void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
-		       struct rds_incoming *inc, gfp_t gfp)
+		       struct rds_incoming *inc, gfp_t gfp, enum km_type km)
 {
 	struct rds_sock *rs = NULL;
 	struct sock *sk;
@@ -410,8 +409,6 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 
 	rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo);
 
-	msg->msg_namelen = 0;
-
 	if (msg_flags & MSG_OOB)
 		goto out;
 
@@ -487,7 +484,6 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 			sin->sin_port = inc->i_hdr.h_sport;
 			sin->sin_addr.s_addr = inc->i_saddr;
 			memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
-			msg->msg_namelen = sizeof(*sin);
 		}
 		break;
 	}
diff --git a/net/rds/send.c b/net/rds/send.c
index 88eace57dd6..aa57e22539e 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -31,13 +31,11 @@
  *
  */
 #include <linux/kernel.h>
-#include <linux/moduleparam.h>
 #include <linux/gfp.h>
 #include <net/sock.h>
 #include <linux/in.h>
 #include <linux/list.h>
 #include <linux/ratelimit.h>
-#include <linux/export.h>
 
 #include "rds.h"
 
@@ -935,6 +933,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	/* Mirror Linux UDP mirror of BSD error message compatibility */
 	/* XXX: Perhaps MSG_MORE someday */
 	if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) {
+		printk(KERN_INFO "msg_flags 0x%08X\n", msg->msg_flags);
 		ret = -EOPNOTSUPP;
 		goto out;
 	}
@@ -1122,7 +1121,7 @@ rds_send_pong(struct rds_connection *conn, __be16 dport)
 	rds_stats_inc(s_send_pong);
 
 	if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags))
-		queue_delayed_work(rds_wq, &conn->c_send_w, 0);
+		rds_send_xmit(conn);
 
 	rds_message_put(rm);
 	return 0;
diff --git a/net/rds/stats.c b/net/rds/stats.c
index 7be790d60b9..10c759ccac0 100644
--- a/net/rds/stats.c
+++ b/net/rds/stats.c
@@ -33,7 +33,6 @@
 #include <linux/percpu.h>
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
-#include <linux/export.h>
 
 #include "rds.h"
 
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
index 907214b4c4d..25ad0c77a26 100644
--- a/net/rds/sysctl.c
+++ b/net/rds/sysctl.c
@@ -92,10 +92,17 @@ static ctl_table rds_sysctl_rds_table[] = {
 	{ }
 };
 
+static struct ctl_path rds_sysctl_path[] = {
+	{ .procname = "net", },
+	{ .procname = "rds", },
+	{ }
+};
+
+
 void rds_sysctl_exit(void)
 {
 	if (rds_sysctl_reg_table)
-		unregister_net_sysctl_table(rds_sysctl_reg_table);
+		unregister_sysctl_table(rds_sysctl_reg_table);
 }
 
 int rds_sysctl_init(void)
@@ -103,7 +110,7 @@ int rds_sysctl_init(void)
 	rds_sysctl_reconnect_min = msecs_to_jiffies(1);
 	rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min;
 
-	rds_sysctl_reg_table = register_net_sysctl(&init_net,"net/rds", rds_sysctl_rds_table);
+	rds_sysctl_reg_table = register_sysctl_paths(rds_sysctl_path, rds_sysctl_rds_table);
 	if (!rds_sysctl_reg_table)
 		return -ENOMEM;
 	return 0;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index edac9ef2bc8..8e0a32001c9 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -33,7 +33,6 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/in.h>
-#include <linux/module.h>
 #include <net/tcp.h>
 
 #include "rds.h"
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index a65ee78db0c..af95c8e058f 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -43,7 +43,7 @@ void rds_tcp_state_change(struct sock *sk)
 	struct rds_connection *conn;
 	struct rds_tcp_connection *tc;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	conn = sk->sk_user_data;
 	if (!conn) {
 		state_change = sk->sk_state_change;
@@ -68,7 +68,7 @@ void rds_tcp_state_change(struct sock *sk)
 		break;
 	}
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 	state_change(sk);
 }
 
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 7787537e9c2..8b5cc4aa886 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -114,7 +114,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes)
 
 	rdsdebug("listen data ready sk %p\n", sk);
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	ready = sk->sk_user_data;
 	if (!ready) { /* check for teardown race */
 		ready = sk->sk_data_ready;
@@ -131,7 +131,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes)
 		queue_work(rds_wq, &rds_tcp_listen_work);
 
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 	ready(sk, bytes);
 }
 
@@ -145,7 +145,7 @@ int rds_tcp_listen_init(void)
 	if (ret < 0)
 		goto out;
 
-	sock->sk->sk_reuse = SK_CAN_REUSE;
+	sock->sk->sk_reuse = 1;
 	rds_tcp_nonagle(sock);
 
 	write_lock_bh(&sock->sk->sk_callback_lock);
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index 4fac4f2bb9d..78205e25500 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -169,6 +169,7 @@ static void rds_tcp_cong_recv(struct rds_connection *conn,
 struct rds_tcp_desc_arg {
 	struct rds_connection *conn;
 	gfp_t gfp;
+	enum km_type km;
 };
 
 static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
@@ -254,7 +255,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
 			else
 				rds_recv_incoming(conn, conn->c_faddr,
 						  conn->c_laddr, &tinc->ti_inc,
-						  arg->gfp);
+						  arg->gfp, arg->km);
 
 			tc->t_tinc_hdr_rem = sizeof(struct rds_header);
 			tc->t_tinc_data_rem = 0;
@@ -271,7 +272,8 @@ out:
 }
 
 /* the caller has to hold the sock lock */
-static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp)
+static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp,
+			     enum km_type km)
 {
 	struct rds_tcp_connection *tc = conn->c_transport_data;
 	struct socket *sock = tc->t_sock;
@@ -281,6 +283,7 @@ static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp)
 	/* It's like glib in the kernel! */
 	arg.conn = conn;
 	arg.gfp = gfp;
+	arg.km = km;
 	desc.arg.data = &arg;
 	desc.error = 0;
 	desc.count = 1; /* give more than one skb per call */
@@ -308,7 +311,7 @@ int rds_tcp_recv(struct rds_connection *conn)
 	rdsdebug("recv worker conn %p tc %p sock %p\n", conn, tc, sock);
 
 	lock_sock(sock->sk);
-	ret = rds_tcp_read_sock(conn, GFP_KERNEL);
+	ret = rds_tcp_read_sock(conn, GFP_KERNEL, KM_USER0);
 	release_sock(sock->sk);
 
 	return ret;
@@ -322,7 +325,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes)
 
 	rdsdebug("data ready sk %p bytes %d\n", sk, bytes);
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	conn = sk->sk_user_data;
 	if (!conn) { /* check for teardown race */
 		ready = sk->sk_data_ready;
@@ -333,10 +336,10 @@ void rds_tcp_data_ready(struct sock *sk, int bytes)
 	ready = tc->t_orig_data_ready;
 	rds_tcp_stats_inc(s_tcp_data_ready_calls);
 
-	if (rds_tcp_read_sock(conn, GFP_ATOMIC) == -ENOMEM)
+	if (rds_tcp_read_sock(conn, GFP_ATOMIC, KM_SOFTIRQ0) == -ENOMEM)
 		queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 	ready(sk, bytes);
 }
 
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 81cf5a4c5e4..1b4fd68f0c7 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -174,7 +174,7 @@ void rds_tcp_write_space(struct sock *sk)
 	struct rds_connection *conn;
 	struct rds_tcp_connection *tc;
 
-	read_lock(&sk->sk_callback_lock);
+	read_lock_bh(&sk->sk_callback_lock);
 	conn = sk->sk_user_data;
 	if (!conn) {
 		write_space = sk->sk_write_space;
@@ -194,7 +194,7 @@ void rds_tcp_write_space(struct sock *sk)
 		queue_delayed_work(rds_wq, &conn->c_send_w, 0);
 
 out:
-	read_unlock(&sk->sk_callback_lock);
+	read_unlock_bh(&sk->sk_callback_lock);
 
 	/*
 	 * write_space is only called when data leaves tcp's send queue if
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 65eaefcab24..0fd90f8c5f5 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -32,7 +32,6 @@
  */
 #include <linux/kernel.h>
 #include <linux/random.h>
-#include <linux/export.h>
 
 #include "rds.h"
 