aboutsummaryrefslogtreecommitdiffstats
path: root/net/rds
diff options
context:
space:
mode:
authorChris Zankel <chris@zankel.net>2009-04-03 05:29:05 -0400
committerChris Zankel <chris@zankel.net>2009-04-03 05:29:05 -0400
commit65127d28e312bb6b38ce84a7bb71d762ef63ad4c (patch)
treed5fdf52a2d0731f7fab0ce0ed394faac50b04fbc /net/rds
parentb8bb76713ec50df2f11efee386e16f93d51e1076 (diff)
parent8fe74cf053de7ad2124a894996f84fa890a81093 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into merge
Diffstat (limited to 'net/rds')
-rw-r--r--net/rds/ib.c5
-rw-r--r--net/rds/ib.h28
-rw-r--r--net/rds/ib_cm.c43
-rw-r--r--net/rds/ib_rdma.c43
-rw-r--r--net/rds/ib_recv.c37
-rw-r--r--net/rds/iw.c5
-rw-r--r--net/rds/iw.h28
-rw-r--r--net/rds/iw_cm.c44
-rw-r--r--net/rds/iw_rdma.c44
-rw-r--r--net/rds/iw_recv.c37
-rw-r--r--net/rds/rds.h4
-rw-r--r--net/rds/send.c6
12 files changed, 212 insertions, 112 deletions
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 06a7b798d9a7..4933b380985e 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -51,6 +51,7 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
51 51
52struct list_head rds_ib_devices; 52struct list_head rds_ib_devices;
53 53
54/* NOTE: if also grabbing ibdev lock, grab this first */
54DEFINE_SPINLOCK(ib_nodev_conns_lock); 55DEFINE_SPINLOCK(ib_nodev_conns_lock);
55LIST_HEAD(ib_nodev_conns); 56LIST_HEAD(ib_nodev_conns);
56 57
@@ -137,7 +138,7 @@ void rds_ib_remove_one(struct ib_device *device)
137 kfree(i_ipaddr); 138 kfree(i_ipaddr);
138 } 139 }
139 140
140 rds_ib_remove_conns(rds_ibdev); 141 rds_ib_destroy_conns(rds_ibdev);
141 142
142 if (rds_ibdev->mr_pool) 143 if (rds_ibdev->mr_pool)
143 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); 144 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool);
@@ -249,7 +250,7 @@ static int rds_ib_laddr_check(__be32 addr)
249void rds_ib_exit(void) 250void rds_ib_exit(void)
250{ 251{
251 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); 252 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
252 rds_ib_remove_nodev_conns(); 253 rds_ib_destroy_nodev_conns();
253 ib_unregister_client(&rds_ib_client); 254 ib_unregister_client(&rds_ib_client);
254 rds_ib_sysctl_exit(); 255 rds_ib_sysctl_exit();
255 rds_ib_recv_exit(); 256 rds_ib_recv_exit();
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 8be563a1363a..069206cae733 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -108,7 +108,12 @@ struct rds_ib_connection {
108 108
109 /* sending acks */ 109 /* sending acks */
110 unsigned long i_ack_flags; 110 unsigned long i_ack_flags;
111#ifdef KERNEL_HAS_ATOMIC64
112 atomic64_t i_ack_next; /* next ACK to send */
113#else
114 spinlock_t i_ack_lock; /* protect i_ack_next */
111 u64 i_ack_next; /* next ACK to send */ 115 u64 i_ack_next; /* next ACK to send */
116#endif
112 struct rds_header *i_ack; 117 struct rds_header *i_ack;
113 struct ib_send_wr i_ack_wr; 118 struct ib_send_wr i_ack_wr;
114 struct ib_sge i_ack_sge; 119 struct ib_sge i_ack_sge;
@@ -267,9 +272,17 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn,
267 272
268/* ib_rdma.c */ 273/* ib_rdma.c */
269int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); 274int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
270int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 275void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
271void rds_ib_remove_nodev_conns(void); 276void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
272void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev); 277void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock);
278static inline void rds_ib_destroy_nodev_conns(void)
279{
280 __rds_ib_destroy_conns(&ib_nodev_conns, &ib_nodev_conns_lock);
281}
282static inline void rds_ib_destroy_conns(struct rds_ib_device *rds_ibdev)
283{
284 __rds_ib_destroy_conns(&rds_ibdev->conn_list, &rds_ibdev->spinlock);
285}
273struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); 286struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *);
274void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); 287void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
275void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); 288void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
@@ -355,13 +368,4 @@ rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
355 return &sge[1]; 368 return &sge[1];
356} 369}
357 370
358static inline void rds_ib_set_64bit(u64 *ptr, u64 val)
359{
360#if BITS_PER_LONG == 64
361 *ptr = val;
362#else
363 set_64bit(ptr, val);
364#endif
365}
366
367#endif 371#endif
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 0532237bd128..f8e40e1a6038 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -126,9 +126,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
126 err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr); 126 err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
127 if (err) 127 if (err)
128 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); 128 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err);
129 err = rds_ib_add_conn(rds_ibdev, conn); 129 rds_ib_add_conn(rds_ibdev, conn);
130 if (err)
131 printk(KERN_ERR "rds_ib_add_conn failed (%d)\n", err);
132 130
133 /* If the peer gave us the last packet it saw, process this as if 131 /* If the peer gave us the last packet it saw, process this as if
134 * we had received a regular ACK. */ 132 * we had received a regular ACK. */
@@ -616,18 +614,8 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
616 /* 614 /*
617 * Move connection back to the nodev list. 615 * Move connection back to the nodev list.
618 */ 616 */
619 if (ic->rds_ibdev) { 617 if (ic->rds_ibdev)
620 618 rds_ib_remove_conn(ic->rds_ibdev, conn);
621 spin_lock_irq(&ic->rds_ibdev->spinlock);
622 BUG_ON(list_empty(&ic->ib_node));
623 list_del(&ic->ib_node);
624 spin_unlock_irq(&ic->rds_ibdev->spinlock);
625
626 spin_lock_irq(&ib_nodev_conns_lock);
627 list_add_tail(&ic->ib_node, &ib_nodev_conns);
628 spin_unlock_irq(&ib_nodev_conns_lock);
629 ic->rds_ibdev = NULL;
630 }
631 619
632 ic->i_cm_id = NULL; 620 ic->i_cm_id = NULL;
633 ic->i_pd = NULL; 621 ic->i_pd = NULL;
@@ -648,7 +636,11 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
648 636
649 /* Clear the ACK state */ 637 /* Clear the ACK state */
650 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); 638 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
651 rds_ib_set_64bit(&ic->i_ack_next, 0); 639#ifdef KERNEL_HAS_ATOMIC64
640 atomic64_set(&ic->i_ack_next, 0);
641#else
642 ic->i_ack_next = 0;
643#endif
652 ic->i_ack_recv = 0; 644 ic->i_ack_recv = 0;
653 645
654 /* Clear flow control state */ 646 /* Clear flow control state */
@@ -681,6 +673,9 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
681 673
682 INIT_LIST_HEAD(&ic->ib_node); 674 INIT_LIST_HEAD(&ic->ib_node);
683 mutex_init(&ic->i_recv_mutex); 675 mutex_init(&ic->i_recv_mutex);
676#ifndef KERNEL_HAS_ATOMIC64
677 spin_lock_init(&ic->i_ack_lock);
678#endif
684 679
685 /* 680 /*
686 * rds_ib_conn_shutdown() waits for these to be emptied so they 681 * rds_ib_conn_shutdown() waits for these to be emptied so they
@@ -701,11 +696,27 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
701 return 0; 696 return 0;
702} 697}
703 698
699/*
700 * Free a connection. Connection must be shut down and not set for reconnect.
701 */
704void rds_ib_conn_free(void *arg) 702void rds_ib_conn_free(void *arg)
705{ 703{
706 struct rds_ib_connection *ic = arg; 704 struct rds_ib_connection *ic = arg;
705 spinlock_t *lock_ptr;
706
707 rdsdebug("ic %p\n", ic); 707 rdsdebug("ic %p\n", ic);
708
709 /*
710 * Conn is either on a dev's list or on the nodev list.
711 * A race with shutdown() or connect() would cause problems
712 * (since rds_ibdev would change) but that should never happen.
713 */
714 lock_ptr = ic->rds_ibdev ? &ic->rds_ibdev->spinlock : &ib_nodev_conns_lock;
715
716 spin_lock_irq(lock_ptr);
708 list_del(&ic->ib_node); 717 list_del(&ic->ib_node);
718 spin_unlock_irq(lock_ptr);
719
709 kfree(ic); 720 kfree(ic);
710} 721}
711 722
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 69a6289ed672..81033af93020 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -139,7 +139,7 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
139 return rds_ib_add_ipaddr(rds_ibdev, ipaddr); 139 return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
140} 140}
141 141
142int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) 142void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
143{ 143{
144 struct rds_ib_connection *ic = conn->c_transport_data; 144 struct rds_ib_connection *ic = conn->c_transport_data;
145 145
@@ -148,45 +148,44 @@ int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn
148 BUG_ON(list_empty(&ib_nodev_conns)); 148 BUG_ON(list_empty(&ib_nodev_conns));
149 BUG_ON(list_empty(&ic->ib_node)); 149 BUG_ON(list_empty(&ic->ib_node));
150 list_del(&ic->ib_node); 150 list_del(&ic->ib_node);
151 spin_unlock_irq(&ib_nodev_conns_lock);
152 151
153 spin_lock_irq(&rds_ibdev->spinlock); 152 spin_lock_irq(&rds_ibdev->spinlock);
154 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); 153 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list);
155 spin_unlock_irq(&rds_ibdev->spinlock); 154 spin_unlock_irq(&rds_ibdev->spinlock);
155 spin_unlock_irq(&ib_nodev_conns_lock);
156 156
157 ic->rds_ibdev = rds_ibdev; 157 ic->rds_ibdev = rds_ibdev;
158
159 return 0;
160} 158}
161 159
162void rds_ib_remove_nodev_conns(void) 160void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
163{ 161{
164 struct rds_ib_connection *ic, *_ic; 162 struct rds_ib_connection *ic = conn->c_transport_data;
165 LIST_HEAD(tmp_list);
166 163
167 /* avoid calling conn_destroy with irqs off */ 164 /* place conn on nodev_conns_list */
168 spin_lock_irq(&ib_nodev_conns_lock); 165 spin_lock(&ib_nodev_conns_lock);
169 list_splice(&ib_nodev_conns, &tmp_list);
170 INIT_LIST_HEAD(&ib_nodev_conns);
171 spin_unlock_irq(&ib_nodev_conns_lock);
172 166
173 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { 167 spin_lock_irq(&rds_ibdev->spinlock);
174 if (ic->conn->c_passive) 168 BUG_ON(list_empty(&ic->ib_node));
175 rds_conn_destroy(ic->conn->c_passive); 169 list_del(&ic->ib_node);
176 rds_conn_destroy(ic->conn); 170 spin_unlock_irq(&rds_ibdev->spinlock);
177 } 171
172 list_add_tail(&ic->ib_node, &ib_nodev_conns);
173
174 spin_unlock(&ib_nodev_conns_lock);
175
176 ic->rds_ibdev = NULL;
178} 177}
179 178
180void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev) 179void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock)
181{ 180{
182 struct rds_ib_connection *ic, *_ic; 181 struct rds_ib_connection *ic, *_ic;
183 LIST_HEAD(tmp_list); 182 LIST_HEAD(tmp_list);
184 183
185 /* avoid calling conn_destroy with irqs off */ 184 /* avoid calling conn_destroy with irqs off */
186 spin_lock_irq(&rds_ibdev->spinlock); 185 spin_lock_irq(list_lock);
187 list_splice(&rds_ibdev->conn_list, &tmp_list); 186 list_splice(list, &tmp_list);
188 INIT_LIST_HEAD(&rds_ibdev->conn_list); 187 INIT_LIST_HEAD(list);
189 spin_unlock_irq(&rds_ibdev->spinlock); 188 spin_unlock_irq(list_lock);
190 189
191 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { 190 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) {
192 if (ic->conn->c_passive) 191 if (ic->conn->c_passive)
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 5061b5502162..36d931573ff4 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -395,10 +395,37 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic)
395 * room for it beyond the ring size. Send completion notices its special 395 * room for it beyond the ring size. Send completion notices its special
396 * wr_id and avoids working with the ring in that case. 396 * wr_id and avoids working with the ring in that case.
397 */ 397 */
398#ifndef KERNEL_HAS_ATOMIC64
398static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, 399static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
399 int ack_required) 400 int ack_required)
400{ 401{
401 rds_ib_set_64bit(&ic->i_ack_next, seq); 402 unsigned long flags;
403
404 spin_lock_irqsave(&ic->i_ack_lock, flags);
405 ic->i_ack_next = seq;
406 if (ack_required)
407 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
408 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
409}
410
411static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
412{
413 unsigned long flags;
414 u64 seq;
415
416 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
417
418 spin_lock_irqsave(&ic->i_ack_lock, flags);
419 seq = ic->i_ack_next;
420 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
421
422 return seq;
423}
424#else
425static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
426 int ack_required)
427{
428 atomic64_set(&ic->i_ack_next, seq);
402 if (ack_required) { 429 if (ack_required) {
403 smp_mb__before_clear_bit(); 430 smp_mb__before_clear_bit();
404 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 431 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
@@ -410,8 +437,10 @@ static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
410 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 437 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
411 smp_mb__after_clear_bit(); 438 smp_mb__after_clear_bit();
412 439
413 return ic->i_ack_next; 440 return atomic64_read(&ic->i_ack_next);
414} 441}
442#endif
443
415 444
416static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits) 445static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits)
417{ 446{
@@ -464,6 +493,10 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi
464 * - i_ack_next, which is the last sequence number we received 493 * - i_ack_next, which is the last sequence number we received
465 * 494 *
466 * Potentially, send queue and receive queue handlers can run concurrently. 495 * Potentially, send queue and receive queue handlers can run concurrently.
496 * It would be nice to not have to use a spinlock to synchronize things,
497 * but the one problem that rules this out is that 64bit updates are
498 * not atomic on all platforms. Things would be a lot simpler if
499 * we had atomic64 or maybe cmpxchg64 everywhere.
467 * 500 *
468 * Reconnecting complicates this picture just slightly. When we 501 * Reconnecting complicates this picture just slightly. When we
469 * reconnect, we may be seeing duplicate packets. The peer 502 * reconnect, we may be seeing duplicate packets. The peer
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 1b56905c4c08..b732efb5b634 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -51,6 +51,7 @@ MODULE_PARM_DESC(fastreg_message_size, " Max size of a RDMA transfer (fastreg MR
51 51
52struct list_head rds_iw_devices; 52struct list_head rds_iw_devices;
53 53
54/* NOTE: if also grabbing iwdev lock, grab this first */
54DEFINE_SPINLOCK(iw_nodev_conns_lock); 55DEFINE_SPINLOCK(iw_nodev_conns_lock);
55LIST_HEAD(iw_nodev_conns); 56LIST_HEAD(iw_nodev_conns);
56 57
@@ -145,7 +146,7 @@ void rds_iw_remove_one(struct ib_device *device)
145 } 146 }
146 spin_unlock_irq(&rds_iwdev->spinlock); 147 spin_unlock_irq(&rds_iwdev->spinlock);
147 148
148 rds_iw_remove_conns(rds_iwdev); 149 rds_iw_destroy_conns(rds_iwdev);
149 150
150 if (rds_iwdev->mr_pool) 151 if (rds_iwdev->mr_pool)
151 rds_iw_destroy_mr_pool(rds_iwdev->mr_pool); 152 rds_iw_destroy_mr_pool(rds_iwdev->mr_pool);
@@ -258,7 +259,7 @@ static int rds_iw_laddr_check(__be32 addr)
258void rds_iw_exit(void) 259void rds_iw_exit(void)
259{ 260{
260 rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info); 261 rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info);
261 rds_iw_remove_nodev_conns(); 262 rds_iw_destroy_nodev_conns();
262 ib_unregister_client(&rds_iw_client); 263 ib_unregister_client(&rds_iw_client);
263 rds_iw_sysctl_exit(); 264 rds_iw_sysctl_exit();
264 rds_iw_recv_exit(); 265 rds_iw_recv_exit();
diff --git a/net/rds/iw.h b/net/rds/iw.h
index 0ddda34f2a1c..b4fb27252895 100644
--- a/net/rds/iw.h
+++ b/net/rds/iw.h
@@ -131,7 +131,12 @@ struct rds_iw_connection {
131 131
132 /* sending acks */ 132 /* sending acks */
133 unsigned long i_ack_flags; 133 unsigned long i_ack_flags;
134#ifdef KERNEL_HAS_ATOMIC64
135 atomic64_t i_ack_next; /* next ACK to send */
136#else
137 spinlock_t i_ack_lock; /* protect i_ack_next */
134 u64 i_ack_next; /* next ACK to send */ 138 u64 i_ack_next; /* next ACK to send */
139#endif
135 struct rds_header *i_ack; 140 struct rds_header *i_ack;
136 struct ib_send_wr i_ack_wr; 141 struct ib_send_wr i_ack_wr;
137 struct ib_sge i_ack_sge; 142 struct ib_sge i_ack_sge;
@@ -294,9 +299,17 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn,
294 299
295/* ib_rdma.c */ 300/* ib_rdma.c */
296int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id); 301int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id);
297int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn); 302void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
298void rds_iw_remove_nodev_conns(void); 303void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
299void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev); 304void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock);
305static inline void rds_iw_destroy_nodev_conns(void)
306{
307 __rds_iw_destroy_conns(&iw_nodev_conns, &iw_nodev_conns_lock);
308}
309static inline void rds_iw_destroy_conns(struct rds_iw_device *rds_iwdev)
310{
311 __rds_iw_destroy_conns(&rds_iwdev->conn_list, &rds_iwdev->spinlock);
312}
300struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *); 313struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *);
301void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo); 314void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo);
302void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *); 315void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *);
@@ -383,13 +396,4 @@ rds_iw_data_sge(struct rds_iw_connection *ic, struct ib_sge *sge)
383 return &sge[1]; 396 return &sge[1];
384} 397}
385 398
386static inline void rds_iw_set_64bit(u64 *ptr, u64 val)
387{
388#if BITS_PER_LONG == 64
389 *ptr = val;
390#else
391 set_64bit(ptr, val);
392#endif
393}
394
395#endif 399#endif
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index 57ecb3d4b8a5..a416b0d492b1 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -86,9 +86,7 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
86 err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id); 86 err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id);
87 if (err) 87 if (err)
88 printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err); 88 printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err);
89 err = rds_iw_add_conn(rds_iwdev, conn); 89 rds_iw_add_conn(rds_iwdev, conn);
90 if (err)
91 printk(KERN_ERR "rds_iw_add_conn failed (%d)\n", err);
92 90
93 /* If the peer gave us the last packet it saw, process this as if 91 /* If the peer gave us the last packet it saw, process this as if
94 * we had received a regular ACK. */ 92 * we had received a regular ACK. */
@@ -637,19 +635,8 @@ void rds_iw_conn_shutdown(struct rds_connection *conn)
637 * Move connection back to the nodev list. 635 * Move connection back to the nodev list.
638 * Remove cm_id from the device cm_id list. 636 * Remove cm_id from the device cm_id list.
639 */ 637 */
640 if (ic->rds_iwdev) { 638 if (ic->rds_iwdev)
641 639 rds_iw_remove_conn(ic->rds_iwdev, conn);
642 spin_lock_irq(&ic->rds_iwdev->spinlock);
643 BUG_ON(list_empty(&ic->iw_node));
644 list_del(&ic->iw_node);
645 spin_unlock_irq(&ic->rds_iwdev->spinlock);
646
647 spin_lock_irq(&iw_nodev_conns_lock);
648 list_add_tail(&ic->iw_node, &iw_nodev_conns);
649 spin_unlock_irq(&iw_nodev_conns_lock);
650 rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id);
651 ic->rds_iwdev = NULL;
652 }
653 640
654 rdma_destroy_id(ic->i_cm_id); 641 rdma_destroy_id(ic->i_cm_id);
655 642
@@ -672,7 +659,11 @@ void rds_iw_conn_shutdown(struct rds_connection *conn)
672 659
673 /* Clear the ACK state */ 660 /* Clear the ACK state */
674 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); 661 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
675 rds_iw_set_64bit(&ic->i_ack_next, 0); 662#ifdef KERNEL_HAS_ATOMIC64
663 atomic64_set(&ic->i_ack_next, 0);
664#else
665 ic->i_ack_next = 0;
666#endif
676 ic->i_ack_recv = 0; 667 ic->i_ack_recv = 0;
677 668
678 /* Clear flow control state */ 669 /* Clear flow control state */
@@ -706,6 +697,9 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
706 697
707 INIT_LIST_HEAD(&ic->iw_node); 698 INIT_LIST_HEAD(&ic->iw_node);
708 mutex_init(&ic->i_recv_mutex); 699 mutex_init(&ic->i_recv_mutex);
700#ifndef KERNEL_HAS_ATOMIC64
701 spin_lock_init(&ic->i_ack_lock);
702#endif
709 703
710 /* 704 /*
711 * rds_iw_conn_shutdown() waits for these to be emptied so they 705 * rds_iw_conn_shutdown() waits for these to be emptied so they
@@ -726,11 +720,27 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
726 return 0; 720 return 0;
727} 721}
728 722
723/*
724 * Free a connection. Connection must be shut down and not set for reconnect.
725 */
729void rds_iw_conn_free(void *arg) 726void rds_iw_conn_free(void *arg)
730{ 727{
731 struct rds_iw_connection *ic = arg; 728 struct rds_iw_connection *ic = arg;
729 spinlock_t *lock_ptr;
730
732 rdsdebug("ic %p\n", ic); 731 rdsdebug("ic %p\n", ic);
732
733 /*
734 * Conn is either on a dev's list or on the nodev list.
735 * A race with shutdown() or connect() would cause problems
736 * (since rds_iwdev would change) but that should never happen.
737 */
738 lock_ptr = ic->rds_iwdev ? &ic->rds_iwdev->spinlock : &iw_nodev_conns_lock;
739
740 spin_lock_irq(lock_ptr);
733 list_del(&ic->iw_node); 741 list_del(&ic->iw_node);
742 spin_unlock_irq(lock_ptr);
743
734 kfree(ic); 744 kfree(ic);
735} 745}
736 746
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 1c02a8f952d0..dcdb37da80f2 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -196,7 +196,7 @@ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_i
196 return rds_iw_add_cm_id(rds_iwdev, cm_id); 196 return rds_iw_add_cm_id(rds_iwdev, cm_id);
197} 197}
198 198
199int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn) 199void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
200{ 200{
201 struct rds_iw_connection *ic = conn->c_transport_data; 201 struct rds_iw_connection *ic = conn->c_transport_data;
202 202
@@ -205,45 +205,45 @@ int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn
205 BUG_ON(list_empty(&iw_nodev_conns)); 205 BUG_ON(list_empty(&iw_nodev_conns));
206 BUG_ON(list_empty(&ic->iw_node)); 206 BUG_ON(list_empty(&ic->iw_node));
207 list_del(&ic->iw_node); 207 list_del(&ic->iw_node);
208 spin_unlock_irq(&iw_nodev_conns_lock);
209 208
210 spin_lock_irq(&rds_iwdev->spinlock); 209 spin_lock_irq(&rds_iwdev->spinlock);
211 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list); 210 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list);
212 spin_unlock_irq(&rds_iwdev->spinlock); 211 spin_unlock_irq(&rds_iwdev->spinlock);
212 spin_unlock_irq(&iw_nodev_conns_lock);
213 213
214 ic->rds_iwdev = rds_iwdev; 214 ic->rds_iwdev = rds_iwdev;
215
216 return 0;
217} 215}
218 216
219void rds_iw_remove_nodev_conns(void) 217void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
220{ 218{
221 struct rds_iw_connection *ic, *_ic; 219 struct rds_iw_connection *ic = conn->c_transport_data;
222 LIST_HEAD(tmp_list);
223 220
224 /* avoid calling conn_destroy with irqs off */ 221 /* place conn on nodev_conns_list */
225 spin_lock_irq(&iw_nodev_conns_lock); 222 spin_lock(&iw_nodev_conns_lock);
226 list_splice(&iw_nodev_conns, &tmp_list);
227 INIT_LIST_HEAD(&iw_nodev_conns);
228 spin_unlock_irq(&iw_nodev_conns_lock);
229 223
230 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { 224 spin_lock_irq(&rds_iwdev->spinlock);
231 if (ic->conn->c_passive) 225 BUG_ON(list_empty(&ic->iw_node));
232 rds_conn_destroy(ic->conn->c_passive); 226 list_del(&ic->iw_node);
233 rds_conn_destroy(ic->conn); 227 spin_unlock_irq(&rds_iwdev->spinlock);
234 } 228
229 list_add_tail(&ic->iw_node, &iw_nodev_conns);
230
231 spin_unlock(&iw_nodev_conns_lock);
232
233 rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id);
234 ic->rds_iwdev = NULL;
235} 235}
236 236
237void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev) 237void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock)
238{ 238{
239 struct rds_iw_connection *ic, *_ic; 239 struct rds_iw_connection *ic, *_ic;
240 LIST_HEAD(tmp_list); 240 LIST_HEAD(tmp_list);
241 241
242 /* avoid calling conn_destroy with irqs off */ 242 /* avoid calling conn_destroy with irqs off */
243 spin_lock_irq(&rds_iwdev->spinlock); 243 spin_lock_irq(list_lock);
244 list_splice(&rds_iwdev->conn_list, &tmp_list); 244 list_splice(list, &tmp_list);
245 INIT_LIST_HEAD(&rds_iwdev->conn_list); 245 INIT_LIST_HEAD(list);
246 spin_unlock_irq(&rds_iwdev->spinlock); 246 spin_unlock_irq(list_lock);
247 247
248 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { 248 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) {
249 if (ic->conn->c_passive) 249 if (ic->conn->c_passive)
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index a1931f0027a2..fde470fa50d5 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -395,10 +395,37 @@ void rds_iw_recv_init_ack(struct rds_iw_connection *ic)
395 * room for it beyond the ring size. Send completion notices its special 395 * room for it beyond the ring size. Send completion notices its special
396 * wr_id and avoids working with the ring in that case. 396 * wr_id and avoids working with the ring in that case.
397 */ 397 */
398#ifndef KERNEL_HAS_ATOMIC64
398static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, 399static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
399 int ack_required) 400 int ack_required)
400{ 401{
401 rds_iw_set_64bit(&ic->i_ack_next, seq); 402 unsigned long flags;
403
404 spin_lock_irqsave(&ic->i_ack_lock, flags);
405 ic->i_ack_next = seq;
406 if (ack_required)
407 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
408 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
409}
410
411static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
412{
413 unsigned long flags;
414 u64 seq;
415
416 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
417
418 spin_lock_irqsave(&ic->i_ack_lock, flags);
419 seq = ic->i_ack_next;
420 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
421
422 return seq;
423}
424#else
425static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
426 int ack_required)
427{
428 atomic64_set(&ic->i_ack_next, seq);
402 if (ack_required) { 429 if (ack_required) {
403 smp_mb__before_clear_bit(); 430 smp_mb__before_clear_bit();
404 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 431 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
@@ -410,8 +437,10 @@ static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
410 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 437 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
411 smp_mb__after_clear_bit(); 438 smp_mb__after_clear_bit();
412 439
413 return ic->i_ack_next; 440 return atomic64_read(&ic->i_ack_next);
414} 441}
442#endif
443
415 444
416static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits) 445static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits)
417{ 446{
@@ -464,6 +493,10 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi
464 * - i_ack_next, which is the last sequence number we received 493 * - i_ack_next, which is the last sequence number we received
465 * 494 *
466 * Potentially, send queue and receive queue handlers can run concurrently. 495 * Potentially, send queue and receive queue handlers can run concurrently.
496 * It would be nice to not have to use a spinlock to synchronize things,
497 * but the one problem that rules this out is that 64bit updates are
498 * not atomic on all platforms. Things would be a lot simpler if
499 * we had atomic64 or maybe cmpxchg64 everywhere.
467 * 500 *
468 * Reconnecting complicates this picture just slightly. When we 501 * Reconnecting complicates this picture just slightly. When we
469 * reconnect, we may be seeing duplicate packets. The peer 502 * reconnect, we may be seeing duplicate packets. The peer
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 060400704979..619f0a30a4e5 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -28,6 +28,10 @@
28 */ 28 */
29#define RDS_PORT 18634 29#define RDS_PORT 18634
30 30
31#ifdef ATOMIC64_INIT
32#define KERNEL_HAS_ATOMIC64
33#endif
34
31#ifdef DEBUG 35#ifdef DEBUG
32#define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) 36#define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args)
33#else 37#else
diff --git a/net/rds/send.c b/net/rds/send.c
index 1b37364656f0..104fe033203d 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -615,7 +615,7 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
615{ 615{
616 struct rds_message *rm, *tmp; 616 struct rds_message *rm, *tmp;
617 struct rds_connection *conn; 617 struct rds_connection *conn;
618 unsigned long flags; 618 unsigned long flags, flags2;
619 LIST_HEAD(list); 619 LIST_HEAD(list);
620 int wake = 0; 620 int wake = 0;
621 621
@@ -651,9 +651,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
651 list_for_each_entry(rm, &list, m_sock_item) { 651 list_for_each_entry(rm, &list, m_sock_item) {
652 /* We do this here rather than in the loop above, so that 652 /* We do this here rather than in the loop above, so that
653 * we don't have to nest m_rs_lock under rs->rs_lock */ 653 * we don't have to nest m_rs_lock under rs->rs_lock */
654 spin_lock(&rm->m_rs_lock); 654 spin_lock_irqsave(&rm->m_rs_lock, flags2);
655 rm->m_rs = NULL; 655 rm->m_rs = NULL;
656 spin_unlock(&rm->m_rs_lock); 656 spin_unlock_irqrestore(&rm->m_rs_lock, flags2);
657 657
658 /* 658 /*
659 * If we see this flag cleared then we're *sure* that someone 659 * If we see this flag cleared then we're *sure* that someone