diff options
Diffstat (limited to 'net/rds')
-rw-r--r-- | net/rds/ib.c | 5 | ||||
-rw-r--r-- | net/rds/ib.h | 28 | ||||
-rw-r--r-- | net/rds/ib_cm.c | 43 | ||||
-rw-r--r-- | net/rds/ib_rdma.c | 43 | ||||
-rw-r--r-- | net/rds/ib_recv.c | 37 | ||||
-rw-r--r-- | net/rds/iw.c | 5 | ||||
-rw-r--r-- | net/rds/iw.h | 28 | ||||
-rw-r--r-- | net/rds/iw_cm.c | 44 | ||||
-rw-r--r-- | net/rds/iw_rdma.c | 44 | ||||
-rw-r--r-- | net/rds/iw_recv.c | 37 | ||||
-rw-r--r-- | net/rds/rds.h | 4 | ||||
-rw-r--r-- | net/rds/send.c | 6 |
12 files changed, 212 insertions, 112 deletions
diff --git a/net/rds/ib.c b/net/rds/ib.c index 06a7b798d9a7..4933b380985e 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c | |||
@@ -51,6 +51,7 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer"); | |||
51 | 51 | ||
52 | struct list_head rds_ib_devices; | 52 | struct list_head rds_ib_devices; |
53 | 53 | ||
54 | /* NOTE: if also grabbing ibdev lock, grab this first */ | ||
54 | DEFINE_SPINLOCK(ib_nodev_conns_lock); | 55 | DEFINE_SPINLOCK(ib_nodev_conns_lock); |
55 | LIST_HEAD(ib_nodev_conns); | 56 | LIST_HEAD(ib_nodev_conns); |
56 | 57 | ||
@@ -137,7 +138,7 @@ void rds_ib_remove_one(struct ib_device *device) | |||
137 | kfree(i_ipaddr); | 138 | kfree(i_ipaddr); |
138 | } | 139 | } |
139 | 140 | ||
140 | rds_ib_remove_conns(rds_ibdev); | 141 | rds_ib_destroy_conns(rds_ibdev); |
141 | 142 | ||
142 | if (rds_ibdev->mr_pool) | 143 | if (rds_ibdev->mr_pool) |
143 | rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); | 144 | rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); |
@@ -249,7 +250,7 @@ static int rds_ib_laddr_check(__be32 addr) | |||
249 | void rds_ib_exit(void) | 250 | void rds_ib_exit(void) |
250 | { | 251 | { |
251 | rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); | 252 | rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); |
252 | rds_ib_remove_nodev_conns(); | 253 | rds_ib_destroy_nodev_conns(); |
253 | ib_unregister_client(&rds_ib_client); | 254 | ib_unregister_client(&rds_ib_client); |
254 | rds_ib_sysctl_exit(); | 255 | rds_ib_sysctl_exit(); |
255 | rds_ib_recv_exit(); | 256 | rds_ib_recv_exit(); |
diff --git a/net/rds/ib.h b/net/rds/ib.h index 8be563a1363a..069206cae733 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h | |||
@@ -108,7 +108,12 @@ struct rds_ib_connection { | |||
108 | 108 | ||
109 | /* sending acks */ | 109 | /* sending acks */ |
110 | unsigned long i_ack_flags; | 110 | unsigned long i_ack_flags; |
111 | #ifdef KERNEL_HAS_ATOMIC64 | ||
112 | atomic64_t i_ack_next; /* next ACK to send */ | ||
113 | #else | ||
114 | spinlock_t i_ack_lock; /* protect i_ack_next */ | ||
111 | u64 i_ack_next; /* next ACK to send */ | 115 | u64 i_ack_next; /* next ACK to send */ |
116 | #endif | ||
112 | struct rds_header *i_ack; | 117 | struct rds_header *i_ack; |
113 | struct ib_send_wr i_ack_wr; | 118 | struct ib_send_wr i_ack_wr; |
114 | struct ib_sge i_ack_sge; | 119 | struct ib_sge i_ack_sge; |
@@ -267,9 +272,17 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, | |||
267 | 272 | ||
268 | /* ib_rdma.c */ | 273 | /* ib_rdma.c */ |
269 | int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); | 274 | int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); |
270 | int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); | 275 | void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); |
271 | void rds_ib_remove_nodev_conns(void); | 276 | void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); |
272 | void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev); | 277 | void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock); |
278 | static inline void rds_ib_destroy_nodev_conns(void) | ||
279 | { | ||
280 | __rds_ib_destroy_conns(&ib_nodev_conns, &ib_nodev_conns_lock); | ||
281 | } | ||
282 | static inline void rds_ib_destroy_conns(struct rds_ib_device *rds_ibdev) | ||
283 | { | ||
284 | __rds_ib_destroy_conns(&rds_ibdev->conn_list, &rds_ibdev->spinlock); | ||
285 | } | ||
273 | struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); | 286 | struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); |
274 | void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); | 287 | void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); |
275 | void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); | 288 | void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); |
@@ -355,13 +368,4 @@ rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge) | |||
355 | return &sge[1]; | 368 | return &sge[1]; |
356 | } | 369 | } |
357 | 370 | ||
358 | static inline void rds_ib_set_64bit(u64 *ptr, u64 val) | ||
359 | { | ||
360 | #if BITS_PER_LONG == 64 | ||
361 | *ptr = val; | ||
362 | #else | ||
363 | set_64bit(ptr, val); | ||
364 | #endif | ||
365 | } | ||
366 | |||
367 | #endif | 371 | #endif |
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 0532237bd128..f8e40e1a6038 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c | |||
@@ -126,9 +126,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even | |||
126 | err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr); | 126 | err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr); |
127 | if (err) | 127 | if (err) |
128 | printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); | 128 | printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); |
129 | err = rds_ib_add_conn(rds_ibdev, conn); | 129 | rds_ib_add_conn(rds_ibdev, conn); |
130 | if (err) | ||
131 | printk(KERN_ERR "rds_ib_add_conn failed (%d)\n", err); | ||
132 | 130 | ||
133 | /* If the peer gave us the last packet it saw, process this as if | 131 | /* If the peer gave us the last packet it saw, process this as if |
134 | * we had received a regular ACK. */ | 132 | * we had received a regular ACK. */ |
@@ -616,18 +614,8 @@ void rds_ib_conn_shutdown(struct rds_connection *conn) | |||
616 | /* | 614 | /* |
617 | * Move connection back to the nodev list. | 615 | * Move connection back to the nodev list. |
618 | */ | 616 | */ |
619 | if (ic->rds_ibdev) { | 617 | if (ic->rds_ibdev) |
620 | 618 | rds_ib_remove_conn(ic->rds_ibdev, conn); | |
621 | spin_lock_irq(&ic->rds_ibdev->spinlock); | ||
622 | BUG_ON(list_empty(&ic->ib_node)); | ||
623 | list_del(&ic->ib_node); | ||
624 | spin_unlock_irq(&ic->rds_ibdev->spinlock); | ||
625 | |||
626 | spin_lock_irq(&ib_nodev_conns_lock); | ||
627 | list_add_tail(&ic->ib_node, &ib_nodev_conns); | ||
628 | spin_unlock_irq(&ib_nodev_conns_lock); | ||
629 | ic->rds_ibdev = NULL; | ||
630 | } | ||
631 | 619 | ||
632 | ic->i_cm_id = NULL; | 620 | ic->i_cm_id = NULL; |
633 | ic->i_pd = NULL; | 621 | ic->i_pd = NULL; |
@@ -648,7 +636,11 @@ void rds_ib_conn_shutdown(struct rds_connection *conn) | |||
648 | 636 | ||
649 | /* Clear the ACK state */ | 637 | /* Clear the ACK state */ |
650 | clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); | 638 | clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); |
651 | rds_ib_set_64bit(&ic->i_ack_next, 0); | 639 | #ifdef KERNEL_HAS_ATOMIC64 |
640 | atomic64_set(&ic->i_ack_next, 0); | ||
641 | #else | ||
642 | ic->i_ack_next = 0; | ||
643 | #endif | ||
652 | ic->i_ack_recv = 0; | 644 | ic->i_ack_recv = 0; |
653 | 645 | ||
654 | /* Clear flow control state */ | 646 | /* Clear flow control state */ |
@@ -681,6 +673,9 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) | |||
681 | 673 | ||
682 | INIT_LIST_HEAD(&ic->ib_node); | 674 | INIT_LIST_HEAD(&ic->ib_node); |
683 | mutex_init(&ic->i_recv_mutex); | 675 | mutex_init(&ic->i_recv_mutex); |
676 | #ifndef KERNEL_HAS_ATOMIC64 | ||
677 | spin_lock_init(&ic->i_ack_lock); | ||
678 | #endif | ||
684 | 679 | ||
685 | /* | 680 | /* |
686 | * rds_ib_conn_shutdown() waits for these to be emptied so they | 681 | * rds_ib_conn_shutdown() waits for these to be emptied so they |
@@ -701,11 +696,27 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) | |||
701 | return 0; | 696 | return 0; |
702 | } | 697 | } |
703 | 698 | ||
699 | /* | ||
700 | * Free a connection. Connection must be shut down and not set for reconnect. | ||
701 | */ | ||
704 | void rds_ib_conn_free(void *arg) | 702 | void rds_ib_conn_free(void *arg) |
705 | { | 703 | { |
706 | struct rds_ib_connection *ic = arg; | 704 | struct rds_ib_connection *ic = arg; |
705 | spinlock_t *lock_ptr; | ||
706 | |||
707 | rdsdebug("ic %p\n", ic); | 707 | rdsdebug("ic %p\n", ic); |
708 | |||
709 | /* | ||
710 | * Conn is either on a dev's list or on the nodev list. | ||
711 | * A race with shutdown() or connect() would cause problems | ||
712 | * (since rds_ibdev would change) but that should never happen. | ||
713 | */ | ||
714 | lock_ptr = ic->rds_ibdev ? &ic->rds_ibdev->spinlock : &ib_nodev_conns_lock; | ||
715 | |||
716 | spin_lock_irq(lock_ptr); | ||
708 | list_del(&ic->ib_node); | 717 | list_del(&ic->ib_node); |
718 | spin_unlock_irq(lock_ptr); | ||
719 | |||
709 | kfree(ic); | 720 | kfree(ic); |
710 | } | 721 | } |
711 | 722 | ||
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 69a6289ed672..81033af93020 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c | |||
@@ -139,7 +139,7 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) | |||
139 | return rds_ib_add_ipaddr(rds_ibdev, ipaddr); | 139 | return rds_ib_add_ipaddr(rds_ibdev, ipaddr); |
140 | } | 140 | } |
141 | 141 | ||
142 | int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) | 142 | void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) |
143 | { | 143 | { |
144 | struct rds_ib_connection *ic = conn->c_transport_data; | 144 | struct rds_ib_connection *ic = conn->c_transport_data; |
145 | 145 | ||
@@ -148,45 +148,44 @@ int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn | |||
148 | BUG_ON(list_empty(&ib_nodev_conns)); | 148 | BUG_ON(list_empty(&ib_nodev_conns)); |
149 | BUG_ON(list_empty(&ic->ib_node)); | 149 | BUG_ON(list_empty(&ic->ib_node)); |
150 | list_del(&ic->ib_node); | 150 | list_del(&ic->ib_node); |
151 | spin_unlock_irq(&ib_nodev_conns_lock); | ||
152 | 151 | ||
153 | spin_lock_irq(&rds_ibdev->spinlock); | 152 | spin_lock_irq(&rds_ibdev->spinlock); |
154 | list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); | 153 | list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); |
155 | spin_unlock_irq(&rds_ibdev->spinlock); | 154 | spin_unlock_irq(&rds_ibdev->spinlock); |
155 | spin_unlock_irq(&ib_nodev_conns_lock); | ||
156 | 156 | ||
157 | ic->rds_ibdev = rds_ibdev; | 157 | ic->rds_ibdev = rds_ibdev; |
158 | |||
159 | return 0; | ||
160 | } | 158 | } |
161 | 159 | ||
162 | void rds_ib_remove_nodev_conns(void) | 160 | void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) |
163 | { | 161 | { |
164 | struct rds_ib_connection *ic, *_ic; | 162 | struct rds_ib_connection *ic = conn->c_transport_data; |
165 | LIST_HEAD(tmp_list); | ||
166 | 163 | ||
167 | /* avoid calling conn_destroy with irqs off */ | 164 | /* place conn on nodev_conns_list */ |
168 | spin_lock_irq(&ib_nodev_conns_lock); | 165 | spin_lock(&ib_nodev_conns_lock); |
169 | list_splice(&ib_nodev_conns, &tmp_list); | ||
170 | INIT_LIST_HEAD(&ib_nodev_conns); | ||
171 | spin_unlock_irq(&ib_nodev_conns_lock); | ||
172 | 166 | ||
173 | list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { | 167 | spin_lock_irq(&rds_ibdev->spinlock); |
174 | if (ic->conn->c_passive) | 168 | BUG_ON(list_empty(&ic->ib_node)); |
175 | rds_conn_destroy(ic->conn->c_passive); | 169 | list_del(&ic->ib_node); |
176 | rds_conn_destroy(ic->conn); | 170 | spin_unlock_irq(&rds_ibdev->spinlock); |
177 | } | 171 | |
172 | list_add_tail(&ic->ib_node, &ib_nodev_conns); | ||
173 | |||
174 | spin_unlock(&ib_nodev_conns_lock); | ||
175 | |||
176 | ic->rds_ibdev = NULL; | ||
178 | } | 177 | } |
179 | 178 | ||
180 | void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev) | 179 | void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock) |
181 | { | 180 | { |
182 | struct rds_ib_connection *ic, *_ic; | 181 | struct rds_ib_connection *ic, *_ic; |
183 | LIST_HEAD(tmp_list); | 182 | LIST_HEAD(tmp_list); |
184 | 183 | ||
185 | /* avoid calling conn_destroy with irqs off */ | 184 | /* avoid calling conn_destroy with irqs off */ |
186 | spin_lock_irq(&rds_ibdev->spinlock); | 185 | spin_lock_irq(list_lock); |
187 | list_splice(&rds_ibdev->conn_list, &tmp_list); | 186 | list_splice(list, &tmp_list); |
188 | INIT_LIST_HEAD(&rds_ibdev->conn_list); | 187 | INIT_LIST_HEAD(list); |
189 | spin_unlock_irq(&rds_ibdev->spinlock); | 188 | spin_unlock_irq(list_lock); |
190 | 189 | ||
191 | list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { | 190 | list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { |
192 | if (ic->conn->c_passive) | 191 | if (ic->conn->c_passive) |
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 5061b5502162..36d931573ff4 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c | |||
@@ -395,10 +395,37 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic) | |||
395 | * room for it beyond the ring size. Send completion notices its special | 395 | * room for it beyond the ring size. Send completion notices its special |
396 | * wr_id and avoids working with the ring in that case. | 396 | * wr_id and avoids working with the ring in that case. |
397 | */ | 397 | */ |
398 | #ifndef KERNEL_HAS_ATOMIC64 | ||
398 | static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, | 399 | static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, |
399 | int ack_required) | 400 | int ack_required) |
400 | { | 401 | { |
401 | rds_ib_set_64bit(&ic->i_ack_next, seq); | 402 | unsigned long flags; |
403 | |||
404 | spin_lock_irqsave(&ic->i_ack_lock, flags); | ||
405 | ic->i_ack_next = seq; | ||
406 | if (ack_required) | ||
407 | set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | ||
408 | spin_unlock_irqrestore(&ic->i_ack_lock, flags); | ||
409 | } | ||
410 | |||
411 | static u64 rds_ib_get_ack(struct rds_ib_connection *ic) | ||
412 | { | ||
413 | unsigned long flags; | ||
414 | u64 seq; | ||
415 | |||
416 | clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | ||
417 | |||
418 | spin_lock_irqsave(&ic->i_ack_lock, flags); | ||
419 | seq = ic->i_ack_next; | ||
420 | spin_unlock_irqrestore(&ic->i_ack_lock, flags); | ||
421 | |||
422 | return seq; | ||
423 | } | ||
424 | #else | ||
425 | static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, | ||
426 | int ack_required) | ||
427 | { | ||
428 | atomic64_set(&ic->i_ack_next, seq); | ||
402 | if (ack_required) { | 429 | if (ack_required) { |
403 | smp_mb__before_clear_bit(); | 430 | smp_mb__before_clear_bit(); |
404 | set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | 431 | set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); |
@@ -410,8 +437,10 @@ static u64 rds_ib_get_ack(struct rds_ib_connection *ic) | |||
410 | clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | 437 | clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); |
411 | smp_mb__after_clear_bit(); | 438 | smp_mb__after_clear_bit(); |
412 | 439 | ||
413 | return ic->i_ack_next; | 440 | return atomic64_read(&ic->i_ack_next); |
414 | } | 441 | } |
442 | #endif | ||
443 | |||
415 | 444 | ||
416 | static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits) | 445 | static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits) |
417 | { | 446 | { |
@@ -464,6 +493,10 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi | |||
464 | * - i_ack_next, which is the last sequence number we received | 493 | * - i_ack_next, which is the last sequence number we received |
465 | * | 494 | * |
466 | * Potentially, send queue and receive queue handlers can run concurrently. | 495 | * Potentially, send queue and receive queue handlers can run concurrently. |
496 | * It would be nice to not have to use a spinlock to synchronize things, | ||
497 | * but the one problem that rules this out is that 64bit updates are | ||
498 | * not atomic on all platforms. Things would be a lot simpler if | ||
499 | * we had atomic64 or maybe cmpxchg64 everywhere. | ||
467 | * | 500 | * |
468 | * Reconnecting complicates this picture just slightly. When we | 501 | * Reconnecting complicates this picture just slightly. When we |
469 | * reconnect, we may be seeing duplicate packets. The peer | 502 | * reconnect, we may be seeing duplicate packets. The peer |
diff --git a/net/rds/iw.c b/net/rds/iw.c index 1b56905c4c08..b732efb5b634 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c | |||
@@ -51,6 +51,7 @@ MODULE_PARM_DESC(fastreg_message_size, " Max size of a RDMA transfer (fastreg MR | |||
51 | 51 | ||
52 | struct list_head rds_iw_devices; | 52 | struct list_head rds_iw_devices; |
53 | 53 | ||
54 | /* NOTE: if also grabbing iwdev lock, grab this first */ | ||
54 | DEFINE_SPINLOCK(iw_nodev_conns_lock); | 55 | DEFINE_SPINLOCK(iw_nodev_conns_lock); |
55 | LIST_HEAD(iw_nodev_conns); | 56 | LIST_HEAD(iw_nodev_conns); |
56 | 57 | ||
@@ -145,7 +146,7 @@ void rds_iw_remove_one(struct ib_device *device) | |||
145 | } | 146 | } |
146 | spin_unlock_irq(&rds_iwdev->spinlock); | 147 | spin_unlock_irq(&rds_iwdev->spinlock); |
147 | 148 | ||
148 | rds_iw_remove_conns(rds_iwdev); | 149 | rds_iw_destroy_conns(rds_iwdev); |
149 | 150 | ||
150 | if (rds_iwdev->mr_pool) | 151 | if (rds_iwdev->mr_pool) |
151 | rds_iw_destroy_mr_pool(rds_iwdev->mr_pool); | 152 | rds_iw_destroy_mr_pool(rds_iwdev->mr_pool); |
@@ -258,7 +259,7 @@ static int rds_iw_laddr_check(__be32 addr) | |||
258 | void rds_iw_exit(void) | 259 | void rds_iw_exit(void) |
259 | { | 260 | { |
260 | rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info); | 261 | rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info); |
261 | rds_iw_remove_nodev_conns(); | 262 | rds_iw_destroy_nodev_conns(); |
262 | ib_unregister_client(&rds_iw_client); | 263 | ib_unregister_client(&rds_iw_client); |
263 | rds_iw_sysctl_exit(); | 264 | rds_iw_sysctl_exit(); |
264 | rds_iw_recv_exit(); | 265 | rds_iw_recv_exit(); |
diff --git a/net/rds/iw.h b/net/rds/iw.h index 0ddda34f2a1c..b4fb27252895 100644 --- a/net/rds/iw.h +++ b/net/rds/iw.h | |||
@@ -131,7 +131,12 @@ struct rds_iw_connection { | |||
131 | 131 | ||
132 | /* sending acks */ | 132 | /* sending acks */ |
133 | unsigned long i_ack_flags; | 133 | unsigned long i_ack_flags; |
134 | #ifdef KERNEL_HAS_ATOMIC64 | ||
135 | atomic64_t i_ack_next; /* next ACK to send */ | ||
136 | #else | ||
137 | spinlock_t i_ack_lock; /* protect i_ack_next */ | ||
134 | u64 i_ack_next; /* next ACK to send */ | 138 | u64 i_ack_next; /* next ACK to send */ |
139 | #endif | ||
135 | struct rds_header *i_ack; | 140 | struct rds_header *i_ack; |
136 | struct ib_send_wr i_ack_wr; | 141 | struct ib_send_wr i_ack_wr; |
137 | struct ib_sge i_ack_sge; | 142 | struct ib_sge i_ack_sge; |
@@ -294,9 +299,17 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn, | |||
294 | 299 | ||
295 | /* ib_rdma.c */ | 300 | /* ib_rdma.c */ |
296 | int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id); | 301 | int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id); |
297 | int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn); | 302 | void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn); |
298 | void rds_iw_remove_nodev_conns(void); | 303 | void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn); |
299 | void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev); | 304 | void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock); |
305 | static inline void rds_iw_destroy_nodev_conns(void) | ||
306 | { | ||
307 | __rds_iw_destroy_conns(&iw_nodev_conns, &iw_nodev_conns_lock); | ||
308 | } | ||
309 | static inline void rds_iw_destroy_conns(struct rds_iw_device *rds_iwdev) | ||
310 | { | ||
311 | __rds_iw_destroy_conns(&rds_iwdev->conn_list, &rds_iwdev->spinlock); | ||
312 | } | ||
300 | struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *); | 313 | struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *); |
301 | void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo); | 314 | void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo); |
302 | void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *); | 315 | void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *); |
@@ -383,13 +396,4 @@ rds_iw_data_sge(struct rds_iw_connection *ic, struct ib_sge *sge) | |||
383 | return &sge[1]; | 396 | return &sge[1]; |
384 | } | 397 | } |
385 | 398 | ||
386 | static inline void rds_iw_set_64bit(u64 *ptr, u64 val) | ||
387 | { | ||
388 | #if BITS_PER_LONG == 64 | ||
389 | *ptr = val; | ||
390 | #else | ||
391 | set_64bit(ptr, val); | ||
392 | #endif | ||
393 | } | ||
394 | |||
395 | #endif | 399 | #endif |
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 57ecb3d4b8a5..a416b0d492b1 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c | |||
@@ -86,9 +86,7 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even | |||
86 | err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id); | 86 | err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id); |
87 | if (err) | 87 | if (err) |
88 | printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err); | 88 | printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err); |
89 | err = rds_iw_add_conn(rds_iwdev, conn); | 89 | rds_iw_add_conn(rds_iwdev, conn); |
90 | if (err) | ||
91 | printk(KERN_ERR "rds_iw_add_conn failed (%d)\n", err); | ||
92 | 90 | ||
93 | /* If the peer gave us the last packet it saw, process this as if | 91 | /* If the peer gave us the last packet it saw, process this as if |
94 | * we had received a regular ACK. */ | 92 | * we had received a regular ACK. */ |
@@ -637,19 +635,8 @@ void rds_iw_conn_shutdown(struct rds_connection *conn) | |||
637 | * Move connection back to the nodev list. | 635 | * Move connection back to the nodev list. |
638 | * Remove cm_id from the device cm_id list. | 636 | * Remove cm_id from the device cm_id list. |
639 | */ | 637 | */ |
640 | if (ic->rds_iwdev) { | 638 | if (ic->rds_iwdev) |
641 | 639 | rds_iw_remove_conn(ic->rds_iwdev, conn); | |
642 | spin_lock_irq(&ic->rds_iwdev->spinlock); | ||
643 | BUG_ON(list_empty(&ic->iw_node)); | ||
644 | list_del(&ic->iw_node); | ||
645 | spin_unlock_irq(&ic->rds_iwdev->spinlock); | ||
646 | |||
647 | spin_lock_irq(&iw_nodev_conns_lock); | ||
648 | list_add_tail(&ic->iw_node, &iw_nodev_conns); | ||
649 | spin_unlock_irq(&iw_nodev_conns_lock); | ||
650 | rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id); | ||
651 | ic->rds_iwdev = NULL; | ||
652 | } | ||
653 | 640 | ||
654 | rdma_destroy_id(ic->i_cm_id); | 641 | rdma_destroy_id(ic->i_cm_id); |
655 | 642 | ||
@@ -672,7 +659,11 @@ void rds_iw_conn_shutdown(struct rds_connection *conn) | |||
672 | 659 | ||
673 | /* Clear the ACK state */ | 660 | /* Clear the ACK state */ |
674 | clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); | 661 | clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); |
675 | rds_iw_set_64bit(&ic->i_ack_next, 0); | 662 | #ifdef KERNEL_HAS_ATOMIC64 |
663 | atomic64_set(&ic->i_ack_next, 0); | ||
664 | #else | ||
665 | ic->i_ack_next = 0; | ||
666 | #endif | ||
676 | ic->i_ack_recv = 0; | 667 | ic->i_ack_recv = 0; |
677 | 668 | ||
678 | /* Clear flow control state */ | 669 | /* Clear flow control state */ |
@@ -706,6 +697,9 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp) | |||
706 | 697 | ||
707 | INIT_LIST_HEAD(&ic->iw_node); | 698 | INIT_LIST_HEAD(&ic->iw_node); |
708 | mutex_init(&ic->i_recv_mutex); | 699 | mutex_init(&ic->i_recv_mutex); |
700 | #ifndef KERNEL_HAS_ATOMIC64 | ||
701 | spin_lock_init(&ic->i_ack_lock); | ||
702 | #endif | ||
709 | 703 | ||
710 | /* | 704 | /* |
711 | * rds_iw_conn_shutdown() waits for these to be emptied so they | 705 | * rds_iw_conn_shutdown() waits for these to be emptied so they |
@@ -726,11 +720,27 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp) | |||
726 | return 0; | 720 | return 0; |
727 | } | 721 | } |
728 | 722 | ||
723 | /* | ||
724 | * Free a connection. Connection must be shut down and not set for reconnect. | ||
725 | */ | ||
729 | void rds_iw_conn_free(void *arg) | 726 | void rds_iw_conn_free(void *arg) |
730 | { | 727 | { |
731 | struct rds_iw_connection *ic = arg; | 728 | struct rds_iw_connection *ic = arg; |
729 | spinlock_t *lock_ptr; | ||
730 | |||
732 | rdsdebug("ic %p\n", ic); | 731 | rdsdebug("ic %p\n", ic); |
732 | |||
733 | /* | ||
734 | * Conn is either on a dev's list or on the nodev list. | ||
735 | * A race with shutdown() or connect() would cause problems | ||
736 | * (since rds_iwdev would change) but that should never happen. | ||
737 | */ | ||
738 | lock_ptr = ic->rds_iwdev ? &ic->rds_iwdev->spinlock : &iw_nodev_conns_lock; | ||
739 | |||
740 | spin_lock_irq(lock_ptr); | ||
733 | list_del(&ic->iw_node); | 741 | list_del(&ic->iw_node); |
742 | spin_unlock_irq(lock_ptr); | ||
743 | |||
734 | kfree(ic); | 744 | kfree(ic); |
735 | } | 745 | } |
736 | 746 | ||
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 1c02a8f952d0..dcdb37da80f2 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c | |||
@@ -196,7 +196,7 @@ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_i | |||
196 | return rds_iw_add_cm_id(rds_iwdev, cm_id); | 196 | return rds_iw_add_cm_id(rds_iwdev, cm_id); |
197 | } | 197 | } |
198 | 198 | ||
199 | int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn) | 199 | void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn) |
200 | { | 200 | { |
201 | struct rds_iw_connection *ic = conn->c_transport_data; | 201 | struct rds_iw_connection *ic = conn->c_transport_data; |
202 | 202 | ||
@@ -205,45 +205,45 @@ int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn | |||
205 | BUG_ON(list_empty(&iw_nodev_conns)); | 205 | BUG_ON(list_empty(&iw_nodev_conns)); |
206 | BUG_ON(list_empty(&ic->iw_node)); | 206 | BUG_ON(list_empty(&ic->iw_node)); |
207 | list_del(&ic->iw_node); | 207 | list_del(&ic->iw_node); |
208 | spin_unlock_irq(&iw_nodev_conns_lock); | ||
209 | 208 | ||
210 | spin_lock_irq(&rds_iwdev->spinlock); | 209 | spin_lock_irq(&rds_iwdev->spinlock); |
211 | list_add_tail(&ic->iw_node, &rds_iwdev->conn_list); | 210 | list_add_tail(&ic->iw_node, &rds_iwdev->conn_list); |
212 | spin_unlock_irq(&rds_iwdev->spinlock); | 211 | spin_unlock_irq(&rds_iwdev->spinlock); |
212 | spin_unlock_irq(&iw_nodev_conns_lock); | ||
213 | 213 | ||
214 | ic->rds_iwdev = rds_iwdev; | 214 | ic->rds_iwdev = rds_iwdev; |
215 | |||
216 | return 0; | ||
217 | } | 215 | } |
218 | 216 | ||
219 | void rds_iw_remove_nodev_conns(void) | 217 | void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn) |
220 | { | 218 | { |
221 | struct rds_iw_connection *ic, *_ic; | 219 | struct rds_iw_connection *ic = conn->c_transport_data; |
222 | LIST_HEAD(tmp_list); | ||
223 | 220 | ||
224 | /* avoid calling conn_destroy with irqs off */ | 221 | /* place conn on nodev_conns_list */ |
225 | spin_lock_irq(&iw_nodev_conns_lock); | 222 | spin_lock(&iw_nodev_conns_lock); |
226 | list_splice(&iw_nodev_conns, &tmp_list); | ||
227 | INIT_LIST_HEAD(&iw_nodev_conns); | ||
228 | spin_unlock_irq(&iw_nodev_conns_lock); | ||
229 | 223 | ||
230 | list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { | 224 | spin_lock_irq(&rds_iwdev->spinlock); |
231 | if (ic->conn->c_passive) | 225 | BUG_ON(list_empty(&ic->iw_node)); |
232 | rds_conn_destroy(ic->conn->c_passive); | 226 | list_del(&ic->iw_node); |
233 | rds_conn_destroy(ic->conn); | 227 | spin_unlock_irq(&rds_iwdev->spinlock); |
234 | } | 228 | |
229 | list_add_tail(&ic->iw_node, &iw_nodev_conns); | ||
230 | |||
231 | spin_unlock(&iw_nodev_conns_lock); | ||
232 | |||
233 | rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id); | ||
234 | ic->rds_iwdev = NULL; | ||
235 | } | 235 | } |
236 | 236 | ||
237 | void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev) | 237 | void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock) |
238 | { | 238 | { |
239 | struct rds_iw_connection *ic, *_ic; | 239 | struct rds_iw_connection *ic, *_ic; |
240 | LIST_HEAD(tmp_list); | 240 | LIST_HEAD(tmp_list); |
241 | 241 | ||
242 | /* avoid calling conn_destroy with irqs off */ | 242 | /* avoid calling conn_destroy with irqs off */ |
243 | spin_lock_irq(&rds_iwdev->spinlock); | 243 | spin_lock_irq(list_lock); |
244 | list_splice(&rds_iwdev->conn_list, &tmp_list); | 244 | list_splice(list, &tmp_list); |
245 | INIT_LIST_HEAD(&rds_iwdev->conn_list); | 245 | INIT_LIST_HEAD(list); |
246 | spin_unlock_irq(&rds_iwdev->spinlock); | 246 | spin_unlock_irq(list_lock); |
247 | 247 | ||
248 | list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { | 248 | list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { |
249 | if (ic->conn->c_passive) | 249 | if (ic->conn->c_passive) |
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c index a1931f0027a2..fde470fa50d5 100644 --- a/net/rds/iw_recv.c +++ b/net/rds/iw_recv.c | |||
@@ -395,10 +395,37 @@ void rds_iw_recv_init_ack(struct rds_iw_connection *ic) | |||
395 | * room for it beyond the ring size. Send completion notices its special | 395 | * room for it beyond the ring size. Send completion notices its special |
396 | * wr_id and avoids working with the ring in that case. | 396 | * wr_id and avoids working with the ring in that case. |
397 | */ | 397 | */ |
398 | #ifndef KERNEL_HAS_ATOMIC64 | ||
398 | static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, | 399 | static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, |
399 | int ack_required) | 400 | int ack_required) |
400 | { | 401 | { |
401 | rds_iw_set_64bit(&ic->i_ack_next, seq); | 402 | unsigned long flags; |
403 | |||
404 | spin_lock_irqsave(&ic->i_ack_lock, flags); | ||
405 | ic->i_ack_next = seq; | ||
406 | if (ack_required) | ||
407 | set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | ||
408 | spin_unlock_irqrestore(&ic->i_ack_lock, flags); | ||
409 | } | ||
410 | |||
411 | static u64 rds_iw_get_ack(struct rds_iw_connection *ic) | ||
412 | { | ||
413 | unsigned long flags; | ||
414 | u64 seq; | ||
415 | |||
416 | clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | ||
417 | |||
418 | spin_lock_irqsave(&ic->i_ack_lock, flags); | ||
419 | seq = ic->i_ack_next; | ||
420 | spin_unlock_irqrestore(&ic->i_ack_lock, flags); | ||
421 | |||
422 | return seq; | ||
423 | } | ||
424 | #else | ||
425 | static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, | ||
426 | int ack_required) | ||
427 | { | ||
428 | atomic64_set(&ic->i_ack_next, seq); | ||
402 | if (ack_required) { | 429 | if (ack_required) { |
403 | smp_mb__before_clear_bit(); | 430 | smp_mb__before_clear_bit(); |
404 | set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | 431 | set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); |
@@ -410,8 +437,10 @@ static u64 rds_iw_get_ack(struct rds_iw_connection *ic) | |||
410 | clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | 437 | clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); |
411 | smp_mb__after_clear_bit(); | 438 | smp_mb__after_clear_bit(); |
412 | 439 | ||
413 | return ic->i_ack_next; | 440 | return atomic64_read(&ic->i_ack_next); |
414 | } | 441 | } |
442 | #endif | ||
443 | |||
415 | 444 | ||
416 | static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits) | 445 | static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits) |
417 | { | 446 | { |
@@ -464,6 +493,10 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi | |||
464 | * - i_ack_next, which is the last sequence number we received | 493 | * - i_ack_next, which is the last sequence number we received |
465 | * | 494 | * |
466 | * Potentially, send queue and receive queue handlers can run concurrently. | 495 | * Potentially, send queue and receive queue handlers can run concurrently. |
496 | * It would be nice to not have to use a spinlock to synchronize things, | ||
497 | * but the one problem that rules this out is that 64bit updates are | ||
498 | * not atomic on all platforms. Things would be a lot simpler if | ||
499 | * we had atomic64 or maybe cmpxchg64 everywhere. | ||
467 | * | 500 | * |
468 | * Reconnecting complicates this picture just slightly. When we | 501 | * Reconnecting complicates this picture just slightly. When we |
469 | * reconnect, we may be seeing duplicate packets. The peer | 502 | * reconnect, we may be seeing duplicate packets. The peer |
diff --git a/net/rds/rds.h b/net/rds/rds.h index 060400704979..619f0a30a4e5 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h | |||
@@ -28,6 +28,10 @@ | |||
28 | */ | 28 | */ |
29 | #define RDS_PORT 18634 | 29 | #define RDS_PORT 18634 |
30 | 30 | ||
31 | #ifdef ATOMIC64_INIT | ||
32 | #define KERNEL_HAS_ATOMIC64 | ||
33 | #endif | ||
34 | |||
31 | #ifdef DEBUG | 35 | #ifdef DEBUG |
32 | #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) | 36 | #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) |
33 | #else | 37 | #else |
diff --git a/net/rds/send.c b/net/rds/send.c index 1b37364656f0..104fe033203d 100644 --- a/net/rds/send.c +++ b/net/rds/send.c | |||
@@ -615,7 +615,7 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) | |||
615 | { | 615 | { |
616 | struct rds_message *rm, *tmp; | 616 | struct rds_message *rm, *tmp; |
617 | struct rds_connection *conn; | 617 | struct rds_connection *conn; |
618 | unsigned long flags; | 618 | unsigned long flags, flags2; |
619 | LIST_HEAD(list); | 619 | LIST_HEAD(list); |
620 | int wake = 0; | 620 | int wake = 0; |
621 | 621 | ||
@@ -651,9 +651,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) | |||
651 | list_for_each_entry(rm, &list, m_sock_item) { | 651 | list_for_each_entry(rm, &list, m_sock_item) { |
652 | /* We do this here rather than in the loop above, so that | 652 | /* We do this here rather than in the loop above, so that |
653 | * we don't have to nest m_rs_lock under rs->rs_lock */ | 653 | * we don't have to nest m_rs_lock under rs->rs_lock */ |
654 | spin_lock(&rm->m_rs_lock); | 654 | spin_lock_irqsave(&rm->m_rs_lock, flags2); |
655 | rm->m_rs = NULL; | 655 | rm->m_rs = NULL; |
656 | spin_unlock(&rm->m_rs_lock); | 656 | spin_unlock_irqrestore(&rm->m_rs_lock, flags2); |
657 | 657 | ||
658 | /* | 658 | /* |
659 | * If we see this flag cleared then we're *sure* that someone | 659 | * If we see this flag cleared then we're *sure* that someone |