Diffstat (limited to 'net')
28 files changed, 938 insertions, 600 deletions
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index bced8c074c12..7bc2208b6cc4 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -108,9 +108,7 @@ struct p9_poll_wait { | |||
108 | * @unsent_req_list: accounting for requests that haven't been sent | 108 | * @unsent_req_list: accounting for requests that haven't been sent |
109 | * @req: current request being processed (if any) | 109 | * @req: current request being processed (if any) |
110 | * @tmp_buf: temporary buffer to read in header | 110 | * @tmp_buf: temporary buffer to read in header |
111 | * @rsize: amount to read for current frame | 111 | * @rc: temporary fcall for reading current frame |
112 | * @rpos: read position in current frame | ||
113 | * @rbuf: current read buffer | ||
114 | * @wpos: write position for current frame | 112 | * @wpos: write position for current frame |
115 | * @wsize: amount of data to write for current frame | 113 | * @wsize: amount of data to write for current frame |
116 | * @wbuf: current write buffer | 114 | * @wbuf: current write buffer |
@@ -131,9 +129,7 @@ struct p9_conn { | |||
131 | struct list_head unsent_req_list; | 129 | struct list_head unsent_req_list; |
132 | struct p9_req_t *req; | 130 | struct p9_req_t *req; |
133 | char tmp_buf[7]; | 131 | char tmp_buf[7]; |
134 | int rsize; | 132 | struct p9_fcall rc; |
135 | int rpos; | ||
136 | char *rbuf; | ||
137 | int wpos; | 133 | int wpos; |
138 | int wsize; | 134 | int wsize; |
139 | char *wbuf; | 135 | char *wbuf; |
@@ -305,69 +301,77 @@ static void p9_read_work(struct work_struct *work) | |||
305 | if (m->err < 0) | 301 | if (m->err < 0) |
306 | return; | 302 | return; |
307 | 303 | ||
308 | p9_debug(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos); | 304 | p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset); |
309 | 305 | ||
310 | if (!m->rbuf) { | 306 | if (!m->rc.sdata) { |
311 | m->rbuf = m->tmp_buf; | 307 | m->rc.sdata = m->tmp_buf; |
312 | m->rpos = 0; | 308 | m->rc.offset = 0; |
313 | m->rsize = 7; /* start by reading header */ | 309 | m->rc.capacity = 7; /* start by reading header */ |
314 | } | 310 | } |
315 | 311 | ||
316 | clear_bit(Rpending, &m->wsched); | 312 | clear_bit(Rpending, &m->wsched); |
317 | p9_debug(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n", | 313 | p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n", |
318 | m, m->rpos, m->rsize, m->rsize-m->rpos); | 314 | m, m->rc.offset, m->rc.capacity, |
319 | err = p9_fd_read(m->client, m->rbuf + m->rpos, | 315 | m->rc.capacity - m->rc.offset); |
320 | m->rsize - m->rpos); | 316 | err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset, |
317 | m->rc.capacity - m->rc.offset); | ||
321 | p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); | 318 | p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); |
322 | if (err == -EAGAIN) { | 319 | if (err == -EAGAIN) |
323 | goto end_clear; | 320 | goto end_clear; |
324 | } | ||
325 | 321 | ||
326 | if (err <= 0) | 322 | if (err <= 0) |
327 | goto error; | 323 | goto error; |
328 | 324 | ||
329 | m->rpos += err; | 325 | m->rc.offset += err; |
330 | 326 | ||
331 | if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */ | 327 | /* header read in */ |
332 | u16 tag; | 328 | if ((!m->req) && (m->rc.offset == m->rc.capacity)) { |
333 | p9_debug(P9_DEBUG_TRANS, "got new header\n"); | 329 | p9_debug(P9_DEBUG_TRANS, "got new header\n"); |
334 | 330 | ||
335 | n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */ | 331 | err = p9_parse_header(&m->rc, NULL, NULL, NULL, 0); |
336 | if (n >= m->client->msize) { | 332 | if (err) { |
333 | p9_debug(P9_DEBUG_ERROR, | ||
334 | "error parsing header: %d\n", err); | ||
335 | goto error; | ||
336 | } | ||
337 | |||
338 | if (m->rc.size >= m->client->msize) { | ||
337 | p9_debug(P9_DEBUG_ERROR, | 339 | p9_debug(P9_DEBUG_ERROR, |
338 | "requested packet size too big: %d\n", n); | 340 | "requested packet size too big: %d\n", |
341 | m->rc.size); | ||
339 | err = -EIO; | 342 | err = -EIO; |
340 | goto error; | 343 | goto error; |
341 | } | 344 | } |
342 | 345 | ||
343 | tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */ | ||
344 | p9_debug(P9_DEBUG_TRANS, | 346 | p9_debug(P9_DEBUG_TRANS, |
345 | "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); | 347 | "mux %p pkt: size: %d bytes tag: %d\n", |
348 | m, m->rc.size, m->rc.tag); | ||
346 | 349 | ||
347 | m->req = p9_tag_lookup(m->client, tag); | 350 | m->req = p9_tag_lookup(m->client, m->rc.tag); |
348 | if (!m->req || (m->req->status != REQ_STATUS_SENT)) { | 351 | if (!m->req || (m->req->status != REQ_STATUS_SENT)) { |
349 | p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", | 352 | p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", |
350 | tag); | 353 | m->rc.tag); |
351 | err = -EIO; | 354 | err = -EIO; |
352 | goto error; | 355 | goto error; |
353 | } | 356 | } |
354 | 357 | ||
355 | if (m->req->rc == NULL) { | 358 | if (m->req->rc == NULL) { |
356 | m->req->rc = kmalloc(sizeof(struct p9_fcall) + | 359 | p9_debug(P9_DEBUG_ERROR, |
357 | m->client->msize, GFP_NOFS); | 360 | "No recv fcall for tag %d (req %p), disconnecting!\n", |
358 | if (!m->req->rc) { | 361 | m->rc.tag, m->req); |
359 | m->req = NULL; | 362 | m->req = NULL; |
360 | err = -ENOMEM; | 363 | err = -EIO; |
361 | goto error; | 364 | goto error; |
362 | } | ||
363 | } | 365 | } |
364 | m->rbuf = (char *)m->req->rc + sizeof(struct p9_fcall); | 366 | m->rc.sdata = (char *)m->req->rc + sizeof(struct p9_fcall); |
365 | memcpy(m->rbuf, m->tmp_buf, m->rsize); | 367 | memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity); |
366 | m->rsize = n; | 368 | m->rc.capacity = m->rc.size; |
367 | } | 369 | } |
368 | 370 | ||
369 | /* not an else because some packets (like clunk) have no payload */ | 371 | /* packet is read in |
370 | if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */ | 372 | * not an else because some packets (like clunk) have no payload |
373 | */ | ||
374 | if ((m->req) && (m->rc.offset == m->rc.capacity)) { | ||
371 | p9_debug(P9_DEBUG_TRANS, "got new packet\n"); | 375 | p9_debug(P9_DEBUG_TRANS, "got new packet\n"); |
372 | spin_lock(&m->client->lock); | 376 | spin_lock(&m->client->lock); |
373 | if (m->req->status != REQ_STATUS_ERROR) | 377 | if (m->req->status != REQ_STATUS_ERROR) |
@@ -375,9 +379,9 @@ static void p9_read_work(struct work_struct *work) | |||
375 | list_del(&m->req->req_list); | 379 | list_del(&m->req->req_list); |
376 | spin_unlock(&m->client->lock); | 380 | spin_unlock(&m->client->lock); |
377 | p9_client_cb(m->client, m->req, status); | 381 | p9_client_cb(m->client, m->req, status); |
378 | m->rbuf = NULL; | 382 | m->rc.sdata = NULL; |
379 | m->rpos = 0; | 383 | m->rc.offset = 0; |
380 | m->rsize = 0; | 384 | m->rc.capacity = 0; |
381 | m->req = NULL; | 385 | m->req = NULL; |
382 | } | 386 | } |
383 | 387 | ||
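Note on the trans_fd.c hunks above: the hand-rolled header decode (le32 size at offset 0, le16 tag at offset 5 of tmp_buf) is replaced by a struct p9_fcall plus p9_parse_header(). For reference, a minimal sketch of what the old open-coded parse of the 7-byte 9P header (size[4] type[1] tag[2], little-endian) amounts to; the struct and function names below are illustrative, not kernel API:

#include <stdint.h>

struct p9_hdr_sketch {
	uint32_t size;	/* total message length, header included */
	uint8_t  type;	/* message type */
	uint16_t tag;	/* request tag used for p9_tag_lookup() */
};

static void parse_9p_header_sketch(const unsigned char buf[7],
				   struct p9_hdr_sketch *hdr)
{
	/* 9P is little-endian on the wire, independent of host order */
	hdr->size = (uint32_t)buf[0] | ((uint32_t)buf[1] << 8) |
		    ((uint32_t)buf[2] << 16) | ((uint32_t)buf[3] << 24);
	hdr->type = buf[4];
	hdr->tag  = (uint16_t)buf[5] | ((uint16_t)buf[6] << 8);
}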
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 199bc76202d2..4acb1d5417aa 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -658,7 +658,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) | |||
658 | mutex_unlock(&virtio_9p_lock); | 658 | mutex_unlock(&virtio_9p_lock); |
659 | 659 | ||
660 | if (!found) { | 660 | if (!found) { |
661 | pr_err("no channels available\n"); | 661 | pr_err("no channels available for device %s\n", devname); |
662 | return ret; | 662 | return ret; |
663 | } | 663 | } |
664 | 664 | ||
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 10d87753ed87..9e43a315e662 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -152,7 +152,6 @@ static int process_one_ticket(struct ceph_auth_client *ac, | |||
152 | void *ticket_buf = NULL; | 152 | void *ticket_buf = NULL; |
153 | void *tp, *tpend; | 153 | void *tp, *tpend; |
154 | void **ptp; | 154 | void **ptp; |
155 | struct ceph_timespec new_validity; | ||
156 | struct ceph_crypto_key new_session_key; | 155 | struct ceph_crypto_key new_session_key; |
157 | struct ceph_buffer *new_ticket_blob; | 156 | struct ceph_buffer *new_ticket_blob; |
158 | unsigned long new_expires, new_renew_after; | 157 | unsigned long new_expires, new_renew_after; |
@@ -193,8 +192,8 @@ static int process_one_ticket(struct ceph_auth_client *ac, | |||
193 | if (ret) | 192 | if (ret) |
194 | goto out; | 193 | goto out; |
195 | 194 | ||
196 | ceph_decode_copy(&dp, &new_validity, sizeof(new_validity)); | 195 | ceph_decode_timespec(&validity, dp); |
197 | ceph_decode_timespec(&validity, &new_validity); | 196 | dp += sizeof(struct ceph_timespec); |
198 | new_expires = get_seconds() + validity.tv_sec; | 197 | new_expires = get_seconds() + validity.tv_sec; |
199 | new_renew_after = new_expires - (validity.tv_sec / 4); | 198 | new_renew_after = new_expires - (validity.tv_sec / 4); |
200 | dout(" expires=%lu renew_after=%lu\n", new_expires, | 199 | dout(" expires=%lu renew_after=%lu\n", new_expires, |
@@ -233,10 +232,10 @@ static int process_one_ticket(struct ceph_auth_client *ac, | |||
233 | ceph_buffer_put(th->ticket_blob); | 232 | ceph_buffer_put(th->ticket_blob); |
234 | th->session_key = new_session_key; | 233 | th->session_key = new_session_key; |
235 | th->ticket_blob = new_ticket_blob; | 234 | th->ticket_blob = new_ticket_blob; |
236 | th->validity = new_validity; | ||
237 | th->secret_id = new_secret_id; | 235 | th->secret_id = new_secret_id; |
238 | th->expires = new_expires; | 236 | th->expires = new_expires; |
239 | th->renew_after = new_renew_after; | 237 | th->renew_after = new_renew_after; |
238 | th->have_key = true; | ||
240 | dout(" got ticket service %d (%s) secret_id %lld len %d\n", | 239 | dout(" got ticket service %d (%s) secret_id %lld len %d\n", |
241 | type, ceph_entity_type_name(type), th->secret_id, | 240 | type, ceph_entity_type_name(type), th->secret_id, |
242 | (int)th->ticket_blob->vec.iov_len); | 241 | (int)th->ticket_blob->vec.iov_len); |
@@ -384,6 +383,24 @@ bad: | |||
384 | return -ERANGE; | 383 | return -ERANGE; |
385 | } | 384 | } |
386 | 385 | ||
386 | static bool need_key(struct ceph_x_ticket_handler *th) | ||
387 | { | ||
388 | if (!th->have_key) | ||
389 | return true; | ||
390 | |||
391 | return get_seconds() >= th->renew_after; | ||
392 | } | ||
393 | |||
394 | static bool have_key(struct ceph_x_ticket_handler *th) | ||
395 | { | ||
396 | if (th->have_key) { | ||
397 | if (get_seconds() >= th->expires) | ||
398 | th->have_key = false; | ||
399 | } | ||
400 | |||
401 | return th->have_key; | ||
402 | } | ||
403 | |||
387 | static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed) | 404 | static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed) |
388 | { | 405 | { |
389 | int want = ac->want_keys; | 406 | int want = ac->want_keys; |
@@ -402,20 +419,18 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed) | |||
402 | continue; | 419 | continue; |
403 | 420 | ||
404 | th = get_ticket_handler(ac, service); | 421 | th = get_ticket_handler(ac, service); |
405 | |||
406 | if (IS_ERR(th)) { | 422 | if (IS_ERR(th)) { |
407 | *pneed |= service; | 423 | *pneed |= service; |
408 | continue; | 424 | continue; |
409 | } | 425 | } |
410 | 426 | ||
411 | if (get_seconds() >= th->renew_after) | 427 | if (need_key(th)) |
412 | *pneed |= service; | 428 | *pneed |= service; |
413 | if (get_seconds() >= th->expires) | 429 | if (!have_key(th)) |
414 | xi->have_keys &= ~service; | 430 | xi->have_keys &= ~service; |
415 | } | 431 | } |
416 | } | 432 | } |
417 | 433 | ||
418 | |||
419 | static int ceph_x_build_request(struct ceph_auth_client *ac, | 434 | static int ceph_x_build_request(struct ceph_auth_client *ac, |
420 | void *buf, void *end) | 435 | void *buf, void *end) |
421 | { | 436 | { |
@@ -667,14 +682,26 @@ static void ceph_x_destroy(struct ceph_auth_client *ac) | |||
667 | ac->private = NULL; | 682 | ac->private = NULL; |
668 | } | 683 | } |
669 | 684 | ||
670 | static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, | 685 | static void invalidate_ticket(struct ceph_auth_client *ac, int peer_type) |
671 | int peer_type) | ||
672 | { | 686 | { |
673 | struct ceph_x_ticket_handler *th; | 687 | struct ceph_x_ticket_handler *th; |
674 | 688 | ||
675 | th = get_ticket_handler(ac, peer_type); | 689 | th = get_ticket_handler(ac, peer_type); |
676 | if (!IS_ERR(th)) | 690 | if (!IS_ERR(th)) |
677 | memset(&th->validity, 0, sizeof(th->validity)); | 691 | th->have_key = false; |
692 | } | ||
693 | |||
694 | static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, | ||
695 | int peer_type) | ||
696 | { | ||
697 | /* | ||
698 | * We are to invalidate a service ticket in the hopes of | ||
699 | * getting a new, hopefully more valid, one. But, we won't get | ||
700 | * it unless our AUTH ticket is good, so invalidate AUTH ticket | ||
701 | * as well, just in case. | ||
702 | */ | ||
703 | invalidate_ticket(ac, peer_type); | ||
704 | invalidate_ticket(ac, CEPH_ENTITY_TYPE_AUTH); | ||
678 | } | 705 | } |
679 | 706 | ||
680 | static int calcu_signature(struct ceph_x_authorizer *au, | 707 | static int calcu_signature(struct ceph_x_authorizer *au, |
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index e8b7c6917d47..40b1a3cf7397 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -16,7 +16,7 @@ struct ceph_x_ticket_handler { | |||
16 | unsigned int service; | 16 | unsigned int service; |
17 | 17 | ||
18 | struct ceph_crypto_key session_key; | 18 | struct ceph_crypto_key session_key; |
19 | struct ceph_timespec validity; | 19 | bool have_key; |
20 | 20 | ||
21 | u64 secret_id; | 21 | u64 secret_id; |
22 | struct ceph_buffer *ticket_blob; | 22 | struct ceph_buffer *ticket_blob; |
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 9981039ef4ff..9cfedf565f5b 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -23,9 +23,6 @@ | |||
23 | #include <linux/ceph/pagelist.h> | 23 | #include <linux/ceph/pagelist.h> |
24 | #include <linux/export.h> | 24 | #include <linux/export.h> |
25 | 25 | ||
26 | #define list_entry_next(pos, member) \ | ||
27 | list_entry(pos->member.next, typeof(*pos), member) | ||
28 | |||
29 | /* | 26 | /* |
30 | * Ceph uses the messenger to exchange ceph_msg messages with other | 27 | * Ceph uses the messenger to exchange ceph_msg messages with other |
31 | * hosts in the system. The messenger provides ordered and reliable | 28 | * hosts in the system. The messenger provides ordered and reliable |
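The removed private list_entry_next() macro and the generic list_next_entry() it is replaced with throughout this file are equivalent; roughly, both expand to the following (sketch, see linux/list.h for the authoritative definition):

#define list_next_entry(pos, member) \
	list_entry((pos)->member.next, typeof(*(pos)), member)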
@@ -672,6 +669,8 @@ static void reset_connection(struct ceph_connection *con) | |||
672 | } | 669 | } |
673 | con->in_seq = 0; | 670 | con->in_seq = 0; |
674 | con->in_seq_acked = 0; | 671 | con->in_seq_acked = 0; |
672 | |||
673 | con->out_skip = 0; | ||
675 | } | 674 | } |
676 | 675 | ||
677 | /* | 676 | /* |
@@ -771,6 +770,8 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) | |||
771 | 770 | ||
772 | static void con_out_kvec_reset(struct ceph_connection *con) | 771 | static void con_out_kvec_reset(struct ceph_connection *con) |
773 | { | 772 | { |
773 | BUG_ON(con->out_skip); | ||
774 | |||
774 | con->out_kvec_left = 0; | 775 | con->out_kvec_left = 0; |
775 | con->out_kvec_bytes = 0; | 776 | con->out_kvec_bytes = 0; |
776 | con->out_kvec_cur = &con->out_kvec[0]; | 777 | con->out_kvec_cur = &con->out_kvec[0]; |
@@ -779,9 +780,9 @@ static void con_out_kvec_reset(struct ceph_connection *con) | |||
779 | static void con_out_kvec_add(struct ceph_connection *con, | 780 | static void con_out_kvec_add(struct ceph_connection *con, |
780 | size_t size, void *data) | 781 | size_t size, void *data) |
781 | { | 782 | { |
782 | int index; | 783 | int index = con->out_kvec_left; |
783 | 784 | ||
784 | index = con->out_kvec_left; | 785 | BUG_ON(con->out_skip); |
785 | BUG_ON(index >= ARRAY_SIZE(con->out_kvec)); | 786 | BUG_ON(index >= ARRAY_SIZE(con->out_kvec)); |
786 | 787 | ||
787 | con->out_kvec[index].iov_len = size; | 788 | con->out_kvec[index].iov_len = size; |
@@ -790,6 +791,27 @@ static void con_out_kvec_add(struct ceph_connection *con, | |||
790 | con->out_kvec_bytes += size; | 791 | con->out_kvec_bytes += size; |
791 | } | 792 | } |
792 | 793 | ||
794 | /* | ||
795 | * Chop off a kvec from the end. Return residual number of bytes for | ||
796 | * that kvec, i.e. how many bytes would have been written if the kvec | ||
797 | * hadn't been nuked. | ||
798 | */ | ||
799 | static int con_out_kvec_skip(struct ceph_connection *con) | ||
800 | { | ||
801 | int off = con->out_kvec_cur - con->out_kvec; | ||
802 | int skip = 0; | ||
803 | |||
804 | if (con->out_kvec_bytes > 0) { | ||
805 | skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len; | ||
806 | BUG_ON(con->out_kvec_bytes < skip); | ||
807 | BUG_ON(!con->out_kvec_left); | ||
808 | con->out_kvec_bytes -= skip; | ||
809 | con->out_kvec_left--; | ||
810 | } | ||
811 | |||
812 | return skip; | ||
813 | } | ||
814 | |||
793 | #ifdef CONFIG_BLOCK | 815 | #ifdef CONFIG_BLOCK |
794 | 816 | ||
795 | /* | 817 | /* |
@@ -1042,7 +1064,7 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor, | |||
1042 | /* Move on to the next page */ | 1064 | /* Move on to the next page */ |
1043 | 1065 | ||
1044 | BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head)); | 1066 | BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head)); |
1045 | cursor->page = list_entry_next(cursor->page, lru); | 1067 | cursor->page = list_next_entry(cursor->page, lru); |
1046 | cursor->last_piece = cursor->resid <= PAGE_SIZE; | 1068 | cursor->last_piece = cursor->resid <= PAGE_SIZE; |
1047 | 1069 | ||
1048 | return true; | 1070 | return true; |
@@ -1166,7 +1188,7 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, | |||
1166 | if (!cursor->resid && cursor->total_resid) { | 1188 | if (!cursor->resid && cursor->total_resid) { |
1167 | WARN_ON(!cursor->last_piece); | 1189 | WARN_ON(!cursor->last_piece); |
1168 | BUG_ON(list_is_last(&cursor->data->links, cursor->data_head)); | 1190 | BUG_ON(list_is_last(&cursor->data->links, cursor->data_head)); |
1169 | cursor->data = list_entry_next(cursor->data, links); | 1191 | cursor->data = list_next_entry(cursor->data, links); |
1170 | __ceph_msg_data_cursor_init(cursor); | 1192 | __ceph_msg_data_cursor_init(cursor); |
1171 | new_piece = true; | 1193 | new_piece = true; |
1172 | } | 1194 | } |
@@ -1197,7 +1219,6 @@ static void prepare_write_message_footer(struct ceph_connection *con) | |||
1197 | m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; | 1219 | m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; |
1198 | 1220 | ||
1199 | dout("prepare_write_message_footer %p\n", con); | 1221 | dout("prepare_write_message_footer %p\n", con); |
1200 | con->out_kvec_is_msg = true; | ||
1201 | con->out_kvec[v].iov_base = &m->footer; | 1222 | con->out_kvec[v].iov_base = &m->footer; |
1202 | if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { | 1223 | if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { |
1203 | if (con->ops->sign_message) | 1224 | if (con->ops->sign_message) |
@@ -1225,7 +1246,6 @@ static void prepare_write_message(struct ceph_connection *con) | |||
1225 | u32 crc; | 1246 | u32 crc; |
1226 | 1247 | ||
1227 | con_out_kvec_reset(con); | 1248 | con_out_kvec_reset(con); |
1228 | con->out_kvec_is_msg = true; | ||
1229 | con->out_msg_done = false; | 1249 | con->out_msg_done = false; |
1230 | 1250 | ||
1231 | /* Sneak an ack in there first? If we can get it into the same | 1251 | /* Sneak an ack in there first? If we can get it into the same |
@@ -1265,18 +1285,19 @@ static void prepare_write_message(struct ceph_connection *con) | |||
1265 | 1285 | ||
1266 | /* tag + hdr + front + middle */ | 1286 | /* tag + hdr + front + middle */ |
1267 | con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); | 1287 | con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); |
1268 | con_out_kvec_add(con, sizeof (m->hdr), &m->hdr); | 1288 | con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr); |
1269 | con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); | 1289 | con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); |
1270 | 1290 | ||
1271 | if (m->middle) | 1291 | if (m->middle) |
1272 | con_out_kvec_add(con, m->middle->vec.iov_len, | 1292 | con_out_kvec_add(con, m->middle->vec.iov_len, |
1273 | m->middle->vec.iov_base); | 1293 | m->middle->vec.iov_base); |
1274 | 1294 | ||
1275 | /* fill in crc (except data pages), footer */ | 1295 | /* fill in hdr crc and finalize hdr */ |
1276 | crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); | 1296 | crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); |
1277 | con->out_msg->hdr.crc = cpu_to_le32(crc); | 1297 | con->out_msg->hdr.crc = cpu_to_le32(crc); |
1278 | con->out_msg->footer.flags = 0; | 1298 | memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr)); |
1279 | 1299 | ||
1300 | /* fill in front and middle crc, footer */ | ||
1280 | crc = crc32c(0, m->front.iov_base, m->front.iov_len); | 1301 | crc = crc32c(0, m->front.iov_base, m->front.iov_len); |
1281 | con->out_msg->footer.front_crc = cpu_to_le32(crc); | 1302 | con->out_msg->footer.front_crc = cpu_to_le32(crc); |
1282 | if (m->middle) { | 1303 | if (m->middle) { |
@@ -1288,6 +1309,7 @@ static void prepare_write_message(struct ceph_connection *con) | |||
1288 | dout("%s front_crc %u middle_crc %u\n", __func__, | 1309 | dout("%s front_crc %u middle_crc %u\n", __func__, |
1289 | le32_to_cpu(con->out_msg->footer.front_crc), | 1310 | le32_to_cpu(con->out_msg->footer.front_crc), |
1290 | le32_to_cpu(con->out_msg->footer.middle_crc)); | 1311 | le32_to_cpu(con->out_msg->footer.middle_crc)); |
1312 | con->out_msg->footer.flags = 0; | ||
1291 | 1313 | ||
1292 | /* is there a data payload? */ | 1314 | /* is there a data payload? */ |
1293 | con->out_msg->footer.data_crc = 0; | 1315 | con->out_msg->footer.data_crc = 0; |
@@ -1492,7 +1514,6 @@ static int write_partial_kvec(struct ceph_connection *con) | |||
1492 | } | 1514 | } |
1493 | } | 1515 | } |
1494 | con->out_kvec_left = 0; | 1516 | con->out_kvec_left = 0; |
1495 | con->out_kvec_is_msg = false; | ||
1496 | ret = 1; | 1517 | ret = 1; |
1497 | out: | 1518 | out: |
1498 | dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con, | 1519 | dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con, |
@@ -1584,6 +1605,7 @@ static int write_partial_skip(struct ceph_connection *con) | |||
1584 | { | 1605 | { |
1585 | int ret; | 1606 | int ret; |
1586 | 1607 | ||
1608 | dout("%s %p %d left\n", __func__, con, con->out_skip); | ||
1587 | while (con->out_skip > 0) { | 1609 | while (con->out_skip > 0) { |
1588 | size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); | 1610 | size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE); |
1589 | 1611 | ||
@@ -2506,13 +2528,13 @@ more: | |||
2506 | 2528 | ||
2507 | more_kvec: | 2529 | more_kvec: |
2508 | /* kvec data queued? */ | 2530 | /* kvec data queued? */ |
2509 | if (con->out_skip) { | 2531 | if (con->out_kvec_left) { |
2510 | ret = write_partial_skip(con); | 2532 | ret = write_partial_kvec(con); |
2511 | if (ret <= 0) | 2533 | if (ret <= 0) |
2512 | goto out; | 2534 | goto out; |
2513 | } | 2535 | } |
2514 | if (con->out_kvec_left) { | 2536 | if (con->out_skip) { |
2515 | ret = write_partial_kvec(con); | 2537 | ret = write_partial_skip(con); |
2516 | if (ret <= 0) | 2538 | if (ret <= 0) |
2517 | goto out; | 2539 | goto out; |
2518 | } | 2540 | } |
@@ -2805,13 +2827,17 @@ static bool con_backoff(struct ceph_connection *con) | |||
2805 | 2827 | ||
2806 | static void con_fault_finish(struct ceph_connection *con) | 2828 | static void con_fault_finish(struct ceph_connection *con) |
2807 | { | 2829 | { |
2830 | dout("%s %p\n", __func__, con); | ||
2831 | |||
2808 | /* | 2832 | /* |
2809 | * in case we faulted due to authentication, invalidate our | 2833 | * in case we faulted due to authentication, invalidate our |
2810 | * current tickets so that we can get new ones. | 2834 | * current tickets so that we can get new ones. |
2811 | */ | 2835 | */ |
2812 | if (con->auth_retry && con->ops->invalidate_authorizer) { | 2836 | if (con->auth_retry) { |
2813 | dout("calling invalidate_authorizer()\n"); | 2837 | dout("auth_retry %d, invalidating\n", con->auth_retry); |
2814 | con->ops->invalidate_authorizer(con); | 2838 | if (con->ops->invalidate_authorizer) |
2839 | con->ops->invalidate_authorizer(con); | ||
2840 | con->auth_retry = 0; | ||
2815 | } | 2841 | } |
2816 | 2842 | ||
2817 | if (con->ops->fault) | 2843 | if (con->ops->fault) |
@@ -3050,16 +3076,31 @@ void ceph_msg_revoke(struct ceph_msg *msg) | |||
3050 | ceph_msg_put(msg); | 3076 | ceph_msg_put(msg); |
3051 | } | 3077 | } |
3052 | if (con->out_msg == msg) { | 3078 | if (con->out_msg == msg) { |
3053 | dout("%s %p msg %p - was sending\n", __func__, con, msg); | 3079 | BUG_ON(con->out_skip); |
3054 | con->out_msg = NULL; | 3080 | /* footer */ |
3055 | if (con->out_kvec_is_msg) { | 3081 | if (con->out_msg_done) { |
3056 | con->out_skip = con->out_kvec_bytes; | 3082 | con->out_skip += con_out_kvec_skip(con); |
3057 | con->out_kvec_is_msg = false; | 3083 | } else { |
3084 | BUG_ON(!msg->data_length); | ||
3085 | if (con->peer_features & CEPH_FEATURE_MSG_AUTH) | ||
3086 | con->out_skip += sizeof(msg->footer); | ||
3087 | else | ||
3088 | con->out_skip += sizeof(msg->old_footer); | ||
3058 | } | 3089 | } |
3090 | /* data, middle, front */ | ||
3091 | if (msg->data_length) | ||
3092 | con->out_skip += msg->cursor.total_resid; | ||
3093 | if (msg->middle) | ||
3094 | con->out_skip += con_out_kvec_skip(con); | ||
3095 | con->out_skip += con_out_kvec_skip(con); | ||
3096 | |||
3097 | dout("%s %p msg %p - was sending, will write %d skip %d\n", | ||
3098 | __func__, con, msg, con->out_kvec_bytes, con->out_skip); | ||
3059 | msg->hdr.seq = 0; | 3099 | msg->hdr.seq = 0; |
3060 | 3100 | con->out_msg = NULL; | |
3061 | ceph_msg_put(msg); | 3101 | ceph_msg_put(msg); |
3062 | } | 3102 | } |
3103 | |||
3063 | mutex_unlock(&con->mutex); | 3104 | mutex_unlock(&con->mutex); |
3064 | } | 3105 | } |
3065 | 3106 | ||
@@ -3361,9 +3402,7 @@ static void ceph_msg_free(struct ceph_msg *m) | |||
3361 | static void ceph_msg_release(struct kref *kref) | 3402 | static void ceph_msg_release(struct kref *kref) |
3362 | { | 3403 | { |
3363 | struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); | 3404 | struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); |
3364 | LIST_HEAD(data); | 3405 | struct ceph_msg_data *data, *next; |
3365 | struct list_head *links; | ||
3366 | struct list_head *next; | ||
3367 | 3406 | ||
3368 | dout("%s %p\n", __func__, m); | 3407 | dout("%s %p\n", __func__, m); |
3369 | WARN_ON(!list_empty(&m->list_head)); | 3408 | WARN_ON(!list_empty(&m->list_head)); |
@@ -3376,12 +3415,8 @@ static void ceph_msg_release(struct kref *kref) | |||
3376 | m->middle = NULL; | 3415 | m->middle = NULL; |
3377 | } | 3416 | } |
3378 | 3417 | ||
3379 | list_splice_init(&m->data, &data); | 3418 | list_for_each_entry_safe(data, next, &m->data, links) { |
3380 | list_for_each_safe(links, next, &data) { | 3419 | list_del_init(&data->links); |
3381 | struct ceph_msg_data *data; | ||
3382 | |||
3383 | data = list_entry(links, struct ceph_msg_data, links); | ||
3384 | list_del_init(links); | ||
3385 | ceph_msg_data_destroy(data); | 3420 | ceph_msg_data_destroy(data); |
3386 | } | 3421 | } |
3387 | m->data_length = 0; | 3422 | m->data_length = 0; |
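ceph_msg_release() now walks the message data list with list_for_each_entry_safe(), which keeps a saved 'next' cursor so entries can be unlinked and freed during the walk. A generic sketch of the idiom; the types and helper below are illustrative, not the ceph ones:

#include <linux/list.h>
#include <linux/slab.h>

struct item {
	struct list_head links;
	/* payload ... */
};

static void drain_items(struct list_head *head)
{
	struct item *it, *next;

	list_for_each_entry_safe(it, next, head, links) {
		list_del_init(&it->links);
		kfree(it);		/* safe: 'next' was saved before freeing */
	}
}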
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index edda01626a45..de85dddc3dc0 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -364,10 +364,6 @@ static bool have_debugfs_info(struct ceph_mon_client *monc) | |||
364 | return monc->client->have_fsid && monc->auth->global_id > 0; | 364 | return monc->client->have_fsid && monc->auth->global_id > 0; |
365 | } | 365 | } |
366 | 366 | ||
367 | /* | ||
368 | * The monitor responds with mount ack indicate mount success. The | ||
369 | * included client ticket allows the client to talk to MDSs and OSDs. | ||
370 | */ | ||
371 | static void ceph_monc_handle_map(struct ceph_mon_client *monc, | 367 | static void ceph_monc_handle_map(struct ceph_mon_client *monc, |
372 | struct ceph_msg *msg) | 368 | struct ceph_msg *msg) |
373 | { | 369 | { |
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index c29809f765dc..62c049b647e9 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -56,7 +56,6 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o | |||
56 | obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o | 56 | obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o |
57 | obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o | 57 | obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o |
58 | obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o | 58 | obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o |
59 | obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o | ||
60 | obj-$(CONFIG_NETLABEL) += cipso_ipv4.o | 59 | obj-$(CONFIG_NETLABEL) += cipso_ipv4.o |
61 | 60 | ||
62 | obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ | 61 | obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 22e73171ea63..d07fc076bea0 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -289,10 +289,8 @@ static void __node_free_rcu(struct rcu_head *head) | |||
289 | 289 | ||
290 | if (!n->tn_bits) | 290 | if (!n->tn_bits) |
291 | kmem_cache_free(trie_leaf_kmem, n); | 291 | kmem_cache_free(trie_leaf_kmem, n); |
292 | else if (n->tn_bits <= TNODE_KMALLOC_MAX) | ||
293 | kfree(n); | ||
294 | else | 292 | else |
295 | vfree(n); | 293 | kvfree(n); |
296 | } | 294 | } |
297 | 295 | ||
298 | #define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu) | 296 | #define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu) |
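fib_trie now frees tnodes with kvfree(), which dispatches on how the memory was obtained, so the caller no longer needs the kmalloc-vs-vmalloc branch. A hedged sketch of what kvfree() does internally (the authoritative version lives in mm/util.c):

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

static void kvfree_sketch(const void *addr)
{
	if (is_vmalloc_addr(addr))	/* came from vmalloc() */
		vfree(addr);
	else				/* came from kmalloc() */
		kfree(addr);
}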
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 46ce410703b1..4d367b4139a3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -24,7 +24,6 @@ | |||
24 | #include <net/cipso_ipv4.h> | 24 | #include <net/cipso_ipv4.h> |
25 | #include <net/inet_frag.h> | 25 | #include <net/inet_frag.h> |
26 | #include <net/ping.h> | 26 | #include <net/ping.h> |
27 | #include <net/tcp_memcontrol.h> | ||
28 | 27 | ||
29 | static int zero; | 28 | static int zero; |
30 | static int one = 1; | 29 | static int one = 1; |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2a67244f97ca..a4d523709ab3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -73,7 +73,6 @@ | |||
73 | #include <net/timewait_sock.h> | 73 | #include <net/timewait_sock.h> |
74 | #include <net/xfrm.h> | 74 | #include <net/xfrm.h> |
75 | #include <net/secure_seq.h> | 75 | #include <net/secure_seq.h> |
76 | #include <net/tcp_memcontrol.h> | ||
77 | #include <net/busy_poll.h> | 76 | #include <net/busy_poll.h> |
78 | 77 | ||
79 | #include <linux/inet.h> | 78 | #include <linux/inet.h> |
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
deleted file mode 100644
index 18bc7f745e9c..000000000000
--- a/net/ipv4/tcp_memcontrol.c
+++ /dev/null
@@ -1,200 +0,0 @@ | |||
1 | #include <net/tcp.h> | ||
2 | #include <net/tcp_memcontrol.h> | ||
3 | #include <net/sock.h> | ||
4 | #include <net/ip.h> | ||
5 | #include <linux/nsproxy.h> | ||
6 | #include <linux/memcontrol.h> | ||
7 | #include <linux/module.h> | ||
8 | |||
9 | int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) | ||
10 | { | ||
11 | struct mem_cgroup *parent = parent_mem_cgroup(memcg); | ||
12 | struct page_counter *counter_parent = NULL; | ||
13 | /* | ||
14 | * The root cgroup does not use page_counters, but rather, | ||
15 | * rely on the data already collected by the network | ||
16 | * subsystem | ||
17 | */ | ||
18 | if (memcg == root_mem_cgroup) | ||
19 | return 0; | ||
20 | |||
21 | memcg->tcp_mem.memory_pressure = 0; | ||
22 | |||
23 | if (parent) | ||
24 | counter_parent = &parent->tcp_mem.memory_allocated; | ||
25 | |||
26 | page_counter_init(&memcg->tcp_mem.memory_allocated, counter_parent); | ||
27 | |||
28 | return 0; | ||
29 | } | ||
30 | |||
31 | void tcp_destroy_cgroup(struct mem_cgroup *memcg) | ||
32 | { | ||
33 | if (memcg == root_mem_cgroup) | ||
34 | return; | ||
35 | |||
36 | if (memcg->tcp_mem.active) | ||
37 | static_branch_dec(&memcg_sockets_enabled_key); | ||
38 | } | ||
39 | |||
40 | static int tcp_update_limit(struct mem_cgroup *memcg, unsigned long nr_pages) | ||
41 | { | ||
42 | int ret; | ||
43 | |||
44 | if (memcg == root_mem_cgroup) | ||
45 | return -EINVAL; | ||
46 | |||
47 | ret = page_counter_limit(&memcg->tcp_mem.memory_allocated, nr_pages); | ||
48 | if (ret) | ||
49 | return ret; | ||
50 | |||
51 | if (!memcg->tcp_mem.active) { | ||
52 | /* | ||
53 | * The active flag needs to be written after the static_key | ||
54 | * update. This is what guarantees that the socket activation | ||
55 | * function is the last one to run. See sock_update_memcg() for | ||
56 | * details, and note that we don't mark any socket as belonging | ||
57 | * to this memcg until that flag is up. | ||
58 | * | ||
59 | * We need to do this, because static_keys will span multiple | ||
60 | * sites, but we can't control their order. If we mark a socket | ||
61 | * as accounted, but the accounting functions are not patched in | ||
62 | * yet, we'll lose accounting. | ||
63 | * | ||
64 | * We never race with the readers in sock_update_memcg(), | ||
65 | * because when this value change, the code to process it is not | ||
66 | * patched in yet. | ||
67 | */ | ||
68 | static_branch_inc(&memcg_sockets_enabled_key); | ||
69 | memcg->tcp_mem.active = true; | ||
70 | } | ||
71 | |||
72 | return 0; | ||
73 | } | ||
74 | |||
75 | enum { | ||
76 | RES_USAGE, | ||
77 | RES_LIMIT, | ||
78 | RES_MAX_USAGE, | ||
79 | RES_FAILCNT, | ||
80 | }; | ||
81 | |||
82 | static DEFINE_MUTEX(tcp_limit_mutex); | ||
83 | |||
84 | static ssize_t tcp_cgroup_write(struct kernfs_open_file *of, | ||
85 | char *buf, size_t nbytes, loff_t off) | ||
86 | { | ||
87 | struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); | ||
88 | unsigned long nr_pages; | ||
89 | int ret = 0; | ||
90 | |||
91 | buf = strstrip(buf); | ||
92 | |||
93 | switch (of_cft(of)->private) { | ||
94 | case RES_LIMIT: | ||
95 | /* see memcontrol.c */ | ||
96 | ret = page_counter_memparse(buf, "-1", &nr_pages); | ||
97 | if (ret) | ||
98 | break; | ||
99 | mutex_lock(&tcp_limit_mutex); | ||
100 | ret = tcp_update_limit(memcg, nr_pages); | ||
101 | mutex_unlock(&tcp_limit_mutex); | ||
102 | break; | ||
103 | default: | ||
104 | ret = -EINVAL; | ||
105 | break; | ||
106 | } | ||
107 | return ret ?: nbytes; | ||
108 | } | ||
109 | |||
110 | static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft) | ||
111 | { | ||
112 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | ||
113 | u64 val; | ||
114 | |||
115 | switch (cft->private) { | ||
116 | case RES_LIMIT: | ||
117 | if (memcg == root_mem_cgroup) | ||
118 | val = PAGE_COUNTER_MAX; | ||
119 | else | ||
120 | val = memcg->tcp_mem.memory_allocated.limit; | ||
121 | val *= PAGE_SIZE; | ||
122 | break; | ||
123 | case RES_USAGE: | ||
124 | if (memcg == root_mem_cgroup) | ||
125 | val = atomic_long_read(&tcp_memory_allocated); | ||
126 | else | ||
127 | val = page_counter_read(&memcg->tcp_mem.memory_allocated); | ||
128 | val *= PAGE_SIZE; | ||
129 | break; | ||
130 | case RES_FAILCNT: | ||
131 | if (memcg == root_mem_cgroup) | ||
132 | return 0; | ||
133 | val = memcg->tcp_mem.memory_allocated.failcnt; | ||
134 | break; | ||
135 | case RES_MAX_USAGE: | ||
136 | if (memcg == root_mem_cgroup) | ||
137 | return 0; | ||
138 | val = memcg->tcp_mem.memory_allocated.watermark; | ||
139 | val *= PAGE_SIZE; | ||
140 | break; | ||
141 | default: | ||
142 | BUG(); | ||
143 | } | ||
144 | return val; | ||
145 | } | ||
146 | |||
147 | static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of, | ||
148 | char *buf, size_t nbytes, loff_t off) | ||
149 | { | ||
150 | struct mem_cgroup *memcg; | ||
151 | |||
152 | memcg = mem_cgroup_from_css(of_css(of)); | ||
153 | if (memcg == root_mem_cgroup) | ||
154 | return nbytes; | ||
155 | |||
156 | switch (of_cft(of)->private) { | ||
157 | case RES_MAX_USAGE: | ||
158 | page_counter_reset_watermark(&memcg->tcp_mem.memory_allocated); | ||
159 | break; | ||
160 | case RES_FAILCNT: | ||
161 | memcg->tcp_mem.memory_allocated.failcnt = 0; | ||
162 | break; | ||
163 | } | ||
164 | |||
165 | return nbytes; | ||
166 | } | ||
167 | |||
168 | static struct cftype tcp_files[] = { | ||
169 | { | ||
170 | .name = "kmem.tcp.limit_in_bytes", | ||
171 | .write = tcp_cgroup_write, | ||
172 | .read_u64 = tcp_cgroup_read, | ||
173 | .private = RES_LIMIT, | ||
174 | }, | ||
175 | { | ||
176 | .name = "kmem.tcp.usage_in_bytes", | ||
177 | .read_u64 = tcp_cgroup_read, | ||
178 | .private = RES_USAGE, | ||
179 | }, | ||
180 | { | ||
181 | .name = "kmem.tcp.failcnt", | ||
182 | .private = RES_FAILCNT, | ||
183 | .write = tcp_cgroup_reset, | ||
184 | .read_u64 = tcp_cgroup_read, | ||
185 | }, | ||
186 | { | ||
187 | .name = "kmem.tcp.max_usage_in_bytes", | ||
188 | .private = RES_MAX_USAGE, | ||
189 | .write = tcp_cgroup_reset, | ||
190 | .read_u64 = tcp_cgroup_read, | ||
191 | }, | ||
192 | { } /* terminate */ | ||
193 | }; | ||
194 | |||
195 | static int __init tcp_memcontrol_init(void) | ||
196 | { | ||
197 | WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, tcp_files)); | ||
198 | return 0; | ||
199 | } | ||
200 | __initcall(tcp_memcontrol_init); | ||
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4ad8edb46f7c..006396e31cb0 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -61,7 +61,6 @@ | |||
61 | #include <net/timewait_sock.h> | 61 | #include <net/timewait_sock.h> |
62 | #include <net/inet_common.h> | 62 | #include <net/inet_common.h> |
63 | #include <net/secure_seq.h> | 63 | #include <net/secure_seq.h> |
64 | #include <net/tcp_memcontrol.h> | ||
65 | #include <net/busy_poll.h> | 64 | #include <net/busy_poll.h> |
66 | 65 | ||
67 | #include <linux/proc_fs.h> | 66 | #include <linux/proc_fs.h> |
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index abbdff03ce92..3e24d0ddb51b 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -91,7 +91,7 @@ static const struct file_operations reset_ops = { | |||
91 | }; | 91 | }; |
92 | #endif | 92 | #endif |
93 | 93 | ||
94 | static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = { | 94 | static const char *hw_flag_names[] = { |
95 | #define FLAG(F) [IEEE80211_HW_##F] = #F | 95 | #define FLAG(F) [IEEE80211_HW_##F] = #F |
96 | FLAG(HAS_RATE_CONTROL), | 96 | FLAG(HAS_RATE_CONTROL), |
97 | FLAG(RX_INCLUDES_FCS), | 97 | FLAG(RX_INCLUDES_FCS), |
@@ -126,9 +126,6 @@ static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = { | |||
126 | FLAG(SUPPORTS_AMSDU_IN_AMPDU), | 126 | FLAG(SUPPORTS_AMSDU_IN_AMPDU), |
127 | FLAG(BEACON_TX_STATUS), | 127 | FLAG(BEACON_TX_STATUS), |
128 | FLAG(NEEDS_UNIQUE_STA_ADDR), | 128 | FLAG(NEEDS_UNIQUE_STA_ADDR), |
129 | |||
130 | /* keep last for the build bug below */ | ||
131 | (void *)0x1 | ||
132 | #undef FLAG | 129 | #undef FLAG |
133 | }; | 130 | }; |
134 | 131 | ||
@@ -148,7 +145,7 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, | |||
148 | /* fail compilation if somebody adds or removes | 145 | /* fail compilation if somebody adds or removes |
149 | * a flag without updating the name array above | 146 | * a flag without updating the name array above |
150 | */ | 147 | */ |
151 | BUILD_BUG_ON(hw_flag_names[NUM_IEEE80211_HW_FLAGS] != (void *)0x1); | 148 | BUILD_BUG_ON(ARRAY_SIZE(hw_flag_names) != NUM_IEEE80211_HW_FLAGS); |
152 | 149 | ||
153 | for (i = 0; i < NUM_IEEE80211_HW_FLAGS; i++) { | 150 | for (i = 0; i < NUM_IEEE80211_HW_FLAGS; i++) { |
154 | if (test_bit(i, local->hw.flags)) | 151 | if (test_bit(i, local->hw.flags)) |
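The debugfs change drops the (void *)0x1 sentinel from hw_flag_names[] and instead checks at build time that the array length matches the flag enum. A generic sketch of the ARRAY_SIZE/BUILD_BUG_ON idiom; the names here are made up, not mac80211's:

#include <linux/kernel.h>
#include <linux/bug.h>

enum demo_flag { DEMO_FLAG_A, DEMO_FLAG_B, NUM_DEMO_FLAGS };

static const char *demo_flag_names[] = {
	[DEMO_FLAG_A] = "A",
	[DEMO_FLAG_B] = "B",
};

static void demo_check_names(void)
{
	/* fails the build if a flag is added without a matching name */
	BUILD_BUG_ON(ARRAY_SIZE(demo_flag_names) != NUM_DEMO_FLAGS);
}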
diff --git a/net/rds/ib.c b/net/rds/ib.c
index f222885ac0c7..9481d55ff6cb 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -122,44 +122,34 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) | |||
122 | static void rds_ib_add_one(struct ib_device *device) | 122 | static void rds_ib_add_one(struct ib_device *device) |
123 | { | 123 | { |
124 | struct rds_ib_device *rds_ibdev; | 124 | struct rds_ib_device *rds_ibdev; |
125 | struct ib_device_attr *dev_attr; | ||
126 | 125 | ||
127 | /* Only handle IB (no iWARP) devices */ | 126 | /* Only handle IB (no iWARP) devices */ |
128 | if (device->node_type != RDMA_NODE_IB_CA) | 127 | if (device->node_type != RDMA_NODE_IB_CA) |
129 | return; | 128 | return; |
130 | 129 | ||
131 | dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL); | ||
132 | if (!dev_attr) | ||
133 | return; | ||
134 | |||
135 | if (ib_query_device(device, dev_attr)) { | ||
136 | rdsdebug("Query device failed for %s\n", device->name); | ||
137 | goto free_attr; | ||
138 | } | ||
139 | |||
140 | rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL, | 130 | rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL, |
141 | ibdev_to_node(device)); | 131 | ibdev_to_node(device)); |
142 | if (!rds_ibdev) | 132 | if (!rds_ibdev) |
143 | goto free_attr; | 133 | return; |
144 | 134 | ||
145 | spin_lock_init(&rds_ibdev->spinlock); | 135 | spin_lock_init(&rds_ibdev->spinlock); |
146 | atomic_set(&rds_ibdev->refcount, 1); | 136 | atomic_set(&rds_ibdev->refcount, 1); |
147 | INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free); | 137 | INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free); |
148 | 138 | ||
149 | rds_ibdev->max_wrs = dev_attr->max_qp_wr; | 139 | rds_ibdev->max_wrs = device->attrs.max_qp_wr; |
150 | rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE); | 140 | rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE); |
151 | 141 | ||
152 | rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32; | 142 | rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32; |
153 | rds_ibdev->max_1m_fmrs = dev_attr->max_mr ? | 143 | rds_ibdev->max_1m_fmrs = device->attrs.max_mr ? |
154 | min_t(unsigned int, (dev_attr->max_mr / 2), | 144 | min_t(unsigned int, (device->attrs.max_mr / 2), |
155 | rds_ib_fmr_1m_pool_size) : rds_ib_fmr_1m_pool_size; | 145 | rds_ib_fmr_1m_pool_size) : rds_ib_fmr_1m_pool_size; |
156 | 146 | ||
157 | rds_ibdev->max_8k_fmrs = dev_attr->max_mr ? | 147 | rds_ibdev->max_8k_fmrs = device->attrs.max_mr ? |
158 | min_t(unsigned int, ((dev_attr->max_mr / 2) * RDS_MR_8K_SCALE), | 148 | min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE), |
159 | rds_ib_fmr_8k_pool_size) : rds_ib_fmr_8k_pool_size; | 149 | rds_ib_fmr_8k_pool_size) : rds_ib_fmr_8k_pool_size; |
160 | 150 | ||
161 | rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom; | 151 | rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom; |
162 | rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom; | 152 | rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom; |
163 | 153 | ||
164 | rds_ibdev->dev = device; | 154 | rds_ibdev->dev = device; |
165 | rds_ibdev->pd = ib_alloc_pd(device); | 155 | rds_ibdev->pd = ib_alloc_pd(device); |
@@ -183,7 +173,7 @@ static void rds_ib_add_one(struct ib_device *device) | |||
183 | } | 173 | } |
184 | 174 | ||
185 | rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_fmrs = %d, max_8k_fmrs = %d\n", | 175 | rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_fmrs = %d, max_8k_fmrs = %d\n", |
186 | dev_attr->max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge, | 176 | device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge, |
187 | rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_fmrs, | 177 | rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_fmrs, |
188 | rds_ibdev->max_8k_fmrs); | 178 | rds_ibdev->max_8k_fmrs); |
189 | 179 | ||
@@ -202,8 +192,6 @@ static void rds_ib_add_one(struct ib_device *device) | |||
202 | 192 | ||
203 | put_dev: | 193 | put_dev: |
204 | rds_ib_dev_put(rds_ibdev); | 194 | rds_ib_dev_put(rds_ibdev); |
205 | free_attr: | ||
206 | kfree(dev_attr); | ||
207 | } | 195 | } |
208 | 196 | ||
209 | /* | 197 | /* |
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 576f1825fc55..f4a9fff829e0 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -60,30 +60,20 @@ LIST_HEAD(iw_nodev_conns); | |||
60 | static void rds_iw_add_one(struct ib_device *device) | 60 | static void rds_iw_add_one(struct ib_device *device) |
61 | { | 61 | { |
62 | struct rds_iw_device *rds_iwdev; | 62 | struct rds_iw_device *rds_iwdev; |
63 | struct ib_device_attr *dev_attr; | ||
64 | 63 | ||
65 | /* Only handle iwarp devices */ | 64 | /* Only handle iwarp devices */ |
66 | if (device->node_type != RDMA_NODE_RNIC) | 65 | if (device->node_type != RDMA_NODE_RNIC) |
67 | return; | 66 | return; |
68 | 67 | ||
69 | dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL); | ||
70 | if (!dev_attr) | ||
71 | return; | ||
72 | |||
73 | if (ib_query_device(device, dev_attr)) { | ||
74 | rdsdebug("Query device failed for %s\n", device->name); | ||
75 | goto free_attr; | ||
76 | } | ||
77 | |||
78 | rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL); | 68 | rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL); |
79 | if (!rds_iwdev) | 69 | if (!rds_iwdev) |
80 | goto free_attr; | 70 | return; |
81 | 71 | ||
82 | spin_lock_init(&rds_iwdev->spinlock); | 72 | spin_lock_init(&rds_iwdev->spinlock); |
83 | 73 | ||
84 | rds_iwdev->dma_local_lkey = !!(dev_attr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY); | 74 | rds_iwdev->dma_local_lkey = !!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY); |
85 | rds_iwdev->max_wrs = dev_attr->max_qp_wr; | 75 | rds_iwdev->max_wrs = device->attrs.max_qp_wr; |
86 | rds_iwdev->max_sge = min(dev_attr->max_sge, RDS_IW_MAX_SGE); | 76 | rds_iwdev->max_sge = min(device->attrs.max_sge, RDS_IW_MAX_SGE); |
87 | 77 | ||
88 | rds_iwdev->dev = device; | 78 | rds_iwdev->dev = device; |
89 | rds_iwdev->pd = ib_alloc_pd(device); | 79 | rds_iwdev->pd = ib_alloc_pd(device); |
@@ -111,8 +101,7 @@ static void rds_iw_add_one(struct ib_device *device) | |||
111 | list_add_tail(&rds_iwdev->list, &rds_iw_devices); | 101 | list_add_tail(&rds_iwdev->list, &rds_iw_devices); |
112 | 102 | ||
113 | ib_set_client_data(device, &rds_iw_client, rds_iwdev); | 103 | ib_set_client_data(device, &rds_iw_client, rds_iwdev); |
114 | 104 | return; | |
115 | goto free_attr; | ||
116 | 105 | ||
117 | err_mr: | 106 | err_mr: |
118 | if (rds_iwdev->mr) | 107 | if (rds_iwdev->mr) |
@@ -121,8 +110,6 @@ err_pd: | |||
121 | ib_dealloc_pd(rds_iwdev->pd); | 110 | ib_dealloc_pd(rds_iwdev->pd); |
122 | free_dev: | 111 | free_dev: |
123 | kfree(rds_iwdev); | 112 | kfree(rds_iwdev); |
124 | free_attr: | ||
125 | kfree(dev_attr); | ||
126 | } | 113 | } |
127 | 114 | ||
128 | static void rds_iw_remove_one(struct ib_device *device, void *client_data) | 115 | static void rds_iw_remove_one(struct ib_device *device, void *client_data) |
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 5e4f815c2b34..2b32fd602669 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -771,7 +771,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, | |||
771 | if (count == 0) | 771 | if (count == 0) |
772 | return 0; | 772 | return 0; |
773 | 773 | ||
774 | mutex_lock(&inode->i_mutex); /* protect against multiple concurrent | 774 | inode_lock(inode); /* protect against multiple concurrent |
775 | * readers on this file */ | 775 | * readers on this file */ |
776 | again: | 776 | again: |
777 | spin_lock(&queue_lock); | 777 | spin_lock(&queue_lock); |
@@ -784,7 +784,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, | |||
784 | } | 784 | } |
785 | if (rp->q.list.next == &cd->queue) { | 785 | if (rp->q.list.next == &cd->queue) { |
786 | spin_unlock(&queue_lock); | 786 | spin_unlock(&queue_lock); |
787 | mutex_unlock(&inode->i_mutex); | 787 | inode_unlock(inode); |
788 | WARN_ON_ONCE(rp->offset); | 788 | WARN_ON_ONCE(rp->offset); |
789 | return 0; | 789 | return 0; |
790 | } | 790 | } |
@@ -838,7 +838,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, | |||
838 | } | 838 | } |
839 | if (err == -EAGAIN) | 839 | if (err == -EAGAIN) |
840 | goto again; | 840 | goto again; |
841 | mutex_unlock(&inode->i_mutex); | 841 | inode_unlock(inode); |
842 | return err ? err : count; | 842 | return err ? err : count; |
843 | } | 843 | } |
844 | 844 | ||
@@ -909,9 +909,9 @@ static ssize_t cache_write(struct file *filp, const char __user *buf, | |||
909 | if (!cd->cache_parse) | 909 | if (!cd->cache_parse) |
910 | goto out; | 910 | goto out; |
911 | 911 | ||
912 | mutex_lock(&inode->i_mutex); | 912 | inode_lock(inode); |
913 | ret = cache_downcall(mapping, buf, count, cd); | 913 | ret = cache_downcall(mapping, buf, count, cd); |
914 | mutex_unlock(&inode->i_mutex); | 914 | inode_unlock(inode); |
915 | out: | 915 | out: |
916 | return ret; | 916 | return ret; |
917 | } | 917 | } |
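cache.c (and rpc_pipe.c below) convert direct inode->i_mutex locking to the inode_lock()/inode_unlock() helpers. At this point in the tree those helpers are thin wrappers over the same mutex; approximately the following, though include/linux/fs.h has the real definitions:

static inline void inode_lock(struct inode *inode)
{
	mutex_lock(&inode->i_mutex);
}

static inline void inode_unlock(struct inode *inode)
{
	mutex_unlock(&inode->i_mutex);
}

static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
{
	mutex_lock_nested(&inode->i_mutex, subclass);
}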
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 14f45bf0410c..31789ef3e614 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -172,7 +172,7 @@ rpc_close_pipes(struct inode *inode) | |||
172 | int need_release; | 172 | int need_release; |
173 | LIST_HEAD(free_list); | 173 | LIST_HEAD(free_list); |
174 | 174 | ||
175 | mutex_lock(&inode->i_mutex); | 175 | inode_lock(inode); |
176 | spin_lock(&pipe->lock); | 176 | spin_lock(&pipe->lock); |
177 | need_release = pipe->nreaders != 0 || pipe->nwriters != 0; | 177 | need_release = pipe->nreaders != 0 || pipe->nwriters != 0; |
178 | pipe->nreaders = 0; | 178 | pipe->nreaders = 0; |
@@ -188,7 +188,7 @@ rpc_close_pipes(struct inode *inode) | |||
188 | cancel_delayed_work_sync(&pipe->queue_timeout); | 188 | cancel_delayed_work_sync(&pipe->queue_timeout); |
189 | rpc_inode_setowner(inode, NULL); | 189 | rpc_inode_setowner(inode, NULL); |
190 | RPC_I(inode)->pipe = NULL; | 190 | RPC_I(inode)->pipe = NULL; |
191 | mutex_unlock(&inode->i_mutex); | 191 | inode_unlock(inode); |
192 | } | 192 | } |
193 | 193 | ||
194 | static struct inode * | 194 | static struct inode * |
@@ -221,7 +221,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp) | |||
221 | int first_open; | 221 | int first_open; |
222 | int res = -ENXIO; | 222 | int res = -ENXIO; |
223 | 223 | ||
224 | mutex_lock(&inode->i_mutex); | 224 | inode_lock(inode); |
225 | pipe = RPC_I(inode)->pipe; | 225 | pipe = RPC_I(inode)->pipe; |
226 | if (pipe == NULL) | 226 | if (pipe == NULL) |
227 | goto out; | 227 | goto out; |
@@ -237,7 +237,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp) | |||
237 | pipe->nwriters++; | 237 | pipe->nwriters++; |
238 | res = 0; | 238 | res = 0; |
239 | out: | 239 | out: |
240 | mutex_unlock(&inode->i_mutex); | 240 | inode_unlock(inode); |
241 | return res; | 241 | return res; |
242 | } | 242 | } |
243 | 243 | ||
@@ -248,7 +248,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp) | |||
248 | struct rpc_pipe_msg *msg; | 248 | struct rpc_pipe_msg *msg; |
249 | int last_close; | 249 | int last_close; |
250 | 250 | ||
251 | mutex_lock(&inode->i_mutex); | 251 | inode_lock(inode); |
252 | pipe = RPC_I(inode)->pipe; | 252 | pipe = RPC_I(inode)->pipe; |
253 | if (pipe == NULL) | 253 | if (pipe == NULL) |
254 | goto out; | 254 | goto out; |
@@ -278,7 +278,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp) | |||
278 | if (last_close && pipe->ops->release_pipe) | 278 | if (last_close && pipe->ops->release_pipe) |
279 | pipe->ops->release_pipe(inode); | 279 | pipe->ops->release_pipe(inode); |
280 | out: | 280 | out: |
281 | mutex_unlock(&inode->i_mutex); | 281 | inode_unlock(inode); |
282 | return 0; | 282 | return 0; |
283 | } | 283 | } |
284 | 284 | ||
@@ -290,7 +290,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) | |||
290 | struct rpc_pipe_msg *msg; | 290 | struct rpc_pipe_msg *msg; |
291 | int res = 0; | 291 | int res = 0; |
292 | 292 | ||
293 | mutex_lock(&inode->i_mutex); | 293 | inode_lock(inode); |
294 | pipe = RPC_I(inode)->pipe; | 294 | pipe = RPC_I(inode)->pipe; |
295 | if (pipe == NULL) { | 295 | if (pipe == NULL) { |
296 | res = -EPIPE; | 296 | res = -EPIPE; |
@@ -322,7 +322,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset) | |||
322 | pipe->ops->destroy_msg(msg); | 322 | pipe->ops->destroy_msg(msg); |
323 | } | 323 | } |
324 | out_unlock: | 324 | out_unlock: |
325 | mutex_unlock(&inode->i_mutex); | 325 | inode_unlock(inode); |
326 | return res; | 326 | return res; |
327 | } | 327 | } |
328 | 328 | ||
@@ -332,11 +332,11 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of | |||
332 | struct inode *inode = file_inode(filp); | 332 | struct inode *inode = file_inode(filp); |
333 | int res; | 333 | int res; |
334 | 334 | ||
335 | mutex_lock(&inode->i_mutex); | 335 | inode_lock(inode); |
336 | res = -EPIPE; | 336 | res = -EPIPE; |
337 | if (RPC_I(inode)->pipe != NULL) | 337 | if (RPC_I(inode)->pipe != NULL) |
338 | res = RPC_I(inode)->pipe->ops->downcall(filp, buf, len); | 338 | res = RPC_I(inode)->pipe->ops->downcall(filp, buf, len); |
339 | mutex_unlock(&inode->i_mutex); | 339 | inode_unlock(inode); |
340 | return res; | 340 | return res; |
341 | } | 341 | } |
342 | 342 | ||
@@ -349,12 +349,12 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait) | |||
349 | 349 | ||
350 | poll_wait(filp, &rpci->waitq, wait); | 350 | poll_wait(filp, &rpci->waitq, wait); |
351 | 351 | ||
352 | mutex_lock(&inode->i_mutex); | 352 | inode_lock(inode); |
353 | if (rpci->pipe == NULL) | 353 | if (rpci->pipe == NULL) |
354 | mask |= POLLERR | POLLHUP; | 354 | mask |= POLLERR | POLLHUP; |
355 | else if (filp->private_data || !list_empty(&rpci->pipe->pipe)) | 355 | else if (filp->private_data || !list_empty(&rpci->pipe->pipe)) |
356 | mask |= POLLIN | POLLRDNORM; | 356 | mask |= POLLIN | POLLRDNORM; |
357 | mutex_unlock(&inode->i_mutex); | 357 | inode_unlock(inode); |
358 | return mask; | 358 | return mask; |
359 | } | 359 | } |
360 | 360 | ||
@@ -367,10 +367,10 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
367 | 367 | ||
368 | switch (cmd) { | 368 | switch (cmd) { |
369 | case FIONREAD: | 369 | case FIONREAD: |
370 | mutex_lock(&inode->i_mutex); | 370 | inode_lock(inode); |
371 | pipe = RPC_I(inode)->pipe; | 371 | pipe = RPC_I(inode)->pipe; |
372 | if (pipe == NULL) { | 372 | if (pipe == NULL) { |
373 | mutex_unlock(&inode->i_mutex); | 373 | inode_unlock(inode); |
374 | return -EPIPE; | 374 | return -EPIPE; |
375 | } | 375 | } |
376 | spin_lock(&pipe->lock); | 376 | spin_lock(&pipe->lock); |
@@ -381,7 +381,7 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
381 | len += msg->len - msg->copied; | 381 | len += msg->len - msg->copied; |
382 | } | 382 | } |
383 | spin_unlock(&pipe->lock); | 383 | spin_unlock(&pipe->lock); |
384 | mutex_unlock(&inode->i_mutex); | 384 | inode_unlock(inode); |
385 | return put_user(len, (int __user *)arg); | 385 | return put_user(len, (int __user *)arg); |
386 | default: | 386 | default: |
387 | return -EINVAL; | 387 | return -EINVAL; |
@@ -617,9 +617,9 @@ int rpc_rmdir(struct dentry *dentry) | |||
617 | 617 | ||
618 | parent = dget_parent(dentry); | 618 | parent = dget_parent(dentry); |
619 | dir = d_inode(parent); | 619 | dir = d_inode(parent); |
620 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | 620 | inode_lock_nested(dir, I_MUTEX_PARENT); |
621 | error = __rpc_rmdir(dir, dentry); | 621 | error = __rpc_rmdir(dir, dentry); |
622 | mutex_unlock(&dir->i_mutex); | 622 | inode_unlock(dir); |
623 | dput(parent); | 623 | dput(parent); |
624 | return error; | 624 | return error; |
625 | } | 625 | } |
@@ -701,9 +701,9 @@ static void rpc_depopulate(struct dentry *parent, | |||
701 | { | 701 | { |
702 | struct inode *dir = d_inode(parent); | 702 | struct inode *dir = d_inode(parent); |
703 | 703 | ||
704 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD); | 704 | inode_lock_nested(dir, I_MUTEX_CHILD); |
705 | __rpc_depopulate(parent, files, start, eof); | 705 | __rpc_depopulate(parent, files, start, eof); |
706 | mutex_unlock(&dir->i_mutex); | 706 | inode_unlock(dir); |
707 | } | 707 | } |
708 | 708 | ||
709 | static int rpc_populate(struct dentry *parent, | 709 | static int rpc_populate(struct dentry *parent, |
@@ -715,7 +715,7 @@ static int rpc_populate(struct dentry *parent, | |||
715 | struct dentry *dentry; | 715 | struct dentry *dentry; |
716 | int i, err; | 716 | int i, err; |
717 | 717 | ||
718 | mutex_lock(&dir->i_mutex); | 718 | inode_lock(dir); |
719 | for (i = start; i < eof; i++) { | 719 | for (i = start; i < eof; i++) { |
720 | dentry = __rpc_lookup_create_exclusive(parent, files[i].name); | 720 | dentry = __rpc_lookup_create_exclusive(parent, files[i].name); |
721 | err = PTR_ERR(dentry); | 721 | err = PTR_ERR(dentry); |
@@ -739,11 +739,11 @@ static int rpc_populate(struct dentry *parent, | |||
739 | if (err != 0) | 739 | if (err != 0) |
740 | goto out_bad; | 740 | goto out_bad; |
741 | } | 741 | } |
742 | mutex_unlock(&dir->i_mutex); | 742 | inode_unlock(dir); |
743 | return 0; | 743 | return 0; |
744 | out_bad: | 744 | out_bad: |
745 | __rpc_depopulate(parent, files, start, eof); | 745 | __rpc_depopulate(parent, files, start, eof); |
746 | mutex_unlock(&dir->i_mutex); | 746 | inode_unlock(dir); |
747 | printk(KERN_WARNING "%s: %s failed to populate directory %pd\n", | 747 | printk(KERN_WARNING "%s: %s failed to populate directory %pd\n", |
748 | __FILE__, __func__, parent); | 748 | __FILE__, __func__, parent); |
749 | return err; | 749 | return err; |
@@ -757,7 +757,7 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent, | |||
757 | struct inode *dir = d_inode(parent); | 757 | struct inode *dir = d_inode(parent); |
758 | int error; | 758 | int error; |
759 | 759 | ||
760 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | 760 | inode_lock_nested(dir, I_MUTEX_PARENT); |
761 | dentry = __rpc_lookup_create_exclusive(parent, name); | 761 | dentry = __rpc_lookup_create_exclusive(parent, name); |
762 | if (IS_ERR(dentry)) | 762 | if (IS_ERR(dentry)) |
763 | goto out; | 763 | goto out; |
@@ -770,7 +770,7 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent, | |||
770 | goto err_rmdir; | 770 | goto err_rmdir; |
771 | } | 771 | } |
772 | out: | 772 | out: |
773 | mutex_unlock(&dir->i_mutex); | 773 | inode_unlock(dir); |
774 | return dentry; | 774 | return dentry; |
775 | err_rmdir: | 775 | err_rmdir: |
776 | __rpc_rmdir(dir, dentry); | 776 | __rpc_rmdir(dir, dentry); |
@@ -788,11 +788,11 @@ static int rpc_rmdir_depopulate(struct dentry *dentry, | |||
788 | 788 | ||
789 | parent = dget_parent(dentry); | 789 | parent = dget_parent(dentry); |
790 | dir = d_inode(parent); | 790 | dir = d_inode(parent); |
791 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | 791 | inode_lock_nested(dir, I_MUTEX_PARENT); |
792 | if (depopulate != NULL) | 792 | if (depopulate != NULL) |
793 | depopulate(dentry); | 793 | depopulate(dentry); |
794 | error = __rpc_rmdir(dir, dentry); | 794 | error = __rpc_rmdir(dir, dentry); |
795 | mutex_unlock(&dir->i_mutex); | 795 | inode_unlock(dir); |
796 | dput(parent); | 796 | dput(parent); |
797 | return error; | 797 | return error; |
798 | } | 798 | } |
@@ -828,7 +828,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, | |||
828 | if (pipe->ops->downcall == NULL) | 828 | if (pipe->ops->downcall == NULL) |
829 | umode &= ~S_IWUGO; | 829 | umode &= ~S_IWUGO; |
830 | 830 | ||
831 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | 831 | inode_lock_nested(dir, I_MUTEX_PARENT); |
832 | dentry = __rpc_lookup_create_exclusive(parent, name); | 832 | dentry = __rpc_lookup_create_exclusive(parent, name); |
833 | if (IS_ERR(dentry)) | 833 | if (IS_ERR(dentry)) |
834 | goto out; | 834 | goto out; |
@@ -837,7 +837,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, | |||
837 | if (err) | 837 | if (err) |
838 | goto out_err; | 838 | goto out_err; |
839 | out: | 839 | out: |
840 | mutex_unlock(&dir->i_mutex); | 840 | inode_unlock(dir); |
841 | return dentry; | 841 | return dentry; |
842 | out_err: | 842 | out_err: |
843 | dentry = ERR_PTR(err); | 843 | dentry = ERR_PTR(err); |
@@ -865,9 +865,9 @@ rpc_unlink(struct dentry *dentry) | |||
865 | 865 | ||
866 | parent = dget_parent(dentry); | 866 | parent = dget_parent(dentry); |
867 | dir = d_inode(parent); | 867 | dir = d_inode(parent); |
868 | mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); | 868 | inode_lock_nested(dir, I_MUTEX_PARENT); |
869 | error = __rpc_rmpipe(dir, dentry); | 869 | error = __rpc_rmpipe(dir, dentry); |
870 | mutex_unlock(&dir->i_mutex); | 870 | inode_unlock(dir); |
871 | dput(parent); | 871 | dput(parent); |
872 | return error; | 872 | return error; |
873 | } | 873 | } |
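The rpc_pipe.c hunks above are a mechanical rename: every open-coded mutex_lock(&inode->i_mutex) becomes inode_lock(inode), and the _nested variants map the same way. As a rough sketch (these wrapper definitions come from the VFS headers of this kernel generation, not from this patch), the helpers are thin wrappers over the same i_mutex, so locking behaviour is unchanged:

/* Sketch of the VFS wrappers the hunks above switch to; assumed to match
 * include/linux/fs.h of this era, shown for context only. */
static inline void inode_lock(struct inode *inode)
{
        mutex_lock(&inode->i_mutex);
}

static inline void inode_unlock(struct inode *inode)
{
        mutex_unlock(&inode->i_mutex);
}

static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
{
        mutex_lock_nested(&inode->i_mutex, subclass);
}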
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 2e98f4a243e5..37edea6fa92d 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
@@ -1425,3 +1425,4 @@ void xprt_put(struct rpc_xprt *xprt) | |||
1425 | if (atomic_dec_and_test(&xprt->count)) | 1425 | if (atomic_dec_and_test(&xprt->count)) |
1426 | xprt_destroy(xprt); | 1426 | xprt_destroy(xprt); |
1427 | } | 1427 | } |
1428 | EXPORT_SYMBOL_GPL(xprt_put); | ||
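The one-line xprt.c change exports xprt_put() so that modular transports, such as the RPC/RDMA backchannel added below, can drop their rpc_xprt references directly. A minimal usage sketch with a hypothetical caller (some_xprt is illustrative, not a symbol from this patch):

/* Hypothetical module code pairing xprt_get()/xprt_put() around use of a
 * transport; xprt_put() may free the xprt when the last reference drops. */
struct rpc_xprt *xprt = xprt_get(some_xprt);
if (xprt) {
        /* ... issue backchannel calls, etc. ... */
        xprt_put(xprt);
}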
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index 33f99d3004f2..dc9f3b513a05 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile | |||
@@ -2,7 +2,7 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o | |||
2 | 2 | ||
3 | rpcrdma-y := transport.o rpc_rdma.o verbs.o \ | 3 | rpcrdma-y := transport.o rpc_rdma.o verbs.o \ |
4 | fmr_ops.o frwr_ops.o physical_ops.o \ | 4 | fmr_ops.o frwr_ops.o physical_ops.o \ |
5 | svc_rdma.o svc_rdma_transport.o \ | 5 | svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \ |
6 | svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \ | 6 | svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \ |
7 | module.o | 7 | module.o |
8 | rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o | 8 | rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o |
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index c6836844bd0e..e16567389e28 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c | |||
@@ -190,12 +190,11 @@ static int | |||
190 | frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | 190 | frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, |
191 | struct rpcrdma_create_data_internal *cdata) | 191 | struct rpcrdma_create_data_internal *cdata) |
192 | { | 192 | { |
193 | struct ib_device_attr *devattr = &ia->ri_devattr; | ||
194 | int depth, delta; | 193 | int depth, delta; |
195 | 194 | ||
196 | ia->ri_max_frmr_depth = | 195 | ia->ri_max_frmr_depth = |
197 | min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, | 196 | min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, |
198 | devattr->max_fast_reg_page_list_len); | 197 | ia->ri_device->attrs.max_fast_reg_page_list_len); |
199 | dprintk("RPC: %s: device's max FR page list len = %u\n", | 198 | dprintk("RPC: %s: device's max FR page list len = %u\n", |
200 | __func__, ia->ri_max_frmr_depth); | 199 | __func__, ia->ri_max_frmr_depth); |
201 | 200 | ||
@@ -222,8 +221,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, | |||
222 | } | 221 | } |
223 | 222 | ||
224 | ep->rep_attr.cap.max_send_wr *= depth; | 223 | ep->rep_attr.cap.max_send_wr *= depth; |
225 | if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) { | 224 | if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) { |
226 | cdata->max_requests = devattr->max_qp_wr / depth; | 225 | cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth; |
227 | if (!cdata->max_requests) | 226 | if (!cdata->max_requests) |
228 | return -EINVAL; | 227 | return -EINVAL; |
229 | ep->rep_attr.cap.max_send_wr = cdata->max_requests * | 228 | ep->rep_attr.cap.max_send_wr = cdata->max_requests * |
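The frwr_op_open() change follows a pattern repeated later in svc_rdma_transport.c: instead of keeping a private ib_device_attr copy filled in by ib_query_device(), callers read the attribute cache the IB core now keeps on struct ib_device. A before/after sketch (illustrative; field names as used in the hunks):

/* Old pattern (removed by this series): query a private attribute copy. */
struct ib_device_attr devattr;
ret = ib_query_device(device, &devattr);
max_wr = devattr.max_qp_wr;

/* New pattern: read the attributes cached on the device itself. */
max_wr = device->attrs.max_qp_wr;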
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 1b7051bdbdc8..c846ca9f1eba 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c | |||
@@ -55,6 +55,7 @@ unsigned int svcrdma_ord = RPCRDMA_ORD; | |||
55 | static unsigned int min_ord = 1; | 55 | static unsigned int min_ord = 1; |
56 | static unsigned int max_ord = 4096; | 56 | static unsigned int max_ord = 4096; |
57 | unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS; | 57 | unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS; |
58 | unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS; | ||
58 | static unsigned int min_max_requests = 4; | 59 | static unsigned int min_max_requests = 4; |
59 | static unsigned int max_max_requests = 16384; | 60 | static unsigned int max_max_requests = 16384; |
60 | unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE; | 61 | unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE; |
@@ -71,10 +72,6 @@ atomic_t rdma_stat_rq_prod; | |||
71 | atomic_t rdma_stat_sq_poll; | 72 | atomic_t rdma_stat_sq_poll; |
72 | atomic_t rdma_stat_sq_prod; | 73 | atomic_t rdma_stat_sq_prod; |
73 | 74 | ||
74 | /* Temporary NFS request map and context caches */ | ||
75 | struct kmem_cache *svc_rdma_map_cachep; | ||
76 | struct kmem_cache *svc_rdma_ctxt_cachep; | ||
77 | |||
78 | struct workqueue_struct *svc_rdma_wq; | 75 | struct workqueue_struct *svc_rdma_wq; |
79 | 76 | ||
80 | /* | 77 | /* |
@@ -243,17 +240,16 @@ void svc_rdma_cleanup(void) | |||
243 | svc_unreg_xprt_class(&svc_rdma_bc_class); | 240 | svc_unreg_xprt_class(&svc_rdma_bc_class); |
244 | #endif | 241 | #endif |
245 | svc_unreg_xprt_class(&svc_rdma_class); | 242 | svc_unreg_xprt_class(&svc_rdma_class); |
246 | kmem_cache_destroy(svc_rdma_map_cachep); | ||
247 | kmem_cache_destroy(svc_rdma_ctxt_cachep); | ||
248 | } | 243 | } |
249 | 244 | ||
250 | int svc_rdma_init(void) | 245 | int svc_rdma_init(void) |
251 | { | 246 | { |
252 | dprintk("SVCRDMA Module Init, register RPC RDMA transport\n"); | 247 | dprintk("SVCRDMA Module Init, register RPC RDMA transport\n"); |
253 | dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord); | 248 | dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord); |
254 | dprintk("\tmax_requests : %d\n", svcrdma_max_requests); | 249 | dprintk("\tmax_requests : %u\n", svcrdma_max_requests); |
255 | dprintk("\tsq_depth : %d\n", | 250 | dprintk("\tsq_depth : %u\n", |
256 | svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT); | 251 | svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT); |
252 | dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests); | ||
257 | dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); | 253 | dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); |
258 | 254 | ||
259 | svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0); | 255 | svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0); |
@@ -264,39 +260,10 @@ int svc_rdma_init(void) | |||
264 | svcrdma_table_header = | 260 | svcrdma_table_header = |
265 | register_sysctl_table(svcrdma_root_table); | 261 | register_sysctl_table(svcrdma_root_table); |
266 | 262 | ||
267 | /* Create the temporary map cache */ | ||
268 | svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache", | ||
269 | sizeof(struct svc_rdma_req_map), | ||
270 | 0, | ||
271 | SLAB_HWCACHE_ALIGN, | ||
272 | NULL); | ||
273 | if (!svc_rdma_map_cachep) { | ||
274 | printk(KERN_INFO "Could not allocate map cache.\n"); | ||
275 | goto err0; | ||
276 | } | ||
277 | |||
278 | /* Create the temporary context cache */ | ||
279 | svc_rdma_ctxt_cachep = | ||
280 | kmem_cache_create("svc_rdma_ctxt_cache", | ||
281 | sizeof(struct svc_rdma_op_ctxt), | ||
282 | 0, | ||
283 | SLAB_HWCACHE_ALIGN, | ||
284 | NULL); | ||
285 | if (!svc_rdma_ctxt_cachep) { | ||
286 | printk(KERN_INFO "Could not allocate WR ctxt cache.\n"); | ||
287 | goto err1; | ||
288 | } | ||
289 | |||
290 | /* Register RDMA with the SVC transport switch */ | 263 | /* Register RDMA with the SVC transport switch */ |
291 | svc_reg_xprt_class(&svc_rdma_class); | 264 | svc_reg_xprt_class(&svc_rdma_class); |
292 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) | 265 | #if defined(CONFIG_SUNRPC_BACKCHANNEL) |
293 | svc_reg_xprt_class(&svc_rdma_bc_class); | 266 | svc_reg_xprt_class(&svc_rdma_bc_class); |
294 | #endif | 267 | #endif |
295 | return 0; | 268 | return 0; |
296 | err1: | ||
297 | kmem_cache_destroy(svc_rdma_map_cachep); | ||
298 | err0: | ||
299 | unregister_sysctl_table(svcrdma_table_header); | ||
300 | destroy_workqueue(svc_rdma_wq); | ||
301 | return -ENOMEM; | ||
302 | } | 269 | } |
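With the global map and context kmem caches gone, those objects are pre-allocated per connection in svc_rdma_transport.c (the sc_ctxts/sc_maps hunks later in this diff), and svc_rdma_init() loses its error unwinding for cache creation. A sketch of roughly what the function reduces to (illustrative only; the dprintk lines shown above are unchanged and omitted here):

/* Approximate shape of svc_rdma_init() after this patch (sketch only). */
int svc_rdma_init(void)
{
        svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
        if (svc_rdma_wq == NULL)
                return -ENOMEM;

        if (!svcrdma_table_header)
                svcrdma_table_header =
                        register_sysctl_table(svcrdma_root_table);

        /* Register RDMA with the SVC transport switch */
        svc_reg_xprt_class(&svc_rdma_class);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
        svc_reg_xprt_class(&svc_rdma_bc_class);
#endif
        return 0;
}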
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c new file mode 100644 index 000000000000..65a7c232a345 --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c | |||
@@ -0,0 +1,371 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2015 Oracle. All rights reserved. | ||
3 | * | ||
4 | * Support for backward direction RPCs on RPC/RDMA (server-side). | ||
5 | */ | ||
6 | |||
7 | #include <linux/sunrpc/svc_rdma.h> | ||
8 | #include "xprt_rdma.h" | ||
9 | |||
10 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
11 | |||
12 | #undef SVCRDMA_BACKCHANNEL_DEBUG | ||
13 | |||
14 | int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp, | ||
15 | struct xdr_buf *rcvbuf) | ||
16 | { | ||
17 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | ||
18 | struct kvec *dst, *src = &rcvbuf->head[0]; | ||
19 | struct rpc_rqst *req; | ||
20 | unsigned long cwnd; | ||
21 | u32 credits; | ||
22 | size_t len; | ||
23 | __be32 xid; | ||
24 | __be32 *p; | ||
25 | int ret; | ||
26 | |||
27 | p = (__be32 *)src->iov_base; | ||
28 | len = src->iov_len; | ||
29 | xid = rmsgp->rm_xid; | ||
30 | |||
31 | #ifdef SVCRDMA_BACKCHANNEL_DEBUG | ||
32 | pr_info("%s: xid=%08x, length=%zu\n", | ||
33 | __func__, be32_to_cpu(xid), len); | ||
34 | pr_info("%s: RPC/RDMA: %*ph\n", | ||
35 | __func__, (int)RPCRDMA_HDRLEN_MIN, rmsgp); | ||
36 | pr_info("%s: RPC: %*ph\n", | ||
37 | __func__, (int)len, p); | ||
38 | #endif | ||
39 | |||
40 | ret = -EAGAIN; | ||
41 | if (src->iov_len < 24) | ||
42 | goto out_shortreply; | ||
43 | |||
44 | spin_lock_bh(&xprt->transport_lock); | ||
45 | req = xprt_lookup_rqst(xprt, xid); | ||
46 | if (!req) | ||
47 | goto out_notfound; | ||
48 | |||
49 | dst = &req->rq_private_buf.head[0]; | ||
50 | memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); | ||
51 | if (dst->iov_len < len) | ||
52 | goto out_unlock; | ||
53 | memcpy(dst->iov_base, p, len); | ||
54 | |||
55 | credits = be32_to_cpu(rmsgp->rm_credit); | ||
56 | if (credits == 0) | ||
57 | credits = 1; /* don't deadlock */ | ||
58 | else if (credits > r_xprt->rx_buf.rb_bc_max_requests) | ||
59 | credits = r_xprt->rx_buf.rb_bc_max_requests; | ||
60 | |||
61 | cwnd = xprt->cwnd; | ||
62 | xprt->cwnd = credits << RPC_CWNDSHIFT; | ||
63 | if (xprt->cwnd > cwnd) | ||
64 | xprt_release_rqst_cong(req->rq_task); | ||
65 | |||
66 | ret = 0; | ||
67 | xprt_complete_rqst(req->rq_task, rcvbuf->len); | ||
68 | rcvbuf->len = 0; | ||
69 | |||
70 | out_unlock: | ||
71 | spin_unlock_bh(&xprt->transport_lock); | ||
72 | out: | ||
73 | return ret; | ||
74 | |||
75 | out_shortreply: | ||
76 | dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n", | ||
77 | xprt, src->iov_len); | ||
78 | goto out; | ||
79 | |||
80 | out_notfound: | ||
81 | dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n", | ||
82 | xprt, be32_to_cpu(xid)); | ||
83 | |||
84 | goto out_unlock; | ||
85 | } | ||
86 | |||
87 | /* Send a backwards direction RPC call. | ||
88 | * | ||
89 | * Caller holds the connection's mutex and has already marshaled | ||
90 | * the RPC/RDMA request. | ||
91 | * | ||
92 | * This is similar to svc_rdma_reply, but takes an rpc_rqst | ||
93 | * instead, does not support chunks, and avoids blocking memory | ||
94 | * allocation. | ||
95 | * | ||
96 | * XXX: There is still an opportunity to block in svc_rdma_send() | ||
97 | * if there are no SQ entries to post the Send. This may occur if | ||
98 | * the adapter has a small maximum SQ depth. | ||
99 | */ | ||
100 | static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, | ||
101 | struct rpc_rqst *rqst) | ||
102 | { | ||
103 | struct xdr_buf *sndbuf = &rqst->rq_snd_buf; | ||
104 | struct svc_rdma_op_ctxt *ctxt; | ||
105 | struct svc_rdma_req_map *vec; | ||
106 | struct ib_send_wr send_wr; | ||
107 | int ret; | ||
108 | |||
109 | vec = svc_rdma_get_req_map(rdma); | ||
110 | ret = svc_rdma_map_xdr(rdma, sndbuf, vec); | ||
111 | if (ret) | ||
112 | goto out_err; | ||
113 | |||
114 | /* Post a recv buffer to handle the reply for this request. */ | ||
115 | ret = svc_rdma_post_recv(rdma, GFP_NOIO); | ||
116 | if (ret) { | ||
117 | pr_err("svcrdma: Failed to post bc receive buffer, err=%d.\n", | ||
118 | ret); | ||
119 | pr_err("svcrdma: closing transport %p.\n", rdma); | ||
120 | set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags); | ||
121 | ret = -ENOTCONN; | ||
122 | goto out_err; | ||
123 | } | ||
124 | |||
125 | ctxt = svc_rdma_get_context(rdma); | ||
126 | ctxt->pages[0] = virt_to_page(rqst->rq_buffer); | ||
127 | ctxt->count = 1; | ||
128 | |||
129 | ctxt->wr_op = IB_WR_SEND; | ||
130 | ctxt->direction = DMA_TO_DEVICE; | ||
131 | ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey; | ||
132 | ctxt->sge[0].length = sndbuf->len; | ||
133 | ctxt->sge[0].addr = | ||
134 | ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0, | ||
135 | sndbuf->len, DMA_TO_DEVICE); | ||
136 | if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) { | ||
137 | ret = -EIO; | ||
138 | goto out_unmap; | ||
139 | } | ||
140 | atomic_inc(&rdma->sc_dma_used); | ||
141 | |||
142 | memset(&send_wr, 0, sizeof(send_wr)); | ||
143 | send_wr.wr_id = (unsigned long)ctxt; | ||
144 | send_wr.sg_list = ctxt->sge; | ||
145 | send_wr.num_sge = 1; | ||
146 | send_wr.opcode = IB_WR_SEND; | ||
147 | send_wr.send_flags = IB_SEND_SIGNALED; | ||
148 | |||
149 | ret = svc_rdma_send(rdma, &send_wr); | ||
150 | if (ret) { | ||
151 | ret = -EIO; | ||
152 | goto out_unmap; | ||
153 | } | ||
154 | |||
155 | out_err: | ||
156 | svc_rdma_put_req_map(rdma, vec); | ||
157 | dprintk("svcrdma: %s returns %d\n", __func__, ret); | ||
158 | return ret; | ||
159 | |||
160 | out_unmap: | ||
161 | svc_rdma_unmap_dma(ctxt); | ||
162 | svc_rdma_put_context(ctxt, 1); | ||
163 | goto out_err; | ||
164 | } | ||
165 | |||
166 | /* Server-side transport endpoint wants a whole page for its send | ||
167 | * buffer. The client RPC code constructs the RPC header in this | ||
168 | * buffer before it invokes ->send_request. | ||
169 | * | ||
170 | * Returns NULL if there was a temporary allocation failure. | ||
171 | */ | ||
172 | static void * | ||
173 | xprt_rdma_bc_allocate(struct rpc_task *task, size_t size) | ||
174 | { | ||
175 | struct rpc_rqst *rqst = task->tk_rqstp; | ||
176 | struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt; | ||
177 | struct svcxprt_rdma *rdma; | ||
178 | struct page *page; | ||
179 | |||
180 | rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); | ||
181 | |||
182 | /* Prevent an infinite loop: try to make this case work */ | ||
183 | if (size > PAGE_SIZE) | ||
184 | WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n", | ||
185 | size); | ||
186 | |||
187 | page = alloc_page(RPCRDMA_DEF_GFP); | ||
188 | if (!page) | ||
189 | return NULL; | ||
190 | |||
191 | return page_address(page); | ||
192 | } | ||
193 | |||
194 | static void | ||
195 | xprt_rdma_bc_free(void *buffer) | ||
196 | { | ||
197 | /* No-op: ctxt and page have already been freed. */ | ||
198 | } | ||
199 | |||
200 | static int | ||
201 | rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) | ||
202 | { | ||
203 | struct rpc_xprt *xprt = rqst->rq_xprt; | ||
204 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | ||
205 | struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer; | ||
206 | int rc; | ||
207 | |||
208 | /* Space in the send buffer for an RPC/RDMA header is reserved | ||
209 | * via xprt->tsh_size. | ||
210 | */ | ||
211 | headerp->rm_xid = rqst->rq_xid; | ||
212 | headerp->rm_vers = rpcrdma_version; | ||
213 | headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests); | ||
214 | headerp->rm_type = rdma_msg; | ||
215 | headerp->rm_body.rm_chunks[0] = xdr_zero; | ||
216 | headerp->rm_body.rm_chunks[1] = xdr_zero; | ||
217 | headerp->rm_body.rm_chunks[2] = xdr_zero; | ||
218 | |||
219 | #ifdef SVCRDMA_BACKCHANNEL_DEBUG | ||
220 | pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); | ||
221 | #endif | ||
222 | |||
223 | rc = svc_rdma_bc_sendto(rdma, rqst); | ||
224 | if (rc) | ||
225 | goto drop_connection; | ||
226 | return rc; | ||
227 | |||
228 | drop_connection: | ||
229 | dprintk("svcrdma: failed to send bc call\n"); | ||
230 | xprt_disconnect_done(xprt); | ||
231 | return -ENOTCONN; | ||
232 | } | ||
233 | |||
234 | /* Send an RPC call on the passive end of a transport | ||
235 | * connection. | ||
236 | */ | ||
237 | static int | ||
238 | xprt_rdma_bc_send_request(struct rpc_task *task) | ||
239 | { | ||
240 | struct rpc_rqst *rqst = task->tk_rqstp; | ||
241 | struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt; | ||
242 | struct svcxprt_rdma *rdma; | ||
243 | int ret; | ||
244 | |||
245 | dprintk("svcrdma: sending bc call with xid: %08x\n", | ||
246 | be32_to_cpu(rqst->rq_xid)); | ||
247 | |||
248 | if (!mutex_trylock(&sxprt->xpt_mutex)) { | ||
249 | rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL); | ||
250 | if (!mutex_trylock(&sxprt->xpt_mutex)) | ||
251 | return -EAGAIN; | ||
252 | rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task); | ||
253 | } | ||
254 | |||
255 | ret = -ENOTCONN; | ||
256 | rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); | ||
257 | if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) | ||
258 | ret = rpcrdma_bc_send_request(rdma, rqst); | ||
259 | |||
260 | mutex_unlock(&sxprt->xpt_mutex); | ||
261 | |||
262 | if (ret < 0) | ||
263 | return ret; | ||
264 | return 0; | ||
265 | } | ||
266 | |||
267 | static void | ||
268 | xprt_rdma_bc_close(struct rpc_xprt *xprt) | ||
269 | { | ||
270 | dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); | ||
271 | } | ||
272 | |||
273 | static void | ||
274 | xprt_rdma_bc_put(struct rpc_xprt *xprt) | ||
275 | { | ||
276 | dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); | ||
277 | |||
278 | xprt_free(xprt); | ||
279 | module_put(THIS_MODULE); | ||
280 | } | ||
281 | |||
282 | static struct rpc_xprt_ops xprt_rdma_bc_procs = { | ||
283 | .reserve_xprt = xprt_reserve_xprt_cong, | ||
284 | .release_xprt = xprt_release_xprt_cong, | ||
285 | .alloc_slot = xprt_alloc_slot, | ||
286 | .release_request = xprt_release_rqst_cong, | ||
287 | .buf_alloc = xprt_rdma_bc_allocate, | ||
288 | .buf_free = xprt_rdma_bc_free, | ||
289 | .send_request = xprt_rdma_bc_send_request, | ||
290 | .set_retrans_timeout = xprt_set_retrans_timeout_def, | ||
291 | .close = xprt_rdma_bc_close, | ||
292 | .destroy = xprt_rdma_bc_put, | ||
293 | .print_stats = xprt_rdma_print_stats | ||
294 | }; | ||
295 | |||
296 | static const struct rpc_timeout xprt_rdma_bc_timeout = { | ||
297 | .to_initval = 60 * HZ, | ||
298 | .to_maxval = 60 * HZ, | ||
299 | }; | ||
300 | |||
301 | /* It shouldn't matter if the number of backchannel session slots | ||
302 | * doesn't match the number of RPC/RDMA credits. That just means | ||
303 | * one or the other will have extra slots that aren't used. | ||
304 | */ | ||
305 | static struct rpc_xprt * | ||
306 | xprt_setup_rdma_bc(struct xprt_create *args) | ||
307 | { | ||
308 | struct rpc_xprt *xprt; | ||
309 | struct rpcrdma_xprt *new_xprt; | ||
310 | |||
311 | if (args->addrlen > sizeof(xprt->addr)) { | ||
312 | dprintk("RPC: %s: address too large\n", __func__); | ||
313 | return ERR_PTR(-EBADF); | ||
314 | } | ||
315 | |||
316 | xprt = xprt_alloc(args->net, sizeof(*new_xprt), | ||
317 | RPCRDMA_MAX_BC_REQUESTS, | ||
318 | RPCRDMA_MAX_BC_REQUESTS); | ||
319 | if (!xprt) { | ||
320 | dprintk("RPC: %s: couldn't allocate rpc_xprt\n", | ||
321 | __func__); | ||
322 | return ERR_PTR(-ENOMEM); | ||
323 | } | ||
324 | |||
325 | xprt->timeout = &xprt_rdma_bc_timeout; | ||
326 | xprt_set_bound(xprt); | ||
327 | xprt_set_connected(xprt); | ||
328 | xprt->bind_timeout = RPCRDMA_BIND_TO; | ||
329 | xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; | ||
330 | xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; | ||
331 | |||
332 | xprt->prot = XPRT_TRANSPORT_BC_RDMA; | ||
333 | xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32); | ||
334 | xprt->ops = &xprt_rdma_bc_procs; | ||
335 | |||
336 | memcpy(&xprt->addr, args->dstaddr, args->addrlen); | ||
337 | xprt->addrlen = args->addrlen; | ||
338 | xprt_rdma_format_addresses(xprt, (struct sockaddr *)&xprt->addr); | ||
339 | xprt->resvport = 0; | ||
340 | |||
341 | xprt->max_payload = xprt_rdma_max_inline_read; | ||
342 | |||
343 | new_xprt = rpcx_to_rdmax(xprt); | ||
344 | new_xprt->rx_buf.rb_bc_max_requests = xprt->max_reqs; | ||
345 | |||
346 | xprt_get(xprt); | ||
347 | args->bc_xprt->xpt_bc_xprt = xprt; | ||
348 | xprt->bc_xprt = args->bc_xprt; | ||
349 | |||
350 | if (!try_module_get(THIS_MODULE)) | ||
351 | goto out_fail; | ||
352 | |||
353 | /* Final put for backchannel xprt is in __svc_rdma_free */ | ||
354 | xprt_get(xprt); | ||
355 | return xprt; | ||
356 | |||
357 | out_fail: | ||
358 | xprt_rdma_free_addresses(xprt); | ||
359 | args->bc_xprt->xpt_bc_xprt = NULL; | ||
360 | xprt_put(xprt); | ||
361 | xprt_free(xprt); | ||
362 | return ERR_PTR(-EINVAL); | ||
363 | } | ||
364 | |||
365 | struct xprt_class xprt_rdma_bc = { | ||
366 | .list = LIST_HEAD_INIT(xprt_rdma_bc.list), | ||
367 | .name = "rdma backchannel", | ||
368 | .owner = THIS_MODULE, | ||
369 | .ident = XPRT_TRANSPORT_BC_RDMA, | ||
370 | .setup = xprt_setup_rdma_bc, | ||
371 | }; | ||
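In svc_rdma_handle_bc_reply() above, the server bounds the client-granted credits before converting them into a congestion window. A worked example of that arithmetic (the numbers are illustrative; RPC_CWNDSHIFT and rb_bc_max_requests come from the surrounding code):

/* Suppose the reply advertises rm_credit = 8 but only 2 backchannel
 * slots were configured (rb_bc_max_requests = 2); the window is capped: */
credits = be32_to_cpu(rmsgp->rm_credit);                 /* 8 */
if (credits == 0)
        credits = 1;                     /* never let the window hit zero */
else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
        credits = r_xprt->rx_buf.rb_bc_max_requests;     /* now 2 */

xprt->cwnd = credits << RPC_CWNDSHIFT;   /* two requests' worth of cwnd */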
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index ff4f01e527ec..c8b8a8b4181e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | |||
@@ -144,6 +144,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, | |||
144 | 144 | ||
145 | head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; | 145 | head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; |
146 | head->arg.page_len += len; | 146 | head->arg.page_len += len; |
147 | |||
147 | head->arg.len += len; | 148 | head->arg.len += len; |
148 | if (!pg_off) | 149 | if (!pg_off) |
149 | head->count++; | 150 | head->count++; |
@@ -160,8 +161,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, | |||
160 | goto err; | 161 | goto err; |
161 | atomic_inc(&xprt->sc_dma_used); | 162 | atomic_inc(&xprt->sc_dma_used); |
162 | 163 | ||
163 | /* The lkey here is either a local dma lkey or a dma_mr lkey */ | 164 | ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey; |
164 | ctxt->sge[pno].lkey = xprt->sc_dma_lkey; | ||
165 | ctxt->sge[pno].length = len; | 165 | ctxt->sge[pno].length = len; |
166 | ctxt->count++; | 166 | ctxt->count++; |
167 | 167 | ||
@@ -567,6 +567,38 @@ static int rdma_read_complete(struct svc_rqst *rqstp, | |||
567 | return ret; | 567 | return ret; |
568 | } | 568 | } |
569 | 569 | ||
570 | /* By convention, backchannel calls arrive via rdma_msg type | ||
571 | * messages, and never populate the chunk lists. This makes | ||
572 | * the RPC/RDMA header small and fixed in size, so it is | ||
573 | * straightforward to check the RPC header's direction field. | ||
574 | */ | ||
575 | static bool | ||
576 | svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp) | ||
577 | { | ||
578 | __be32 *p = (__be32 *)rmsgp; | ||
579 | |||
580 | if (!xprt->xpt_bc_xprt) | ||
581 | return false; | ||
582 | |||
583 | if (rmsgp->rm_type != rdma_msg) | ||
584 | return false; | ||
585 | if (rmsgp->rm_body.rm_chunks[0] != xdr_zero) | ||
586 | return false; | ||
587 | if (rmsgp->rm_body.rm_chunks[1] != xdr_zero) | ||
588 | return false; | ||
589 | if (rmsgp->rm_body.rm_chunks[2] != xdr_zero) | ||
590 | return false; | ||
591 | |||
592 | /* sanity */ | ||
593 | if (p[7] != rmsgp->rm_xid) | ||
594 | return false; | ||
595 | /* call direction */ | ||
596 | if (p[8] == cpu_to_be32(RPC_CALL)) | ||
597 | return false; | ||
598 | |||
599 | return true; | ||
600 | } | ||
601 | |||
570 | /* | 602 | /* |
571 | * Set up the rqstp thread context to point to the RQ buffer. If | 603 | * Set up the rqstp thread context to point to the RQ buffer. If |
572 | * necessary, pull additional data from the client with an RDMA_READ | 604 | * necessary, pull additional data from the client with an RDMA_READ |
@@ -632,6 +664,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) | |||
632 | goto close_out; | 664 | goto close_out; |
633 | } | 665 | } |
634 | 666 | ||
667 | if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) { | ||
668 | ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp, | ||
669 | &rqstp->rq_arg); | ||
670 | svc_rdma_put_context(ctxt, 0); | ||
671 | if (ret) | ||
672 | goto repost; | ||
673 | return ret; | ||
674 | } | ||
675 | |||
635 | /* Read read-list data. */ | 676 | /* Read read-list data. */ |
636 | ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt); | 677 | ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt); |
637 | if (ret > 0) { | 678 | if (ret > 0) { |
@@ -668,4 +709,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) | |||
668 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | 709 | set_bit(XPT_CLOSE, &xprt->xpt_flags); |
669 | defer: | 710 | defer: |
670 | return 0; | 711 | return 0; |
712 | |||
713 | repost: | ||
714 | ret = svc_rdma_post_recv(rdma_xprt, GFP_KERNEL); | ||
715 | if (ret) { | ||
716 | pr_err("svcrdma: could not post a receive buffer, err=%d.\n", | ||
717 | ret); | ||
718 | pr_err("svcrdma: closing transport %p.\n", rdma_xprt); | ||
719 | set_bit(XPT_CLOSE, &rdma_xprt->sc_xprt.xpt_flags); | ||
720 | ret = -ENOTCONN; | ||
721 | } | ||
722 | return ret; | ||
671 | } | 723 | } |
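The p[7]/p[8] checks in svc_rdma_is_backchannel_reply() above rely on the header being a fixed run of 32-bit words once the chunk lists are empty. The assumed layout, for reference:

/* Assumed word layout behind the p[7]/p[8] sanity checks (all __be32):
 *   p[0..3]  RPC/RDMA header: rm_xid, rm_vers, rm_credit, rm_type
 *   p[4..6]  read, write, and reply chunk lists, each xdr_zero (empty)
 *   p[7]     first word of the RPC message: the RPC XID (must equal rm_xid)
 *   p[8]     RPC message direction: RPC_CALL (0) or RPC_REPLY (1)
 * Only a message marked RPC_REPLY can be a backchannel reply; anything
 * marked RPC_CALL is a normal forward-direction request. */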
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 969a1ab75fc3..df57f3ce6cd2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c | |||
@@ -50,9 +50,9 @@ | |||
50 | 50 | ||
51 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | 51 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT |
52 | 52 | ||
53 | static int map_xdr(struct svcxprt_rdma *xprt, | 53 | int svc_rdma_map_xdr(struct svcxprt_rdma *xprt, |
54 | struct xdr_buf *xdr, | 54 | struct xdr_buf *xdr, |
55 | struct svc_rdma_req_map *vec) | 55 | struct svc_rdma_req_map *vec) |
56 | { | 56 | { |
57 | int sge_no; | 57 | int sge_no; |
58 | u32 sge_bytes; | 58 | u32 sge_bytes; |
@@ -62,7 +62,7 @@ static int map_xdr(struct svcxprt_rdma *xprt, | |||
62 | 62 | ||
63 | if (xdr->len != | 63 | if (xdr->len != |
64 | (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) { | 64 | (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) { |
65 | pr_err("svcrdma: map_xdr: XDR buffer length error\n"); | 65 | pr_err("svcrdma: %s: XDR buffer length error\n", __func__); |
66 | return -EIO; | 66 | return -EIO; |
67 | } | 67 | } |
68 | 68 | ||
@@ -97,9 +97,9 @@ static int map_xdr(struct svcxprt_rdma *xprt, | |||
97 | sge_no++; | 97 | sge_no++; |
98 | } | 98 | } |
99 | 99 | ||
100 | dprintk("svcrdma: map_xdr: sge_no %d page_no %d " | 100 | dprintk("svcrdma: %s: sge_no %d page_no %d " |
101 | "page_base %u page_len %u head_len %zu tail_len %zu\n", | 101 | "page_base %u page_len %u head_len %zu tail_len %zu\n", |
102 | sge_no, page_no, xdr->page_base, xdr->page_len, | 102 | __func__, sge_no, page_no, xdr->page_base, xdr->page_len, |
103 | xdr->head[0].iov_len, xdr->tail[0].iov_len); | 103 | xdr->head[0].iov_len, xdr->tail[0].iov_len); |
104 | 104 | ||
105 | vec->count = sge_no; | 105 | vec->count = sge_no; |
@@ -265,7 +265,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, | |||
265 | sge[sge_no].addr)) | 265 | sge[sge_no].addr)) |
266 | goto err; | 266 | goto err; |
267 | atomic_inc(&xprt->sc_dma_used); | 267 | atomic_inc(&xprt->sc_dma_used); |
268 | sge[sge_no].lkey = xprt->sc_dma_lkey; | 268 | sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey; |
269 | ctxt->count++; | 269 | ctxt->count++; |
270 | sge_off = 0; | 270 | sge_off = 0; |
271 | sge_no++; | 271 | sge_no++; |
@@ -465,7 +465,7 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
465 | int ret; | 465 | int ret; |
466 | 466 | ||
467 | /* Post a recv buffer to handle another request. */ | 467 | /* Post a recv buffer to handle another request. */ |
468 | ret = svc_rdma_post_recv(rdma); | 468 | ret = svc_rdma_post_recv(rdma, GFP_KERNEL); |
469 | if (ret) { | 469 | if (ret) { |
470 | printk(KERN_INFO | 470 | printk(KERN_INFO |
471 | "svcrdma: could not post a receive buffer, err=%d." | 471 | "svcrdma: could not post a receive buffer, err=%d." |
@@ -480,7 +480,7 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
480 | ctxt->count = 1; | 480 | ctxt->count = 1; |
481 | 481 | ||
482 | /* Prepare the SGE for the RPCRDMA Header */ | 482 | /* Prepare the SGE for the RPCRDMA Header */ |
483 | ctxt->sge[0].lkey = rdma->sc_dma_lkey; | 483 | ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey; |
484 | ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); | 484 | ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); |
485 | ctxt->sge[0].addr = | 485 | ctxt->sge[0].addr = |
486 | ib_dma_map_page(rdma->sc_cm_id->device, page, 0, | 486 | ib_dma_map_page(rdma->sc_cm_id->device, page, 0, |
@@ -504,7 +504,7 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
504 | ctxt->sge[sge_no].addr)) | 504 | ctxt->sge[sge_no].addr)) |
505 | goto err; | 505 | goto err; |
506 | atomic_inc(&rdma->sc_dma_used); | 506 | atomic_inc(&rdma->sc_dma_used); |
507 | ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; | 507 | ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey; |
508 | ctxt->sge[sge_no].length = sge_bytes; | 508 | ctxt->sge[sge_no].length = sge_bytes; |
509 | } | 509 | } |
510 | if (byte_count != 0) { | 510 | if (byte_count != 0) { |
@@ -591,14 +591,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
591 | /* Build an req vec for the XDR */ | 591 | /* Build an req vec for the XDR */ |
592 | ctxt = svc_rdma_get_context(rdma); | 592 | ctxt = svc_rdma_get_context(rdma); |
593 | ctxt->direction = DMA_TO_DEVICE; | 593 | ctxt->direction = DMA_TO_DEVICE; |
594 | vec = svc_rdma_get_req_map(); | 594 | vec = svc_rdma_get_req_map(rdma); |
595 | ret = map_xdr(rdma, &rqstp->rq_res, vec); | 595 | ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec); |
596 | if (ret) | 596 | if (ret) |
597 | goto err0; | 597 | goto err0; |
598 | inline_bytes = rqstp->rq_res.len; | 598 | inline_bytes = rqstp->rq_res.len; |
599 | 599 | ||
600 | /* Create the RDMA response header */ | 600 | /* Create the RDMA response header */ |
601 | res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL); | 601 | ret = -ENOMEM; |
602 | res_page = alloc_page(GFP_KERNEL); | ||
603 | if (!res_page) | ||
604 | goto err0; | ||
602 | rdma_resp = page_address(res_page); | 605 | rdma_resp = page_address(res_page); |
603 | reply_ary = svc_rdma_get_reply_array(rdma_argp); | 606 | reply_ary = svc_rdma_get_reply_array(rdma_argp); |
604 | if (reply_ary) | 607 | if (reply_ary) |
@@ -630,14 +633,14 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) | |||
630 | 633 | ||
631 | ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec, | 634 | ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec, |
632 | inline_bytes); | 635 | inline_bytes); |
633 | svc_rdma_put_req_map(vec); | 636 | svc_rdma_put_req_map(rdma, vec); |
634 | dprintk("svcrdma: send_reply returns %d\n", ret); | 637 | dprintk("svcrdma: send_reply returns %d\n", ret); |
635 | return ret; | 638 | return ret; |
636 | 639 | ||
637 | err1: | 640 | err1: |
638 | put_page(res_page); | 641 | put_page(res_page); |
639 | err0: | 642 | err0: |
640 | svc_rdma_put_req_map(vec); | 643 | svc_rdma_put_req_map(rdma, vec); |
641 | svc_rdma_put_context(ctxt, 0); | 644 | svc_rdma_put_context(ctxt, 0); |
642 | return ret; | 645 | return ret; |
643 | } | 646 | } |
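The sendto changes also retire a __GFP_NOFAIL allocation: the reply-header page is now allocated with plain GFP_KERNEL and a failure unwinds through err0 instead of looping in the allocator. The pattern in isolation (names as in the hunk above):

/* Failure-tolerant header-page allocation (sketch of the hunk above). */
ret = -ENOMEM;
res_page = alloc_page(GFP_KERNEL);
if (!res_page)
        goto err0;      /* releases the request map and send context */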
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index b348b4adef29..5763825d09bf 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c | |||
@@ -153,18 +153,76 @@ static void svc_rdma_bc_free(struct svc_xprt *xprt) | |||
153 | } | 153 | } |
154 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ | 154 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ |
155 | 155 | ||
156 | struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) | 156 | static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt, |
157 | gfp_t flags) | ||
157 | { | 158 | { |
158 | struct svc_rdma_op_ctxt *ctxt; | 159 | struct svc_rdma_op_ctxt *ctxt; |
159 | 160 | ||
160 | ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, | 161 | ctxt = kmalloc(sizeof(*ctxt), flags); |
161 | GFP_KERNEL | __GFP_NOFAIL); | 162 | if (ctxt) { |
162 | ctxt->xprt = xprt; | 163 | ctxt->xprt = xprt; |
163 | INIT_LIST_HEAD(&ctxt->dto_q); | 164 | INIT_LIST_HEAD(&ctxt->free); |
165 | INIT_LIST_HEAD(&ctxt->dto_q); | ||
166 | } | ||
167 | return ctxt; | ||
168 | } | ||
169 | |||
170 | static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt) | ||
171 | { | ||
172 | unsigned int i; | ||
173 | |||
174 | /* Each RPC/RDMA credit can consume a number of send | ||
175 | * and receive WQEs. One ctxt is allocated for each. | ||
176 | */ | ||
177 | i = xprt->sc_sq_depth + xprt->sc_rq_depth; | ||
178 | |||
179 | while (i--) { | ||
180 | struct svc_rdma_op_ctxt *ctxt; | ||
181 | |||
182 | ctxt = alloc_ctxt(xprt, GFP_KERNEL); | ||
183 | if (!ctxt) { | ||
184 | dprintk("svcrdma: No memory for RDMA ctxt\n"); | ||
185 | return false; | ||
186 | } | ||
187 | list_add(&ctxt->free, &xprt->sc_ctxts); | ||
188 | } | ||
189 | return true; | ||
190 | } | ||
191 | |||
192 | struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) | ||
193 | { | ||
194 | struct svc_rdma_op_ctxt *ctxt = NULL; | ||
195 | |||
196 | spin_lock_bh(&xprt->sc_ctxt_lock); | ||
197 | xprt->sc_ctxt_used++; | ||
198 | if (list_empty(&xprt->sc_ctxts)) | ||
199 | goto out_empty; | ||
200 | |||
201 | ctxt = list_first_entry(&xprt->sc_ctxts, | ||
202 | struct svc_rdma_op_ctxt, free); | ||
203 | list_del_init(&ctxt->free); | ||
204 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
205 | |||
206 | out: | ||
164 | ctxt->count = 0; | 207 | ctxt->count = 0; |
165 | ctxt->frmr = NULL; | 208 | ctxt->frmr = NULL; |
166 | atomic_inc(&xprt->sc_ctxt_used); | ||
167 | return ctxt; | 209 | return ctxt; |
210 | |||
211 | out_empty: | ||
212 | /* Either pre-allocation missed the mark, or send | ||
213 | * queue accounting is broken. | ||
214 | */ | ||
215 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
216 | |||
217 | ctxt = alloc_ctxt(xprt, GFP_NOIO); | ||
218 | if (ctxt) | ||
219 | goto out; | ||
220 | |||
221 | spin_lock_bh(&xprt->sc_ctxt_lock); | ||
222 | xprt->sc_ctxt_used--; | ||
223 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
224 | WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n"); | ||
225 | return NULL; | ||
168 | } | 226 | } |
169 | 227 | ||
170 | void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) | 228 | void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) |
@@ -174,11 +232,11 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) | |||
174 | for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { | 232 | for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { |
175 | /* | 233 | /* |
176 | * Unmap the DMA addr in the SGE if the lkey matches | 234 | * Unmap the DMA addr in the SGE if the lkey matches |
177 | * the sc_dma_lkey, otherwise, ignore it since it is | 235 | * the local_dma_lkey, otherwise, ignore it since it is |
178 | * an FRMR lkey and will be unmapped later when the | 236 | * an FRMR lkey and will be unmapped later when the |
179 | * last WR that uses it completes. | 237 | * last WR that uses it completes. |
180 | */ | 238 | */ |
181 | if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { | 239 | if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) { |
182 | atomic_dec(&xprt->sc_dma_used); | 240 | atomic_dec(&xprt->sc_dma_used); |
183 | ib_dma_unmap_page(xprt->sc_cm_id->device, | 241 | ib_dma_unmap_page(xprt->sc_cm_id->device, |
184 | ctxt->sge[i].addr, | 242 | ctxt->sge[i].addr, |
@@ -190,35 +248,108 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) | |||
190 | 248 | ||
191 | void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) | 249 | void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) |
192 | { | 250 | { |
193 | struct svcxprt_rdma *xprt; | 251 | struct svcxprt_rdma *xprt = ctxt->xprt; |
194 | int i; | 252 | int i; |
195 | 253 | ||
196 | xprt = ctxt->xprt; | ||
197 | if (free_pages) | 254 | if (free_pages) |
198 | for (i = 0; i < ctxt->count; i++) | 255 | for (i = 0; i < ctxt->count; i++) |
199 | put_page(ctxt->pages[i]); | 256 | put_page(ctxt->pages[i]); |
200 | 257 | ||
201 | kmem_cache_free(svc_rdma_ctxt_cachep, ctxt); | 258 | spin_lock_bh(&xprt->sc_ctxt_lock); |
202 | atomic_dec(&xprt->sc_ctxt_used); | 259 | xprt->sc_ctxt_used--; |
260 | list_add(&ctxt->free, &xprt->sc_ctxts); | ||
261 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
203 | } | 262 | } |
204 | 263 | ||
205 | /* | 264 | static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt) |
206 | * Temporary NFS req mappings are shared across all transport | 265 | { |
207 | * instances. These are short lived and should be bounded by the number | 266 | while (!list_empty(&xprt->sc_ctxts)) { |
208 | * of concurrent server threads * depth of the SQ. | 267 | struct svc_rdma_op_ctxt *ctxt; |
209 | */ | 268 | |
210 | struct svc_rdma_req_map *svc_rdma_get_req_map(void) | 269 | ctxt = list_first_entry(&xprt->sc_ctxts, |
270 | struct svc_rdma_op_ctxt, free); | ||
271 | list_del(&ctxt->free); | ||
272 | kfree(ctxt); | ||
273 | } | ||
274 | } | ||
275 | |||
276 | static struct svc_rdma_req_map *alloc_req_map(gfp_t flags) | ||
211 | { | 277 | { |
212 | struct svc_rdma_req_map *map; | 278 | struct svc_rdma_req_map *map; |
213 | map = kmem_cache_alloc(svc_rdma_map_cachep, | 279 | |
214 | GFP_KERNEL | __GFP_NOFAIL); | 280 | map = kmalloc(sizeof(*map), flags); |
281 | if (map) | ||
282 | INIT_LIST_HEAD(&map->free); | ||
283 | return map; | ||
284 | } | ||
285 | |||
286 | static bool svc_rdma_prealloc_maps(struct svcxprt_rdma *xprt) | ||
287 | { | ||
288 | unsigned int i; | ||
289 | |||
290 | /* One for each receive buffer on this connection. */ | ||
291 | i = xprt->sc_max_requests; | ||
292 | |||
293 | while (i--) { | ||
294 | struct svc_rdma_req_map *map; | ||
295 | |||
296 | map = alloc_req_map(GFP_KERNEL); | ||
297 | if (!map) { | ||
298 | dprintk("svcrdma: No memory for request map\n"); | ||
299 | return false; | ||
300 | } | ||
301 | list_add(&map->free, &xprt->sc_maps); | ||
302 | } | ||
303 | return true; | ||
304 | } | ||
305 | |||
306 | struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *xprt) | ||
307 | { | ||
308 | struct svc_rdma_req_map *map = NULL; | ||
309 | |||
310 | spin_lock(&xprt->sc_map_lock); | ||
311 | if (list_empty(&xprt->sc_maps)) | ||
312 | goto out_empty; | ||
313 | |||
314 | map = list_first_entry(&xprt->sc_maps, | ||
315 | struct svc_rdma_req_map, free); | ||
316 | list_del_init(&map->free); | ||
317 | spin_unlock(&xprt->sc_map_lock); | ||
318 | |||
319 | out: | ||
215 | map->count = 0; | 320 | map->count = 0; |
216 | return map; | 321 | return map; |
322 | |||
323 | out_empty: | ||
324 | spin_unlock(&xprt->sc_map_lock); | ||
325 | |||
326 | /* Pre-allocation amount was incorrect */ | ||
327 | map = alloc_req_map(GFP_NOIO); | ||
328 | if (map) | ||
329 | goto out; | ||
330 | |||
331 | WARN_ONCE(1, "svcrdma: empty request map list?\n"); | ||
332 | return NULL; | ||
333 | } | ||
334 | |||
335 | void svc_rdma_put_req_map(struct svcxprt_rdma *xprt, | ||
336 | struct svc_rdma_req_map *map) | ||
337 | { | ||
338 | spin_lock(&xprt->sc_map_lock); | ||
339 | list_add(&map->free, &xprt->sc_maps); | ||
340 | spin_unlock(&xprt->sc_map_lock); | ||
217 | } | 341 | } |
218 | 342 | ||
219 | void svc_rdma_put_req_map(struct svc_rdma_req_map *map) | 343 | static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt) |
220 | { | 344 | { |
221 | kmem_cache_free(svc_rdma_map_cachep, map); | 345 | while (!list_empty(&xprt->sc_maps)) { |
346 | struct svc_rdma_req_map *map; | ||
347 | |||
348 | map = list_first_entry(&xprt->sc_maps, | ||
349 | struct svc_rdma_req_map, free); | ||
350 | list_del(&map->free); | ||
351 | kfree(map); | ||
352 | } | ||
222 | } | 353 | } |
223 | 354 | ||
224 | /* ib_cq event handler */ | 355 | /* ib_cq event handler */ |
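The hunks above replace the shared kmem caches with per-transport free lists: objects are pre-allocated when a connection is accepted, get/put becomes a list operation under a spinlock, and a GFP_NOIO allocation is only a fallback for when the pre-allocation estimate proves too small. A generic sketch of that pattern (hypothetical pool/obj names; the code above applies it separately to contexts and request maps):

/* Minimal free-list pool in the style used above (hypothetical names). */
struct pool {
        spinlock_t       lock;
        struct list_head free;
};

struct obj {
        struct list_head free;
        /* ... payload ... */
};

static struct obj *pool_get(struct pool *p)
{
        struct obj *o = NULL;

        spin_lock(&p->lock);
        if (!list_empty(&p->free)) {
                o = list_first_entry(&p->free, struct obj, free);
                list_del_init(&o->free);
        }
        spin_unlock(&p->lock);

        if (!o)         /* pre-allocation missed the mark */
                o = kmalloc(sizeof(*o), GFP_NOIO);
        return o;
}

static void pool_put(struct pool *p, struct obj *o)
{
        spin_lock(&p->lock);
        list_add(&o->free, &p->free);
        spin_unlock(&p->lock);
}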
@@ -386,46 +517,44 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt) | |||
386 | static void process_context(struct svcxprt_rdma *xprt, | 517 | static void process_context(struct svcxprt_rdma *xprt, |
387 | struct svc_rdma_op_ctxt *ctxt) | 518 | struct svc_rdma_op_ctxt *ctxt) |
388 | { | 519 | { |
520 | struct svc_rdma_op_ctxt *read_hdr; | ||
521 | int free_pages = 0; | ||
522 | |||
389 | svc_rdma_unmap_dma(ctxt); | 523 | svc_rdma_unmap_dma(ctxt); |
390 | 524 | ||
391 | switch (ctxt->wr_op) { | 525 | switch (ctxt->wr_op) { |
392 | case IB_WR_SEND: | 526 | case IB_WR_SEND: |
393 | if (ctxt->frmr) | 527 | free_pages = 1; |
394 | pr_err("svcrdma: SEND: ctxt->frmr != NULL\n"); | ||
395 | svc_rdma_put_context(ctxt, 1); | ||
396 | break; | 528 | break; |
397 | 529 | ||
398 | case IB_WR_RDMA_WRITE: | 530 | case IB_WR_RDMA_WRITE: |
399 | if (ctxt->frmr) | ||
400 | pr_err("svcrdma: WRITE: ctxt->frmr != NULL\n"); | ||
401 | svc_rdma_put_context(ctxt, 0); | ||
402 | break; | 531 | break; |
403 | 532 | ||
404 | case IB_WR_RDMA_READ: | 533 | case IB_WR_RDMA_READ: |
405 | case IB_WR_RDMA_READ_WITH_INV: | 534 | case IB_WR_RDMA_READ_WITH_INV: |
406 | svc_rdma_put_frmr(xprt, ctxt->frmr); | 535 | svc_rdma_put_frmr(xprt, ctxt->frmr); |
407 | if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { | 536 | |
408 | struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; | 537 | if (!test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) |
409 | if (read_hdr) { | 538 | break; |
410 | spin_lock_bh(&xprt->sc_rq_dto_lock); | 539 | |
411 | set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); | 540 | read_hdr = ctxt->read_hdr; |
412 | list_add_tail(&read_hdr->dto_q, | ||
413 | &xprt->sc_read_complete_q); | ||
414 | spin_unlock_bh(&xprt->sc_rq_dto_lock); | ||
415 | } else { | ||
416 | pr_err("svcrdma: ctxt->read_hdr == NULL\n"); | ||
417 | } | ||
418 | svc_xprt_enqueue(&xprt->sc_xprt); | ||
419 | } | ||
420 | svc_rdma_put_context(ctxt, 0); | 541 | svc_rdma_put_context(ctxt, 0); |
421 | break; | 542 | |
543 | spin_lock_bh(&xprt->sc_rq_dto_lock); | ||
544 | set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); | ||
545 | list_add_tail(&read_hdr->dto_q, | ||
546 | &xprt->sc_read_complete_q); | ||
547 | spin_unlock_bh(&xprt->sc_rq_dto_lock); | ||
548 | svc_xprt_enqueue(&xprt->sc_xprt); | ||
549 | return; | ||
422 | 550 | ||
423 | default: | 551 | default: |
424 | printk(KERN_ERR "svcrdma: unexpected completion type, " | 552 | dprintk("svcrdma: unexpected completion opcode=%d\n", |
425 | "opcode=%d\n", | 553 | ctxt->wr_op); |
426 | ctxt->wr_op); | ||
427 | break; | 554 | break; |
428 | } | 555 | } |
556 | |||
557 | svc_rdma_put_context(ctxt, free_pages); | ||
429 | } | 558 | } |
430 | 559 | ||
431 | /* | 560 | /* |
@@ -523,19 +652,15 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, | |||
523 | INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); | 652 | INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); |
524 | INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); | 653 | INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); |
525 | INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); | 654 | INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); |
655 | INIT_LIST_HEAD(&cma_xprt->sc_ctxts); | ||
656 | INIT_LIST_HEAD(&cma_xprt->sc_maps); | ||
526 | init_waitqueue_head(&cma_xprt->sc_send_wait); | 657 | init_waitqueue_head(&cma_xprt->sc_send_wait); |
527 | 658 | ||
528 | spin_lock_init(&cma_xprt->sc_lock); | 659 | spin_lock_init(&cma_xprt->sc_lock); |
529 | spin_lock_init(&cma_xprt->sc_rq_dto_lock); | 660 | spin_lock_init(&cma_xprt->sc_rq_dto_lock); |
530 | spin_lock_init(&cma_xprt->sc_frmr_q_lock); | 661 | spin_lock_init(&cma_xprt->sc_frmr_q_lock); |
531 | 662 | spin_lock_init(&cma_xprt->sc_ctxt_lock); | |
532 | cma_xprt->sc_ord = svcrdma_ord; | 663 | spin_lock_init(&cma_xprt->sc_map_lock); |
533 | |||
534 | cma_xprt->sc_max_req_size = svcrdma_max_req_size; | ||
535 | cma_xprt->sc_max_requests = svcrdma_max_requests; | ||
536 | cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT; | ||
537 | atomic_set(&cma_xprt->sc_sq_count, 0); | ||
538 | atomic_set(&cma_xprt->sc_ctxt_used, 0); | ||
539 | 664 | ||
540 | if (listener) | 665 | if (listener) |
541 | set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); | 666 | set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); |
@@ -543,7 +668,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, | |||
543 | return cma_xprt; | 668 | return cma_xprt; |
544 | } | 669 | } |
545 | 670 | ||
546 | int svc_rdma_post_recv(struct svcxprt_rdma *xprt) | 671 | int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags) |
547 | { | 672 | { |
548 | struct ib_recv_wr recv_wr, *bad_recv_wr; | 673 | struct ib_recv_wr recv_wr, *bad_recv_wr; |
549 | struct svc_rdma_op_ctxt *ctxt; | 674 | struct svc_rdma_op_ctxt *ctxt; |
@@ -561,7 +686,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) | |||
561 | pr_err("svcrdma: Too many sges (%d)\n", sge_no); | 686 | pr_err("svcrdma: Too many sges (%d)\n", sge_no); |
562 | goto err_put_ctxt; | 687 | goto err_put_ctxt; |
563 | } | 688 | } |
564 | page = alloc_page(GFP_KERNEL | __GFP_NOFAIL); | 689 | page = alloc_page(flags); |
690 | if (!page) | ||
691 | goto err_put_ctxt; | ||
565 | ctxt->pages[sge_no] = page; | 692 | ctxt->pages[sge_no] = page; |
566 | pa = ib_dma_map_page(xprt->sc_cm_id->device, | 693 | pa = ib_dma_map_page(xprt->sc_cm_id->device, |
567 | page, 0, PAGE_SIZE, | 694 | page, 0, PAGE_SIZE, |
@@ -571,7 +698,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) | |||
571 | atomic_inc(&xprt->sc_dma_used); | 698 | atomic_inc(&xprt->sc_dma_used); |
572 | ctxt->sge[sge_no].addr = pa; | 699 | ctxt->sge[sge_no].addr = pa; |
573 | ctxt->sge[sge_no].length = PAGE_SIZE; | 700 | ctxt->sge[sge_no].length = PAGE_SIZE; |
574 | ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey; | 701 | ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey; |
575 | ctxt->count = sge_no + 1; | 702 | ctxt->count = sge_no + 1; |
576 | buflen += PAGE_SIZE; | 703 | buflen += PAGE_SIZE; |
577 | } | 704 | } |
@@ -886,11 +1013,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
886 | struct rdma_conn_param conn_param; | 1013 | struct rdma_conn_param conn_param; |
887 | struct ib_cq_init_attr cq_attr = {}; | 1014 | struct ib_cq_init_attr cq_attr = {}; |
888 | struct ib_qp_init_attr qp_attr; | 1015 | struct ib_qp_init_attr qp_attr; |
889 | struct ib_device_attr devattr; | 1016 | struct ib_device *dev; |
890 | int uninitialized_var(dma_mr_acc); | 1017 | unsigned int i; |
891 | int need_dma_mr = 0; | 1018 | int ret = 0; |
892 | int ret; | ||
893 | int i; | ||
894 | 1019 | ||
895 | listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); | 1020 | listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); |
896 | clear_bit(XPT_CONN, &xprt->xpt_flags); | 1021 | clear_bit(XPT_CONN, &xprt->xpt_flags); |
@@ -910,37 +1035,42 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
910 | dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n", | 1035 | dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n", |
911 | newxprt, newxprt->sc_cm_id); | 1036 | newxprt, newxprt->sc_cm_id); |
912 | 1037 | ||
913 | ret = ib_query_device(newxprt->sc_cm_id->device, &devattr); | 1038 | dev = newxprt->sc_cm_id->device; |
914 | if (ret) { | ||
915 | dprintk("svcrdma: could not query device attributes on " | ||
916 | "device %p, rc=%d\n", newxprt->sc_cm_id->device, ret); | ||
917 | goto errout; | ||
918 | } | ||
919 | 1039 | ||
920 | /* Qualify the transport resource defaults with the | 1040 | /* Qualify the transport resource defaults with the |
921 | * capabilities of this particular device */ | 1041 | * capabilities of this particular device */ |
922 | newxprt->sc_max_sge = min((size_t)devattr.max_sge, | 1042 | newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge, |
923 | (size_t)RPCSVC_MAXPAGES); | 1043 | (size_t)RPCSVC_MAXPAGES); |
924 | newxprt->sc_max_sge_rd = min_t(size_t, devattr.max_sge_rd, | 1044 | newxprt->sc_max_sge_rd = min_t(size_t, dev->attrs.max_sge_rd, |
925 | RPCSVC_MAXPAGES); | 1045 | RPCSVC_MAXPAGES); |
926 | newxprt->sc_max_requests = min((size_t)devattr.max_qp_wr, | 1046 | newxprt->sc_max_req_size = svcrdma_max_req_size; |
927 | (size_t)svcrdma_max_requests); | 1047 | newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr, |
928 | newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests; | 1048 | svcrdma_max_requests); |
1049 | newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr, | ||
1050 | svcrdma_max_bc_requests); | ||
1051 | newxprt->sc_rq_depth = newxprt->sc_max_requests + | ||
1052 | newxprt->sc_max_bc_requests; | ||
1053 | newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth; | ||
1054 | |||
1055 | if (!svc_rdma_prealloc_ctxts(newxprt)) | ||
1056 | goto errout; | ||
1057 | if (!svc_rdma_prealloc_maps(newxprt)) | ||
1058 | goto errout; | ||
929 | 1059 | ||
930 | /* | 1060 | /* |
931 | * Limit ORD based on client limit, local device limit, and | 1061 | * Limit ORD based on client limit, local device limit, and |
932 | * configured svcrdma limit. | 1062 | * configured svcrdma limit. |
933 | */ | 1063 | */ |
934 | newxprt->sc_ord = min_t(size_t, devattr.max_qp_rd_atom, newxprt->sc_ord); | 1064 | newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord); |
935 | newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord); | 1065 | newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord); |
936 | 1066 | ||
937 | newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device); | 1067 | newxprt->sc_pd = ib_alloc_pd(dev); |
938 | if (IS_ERR(newxprt->sc_pd)) { | 1068 | if (IS_ERR(newxprt->sc_pd)) { |
939 | dprintk("svcrdma: error creating PD for connect request\n"); | 1069 | dprintk("svcrdma: error creating PD for connect request\n"); |
940 | goto errout; | 1070 | goto errout; |
941 | } | 1071 | } |
942 | cq_attr.cqe = newxprt->sc_sq_depth; | 1072 | cq_attr.cqe = newxprt->sc_sq_depth; |
943 | newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device, | 1073 | newxprt->sc_sq_cq = ib_create_cq(dev, |
944 | sq_comp_handler, | 1074 | sq_comp_handler, |
945 | cq_event_handler, | 1075 | cq_event_handler, |
946 | newxprt, | 1076 | newxprt, |
@@ -949,8 +1079,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
949 | dprintk("svcrdma: error creating SQ CQ for connect request\n"); | 1079 | dprintk("svcrdma: error creating SQ CQ for connect request\n"); |
950 | goto errout; | 1080 | goto errout; |
951 | } | 1081 | } |
952 | cq_attr.cqe = newxprt->sc_max_requests; | 1082 | cq_attr.cqe = newxprt->sc_rq_depth; |
953 | newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device, | 1083 | newxprt->sc_rq_cq = ib_create_cq(dev, |
954 | rq_comp_handler, | 1084 | rq_comp_handler, |
955 | cq_event_handler, | 1085 | cq_event_handler, |
956 | newxprt, | 1086 | newxprt, |
@@ -964,7 +1094,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
964 | qp_attr.event_handler = qp_event_handler; | 1094 | qp_attr.event_handler = qp_event_handler; |
965 | qp_attr.qp_context = &newxprt->sc_xprt; | 1095 | qp_attr.qp_context = &newxprt->sc_xprt; |
966 | qp_attr.cap.max_send_wr = newxprt->sc_sq_depth; | 1096 | qp_attr.cap.max_send_wr = newxprt->sc_sq_depth; |
967 | qp_attr.cap.max_recv_wr = newxprt->sc_max_requests; | 1097 | qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth; |
968 | qp_attr.cap.max_send_sge = newxprt->sc_max_sge; | 1098 | qp_attr.cap.max_send_sge = newxprt->sc_max_sge; |
969 | qp_attr.cap.max_recv_sge = newxprt->sc_max_sge; | 1099 | qp_attr.cap.max_recv_sge = newxprt->sc_max_sge; |
970 | qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; | 1100 | qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; |
@@ -978,7 +1108,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
978 | " cap.max_send_sge = %d\n" | 1108 | " cap.max_send_sge = %d\n" |
979 | " cap.max_recv_sge = %d\n", | 1109 | " cap.max_recv_sge = %d\n", |
980 | newxprt->sc_cm_id, newxprt->sc_pd, | 1110 | newxprt->sc_cm_id, newxprt->sc_pd, |
981 | newxprt->sc_cm_id->device, newxprt->sc_pd->device, | 1111 | dev, newxprt->sc_pd->device, |
982 | qp_attr.cap.max_send_wr, | 1112 | qp_attr.cap.max_send_wr, |
983 | qp_attr.cap.max_recv_wr, | 1113 | qp_attr.cap.max_recv_wr, |
984 | qp_attr.cap.max_send_sge, | 1114 | qp_attr.cap.max_send_sge, |
@@ -1014,9 +1144,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
1014 | * of an RDMA_READ. IB does not. | 1144 | * of an RDMA_READ. IB does not. |
1015 | */ | 1145 | */ |
1016 | newxprt->sc_reader = rdma_read_chunk_lcl; | 1146 | newxprt->sc_reader = rdma_read_chunk_lcl; |
1017 | if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { | 1147 | if (dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { |
1018 | newxprt->sc_frmr_pg_list_len = | 1148 | newxprt->sc_frmr_pg_list_len = |
1019 | devattr.max_fast_reg_page_list_len; | 1149 | dev->attrs.max_fast_reg_page_list_len; |
1020 | newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; | 1150 | newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; |
1021 | newxprt->sc_reader = rdma_read_chunk_frmr; | 1151 | newxprt->sc_reader = rdma_read_chunk_frmr; |
1022 | } | 1152 | } |
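The capability test above now reads the attributes that the RDMA core caches in struct ib_device (dev->attrs), so svcrdma no longer needs its own ib_query_device() call or a private struct ib_device_attr copy. A minimal sketch of the pattern, using an illustrative helper name that is not part of this patch:

	#include <rdma/ib_verbs.h>

	/* Illustrative only: check fast-registration support straight from
	 * the attributes cached on the ib_device. */
	static bool example_supports_fast_reg(struct ib_device *dev)
	{
		return (dev->attrs.device_cap_flags &
			IB_DEVICE_MEM_MGT_EXTENSIONS) &&
		       dev->attrs.max_fast_reg_page_list_len > 0;
	}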
@@ -1024,44 +1154,16 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | |||
1024 | /* | 1154 | /* |
1025 | * Determine if a DMA MR is required and if so, what privs are required | 1155 | * Determine if a DMA MR is required and if so, what privs are required |
1026 | */ | 1156 | */ |
1027 | if (!rdma_protocol_iwarp(newxprt->sc_cm_id->device, | 1157 | if (!rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num) && |
1028 | newxprt->sc_cm_id->port_num) && | 1158 | !rdma_ib_or_roce(dev, newxprt->sc_cm_id->port_num)) |
1029 | !rdma_ib_or_roce(newxprt->sc_cm_id->device, | ||
1030 | newxprt->sc_cm_id->port_num)) | ||
1031 | goto errout; | 1159 | goto errout; |
1032 | 1160 | ||
1033 | if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) || | 1161 | if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num)) |
1034 | !(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { | ||
1035 | need_dma_mr = 1; | ||
1036 | dma_mr_acc = IB_ACCESS_LOCAL_WRITE; | ||
1037 | if (rdma_protocol_iwarp(newxprt->sc_cm_id->device, | ||
1038 | newxprt->sc_cm_id->port_num) && | ||
1039 | !(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) | ||
1040 | dma_mr_acc |= IB_ACCESS_REMOTE_WRITE; | ||
1041 | } | ||
1042 | |||
1043 | if (rdma_protocol_iwarp(newxprt->sc_cm_id->device, | ||
1044 | newxprt->sc_cm_id->port_num)) | ||
1045 | newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV; | 1162 | newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV; |
1046 | 1163 | ||
1047 | /* Create the DMA MR if needed, otherwise, use the DMA LKEY */ | ||
1048 | if (need_dma_mr) { | ||
1049 | /* Register all of physical memory */ | ||
1050 | newxprt->sc_phys_mr = | ||
1051 | ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc); | ||
1052 | if (IS_ERR(newxprt->sc_phys_mr)) { | ||
1053 | dprintk("svcrdma: Failed to create DMA MR ret=%d\n", | ||
1054 | ret); | ||
1055 | goto errout; | ||
1056 | } | ||
1057 | newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey; | ||
1058 | } else | ||
1059 | newxprt->sc_dma_lkey = | ||
1060 | newxprt->sc_cm_id->device->local_dma_lkey; | ||
1061 | |||
1062 | /* Post receive buffers */ | 1164 | /* Post receive buffers */ |
1063 | for (i = 0; i < newxprt->sc_max_requests; i++) { | 1165 | for (i = 0; i < newxprt->sc_rq_depth; i++) { |
1064 | ret = svc_rdma_post_recv(newxprt); | 1166 | ret = svc_rdma_post_recv(newxprt, GFP_KERNEL); |
1065 | if (ret) { | 1167 | if (ret) { |
1066 | dprintk("svcrdma: failure posting receive buffers\n"); | 1168 | dprintk("svcrdma: failure posting receive buffers\n"); |
1067 | goto errout; | 1169 | goto errout; |
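With the need_dma_mr/ib_get_dma_mr() branch deleted, svcrdma no longer registers all of physical memory; local DMA access goes through the protection domain's reserved lkey instead (see the sc_pd->local_dma_lkey change in svc_rdma_send_error() further down). A minimal sketch of filling an SGE under that scheme; the helper name is illustrative, not code from this patch:

	#include <rdma/ib_verbs.h>
	#include <linux/sunrpc/svc_rdma.h>

	/* Illustrative only: an SGE for locally mapped memory now carries
	 * the PD's reserved local_dma_lkey rather than the lkey of a
	 * privileged all-physical DMA MR. */
	static void example_fill_sge(struct svcxprt_rdma *xprt,
				     struct ib_sge *sge,
				     u64 dma_addr, u32 length)
	{
		sge->addr   = dma_addr;
		sge->length = length;
		sge->lkey   = xprt->sc_pd->local_dma_lkey;
	}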
@@ -1160,12 +1262,14 @@ static void __svc_rdma_free(struct work_struct *work) | |||
1160 | { | 1262 | { |
1161 | struct svcxprt_rdma *rdma = | 1263 | struct svcxprt_rdma *rdma = |
1162 | container_of(work, struct svcxprt_rdma, sc_work); | 1264 | container_of(work, struct svcxprt_rdma, sc_work); |
1163 | dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); | 1265 | struct svc_xprt *xprt = &rdma->sc_xprt; |
1266 | |||
1267 | dprintk("svcrdma: %s(%p)\n", __func__, rdma); | ||
1164 | 1268 | ||
1165 | /* We should only be called from kref_put */ | 1269 | /* We should only be called from kref_put */ |
1166 | if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0) | 1270 | if (atomic_read(&xprt->xpt_ref.refcount) != 0) |
1167 | pr_err("svcrdma: sc_xprt still in use? (%d)\n", | 1271 | pr_err("svcrdma: sc_xprt still in use? (%d)\n", |
1168 | atomic_read(&rdma->sc_xprt.xpt_ref.refcount)); | 1272 | atomic_read(&xprt->xpt_ref.refcount)); |
1169 | 1273 | ||
1170 | /* | 1274 | /* |
1171 | * Destroy queued, but not processed read completions. Note | 1275 | * Destroy queued, but not processed read completions. Note |
@@ -1193,15 +1297,22 @@ static void __svc_rdma_free(struct work_struct *work) | |||
1193 | } | 1297 | } |
1194 | 1298 | ||
1195 | /* Warn if we leaked a resource or under-referenced */ | 1299 | /* Warn if we leaked a resource or under-referenced */ |
1196 | if (atomic_read(&rdma->sc_ctxt_used) != 0) | 1300 | if (rdma->sc_ctxt_used != 0) |
1197 | pr_err("svcrdma: ctxt still in use? (%d)\n", | 1301 | pr_err("svcrdma: ctxt still in use? (%d)\n", |
1198 | atomic_read(&rdma->sc_ctxt_used)); | 1302 | rdma->sc_ctxt_used); |
1199 | if (atomic_read(&rdma->sc_dma_used) != 0) | 1303 | if (atomic_read(&rdma->sc_dma_used) != 0) |
1200 | pr_err("svcrdma: dma still in use? (%d)\n", | 1304 | pr_err("svcrdma: dma still in use? (%d)\n", |
1201 | atomic_read(&rdma->sc_dma_used)); | 1305 | atomic_read(&rdma->sc_dma_used)); |
1202 | 1306 | ||
1203 | /* De-allocate fastreg mr */ | 1307 | /* Final put of backchannel client transport */ |
1308 | if (xprt->xpt_bc_xprt) { | ||
1309 | xprt_put(xprt->xpt_bc_xprt); | ||
1310 | xprt->xpt_bc_xprt = NULL; | ||
1311 | } | ||
1312 | |||
1204 | rdma_dealloc_frmr_q(rdma); | 1313 | rdma_dealloc_frmr_q(rdma); |
1314 | svc_rdma_destroy_ctxts(rdma); | ||
1315 | svc_rdma_destroy_maps(rdma); | ||
1205 | 1316 | ||
1206 | /* Destroy the QP if present (not a listener) */ | 1317 | /* Destroy the QP if present (not a listener) */ |
1207 | if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) | 1318 | if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) |
@@ -1213,9 +1324,6 @@ static void __svc_rdma_free(struct work_struct *work) | |||
1213 | if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq)) | 1324 | if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq)) |
1214 | ib_destroy_cq(rdma->sc_rq_cq); | 1325 | ib_destroy_cq(rdma->sc_rq_cq); |
1215 | 1326 | ||
1216 | if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr)) | ||
1217 | ib_dereg_mr(rdma->sc_phys_mr); | ||
1218 | |||
1219 | if (rdma->sc_pd && !IS_ERR(rdma->sc_pd)) | 1327 | if (rdma->sc_pd && !IS_ERR(rdma->sc_pd)) |
1220 | ib_dealloc_pd(rdma->sc_pd); | 1328 | ib_dealloc_pd(rdma->sc_pd); |
1221 | 1329 | ||
@@ -1321,7 +1429,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, | |||
1321 | int length; | 1429 | int length; |
1322 | int ret; | 1430 | int ret; |
1323 | 1431 | ||
1324 | p = alloc_page(GFP_KERNEL | __GFP_NOFAIL); | 1432 | p = alloc_page(GFP_KERNEL); |
1433 | if (!p) | ||
1434 | return; | ||
1325 | va = page_address(p); | 1435 | va = page_address(p); |
1326 | 1436 | ||
1327 | /* XDR encode error */ | 1437 | /* XDR encode error */ |
@@ -1341,7 +1451,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, | |||
1341 | return; | 1451 | return; |
1342 | } | 1452 | } |
1343 | atomic_inc(&xprt->sc_dma_used); | 1453 | atomic_inc(&xprt->sc_dma_used); |
1344 | ctxt->sge[0].lkey = xprt->sc_dma_lkey; | 1454 | ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey; |
1345 | ctxt->sge[0].length = length; | 1455 | ctxt->sge[0].length = length; |
1346 | 1456 | ||
1347 | /* Prepare SEND WR */ | 1457 | /* Prepare SEND WR */ |
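Two small behavior changes in svc_rdma_send_error(): the page allocation drops __GFP_NOFAIL, which is discouraged because it can retry indefinitely under memory pressure, and the SGE lkey now comes from the PD. If the allocation fails, the error reply is simply not sent; a condensed restatement of that pattern:

	/* Illustrative only: tolerate allocation failure and drop the
	 * error reply rather than insisting on __GFP_NOFAIL semantics. */
	p = alloc_page(GFP_KERNEL);
	if (!p)
		return;
	va = page_address(p);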
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 740bddcf3488..b1b009f10ea3 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -63,7 +63,7 @@ | |||
63 | */ | 63 | */ |
64 | 64 | ||
65 | static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; | 65 | static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; |
66 | static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; | 66 | unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; |
67 | static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; | 67 | static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; |
68 | static unsigned int xprt_rdma_inline_write_padding; | 68 | static unsigned int xprt_rdma_inline_write_padding; |
69 | static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; | 69 | static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; |
@@ -143,12 +143,7 @@ static struct ctl_table sunrpc_table[] = { | |||
143 | 143 | ||
144 | #endif | 144 | #endif |
145 | 145 | ||
146 | #define RPCRDMA_BIND_TO (60U * HZ) | 146 | static struct rpc_xprt_ops xprt_rdma_procs; /*forward reference */ |
147 | #define RPCRDMA_INIT_REEST_TO (5U * HZ) | ||
148 | #define RPCRDMA_MAX_REEST_TO (30U * HZ) | ||
149 | #define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ) | ||
150 | |||
151 | static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ | ||
152 | 147 | ||
153 | static void | 148 | static void |
154 | xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap) | 149 | xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap) |
@@ -174,7 +169,7 @@ xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap) | |||
174 | xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6; | 169 | xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6; |
175 | } | 170 | } |
176 | 171 | ||
177 | static void | 172 | void |
178 | xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap) | 173 | xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap) |
179 | { | 174 | { |
180 | char buf[128]; | 175 | char buf[128]; |
@@ -203,7 +198,7 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap) | |||
203 | xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; | 198 | xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; |
204 | } | 199 | } |
205 | 200 | ||
206 | static void | 201 | void |
207 | xprt_rdma_free_addresses(struct rpc_xprt *xprt) | 202 | xprt_rdma_free_addresses(struct rpc_xprt *xprt) |
208 | { | 203 | { |
209 | unsigned int i; | 204 | unsigned int i; |
@@ -499,7 +494,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) | |||
499 | if (req == NULL) | 494 | if (req == NULL) |
500 | return NULL; | 495 | return NULL; |
501 | 496 | ||
502 | flags = GFP_NOIO | __GFP_NOWARN; | 497 | flags = RPCRDMA_DEF_GFP; |
503 | if (RPC_IS_SWAPPER(task)) | 498 | if (RPC_IS_SWAPPER(task)) |
504 | flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; | 499 | flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; |
505 | 500 | ||
@@ -642,7 +637,7 @@ drop_connection: | |||
642 | return -ENOTCONN; /* implies disconnect */ | 637 | return -ENOTCONN; /* implies disconnect */ |
643 | } | 638 | } |
644 | 639 | ||
645 | static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) | 640 | void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) |
646 | { | 641 | { |
647 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); | 642 | struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); |
648 | long idle_time = 0; | 643 | long idle_time = 0; |
@@ -743,6 +738,11 @@ void xprt_rdma_cleanup(void) | |||
743 | 738 | ||
744 | rpcrdma_destroy_wq(); | 739 | rpcrdma_destroy_wq(); |
745 | frwr_destroy_recovery_wq(); | 740 | frwr_destroy_recovery_wq(); |
741 | |||
742 | rc = xprt_unregister_transport(&xprt_rdma_bc); | ||
743 | if (rc) | ||
744 | dprintk("RPC: %s: xprt_unregister(bc) returned %i\n", | ||
745 | __func__, rc); | ||
746 | } | 746 | } |
747 | 747 | ||
748 | int xprt_rdma_init(void) | 748 | int xprt_rdma_init(void) |
@@ -766,6 +766,14 @@ int xprt_rdma_init(void) | |||
766 | return rc; | 766 | return rc; |
767 | } | 767 | } |
768 | 768 | ||
769 | rc = xprt_register_transport(&xprt_rdma_bc); | ||
770 | if (rc) { | ||
771 | xprt_unregister_transport(&xprt_rdma); | ||
772 | rpcrdma_destroy_wq(); | ||
773 | frwr_destroy_recovery_wq(); | ||
774 | return rc; | ||
775 | } | ||
776 | |||
769 | dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); | 777 | dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); |
770 | 778 | ||
771 | dprintk("Defaults:\n"); | 779 | dprintk("Defaults:\n"); |
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 732c71ce5dca..878f1bfb1db9 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -462,7 +462,6 @@ int | |||
462 | rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | 462 | rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) |
463 | { | 463 | { |
464 | struct rpcrdma_ia *ia = &xprt->rx_ia; | 464 | struct rpcrdma_ia *ia = &xprt->rx_ia; |
465 | struct ib_device_attr *devattr = &ia->ri_devattr; | ||
466 | int rc; | 465 | int rc; |
467 | 466 | ||
468 | ia->ri_dma_mr = NULL; | 467 | ia->ri_dma_mr = NULL; |
@@ -482,16 +481,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) | |||
482 | goto out2; | 481 | goto out2; |
483 | } | 482 | } |
484 | 483 | ||
485 | rc = ib_query_device(ia->ri_device, devattr); | ||
486 | if (rc) { | ||
487 | dprintk("RPC: %s: ib_query_device failed %d\n", | ||
488 | __func__, rc); | ||
489 | goto out3; | ||
490 | } | ||
491 | |||
492 | if (memreg == RPCRDMA_FRMR) { | 484 | if (memreg == RPCRDMA_FRMR) { |
493 | if (!(devattr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) || | 485 | if (!(ia->ri_device->attrs.device_cap_flags & |
494 | (devattr->max_fast_reg_page_list_len == 0)) { | 486 | IB_DEVICE_MEM_MGT_EXTENSIONS) || |
487 | (ia->ri_device->attrs.max_fast_reg_page_list_len == 0)) { | ||
495 | dprintk("RPC: %s: FRMR registration " | 488 | dprintk("RPC: %s: FRMR registration " |
496 | "not supported by HCA\n", __func__); | 489 | "not supported by HCA\n", __func__); |
497 | memreg = RPCRDMA_MTHCAFMR; | 490 | memreg = RPCRDMA_MTHCAFMR; |
@@ -566,24 +559,23 @@ int | |||
566 | rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | 559 | rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, |
567 | struct rpcrdma_create_data_internal *cdata) | 560 | struct rpcrdma_create_data_internal *cdata) |
568 | { | 561 | { |
569 | struct ib_device_attr *devattr = &ia->ri_devattr; | ||
570 | struct ib_cq *sendcq, *recvcq; | 562 | struct ib_cq *sendcq, *recvcq; |
571 | struct ib_cq_init_attr cq_attr = {}; | 563 | struct ib_cq_init_attr cq_attr = {}; |
572 | unsigned int max_qp_wr; | 564 | unsigned int max_qp_wr; |
573 | int rc, err; | 565 | int rc, err; |
574 | 566 | ||
575 | if (devattr->max_sge < RPCRDMA_MAX_IOVS) { | 567 | if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) { |
576 | dprintk("RPC: %s: insufficient sge's available\n", | 568 | dprintk("RPC: %s: insufficient sge's available\n", |
577 | __func__); | 569 | __func__); |
578 | return -ENOMEM; | 570 | return -ENOMEM; |
579 | } | 571 | } |
580 | 572 | ||
581 | if (devattr->max_qp_wr <= RPCRDMA_BACKWARD_WRS) { | 573 | if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) { |
582 | dprintk("RPC: %s: insufficient wqe's available\n", | 574 | dprintk("RPC: %s: insufficient wqe's available\n", |
583 | __func__); | 575 | __func__); |
584 | return -ENOMEM; | 576 | return -ENOMEM; |
585 | } | 577 | } |
586 | max_qp_wr = devattr->max_qp_wr - RPCRDMA_BACKWARD_WRS; | 578 | max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS; |
587 | 579 | ||
588 | /* check provider's send/recv wr limits */ | 580 | /* check provider's send/recv wr limits */ |
589 | if (cdata->max_requests > max_qp_wr) | 581 | if (cdata->max_requests > max_qp_wr) |
@@ -668,11 +660,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, | |||
668 | 660 | ||
669 | /* Client offers RDMA Read but does not initiate */ | 661 | /* Client offers RDMA Read but does not initiate */ |
670 | ep->rep_remote_cma.initiator_depth = 0; | 662 | ep->rep_remote_cma.initiator_depth = 0; |
671 | if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */ | 663 | if (ia->ri_device->attrs.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ |
672 | ep->rep_remote_cma.responder_resources = 32; | 664 | ep->rep_remote_cma.responder_resources = 32; |
673 | else | 665 | else |
674 | ep->rep_remote_cma.responder_resources = | 666 | ep->rep_remote_cma.responder_resources = |
675 | devattr->max_qp_rd_atom; | 667 | ia->ri_device->attrs.max_qp_rd_atom; |
676 | 668 | ||
677 | ep->rep_remote_cma.retry_count = 7; | 669 | ep->rep_remote_cma.retry_count = 7; |
678 | ep->rep_remote_cma.flow_control = 0; | 670 | ep->rep_remote_cma.flow_control = 0; |
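rpcrdma_ep_create() likewise reads provider limits straight from ia->ri_device->attrs rather than from a privately cached copy. The responder_resources branch above is a simple clamp; an equivalent, behavior-preserving restatement for reference:

	/* Equivalent to the if/else above: cap responder_resources at an
	 * arbitrary 32, comfortably under the field's limit of 255. */
	ep->rep_remote_cma.responder_resources =
		min_t(int, ia->ri_device->attrs.max_qp_rd_atom, 32);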
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 728101ddc44b..38fe11b09875 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -55,6 +55,11 @@ | |||
55 | #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ | 55 | #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ |
56 | #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ | 56 | #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ |
57 | 57 | ||
58 | #define RPCRDMA_BIND_TO (60U * HZ) | ||
59 | #define RPCRDMA_INIT_REEST_TO (5U * HZ) | ||
60 | #define RPCRDMA_MAX_REEST_TO (30U * HZ) | ||
61 | #define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ) | ||
62 | |||
58 | /* | 63 | /* |
59 | * Interface Adapter -- one per transport instance | 64 | * Interface Adapter -- one per transport instance |
60 | */ | 65 | */ |
@@ -68,7 +73,6 @@ struct rpcrdma_ia { | |||
68 | struct completion ri_done; | 73 | struct completion ri_done; |
69 | int ri_async_rc; | 74 | int ri_async_rc; |
70 | unsigned int ri_max_frmr_depth; | 75 | unsigned int ri_max_frmr_depth; |
71 | struct ib_device_attr ri_devattr; | ||
72 | struct ib_qp_attr ri_qp_attr; | 76 | struct ib_qp_attr ri_qp_attr; |
73 | struct ib_qp_init_attr ri_qp_init_attr; | 77 | struct ib_qp_init_attr ri_qp_init_attr; |
74 | }; | 78 | }; |
@@ -142,6 +146,8 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb) | |||
142 | return (struct rpcrdma_msg *)rb->rg_base; | 146 | return (struct rpcrdma_msg *)rb->rg_base; |
143 | } | 147 | } |
144 | 148 | ||
149 | #define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN) | ||
150 | |||
145 | /* | 151 | /* |
146 | * struct rpcrdma_rep -- this structure encapsulates state required to recv | 152 | * struct rpcrdma_rep -- this structure encapsulates state required to recv |
147 | * and complete a reply, asychronously. It needs several pieces of | 153 | * and complete a reply, asychronously. It needs several pieces of |
@@ -309,6 +315,8 @@ struct rpcrdma_buffer { | |||
309 | u32 rb_bc_srv_max_requests; | 315 | u32 rb_bc_srv_max_requests; |
310 | spinlock_t rb_reqslock; /* protect rb_allreqs */ | 316 | spinlock_t rb_reqslock; /* protect rb_allreqs */ |
311 | struct list_head rb_allreqs; | 317 | struct list_head rb_allreqs; |
318 | |||
319 | u32 rb_bc_max_requests; | ||
312 | }; | 320 | }; |
313 | #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) | 321 | #define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) |
314 | 322 | ||
@@ -516,6 +524,10 @@ int rpcrdma_marshal_req(struct rpc_rqst *); | |||
516 | 524 | ||
517 | /* RPC/RDMA module init - xprtrdma/transport.c | 525 | /* RPC/RDMA module init - xprtrdma/transport.c |
518 | */ | 526 | */ |
527 | extern unsigned int xprt_rdma_max_inline_read; | ||
528 | void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap); | ||
529 | void xprt_rdma_free_addresses(struct rpc_xprt *xprt); | ||
530 | void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq); | ||
519 | int xprt_rdma_init(void); | 531 | int xprt_rdma_init(void); |
520 | void xprt_rdma_cleanup(void); | 532 | void xprt_rdma_cleanup(void); |
521 | 533 | ||
@@ -531,11 +543,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *); | |||
531 | void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); | 543 | void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); |
532 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ | 544 | #endif /* CONFIG_SUNRPC_BACKCHANNEL */ |
533 | 545 | ||
534 | /* Temporary NFS request map cache. Created in svc_rdma.c */ | 546 | extern struct xprt_class xprt_rdma_bc; |
535 | extern struct kmem_cache *svc_rdma_map_cachep; | ||
536 | /* WR context cache. Created in svc_rdma.c */ | ||
537 | extern struct kmem_cache *svc_rdma_ctxt_cachep; | ||
538 | /* Workqueue created in svc_rdma.c */ | ||
539 | extern struct workqueue_struct *svc_rdma_wq; | ||
540 | 547 | ||
541 | #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ | 548 | #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ |
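The header now exports the address-formatting, stats, and inline-read symbols and declares the xprt_rdma_bc class, so other compilation units in the module can reuse them; the svc_rdma kmem-cache and workqueue externs are dropped from this header. A minimal, purely illustrative consumer of the newly non-static helpers:

	#include "xprt_rdma.h"

	/* Illustrative only: another file in the module reusing the helpers
	 * this patch makes non-static. */
	static void example_set_addresses(struct rpc_xprt *xprt,
					  struct sockaddr *sap)
	{
		xprt_rdma_format_addresses(xprt, sap);
	}

	static void example_clear_addresses(struct rpc_xprt *xprt)
	{
		xprt_rdma_free_addresses(xprt);
	}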