Diffstat (limited to 'net')
40 files changed, 1251 insertions, 587 deletions
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 0e474b13463b..1059ed3bc255 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1044,10 +1044,9 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
 	if (repl->num_counters &&
 	    copy_to_user(repl->counters, counterstmp,
 			 repl->num_counters * sizeof(struct ebt_counter))) {
-		ret = -EFAULT;
+		/* Silent error, can't fail, new table is already in place */
+		net_warn_ratelimited("ebtables: counters copy to user failed while replacing table\n");
 	}
-	else
-		ret = 0;
 
 	/* decrease module count and free resources */
 	EBT_ENTRY_ITERATE(table->entries, table->entries_size,
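The interesting part of this hunk is the ordering constraint it documents: by the time the counters are copied back to userspace, the new table has already been swapped in, so returning -EFAULT would report failure for a replace that actually took effect. A minimal userspace sketch of the same commit-then-report pattern (names and types are illustrative, not from ebtables):

#include <stdio.h>
#include <string.h>

struct table { int counters[4]; };

static struct table *active;             /* shared state; assume locking */

/* Commit the new table first; everything after the swap is
 * best-effort and may warn, but must not turn into a failure
 * return for an operation that already took effect. */
static int replace_table(struct table *new_tbl, int *user_buf)
{
    struct table *old = active;

    active = new_tbl;                    /* point of no return */

    if (user_buf)
        memcpy(user_buf, old->counters, sizeof(old->counters));
    else
        fprintf(stderr, "warning: counter copy-back failed\n");

    return 0;                            /* success either way */
}

int main(void)
{
    struct table t0 = { { 1, 2, 3, 4 } }, t1 = { { 0 } };
    int buf[4];

    active = &t0;
    replace_table(&t1, buf);             /* normal case: counters copied */
    replace_table(&t0, NULL);            /* copy-back fails: warn only */
    printf("first counter seen by user: %d\n", buf[0]);
    return 0;
}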
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index b703790b4e44..a1ef53c04415 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -292,10 +292,12 @@ static int is_out(const struct crush_map *map,
  * @outpos: our position in that vector
  * @tries: number of attempts to make
  * @recurse_tries: number of attempts to have recursive chooseleaf make
- * @local_tries: localized retries
- * @local_fallback_tries: localized fallback retries
+ * @local_retries: localized retries
+ * @local_fallback_retries: localized fallback retries
  * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
+ * @vary_r: pass r to recursive calls
  * @out2: second output vector for leaf items (if @recurse_to_leaf)
+ * @parent_r: r value passed from the parent
  */
 static int crush_choose_firstn(const struct crush_map *map,
 			       struct crush_bucket *bucket,
@@ -304,10 +306,12 @@ static int crush_choose_firstn(const struct crush_map *map,
 			       int *out, int outpos,
 			       unsigned int tries,
 			       unsigned int recurse_tries,
-			       unsigned int local_tries,
-			       unsigned int local_fallback_tries,
+			       unsigned int local_retries,
+			       unsigned int local_fallback_retries,
 			       int recurse_to_leaf,
-			       int *out2)
+			       unsigned int vary_r,
+			       int *out2,
+			       int parent_r)
 {
 	int rep;
 	unsigned int ftotal, flocal;
@@ -319,8 +323,11 @@ static int crush_choose_firstn(const struct crush_map *map,
 	int itemtype;
 	int collide, reject;
 
-	dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
-		bucket->id, x, outpos, numrep);
+	dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
+		recurse_to_leaf ? "_LEAF" : "",
+		bucket->id, x, outpos, numrep,
+		tries, recurse_tries, local_retries, local_fallback_retries,
+		parent_r);
 
 	for (rep = outpos; rep < numrep; rep++) {
 		/* keep trying until we get a non-out, non-colliding item */
@@ -335,7 +342,7 @@ static int crush_choose_firstn(const struct crush_map *map,
 		do {
 			collide = 0;
 			retry_bucket = 0;
-			r = rep;
+			r = rep + parent_r;
 			/* r' = r + f_total */
 			r += ftotal;
 
@@ -344,9 +351,9 @@ static int crush_choose_firstn(const struct crush_map *map,
 				reject = 1;
 				goto reject;
 			}
-			if (local_fallback_tries > 0 &&
+			if (local_fallback_retries > 0 &&
 			    flocal >= (in->size>>1) &&
-			    flocal > local_fallback_tries)
+			    flocal > local_fallback_retries)
 				item = bucket_perm_choose(in, x, r);
 			else
 				item = crush_bucket_choose(in, x, r);
@@ -387,16 +394,23 @@ static int crush_choose_firstn(const struct crush_map *map,
 			reject = 0;
 			if (!collide && recurse_to_leaf) {
 				if (item < 0) {
+					int sub_r;
+					if (vary_r)
+						sub_r = r >> (vary_r-1);
+					else
+						sub_r = 0;
 					if (crush_choose_firstn(map,
 						 map->buckets[-1-item],
 						 weight, weight_max,
 						 x, outpos+1, 0,
 						 out2, outpos,
 						 recurse_tries, 0,
-						 local_tries,
-						 local_fallback_tries,
+						 local_retries,
+						 local_fallback_retries,
 						 0,
-						 NULL) <= outpos)
+						 vary_r,
+						 NULL,
+						 sub_r) <= outpos)
 						/* didn't get leaf */
 						reject = 1;
 				} else {
@@ -420,14 +434,14 @@ reject:
 			ftotal++;
 			flocal++;
 
-			if (collide && flocal <= local_tries)
+			if (collide && flocal <= local_retries)
 				/* retry locally a few times */
 				retry_bucket = 1;
-			else if (local_fallback_tries > 0 &&
-				 flocal <= in->size + local_fallback_tries)
+			else if (local_fallback_retries > 0 &&
+				 flocal <= in->size + local_fallback_retries)
 				/* exhaustive bucket search */
 				retry_bucket = 1;
-			else if (ftotal <= tries)
+			else if (ftotal < tries)
 				/* then retry descent */
 				retry_descent = 1;
 			else
@@ -640,10 +654,20 @@ int crush_do_rule(const struct crush_map *map,
 	__u32 step;
 	int i, j;
 	int numrep;
-	int choose_tries = map->choose_total_tries;
-	int choose_local_tries = map->choose_local_tries;
-	int choose_local_fallback_tries = map->choose_local_fallback_tries;
+	/*
+	 * the original choose_total_tries value was off by one (it
+	 * counted "retries" and not "tries").  add one.
+	 */
+	int choose_tries = map->choose_total_tries + 1;
 	int choose_leaf_tries = 0;
+	/*
+	 * the local tries values were counted as "retries", though,
+	 * and need no adjustment
+	 */
+	int choose_local_retries = map->choose_local_tries;
+	int choose_local_fallback_retries = map->choose_local_fallback_tries;
+
+	int vary_r = map->chooseleaf_vary_r;
 
 	if ((__u32)ruleno >= map->max_rules) {
 		dprintk(" bad ruleno %d\n", ruleno);
@@ -676,13 +700,18 @@ int crush_do_rule(const struct crush_map *map,
 			break;
 
 		case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES:
-			if (curstep->arg1 > 0)
-				choose_local_tries = curstep->arg1;
+			if (curstep->arg1 >= 0)
+				choose_local_retries = curstep->arg1;
 			break;
 
 		case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES:
-			if (curstep->arg1 > 0)
-				choose_local_fallback_tries = curstep->arg1;
+			if (curstep->arg1 >= 0)
+				choose_local_fallback_retries = curstep->arg1;
+			break;
+
+		case CRUSH_RULE_SET_CHOOSELEAF_VARY_R:
+			if (curstep->arg1 >= 0)
+				vary_r = curstep->arg1;
 			break;
 
 		case CRUSH_RULE_CHOOSELEAF_FIRSTN:
@@ -734,10 +763,12 @@ int crush_do_rule(const struct crush_map *map,
 					o+osize, j,
 					choose_tries,
 					recurse_tries,
-					choose_local_tries,
-					choose_local_fallback_tries,
+					choose_local_retries,
+					choose_local_fallback_retries,
 					recurse_to_leaf,
-					c+osize);
+					vary_r,
+					c+osize,
+					0);
 			} else {
 				crush_choose_indep(
 					map,
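The renames and the `ftotal < tries` change follow from one distinction: `choose_total_tries` historically counted *retries*, while the local values really are retries. Adding one (`choose_tries = map->choose_total_tries + 1`) and tightening the loop condition keeps the total number of descent attempts the same while making the name honest. A standalone sketch of that accounting (hypothetical names, not CRUSH itself):

#include <stdio.h>

/* Count how many descent attempts a retry loop makes.  With
 * "ftotal < tries" the body runs exactly `tries` times; the old
 * "ftotal <= tries" ran tries + 1 times. */
static int attempts(unsigned int tries, int off_by_one)
{
    unsigned int ftotal = 0;
    int n = 0;

    for (;;) {
        n++;                      /* one descent attempt */
        ftotal++;                 /* pretend every attempt failed */
        if (off_by_one ? ftotal <= tries : ftotal < tries)
            continue;             /* retry descent */
        break;                    /* give up */
    }
    return n;
}

int main(void)
{
    unsigned int total_tries = 50;             /* map->choose_total_tries */
    /* new code: tries = total_tries + 1, condition ftotal < tries */
    printf("new: %d attempts\n", attempts(total_tries + 1, 0));  /* 51 */
    /* old code: tries = total_tries, condition ftotal <= tries */
    printf("old: %d attempts\n", attempts(total_tries, 1));      /* 51 */
    return 0;
}

Both print 51: behavior is preserved, only the semantics of the stored value changed.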
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 258a382e75ed..10421a4b76f8 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -53,34 +53,55 @@ static int osdmap_show(struct seq_file *s, void *p)
 {
 	int i;
 	struct ceph_client *client = s->private;
+	struct ceph_osdmap *map = client->osdc.osdmap;
 	struct rb_node *n;
 
-	if (client->osdc.osdmap == NULL)
+	if (map == NULL)
 		return 0;
-	seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch);
+
+	seq_printf(s, "epoch %d\n", map->epoch);
 	seq_printf(s, "flags%s%s\n",
-		   (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ?
-		   " NEARFULL" : "",
-		   (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ?
-		   " FULL" : "");
-	for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) {
+		   (map->flags & CEPH_OSDMAP_NEARFULL) ? " NEARFULL" : "",
+		   (map->flags & CEPH_OSDMAP_FULL) ? " FULL" : "");
+
+	for (n = rb_first(&map->pg_pools); n; n = rb_next(n)) {
 		struct ceph_pg_pool_info *pool =
 			rb_entry(n, struct ceph_pg_pool_info, node);
-		seq_printf(s, "pg_pool %llu pg_num %d / %d\n",
-			   (unsigned long long)pool->id, pool->pg_num,
-			   pool->pg_num_mask);
+
+		seq_printf(s, "pool %lld pg_num %u (%d) read_tier %lld write_tier %lld\n",
+			   pool->id, pool->pg_num, pool->pg_num_mask,
+			   pool->read_tier, pool->write_tier);
 	}
-	for (i = 0; i < client->osdc.osdmap->max_osd; i++) {
-		struct ceph_entity_addr *addr =
-			&client->osdc.osdmap->osd_addr[i];
-		int state = client->osdc.osdmap->osd_state[i];
+	for (i = 0; i < map->max_osd; i++) {
+		struct ceph_entity_addr *addr = &map->osd_addr[i];
+		int state = map->osd_state[i];
 		char sb[64];
 
-		seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n",
+		seq_printf(s, "osd%d\t%s\t%3d%%\t(%s)\t%3d%%\n",
 			   i, ceph_pr_addr(&addr->in_addr),
-			   ((client->osdc.osdmap->osd_weight[i]*100) >> 16),
-			   ceph_osdmap_state_str(sb, sizeof(sb), state));
+			   ((map->osd_weight[i]*100) >> 16),
+			   ceph_osdmap_state_str(sb, sizeof(sb), state),
+			   ((ceph_get_primary_affinity(map, i)*100) >> 16));
+	}
+	for (n = rb_first(&map->pg_temp); n; n = rb_next(n)) {
+		struct ceph_pg_mapping *pg =
+			rb_entry(n, struct ceph_pg_mapping, node);
+
+		seq_printf(s, "pg_temp %llu.%x [", pg->pgid.pool,
+			   pg->pgid.seed);
+		for (i = 0; i < pg->pg_temp.len; i++)
+			seq_printf(s, "%s%d", (i == 0 ? "" : ","),
+				   pg->pg_temp.osds[i]);
+		seq_printf(s, "]\n");
 	}
+	for (n = rb_first(&map->primary_temp); n; n = rb_next(n)) {
+		struct ceph_pg_mapping *pg =
+			rb_entry(n, struct ceph_pg_mapping, node);
+
+		seq_printf(s, "primary_temp %llu.%x %d\n", pg->pgid.pool,
+			   pg->pgid.seed, pg->primary_temp.osd);
+	}
+
 	return 0;
 }
 
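The reworked osdmap_show() now also dumps pool tiering fields, a per-OSD primary-affinity percentage, and the pg_temp and primary_temp mappings. Judging purely from the format strings above, the debugfs output is shaped roughly like this (all values invented for illustration):

epoch 312
flags NEARFULL
pool 1 pg_num 64 (63) read_tier -1 write_tier -1
osd0	192.168.0.10:6800	100%	(exists, up)	 50%
pg_temp 1.2f [0,2,1]
primary_temp 1.2f 2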
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 30efc5c18622..4f55f9ce63fa 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -919,6 +919,9 @@ static bool ceph_msg_data_pages_advance(struct ceph_msg_data_cursor *cursor,
 	if (!bytes || cursor->page_offset)
 		return false;	/* more bytes to process in the current page */
 
+	if (!cursor->resid)
+		return false;   /* no more data */
+
 	/* Move on to the next page; offset is already at 0 */
 
 	BUG_ON(cursor->page_index >= cursor->page_count);
@@ -1004,6 +1007,9 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
 	if (!bytes || cursor->offset & ~PAGE_MASK)
 		return false;	/* more bytes to process in the current page */
 
+	if (!cursor->resid)
+		return false;   /* no more data */
+
 	/* Move on to the next page */
 
 	BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
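Both hunks add the same guard: when an advance consumes exactly the last bytes of a page, the page offset wraps to 0 and the old code would step past the final page and hit the BUG_ON. Checking cursor->resid first makes "ended on a page boundary" a terminal state. A minimal userspace model of the invariant (hypothetical struct, not the kernel one):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SZ 4096

struct cursor {
    size_t resid;        /* bytes remaining after this advance */
    size_t page_offset;  /* offset within the current page */
    int    page_index;
};

/* Returns true only if we actually moved to a new page. */
static bool cursor_advance(struct cursor *c, size_t bytes)
{
    c->resid -= bytes;
    c->page_offset = (c->page_offset + bytes) % PAGE_SZ;

    if (!bytes || c->page_offset)
        return false;    /* still inside the current page */

    if (!c->resid)
        return false;    /* consumed the last page: do not step past it */

    c->page_index++;     /* safe: more data follows */
    return true;
}

int main(void)
{
    struct cursor c = { .resid = PAGE_SZ, .page_offset = 0, .page_index = 0 };
    printf("%d\n", cursor_advance(&c, PAGE_SZ)); /* 0: no next page */
    return 0;
}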
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 82750f915865..b0dfce77656a 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -436,6 +436,7 @@ static bool osd_req_opcode_valid(u16 opcode)
 	case CEPH_OSD_OP_OMAPCLEAR:
 	case CEPH_OSD_OP_OMAPRMKEYS:
 	case CEPH_OSD_OP_OMAP_CMP:
+	case CEPH_OSD_OP_SETALLOCHINT:
 	case CEPH_OSD_OP_CLONERANGE:
 	case CEPH_OSD_OP_ASSERT_SRC_VERSION:
 	case CEPH_OSD_OP_SRC_CMPXATTR:
@@ -591,6 +592,26 @@ void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
 }
 EXPORT_SYMBOL(osd_req_op_watch_init);
 
+void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
+				unsigned int which,
+				u64 expected_object_size,
+				u64 expected_write_size)
+{
+	struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
+						      CEPH_OSD_OP_SETALLOCHINT);
+
+	op->alloc_hint.expected_object_size = expected_object_size;
+	op->alloc_hint.expected_write_size = expected_write_size;
+
+	/*
+	 * CEPH_OSD_OP_SETALLOCHINT op is advisory and therefore deemed
+	 * not worth a feature bit.  Set FAILOK per-op flag to make
+	 * sure older osds don't trip over an unsupported opcode.
+	 */
+	op->flags |= CEPH_OSD_OP_FLAG_FAILOK;
+}
+EXPORT_SYMBOL(osd_req_op_alloc_hint_init);
+
 static void ceph_osdc_msg_data_add(struct ceph_msg *msg,
 				   struct ceph_osd_data *osd_data)
 {
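The exported helper above is the whole client-side API for the new hint. A hypothetical caller (the surrounding request setup is elided and the sizes are invented; this is a sketch of intended usage, not code from this commit):

/*
 * Hypothetical caller: tell the OSD that this object will be written
 * in 4 MB chunks so it can preallocate sensibly.  Because the op is
 * advisory and FAILOK is set internally, older OSDs simply ignore it.
 */
static void setup_write_request(struct ceph_osd_request *req)
{
	u64 object_size = 4 * 1024 * 1024;	/* expected object size */
	u64 write_size  = 4 * 1024 * 1024;	/* expected write size */

	/* op 0: the allocation hint */
	osd_req_op_alloc_hint_init(req, 0, object_size, write_size);

	/* op 1 onwards: the actual write op(s) would follow here */
}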
@@ -681,6 +702,12 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
 		dst->watch.ver = cpu_to_le64(src->watch.ver);
 		dst->watch.flag = src->watch.flag;
 		break;
+	case CEPH_OSD_OP_SETALLOCHINT:
+		dst->alloc_hint.expected_object_size =
+		    cpu_to_le64(src->alloc_hint.expected_object_size);
+		dst->alloc_hint.expected_write_size =
+		    cpu_to_le64(src->alloc_hint.expected_write_size);
+		break;
 	default:
 		pr_err("unsupported osd opcode %s\n",
 		       ceph_osd_op_name(src->op));
@@ -688,7 +715,9 @@ static u64 osd_req_encode_op(struct ceph_osd_request *req,
 
 		return 0;
 	}
+
 	dst->op = cpu_to_le16(src->op);
+	dst->flags = cpu_to_le32(src->flags);
 	dst->payload_len = cpu_to_le32(src->payload_len);
 
 	return request_data_len;
@@ -1304,7 +1333,7 @@ static int __map_request(struct ceph_osd_client *osdc,
 {
 	struct ceph_pg pgid;
 	int acting[CEPH_PG_MAX_SIZE];
-	int o = -1, num = 0;
+	int num, o;
 	int err;
 	bool was_paused;
 
@@ -1317,11 +1346,9 @@ static int __map_request(struct ceph_osd_client *osdc,
 	}
 	req->r_pgid = pgid;
 
-	err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting);
-	if (err > 0) {
-		o = acting[0];
-		num = err;
-	}
+	num = ceph_calc_pg_acting(osdc->osdmap, pgid, acting, &o);
+	if (num < 0)
+		num = 0;
 
 	was_paused = req->r_paused;
 	req->r_paused = __req_should_be_paused(osdc, req);
@@ -2033,7 +2060,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
 			int skipped_map = 0;
 
 			dout("taking full map %u len %d\n", epoch, maplen);
-			newmap = osdmap_decode(&p, p+maplen);
+			newmap = ceph_osdmap_decode(&p, p+maplen);
 			if (IS_ERR(newmap)) {
 				err = PTR_ERR(newmap);
 				goto bad;
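The __map_request() hunk tracks an interface change: ceph_calc_pg_acting() now returns the acting-set length and hands the primary back through an out parameter, instead of the caller assuming acting[0] is the primary (which primary_temp and primary-affinity can change). A simplified standalone model of the new convention (invented values, not the real CRUSH computation):

#include <stdio.h>

/* Return acting set length (or a negative error) and report the
 * primary separately -- it need not be acting[0] once primary_temp /
 * primary-affinity can override it. */
static int calc_pg_acting(int *acting, int max, int *primary)
{
    int set[] = { 3, 7, 12 };    /* pretend CRUSH produced this set */
    int n = 3;

    for (int i = 0; i < n && i < max; i++)
        acting[i] = set[i];
    *primary = 7;                /* chosen by affinity, not position */
    return n;
}

int main(void)
{
    int acting[8], o, num;

    num = calc_pg_acting(acting, 8, &o);
    if (num < 0)
        num = 0;                 /* same fallback as __map_request() */
    printf("acting len %d primary osd%d\n", num, o);
    return 0;
}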
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index aade4a5c1c07..e632b5a52f5b 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -343,7 +343,7 @@ bad:
 
 /*
  * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
- * to a set of osds)
+ * to a set of osds) and primary_temp (explicit primary setting)
  */
 static int pgid_cmp(struct ceph_pg l, struct ceph_pg r)
 {
@@ -506,7 +506,7 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
 	kfree(pi);
 }
 
-static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
+static int decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
 {
 	u8 ev, cv;
 	unsigned len, num;
@@ -587,7 +587,7 @@ bad:
 	return -EINVAL;
 }
 
-static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
+static int decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
 {
 	struct ceph_pg_pool_info *pi;
 	u32 num, len;
@@ -633,6 +633,13 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
 		rb_erase(&pg->node, &map->pg_temp);
 		kfree(pg);
 	}
+	while (!RB_EMPTY_ROOT(&map->primary_temp)) {
+		struct ceph_pg_mapping *pg =
+			rb_entry(rb_first(&map->primary_temp),
+				 struct ceph_pg_mapping, node);
+		rb_erase(&pg->node, &map->primary_temp);
+		kfree(pg);
+	}
 	while (!RB_EMPTY_ROOT(&map->pg_pools)) {
 		struct ceph_pg_pool_info *pi =
 			rb_entry(rb_first(&map->pg_pools),
@@ -642,186 +649,516 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
 	kfree(map->osd_state);
 	kfree(map->osd_weight);
 	kfree(map->osd_addr);
+	kfree(map->osd_primary_affinity);
 	kfree(map);
 }
 
 /*
- * adjust max osd value.  reallocate arrays.
+ * Adjust max_osd value, (re)allocate arrays.
+ *
+ * The new elements are properly initialized.
  */
 static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
 {
 	u8 *state;
-	struct ceph_entity_addr *addr;
 	u32 *weight;
+	struct ceph_entity_addr *addr;
+	int i;
 
-	state = kcalloc(max, sizeof(*state), GFP_NOFS);
-	addr = kcalloc(max, sizeof(*addr), GFP_NOFS);
-	weight = kcalloc(max, sizeof(*weight), GFP_NOFS);
-	if (state == NULL || addr == NULL || weight == NULL) {
+	state = krealloc(map->osd_state, max*sizeof(*state), GFP_NOFS);
+	weight = krealloc(map->osd_weight, max*sizeof(*weight), GFP_NOFS);
+	addr = krealloc(map->osd_addr, max*sizeof(*addr), GFP_NOFS);
+	if (!state || !weight || !addr) {
 		kfree(state);
-		kfree(addr);
 		kfree(weight);
+		kfree(addr);
+
 		return -ENOMEM;
 	}
 
-	/* copy old? */
-	if (map->osd_state) {
-		memcpy(state, map->osd_state, map->max_osd*sizeof(*state));
-		memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr));
-		memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight));
-		kfree(map->osd_state);
-		kfree(map->osd_addr);
-		kfree(map->osd_weight);
+	for (i = map->max_osd; i < max; i++) {
+		state[i] = 0;
+		weight[i] = CEPH_OSD_OUT;
+		memset(addr + i, 0, sizeof(*addr));
 	}
 
 	map->osd_state = state;
 	map->osd_weight = weight;
 	map->osd_addr = addr;
+
+	if (map->osd_primary_affinity) {
+		u32 *affinity;
+
+		affinity = krealloc(map->osd_primary_affinity,
+				    max*sizeof(*affinity), GFP_NOFS);
+		if (!affinity)
+			return -ENOMEM;
+
+		for (i = map->max_osd; i < max; i++)
+			affinity[i] = CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
+
+		map->osd_primary_affinity = affinity;
+	}
+
 	map->max_osd = max;
+
 	return 0;
 }
 
+#define OSDMAP_WRAPPER_COMPAT_VER	7
+#define OSDMAP_CLIENT_DATA_COMPAT_VER	1
+
 /*
- * decode a full map.
+ * Return 0 or error.  On success, *v is set to 0 for old (v6) osdmaps,
+ * to struct_v of the client_data section for new (v7 and above)
+ * osdmaps.
  */
-struct ceph_osdmap *osdmap_decode(void **p, void *end)
+static int get_osdmap_client_data_v(void **p, void *end,
+				    const char *prefix, u8 *v)
 {
-	struct ceph_osdmap *map;
-	u16 version;
-	u32 len, max, i;
-	int err = -EINVAL;
-	void *start = *p;
-	struct ceph_pg_pool_info *pi;
+	u8 struct_v;
+
+	ceph_decode_8_safe(p, end, struct_v, e_inval);
+	if (struct_v >= 7) {
+		u8 struct_compat;
+
+		ceph_decode_8_safe(p, end, struct_compat, e_inval);
+		if (struct_compat > OSDMAP_WRAPPER_COMPAT_VER) {
+			pr_warning("got v %d cv %d > %d of %s ceph_osdmap\n",
+				   struct_v, struct_compat,
+				   OSDMAP_WRAPPER_COMPAT_VER, prefix);
+			return -EINVAL;
+		}
+		*p += 4; /* ignore wrapper struct_len */
+
+		ceph_decode_8_safe(p, end, struct_v, e_inval);
+		ceph_decode_8_safe(p, end, struct_compat, e_inval);
+		if (struct_compat > OSDMAP_CLIENT_DATA_COMPAT_VER) {
+			pr_warning("got v %d cv %d > %d of %s ceph_osdmap client data\n",
+				   struct_v, struct_compat,
+				   OSDMAP_CLIENT_DATA_COMPAT_VER, prefix);
+			return -EINVAL;
+		}
+		*p += 4; /* ignore client data struct_len */
+	} else {
+		u16 version;
+
+		*p -= 1;
+		ceph_decode_16_safe(p, end, version, e_inval);
+		if (version < 6) {
+			pr_warning("got v %d < 6 of %s ceph_osdmap\n", version,
+				   prefix);
+			return -EINVAL;
+		}
 
-	dout("osdmap_decode %p to %p len %d\n", *p, end, (int)(end - *p));
+		/* old osdmap enconding */
+		struct_v = 0;
+	}
 
-	map = kzalloc(sizeof(*map), GFP_NOFS);
-	if (map == NULL)
-		return ERR_PTR(-ENOMEM);
-	map->pg_temp = RB_ROOT;
+	*v = struct_v;
+	return 0;
 
-	ceph_decode_16_safe(p, end, version, bad);
-	if (version > 6) {
-		pr_warning("got unknown v %d > 6 of osdmap\n", version);
-		goto bad;
+e_inval:
+	return -EINVAL;
+}
+
+static int __decode_pools(void **p, void *end, struct ceph_osdmap *map,
+			  bool incremental)
+{
+	u32 n;
+
+	ceph_decode_32_safe(p, end, n, e_inval);
+	while (n--) {
+		struct ceph_pg_pool_info *pi;
+		u64 pool;
+		int ret;
+
+		ceph_decode_64_safe(p, end, pool, e_inval);
+
+		pi = __lookup_pg_pool(&map->pg_pools, pool);
+		if (!incremental || !pi) {
+			pi = kzalloc(sizeof(*pi), GFP_NOFS);
+			if (!pi)
+				return -ENOMEM;
+
+			pi->id = pool;
+
+			ret = __insert_pg_pool(&map->pg_pools, pi);
+			if (ret) {
+				kfree(pi);
+				return ret;
+			}
+		}
+
+		ret = decode_pool(p, end, pi);
+		if (ret)
+			return ret;
 	}
-	if (version < 6) {
-		pr_warning("got old v %d < 6 of osdmap\n", version);
-		goto bad;
+
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
+static int decode_pools(void **p, void *end, struct ceph_osdmap *map)
+{
+	return __decode_pools(p, end, map, false);
+}
+
+static int decode_new_pools(void **p, void *end, struct ceph_osdmap *map)
+{
+	return __decode_pools(p, end, map, true);
+}
+
+static int __decode_pg_temp(void **p, void *end, struct ceph_osdmap *map,
+			    bool incremental)
+{
+	u32 n;
+
+	ceph_decode_32_safe(p, end, n, e_inval);
+	while (n--) {
+		struct ceph_pg pgid;
+		u32 len, i;
+		int ret;
+
+		ret = ceph_decode_pgid(p, end, &pgid);
+		if (ret)
+			return ret;
+
+		ceph_decode_32_safe(p, end, len, e_inval);
+
+		ret = __remove_pg_mapping(&map->pg_temp, pgid);
+		BUG_ON(!incremental && ret != -ENOENT);
+
+		if (!incremental || len > 0) {
+			struct ceph_pg_mapping *pg;
+
+			ceph_decode_need(p, end, len*sizeof(u32), e_inval);
+
+			if (len > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
+				return -EINVAL;
+
+			pg = kzalloc(sizeof(*pg) + len*sizeof(u32), GFP_NOFS);
+			if (!pg)
+				return -ENOMEM;
+
+			pg->pgid = pgid;
+			pg->pg_temp.len = len;
+			for (i = 0; i < len; i++)
+				pg->pg_temp.osds[i] = ceph_decode_32(p);
+
+			ret = __insert_pg_mapping(pg, &map->pg_temp);
+			if (ret) {
+				kfree(pg);
+				return ret;
+			}
+		}
 	}
 
-	ceph_decode_need(p, end, 2*sizeof(u64)+6*sizeof(u32), bad);
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
+static int decode_pg_temp(void **p, void *end, struct ceph_osdmap *map)
+{
+	return __decode_pg_temp(p, end, map, false);
+}
+
+static int decode_new_pg_temp(void **p, void *end, struct ceph_osdmap *map)
+{
+	return __decode_pg_temp(p, end, map, true);
+}
+
+static int __decode_primary_temp(void **p, void *end, struct ceph_osdmap *map,
+				 bool incremental)
+{
+	u32 n;
+
+	ceph_decode_32_safe(p, end, n, e_inval);
+	while (n--) {
+		struct ceph_pg pgid;
+		u32 osd;
+		int ret;
+
+		ret = ceph_decode_pgid(p, end, &pgid);
+		if (ret)
+			return ret;
+
+		ceph_decode_32_safe(p, end, osd, e_inval);
+
+		ret = __remove_pg_mapping(&map->primary_temp, pgid);
+		BUG_ON(!incremental && ret != -ENOENT);
+
+		if (!incremental || osd != (u32)-1) {
+			struct ceph_pg_mapping *pg;
+
+			pg = kzalloc(sizeof(*pg), GFP_NOFS);
+			if (!pg)
+				return -ENOMEM;
+
+			pg->pgid = pgid;
+			pg->primary_temp.osd = osd;
+
+			ret = __insert_pg_mapping(pg, &map->primary_temp);
+			if (ret) {
+				kfree(pg);
+				return ret;
+			}
+		}
+	}
+
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
+static int decode_primary_temp(void **p, void *end, struct ceph_osdmap *map)
+{
+	return __decode_primary_temp(p, end, map, false);
+}
+
+static int decode_new_primary_temp(void **p, void *end,
+				   struct ceph_osdmap *map)
+{
+	return __decode_primary_temp(p, end, map, true);
+}
+
+u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd)
+{
+	BUG_ON(osd >= map->max_osd);
+
+	if (!map->osd_primary_affinity)
+		return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
+
+	return map->osd_primary_affinity[osd];
+}
+
+static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff)
+{
+	BUG_ON(osd >= map->max_osd);
+
+	if (!map->osd_primary_affinity) {
+		int i;
+
+		map->osd_primary_affinity = kmalloc(map->max_osd*sizeof(u32),
+						    GFP_NOFS);
+		if (!map->osd_primary_affinity)
+			return -ENOMEM;
+
+		for (i = 0; i < map->max_osd; i++)
+			map->osd_primary_affinity[i] =
+			    CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
+	}
+
+	map->osd_primary_affinity[osd] = aff;
+
+	return 0;
+}
+
+static int decode_primary_affinity(void **p, void *end,
+				   struct ceph_osdmap *map)
+{
+	u32 len, i;
+
+	ceph_decode_32_safe(p, end, len, e_inval);
+	if (len == 0) {
+		kfree(map->osd_primary_affinity);
+		map->osd_primary_affinity = NULL;
+		return 0;
+	}
+	if (len != map->max_osd)
+		goto e_inval;
+
+	ceph_decode_need(p, end, map->max_osd*sizeof(u32), e_inval);
+
+	for (i = 0; i < map->max_osd; i++) {
+		int ret;
+
+		ret = set_primary_affinity(map, i, ceph_decode_32(p));
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
+static int decode_new_primary_affinity(void **p, void *end,
+				       struct ceph_osdmap *map)
+{
+	u32 n;
+
+	ceph_decode_32_safe(p, end, n, e_inval);
+	while (n--) {
+		u32 osd, aff;
+		int ret;
+
+		ceph_decode_32_safe(p, end, osd, e_inval);
+		ceph_decode_32_safe(p, end, aff, e_inval);
+
+		ret = set_primary_affinity(map, osd, aff);
+		if (ret)
+			return ret;
+
+		pr_info("osd%d primary-affinity 0x%x\n", osd, aff);
+	}
+
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
+/*
+ * decode a full map.
+ */
+static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
+{
+	u8 struct_v;
+	u32 epoch = 0;
+	void *start = *p;
+	u32 max;
+	u32 len, i;
+	int err;
+
+	dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p));
+
+	err = get_osdmap_client_data_v(p, end, "full", &struct_v);
+	if (err)
+		goto bad;
+
+	/* fsid, epoch, created, modified */
+	ceph_decode_need(p, end, sizeof(map->fsid) + sizeof(u32) +
+			 sizeof(map->created) + sizeof(map->modified), e_inval);
 	ceph_decode_copy(p, &map->fsid, sizeof(map->fsid));
-	map->epoch = ceph_decode_32(p);
+	epoch = map->epoch = ceph_decode_32(p);
 	ceph_decode_copy(p, &map->created, sizeof(map->created));
 	ceph_decode_copy(p, &map->modified, sizeof(map->modified));
 
-	ceph_decode_32_safe(p, end, max, bad);
-	while (max--) {
-		ceph_decode_need(p, end, 8 + 2, bad);
-		err = -ENOMEM;
-		pi = kzalloc(sizeof(*pi), GFP_NOFS);
-		if (!pi)
-			goto bad;
-		pi->id = ceph_decode_64(p);
-		err = __decode_pool(p, end, pi);
-		if (err < 0) {
-			kfree(pi);
-			goto bad;
-		}
-		__insert_pg_pool(&map->pg_pools, pi);
-	}
+	/* pools */
+	err = decode_pools(p, end, map);
+	if (err)
+		goto bad;
 
-	err = __decode_pool_names(p, end, map);
-	if (err < 0) {
-		dout("fail to decode pool names");
+	/* pool_name */
+	err = decode_pool_names(p, end, map);
+	if (err)
 		goto bad;
-	}
 
-	ceph_decode_32_safe(p, end, map->pool_max, bad);
+	ceph_decode_32_safe(p, end, map->pool_max, e_inval);
 
-	ceph_decode_32_safe(p, end, map->flags, bad);
+	ceph_decode_32_safe(p, end, map->flags, e_inval);
 
-	max = ceph_decode_32(p);
+	/* max_osd */
+	ceph_decode_32_safe(p, end, max, e_inval);
 
 	/* (re)alloc osd arrays */
 	err = osdmap_set_max_osd(map, max);
-	if (err < 0)
+	if (err)
 		goto bad;
-	dout("osdmap_decode max_osd = %d\n", map->max_osd);
 
-	/* osds */
-	err = -EINVAL;
+	/* osd_state, osd_weight, osd_addrs->client_addr */
 	ceph_decode_need(p, end, 3*sizeof(u32) +
 			 map->max_osd*(1 + sizeof(*map->osd_weight) +
-				       sizeof(*map->osd_addr)), bad);
-	*p += 4; /* skip length field (should match max) */
+				       sizeof(*map->osd_addr)), e_inval);
+
+	if (ceph_decode_32(p) != map->max_osd)
+		goto e_inval;
+
 	ceph_decode_copy(p, map->osd_state, map->max_osd);
 
-	*p += 4; /* skip length field (should match max) */
+	if (ceph_decode_32(p) != map->max_osd)
+		goto e_inval;
+
 	for (i = 0; i < map->max_osd; i++)
 		map->osd_weight[i] = ceph_decode_32(p);
 
-	*p += 4; /* skip length field (should match max) */
+	if (ceph_decode_32(p) != map->max_osd)
+		goto e_inval;
+
 	ceph_decode_copy(p, map->osd_addr, map->max_osd*sizeof(*map->osd_addr));
 	for (i = 0; i < map->max_osd; i++)
 		ceph_decode_addr(&map->osd_addr[i]);
 
 	/* pg_temp */
-	ceph_decode_32_safe(p, end, len, bad);
-	for (i = 0; i < len; i++) {
-		int n, j;
-		struct ceph_pg pgid;
-		struct ceph_pg_mapping *pg;
+	err = decode_pg_temp(p, end, map);
+	if (err)
+		goto bad;
 
-		err = ceph_decode_pgid(p, end, &pgid);
+	/* primary_temp */
+	if (struct_v >= 1) {
+		err = decode_primary_temp(p, end, map);
 		if (err)
 			goto bad;
-		ceph_decode_need(p, end, sizeof(u32), bad);
-		n = ceph_decode_32(p);
-		err = -EINVAL;
-		if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
-			goto bad;
-		ceph_decode_need(p, end, n * sizeof(u32), bad);
-		err = -ENOMEM;
-		pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS);
-		if (!pg)
-			goto bad;
-		pg->pgid = pgid;
-		pg->len = n;
-		for (j = 0; j < n; j++)
-			pg->osds[j] = ceph_decode_32(p);
+	}
 
-		err = __insert_pg_mapping(pg, &map->pg_temp);
+	/* primary_affinity */
+	if (struct_v >= 2) {
+		err = decode_primary_affinity(p, end, map);
 		if (err)
 			goto bad;
-		dout(" added pg_temp %lld.%x len %d\n", pgid.pool, pgid.seed,
-		     len);
+	} else {
+		/* XXX can this happen? */
+		kfree(map->osd_primary_affinity);
+		map->osd_primary_affinity = NULL;
 	}
 
 	/* crush */
-	ceph_decode_32_safe(p, end, len, bad);
-	dout("osdmap_decode crush len %d from off 0x%x\n", len,
-	     (int)(*p - start));
-	ceph_decode_need(p, end, len, bad);
-	map->crush = crush_decode(*p, end);
-	*p += len;
+	ceph_decode_32_safe(p, end, len, e_inval);
+	map->crush = crush_decode(*p, min(*p + len, end));
 	if (IS_ERR(map->crush)) {
 		err = PTR_ERR(map->crush);
 		map->crush = NULL;
 		goto bad;
 	}
+	*p += len;
 
-	/* ignore the rest of the map */
+	/* ignore the rest */
 	*p = end;
 
-	dout("osdmap_decode done %p %p\n", *p, end);
-	return map;
+	dout("full osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd);
+	return 0;
 
+e_inval:
+	err = -EINVAL;
 bad:
-	dout("osdmap_decode fail err %d\n", err);
-	ceph_osdmap_destroy(map);
-	return ERR_PTR(err);
+	pr_err("corrupt full osdmap (%d) epoch %d off %d (%p of %p-%p)\n",
+	       err, epoch, (int)(*p - start), *p, start, end);
+	print_hex_dump(KERN_DEBUG, "osdmap: ",
+		       DUMP_PREFIX_OFFSET, 16, 1,
+		       start, end - start, true);
+	return err;
+}
+
+/*
+ * Allocate and decode a full map.
+ */
+struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
+{
+	struct ceph_osdmap *map;
+	int ret;
+
+	map = kzalloc(sizeof(*map), GFP_NOFS);
+	if (!map)
+		return ERR_PTR(-ENOMEM);
+
+	map->pg_temp = RB_ROOT;
+	map->primary_temp = RB_ROOT;
+	mutex_init(&map->crush_scratch_mutex);
+
+	ret = osdmap_decode(p, end, map);
+	if (ret) {
+		ceph_osdmap_destroy(map);
+		return ERR_PTR(ret);
+	}
+
+	return map;
 }
 
 /*
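get_osdmap_client_data_v() is worth pausing on: v7-and-above osdmaps are wrapped in two (struct_v, struct_compat, struct_len) envelopes, and a decoder may proceed only if its supported compat version is not exceeded. A compact userspace model of that envelope check (layout simplified to a single envelope, names invented):

#include <stdint.h>
#include <stdio.h>

#define WRAPPER_COMPAT_VER 7

/* Decode one (struct_v, struct_compat, struct_len) envelope header.
 * struct_compat names the oldest decoder the encoder still supports,
 * so decoding is allowed only when it does not exceed ours. */
static int decode_envelope(const uint8_t **p, uint8_t *v)
{
    uint8_t struct_v = *(*p)++;
    uint8_t struct_compat = *(*p)++;

    if (struct_compat > WRAPPER_COMPAT_VER)
        return -1;           /* encoder requires a newer decoder */

    *p += 4;                 /* skip struct_len (u32), as the kernel does */
    *v = struct_v;
    return 0;
}

int main(void)
{
    /* struct_v = 8, struct_compat = 7, struct_len = 0 */
    const uint8_t buf[] = { 8, 7, 0, 0, 0, 0 };
    const uint8_t *p = buf;
    uint8_t v;

    if (decode_envelope(&p, &v) == 0)
        printf("decoding as struct_v %u\n", v);  /* ok: compat 7 <= 7 */
    return 0;
}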
@@ -840,17 +1177,18 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	__s64 new_pool_max;
 	__s32 new_flags, max;
 	void *start = *p;
-	int err = -EINVAL;
-	u16 version;
+	int err;
+	u8 struct_v;
+
+	dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p));
 
-	ceph_decode_16_safe(p, end, version, bad);
-	if (version != 6) {
-		pr_warning("got unknown v %d != 6 of inc osdmap\n", version);
+	err = get_osdmap_client_data_v(p, end, "inc", &struct_v);
+	if (err)
 		goto bad;
-	}
 
-	ceph_decode_need(p, end, sizeof(fsid)+sizeof(modified)+2*sizeof(u32),
-			 bad);
+	/* fsid, epoch, modified, new_pool_max, new_flags */
+	ceph_decode_need(p, end, sizeof(fsid) + sizeof(u32) + sizeof(modified) +
+			 sizeof(u64) + sizeof(u32), e_inval);
 	ceph_decode_copy(p, &fsid, sizeof(fsid));
 	epoch = ceph_decode_32(p);
 	BUG_ON(epoch != map->epoch+1);
@@ -859,21 +1197,22 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	new_flags = ceph_decode_32(p);
 
 	/* full map? */
-	ceph_decode_32_safe(p, end, len, bad);
+	ceph_decode_32_safe(p, end, len, e_inval);
 	if (len > 0) {
 		dout("apply_incremental full map len %d, %p to %p\n",
 		     len, *p, end);
-		return osdmap_decode(p, min(*p+len, end));
+		return ceph_osdmap_decode(p, min(*p+len, end));
 	}
 
 	/* new crush? */
-	ceph_decode_32_safe(p, end, len, bad);
+	ceph_decode_32_safe(p, end, len, e_inval);
 	if (len > 0) {
-		dout("apply_incremental new crush map len %d, %p to %p\n",
-		     len, *p, end);
 		newcrush = crush_decode(*p, min(*p+len, end));
-		if (IS_ERR(newcrush))
-			return ERR_CAST(newcrush);
+		if (IS_ERR(newcrush)) {
+			err = PTR_ERR(newcrush);
+			newcrush = NULL;
+			goto bad;
+		}
 		*p += len;
 	}
 
@@ -883,13 +1222,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	if (new_pool_max >= 0)
 		map->pool_max = new_pool_max;
 
-	ceph_decode_need(p, end, 5*sizeof(u32), bad);
-
 	/* new max? */
-	max = ceph_decode_32(p);
+	ceph_decode_32_safe(p, end, max, e_inval);
 	if (max >= 0) {
 		err = osdmap_set_max_osd(map, max);
-		if (err < 0)
+		if (err)
 			goto bad;
 	}
 
@@ -902,51 +1239,34 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 		newcrush = NULL;
 	}
 
-	/* new_pool */
-	ceph_decode_32_safe(p, end, len, bad);
-	while (len--) {
-		struct ceph_pg_pool_info *pi;
+	/* new_pools */
+	err = decode_new_pools(p, end, map);
+	if (err)
+		goto bad;
 
-		ceph_decode_64_safe(p, end, pool, bad);
-		pi = __lookup_pg_pool(&map->pg_pools, pool);
-		if (!pi) {
-			pi = kzalloc(sizeof(*pi), GFP_NOFS);
-			if (!pi) {
-				err = -ENOMEM;
-				goto bad;
-			}
-			pi->id = pool;
-			__insert_pg_pool(&map->pg_pools, pi);
-		}
-		err = __decode_pool(p, end, pi);
-		if (err < 0)
-			goto bad;
-	}
-	if (version >= 5) {
-		err = __decode_pool_names(p, end, map);
-		if (err < 0)
-			goto bad;
-	}
+	/* new_pool_names */
+	err = decode_pool_names(p, end, map);
+	if (err)
+		goto bad;
 
 	/* old_pool */
-	ceph_decode_32_safe(p, end, len, bad);
+	ceph_decode_32_safe(p, end, len, e_inval);
 	while (len--) {
 		struct ceph_pg_pool_info *pi;
 
-		ceph_decode_64_safe(p, end, pool, bad);
+		ceph_decode_64_safe(p, end, pool, e_inval);
 		pi = __lookup_pg_pool(&map->pg_pools, pool);
 		if (pi)
 			__remove_pg_pool(&map->pg_pools, pi);
 	}
 
 	/* new_up */
-	err = -EINVAL;
-	ceph_decode_32_safe(p, end, len, bad);
+	ceph_decode_32_safe(p, end, len, e_inval);
 	while (len--) {
 		u32 osd;
 		struct ceph_entity_addr addr;
-		ceph_decode_32_safe(p, end, osd, bad);
-		ceph_decode_copy_safe(p, end, &addr, sizeof(addr), bad);
+		ceph_decode_32_safe(p, end, osd, e_inval);
+		ceph_decode_copy_safe(p, end, &addr, sizeof(addr), e_inval);
 		ceph_decode_addr(&addr);
 		pr_info("osd%d up\n", osd);
 		BUG_ON(osd >= map->max_osd);
@@ -955,11 +1275,11 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	}
 
 	/* new_state */
-	ceph_decode_32_safe(p, end, len, bad);
+	ceph_decode_32_safe(p, end, len, e_inval);
 	while (len--) {
 		u32 osd;
 		u8 xorstate;
-		ceph_decode_32_safe(p, end, osd, bad);
+		ceph_decode_32_safe(p, end, osd, e_inval);
 		xorstate = **(u8 **)p;
 		(*p)++;  /* clean flag */
 		if (xorstate == 0)
@@ -971,10 +1291,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	}
 
 	/* new_weight */
-	ceph_decode_32_safe(p, end, len, bad);
+	ceph_decode_32_safe(p, end, len, e_inval);
 	while (len--) {
 		u32 osd, off;
-		ceph_decode_need(p, end, sizeof(u32)*2, bad);
+		ceph_decode_need(p, end, sizeof(u32)*2, e_inval);
 		osd = ceph_decode_32(p);
 		off = ceph_decode_32(p);
 		pr_info("osd%d weight 0x%x %s\n", osd, off,
@@ -985,56 +1305,35 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
 	}
 
 	/* new_pg_temp */
-	ceph_decode_32_safe(p, end, len, bad);
-	while (len--) {
-		struct ceph_pg_mapping *pg;
-		int j;
-		struct ceph_pg pgid;
-		u32 pglen;
+	err = decode_new_pg_temp(p, end, map);
+	if (err)
+		goto bad;
 
-		err = ceph_decode_pgid(p, end, &pgid);
+	/* new_primary_temp */
+	if (struct_v >= 1) {
+		err = decode_new_primary_temp(p, end, map);
 		if (err)
 			goto bad;
-		ceph_decode_need(p, end, sizeof(u32), bad);
-		pglen = ceph_decode_32(p);
-		if (pglen) {
-			ceph_decode_need(p, end, pglen*sizeof(u32), bad);
-
-			/* removing existing (if any) */
-			(void) __remove_pg_mapping(&map->pg_temp, pgid);
+	}
 
-			/* insert */
-			err = -EINVAL;
-			if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
-				goto bad;
-			err = -ENOMEM;
-			pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS);
-			if (!pg)
-				goto bad;
-			pg->pgid = pgid;
-			pg->len = pglen;
-			for (j = 0; j < pglen; j++)
-				pg->osds[j] = ceph_decode_32(p);
-			err = __insert_pg_mapping(pg, &map->pg_temp);
-			if (err) {
-				kfree(pg);
-				goto bad;
-			}
-			dout(" added pg_temp %lld.%x len %d\n", pgid.pool,
-			     pgid.seed, pglen);
-		} else {
-			/* remove */
-			__remove_pg_mapping(&map->pg_temp, pgid);
-		}
+	/* new_primary_affinity */
+	if (struct_v >= 2) {
+		err = decode_new_primary_affinity(p, end, map);
+		if (err)
+			goto bad;
 	}
 
 	/* ignore the rest */
 	*p = end;
+
+	dout("inc osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd);
 	return map;
 
+e_inval:
+	err = -EINVAL;
 bad:
-	pr_err("corrupt inc osdmap epoch %d off %d (%p of %p-%p)\n",
-	       epoch, (int)(*p - start), *p, start, end);
+	pr_err("corrupt inc osdmap (%d) epoch %d off %d (%p of %p-%p)\n",
+	       err, epoch, (int)(*p - start), *p, start, end);
 	print_hex_dump(KERN_DEBUG, "osdmap: ",
 		       DUMP_PREFIX_OFFSET, 16, 1,
 		       start, end - start, true);
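The incremental path now reuses the same __decode_*() helpers as the full decode, with the `incremental` flag selecting removal semantics: a zero-length pg_temp entry, or a primary_temp entry with osd == -1, means "delete the mapping". A toy model of that shared-decoder pattern (illustrative only):

#include <stdbool.h>
#include <stdio.h>

/* One decoder for both full and incremental maps: in incremental
 * mode an "empty" entry means remove, otherwise (re)insert. */
static void decode_mapping(int pgid, int len, bool incremental)
{
    if (incremental)
        printf("pg %d: drop old mapping (if any)\n", pgid);

    if (!incremental || len > 0)
        printf("pg %d: insert mapping of len %d\n", pgid, len);
    else
        printf("pg %d: removed\n", pgid);
}

int main(void)
{
    decode_mapping(7, 3, false);  /* full map: plain insert */
    decode_mapping(7, 0, true);   /* incremental: len 0 == removal */
    return 0;
}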
@@ -1142,61 +1441,249 @@ int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap,
 }
 EXPORT_SYMBOL(ceph_oloc_oid_to_pg);
 
-static int crush_do_rule_ary(const struct crush_map *map, int ruleno, int x,
-			     int *result, int result_max,
-			     const __u32 *weight, int weight_max)
+static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
+		    int *result, int result_max,
+		    const __u32 *weight, int weight_max)
 {
-	int scratch[result_max * 3];
+	int r;
 
-	return crush_do_rule(map, ruleno, x, result, result_max,
-			     weight, weight_max, scratch);
+	BUG_ON(result_max > CEPH_PG_MAX_SIZE);
+
+	mutex_lock(&map->crush_scratch_mutex);
+	r = crush_do_rule(map->crush, ruleno, x, result, result_max,
+			  weight, weight_max, map->crush_scratch_ary);
+	mutex_unlock(&map->crush_scratch_mutex);
+
+	return r;
 }
 
 /*
- * Calculate raw osd vector for the given pgid.  Return pointer to osd
- * array, or NULL on failure.
+ * Calculate raw (crush) set for given pgid.
+ *
+ * Return raw set length, or error.
  */
-static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
-			int *osds, int *num)
+static int pg_to_raw_osds(struct ceph_osdmap *osdmap,
+			  struct ceph_pg_pool_info *pool,
+			  struct ceph_pg pgid, u32 pps, int *osds)
 {
-	struct ceph_pg_mapping *pg;
-	struct ceph_pg_pool_info *pool;
 	int ruleno;
-	int r;
-	u32 pps;
+	int len;
 
-	pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool);
-	if (!pool)
-		return NULL;
+	/* crush */
+	ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset,
+				 pool->type, pool->size);
+	if (ruleno < 0) {
+		pr_err("no crush rule: pool %lld ruleset %d type %d size %d\n",
+		       pgid.pool, pool->crush_ruleset, pool->type,
+		       pool->size);
+		return -ENOENT;
+	}
 
-	/* pg_temp? */
+	len = do_crush(osdmap, ruleno, pps, osds,
+		       min_t(int, pool->size, CEPH_PG_MAX_SIZE),
+		       osdmap->osd_weight, osdmap->max_osd);
+	if (len < 0) {
+		pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n",
+		       len, ruleno, pgid.pool, pool->crush_ruleset,
+		       pool->type, pool->size);
+		return len;
+	}
+
+	return len;
+}
+
+/*
+ * Given raw set, calculate up set and up primary.
+ *
+ * Return up set length.  *primary is set to up primary osd id, or -1
+ * if up set is empty.
+ */
+static int raw_to_up_osds(struct ceph_osdmap *osdmap,
+			  struct ceph_pg_pool_info *pool,
+			  int *osds, int len, int *primary)
+{
+	int up_primary = -1;
+	int i;
+
+	if (ceph_can_shift_osds(pool)) {
+		int removed = 0;
+
+		for (i = 0; i < len; i++) {
+			if (ceph_osd_is_down(osdmap, osds[i])) {
+				removed++;
+				continue;
+			}
+			if (removed)
+				osds[i - removed] = osds[i];
+		}
+
+		len -= removed;
+		if (len > 0)
+			up_primary = osds[0];
+	} else {
+		for (i = len - 1; i >= 0; i--) {
+			if (ceph_osd_is_down(osdmap, osds[i]))
+				osds[i] = CRUSH_ITEM_NONE;
+			else
+				up_primary = osds[i];
+		}
+	}
+
+	*primary = up_primary;
+	return len;
+}
+
+static void apply_primary_affinity(struct ceph_osdmap *osdmap, u32 pps,
+				   struct ceph_pg_pool_info *pool,
+				   int *osds, int len, int *primary)
+{
+	int i;
+	int pos = -1;
+
+	/*
+	 * Do we have any non-default primary_affinity values for these
+	 * osds?
+	 */
+	if (!osdmap->osd_primary_affinity)
+		return;
+
+	for (i = 0; i < len; i++) {
+		if (osds[i] != CRUSH_ITEM_NONE &&
+		    osdmap->osd_primary_affinity[i] !=
+					CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) {
| 1554 | break; | ||
| 1555 | } | ||
| 1556 | } | ||
| 1557 | if (i == len) | ||
| 1558 | return; | ||
| 1559 | |||
| 1560 | /* | ||
| 1561 | * Pick the primary. Feed both the seed (for the pg) and the | ||
| 1562 | * osd into the hash/rng so that a proportional fraction of an | ||
| 1563 | * osd's pgs get rejected as primary. | ||
| 1564 | */ | ||
| 1565 | for (i = 0; i < len; i++) { | ||
| 1566 | int osd; | ||
| 1567 | u32 aff; | ||
| 1568 | |||
| 1569 | osd = osds[i]; | ||
| 1570 | if (osd == CRUSH_ITEM_NONE) | ||
| 1571 | continue; | ||
| 1572 | |||
| 1573 | aff = osdmap->osd_primary_affinity[osd]; | ||
| 1574 | if (aff < CEPH_OSD_MAX_PRIMARY_AFFINITY && | ||
| 1575 | (crush_hash32_2(CRUSH_HASH_RJENKINS1, | ||
| 1576 | pps, osd) >> 16) >= aff) { | ||
| 1577 | /* | ||
| 1578 | * We chose not to use this primary. Note it | ||
| 1579 | * anyway as a fallback in case we don't pick | ||
| 1580 | * anyone else, but keep looking. | ||
| 1581 | */ | ||
| 1582 | if (pos < 0) | ||
| 1583 | pos = i; | ||
| 1584 | } else { | ||
| 1585 | pos = i; | ||
| 1586 | break; | ||
| 1587 | } | ||
| 1588 | } | ||
| 1589 | if (pos < 0) | ||
| 1590 | return; | ||
| 1591 | |||
| 1592 | *primary = osds[pos]; | ||
| 1593 | |||
| 1594 | if (ceph_can_shift_osds(pool) && pos > 0) { | ||
| 1595 | /* move the new primary to the front */ | ||
| 1596 | for (i = pos; i > 0; i--) | ||
| 1597 | osds[i] = osds[i - 1]; | ||
| 1598 | osds[0] = *primary; | ||
| 1599 | } | ||
| 1600 | } | ||
| 1601 | |||
| 1602 | /* | ||
| 1603 | * Given up set, apply pg_temp and primary_temp mappings. | ||
| 1604 | * | ||
| 1605 | * Return acting set length. *primary is set to acting primary osd id, | ||
| 1606 | * or -1 if acting set is empty. | ||
| 1607 | */ | ||
| 1608 | static int apply_temps(struct ceph_osdmap *osdmap, | ||
| 1609 | struct ceph_pg_pool_info *pool, struct ceph_pg pgid, | ||
| 1610 | int *osds, int len, int *primary) | ||
| 1611 | { | ||
| 1612 | struct ceph_pg_mapping *pg; | ||
| 1613 | int temp_len; | ||
| 1614 | int temp_primary; | ||
| 1615 | int i; | ||
| 1616 | |||
| 1617 | /* raw_pg -> pg */ | ||
| 1173 | pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num, | 1618 | pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num, |
| 1174 | pool->pg_num_mask); | 1619 | pool->pg_num_mask); |
| 1620 | |||
| 1621 | /* pg_temp? */ | ||
| 1175 | pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); | 1622 | pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); |
| 1176 | if (pg) { | 1623 | if (pg) { |
| 1177 | *num = pg->len; | 1624 | temp_len = 0; |
| 1178 | return pg->osds; | 1625 | temp_primary = -1; |
| 1626 | |||
| 1627 | for (i = 0; i < pg->pg_temp.len; i++) { | ||
| 1628 | if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) { | ||
| 1629 | if (ceph_can_shift_osds(pool)) | ||
| 1630 | continue; | ||
| 1631 | else | ||
| 1632 | osds[temp_len++] = CRUSH_ITEM_NONE; | ||
| 1633 | } else { | ||
| 1634 | osds[temp_len++] = pg->pg_temp.osds[i]; | ||
| 1635 | } | ||
| 1636 | } | ||
| 1637 | |||
| 1638 | /* apply pg_temp's primary */ | ||
| 1639 | for (i = 0; i < temp_len; i++) { | ||
| 1640 | if (osds[i] != CRUSH_ITEM_NONE) { | ||
| 1641 | temp_primary = osds[i]; | ||
| 1642 | break; | ||
| 1643 | } | ||
| 1644 | } | ||
| 1645 | } else { | ||
| 1646 | temp_len = len; | ||
| 1647 | temp_primary = *primary; | ||
| 1179 | } | 1648 | } |
| 1180 | 1649 | ||
| 1181 | /* crush */ | 1650 | /* primary_temp? */ |
| 1182 | ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset, | 1651 | pg = __lookup_pg_mapping(&osdmap->primary_temp, pgid); |
| 1183 | pool->type, pool->size); | 1652 | if (pg) |
| 1184 | if (ruleno < 0) { | 1653 | temp_primary = pg->primary_temp.osd; |
| 1185 | pr_err("no crush rule pool %lld ruleset %d type %d size %d\n", | 1654 | |
| 1186 | pgid.pool, pool->crush_ruleset, pool->type, | 1655 | *primary = temp_primary; |
| 1187 | pool->size); | 1656 | return temp_len; |
| 1188 | return NULL; | 1657 | } |
| 1658 | |||
| 1659 | /* | ||
| 1660 | * Calculate acting set for given pgid. | ||
| 1661 | * | ||
| 1662 | * Return acting set length, or error. *primary is set to acting | ||
| 1663 | * primary osd id, or -1 if acting set is empty or on error. | ||
| 1664 | */ | ||
| 1665 | int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | ||
| 1666 | int *osds, int *primary) | ||
| 1667 | { | ||
| 1668 | struct ceph_pg_pool_info *pool; | ||
| 1669 | u32 pps; | ||
| 1670 | int len; | ||
| 1671 | |||
| 1672 | pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); | ||
| 1673 | if (!pool) { | ||
| 1674 | *primary = -1; | ||
| 1675 | return -ENOENT; | ||
| 1189 | } | 1676 | } |
| 1190 | 1677 | ||
| 1191 | if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) { | 1678 | if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) { |
| 1192 | /* hash pool id and seed sothat pool PGs do not overlap */ | 1679 | /* hash pool id and seed so that pool PGs do not overlap */ |
| 1193 | pps = crush_hash32_2(CRUSH_HASH_RJENKINS1, | 1680 | pps = crush_hash32_2(CRUSH_HASH_RJENKINS1, |
| 1194 | ceph_stable_mod(pgid.seed, pool->pgp_num, | 1681 | ceph_stable_mod(pgid.seed, pool->pgp_num, |
| 1195 | pool->pgp_num_mask), | 1682 | pool->pgp_num_mask), |
| 1196 | pgid.pool); | 1683 | pgid.pool); |
| 1197 | } else { | 1684 | } else { |
| 1198 | /* | 1685 | /* |
| 1199 | * legacy ehavior: add ps and pool together. this is | 1686 | * legacy behavior: add ps and pool together. this is |
| 1200 | * not a great approach because the PGs from each pool | 1687 | * not a great approach because the PGs from each pool |
| 1201 | * will overlap on top of each other: 0.5 == 1.4 == | 1688 | * will overlap on top of each other: 0.5 == 1.4 == |
| 1202 | * 2.3 == ... | 1689 | * 2.3 == ... |
| @@ -1205,38 +1692,20 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
| 1205 | pool->pgp_num_mask) + | 1692 | pool->pgp_num_mask) + |
| 1206 | (unsigned)pgid.pool; | 1693 | (unsigned)pgid.pool; |
| 1207 | } | 1694 | } |
| 1208 | r = crush_do_rule_ary(osdmap->crush, ruleno, pps, | 1695 | |
| 1209 | osds, min_t(int, pool->size, *num), | 1696 | len = pg_to_raw_osds(osdmap, pool, pgid, pps, osds); |
| 1210 | osdmap->osd_weight, osdmap->max_osd); | 1697 | if (len < 0) { |
| 1211 | if (r < 0) { | 1698 | *primary = -1; |
| 1212 | pr_err("error %d from crush rule: pool %lld ruleset %d type %d" | 1699 | return len; |
| 1213 | " size %d\n", r, pgid.pool, pool->crush_ruleset, | ||
| 1214 | pool->type, pool->size); | ||
| 1215 | return NULL; | ||
| 1216 | } | 1700 | } |
| 1217 | *num = r; | ||
| 1218 | return osds; | ||
| 1219 | } | ||
| 1220 | 1701 | ||
| 1221 | /* | 1702 | len = raw_to_up_osds(osdmap, pool, osds, len, primary); |
| 1222 | * Return acting set for given pgid. | ||
| 1223 | */ | ||
| 1224 | int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | ||
| 1225 | int *acting) | ||
| 1226 | { | ||
| 1227 | int rawosds[CEPH_PG_MAX_SIZE], *osds; | ||
| 1228 | int i, o, num = CEPH_PG_MAX_SIZE; | ||
| 1229 | 1703 | ||
| 1230 | osds = calc_pg_raw(osdmap, pgid, rawosds, &num); | 1704 | apply_primary_affinity(osdmap, pps, pool, osds, len, primary); |
| 1231 | if (!osds) | ||
| 1232 | return -1; | ||
| 1233 | 1705 | ||
| 1234 | /* primary is first up osd */ | 1706 | len = apply_temps(osdmap, pool, pgid, osds, len, primary); |
| 1235 | o = 0; | 1707 | |
| 1236 | for (i = 0; i < num; i++) | 1708 | return len; |
| 1237 | if (ceph_osd_is_up(osdmap, osds[i])) | ||
| 1238 | acting[o++] = osds[i]; | ||
| 1239 | return o; | ||
| 1240 | } | 1709 | } |
| 1241 | 1710 | ||
| 1242 | /* | 1711 | /* |
| @@ -1244,17 +1713,11 @@ int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | |||
| 1244 | */ | 1713 | */ |
| 1245 | int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) | 1714 | int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) |
| 1246 | { | 1715 | { |
| 1247 | int rawosds[CEPH_PG_MAX_SIZE], *osds; | 1716 | int osds[CEPH_PG_MAX_SIZE]; |
| 1248 | int i, num = CEPH_PG_MAX_SIZE; | 1717 | int primary; |
| 1249 | 1718 | ||
| 1250 | osds = calc_pg_raw(osdmap, pgid, rawosds, &num); | 1719 | ceph_calc_pg_acting(osdmap, pgid, osds, &primary); |
| 1251 | if (!osds) | ||
| 1252 | return -1; | ||
| 1253 | 1720 | ||
| 1254 | /* primary is first up osd */ | 1721 | return primary; |
| 1255 | for (i = 0; i < num; i++) | ||
| 1256 | if (ceph_osd_is_up(osdmap, osds[i])) | ||
| 1257 | return osds[i]; | ||
| 1258 | return -1; | ||
| 1259 | } | 1722 | } |
| 1260 | EXPORT_SYMBOL(ceph_calc_pg_primary); | 1723 | EXPORT_SYMBOL(ceph_calc_pg_primary); |
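Note on the osdmap rework above: the old calc_pg_raw() is split into a four-stage pipeline, pg_to_raw_osds() runs the CRUSH rule, raw_to_up_osds() filters out down OSDs, apply_primary_affinity() may demote the default primary, and apply_temps() overlays pg_temp/primary_temp. The affinity step rejects a candidate primary with probability proportional to one minus its affinity: it hashes the placement seed together with the OSD id and compares the top 16 bits of the hash against the affinity value. A minimal userspace sketch of that rejection test; the toy hash below is an assumption standing in for crush_hash32_2(), and MAX_AFFINITY mirrors CEPH_OSD_MAX_PRIMARY_AFFINITY (0x10000):

	#include <stdint.h>
	#include <stdio.h>

	#define MAX_AFFINITY 0x10000u	/* CEPH_OSD_MAX_PRIMARY_AFFINITY */

	/* toy stand-in for crush_hash32_2(CRUSH_HASH_RJENKINS1, a, b) */
	static uint32_t toy_hash2(uint32_t a, uint32_t b)
	{
		uint32_t h = a * 0x9e3779b1u ^ b * 0x85ebca6bu;
		h ^= h >> 15;
		h *= 0xc2b2ae35u;
		return h ^ (h >> 13);
	}

	/* nonzero if this osd is rejected as primary for this pg seed */
	static int reject_as_primary(uint32_t pps, uint32_t osd, uint32_t aff)
	{
		return aff < MAX_AFFINITY &&
		       (toy_hash2(pps, osd) >> 16) >= aff;
	}

	int main(void)
	{
		int kept = 0;	/* affinity 25%: expect roughly 1 in 4 pgs kept */
		for (uint32_t pps = 0; pps < 100000; pps++)
			if (!reject_as_primary(pps, 3, MAX_AFFINITY / 4))
				kept++;
		printf("kept %d of 100000\n", kept);
		return 0;
	}

As in apply_primary_affinity(), a rejected OSD is still remembered as a fallback so the set never ends up without any primary at all.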
diff --git a/net/core/dev.c b/net/core/dev.c index 757063420ce0..14dac0654f28 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
| @@ -4043,6 +4043,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) | |||
| 4043 | skb->vlan_tci = 0; | 4043 | skb->vlan_tci = 0; |
| 4044 | skb->dev = napi->dev; | 4044 | skb->dev = napi->dev; |
| 4045 | skb->skb_iif = 0; | 4045 | skb->skb_iif = 0; |
| 4046 | skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); | ||
| 4046 | 4047 | ||
| 4047 | napi->skb = skb; | 4048 | napi->skb = skb; |
| 4048 | } | 4049 | } |
| @@ -4588,8 +4589,7 @@ void *netdev_lower_get_next_private(struct net_device *dev, | |||
| 4588 | if (&lower->list == &dev->adj_list.lower) | 4589 | if (&lower->list == &dev->adj_list.lower) |
| 4589 | return NULL; | 4590 | return NULL; |
| 4590 | 4591 | ||
| 4591 | if (iter) | 4592 | *iter = lower->list.next; |
| 4592 | *iter = lower->list.next; | ||
| 4593 | 4593 | ||
| 4594 | return lower->private; | 4594 | return lower->private; |
| 4595 | } | 4595 | } |
| @@ -4617,8 +4617,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, | |||
| 4617 | if (&lower->list == &dev->adj_list.lower) | 4617 | if (&lower->list == &dev->adj_list.lower) |
| 4618 | return NULL; | 4618 | return NULL; |
| 4619 | 4619 | ||
| 4620 | if (iter) | 4620 | *iter = &lower->list; |
| 4621 | *iter = &lower->list; | ||
| 4622 | 4621 | ||
| 4623 | return lower->private; | 4622 | return lower->private; |
| 4624 | } | 4623 | } |
| @@ -5696,6 +5695,13 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, | |||
| 5696 | } | 5695 | } |
| 5697 | } | 5696 | } |
| 5698 | 5697 | ||
| 5698 | #ifdef CONFIG_NET_RX_BUSY_POLL | ||
| 5699 | if (dev->netdev_ops->ndo_busy_poll) | ||
| 5700 | features |= NETIF_F_BUSY_POLL; | ||
| 5701 | else | ||
| 5702 | #endif | ||
| 5703 | features &= ~NETIF_F_BUSY_POLL; | ||
| 5704 | |||
| 5699 | return features; | 5705 | return features; |
| 5700 | } | 5706 | } |
| 5701 | 5707 | ||
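The netdev_fix_features() hunk above relies on a deliberate dangling else across the #endif: when CONFIG_NET_RX_BUSY_POLL is disabled, the preprocessor leaves only the unconditional clear, so NETIF_F_BUSY_POLL can never be advertised; when it is enabled, the bit tracks whether the driver implements ndo_busy_poll. A self-contained toy showing both expansions (the macro names are reused here purely for illustration):

	#include <stdio.h>

	#define CONFIG_NET_RX_BUSY_POLL 1	/* remove to see the other expansion */
	#define NETIF_F_BUSY_POLL (1u << 0)

	static unsigned int fix_features(unsigned int features, int has_ndo_busy_poll)
	{
	#ifdef CONFIG_NET_RX_BUSY_POLL
		if (has_ndo_busy_poll)
			features |= NETIF_F_BUSY_POLL;
		else
	#endif
			features &= ~NETIF_F_BUSY_POLL;
		return features;
	}

	int main(void)
	{
		printf("with ndo_busy_poll:    %#x\n", fix_features(0, 1));
		printf("without ndo_busy_poll: %#x\n", fix_features(0, 0));
		return 0;
	}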
diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 30071dec287a..640ba0e5831c 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c | |||
| @@ -97,6 +97,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] | |||
| 97 | [NETIF_F_RXFCS_BIT] = "rx-fcs", | 97 | [NETIF_F_RXFCS_BIT] = "rx-fcs", |
| 98 | [NETIF_F_RXALL_BIT] = "rx-all", | 98 | [NETIF_F_RXALL_BIT] = "rx-all", |
| 99 | [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", | 99 | [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", |
| 100 | [NETIF_F_BUSY_POLL_BIT] = "busy-poll", | ||
| 100 | }; | 101 | }; |
| 101 | 102 | ||
| 102 | static int ethtool_get_features(struct net_device *dev, void __user *useraddr) | 103 | static int ethtool_get_features(struct net_device *dev, void __user *useraddr) |
diff --git a/net/core/filter.c b/net/core/filter.c index 765556ba32ef..e08b3822c72a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c | |||
| @@ -295,43 +295,43 @@ select_insn: | |||
| 295 | (*(s64 *) &A) >>= K; | 295 | (*(s64 *) &A) >>= K; |
| 296 | CONT; | 296 | CONT; |
| 297 | BPF_ALU64_BPF_MOD_BPF_X: | 297 | BPF_ALU64_BPF_MOD_BPF_X: |
| 298 | if (unlikely(X == 0)) | ||
| 299 | return 0; | ||
| 298 | tmp = A; | 300 | tmp = A; |
| 299 | if (X) | 301 | A = do_div(tmp, X); |
| 300 | A = do_div(tmp, X); | ||
| 301 | CONT; | 302 | CONT; |
| 302 | BPF_ALU_BPF_MOD_BPF_X: | 303 | BPF_ALU_BPF_MOD_BPF_X: |
| 304 | if (unlikely(X == 0)) | ||
| 305 | return 0; | ||
| 303 | tmp = (u32) A; | 306 | tmp = (u32) A; |
| 304 | if (X) | 307 | A = do_div(tmp, (u32) X); |
| 305 | A = do_div(tmp, (u32) X); | ||
| 306 | CONT; | 308 | CONT; |
| 307 | BPF_ALU64_BPF_MOD_BPF_K: | 309 | BPF_ALU64_BPF_MOD_BPF_K: |
| 308 | tmp = A; | 310 | tmp = A; |
| 309 | if (K) | 311 | A = do_div(tmp, K); |
| 310 | A = do_div(tmp, K); | ||
| 311 | CONT; | 312 | CONT; |
| 312 | BPF_ALU_BPF_MOD_BPF_K: | 313 | BPF_ALU_BPF_MOD_BPF_K: |
| 313 | tmp = (u32) A; | 314 | tmp = (u32) A; |
| 314 | if (K) | 315 | A = do_div(tmp, (u32) K); |
| 315 | A = do_div(tmp, (u32) K); | ||
| 316 | CONT; | 316 | CONT; |
| 317 | BPF_ALU64_BPF_DIV_BPF_X: | 317 | BPF_ALU64_BPF_DIV_BPF_X: |
| 318 | if (X) | 318 | if (unlikely(X == 0)) |
| 319 | do_div(A, X); | 319 | return 0; |
| 320 | do_div(A, X); | ||
| 320 | CONT; | 321 | CONT; |
| 321 | BPF_ALU_BPF_DIV_BPF_X: | 322 | BPF_ALU_BPF_DIV_BPF_X: |
| 323 | if (unlikely(X == 0)) | ||
| 324 | return 0; | ||
| 322 | tmp = (u32) A; | 325 | tmp = (u32) A; |
| 323 | if (X) | 326 | do_div(tmp, (u32) X); |
| 324 | do_div(tmp, (u32) X); | ||
| 325 | A = (u32) tmp; | 327 | A = (u32) tmp; |
| 326 | CONT; | 328 | CONT; |
| 327 | BPF_ALU64_BPF_DIV_BPF_K: | 329 | BPF_ALU64_BPF_DIV_BPF_K: |
| 328 | if (K) | 330 | do_div(A, K); |
| 329 | do_div(A, K); | ||
| 330 | CONT; | 331 | CONT; |
| 331 | BPF_ALU_BPF_DIV_BPF_K: | 332 | BPF_ALU_BPF_DIV_BPF_K: |
| 332 | tmp = (u32) A; | 333 | tmp = (u32) A; |
| 333 | if (K) | 334 | do_div(tmp, (u32) K); |
| 334 | do_div(tmp, (u32) K); | ||
| 335 | A = (u32) tmp; | 335 | A = (u32) tmp; |
| 336 | CONT; | 336 | CONT; |
| 337 | BPF_ALU_BPF_END_BPF_TO_BE: | 337 | BPF_ALU_BPF_END_BPF_TO_BE: |
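The filter.c hunk changes how the internal BPF interpreter treats a zero divisor: instead of silently skipping the div/mod and leaving the accumulator unchanged, a zero register operand now terminates the program with a return value of 0, which for a socket filter means the packet is dropped. The checks disappear entirely from the by-immediate (K) cases, presumably because a constant zero divisor can be rejected once at filter load time rather than on every packet. A hedged sketch of the new by-register semantics:

	#include <stdint.h>
	#include <stdio.h>

	/* sketch of one interpreter step: returns 1 if the program must
	 * stop with *ret == 0 (drop), 0 if execution continues */
	static int alu64_div_x(uint64_t *A, uint64_t X, uint64_t *ret)
	{
		if (X == 0) {
			*ret = 0;	/* filter accepts 0 bytes: drop */
			return 1;
		}
		*A /= X;
		return 0;
	}

	int main(void)
	{
		uint64_t A = 42, ret = ~0ull;

		if (alu64_div_x(&A, 0, &ret))
			printf("divide by zero: filter returns %llu\n",
			       (unsigned long long)ret);
		return 0;
	}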
diff --git a/net/core/flow.c b/net/core/flow.c index 31cfb365e0c6..a0348fde1fdf 100644 --- a/net/core/flow.c +++ b/net/core/flow.c | |||
| @@ -455,6 +455,8 @@ int flow_cache_init(struct net *net) | |||
| 455 | if (!fc->percpu) | 455 | if (!fc->percpu) |
| 456 | return -ENOMEM; | 456 | return -ENOMEM; |
| 457 | 457 | ||
| 458 | cpu_notifier_register_begin(); | ||
| 459 | |||
| 458 | for_each_online_cpu(i) { | 460 | for_each_online_cpu(i) { |
| 459 | if (flow_cache_cpu_prepare(fc, i)) | 461 | if (flow_cache_cpu_prepare(fc, i)) |
| 460 | goto err; | 462 | goto err; |
| @@ -462,7 +464,9 @@ int flow_cache_init(struct net *net) | |||
| 462 | fc->hotcpu_notifier = (struct notifier_block){ | 464 | fc->hotcpu_notifier = (struct notifier_block){ |
| 463 | .notifier_call = flow_cache_cpu, | 465 | .notifier_call = flow_cache_cpu, |
| 464 | }; | 466 | }; |
| 465 | register_hotcpu_notifier(&fc->hotcpu_notifier); | 467 | __register_hotcpu_notifier(&fc->hotcpu_notifier); |
| 468 | |||
| 469 | cpu_notifier_register_done(); | ||
| 466 | 470 | ||
| 467 | setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, | 471 | setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, |
| 468 | (unsigned long) fc); | 472 | (unsigned long) fc); |
| @@ -478,6 +482,8 @@ err: | |||
| 478 | fcp->hash_table = NULL; | 482 | fcp->hash_table = NULL; |
| 479 | } | 483 | } |
| 480 | 484 | ||
| 485 | cpu_notifier_register_done(); | ||
| 486 | |||
| 481 | free_percpu(fc->percpu); | 487 | free_percpu(fc->percpu); |
| 482 | fc->percpu = NULL; | 488 | fc->percpu = NULL; |
| 483 | 489 | ||
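flow_cache_init() here picks up the cpu_notifier_register_begin()/done() API introduced for exactly this shape of code: walking the online CPUs and then registering a hotplug notifier must be atomic with respect to CPU hotplug, or a CPU that comes up in between is never prepared. The double-underscore __register_hotcpu_notifier() is the variant that assumes cpu_notifier_register_begin() has already been called. The resulting pattern, as a kernel-context sketch (prepare_cpu() is a hypothetical stand-in for flow_cache_cpu_prepare(); this is a fragment, not a standalone program):

	#include <linux/cpu.h>

	cpu_notifier_register_begin();		/* no CPUs come or go past here */

	for_each_online_cpu(cpu) {
		if (prepare_cpu(cpu))		/* hypothetical per-cpu init */
			goto err;		/* err path must call _done() too */
	}
	__register_hotcpu_notifier(&nb);	/* registration lock already held */

	cpu_notifier_register_done();

That requirement on the error path is why a second cpu_notifier_register_done() appears under the err: label in the hunk above.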
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d0dac57291af..d068ec25db1e 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c | |||
| @@ -3340,7 +3340,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) | |||
| 3340 | 3340 | ||
| 3341 | __netif_tx_lock_bh(txq); | 3341 | __netif_tx_lock_bh(txq); |
| 3342 | 3342 | ||
| 3343 | if (unlikely(netif_xmit_frozen_or_stopped(txq))) { | 3343 | if (unlikely(netif_xmit_frozen_or_drv_stopped(txq))) { |
| 3344 | ret = NETDEV_TX_BUSY; | 3344 | ret = NETDEV_TX_BUSY; |
| 3345 | pkt_dev->last_ok = 0; | 3345 | pkt_dev->last_ok = 0; |
| 3346 | goto unlock; | 3346 | goto unlock; |
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 59da7cde0724..f95b6f93814b 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c | |||
| @@ -1044,8 +1044,10 @@ static int __do_replace(struct net *net, const char *name, | |||
| 1044 | 1044 | ||
| 1045 | xt_free_table_info(oldinfo); | 1045 | xt_free_table_info(oldinfo); |
| 1046 | if (copy_to_user(counters_ptr, counters, | 1046 | if (copy_to_user(counters_ptr, counters, |
| 1047 | sizeof(struct xt_counters) * num_counters) != 0) | 1047 | sizeof(struct xt_counters) * num_counters) != 0) { |
| 1048 | ret = -EFAULT; | 1048 | /* Silent error, can't fail, new table is already in place */ |
| 1049 | net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n"); | ||
| 1050 | } | ||
| 1049 | vfree(counters); | 1051 | vfree(counters); |
| 1050 | xt_table_unlock(t); | 1052 | xt_table_unlock(t); |
| 1051 | return ret; | 1053 | return ret; |
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 718dfbd30cbe..99e810f84671 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c | |||
| @@ -1231,8 +1231,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, | |||
| 1231 | 1231 | ||
| 1232 | xt_free_table_info(oldinfo); | 1232 | xt_free_table_info(oldinfo); |
| 1233 | if (copy_to_user(counters_ptr, counters, | 1233 | if (copy_to_user(counters_ptr, counters, |
| 1234 | sizeof(struct xt_counters) * num_counters) != 0) | 1234 | sizeof(struct xt_counters) * num_counters) != 0) { |
| 1235 | ret = -EFAULT; | 1235 | /* Silent error, can't fail, new table is already in place */ |
| 1236 | net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n"); | ||
| 1237 | } | ||
| 1236 | vfree(counters); | 1238 | vfree(counters); |
| 1237 | xt_table_unlock(t); | 1239 | xt_table_unlock(t); |
| 1238 | return ret; | 1240 | return ret; |
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1be9e990514d..34d094cadb11 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
| @@ -188,7 +188,7 @@ const __u8 ip_tos2prio[16] = { | |||
| 188 | EXPORT_SYMBOL(ip_tos2prio); | 188 | EXPORT_SYMBOL(ip_tos2prio); |
| 189 | 189 | ||
| 190 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); | 190 | static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); |
| 191 | #define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field) | 191 | #define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field) |
| 192 | 192 | ||
| 193 | #ifdef CONFIG_PROC_FS | 193 | #ifdef CONFIG_PROC_FS |
| 194 | static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) | 194 | static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) |
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 710238f58aa9..e080fbbbc0e5 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c | |||
| @@ -1241,8 +1241,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, | |||
| 1241 | 1241 | ||
| 1242 | xt_free_table_info(oldinfo); | 1242 | xt_free_table_info(oldinfo); |
| 1243 | if (copy_to_user(counters_ptr, counters, | 1243 | if (copy_to_user(counters_ptr, counters, |
| 1244 | sizeof(struct xt_counters) * num_counters) != 0) | 1244 | sizeof(struct xt_counters) * num_counters) != 0) { |
| 1245 | ret = -EFAULT; | 1245 | /* Silent error, can't fail, new table is already in place */ |
| 1246 | net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n"); | ||
| 1247 | } | ||
| 1246 | vfree(counters); | 1248 | vfree(counters); |
| 1247 | xt_table_unlock(t); | 1249 | xt_table_unlock(t); |
| 1248 | return ret; | 1250 | return ret; |
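The arptables, iptables and ip6tables hunks (and the ebtables one earlier in the series) all fix the same late-failure bug in __do_replace(): by the time the counters are copied back to userspace, the new table has already been swapped in and the old one freed, so returning -EFAULT would tell userspace the replace failed when it is in fact fully in effect. The common shape, with the swap marked as the point of no return (table_swap_in() is a hypothetical stand-in for the xt_replace_table() path):

	oldinfo = table_swap_in(t, newinfo);	/* point of no return */
	xt_free_table_info(oldinfo);

	if (copy_to_user(counters_ptr, counters,
			 sizeof(struct xt_counters) * num_counters) != 0) {
		/* can't undo the replace; report success, but leave a trace */
		net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
	}
	return ret;	/* still 0 */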
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index cd5b8ec9be04..da787930df0a 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c | |||
| @@ -621,6 +621,42 @@ static void iucv_disable(void) | |||
| 621 | put_online_cpus(); | 621 | put_online_cpus(); |
| 622 | } | 622 | } |
| 623 | 623 | ||
| 624 | static void free_iucv_data(int cpu) | ||
| 625 | { | ||
| 626 | kfree(iucv_param_irq[cpu]); | ||
| 627 | iucv_param_irq[cpu] = NULL; | ||
| 628 | kfree(iucv_param[cpu]); | ||
| 629 | iucv_param[cpu] = NULL; | ||
| 630 | kfree(iucv_irq_data[cpu]); | ||
| 631 | iucv_irq_data[cpu] = NULL; | ||
| 632 | } | ||
| 633 | |||
| 634 | static int alloc_iucv_data(int cpu) | ||
| 635 | { | ||
| 636 | /* Note: GFP_DMA used to get memory below 2G */ | ||
| 637 | iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data), | ||
| 638 | GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); | ||
| 639 | if (!iucv_irq_data[cpu]) | ||
| 640 | goto out_free; | ||
| 641 | |||
| 642 | /* Allocate parameter blocks. */ | ||
| 643 | iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param), | ||
| 644 | GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); | ||
| 645 | if (!iucv_param[cpu]) | ||
| 646 | goto out_free; | ||
| 647 | |||
| 648 | iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param), | ||
| 649 | GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); | ||
| 650 | if (!iucv_param_irq[cpu]) | ||
| 651 | goto out_free; | ||
| 652 | |||
| 653 | return 0; | ||
| 654 | |||
| 655 | out_free: | ||
| 656 | free_iucv_data(cpu); | ||
| 657 | return -ENOMEM; | ||
| 658 | } | ||
| 659 | |||
| 624 | static int iucv_cpu_notify(struct notifier_block *self, | 660 | static int iucv_cpu_notify(struct notifier_block *self, |
| 625 | unsigned long action, void *hcpu) | 661 | unsigned long action, void *hcpu) |
| 626 | { | 662 | { |
| @@ -630,38 +666,14 @@ static int iucv_cpu_notify(struct notifier_block *self, | |||
| 630 | switch (action) { | 666 | switch (action) { |
| 631 | case CPU_UP_PREPARE: | 667 | case CPU_UP_PREPARE: |
| 632 | case CPU_UP_PREPARE_FROZEN: | 668 | case CPU_UP_PREPARE_FROZEN: |
| 633 | iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data), | 669 | if (alloc_iucv_data(cpu)) |
| 634 | GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); | ||
| 635 | if (!iucv_irq_data[cpu]) | ||
| 636 | return notifier_from_errno(-ENOMEM); | ||
| 637 | |||
| 638 | iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param), | ||
| 639 | GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); | ||
| 640 | if (!iucv_param[cpu]) { | ||
| 641 | kfree(iucv_irq_data[cpu]); | ||
| 642 | iucv_irq_data[cpu] = NULL; | ||
| 643 | return notifier_from_errno(-ENOMEM); | 670 | return notifier_from_errno(-ENOMEM); |
| 644 | } | ||
| 645 | iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param), | ||
| 646 | GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); | ||
| 647 | if (!iucv_param_irq[cpu]) { | ||
| 648 | kfree(iucv_param[cpu]); | ||
| 649 | iucv_param[cpu] = NULL; | ||
| 650 | kfree(iucv_irq_data[cpu]); | ||
| 651 | iucv_irq_data[cpu] = NULL; | ||
| 652 | return notifier_from_errno(-ENOMEM); | ||
| 653 | } | ||
| 654 | break; | 671 | break; |
| 655 | case CPU_UP_CANCELED: | 672 | case CPU_UP_CANCELED: |
| 656 | case CPU_UP_CANCELED_FROZEN: | 673 | case CPU_UP_CANCELED_FROZEN: |
| 657 | case CPU_DEAD: | 674 | case CPU_DEAD: |
| 658 | case CPU_DEAD_FROZEN: | 675 | case CPU_DEAD_FROZEN: |
| 659 | kfree(iucv_param_irq[cpu]); | 676 | free_iucv_data(cpu); |
| 660 | iucv_param_irq[cpu] = NULL; | ||
| 661 | kfree(iucv_param[cpu]); | ||
| 662 | iucv_param[cpu] = NULL; | ||
| 663 | kfree(iucv_irq_data[cpu]); | ||
| 664 | iucv_irq_data[cpu] = NULL; | ||
| 665 | break; | 677 | break; |
| 666 | case CPU_ONLINE: | 678 | case CPU_ONLINE: |
| 667 | case CPU_ONLINE_FROZEN: | 679 | case CPU_ONLINE_FROZEN: |
| @@ -2016,7 +2028,7 @@ static int __init iucv_init(void) | |||
| 2016 | rc = iucv_query_maxconn(); | 2028 | rc = iucv_query_maxconn(); |
| 2017 | if (rc) | 2029 | if (rc) |
| 2018 | goto out_ctl; | 2030 | goto out_ctl; |
| 2019 | rc = register_external_interrupt(0x4000, iucv_external_interrupt); | 2031 | rc = register_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt); |
| 2020 | if (rc) | 2032 | if (rc) |
| 2021 | goto out_ctl; | 2033 | goto out_ctl; |
| 2022 | iucv_root = root_device_register("iucv"); | 2034 | iucv_root = root_device_register("iucv"); |
| @@ -2025,33 +2037,20 @@ static int __init iucv_init(void) | |||
| 2025 | goto out_int; | 2037 | goto out_int; |
| 2026 | } | 2038 | } |
| 2027 | 2039 | ||
| 2028 | for_each_online_cpu(cpu) { | 2040 | cpu_notifier_register_begin(); |
| 2029 | /* Note: GFP_DMA used to get memory below 2G */ | ||
| 2030 | iucv_irq_data[cpu] = kmalloc_node(sizeof(struct iucv_irq_data), | ||
| 2031 | GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); | ||
| 2032 | if (!iucv_irq_data[cpu]) { | ||
| 2033 | rc = -ENOMEM; | ||
| 2034 | goto out_free; | ||
| 2035 | } | ||
| 2036 | 2041 | ||
| 2037 | /* Allocate parameter blocks. */ | 2042 | for_each_online_cpu(cpu) { |
| 2038 | iucv_param[cpu] = kmalloc_node(sizeof(union iucv_param), | 2043 | if (alloc_iucv_data(cpu)) { |
| 2039 | GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); | ||
| 2040 | if (!iucv_param[cpu]) { | ||
| 2041 | rc = -ENOMEM; | ||
| 2042 | goto out_free; | ||
| 2043 | } | ||
| 2044 | iucv_param_irq[cpu] = kmalloc_node(sizeof(union iucv_param), | ||
| 2045 | GFP_KERNEL|GFP_DMA, cpu_to_node(cpu)); | ||
| 2046 | if (!iucv_param_irq[cpu]) { | ||
| 2047 | rc = -ENOMEM; | 2044 | rc = -ENOMEM; |
| 2048 | goto out_free; | 2045 | goto out_free; |
| 2049 | } | 2046 | } |
| 2050 | |||
| 2051 | } | 2047 | } |
| 2052 | rc = register_hotcpu_notifier(&iucv_cpu_notifier); | 2048 | rc = __register_hotcpu_notifier(&iucv_cpu_notifier); |
| 2053 | if (rc) | 2049 | if (rc) |
| 2054 | goto out_free; | 2050 | goto out_free; |
| 2051 | |||
| 2052 | cpu_notifier_register_done(); | ||
| 2053 | |||
| 2055 | rc = register_reboot_notifier(&iucv_reboot_notifier); | 2054 | rc = register_reboot_notifier(&iucv_reboot_notifier); |
| 2056 | if (rc) | 2055 | if (rc) |
| 2057 | goto out_cpu; | 2056 | goto out_cpu; |
| @@ -2069,19 +2068,17 @@ static int __init iucv_init(void) | |||
| 2069 | out_reboot: | 2068 | out_reboot: |
| 2070 | unregister_reboot_notifier(&iucv_reboot_notifier); | 2069 | unregister_reboot_notifier(&iucv_reboot_notifier); |
| 2071 | out_cpu: | 2070 | out_cpu: |
| 2072 | unregister_hotcpu_notifier(&iucv_cpu_notifier); | 2071 | cpu_notifier_register_begin(); |
| 2072 | __unregister_hotcpu_notifier(&iucv_cpu_notifier); | ||
| 2073 | out_free: | 2073 | out_free: |
| 2074 | for_each_possible_cpu(cpu) { | 2074 | for_each_possible_cpu(cpu) |
| 2075 | kfree(iucv_param_irq[cpu]); | 2075 | free_iucv_data(cpu); |
| 2076 | iucv_param_irq[cpu] = NULL; | 2076 | |
| 2077 | kfree(iucv_param[cpu]); | 2077 | cpu_notifier_register_done(); |
| 2078 | iucv_param[cpu] = NULL; | 2078 | |
| 2079 | kfree(iucv_irq_data[cpu]); | ||
| 2080 | iucv_irq_data[cpu] = NULL; | ||
| 2081 | } | ||
| 2082 | root_device_unregister(iucv_root); | 2079 | root_device_unregister(iucv_root); |
| 2083 | out_int: | 2080 | out_int: |
| 2084 | unregister_external_interrupt(0x4000, iucv_external_interrupt); | 2081 | unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt); |
| 2085 | out_ctl: | 2082 | out_ctl: |
| 2086 | ctl_clear_bit(0, 1); | 2083 | ctl_clear_bit(0, 1); |
| 2087 | out: | 2084 | out: |
| @@ -2105,18 +2102,14 @@ static void __exit iucv_exit(void) | |||
| 2105 | kfree(p); | 2102 | kfree(p); |
| 2106 | spin_unlock_irq(&iucv_queue_lock); | 2103 | spin_unlock_irq(&iucv_queue_lock); |
| 2107 | unregister_reboot_notifier(&iucv_reboot_notifier); | 2104 | unregister_reboot_notifier(&iucv_reboot_notifier); |
| 2108 | unregister_hotcpu_notifier(&iucv_cpu_notifier); | 2105 | cpu_notifier_register_begin(); |
| 2109 | for_each_possible_cpu(cpu) { | 2106 | __unregister_hotcpu_notifier(&iucv_cpu_notifier); |
| 2110 | kfree(iucv_param_irq[cpu]); | 2107 | for_each_possible_cpu(cpu) |
| 2111 | iucv_param_irq[cpu] = NULL; | 2108 | free_iucv_data(cpu); |
| 2112 | kfree(iucv_param[cpu]); | 2109 | cpu_notifier_register_done(); |
| 2113 | iucv_param[cpu] = NULL; | ||
| 2114 | kfree(iucv_irq_data[cpu]); | ||
| 2115 | iucv_irq_data[cpu] = NULL; | ||
| 2116 | } | ||
| 2117 | root_device_unregister(iucv_root); | 2110 | root_device_unregister(iucv_root); |
| 2118 | bus_unregister(&iucv_bus); | 2111 | bus_unregister(&iucv_bus); |
| 2119 | unregister_external_interrupt(0x4000, iucv_external_interrupt); | 2112 | unregister_external_irq(EXT_IRQ_IUCV, iucv_external_interrupt); |
| 2120 | } | 2113 | } |
| 2121 | 2114 | ||
| 2122 | subsys_initcall(iucv_init); | 2115 | subsys_initcall(iucv_init); |
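The iucv.c changes fold three copies of the per-cpu allocation and cleanup into alloc_iucv_data()/free_iucv_data(). The free helper works from every partially-allocated state because kfree(NULL) is a no-op, so the allocator can jump to a single cleanup label no matter which allocation failed. The same idiom in plain C (free() also tolerates NULL); the struct and sizes are illustrative only:

	#include <stdlib.h>

	struct cpu_data {
		void *irq_data, *param, *param_irq;
	};

	static void free_data(struct cpu_data *d)
	{
		/* safe on partial state: free(NULL) does nothing */
		free(d->param_irq); d->param_irq = NULL;
		free(d->param);     d->param = NULL;
		free(d->irq_data);  d->irq_data = NULL;
	}

	static int alloc_data(struct cpu_data *d)
	{
		if (!(d->irq_data  = calloc(1, 64)))  goto out_free;
		if (!(d->param     = calloc(1, 128))) goto out_free;
		if (!(d->param_irq = calloc(1, 128))) goto out_free;
		return 0;
	out_free:
		free_data(d);
		return -1;
	}

	int main(void)
	{
		struct cpu_data d = { 0 };
		return alloc_data(&d) ? 1 : (free_data(&d), 0);
	}

The init and exit paths also switch to the cpu_notifier_register_begin()/done() bracket discussed under net/core/flow.c above, and to the symbolic register_external_irq(EXT_IRQ_IUCV, ...) in place of the magic 0x4000.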
diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c index 153bd1ddbfbb..f0991f2344d4 100644 --- a/net/mac802154/mib.c +++ b/net/mac802154/mib.c | |||
| @@ -26,7 +26,6 @@ | |||
| 26 | #include <net/mac802154.h> | 26 | #include <net/mac802154.h> |
| 27 | #include <net/ieee802154_netdev.h> | 27 | #include <net/ieee802154_netdev.h> |
| 28 | #include <net/wpan-phy.h> | 28 | #include <net/wpan-phy.h> |
| 29 | #include <net/ieee802154_netdev.h> | ||
| 30 | 29 | ||
| 31 | #include "mac802154.h" | 30 | #include "mac802154.h" |
| 32 | 31 | ||
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 33045a562297..3fd159db9f06 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c | |||
| @@ -152,8 +152,8 @@ nf_tables_chain_type_lookup(const struct nft_af_info *afi, | |||
| 152 | #ifdef CONFIG_MODULES | 152 | #ifdef CONFIG_MODULES |
| 153 | if (autoload) { | 153 | if (autoload) { |
| 154 | nfnl_unlock(NFNL_SUBSYS_NFTABLES); | 154 | nfnl_unlock(NFNL_SUBSYS_NFTABLES); |
| 155 | request_module("nft-chain-%u-%*.s", afi->family, | 155 | request_module("nft-chain-%u-%.*s", afi->family, |
| 156 | nla_len(nla)-1, (const char *)nla_data(nla)); | 156 | nla_len(nla), (const char *)nla_data(nla)); |
| 157 | nfnl_lock(NFNL_SUBSYS_NFTABLES); | 157 | nfnl_lock(NFNL_SUBSYS_NFTABLES); |
| 158 | type = __nf_tables_chain_type_lookup(afi->family, nla); | 158 | type = __nf_tables_chain_type_lookup(afi->family, nla); |
| 159 | if (type != NULL) | 159 | if (type != NULL) |
| @@ -1946,7 +1946,8 @@ static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const | |||
| 1946 | 1946 | ||
| 1947 | static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { | 1947 | static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { |
| 1948 | [NFTA_SET_TABLE] = { .type = NLA_STRING }, | 1948 | [NFTA_SET_TABLE] = { .type = NLA_STRING }, |
| 1949 | [NFTA_SET_NAME] = { .type = NLA_STRING }, | 1949 | [NFTA_SET_NAME] = { .type = NLA_STRING, |
| 1950 | .len = IFNAMSIZ - 1 }, | ||
| 1950 | [NFTA_SET_FLAGS] = { .type = NLA_U32 }, | 1951 | [NFTA_SET_FLAGS] = { .type = NLA_U32 }, |
| 1951 | [NFTA_SET_KEY_TYPE] = { .type = NLA_U32 }, | 1952 | [NFTA_SET_KEY_TYPE] = { .type = NLA_U32 }, |
| 1952 | [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, | 1953 | [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, |
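The request_module() fix above is a classic printf conversion mix-up: "%*.s" consumes the length argument as a field width with an empty (hence zero) precision, so it prints only padding and the requested alias came out as "nft-chain-2-" with the chain type blank. "%.*s" uses the length as a precision, which both prints the name and safely bounds the read of a netlink string that need not be NUL-terminated (which is also why the -1 on nla_len() is dropped). The adjacent policy change simply caps set names at IFNAMSIZ - 1. A two-line demonstration of the format difference:

	#include <stdio.h>

	int main(void)
	{
		const char name[] = "filterXXXX";	/* pretend no NUL after "filter" */
		int len = 6;

		printf("[%*.s]\n", len, name);	/* width 6, precision 0 -> "[      ]" */
		printf("[%.*s]\n", len, name);	/* precision 6          -> "[filter]" */
		return 0;
	}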
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 9a8e77e7f8d4..f4e833005320 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c | |||
| @@ -54,7 +54,8 @@ static struct xt_match cgroup_mt_reg __read_mostly = { | |||
| 54 | .matchsize = sizeof(struct xt_cgroup_info), | 54 | .matchsize = sizeof(struct xt_cgroup_info), |
| 55 | .me = THIS_MODULE, | 55 | .me = THIS_MODULE, |
| 56 | .hooks = (1 << NF_INET_LOCAL_OUT) | | 56 | .hooks = (1 << NF_INET_LOCAL_OUT) | |
| 57 | (1 << NF_INET_POST_ROUTING), | 57 | (1 << NF_INET_POST_ROUTING) | |
| 58 | (1 << NF_INET_LOCAL_IN), | ||
| 58 | }; | 59 | }; |
| 59 | 60 | ||
| 60 | static int __init cgroup_mt_init(void) | 61 | static int __init cgroup_mt_init(void) |
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 458464e7bd7a..fbc66bb250d5 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c | |||
| @@ -32,8 +32,14 @@ | |||
| 32 | #include <net/netfilter/nf_conntrack_tuple.h> | 32 | #include <net/netfilter/nf_conntrack_tuple.h> |
| 33 | #include <net/netfilter/nf_conntrack_zones.h> | 33 | #include <net/netfilter/nf_conntrack_zones.h> |
| 34 | 34 | ||
| 35 | #define CONNLIMIT_SLOTS 32 | 35 | #define CONNLIMIT_SLOTS 256U |
| 36 | #define CONNLIMIT_LOCK_SLOTS 32 | 36 | |
| 37 | #ifdef CONFIG_LOCKDEP | ||
| 38 | #define CONNLIMIT_LOCK_SLOTS 8U | ||
| 39 | #else | ||
| 40 | #define CONNLIMIT_LOCK_SLOTS 256U | ||
| 41 | #endif | ||
| 42 | |||
| 37 | #define CONNLIMIT_GC_MAX_NODES 8 | 43 | #define CONNLIMIT_GC_MAX_NODES 8 |
| 38 | 44 | ||
| 39 | /* we will save the tuples of all connections we care about */ | 45 | /* we will save the tuples of all connections we care about */ |
| @@ -49,10 +55,11 @@ struct xt_connlimit_rb { | |||
| 49 | union nf_inet_addr addr; /* search key */ | 55 | union nf_inet_addr addr; /* search key */ |
| 50 | }; | 56 | }; |
| 51 | 57 | ||
| 58 | static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp; | ||
| 59 | |||
| 52 | struct xt_connlimit_data { | 60 | struct xt_connlimit_data { |
| 53 | struct rb_root climit_root4[CONNLIMIT_SLOTS]; | 61 | struct rb_root climit_root4[CONNLIMIT_SLOTS]; |
| 54 | struct rb_root climit_root6[CONNLIMIT_SLOTS]; | 62 | struct rb_root climit_root6[CONNLIMIT_SLOTS]; |
| 55 | spinlock_t locks[CONNLIMIT_LOCK_SLOTS]; | ||
| 56 | }; | 63 | }; |
| 57 | 64 | ||
| 58 | static u_int32_t connlimit_rnd __read_mostly; | 65 | static u_int32_t connlimit_rnd __read_mostly; |
| @@ -297,11 +304,11 @@ static int count_them(struct net *net, | |||
| 297 | root = &data->climit_root4[hash]; | 304 | root = &data->climit_root4[hash]; |
| 298 | } | 305 | } |
| 299 | 306 | ||
| 300 | spin_lock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]); | 307 | spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); |
| 301 | 308 | ||
| 302 | count = count_tree(net, root, tuple, addr, mask, family); | 309 | count = count_tree(net, root, tuple, addr, mask, family); |
| 303 | 310 | ||
| 304 | spin_unlock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]); | 311 | spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); |
| 305 | 312 | ||
| 306 | return count; | 313 | return count; |
| 307 | } | 314 | } |
| @@ -377,9 +384,6 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) | |||
| 377 | return -ENOMEM; | 384 | return -ENOMEM; |
| 378 | } | 385 | } |
| 379 | 386 | ||
| 380 | for (i = 0; i < ARRAY_SIZE(info->data->locks); ++i) | ||
| 381 | spin_lock_init(&info->data->locks[i]); | ||
| 382 | |||
| 383 | for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) | 387 | for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) |
| 384 | info->data->climit_root4[i] = RB_ROOT; | 388 | info->data->climit_root4[i] = RB_ROOT; |
| 385 | for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i) | 389 | for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i) |
| @@ -435,11 +439,14 @@ static struct xt_match connlimit_mt_reg __read_mostly = { | |||
| 435 | 439 | ||
| 436 | static int __init connlimit_mt_init(void) | 440 | static int __init connlimit_mt_init(void) |
| 437 | { | 441 | { |
| 438 | int ret; | 442 | int ret, i; |
| 439 | 443 | ||
| 440 | BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS); | 444 | BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS); |
| 441 | BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0); | 445 | BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0); |
| 442 | 446 | ||
| 447 | for (i = 0; i < CONNLIMIT_LOCK_SLOTS; ++i) | ||
| 448 | spin_lock_init(&xt_connlimit_locks[i]); | ||
| 449 | |||
| 443 | connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn", | 450 | connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn", |
| 444 | sizeof(struct xt_connlimit_conn), | 451 | sizeof(struct xt_connlimit_conn), |
| 445 | 0, 0, NULL); | 452 | 0, 0, NULL); |
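xt_connlimit now hashes into 256 buckets but protects them with a single static, cacheline-aligned lock array shared by all rule instances: 256 locks normally, capped at 8 under lockdep, presumably to keep lockdep's per-lock bookkeeping manageable. Because CONNLIMIT_SLOTS is a multiple of CONNLIMIT_LOCK_SLOTS (the BUILD_BUG_ONs enforce this), hash % CONNLIMIT_LOCK_SLOTS spreads buckets evenly across the lock stripes. The striping idiom in userspace form, with pthread mutexes standing in for spinlocks and an int array for the rb_root trees:

	#include <pthread.h>

	#define SLOTS      256u
	#define LOCK_SLOTS   8u		/* lockdep build; 256 otherwise */

	static pthread_mutex_t locks[LOCK_SLOTS];
	static int buckets[SLOTS];	/* stand-in for the rb_root arrays */

	static void locks_init(void)
	{
		for (unsigned int i = 0; i < LOCK_SLOTS; i++)
			pthread_mutex_init(&locks[i], NULL);
	}

	/* bucket h is always protected by lock h % LOCK_SLOTS */
	static int read_bucket(unsigned int h)
	{
		pthread_mutex_t *lock = &locks[h % LOCK_SLOTS];
		int val;

		pthread_mutex_lock(lock);
		val = buckets[h % SLOTS];	/* tree walk in the real code */
		pthread_mutex_unlock(lock);
		return val;
	}

	int main(void)
	{
		locks_init();
		return read_bucket(42);
	}

Moving the locks out of struct xt_connlimit_data trades per-rule lock isolation for a smaller per-rule footprint; contention only arises when two rules hash into the same stripe at the same time.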
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 7174611bd672..c529161cdbf8 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c | |||
| @@ -422,4 +422,6 @@ module_exit(xt_osf_fini); | |||
| 422 | MODULE_LICENSE("GPL"); | 422 | MODULE_LICENSE("GPL"); |
| 423 | MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>"); | 423 | MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>"); |
| 424 | MODULE_DESCRIPTION("Passive OS fingerprint matching."); | 424 | MODULE_DESCRIPTION("Passive OS fingerprint matching."); |
| 425 | MODULE_ALIAS("ipt_osf"); | ||
| 426 | MODULE_ALIAS("ip6t_osf"); | ||
| 425 | MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); | 427 | MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); |
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 01039d2b1695..72e0c71fb01d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
| @@ -261,7 +261,7 @@ static int packet_direct_xmit(struct sk_buff *skb) | |||
| 261 | local_bh_disable(); | 261 | local_bh_disable(); |
| 262 | 262 | ||
| 263 | HARD_TX_LOCK(dev, txq, smp_processor_id()); | 263 | HARD_TX_LOCK(dev, txq, smp_processor_id()); |
| 264 | if (!netif_xmit_frozen_or_stopped(txq)) { | 264 | if (!netif_xmit_frozen_or_drv_stopped(txq)) { |
| 265 | ret = ops->ndo_start_xmit(skb, dev); | 265 | ret = ops->ndo_start_xmit(skb, dev); |
| 266 | if (ret == NETDEV_TX_OK) | 266 | if (ret == NETDEV_TX_OK) |
| 267 | txq_trans_update(txq); | 267 | txq_trans_update(txq); |
| @@ -275,6 +275,7 @@ static int packet_direct_xmit(struct sk_buff *skb) | |||
| 275 | 275 | ||
| 276 | return ret; | 276 | return ret; |
| 277 | drop: | 277 | drop: |
| 278 | atomic_long_inc(&dev->tx_dropped); | ||
| 278 | kfree_skb(skb); | 279 | kfree_skb(skb); |
| 279 | return NET_XMIT_DROP; | 280 | return NET_XMIT_DROP; |
| 280 | } | 281 | } |
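Both pktgen and packet_direct_xmit() bypass the qdisc, so they switch from netif_xmit_frozen_or_stopped() to netif_xmit_frozen_or_drv_stopped(): the former also honors the STACK_XOFF bit that byte queue limits set on the qdisc path, which should not gate a qdisc-bypassing sender. A sketch of the mask distinction; the bit names follow include/linux/netdevice.h as of this series, the values are illustrative:

	enum {
		DRV_XOFF   = 1 << 0,	/* driver called netif_tx_stop_queue() */
		STACK_XOFF = 1 << 1,	/* BQL throttling the qdisc path */
		FROZEN     = 1 << 2,
	};

	#define FROZEN_OR_STOPPED	(DRV_XOFF | STACK_XOFF | FROZEN)
	#define FROZEN_OR_DRV_STOPPED	(DRV_XOFF | FROZEN)

	/* a qdisc-bypass sender only cares whether the driver itself
	 * stopped or froze the queue */
	static int can_direct_xmit(unsigned int qstate)
	{
		return !(qstate & FROZEN_OR_DRV_STOPPED);
	}

The af_packet hunk additionally charges skbs dropped on this path to dev->tx_dropped, so the bypass no longer loses packets without a counter moving.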
diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 981aaf8b6ace..5f83a6a2fa67 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c | |||
| @@ -6593,6 +6593,40 @@ static void __sctp_write_space(struct sctp_association *asoc) | |||
| 6593 | } | 6593 | } |
| 6594 | } | 6594 | } |
| 6595 | 6595 | ||
| 6596 | static void sctp_wake_up_waiters(struct sock *sk, | ||
| 6597 | struct sctp_association *asoc) | ||
| 6598 | { | ||
| 6599 | struct sctp_association *tmp = asoc; | ||
| 6600 | |||
| 6601 | /* We do accounting for the sndbuf space per association, | ||
| 6602 | * so we only need to wake our own association. | ||
| 6603 | */ | ||
| 6604 | if (asoc->ep->sndbuf_policy) | ||
| 6605 | return __sctp_write_space(asoc); | ||
| 6606 | |||
| 6607 | /* Accounting for the sndbuf space is per socket, so we | ||
| 6608 | * need to wake up others, try to be fair and in case of | ||
| 6609 | * other associations, let them have a go first instead | ||
| 6610 | * of just doing a sctp_write_space() call. | ||
| 6611 | * | ||
| 6612 | * Note that we reach sctp_wake_up_waiters() only when | ||
| 6613 | * associations free up queued chunks, thus we are under | ||
| 6614 | * lock and the list of associations on a socket is | ||
| 6615 | * guaranteed not to change. | ||
| 6616 | */ | ||
| 6617 | for (tmp = list_next_entry(tmp, asocs); 1; | ||
| 6618 | tmp = list_next_entry(tmp, asocs)) { | ||
| 6619 | /* Manually skip the head element. */ | ||
| 6620 | if (&tmp->asocs == &((sctp_sk(sk))->ep->asocs)) | ||
| 6621 | continue; | ||
| 6622 | /* Wake up association. */ | ||
| 6623 | __sctp_write_space(tmp); | ||
| 6624 | /* We've reached the end. */ | ||
| 6625 | if (tmp == asoc) | ||
| 6626 | break; | ||
| 6627 | } | ||
| 6628 | } | ||
| 6629 | |||
| 6596 | /* Do accounting for the sndbuf space. | 6630 | /* Do accounting for the sndbuf space. |
| 6597 | * Decrement the used sndbuf space of the corresponding association by the | 6631 | * Decrement the used sndbuf space of the corresponding association by the |
| 6598 | * data size which was just transmitted(freed). | 6632 | * data size which was just transmitted(freed). |
| @@ -6620,7 +6654,7 @@ static void sctp_wfree(struct sk_buff *skb) | |||
| 6620 | sk_mem_uncharge(sk, skb->truesize); | 6654 | sk_mem_uncharge(sk, skb->truesize); |
| 6621 | 6655 | ||
| 6622 | sock_wfree(skb); | 6656 | sock_wfree(skb); |
| 6623 | __sctp_write_space(asoc); | 6657 | sctp_wake_up_waiters(sk, asoc); |
| 6624 | 6658 | ||
| 6625 | sctp_association_put(asoc); | 6659 | sctp_association_put(asoc); |
| 6626 | } | 6660 | } |
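sctp_wake_up_waiters() addresses a fairness problem: with socket-wide sndbuf accounting (sndbuf_policy == 0), waking only the association that just freed space would let it immediately refill the buffer and starve its siblings on the same socket. The new code walks the socket's association list as a ring, starting just after the current association and finishing on it, skipping the list head, so every other association gets a wakeup first. The traversal shape as a runnable toy; a plain circular list stands in for the kernel's list_head ring:

	#include <stdio.h>

	struct assoc {
		struct assoc *next;	/* circular, runs through a head sentinel */
		int id;
	};

	/* wake everyone, finishing on the association that freed space */
	static void wake_up_waiters(struct assoc *head, struct assoc *cur)
	{
		for (struct assoc *a = cur->next; ; a = a->next) {
			if (a == head)		/* manually skip the head element */
				continue;
			printf("wake association %d\n", a->id);
			if (a == cur)		/* we've reached the end */
				break;
		}
	}

	int main(void)
	{
		struct assoc head = { 0, 0 }, a1 = { 0, 1 }, a2 = { 0, 2 }, a3 = { 0, 3 };

		head.next = &a1; a1.next = &a2; a2.next = &a3; a3.next = &head;
		wake_up_waiters(&head, &a2);	/* wakes 3, then 1, then 2 */
		return 0;
	}

The per-association accounting case (sndbuf_policy set) keeps the old single-association wakeup, since no other association is waiting on that budget.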
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 241b54f30204..0754d0f466d2 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig | |||
| @@ -9,19 +9,6 @@ config SUNRPC_BACKCHANNEL | |||
| 9 | bool | 9 | bool |
| 10 | depends on SUNRPC | 10 | depends on SUNRPC |
| 11 | 11 | ||
| 12 | config SUNRPC_XPRT_RDMA | ||
| 13 | tristate | ||
| 14 | depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS | ||
| 15 | default SUNRPC && INFINIBAND | ||
| 16 | help | ||
| 17 | This option allows the NFS client and server to support | ||
| 18 | an RDMA-enabled transport. | ||
| 19 | |||
| 20 | To compile RPC client RDMA transport support as a module, | ||
| 21 | choose M here: the module will be called xprtrdma. | ||
| 22 | |||
| 23 | If unsure, say N. | ||
| 24 | |||
| 25 | config SUNRPC_SWAP | 12 | config SUNRPC_SWAP |
| 26 | bool | 13 | bool |
| 27 | depends on SUNRPC | 14 | depends on SUNRPC |
| @@ -57,3 +44,29 @@ config SUNRPC_DEBUG | |||
| 57 | but makes troubleshooting NFS issues significantly harder. | 44 | but makes troubleshooting NFS issues significantly harder. |
| 58 | 45 | ||
| 59 | If unsure, say Y. | 46 | If unsure, say Y. |
| 47 | |||
| 48 | config SUNRPC_XPRT_RDMA_CLIENT | ||
| 49 | tristate "RPC over RDMA Client Support" | ||
| 50 | depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS | ||
| 51 | default SUNRPC && INFINIBAND | ||
| 52 | help | ||
| 53 | This option allows the NFS client to support an RDMA-enabled | ||
| 54 | transport. | ||
| 55 | |||
| 56 | To compile RPC client RDMA transport support as a module, | ||
| 57 | choose M here: the module will be called xprtrdma. | ||
| 58 | |||
| 59 | If unsure, say N. | ||
| 60 | |||
| 61 | config SUNRPC_XPRT_RDMA_SERVER | ||
| 62 | tristate "RPC over RDMA Server Support" | ||
| 63 | depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS | ||
| 64 | default SUNRPC && INFINIBAND | ||
| 65 | help | ||
| 66 | This option allows the NFS server to support an RDMA-enabled | ||
| 67 | transport. | ||
| 68 | |||
| 69 | To compile RPC server RDMA transport support as a module, | ||
| 70 | choose M here: the module will be called svcrdma. | ||
| 71 | |||
| 72 | If unsure, say N. | ||
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 8209a0411bca..e5a7a1cac8f3 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile | |||
| @@ -5,7 +5,8 @@ | |||
| 5 | 5 | ||
| 6 | obj-$(CONFIG_SUNRPC) += sunrpc.o | 6 | obj-$(CONFIG_SUNRPC) += sunrpc.o |
| 7 | obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ | 7 | obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ |
| 8 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ | 8 | |
| 9 | obj-y += xprtrdma/ | ||
| 9 | 10 | ||
| 10 | sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ | 11 | sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ |
| 11 | auth.o auth_null.o auth_unix.o auth_generic.o \ | 12 | auth.o auth_null.o auth_unix.o auth_generic.o \ |
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index e860d4f7ed2a..3513d559bc45 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c | |||
| @@ -212,39 +212,23 @@ out: | |||
| 212 | } | 212 | } |
| 213 | EXPORT_SYMBOL_GPL(xprt_destroy_backchannel); | 213 | EXPORT_SYMBOL_GPL(xprt_destroy_backchannel); |
| 214 | 214 | ||
| 215 | /* | 215 | static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) |
| 216 | * One or more rpc_rqst structures have been preallocated during the | ||
| 217 | * backchannel setup. Buffer space for the send and private XDR buffers | ||
| 218 | * has been preallocated as well. Use xprt_alloc_bc_request to allocate | ||
| 219 | * to this request. Use xprt_free_bc_request to return it. | ||
| 220 | * | ||
| 221 | * We know that we're called in soft interrupt context, grab the spin_lock | ||
| 222 | * since there is no need to grab the bottom half spin_lock. | ||
| 223 | * | ||
| 224 | * Return an available rpc_rqst, otherwise NULL if non are available. | ||
| 225 | */ | ||
| 226 | struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt) | ||
| 227 | { | 216 | { |
| 228 | struct rpc_rqst *req; | 217 | struct rpc_rqst *req = NULL; |
| 229 | 218 | ||
| 230 | dprintk("RPC: allocate a backchannel request\n"); | 219 | dprintk("RPC: allocate a backchannel request\n"); |
| 231 | spin_lock(&xprt->bc_pa_lock); | 220 | if (list_empty(&xprt->bc_pa_list)) |
| 232 | if (!list_empty(&xprt->bc_pa_list)) { | 221 | goto not_found; |
| 233 | req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, | ||
| 234 | rq_bc_pa_list); | ||
| 235 | list_del(&req->rq_bc_pa_list); | ||
| 236 | } else { | ||
| 237 | req = NULL; | ||
| 238 | } | ||
| 239 | spin_unlock(&xprt->bc_pa_lock); | ||
| 240 | 222 | ||
| 241 | if (req != NULL) { | 223 | req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, |
| 242 | set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); | 224 | rq_bc_pa_list); |
| 243 | req->rq_reply_bytes_recvd = 0; | 225 | req->rq_reply_bytes_recvd = 0; |
| 244 | req->rq_bytes_sent = 0; | 226 | req->rq_bytes_sent = 0; |
| 245 | memcpy(&req->rq_private_buf, &req->rq_rcv_buf, | 227 | memcpy(&req->rq_private_buf, &req->rq_rcv_buf, |
| 246 | sizeof(req->rq_private_buf)); | 228 | sizeof(req->rq_private_buf)); |
| 247 | } | 229 | req->rq_xid = xid; |
| 230 | req->rq_connect_cookie = xprt->connect_cookie; | ||
| 231 | not_found: | ||
| 248 | dprintk("RPC: backchannel req=%p\n", req); | 232 | dprintk("RPC: backchannel req=%p\n", req); |
| 249 | return req; | 233 | return req; |
| 250 | } | 234 | } |
| @@ -259,6 +243,7 @@ void xprt_free_bc_request(struct rpc_rqst *req) | |||
| 259 | 243 | ||
| 260 | dprintk("RPC: free backchannel req=%p\n", req); | 244 | dprintk("RPC: free backchannel req=%p\n", req); |
| 261 | 245 | ||
| 246 | req->rq_connect_cookie = xprt->connect_cookie - 1; | ||
| 262 | smp_mb__before_clear_bit(); | 247 | smp_mb__before_clear_bit(); |
| 263 | WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); | 248 | WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); |
| 264 | clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); | 249 | clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); |
| @@ -281,7 +266,57 @@ void xprt_free_bc_request(struct rpc_rqst *req) | |||
| 281 | * may be reused by a new callback request. | 266 | * may be reused by a new callback request. |
| 282 | */ | 267 | */ |
| 283 | spin_lock_bh(&xprt->bc_pa_lock); | 268 | spin_lock_bh(&xprt->bc_pa_lock); |
| 284 | list_add(&req->rq_bc_pa_list, &xprt->bc_pa_list); | 269 | list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); |
| 285 | spin_unlock_bh(&xprt->bc_pa_lock); | 270 | spin_unlock_bh(&xprt->bc_pa_lock); |
| 286 | } | 271 | } |
| 287 | 272 | ||
| 273 | /* | ||
| 274 | * One or more rpc_rqst structures have been preallocated during the | ||
| 275 | * backchannel setup. Buffer space for the send and private XDR buffers | ||
| 276 | * has been preallocated as well. Use xprt_alloc_bc_request to allocate | ||
| 277 | * to this request. Use xprt_free_bc_request to return it. | ||
| 278 | * | ||
| 279 | * We know that we're called in soft interrupt context, grab the spin_lock | ||
| 280 | * since there is no need to grab the bottom half spin_lock. | ||
| 281 | * | ||
| 282 | * Return an available rpc_rqst, otherwise NULL if non are available. | ||
| 283 | */ | ||
| 284 | struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid) | ||
| 285 | { | ||
| 286 | struct rpc_rqst *req; | ||
| 287 | |||
| 288 | spin_lock(&xprt->bc_pa_lock); | ||
| 289 | list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) { | ||
| 290 | if (req->rq_connect_cookie != xprt->connect_cookie) | ||
| 291 | continue; | ||
| 292 | if (req->rq_xid == xid) | ||
| 293 | goto found; | ||
| 294 | } | ||
| 295 | req = xprt_alloc_bc_request(xprt, xid); | ||
| 296 | found: | ||
| 297 | spin_unlock(&xprt->bc_pa_lock); | ||
| 298 | return req; | ||
| 299 | } | ||
| 300 | |||
| 301 | /* | ||
| 302 | * Add callback request to callback list. The callback | ||
| 303 | * service sleeps on the sv_cb_waitq waiting for new | ||
| 304 | * requests. Wake it up after enqueueing the | ||
| 305 | * request. | ||
| 306 | */ | ||
| 307 | void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) | ||
| 308 | { | ||
| 309 | struct rpc_xprt *xprt = req->rq_xprt; | ||
| 310 | struct svc_serv *bc_serv = xprt->bc_serv; | ||
| 311 | |||
| 312 | req->rq_private_buf.len = copied; | ||
| 313 | set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); | ||
| 314 | |||
| 315 | dprintk("RPC: add callback request to list\n"); | ||
| 316 | spin_lock(&bc_serv->sv_cb_lock); | ||
| 317 | list_del(&req->rq_bc_pa_list); | ||
| 318 | list_add(&req->rq_bc_list, &bc_serv->sv_cb_list); | ||
| 319 | wake_up(&bc_serv->sv_cb_waitq); | ||
| 320 | spin_unlock(&bc_serv->sv_cb_lock); | ||
| 321 | } | ||
| 322 | |||
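The backchannel rework replaces "grab any free preallocated rqst" with a lookup keyed on XID plus the transport's connect cookie: xprt_lookup_bc_request() first searches bc_pa_list for a slot already bound to this XID on the current connection, so a callback body arriving in pieces over TCP keeps landing in the same rqst, and only claims a fresh slot when none matches. Freeing a slot sets rq_connect_cookie to connect_cookie - 1, which cheaply invalidates it for lookup until it is handed out again; xprt_complete_bc_request() then moves the finished rqst onto the callback service's sv_cb_list and wakes the service. A condensed, annotated restatement of the lookup (the control flow from the hunk above, not new code):

	spin_lock(&xprt->bc_pa_lock);
	list_for_each_entry(req, &xprt->bc_pa_list, rq_bc_pa_list) {
		if (req->rq_connect_cookie == xprt->connect_cookie &&
		    req->rq_xid == xid)
			goto found;		/* partial data for this call */
	}
	req = xprt_alloc_bc_request(xprt, xid);	/* may be NULL: no free slot */
found:
	spin_unlock(&xprt->bc_pa_lock);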
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0edada973434..2e6ab10734f6 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
| @@ -438,6 +438,38 @@ out_no_rpciod: | |||
| 438 | return ERR_PTR(err); | 438 | return ERR_PTR(err); |
| 439 | } | 439 | } |
| 440 | 440 | ||
| 441 | struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, | ||
| 442 | struct rpc_xprt *xprt) | ||
| 443 | { | ||
| 444 | struct rpc_clnt *clnt = NULL; | ||
| 445 | |||
| 446 | clnt = rpc_new_client(args, xprt, NULL); | ||
| 447 | if (IS_ERR(clnt)) | ||
| 448 | return clnt; | ||
| 449 | |||
| 450 | if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { | ||
| 451 | int err = rpc_ping(clnt); | ||
| 452 | if (err != 0) { | ||
| 453 | rpc_shutdown_client(clnt); | ||
| 454 | return ERR_PTR(err); | ||
| 455 | } | ||
| 456 | } | ||
| 457 | |||
| 458 | clnt->cl_softrtry = 1; | ||
| 459 | if (args->flags & RPC_CLNT_CREATE_HARDRTRY) | ||
| 460 | clnt->cl_softrtry = 0; | ||
| 461 | |||
| 462 | if (args->flags & RPC_CLNT_CREATE_AUTOBIND) | ||
| 463 | clnt->cl_autobind = 1; | ||
| 464 | if (args->flags & RPC_CLNT_CREATE_DISCRTRY) | ||
| 465 | clnt->cl_discrtry = 1; | ||
| 466 | if (!(args->flags & RPC_CLNT_CREATE_QUIET)) | ||
| 467 | clnt->cl_chatty = 1; | ||
| 468 | |||
| 469 | return clnt; | ||
| 470 | } | ||
| 471 | EXPORT_SYMBOL_GPL(rpc_create_xprt); | ||
| 472 | |||
| 441 | /** | 473 | /** |
| 442 | * rpc_create - create an RPC client and transport with one call | 474 | * rpc_create - create an RPC client and transport with one call |
| 443 | * @args: rpc_clnt create argument structure | 475 | * @args: rpc_clnt create argument structure |
| @@ -451,7 +483,6 @@ out_no_rpciod: | |||
| 451 | struct rpc_clnt *rpc_create(struct rpc_create_args *args) | 483 | struct rpc_clnt *rpc_create(struct rpc_create_args *args) |
| 452 | { | 484 | { |
| 453 | struct rpc_xprt *xprt; | 485 | struct rpc_xprt *xprt; |
| 454 | struct rpc_clnt *clnt; | ||
| 455 | struct xprt_create xprtargs = { | 486 | struct xprt_create xprtargs = { |
| 456 | .net = args->net, | 487 | .net = args->net, |
| 457 | .ident = args->protocol, | 488 | .ident = args->protocol, |
| @@ -515,30 +546,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) | |||
| 515 | if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) | 546 | if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) |
| 516 | xprt->resvport = 0; | 547 | xprt->resvport = 0; |
| 517 | 548 | ||
| 518 | clnt = rpc_new_client(args, xprt, NULL); | 549 | return rpc_create_xprt(args, xprt); |
| 519 | if (IS_ERR(clnt)) | ||
| 520 | return clnt; | ||
| 521 | |||
| 522 | if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { | ||
| 523 | int err = rpc_ping(clnt); | ||
| 524 | if (err != 0) { | ||
| 525 | rpc_shutdown_client(clnt); | ||
| 526 | return ERR_PTR(err); | ||
| 527 | } | ||
| 528 | } | ||
| 529 | |||
| 530 | clnt->cl_softrtry = 1; | ||
| 531 | if (args->flags & RPC_CLNT_CREATE_HARDRTRY) | ||
| 532 | clnt->cl_softrtry = 0; | ||
| 533 | |||
| 534 | if (args->flags & RPC_CLNT_CREATE_AUTOBIND) | ||
| 535 | clnt->cl_autobind = 1; | ||
| 536 | if (args->flags & RPC_CLNT_CREATE_DISCRTRY) | ||
| 537 | clnt->cl_discrtry = 1; | ||
| 538 | if (!(args->flags & RPC_CLNT_CREATE_QUIET)) | ||
| 539 | clnt->cl_chatty = 1; | ||
| 540 | |||
| 541 | return clnt; | ||
| 542 | } | 550 | } |
| 543 | EXPORT_SYMBOL_GPL(rpc_create); | 551 | EXPORT_SYMBOL_GPL(rpc_create); |
| 544 | 552 | ||
| @@ -1363,6 +1371,7 @@ rpc_restart_call_prepare(struct rpc_task *task) | |||
| 1363 | if (RPC_ASSASSINATED(task)) | 1371 | if (RPC_ASSASSINATED(task)) |
| 1364 | return 0; | 1372 | return 0; |
| 1365 | task->tk_action = call_start; | 1373 | task->tk_action = call_start; |
| 1374 | task->tk_status = 0; | ||
| 1366 | if (task->tk_ops->rpc_call_prepare != NULL) | 1375 | if (task->tk_ops->rpc_call_prepare != NULL) |
| 1367 | task->tk_action = rpc_prepare_task; | 1376 | task->tk_action = rpc_prepare_task; |
| 1368 | return 1; | 1377 | return 1; |
| @@ -1379,6 +1388,7 @@ rpc_restart_call(struct rpc_task *task) | |||
| 1379 | if (RPC_ASSASSINATED(task)) | 1388 | if (RPC_ASSASSINATED(task)) |
| 1380 | return 0; | 1389 | return 0; |
| 1381 | task->tk_action = call_start; | 1390 | task->tk_action = call_start; |
| 1391 | task->tk_status = 0; | ||
| 1382 | return 1; | 1392 | return 1; |
| 1383 | } | 1393 | } |
| 1384 | EXPORT_SYMBOL_GPL(rpc_restart_call); | 1394 | EXPORT_SYMBOL_GPL(rpc_restart_call); |
| @@ -1728,9 +1738,7 @@ call_bind_status(struct rpc_task *task) | |||
| 1728 | case -EPROTONOSUPPORT: | 1738 | case -EPROTONOSUPPORT: |
| 1729 | dprintk("RPC: %5u remote rpcbind version unavailable, retrying\n", | 1739 | dprintk("RPC: %5u remote rpcbind version unavailable, retrying\n", |
| 1730 | task->tk_pid); | 1740 | task->tk_pid); |
| 1731 | task->tk_status = 0; | 1741 | goto retry_timeout; |
| 1732 | task->tk_action = call_bind; | ||
| 1733 | return; | ||
| 1734 | case -ECONNREFUSED: /* connection problems */ | 1742 | case -ECONNREFUSED: /* connection problems */ |
| 1735 | case -ECONNRESET: | 1743 | case -ECONNRESET: |
| 1736 | case -ECONNABORTED: | 1744 | case -ECONNABORTED: |
| @@ -1756,6 +1764,7 @@ call_bind_status(struct rpc_task *task) | |||
| 1756 | return; | 1764 | return; |
| 1757 | 1765 | ||
| 1758 | retry_timeout: | 1766 | retry_timeout: |
| 1767 | task->tk_status = 0; | ||
| 1759 | task->tk_action = call_timeout; | 1768 | task->tk_action = call_timeout; |
| 1760 | } | 1769 | } |
| 1761 | 1770 | ||
| @@ -1798,21 +1807,19 @@ call_connect_status(struct rpc_task *task) | |||
| 1798 | trace_rpc_connect_status(task, status); | 1807 | trace_rpc_connect_status(task, status); |
| 1799 | task->tk_status = 0; | 1808 | task->tk_status = 0; |
| 1800 | switch (status) { | 1809 | switch (status) { |
| 1801 | /* if soft mounted, test if we've timed out */ | ||
| 1802 | case -ETIMEDOUT: | ||
| 1803 | task->tk_action = call_timeout; | ||
| 1804 | return; | ||
| 1805 | case -ECONNREFUSED: | 1810 | case -ECONNREFUSED: |
| 1806 | case -ECONNRESET: | 1811 | case -ECONNRESET: |
| 1807 | case -ECONNABORTED: | 1812 | case -ECONNABORTED: |
| 1808 | case -ENETUNREACH: | 1813 | case -ENETUNREACH: |
| 1809 | case -EHOSTUNREACH: | 1814 | case -EHOSTUNREACH: |
| 1810 | /* retry with existing socket, after a delay */ | ||
| 1811 | rpc_delay(task, 3*HZ); | ||
| 1812 | if (RPC_IS_SOFTCONN(task)) | 1815 | if (RPC_IS_SOFTCONN(task)) |
| 1813 | break; | 1816 | break; |
| 1817 | /* retry with existing socket, after a delay */ | ||
| 1818 | rpc_delay(task, 3*HZ); | ||
| 1814 | case -EAGAIN: | 1819 | case -EAGAIN: |
| 1815 | task->tk_action = call_bind; | 1820 | /* Check for timeouts before looping back to call_bind */ |
| 1821 | case -ETIMEDOUT: | ||
| 1822 | task->tk_action = call_timeout; | ||
| 1816 | return; | 1823 | return; |
| 1817 | case 0: | 1824 | case 0: |
| 1818 | clnt->cl_stats->netreconn++; | 1825 | clnt->cl_stats->netreconn++; |
| @@ -2007,6 +2014,10 @@ call_status(struct rpc_task *task) | |||
| 2007 | case -EHOSTDOWN: | 2014 | case -EHOSTDOWN: |
| 2008 | case -EHOSTUNREACH: | 2015 | case -EHOSTUNREACH: |
| 2009 | case -ENETUNREACH: | 2016 | case -ENETUNREACH: |
| 2017 | if (RPC_IS_SOFTCONN(task)) { | ||
| 2018 | rpc_exit(task, status); | ||
| 2019 | break; | ||
| 2020 | } | ||
| 2010 | /* | 2021 | /* |
| 2011 | * Delay any retries for 3 seconds, then handle as if it | 2022 | * Delay any retries for 3 seconds, then handle as if it |
| 2012 | * were a timeout. | 2023 | * were a timeout. |
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index ff3cc4bf4b24..25578afe1548 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
| @@ -637,7 +637,8 @@ static void __rpc_queue_timer_fn(unsigned long ptr) | |||
| 637 | 637 | ||
| 638 | static void __rpc_atrun(struct rpc_task *task) | 638 | static void __rpc_atrun(struct rpc_task *task) |
| 639 | { | 639 | { |
| 640 | task->tk_status = 0; | 640 | if (task->tk_status == -ETIMEDOUT) |
| 641 | task->tk_status = 0; | ||
| 641 | } | 642 | } |
| 642 | 643 | ||
| 643 | /* | 644 | /* |
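The new guard means __rpc_atrun() clears only the -ETIMEDOUT left behind by the delay timer itself; an error delivered while the task slept now survives the wakeup. A sketch of the case being protected, assuming a caller that terminates a delayed task:

static void example_kill_delayed(struct rpc_task *task)
{
	/* rpc_exit() stores -EIO in tk_status and wakes the task;
	 * the guarded __rpc_atrun() above no longer zeroes it */
	rpc_exit(task, -EIO);
}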
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b6e59f0a9475..d06cb8752dcd 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
| @@ -1397,6 +1397,22 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, | |||
| 1397 | return svsk; | 1397 | return svsk; |
| 1398 | } | 1398 | } |
| 1399 | 1399 | ||
| 1400 | bool svc_alien_sock(struct net *net, int fd) | ||
| 1401 | { | ||
| 1402 | int err; | ||
| 1403 | struct socket *sock = sockfd_lookup(fd, &err); | ||
| 1404 | bool ret = false; | ||
| 1405 | |||
| 1406 | if (!sock) | ||
| 1407 | goto out; | ||
| 1408 | if (sock_net(sock->sk) != net) | ||
| 1409 | ret = true; | ||
| 1410 | sockfd_put(sock); | ||
| 1411 | out: | ||
| 1412 | return ret; | ||
| 1413 | } | ||
| 1414 | EXPORT_SYMBOL_GPL(svc_alien_sock); | ||
| 1415 | |||
| 1400 | /** | 1416 | /** |
| 1401 | * svc_addsock - add a listener socket to an RPC service | 1417 | * svc_addsock - add a listener socket to an RPC service |
| 1402 | * @serv: pointer to RPC service to which to add a new listener | 1418 | * @serv: pointer to RPC service to which to add a new listener |
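svc_alien_sock() lets a service refuse a listener fd whose socket was created in a different network namespace. A caller-side sketch, assuming the svc_addsock() signature of this era; the wrapper name is made up:

static int example_addsock_checked(struct svc_serv *serv, struct net *net,
				   int fd, char *name_return, size_t len)
{
	/* refuse sockets whose sk lives in a foreign netns */
	if (svc_alien_sock(net, fd))
		return -EINVAL;
	return svc_addsock(serv, fd, name_return, len);
}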
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 1504bb11e4f3..dd97ba3c4456 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c | |||
| @@ -833,8 +833,20 @@ xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf) | |||
| 833 | } | 833 | } |
| 834 | EXPORT_SYMBOL_GPL(xdr_buf_from_iov); | 834 | EXPORT_SYMBOL_GPL(xdr_buf_from_iov); |
| 835 | 835 | ||
| 836 | /* Sets subbuf to the portion of buf of length len beginning base bytes | 836 | /** |
| 837 | * from the start of buf. Returns -1 if base of length are out of bounds. */ | 837 | * xdr_buf_subsegment - set subbuf to a portion of buf |
| 838 | * @buf: an xdr buffer | ||
| 839 | * @subbuf: the result buffer | ||
| 840 | * @base: beginning of range in bytes | ||
| 841 | * @len: length of range in bytes | ||
| 842 | * | ||
| 843 | * Sets @subbuf to an xdr buffer representing the portion of @buf of | ||
| 844 | * length @len starting at offset @base. | ||
| 845 | * | ||
| 846 | * @buf and @subbuf may be pointers to the same struct xdr_buf. | ||
| 847 | * | ||
| 848 | * Returns -1 if base or length are out of bounds. | ||
| 849 | */ | ||
| 838 | int | 850 | int |
| 839 | xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, | 851 | xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, |
| 840 | unsigned int base, unsigned int len) | 852 | unsigned int base, unsigned int len) |
| @@ -847,9 +859,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, | |||
| 847 | len -= subbuf->head[0].iov_len; | 859 | len -= subbuf->head[0].iov_len; |
| 848 | base = 0; | 860 | base = 0; |
| 849 | } else { | 861 | } else { |
| 850 | subbuf->head[0].iov_base = NULL; | ||
| 851 | subbuf->head[0].iov_len = 0; | ||
| 852 | base -= buf->head[0].iov_len; | 862 | base -= buf->head[0].iov_len; |
| 863 | subbuf->head[0].iov_len = 0; | ||
| 853 | } | 864 | } |
| 854 | 865 | ||
| 855 | if (base < buf->page_len) { | 866 | if (base < buf->page_len) { |
| @@ -871,9 +882,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, | |||
| 871 | len -= subbuf->tail[0].iov_len; | 882 | len -= subbuf->tail[0].iov_len; |
| 872 | base = 0; | 883 | base = 0; |
| 873 | } else { | 884 | } else { |
| 874 | subbuf->tail[0].iov_base = NULL; | ||
| 875 | subbuf->tail[0].iov_len = 0; | ||
| 876 | base -= buf->tail[0].iov_len; | 885 | base -= buf->tail[0].iov_len; |
| 886 | subbuf->tail[0].iov_len = 0; | ||
| 877 | } | 887 | } |
| 878 | 888 | ||
| 879 | if (base || len) | 889 | if (base || len) |
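Because the new kernel-doc guarantees that @buf and @subbuf may alias, a caller can trim a buffer in place. A minimal sketch under that assumption; the helper name and fixed header length are illustrative:

static int example_trim_to_payload(struct xdr_buf *buf,
				   unsigned int hdrlen,
				   unsigned int paylen)
{
	/* carve [hdrlen, hdrlen + paylen) out of buf, writing the
	 * result back over buf itself */
	if (xdr_buf_subsegment(buf, buf, hdrlen, paylen))
		return -EINVAL;	/* base or length out of bounds */
	return 0;
}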
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 7d4df99f761f..d173f79947c6 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
| @@ -1383,15 +1383,3 @@ void xprt_put(struct rpc_xprt *xprt) | |||
| 1383 | if (atomic_dec_and_test(&xprt->count)) | 1383 | if (atomic_dec_and_test(&xprt->count)) |
| 1384 | xprt_destroy(xprt); | 1384 | xprt_destroy(xprt); |
| 1385 | } | 1385 | } |
| 1386 | |||
| 1387 | /** | ||
| 1388 | * xprt_get - return a reference to an RPC transport. | ||
| 1389 | * @xprt: pointer to the transport | ||
| 1390 | * | ||
| 1391 | */ | ||
| 1392 | struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) | ||
| 1393 | { | ||
| 1394 | if (atomic_inc_not_zero(&xprt->count)) | ||
| 1395 | return xprt; | ||
| 1396 | return NULL; | ||
| 1397 | } | ||
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index 5a8f268bdd30..da5136fd5694 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile | |||
| @@ -1,8 +1,8 @@ | |||
| 1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o | 1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o |
| 2 | 2 | ||
| 3 | xprtrdma-y := transport.o rpc_rdma.o verbs.o | 3 | xprtrdma-y := transport.o rpc_rdma.o verbs.o |
| 4 | 4 | ||
| 5 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += svcrdma.o | 5 | obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o |
| 6 | 6 | ||
| 7 | svcrdma-y := svc_rdma.o svc_rdma_transport.o \ | 7 | svcrdma-y := svc_rdma.o svc_rdma_transport.o \ |
| 8 | svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o | 8 | svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o |
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index e03725bfe2b8..96ead526b125 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c | |||
| @@ -649,9 +649,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) | |||
| 649 | break; | 649 | break; |
| 650 | page_base = 0; | 650 | page_base = 0; |
| 651 | } | 651 | } |
| 652 | rqst->rq_rcv_buf.page_len = olen - copy_len; | 652 | } |
| 653 | } else | ||
| 654 | rqst->rq_rcv_buf.page_len = 0; | ||
| 655 | 653 | ||
| 656 | if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) { | 654 | if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) { |
| 657 | curlen = copy_len; | 655 | curlen = copy_len; |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 0ce75524ed21..8d904e4eef15 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | |||
| @@ -90,6 +90,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, | |||
| 90 | sge_no++; | 90 | sge_no++; |
| 91 | } | 91 | } |
| 92 | rqstp->rq_respages = &rqstp->rq_pages[sge_no]; | 92 | rqstp->rq_respages = &rqstp->rq_pages[sge_no]; |
| 93 | rqstp->rq_next_page = rqstp->rq_respages + 1; | ||
| 93 | 94 | ||
| 94 | /* We should never run out of SGE because the limit is defined to | 95 | /* We should never run out of SGE because the limit is defined to |
| 95 | * support the max allowed RPC data length | 96 | * support the max allowed RPC data length |
| @@ -169,6 +170,7 @@ static int map_read_chunks(struct svcxprt_rdma *xprt, | |||
| 169 | */ | 170 | */ |
| 170 | head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; | 171 | head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; |
| 171 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; | 172 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; |
| 173 | rqstp->rq_next_page = rqstp->rq_respages + 1; | ||
| 172 | 174 | ||
| 173 | byte_count -= sge_bytes; | 175 | byte_count -= sge_bytes; |
| 174 | ch_bytes -= sge_bytes; | 176 | ch_bytes -= sge_bytes; |
| @@ -276,6 +278,7 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, | |||
| 276 | 278 | ||
| 277 | /* rq_respages points one past arg pages */ | 279 | /* rq_respages points one past arg pages */ |
| 278 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; | 280 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; |
| 281 | rqstp->rq_next_page = rqstp->rq_respages + 1; | ||
| 279 | 282 | ||
| 280 | /* Create the reply and chunk maps */ | 283 | /* Create the reply and chunk maps */ |
| 281 | offset = 0; | 284 | offset = 0; |
| @@ -520,13 +523,6 @@ next_sge: | |||
| 520 | for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++) | 523 | for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++) |
| 521 | rqstp->rq_pages[ch_no] = NULL; | 524 | rqstp->rq_pages[ch_no] = NULL; |
| 522 | 525 | ||
| 523 | /* | ||
| 524 | * Detach res pages. If svc_release sees any it will attempt to | ||
| 525 | * put them. | ||
| 526 | */ | ||
| 527 | while (rqstp->rq_next_page != rqstp->rq_respages) | ||
| 528 | *(--rqstp->rq_next_page) = NULL; | ||
| 529 | |||
| 530 | return err; | 526 | return err; |
| 531 | } | 527 | } |
| 532 | 528 | ||
| @@ -550,7 +546,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp, | |||
| 550 | 546 | ||
| 551 | /* rq_respages starts after the last arg page */ | 547 | /* rq_respages starts after the last arg page */ |
| 552 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; | 548 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; |
| 553 | rqstp->rq_next_page = &rqstp->rq_arg.pages[page_no]; | 549 | rqstp->rq_next_page = rqstp->rq_respages + 1; |
| 554 | 550 | ||
| 555 | /* Rebuild rq_arg head and tail. */ | 551 | /* Rebuild rq_arg head and tail. */ |
| 556 | rqstp->rq_arg.head[0] = head->arg.head[0]; | 552 | rqstp->rq_arg.head[0] = head->arg.head[0]; |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index c1d124dc772b..7e024a51617e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c | |||
| @@ -265,6 +265,7 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt, | |||
| 265 | xdr_off -= xdr->head[0].iov_len; | 265 | xdr_off -= xdr->head[0].iov_len; |
| 266 | if (xdr_off < xdr->page_len) { | 266 | if (xdr_off < xdr->page_len) { |
| 267 | /* This offset is in the page list */ | 267 | /* This offset is in the page list */ |
| 268 | xdr_off += xdr->page_base; | ||
| 268 | page = xdr->pages[xdr_off >> PAGE_SHIFT]; | 269 | page = xdr->pages[xdr_off >> PAGE_SHIFT]; |
| 269 | xdr_off &= ~PAGE_MASK; | 270 | xdr_off &= ~PAGE_MASK; |
| 270 | } else { | 271 | } else { |
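The added line matters because page-list data need not start at byte 0 of the first page; xdr->page_base must be folded in before indexing. A sketch of the corrected offset math, with assumed values in the comment:

static struct page *example_page_for_offset(struct xdr_buf *xdr,
					    u32 xdr_off, u32 *page_off)
{
	/* e.g. page_base = 2048, xdr_off = 3000: the byte lives in
	 * pages[1] at offset 952 on a 4K-page system, not pages[0] */
	xdr_off += xdr->page_base;
	*page_off = xdr_off & ~PAGE_MASK;
	return xdr->pages[xdr_off >> PAGE_SHIFT];
}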
| @@ -625,6 +626,7 @@ static int send_reply(struct svcxprt_rdma *rdma, | |||
| 625 | if (page_no+1 >= sge_no) | 626 | if (page_no+1 >= sge_no) |
| 626 | ctxt->sge[page_no+1].length = 0; | 627 | ctxt->sge[page_no+1].length = 0; |
| 627 | } | 628 | } |
| 629 | rqstp->rq_next_page = rqstp->rq_respages + 1; | ||
| 628 | BUG_ON(sge_no > rdma->sc_max_sge); | 630 | BUG_ON(sge_no > rdma->sc_max_sge); |
| 629 | memset(&send_wr, 0, sizeof send_wr); | 631 | memset(&send_wr, 0, sizeof send_wr); |
| 630 | ctxt->wr_op = IB_WR_SEND; | 632 | ctxt->wr_op = IB_WR_SEND; |
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 62e4f9bcc387..25688fa2207f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c | |||
| @@ -477,8 +477,7 @@ struct page *svc_rdma_get_page(void) | |||
| 477 | 477 | ||
| 478 | while ((page = alloc_page(GFP_KERNEL)) == NULL) { | 478 | while ((page = alloc_page(GFP_KERNEL)) == NULL) { |
| 479 | /* If we can't get memory, wait a bit and try again */ | 479 | /* If we can't get memory, wait a bit and try again */ |
| 480 | printk(KERN_INFO "svcrdma: out of memory...retrying in 1000 " | 480 | printk(KERN_INFO "svcrdma: out of memory...retrying in 1s\n"); |
| 481 | "jiffies.\n"); | ||
| 482 | schedule_timeout_uninterruptible(msecs_to_jiffies(1000)); | 481 | schedule_timeout_uninterruptible(msecs_to_jiffies(1000)); |
| 483 | } | 482 | } |
| 484 | return page; | 483 | return page; |
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 285dc0884115..1eb9c468d0c9 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c | |||
| @@ -733,7 +733,7 @@ static void __exit xprt_rdma_cleanup(void) | |||
| 733 | { | 733 | { |
| 734 | int rc; | 734 | int rc; |
| 735 | 735 | ||
| 736 | dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n"); | 736 | dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); |
| 737 | #ifdef RPC_DEBUG | 737 | #ifdef RPC_DEBUG |
| 738 | if (sunrpc_table_header) { | 738 | if (sunrpc_table_header) { |
| 739 | unregister_sysctl_table(sunrpc_table_header); | 739 | unregister_sysctl_table(sunrpc_table_header); |
| @@ -755,14 +755,14 @@ static int __init xprt_rdma_init(void) | |||
| 755 | if (rc) | 755 | if (rc) |
| 756 | return rc; | 756 | return rc; |
| 757 | 757 | ||
| 758 | dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n"); | 758 | dprintk("RPCRDMA Module Init, register RPC RDMA transport\n"); |
| 759 | 759 | ||
| 760 | dprintk(KERN_INFO "Defaults:\n"); | 760 | dprintk("Defaults:\n"); |
| 761 | dprintk(KERN_INFO "\tSlots %d\n" | 761 | dprintk("\tSlots %d\n" |
| 762 | "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", | 762 | "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", |
| 763 | xprt_rdma_slot_table_entries, | 763 | xprt_rdma_slot_table_entries, |
| 764 | xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); | 764 | xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); |
| 765 | dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n", | 765 | dprintk("\tPadding %d\n\tMemreg %d\n", |
| 766 | xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); | 766 | xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); |
| 767 | 767 | ||
| 768 | #ifdef RPC_DEBUG | 768 | #ifdef RPC_DEBUG |
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0addefca8e77..6735e1d1e9bb 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c | |||
| @@ -909,6 +909,12 @@ static void xs_tcp_close(struct rpc_xprt *xprt) | |||
| 909 | xs_tcp_shutdown(xprt); | 909 | xs_tcp_shutdown(xprt); |
| 910 | } | 910 | } |
| 911 | 911 | ||
| 912 | static void xs_xprt_free(struct rpc_xprt *xprt) | ||
| 913 | { | ||
| 914 | xs_free_peer_addresses(xprt); | ||
| 915 | xprt_free(xprt); | ||
| 916 | } | ||
| 917 | |||
| 912 | /** | 918 | /** |
| 913 | * xs_destroy - prepare to shutdown a transport | 919 | * xs_destroy - prepare to shutdown a transport |
| 914 | * @xprt: doomed transport | 920 | * @xprt: doomed transport |
| @@ -919,8 +925,7 @@ static void xs_destroy(struct rpc_xprt *xprt) | |||
| 919 | dprintk("RPC: xs_destroy xprt %p\n", xprt); | 925 | dprintk("RPC: xs_destroy xprt %p\n", xprt); |
| 920 | 926 | ||
| 921 | xs_close(xprt); | 927 | xs_close(xprt); |
| 922 | xs_free_peer_addresses(xprt); | 928 | xs_xprt_free(xprt); |
| 923 | xprt_free(xprt); | ||
| 924 | module_put(THIS_MODULE); | 929 | module_put(THIS_MODULE); |
| 925 | } | 930 | } |
| 926 | 931 | ||
| @@ -1306,41 +1311,29 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, | |||
| 1306 | * If we're unable to obtain the rpc_rqst we schedule the closing of the | 1311 | * If we're unable to obtain the rpc_rqst we schedule the closing of the |
| 1307 | * connection and return -1. | 1312 | * connection and return -1. |
| 1308 | */ | 1313 | */ |
| 1309 | static inline int xs_tcp_read_callback(struct rpc_xprt *xprt, | 1314 | static int xs_tcp_read_callback(struct rpc_xprt *xprt, |
| 1310 | struct xdr_skb_reader *desc) | 1315 | struct xdr_skb_reader *desc) |
| 1311 | { | 1316 | { |
| 1312 | struct sock_xprt *transport = | 1317 | struct sock_xprt *transport = |
| 1313 | container_of(xprt, struct sock_xprt, xprt); | 1318 | container_of(xprt, struct sock_xprt, xprt); |
| 1314 | struct rpc_rqst *req; | 1319 | struct rpc_rqst *req; |
| 1315 | 1320 | ||
| 1316 | req = xprt_alloc_bc_request(xprt); | 1321 | /* Look up and lock the request corresponding to the given XID */ |
| 1322 | spin_lock(&xprt->transport_lock); | ||
| 1323 | req = xprt_lookup_bc_request(xprt, transport->tcp_xid); | ||
| 1317 | if (req == NULL) { | 1324 | if (req == NULL) { |
| 1325 | spin_unlock(&xprt->transport_lock); | ||
| 1318 | printk(KERN_WARNING "Callback slot table overflowed\n"); | 1326 | printk(KERN_WARNING "Callback slot table overflowed\n"); |
| 1319 | xprt_force_disconnect(xprt); | 1327 | xprt_force_disconnect(xprt); |
| 1320 | return -1; | 1328 | return -1; |
| 1321 | } | 1329 | } |
| 1322 | 1330 | ||
| 1323 | req->rq_xid = transport->tcp_xid; | ||
| 1324 | dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid)); | 1331 | dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid)); |
| 1325 | xs_tcp_read_common(xprt, desc, req); | 1332 | xs_tcp_read_common(xprt, desc, req); |
| 1326 | 1333 | ||
| 1327 | if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) { | 1334 | if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) |
| 1328 | struct svc_serv *bc_serv = xprt->bc_serv; | 1335 | xprt_complete_bc_request(req, transport->tcp_copied); |
| 1329 | 1336 | spin_unlock(&xprt->transport_lock); | |
| 1330 | /* | ||
| 1331 | * Add callback request to callback list. The callback | ||
| 1332 | * service sleeps on the sv_cb_waitq waiting for new | ||
| 1333 | * requests. Wake it up after adding enqueing the | ||
| 1334 | * request. | ||
| 1335 | */ | ||
| 1336 | dprintk("RPC: add callback request to list\n"); | ||
| 1337 | spin_lock(&bc_serv->sv_cb_lock); | ||
| 1338 | list_add(&req->rq_bc_list, &bc_serv->sv_cb_list); | ||
| 1339 | spin_unlock(&bc_serv->sv_cb_lock); | ||
| 1340 | wake_up(&bc_serv->sv_cb_waitq); | ||
| 1341 | } | ||
| 1342 | |||
| 1343 | req->rq_private_buf.len = transport->tcp_copied; | ||
| 1344 | 1337 | ||
| 1345 | return 0; | 1338 | return 0; |
| 1346 | } | 1339 | } |
| @@ -2544,6 +2537,10 @@ static void bc_close(struct rpc_xprt *xprt) | |||
| 2544 | 2537 | ||
| 2545 | static void bc_destroy(struct rpc_xprt *xprt) | 2538 | static void bc_destroy(struct rpc_xprt *xprt) |
| 2546 | { | 2539 | { |
| 2540 | dprintk("RPC: bc_destroy xprt %p\n", xprt); | ||
| 2541 | |||
| 2542 | xs_xprt_free(xprt); | ||
| 2543 | module_put(THIS_MODULE); | ||
| 2547 | } | 2544 | } |
| 2548 | 2545 | ||
| 2549 | static struct rpc_xprt_ops xs_local_ops = { | 2546 | static struct rpc_xprt_ops xs_local_ops = { |
| @@ -2744,7 +2741,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) | |||
| 2744 | return xprt; | 2741 | return xprt; |
| 2745 | ret = ERR_PTR(-EINVAL); | 2742 | ret = ERR_PTR(-EINVAL); |
| 2746 | out_err: | 2743 | out_err: |
| 2747 | xprt_free(xprt); | 2744 | xs_xprt_free(xprt); |
| 2748 | return ret; | 2745 | return ret; |
| 2749 | } | 2746 | } |
| 2750 | 2747 | ||
| @@ -2822,7 +2819,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) | |||
| 2822 | return xprt; | 2819 | return xprt; |
| 2823 | ret = ERR_PTR(-EINVAL); | 2820 | ret = ERR_PTR(-EINVAL); |
| 2824 | out_err: | 2821 | out_err: |
| 2825 | xprt_free(xprt); | 2822 | xs_xprt_free(xprt); |
| 2826 | return ret; | 2823 | return ret; |
| 2827 | } | 2824 | } |
| 2828 | 2825 | ||
| @@ -2897,12 +2894,11 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) | |||
| 2897 | xprt->address_strings[RPC_DISPLAY_ADDR], | 2894 | xprt->address_strings[RPC_DISPLAY_ADDR], |
| 2898 | xprt->address_strings[RPC_DISPLAY_PROTO]); | 2895 | xprt->address_strings[RPC_DISPLAY_PROTO]); |
| 2899 | 2896 | ||
| 2900 | |||
| 2901 | if (try_module_get(THIS_MODULE)) | 2897 | if (try_module_get(THIS_MODULE)) |
| 2902 | return xprt; | 2898 | return xprt; |
| 2903 | ret = ERR_PTR(-EINVAL); | 2899 | ret = ERR_PTR(-EINVAL); |
| 2904 | out_err: | 2900 | out_err: |
| 2905 | xprt_free(xprt); | 2901 | xs_xprt_free(xprt); |
| 2906 | return ret; | 2902 | return ret; |
| 2907 | } | 2903 | } |
| 2908 | 2904 | ||
| @@ -2919,15 +2915,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) | |||
| 2919 | struct svc_sock *bc_sock; | 2915 | struct svc_sock *bc_sock; |
| 2920 | struct rpc_xprt *ret; | 2916 | struct rpc_xprt *ret; |
| 2921 | 2917 | ||
| 2922 | if (args->bc_xprt->xpt_bc_xprt) { | ||
| 2923 | /* | ||
| 2924 | * This server connection already has a backchannel | ||
| 2925 | * transport; we can't create a new one, as we wouldn't | ||
| 2926 | * be able to match replies based on xid any more. So, | ||
| 2927 | * reuse the already-existing one: | ||
| 2928 | */ | ||
| 2929 | return args->bc_xprt->xpt_bc_xprt; | ||
| 2930 | } | ||
| 2931 | xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, | 2918 | xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, |
| 2932 | xprt_tcp_slot_table_entries); | 2919 | xprt_tcp_slot_table_entries); |
| 2933 | if (IS_ERR(xprt)) | 2920 | if (IS_ERR(xprt)) |
| @@ -2985,13 +2972,14 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) | |||
| 2985 | */ | 2972 | */ |
| 2986 | xprt_set_connected(xprt); | 2973 | xprt_set_connected(xprt); |
| 2987 | 2974 | ||
| 2988 | |||
| 2989 | if (try_module_get(THIS_MODULE)) | 2975 | if (try_module_get(THIS_MODULE)) |
| 2990 | return xprt; | 2976 | return xprt; |
| 2977 | |||
| 2978 | args->bc_xprt->xpt_bc_xprt = NULL; | ||
| 2991 | xprt_put(xprt); | 2979 | xprt_put(xprt); |
| 2992 | ret = ERR_PTR(-EINVAL); | 2980 | ret = ERR_PTR(-EINVAL); |
| 2993 | out_err: | 2981 | out_err: |
| 2994 | xprt_free(xprt); | 2982 | xs_xprt_free(xprt); |
| 2995 | return ret; | 2983 | return ret; |
| 2996 | } | 2984 | } |
| 2997 | 2985 | ||
diff --git a/net/tipc/net.c b/net/tipc/net.c index 0374a817631e..4c564eb69e1a 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c | |||
| @@ -182,6 +182,8 @@ void tipc_net_start(u32 addr) | |||
| 182 | tipc_bclink_init(); | 182 | tipc_bclink_init(); |
| 183 | write_unlock_bh(&tipc_net_lock); | 183 | write_unlock_bh(&tipc_net_lock); |
| 184 | 184 | ||
| 185 | tipc_nametbl_publish(TIPC_CFG_SRV, tipc_own_addr, tipc_own_addr, | ||
| 186 | TIPC_ZONE_SCOPE, 0, tipc_own_addr); | ||
| 185 | pr_info("Started in network mode\n"); | 187 | pr_info("Started in network mode\n"); |
| 186 | pr_info("Own node address %s, network identity %u\n", | 188 | pr_info("Own node address %s, network identity %u\n", |
| 187 | tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); | 189 | tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); |
| @@ -192,6 +194,7 @@ void tipc_net_stop(void) | |||
| 192 | if (!tipc_own_addr) | 194 | if (!tipc_own_addr) |
| 193 | return; | 195 | return; |
| 194 | 196 | ||
| 197 | tipc_nametbl_withdraw(TIPC_CFG_SRV, tipc_own_addr, 0, tipc_own_addr); | ||
| 195 | write_lock_bh(&tipc_net_lock); | 198 | write_lock_bh(&tipc_net_lock); |
| 196 | tipc_bearer_stop(); | 199 | tipc_bearer_stop(); |
| 197 | tipc_bclink_stop(); | 200 | tipc_bclink_stop(); |
diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 29b7f26a12cf..adc12e227303 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c | |||
| @@ -301,7 +301,6 @@ static int tipc_release(struct socket *sock) | |||
| 301 | struct tipc_sock *tsk; | 301 | struct tipc_sock *tsk; |
| 302 | struct tipc_port *port; | 302 | struct tipc_port *port; |
| 303 | struct sk_buff *buf; | 303 | struct sk_buff *buf; |
| 304 | int res; | ||
| 305 | 304 | ||
| 306 | /* | 305 | /* |
| 307 | * Exit if socket isn't fully initialized (occurs when a failed accept() | 306 | * Exit if socket isn't fully initialized (occurs when a failed accept() |
| @@ -349,7 +348,7 @@ static int tipc_release(struct socket *sock) | |||
| 349 | sock_put(sk); | 348 | sock_put(sk); |
| 350 | sock->sk = NULL; | 349 | sock->sk = NULL; |
| 351 | 350 | ||
| 352 | return res; | 351 | return 0; |
| 353 | } | 352 | } |
| 354 | 353 | ||
| 355 | /** | 354 | /** |
