diff options
| author | Ingo Molnar <mingo@kernel.org> | 2013-12-17 09:27:08 -0500 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2013-12-17 09:27:08 -0500 |
| commit | bb799d3b980eb803ca2da4a4eefbd9308f8d988a (patch) | |
| tree | 69fbe0cd6d47b23a50f5e1d87bf7489532fae149 /fs | |
| parent | 919fc6e34831d1c2b58bfb5ae261dc3facc9b269 (diff) | |
| parent | 319e2e3f63c348a9b66db4667efa73178e18b17d (diff) | |
Merge tag 'v3.13-rc4' into core/locking
Merge Linux 3.13-rc4, to refresh this rather old tree with the latest fixes.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'fs')
100 files changed, 2016 insertions, 909 deletions
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index f039b104a98e..b03dd23feda8 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c | |||
| @@ -43,23 +43,6 @@ | |||
| 43 | #include "fid.h" | 43 | #include "fid.h" |
| 44 | 44 | ||
| 45 | /** | 45 | /** |
| 46 | * v9fs_dentry_delete - called when dentry refcount equals 0 | ||
| 47 | * @dentry: dentry in question | ||
| 48 | * | ||
| 49 | * By returning 1 here we should remove caching of unused | ||
| 50 | * dentry components. | ||
| 51 | * | ||
| 52 | */ | ||
| 53 | |||
| 54 | static int v9fs_dentry_delete(const struct dentry *dentry) | ||
| 55 | { | ||
| 56 | p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n", | ||
| 57 | dentry->d_name.name, dentry); | ||
| 58 | |||
| 59 | return 1; | ||
| 60 | } | ||
| 61 | |||
| 62 | /** | ||
| 63 | * v9fs_cached_dentry_delete - called when dentry refcount equals 0 | 46 | * v9fs_cached_dentry_delete - called when dentry refcount equals 0 |
| 64 | * @dentry: dentry in question | 47 | * @dentry: dentry in question |
| 65 | * | 48 | * |
| @@ -134,6 +117,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = { | |||
| 134 | }; | 117 | }; |
| 135 | 118 | ||
| 136 | const struct dentry_operations v9fs_dentry_operations = { | 119 | const struct dentry_operations v9fs_dentry_operations = { |
| 137 | .d_delete = v9fs_dentry_delete, | 120 | .d_delete = always_delete_dentry, |
| 138 | .d_release = v9fs_dentry_release, | 121 | .d_release = v9fs_dentry_release, |
| 139 | }; | 122 | }; |
diff --git a/fs/affs/Changes b/fs/affs/Changes index a29409c1ffe0..b41c2c9792ff 100644 --- a/fs/affs/Changes +++ b/fs/affs/Changes | |||
| @@ -91,7 +91,7 @@ more 2.4 fixes: [Roman Zippel] | |||
| 91 | Version 3.11 | 91 | Version 3.11 |
| 92 | ------------ | 92 | ------------ |
| 93 | 93 | ||
| 94 | - Converted to use 2.3.x page cache [Dave Jones <dave@powertweak.com>] | 94 | - Converted to use 2.3.x page cache [Dave Jones] |
| 95 | - Corruption in truncate() bugfix [Ken Tyler <kent@werple.net.au>] | 95 | - Corruption in truncate() bugfix [Ken Tyler <kent@werple.net.au>] |
| 96 | 96 | ||
| 97 | Version 3.10 | 97 | Version 3.10 |
| @@ -80,6 +80,8 @@ struct kioctx { | |||
| 80 | struct percpu_ref users; | 80 | struct percpu_ref users; |
| 81 | atomic_t dead; | 81 | atomic_t dead; |
| 82 | 82 | ||
| 83 | struct percpu_ref reqs; | ||
| 84 | |||
| 83 | unsigned long user_id; | 85 | unsigned long user_id; |
| 84 | 86 | ||
| 85 | struct __percpu kioctx_cpu *cpu; | 87 | struct __percpu kioctx_cpu *cpu; |
| @@ -107,7 +109,6 @@ struct kioctx { | |||
| 107 | struct page **ring_pages; | 109 | struct page **ring_pages; |
| 108 | long nr_pages; | 110 | long nr_pages; |
| 109 | 111 | ||
| 110 | struct rcu_head rcu_head; | ||
| 111 | struct work_struct free_work; | 112 | struct work_struct free_work; |
| 112 | 113 | ||
| 113 | struct { | 114 | struct { |
| @@ -250,8 +251,10 @@ static void aio_free_ring(struct kioctx *ctx) | |||
| 250 | 251 | ||
| 251 | put_aio_ring_file(ctx); | 252 | put_aio_ring_file(ctx); |
| 252 | 253 | ||
| 253 | if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) | 254 | if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) { |
| 254 | kfree(ctx->ring_pages); | 255 | kfree(ctx->ring_pages); |
| 256 | ctx->ring_pages = NULL; | ||
| 257 | } | ||
| 255 | } | 258 | } |
| 256 | 259 | ||
| 257 | static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) | 260 | static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma) |
| @@ -364,8 +367,10 @@ static int aio_setup_ring(struct kioctx *ctx) | |||
| 364 | if (nr_pages > AIO_RING_PAGES) { | 367 | if (nr_pages > AIO_RING_PAGES) { |
| 365 | ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), | 368 | ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *), |
| 366 | GFP_KERNEL); | 369 | GFP_KERNEL); |
| 367 | if (!ctx->ring_pages) | 370 | if (!ctx->ring_pages) { |
| 371 | put_aio_ring_file(ctx); | ||
| 368 | return -ENOMEM; | 372 | return -ENOMEM; |
| 373 | } | ||
| 369 | } | 374 | } |
| 370 | 375 | ||
| 371 | ctx->mmap_size = nr_pages * PAGE_SIZE; | 376 | ctx->mmap_size = nr_pages * PAGE_SIZE; |
| @@ -463,26 +468,34 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb) | |||
| 463 | return cancel(kiocb); | 468 | return cancel(kiocb); |
| 464 | } | 469 | } |
| 465 | 470 | ||
| 466 | static void free_ioctx_rcu(struct rcu_head *head) | 471 | static void free_ioctx(struct work_struct *work) |
| 467 | { | 472 | { |
| 468 | struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); | 473 | struct kioctx *ctx = container_of(work, struct kioctx, free_work); |
| 474 | |||
| 475 | pr_debug("freeing %p\n", ctx); | ||
| 469 | 476 | ||
| 477 | aio_free_ring(ctx); | ||
| 470 | free_percpu(ctx->cpu); | 478 | free_percpu(ctx->cpu); |
| 471 | kmem_cache_free(kioctx_cachep, ctx); | 479 | kmem_cache_free(kioctx_cachep, ctx); |
| 472 | } | 480 | } |
| 473 | 481 | ||
| 482 | static void free_ioctx_reqs(struct percpu_ref *ref) | ||
| 483 | { | ||
| 484 | struct kioctx *ctx = container_of(ref, struct kioctx, reqs); | ||
| 485 | |||
| 486 | INIT_WORK(&ctx->free_work, free_ioctx); | ||
| 487 | schedule_work(&ctx->free_work); | ||
| 488 | } | ||
| 489 | |||
| 474 | /* | 490 | /* |
| 475 | * When this function runs, the kioctx has been removed from the "hash table" | 491 | * When this function runs, the kioctx has been removed from the "hash table" |
| 476 | * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - | 492 | * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - |
| 477 | * now it's safe to cancel any that need to be. | 493 | * now it's safe to cancel any that need to be. |
| 478 | */ | 494 | */ |
| 479 | static void free_ioctx(struct work_struct *work) | 495 | static void free_ioctx_users(struct percpu_ref *ref) |
| 480 | { | 496 | { |
| 481 | struct kioctx *ctx = container_of(work, struct kioctx, free_work); | 497 | struct kioctx *ctx = container_of(ref, struct kioctx, users); |
| 482 | struct aio_ring *ring; | ||
| 483 | struct kiocb *req; | 498 | struct kiocb *req; |
| 484 | unsigned cpu, avail; | ||
| 485 | DEFINE_WAIT(wait); | ||
| 486 | 499 | ||
| 487 | spin_lock_irq(&ctx->ctx_lock); | 500 | spin_lock_irq(&ctx->ctx_lock); |
| 488 | 501 | ||
| @@ -496,54 +509,8 @@ static void free_ioctx(struct work_struct *work) | |||
| 496 | 509 | ||
| 497 | spin_unlock_irq(&ctx->ctx_lock); | 510 | spin_unlock_irq(&ctx->ctx_lock); |
| 498 | 511 | ||
| 499 | for_each_possible_cpu(cpu) { | 512 | percpu_ref_kill(&ctx->reqs); |
| 500 | struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu); | 513 | percpu_ref_put(&ctx->reqs); |
| 501 | |||
| 502 | atomic_add(kcpu->reqs_available, &ctx->reqs_available); | ||
| 503 | kcpu->reqs_available = 0; | ||
| 504 | } | ||
| 505 | |||
| 506 | while (1) { | ||
| 507 | prepare_to_wait(&ctx->wait, &wait, TASK_UNINTERRUPTIBLE); | ||
| 508 | |||
| 509 | ring = kmap_atomic(ctx->ring_pages[0]); | ||
| 510 | avail = (ring->head <= ring->tail) | ||
| 511 | ? ring->tail - ring->head | ||
| 512 | : ctx->nr_events - ring->head + ring->tail; | ||
| 513 | |||
| 514 | atomic_add(avail, &ctx->reqs_available); | ||
| 515 | ring->head = ring->tail; | ||
| 516 | kunmap_atomic(ring); | ||
| 517 | |||
| 518 | if (atomic_read(&ctx->reqs_available) >= ctx->nr_events - 1) | ||
| 519 | break; | ||
| 520 | |||
| 521 | schedule(); | ||
| 522 | } | ||
| 523 | finish_wait(&ctx->wait, &wait); | ||
| 524 | |||
| 525 | WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr_events - 1); | ||
| 526 | |||
| 527 | aio_free_ring(ctx); | ||
| 528 | |||
| 529 | pr_debug("freeing %p\n", ctx); | ||
| 530 | |||
| 531 | /* | ||
| 532 | * Here the call_rcu() is between the wait_event() for reqs_active to | ||
| 533 | * hit 0, and freeing the ioctx. | ||
| 534 | * | ||
| 535 | * aio_complete() decrements reqs_active, but it has to touch the ioctx | ||
| 536 | * after to issue a wakeup so we use rcu. | ||
| 537 | */ | ||
| 538 | call_rcu(&ctx->rcu_head, free_ioctx_rcu); | ||
| 539 | } | ||
| 540 | |||
| 541 | static void free_ioctx_ref(struct percpu_ref *ref) | ||
| 542 | { | ||
| 543 | struct kioctx *ctx = container_of(ref, struct kioctx, users); | ||
| 544 | |||
| 545 | INIT_WORK(&ctx->free_work, free_ioctx); | ||
| 546 | schedule_work(&ctx->free_work); | ||
| 547 | } | 514 | } |
| 548 | 515 | ||
| 549 | static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) | 516 | static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) |
| @@ -602,6 +569,16 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) | |||
| 602 | } | 569 | } |
| 603 | } | 570 | } |
| 604 | 571 | ||
| 572 | static void aio_nr_sub(unsigned nr) | ||
| 573 | { | ||
| 574 | spin_lock(&aio_nr_lock); | ||
| 575 | if (WARN_ON(aio_nr - nr > aio_nr)) | ||
| 576 | aio_nr = 0; | ||
| 577 | else | ||
| 578 | aio_nr -= nr; | ||
| 579 | spin_unlock(&aio_nr_lock); | ||
| 580 | } | ||
| 581 | |||
| 605 | /* ioctx_alloc | 582 | /* ioctx_alloc |
| 606 | * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. | 583 | * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. |
| 607 | */ | 584 | */ |
| @@ -639,8 +616,11 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) | |||
| 639 | 616 | ||
| 640 | ctx->max_reqs = nr_events; | 617 | ctx->max_reqs = nr_events; |
| 641 | 618 | ||
| 642 | if (percpu_ref_init(&ctx->users, free_ioctx_ref)) | 619 | if (percpu_ref_init(&ctx->users, free_ioctx_users)) |
| 643 | goto out_freectx; | 620 | goto err; |
| 621 | |||
| 622 | if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs)) | ||
| 623 | goto err; | ||
| 644 | 624 | ||
| 645 | spin_lock_init(&ctx->ctx_lock); | 625 | spin_lock_init(&ctx->ctx_lock); |
| 646 | spin_lock_init(&ctx->completion_lock); | 626 | spin_lock_init(&ctx->completion_lock); |
| @@ -651,10 +631,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) | |||
| 651 | 631 | ||
| 652 | ctx->cpu = alloc_percpu(struct kioctx_cpu); | 632 | ctx->cpu = alloc_percpu(struct kioctx_cpu); |
| 653 | if (!ctx->cpu) | 633 | if (!ctx->cpu) |
| 654 | goto out_freeref; | 634 | goto err; |
| 655 | 635 | ||
| 656 | if (aio_setup_ring(ctx) < 0) | 636 | if (aio_setup_ring(ctx) < 0) |
| 657 | goto out_freepcpu; | 637 | goto err; |
| 658 | 638 | ||
| 659 | atomic_set(&ctx->reqs_available, ctx->nr_events - 1); | 639 | atomic_set(&ctx->reqs_available, ctx->nr_events - 1); |
| 660 | ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4); | 640 | ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4); |
| @@ -666,7 +646,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) | |||
| 666 | if (aio_nr + nr_events > (aio_max_nr * 2UL) || | 646 | if (aio_nr + nr_events > (aio_max_nr * 2UL) || |
| 667 | aio_nr + nr_events < aio_nr) { | 647 | aio_nr + nr_events < aio_nr) { |
| 668 | spin_unlock(&aio_nr_lock); | 648 | spin_unlock(&aio_nr_lock); |
| 669 | goto out_cleanup; | 649 | err = -EAGAIN; |
| 650 | goto err_ctx; | ||
| 670 | } | 651 | } |
| 671 | aio_nr += ctx->max_reqs; | 652 | aio_nr += ctx->max_reqs; |
| 672 | spin_unlock(&aio_nr_lock); | 653 | spin_unlock(&aio_nr_lock); |
| @@ -675,23 +656,20 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) | |||
| 675 | 656 | ||
| 676 | err = ioctx_add_table(ctx, mm); | 657 | err = ioctx_add_table(ctx, mm); |
| 677 | if (err) | 658 | if (err) |
| 678 | goto out_cleanup_put; | 659 | goto err_cleanup; |
| 679 | 660 | ||
| 680 | pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", | 661 | pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n", |
| 681 | ctx, ctx->user_id, mm, ctx->nr_events); | 662 | ctx, ctx->user_id, mm, ctx->nr_events); |
| 682 | return ctx; | 663 | return ctx; |
| 683 | 664 | ||
| 684 | out_cleanup_put: | 665 | err_cleanup: |
| 685 | percpu_ref_put(&ctx->users); | 666 | aio_nr_sub(ctx->max_reqs); |
| 686 | out_cleanup: | 667 | err_ctx: |
| 687 | err = -EAGAIN; | ||
| 688 | aio_free_ring(ctx); | 668 | aio_free_ring(ctx); |
| 689 | out_freepcpu: | 669 | err: |
| 690 | free_percpu(ctx->cpu); | 670 | free_percpu(ctx->cpu); |
| 691 | out_freeref: | 671 | free_percpu(ctx->reqs.pcpu_count); |
| 692 | free_percpu(ctx->users.pcpu_count); | 672 | free_percpu(ctx->users.pcpu_count); |
| 693 | out_freectx: | ||
| 694 | put_aio_ring_file(ctx); | ||
| 695 | kmem_cache_free(kioctx_cachep, ctx); | 673 | kmem_cache_free(kioctx_cachep, ctx); |
| 696 | pr_debug("error allocating ioctx %d\n", err); | 674 | pr_debug("error allocating ioctx %d\n", err); |
| 697 | return ERR_PTR(err); | 675 | return ERR_PTR(err); |
| @@ -726,10 +704,7 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) | |||
| 726 | * -EAGAIN with no ioctxs actually in use (as far as userspace | 704 | * -EAGAIN with no ioctxs actually in use (as far as userspace |
| 727 | * could tell). | 705 | * could tell). |
| 728 | */ | 706 | */ |
| 729 | spin_lock(&aio_nr_lock); | 707 | aio_nr_sub(ctx->max_reqs); |
| 730 | BUG_ON(aio_nr - ctx->max_reqs > aio_nr); | ||
| 731 | aio_nr -= ctx->max_reqs; | ||
| 732 | spin_unlock(&aio_nr_lock); | ||
| 733 | 708 | ||
| 734 | if (ctx->mmap_size) | 709 | if (ctx->mmap_size) |
| 735 | vm_munmap(ctx->mmap_base, ctx->mmap_size); | 710 | vm_munmap(ctx->mmap_base, ctx->mmap_size); |
| @@ -861,6 +836,8 @@ static inline struct kiocb *aio_get_req(struct kioctx *ctx) | |||
| 861 | if (unlikely(!req)) | 836 | if (unlikely(!req)) |
| 862 | goto out_put; | 837 | goto out_put; |
| 863 | 838 | ||
| 839 | percpu_ref_get(&ctx->reqs); | ||
| 840 | |||
| 864 | req->ki_ctx = ctx; | 841 | req->ki_ctx = ctx; |
| 865 | return req; | 842 | return req; |
| 866 | out_put: | 843 | out_put: |
| @@ -930,12 +907,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2) | |||
| 930 | return; | 907 | return; |
| 931 | } | 908 | } |
| 932 | 909 | ||
| 933 | /* | ||
| 934 | * Take rcu_read_lock() in case the kioctx is being destroyed, as we | ||
| 935 | * need to issue a wakeup after incrementing reqs_available. | ||
| 936 | */ | ||
| 937 | rcu_read_lock(); | ||
| 938 | |||
| 939 | if (iocb->ki_list.next) { | 910 | if (iocb->ki_list.next) { |
| 940 | unsigned long flags; | 911 | unsigned long flags; |
| 941 | 912 | ||
| @@ -1010,7 +981,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2) | |||
| 1010 | if (waitqueue_active(&ctx->wait)) | 981 | if (waitqueue_active(&ctx->wait)) |
| 1011 | wake_up(&ctx->wait); | 982 | wake_up(&ctx->wait); |
| 1012 | 983 | ||
| 1013 | rcu_read_unlock(); | 984 | percpu_ref_put(&ctx->reqs); |
| 1014 | } | 985 | } |
| 1015 | EXPORT_SYMBOL(aio_complete); | 986 | EXPORT_SYMBOL(aio_complete); |
| 1016 | 987 | ||
| @@ -1421,6 +1392,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, | |||
| 1421 | return 0; | 1392 | return 0; |
| 1422 | out_put_req: | 1393 | out_put_req: |
| 1423 | put_reqs_available(ctx, 1); | 1394 | put_reqs_available(ctx, 1); |
| 1395 | percpu_ref_put(&ctx->reqs); | ||
| 1424 | kiocb_free(req); | 1396 | kiocb_free(req); |
| 1425 | return ret; | 1397 | return ret; |
| 1426 | } | 1398 | } |
| @@ -601,7 +601,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs); | |||
| 601 | 601 | ||
| 602 | static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page | 602 | static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page |
| 603 | *page, unsigned int len, unsigned int offset, | 603 | *page, unsigned int len, unsigned int offset, |
| 604 | unsigned short max_sectors) | 604 | unsigned int max_sectors) |
| 605 | { | 605 | { |
| 606 | int retried_segments = 0; | 606 | int retried_segments = 0; |
| 607 | struct bio_vec *bvec; | 607 | struct bio_vec *bvec; |
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index f9d5094e1029..aa976eced2d2 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig | |||
| @@ -9,12 +9,17 @@ config BTRFS_FS | |||
| 9 | select XOR_BLOCKS | 9 | select XOR_BLOCKS |
| 10 | 10 | ||
| 11 | help | 11 | help |
| 12 | Btrfs is a new filesystem with extents, writable snapshotting, | 12 | Btrfs is a general purpose copy-on-write filesystem with extents, |
| 13 | support for multiple devices and many more features. | 13 | writable snapshotting, support for multiple devices and many more |
| 14 | features focused on fault tolerance, repair and easy administration. | ||
| 14 | 15 | ||
| 15 | Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET | 16 | The filesystem disk format is no longer unstable, and it's not |
| 16 | FINALIZED. You should say N here unless you are interested in | 17 | expected to change unless there are strong reasons to do so. If there |
| 17 | testing Btrfs with non-critical data. | 18 | is a format change, file systems with an unchanged format will |
| 19 | continue to be mountable and usable by newer kernels. | ||
| 20 | |||
| 21 | For more information, please see the web pages at | ||
| 22 | http://btrfs.wiki.kernel.org. | ||
| 18 | 23 | ||
| 19 | To compile this file system support as a module, choose M here. The | 24 | To compile this file system support as a module, choose M here. The |
| 20 | module will be called btrfs. | 25 | module will be called btrfs. |
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 8aec751fa464..c1e0b0caf9cc 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
| @@ -495,6 +495,7 @@ static int __btrfs_start_workers(struct btrfs_workers *workers) | |||
| 495 | spin_lock_irq(&workers->lock); | 495 | spin_lock_irq(&workers->lock); |
| 496 | if (workers->stopping) { | 496 | if (workers->stopping) { |
| 497 | spin_unlock_irq(&workers->lock); | 497 | spin_unlock_irq(&workers->lock); |
| 498 | ret = -EINVAL; | ||
| 498 | goto fail_kthread; | 499 | goto fail_kthread; |
| 499 | } | 500 | } |
| 500 | list_add_tail(&worker->worker_list, &workers->idle_list); | 501 | list_add_tail(&worker->worker_list, &workers->idle_list); |
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index e0aab4456974..131d82800b3a 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c | |||
| @@ -77,6 +77,15 @@ | |||
| 77 | * the integrity of (super)-block write requests, do not | 77 | * the integrity of (super)-block write requests, do not |
| 78 | * enable the config option BTRFS_FS_CHECK_INTEGRITY to | 78 | * enable the config option BTRFS_FS_CHECK_INTEGRITY to |
| 79 | * include and compile the integrity check tool. | 79 | * include and compile the integrity check tool. |
| 80 | * | ||
| 81 | * Expect millions of lines of information in the kernel log with an | ||
| 82 | * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the | ||
| 83 | * kernel config to at least 26 (which is 64MB). Usually the value is | ||
| 84 | * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be | ||
| 85 | * changed like this before LOG_BUF_SHIFT can be set to a high value: | ||
| 86 | * config LOG_BUF_SHIFT | ||
| 87 | * int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" | ||
| 88 | * range 12 30 | ||
| 80 | */ | 89 | */ |
| 81 | 90 | ||
| 82 | #include <linux/sched.h> | 91 | #include <linux/sched.h> |
| @@ -124,6 +133,7 @@ | |||
| 124 | #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400 | 133 | #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400 |
| 125 | #define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800 | 134 | #define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800 |
| 126 | #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000 | 135 | #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000 |
| 136 | #define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE 0x00002000 | ||
| 127 | 137 | ||
| 128 | struct btrfsic_dev_state; | 138 | struct btrfsic_dev_state; |
| 129 | struct btrfsic_state; | 139 | struct btrfsic_state; |
| @@ -323,7 +333,6 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); | |||
| 323 | static int btrfsic_read_block(struct btrfsic_state *state, | 333 | static int btrfsic_read_block(struct btrfsic_state *state, |
| 324 | struct btrfsic_block_data_ctx *block_ctx); | 334 | struct btrfsic_block_data_ctx *block_ctx); |
| 325 | static void btrfsic_dump_database(struct btrfsic_state *state); | 335 | static void btrfsic_dump_database(struct btrfsic_state *state); |
| 326 | static void btrfsic_complete_bio_end_io(struct bio *bio, int err); | ||
| 327 | static int btrfsic_test_for_metadata(struct btrfsic_state *state, | 336 | static int btrfsic_test_for_metadata(struct btrfsic_state *state, |
| 328 | char **datav, unsigned int num_pages); | 337 | char **datav, unsigned int num_pages); |
| 329 | static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, | 338 | static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, |
| @@ -1677,7 +1686,6 @@ static int btrfsic_read_block(struct btrfsic_state *state, | |||
| 1677 | for (i = 0; i < num_pages;) { | 1686 | for (i = 0; i < num_pages;) { |
| 1678 | struct bio *bio; | 1687 | struct bio *bio; |
| 1679 | unsigned int j; | 1688 | unsigned int j; |
| 1680 | DECLARE_COMPLETION_ONSTACK(complete); | ||
| 1681 | 1689 | ||
| 1682 | bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i); | 1690 | bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i); |
| 1683 | if (!bio) { | 1691 | if (!bio) { |
| @@ -1688,8 +1696,6 @@ static int btrfsic_read_block(struct btrfsic_state *state, | |||
| 1688 | } | 1696 | } |
| 1689 | bio->bi_bdev = block_ctx->dev->bdev; | 1697 | bio->bi_bdev = block_ctx->dev->bdev; |
| 1690 | bio->bi_sector = dev_bytenr >> 9; | 1698 | bio->bi_sector = dev_bytenr >> 9; |
| 1691 | bio->bi_end_io = btrfsic_complete_bio_end_io; | ||
| 1692 | bio->bi_private = &complete; | ||
| 1693 | 1699 | ||
| 1694 | for (j = i; j < num_pages; j++) { | 1700 | for (j = i; j < num_pages; j++) { |
| 1695 | ret = bio_add_page(bio, block_ctx->pagev[j], | 1701 | ret = bio_add_page(bio, block_ctx->pagev[j], |
| @@ -1702,12 +1708,7 @@ static int btrfsic_read_block(struct btrfsic_state *state, | |||
| 1702 | "btrfsic: error, failed to add a single page!\n"); | 1708 | "btrfsic: error, failed to add a single page!\n"); |
| 1703 | return -1; | 1709 | return -1; |
| 1704 | } | 1710 | } |
| 1705 | submit_bio(READ, bio); | 1711 | if (submit_bio_wait(READ, bio)) { |
| 1706 | |||
| 1707 | /* this will also unplug the queue */ | ||
| 1708 | wait_for_completion(&complete); | ||
| 1709 | |||
| 1710 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { | ||
| 1711 | printk(KERN_INFO | 1712 | printk(KERN_INFO |
| 1712 | "btrfsic: read error at logical %llu dev %s!\n", | 1713 | "btrfsic: read error at logical %llu dev %s!\n", |
| 1713 | block_ctx->start, block_ctx->dev->name); | 1714 | block_ctx->start, block_ctx->dev->name); |
| @@ -1730,11 +1731,6 @@ static int btrfsic_read_block(struct btrfsic_state *state, | |||
| 1730 | return block_ctx->len; | 1731 | return block_ctx->len; |
| 1731 | } | 1732 | } |
| 1732 | 1733 | ||
| 1733 | static void btrfsic_complete_bio_end_io(struct bio *bio, int err) | ||
| 1734 | { | ||
| 1735 | complete((struct completion *)bio->bi_private); | ||
| 1736 | } | ||
| 1737 | |||
| 1738 | static void btrfsic_dump_database(struct btrfsic_state *state) | 1734 | static void btrfsic_dump_database(struct btrfsic_state *state) |
| 1739 | { | 1735 | { |
| 1740 | struct list_head *elem_all; | 1736 | struct list_head *elem_all; |
| @@ -2998,14 +2994,12 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh) | |||
| 2998 | return submit_bh(rw, bh); | 2994 | return submit_bh(rw, bh); |
| 2999 | } | 2995 | } |
| 3000 | 2996 | ||
| 3001 | void btrfsic_submit_bio(int rw, struct bio *bio) | 2997 | static void __btrfsic_submit_bio(int rw, struct bio *bio) |
| 3002 | { | 2998 | { |
| 3003 | struct btrfsic_dev_state *dev_state; | 2999 | struct btrfsic_dev_state *dev_state; |
| 3004 | 3000 | ||
| 3005 | if (!btrfsic_is_initialized) { | 3001 | if (!btrfsic_is_initialized) |
| 3006 | submit_bio(rw, bio); | ||
| 3007 | return; | 3002 | return; |
| 3008 | } | ||
| 3009 | 3003 | ||
| 3010 | mutex_lock(&btrfsic_mutex); | 3004 | mutex_lock(&btrfsic_mutex); |
| 3011 | /* since btrfsic_submit_bio() is also called before | 3005 | /* since btrfsic_submit_bio() is also called before |
| @@ -3015,6 +3009,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) | |||
| 3015 | (rw & WRITE) && NULL != bio->bi_io_vec) { | 3009 | (rw & WRITE) && NULL != bio->bi_io_vec) { |
| 3016 | unsigned int i; | 3010 | unsigned int i; |
| 3017 | u64 dev_bytenr; | 3011 | u64 dev_bytenr; |
| 3012 | u64 cur_bytenr; | ||
| 3018 | int bio_is_patched; | 3013 | int bio_is_patched; |
| 3019 | char **mapped_datav; | 3014 | char **mapped_datav; |
| 3020 | 3015 | ||
| @@ -3033,6 +3028,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) | |||
| 3033 | GFP_NOFS); | 3028 | GFP_NOFS); |
| 3034 | if (!mapped_datav) | 3029 | if (!mapped_datav) |
| 3035 | goto leave; | 3030 | goto leave; |
| 3031 | cur_bytenr = dev_bytenr; | ||
| 3036 | for (i = 0; i < bio->bi_vcnt; i++) { | 3032 | for (i = 0; i < bio->bi_vcnt; i++) { |
| 3037 | BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); | 3033 | BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); |
| 3038 | mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); | 3034 | mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); |
| @@ -3044,16 +3040,13 @@ void btrfsic_submit_bio(int rw, struct bio *bio) | |||
| 3044 | kfree(mapped_datav); | 3040 | kfree(mapped_datav); |
| 3045 | goto leave; | 3041 | goto leave; |
| 3046 | } | 3042 | } |
| 3047 | if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | | 3043 | if (dev_state->state->print_mask & |
| 3048 | BTRFSIC_PRINT_MASK_VERBOSE) == | 3044 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE) |
| 3049 | (dev_state->state->print_mask & | ||
| 3050 | (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | | ||
| 3051 | BTRFSIC_PRINT_MASK_VERBOSE))) | ||
| 3052 | printk(KERN_INFO | 3045 | printk(KERN_INFO |
| 3053 | "#%u: page=%p, len=%u, offset=%u\n", | 3046 | "#%u: bytenr=%llu, len=%u, offset=%u\n", |
| 3054 | i, bio->bi_io_vec[i].bv_page, | 3047 | i, cur_bytenr, bio->bi_io_vec[i].bv_len, |
| 3055 | bio->bi_io_vec[i].bv_len, | ||
| 3056 | bio->bi_io_vec[i].bv_offset); | 3048 | bio->bi_io_vec[i].bv_offset); |
| 3049 | cur_bytenr += bio->bi_io_vec[i].bv_len; | ||
| 3057 | } | 3050 | } |
| 3058 | btrfsic_process_written_block(dev_state, dev_bytenr, | 3051 | btrfsic_process_written_block(dev_state, dev_bytenr, |
| 3059 | mapped_datav, bio->bi_vcnt, | 3052 | mapped_datav, bio->bi_vcnt, |
| @@ -3097,10 +3090,20 @@ void btrfsic_submit_bio(int rw, struct bio *bio) | |||
| 3097 | } | 3090 | } |
| 3098 | leave: | 3091 | leave: |
| 3099 | mutex_unlock(&btrfsic_mutex); | 3092 | mutex_unlock(&btrfsic_mutex); |
| 3093 | } | ||
| 3100 | 3094 | ||
| 3095 | void btrfsic_submit_bio(int rw, struct bio *bio) | ||
| 3096 | { | ||
| 3097 | __btrfsic_submit_bio(rw, bio); | ||
| 3101 | submit_bio(rw, bio); | 3098 | submit_bio(rw, bio); |
| 3102 | } | 3099 | } |
| 3103 | 3100 | ||
| 3101 | int btrfsic_submit_bio_wait(int rw, struct bio *bio) | ||
| 3102 | { | ||
| 3103 | __btrfsic_submit_bio(rw, bio); | ||
| 3104 | return submit_bio_wait(rw, bio); | ||
| 3105 | } | ||
| 3106 | |||
| 3104 | int btrfsic_mount(struct btrfs_root *root, | 3107 | int btrfsic_mount(struct btrfs_root *root, |
| 3105 | struct btrfs_fs_devices *fs_devices, | 3108 | struct btrfs_fs_devices *fs_devices, |
| 3106 | int including_extent_data, u32 print_mask) | 3109 | int including_extent_data, u32 print_mask) |
diff --git a/fs/btrfs/check-integrity.h b/fs/btrfs/check-integrity.h index 8b59175cc502..13b8566c97ab 100644 --- a/fs/btrfs/check-integrity.h +++ b/fs/btrfs/check-integrity.h | |||
| @@ -22,9 +22,11 @@ | |||
| 22 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 22 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
| 23 | int btrfsic_submit_bh(int rw, struct buffer_head *bh); | 23 | int btrfsic_submit_bh(int rw, struct buffer_head *bh); |
| 24 | void btrfsic_submit_bio(int rw, struct bio *bio); | 24 | void btrfsic_submit_bio(int rw, struct bio *bio); |
| 25 | int btrfsic_submit_bio_wait(int rw, struct bio *bio); | ||
| 25 | #else | 26 | #else |
| 26 | #define btrfsic_submit_bh submit_bh | 27 | #define btrfsic_submit_bh submit_bh |
| 27 | #define btrfsic_submit_bio submit_bio | 28 | #define btrfsic_submit_bio submit_bio |
| 29 | #define btrfsic_submit_bio_wait submit_bio_wait | ||
| 28 | #endif | 30 | #endif |
| 29 | 31 | ||
| 30 | int btrfsic_mount(struct btrfs_root *root, | 32 | int btrfsic_mount(struct btrfs_root *root, |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f9aeb2759a64..54ab86127f7a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -3613,9 +3613,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | |||
| 3613 | struct btrfs_ordered_sum *sums); | 3613 | struct btrfs_ordered_sum *sums); |
| 3614 | int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | 3614 | int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, |
| 3615 | struct bio *bio, u64 file_start, int contig); | 3615 | struct bio *bio, u64 file_start, int contig); |
| 3616 | int btrfs_csum_truncate(struct btrfs_trans_handle *trans, | ||
| 3617 | struct btrfs_root *root, struct btrfs_path *path, | ||
| 3618 | u64 isize); | ||
| 3619 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 3616 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
| 3620 | struct list_head *list, int search_commit); | 3617 | struct list_head *list, int search_commit); |
| 3621 | /* inode.c */ | 3618 | /* inode.c */ |
| @@ -3744,9 +3741,6 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info); | |||
| 3744 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); | 3741 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); |
| 3745 | void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 3742 | void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
| 3746 | int skip_pinned); | 3743 | int skip_pinned); |
| 3747 | int btrfs_replace_extent_cache(struct inode *inode, struct extent_map *replace, | ||
| 3748 | u64 start, u64 end, int skip_pinned, | ||
| 3749 | int modified); | ||
| 3750 | extern const struct file_operations btrfs_file_operations; | 3744 | extern const struct file_operations btrfs_file_operations; |
| 3751 | int __btrfs_drop_extents(struct btrfs_trans_handle *trans, | 3745 | int __btrfs_drop_extents(struct btrfs_trans_handle *trans, |
| 3752 | struct btrfs_root *root, struct inode *inode, | 3746 | struct btrfs_root *root, struct inode *inode, |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 342f9fd411e3..2cfc3dfff64f 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
| @@ -366,7 +366,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
| 366 | dev_replace->tgtdev = tgt_device; | 366 | dev_replace->tgtdev = tgt_device; |
| 367 | 367 | ||
| 368 | printk_in_rcu(KERN_INFO | 368 | printk_in_rcu(KERN_INFO |
| 369 | "btrfs: dev_replace from %s (devid %llu) to %s) started\n", | 369 | "btrfs: dev_replace from %s (devid %llu) to %s started\n", |
| 370 | src_device->missing ? "<missing disk>" : | 370 | src_device->missing ? "<missing disk>" : |
| 371 | rcu_str_deref(src_device->name), | 371 | rcu_str_deref(src_device->name), |
| 372 | src_device->devid, | 372 | src_device->devid, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4c4ed0bb3da1..8072cfa8a3b1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -3517,7 +3517,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | |||
| 3517 | int btrfs_commit_super(struct btrfs_root *root) | 3517 | int btrfs_commit_super(struct btrfs_root *root) |
| 3518 | { | 3518 | { |
| 3519 | struct btrfs_trans_handle *trans; | 3519 | struct btrfs_trans_handle *trans; |
| 3520 | int ret; | ||
| 3521 | 3520 | ||
| 3522 | mutex_lock(&root->fs_info->cleaner_mutex); | 3521 | mutex_lock(&root->fs_info->cleaner_mutex); |
| 3523 | btrfs_run_delayed_iputs(root); | 3522 | btrfs_run_delayed_iputs(root); |
| @@ -3531,25 +3530,7 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
| 3531 | trans = btrfs_join_transaction(root); | 3530 | trans = btrfs_join_transaction(root); |
| 3532 | if (IS_ERR(trans)) | 3531 | if (IS_ERR(trans)) |
| 3533 | return PTR_ERR(trans); | 3532 | return PTR_ERR(trans); |
| 3534 | ret = btrfs_commit_transaction(trans, root); | 3533 | return btrfs_commit_transaction(trans, root); |
| 3535 | if (ret) | ||
| 3536 | return ret; | ||
| 3537 | /* run commit again to drop the original snapshot */ | ||
| 3538 | trans = btrfs_join_transaction(root); | ||
| 3539 | if (IS_ERR(trans)) | ||
| 3540 | return PTR_ERR(trans); | ||
| 3541 | ret = btrfs_commit_transaction(trans, root); | ||
| 3542 | if (ret) | ||
| 3543 | return ret; | ||
| 3544 | ret = btrfs_write_and_wait_transaction(NULL, root); | ||
| 3545 | if (ret) { | ||
| 3546 | btrfs_error(root->fs_info, ret, | ||
| 3547 | "Failed to sync btree inode to disk."); | ||
| 3548 | return ret; | ||
| 3549 | } | ||
| 3550 | |||
| 3551 | ret = write_ctree_super(NULL, root, 0); | ||
| 3552 | return ret; | ||
| 3553 | } | 3534 | } |
| 3554 | 3535 | ||
| 3555 | int close_ctree(struct btrfs_root *root) | 3536 | int close_ctree(struct btrfs_root *root) |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 45d98d01028f..9c01509dd8ab 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -767,20 +767,19 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | |||
| 767 | if (!path) | 767 | if (!path) |
| 768 | return -ENOMEM; | 768 | return -ENOMEM; |
| 769 | 769 | ||
| 770 | if (metadata) { | ||
| 771 | key.objectid = bytenr; | ||
| 772 | key.type = BTRFS_METADATA_ITEM_KEY; | ||
| 773 | key.offset = offset; | ||
| 774 | } else { | ||
| 775 | key.objectid = bytenr; | ||
| 776 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 777 | key.offset = offset; | ||
| 778 | } | ||
| 779 | |||
| 780 | if (!trans) { | 770 | if (!trans) { |
| 781 | path->skip_locking = 1; | 771 | path->skip_locking = 1; |
| 782 | path->search_commit_root = 1; | 772 | path->search_commit_root = 1; |
| 783 | } | 773 | } |
| 774 | |||
| 775 | search_again: | ||
| 776 | key.objectid = bytenr; | ||
| 777 | key.offset = offset; | ||
| 778 | if (metadata) | ||
| 779 | key.type = BTRFS_METADATA_ITEM_KEY; | ||
| 780 | else | ||
| 781 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 782 | |||
| 784 | again: | 783 | again: |
| 785 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | 784 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, |
| 786 | &key, path, 0, 0); | 785 | &key, path, 0, 0); |
| @@ -788,7 +787,6 @@ again: | |||
| 788 | goto out_free; | 787 | goto out_free; |
| 789 | 788 | ||
| 790 | if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) { | 789 | if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) { |
| 791 | metadata = 0; | ||
| 792 | if (path->slots[0]) { | 790 | if (path->slots[0]) { |
| 793 | path->slots[0]--; | 791 | path->slots[0]--; |
| 794 | btrfs_item_key_to_cpu(path->nodes[0], &key, | 792 | btrfs_item_key_to_cpu(path->nodes[0], &key, |
| @@ -855,7 +853,7 @@ again: | |||
| 855 | mutex_lock(&head->mutex); | 853 | mutex_lock(&head->mutex); |
| 856 | mutex_unlock(&head->mutex); | 854 | mutex_unlock(&head->mutex); |
| 857 | btrfs_put_delayed_ref(&head->node); | 855 | btrfs_put_delayed_ref(&head->node); |
| 858 | goto again; | 856 | goto search_again; |
| 859 | } | 857 | } |
| 860 | if (head->extent_op && head->extent_op->update_flags) | 858 | if (head->extent_op && head->extent_op->update_flags) |
| 861 | extent_flags |= head->extent_op->flags_to_set; | 859 | extent_flags |= head->extent_op->flags_to_set; |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 856bc2b2192c..ff43802a7c88 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -1952,11 +1952,6 @@ static int free_io_failure(struct inode *inode, struct io_failure_record *rec, | |||
| 1952 | return err; | 1952 | return err; |
| 1953 | } | 1953 | } |
| 1954 | 1954 | ||
| 1955 | static void repair_io_failure_callback(struct bio *bio, int err) | ||
| 1956 | { | ||
| 1957 | complete(bio->bi_private); | ||
| 1958 | } | ||
| 1959 | |||
| 1960 | /* | 1955 | /* |
| 1961 | * this bypasses the standard btrfs submit functions deliberately, as | 1956 | * this bypasses the standard btrfs submit functions deliberately, as |
| 1962 | * the standard behavior is to write all copies in a raid setup. here we only | 1957 | * the standard behavior is to write all copies in a raid setup. here we only |
| @@ -1973,13 +1968,13 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, | |||
| 1973 | { | 1968 | { |
| 1974 | struct bio *bio; | 1969 | struct bio *bio; |
| 1975 | struct btrfs_device *dev; | 1970 | struct btrfs_device *dev; |
| 1976 | DECLARE_COMPLETION_ONSTACK(compl); | ||
| 1977 | u64 map_length = 0; | 1971 | u64 map_length = 0; |
| 1978 | u64 sector; | 1972 | u64 sector; |
| 1979 | struct btrfs_bio *bbio = NULL; | 1973 | struct btrfs_bio *bbio = NULL; |
| 1980 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | 1974 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; |
| 1981 | int ret; | 1975 | int ret; |
| 1982 | 1976 | ||
| 1977 | ASSERT(!(fs_info->sb->s_flags & MS_RDONLY)); | ||
| 1983 | BUG_ON(!mirror_num); | 1978 | BUG_ON(!mirror_num); |
| 1984 | 1979 | ||
| 1985 | /* we can't repair anything in raid56 yet */ | 1980 | /* we can't repair anything in raid56 yet */ |
| @@ -1989,8 +1984,6 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, | |||
| 1989 | bio = btrfs_io_bio_alloc(GFP_NOFS, 1); | 1984 | bio = btrfs_io_bio_alloc(GFP_NOFS, 1); |
| 1990 | if (!bio) | 1985 | if (!bio) |
| 1991 | return -EIO; | 1986 | return -EIO; |
| 1992 | bio->bi_private = &compl; | ||
| 1993 | bio->bi_end_io = repair_io_failure_callback; | ||
| 1994 | bio->bi_size = 0; | 1987 | bio->bi_size = 0; |
| 1995 | map_length = length; | 1988 | map_length = length; |
| 1996 | 1989 | ||
| @@ -2011,10 +2004,8 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, | |||
| 2011 | } | 2004 | } |
| 2012 | bio->bi_bdev = dev->bdev; | 2005 | bio->bi_bdev = dev->bdev; |
| 2013 | bio_add_page(bio, page, length, start - page_offset(page)); | 2006 | bio_add_page(bio, page, length, start - page_offset(page)); |
| 2014 | btrfsic_submit_bio(WRITE_SYNC, bio); | ||
| 2015 | wait_for_completion(&compl); | ||
| 2016 | 2007 | ||
| 2017 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { | 2008 | if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) { |
| 2018 | /* try to remap that extent elsewhere? */ | 2009 | /* try to remap that extent elsewhere? */ |
| 2019 | bio_put(bio); | 2010 | bio_put(bio); |
| 2020 | btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); | 2011 | btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); |
| @@ -2036,6 +2027,9 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, | |||
| 2036 | unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); | 2027 | unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); |
| 2037 | int ret = 0; | 2028 | int ret = 0; |
| 2038 | 2029 | ||
| 2030 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
| 2031 | return -EROFS; | ||
| 2032 | |||
| 2039 | for (i = 0; i < num_pages; i++) { | 2033 | for (i = 0; i < num_pages; i++) { |
| 2040 | struct page *p = extent_buffer_page(eb, i); | 2034 | struct page *p = extent_buffer_page(eb, i); |
| 2041 | ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE, | 2035 | ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE, |
| @@ -2057,12 +2051,12 @@ static int clean_io_failure(u64 start, struct page *page) | |||
| 2057 | u64 private; | 2051 | u64 private; |
| 2058 | u64 private_failure; | 2052 | u64 private_failure; |
| 2059 | struct io_failure_record *failrec; | 2053 | struct io_failure_record *failrec; |
| 2060 | struct btrfs_fs_info *fs_info; | 2054 | struct inode *inode = page->mapping->host; |
| 2055 | struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; | ||
| 2061 | struct extent_state *state; | 2056 | struct extent_state *state; |
| 2062 | int num_copies; | 2057 | int num_copies; |
| 2063 | int did_repair = 0; | 2058 | int did_repair = 0; |
| 2064 | int ret; | 2059 | int ret; |
| 2065 | struct inode *inode = page->mapping->host; | ||
| 2066 | 2060 | ||
| 2067 | private = 0; | 2061 | private = 0; |
| 2068 | ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, | 2062 | ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, |
| @@ -2085,6 +2079,8 @@ static int clean_io_failure(u64 start, struct page *page) | |||
| 2085 | did_repair = 1; | 2079 | did_repair = 1; |
| 2086 | goto out; | 2080 | goto out; |
| 2087 | } | 2081 | } |
| 2082 | if (fs_info->sb->s_flags & MS_RDONLY) | ||
| 2083 | goto out; | ||
| 2088 | 2084 | ||
| 2089 | spin_lock(&BTRFS_I(inode)->io_tree.lock); | 2085 | spin_lock(&BTRFS_I(inode)->io_tree.lock); |
| 2090 | state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, | 2086 | state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, |
| @@ -2094,7 +2090,6 @@ static int clean_io_failure(u64 start, struct page *page) | |||
| 2094 | 2090 | ||
| 2095 | if (state && state->start <= failrec->start && | 2091 | if (state && state->start <= failrec->start && |
| 2096 | state->end >= failrec->start + failrec->len - 1) { | 2092 | state->end >= failrec->start + failrec->len - 1) { |
| 2097 | fs_info = BTRFS_I(inode)->root->fs_info; | ||
| 2098 | num_copies = btrfs_num_copies(fs_info, failrec->logical, | 2093 | num_copies = btrfs_num_copies(fs_info, failrec->logical, |
| 2099 | failrec->len); | 2094 | failrec->len); |
| 2100 | if (num_copies > 1) { | 2095 | if (num_copies > 1) { |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index da8d2f696ac5..f1a77449d032 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -2129,7 +2129,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path, | |||
| 2129 | old->extent_offset, fs_info, | 2129 | old->extent_offset, fs_info, |
| 2130 | path, record_one_backref, | 2130 | path, record_one_backref, |
| 2131 | old); | 2131 | old); |
| 2132 | BUG_ON(ret < 0 && ret != -ENOENT); | 2132 | if (ret < 0 && ret != -ENOENT) |
| 2133 | return false; | ||
| 2133 | 2134 | ||
| 2134 | /* no backref to be processed for this extent */ | 2135 | /* no backref to be processed for this extent */ |
| 2135 | if (!old->count) { | 2136 | if (!old->count) { |
| @@ -6186,8 +6187,7 @@ insert: | |||
| 6186 | write_unlock(&em_tree->lock); | 6187 | write_unlock(&em_tree->lock); |
| 6187 | out: | 6188 | out: |
| 6188 | 6189 | ||
| 6189 | if (em) | 6190 | trace_btrfs_get_extent(root, em); |
| 6190 | trace_btrfs_get_extent(root, em); | ||
| 6191 | 6191 | ||
| 6192 | if (path) | 6192 | if (path) |
| 6193 | btrfs_free_path(path); | 6193 | btrfs_free_path(path); |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a111622598b0..21da5762b0b1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -2121,7 +2121,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
| 2121 | 2121 | ||
| 2122 | err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); | 2122 | err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); |
| 2123 | if (err == -EINTR) | 2123 | if (err == -EINTR) |
| 2124 | goto out; | 2124 | goto out_drop_write; |
| 2125 | dentry = lookup_one_len(vol_args->name, parent, namelen); | 2125 | dentry = lookup_one_len(vol_args->name, parent, namelen); |
| 2126 | if (IS_ERR(dentry)) { | 2126 | if (IS_ERR(dentry)) { |
| 2127 | err = PTR_ERR(dentry); | 2127 | err = PTR_ERR(dentry); |
| @@ -2284,6 +2284,7 @@ out_dput: | |||
| 2284 | dput(dentry); | 2284 | dput(dentry); |
| 2285 | out_unlock_dir: | 2285 | out_unlock_dir: |
| 2286 | mutex_unlock(&dir->i_mutex); | 2286 | mutex_unlock(&dir->i_mutex); |
| 2287 | out_drop_write: | ||
| 2287 | mnt_drop_write_file(file); | 2288 | mnt_drop_write_file(file); |
| 2288 | out: | 2289 | out: |
| 2289 | kfree(vol_args); | 2290 | kfree(vol_args); |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 25a8f3812f14..69582d5b69d1 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -638,6 +638,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) | |||
| 638 | WARN_ON(nr < 0); | 638 | WARN_ON(nr < 0); |
| 639 | } | 639 | } |
| 640 | } | 640 | } |
| 641 | list_splice_tail(&splice, &fs_info->ordered_roots); | ||
| 641 | spin_unlock(&fs_info->ordered_root_lock); | 642 | spin_unlock(&fs_info->ordered_root_lock); |
| 642 | } | 643 | } |
| 643 | 644 | ||
| @@ -803,7 +804,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
| 803 | btrfs_put_ordered_extent(ordered); | 804 | btrfs_put_ordered_extent(ordered); |
| 804 | break; | 805 | break; |
| 805 | } | 806 | } |
| 806 | if (ordered->file_offset + ordered->len < start) { | 807 | if (ordered->file_offset + ordered->len <= start) { |
| 807 | btrfs_put_ordered_extent(ordered); | 808 | btrfs_put_ordered_extent(ordered); |
| 808 | break; | 809 | break; |
| 809 | } | 810 | } |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index ce459a7cb16d..429c73c374b8 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
| @@ -571,7 +571,9 @@ static int is_cowonly_root(u64 root_objectid) | |||
| 571 | root_objectid == BTRFS_CHUNK_TREE_OBJECTID || | 571 | root_objectid == BTRFS_CHUNK_TREE_OBJECTID || |
| 572 | root_objectid == BTRFS_DEV_TREE_OBJECTID || | 572 | root_objectid == BTRFS_DEV_TREE_OBJECTID || |
| 573 | root_objectid == BTRFS_TREE_LOG_OBJECTID || | 573 | root_objectid == BTRFS_TREE_LOG_OBJECTID || |
| 574 | root_objectid == BTRFS_CSUM_TREE_OBJECTID) | 574 | root_objectid == BTRFS_CSUM_TREE_OBJECTID || |
| 575 | root_objectid == BTRFS_UUID_TREE_OBJECTID || | ||
| 576 | root_objectid == BTRFS_QUOTA_TREE_OBJECTID) | ||
| 575 | return 1; | 577 | return 1; |
| 576 | return 0; | 578 | return 0; |
| 577 | } | 579 | } |
| @@ -1264,10 +1266,10 @@ static int __must_check __add_reloc_root(struct btrfs_root *root) | |||
| 1264 | } | 1266 | } |
| 1265 | 1267 | ||
| 1266 | /* | 1268 | /* |
| 1267 | * helper to update/delete the 'address of tree root -> reloc tree' | 1269 | * helper to delete the 'address of tree root -> reloc tree' |
| 1268 | * mapping | 1270 | * mapping |
| 1269 | */ | 1271 | */ |
| 1270 | static int __update_reloc_root(struct btrfs_root *root, int del) | 1272 | static void __del_reloc_root(struct btrfs_root *root) |
| 1271 | { | 1273 | { |
| 1272 | struct rb_node *rb_node; | 1274 | struct rb_node *rb_node; |
| 1273 | struct mapping_node *node = NULL; | 1275 | struct mapping_node *node = NULL; |
| @@ -1275,7 +1277,7 @@ static int __update_reloc_root(struct btrfs_root *root, int del) | |||
| 1275 | 1277 | ||
| 1276 | spin_lock(&rc->reloc_root_tree.lock); | 1278 | spin_lock(&rc->reloc_root_tree.lock); |
| 1277 | rb_node = tree_search(&rc->reloc_root_tree.rb_root, | 1279 | rb_node = tree_search(&rc->reloc_root_tree.rb_root, |
| 1278 | root->commit_root->start); | 1280 | root->node->start); |
| 1279 | if (rb_node) { | 1281 | if (rb_node) { |
| 1280 | node = rb_entry(rb_node, struct mapping_node, rb_node); | 1282 | node = rb_entry(rb_node, struct mapping_node, rb_node); |
| 1281 | rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); | 1283 | rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); |
| @@ -1283,23 +1285,45 @@ static int __update_reloc_root(struct btrfs_root *root, int del) | |||
| 1283 | spin_unlock(&rc->reloc_root_tree.lock); | 1285 | spin_unlock(&rc->reloc_root_tree.lock); |
| 1284 | 1286 | ||
| 1285 | if (!node) | 1287 | if (!node) |
| 1286 | return 0; | 1288 | return; |
| 1287 | BUG_ON((struct btrfs_root *)node->data != root); | 1289 | BUG_ON((struct btrfs_root *)node->data != root); |
| 1288 | 1290 | ||
| 1289 | if (!del) { | 1291 | spin_lock(&root->fs_info->trans_lock); |
| 1290 | spin_lock(&rc->reloc_root_tree.lock); | 1292 | list_del_init(&root->root_list); |
| 1291 | node->bytenr = root->node->start; | 1293 | spin_unlock(&root->fs_info->trans_lock); |
| 1292 | rb_node = tree_insert(&rc->reloc_root_tree.rb_root, | 1294 | kfree(node); |
| 1293 | node->bytenr, &node->rb_node); | 1295 | } |
| 1294 | spin_unlock(&rc->reloc_root_tree.lock); | 1296 | |
| 1295 | if (rb_node) | 1297 | /* |
| 1296 | backref_tree_panic(rb_node, -EEXIST, node->bytenr); | 1298 | * helper to update the 'address of tree root -> reloc tree' |
| 1297 | } else { | 1299 | * mapping |
| 1298 | spin_lock(&root->fs_info->trans_lock); | 1300 | */ |
| 1299 | list_del_init(&root->root_list); | 1301 | static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr) |
| 1300 | spin_unlock(&root->fs_info->trans_lock); | 1302 | { |
| 1301 | kfree(node); | 1303 | struct rb_node *rb_node; |
| 1304 | struct mapping_node *node = NULL; | ||
| 1305 | struct reloc_control *rc = root->fs_info->reloc_ctl; | ||
| 1306 | |||
| 1307 | spin_lock(&rc->reloc_root_tree.lock); | ||
| 1308 | rb_node = tree_search(&rc->reloc_root_tree.rb_root, | ||
| 1309 | root->node->start); | ||
| 1310 | if (rb_node) { | ||
| 1311 | node = rb_entry(rb_node, struct mapping_node, rb_node); | ||
| 1312 | rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); | ||
| 1302 | } | 1313 | } |
| 1314 | spin_unlock(&rc->reloc_root_tree.lock); | ||
| 1315 | |||
| 1316 | if (!node) | ||
| 1317 | return 0; | ||
| 1318 | BUG_ON((struct btrfs_root *)node->data != root); | ||
| 1319 | |||
| 1320 | spin_lock(&rc->reloc_root_tree.lock); | ||
| 1321 | node->bytenr = new_bytenr; | ||
| 1322 | rb_node = tree_insert(&rc->reloc_root_tree.rb_root, | ||
| 1323 | node->bytenr, &node->rb_node); | ||
| 1324 | spin_unlock(&rc->reloc_root_tree.lock); | ||
| 1325 | if (rb_node) | ||
| 1326 | backref_tree_panic(rb_node, -EEXIST, node->bytenr); | ||
| 1303 | return 0; | 1327 | return 0; |
| 1304 | } | 1328 | } |
| 1305 | 1329 | ||
| @@ -1420,7 +1444,6 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
| 1420 | { | 1444 | { |
| 1421 | struct btrfs_root *reloc_root; | 1445 | struct btrfs_root *reloc_root; |
| 1422 | struct btrfs_root_item *root_item; | 1446 | struct btrfs_root_item *root_item; |
| 1423 | int del = 0; | ||
| 1424 | int ret; | 1447 | int ret; |
| 1425 | 1448 | ||
| 1426 | if (!root->reloc_root) | 1449 | if (!root->reloc_root) |
| @@ -1432,11 +1455,9 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
| 1432 | if (root->fs_info->reloc_ctl->merge_reloc_tree && | 1455 | if (root->fs_info->reloc_ctl->merge_reloc_tree && |
| 1433 | btrfs_root_refs(root_item) == 0) { | 1456 | btrfs_root_refs(root_item) == 0) { |
| 1434 | root->reloc_root = NULL; | 1457 | root->reloc_root = NULL; |
| 1435 | del = 1; | 1458 | __del_reloc_root(reloc_root); |
| 1436 | } | 1459 | } |
| 1437 | 1460 | ||
| 1438 | __update_reloc_root(reloc_root, del); | ||
| 1439 | |||
| 1440 | if (reloc_root->commit_root != reloc_root->node) { | 1461 | if (reloc_root->commit_root != reloc_root->node) { |
| 1441 | btrfs_set_root_node(root_item, reloc_root->node); | 1462 | btrfs_set_root_node(root_item, reloc_root->node); |
| 1442 | free_extent_buffer(reloc_root->commit_root); | 1463 | free_extent_buffer(reloc_root->commit_root); |
| @@ -2287,7 +2308,7 @@ void free_reloc_roots(struct list_head *list) | |||
| 2287 | while (!list_empty(list)) { | 2308 | while (!list_empty(list)) { |
| 2288 | reloc_root = list_entry(list->next, struct btrfs_root, | 2309 | reloc_root = list_entry(list->next, struct btrfs_root, |
| 2289 | root_list); | 2310 | root_list); |
| 2290 | __update_reloc_root(reloc_root, 1); | 2311 | __del_reloc_root(reloc_root); |
| 2291 | free_extent_buffer(reloc_root->node); | 2312 | free_extent_buffer(reloc_root->node); |
| 2292 | free_extent_buffer(reloc_root->commit_root); | 2313 | free_extent_buffer(reloc_root->commit_root); |
| 2293 | kfree(reloc_root); | 2314 | kfree(reloc_root); |
| @@ -2332,7 +2353,7 @@ again: | |||
| 2332 | 2353 | ||
| 2333 | ret = merge_reloc_root(rc, root); | 2354 | ret = merge_reloc_root(rc, root); |
| 2334 | if (ret) { | 2355 | if (ret) { |
| 2335 | __update_reloc_root(reloc_root, 1); | 2356 | __del_reloc_root(reloc_root); |
| 2336 | free_extent_buffer(reloc_root->node); | 2357 | free_extent_buffer(reloc_root->node); |
| 2337 | free_extent_buffer(reloc_root->commit_root); | 2358 | free_extent_buffer(reloc_root->commit_root); |
| 2338 | kfree(reloc_root); | 2359 | kfree(reloc_root); |
| @@ -2388,6 +2409,13 @@ out: | |||
| 2388 | btrfs_std_error(root->fs_info, ret); | 2409 | btrfs_std_error(root->fs_info, ret); |
| 2389 | if (!list_empty(&reloc_roots)) | 2410 | if (!list_empty(&reloc_roots)) |
| 2390 | free_reloc_roots(&reloc_roots); | 2411 | free_reloc_roots(&reloc_roots); |
| 2412 | |||
| 2413 | /* new reloc root may be added */ | ||
| 2414 | mutex_lock(&root->fs_info->reloc_mutex); | ||
| 2415 | list_splice_init(&rc->reloc_roots, &reloc_roots); | ||
| 2416 | mutex_unlock(&root->fs_info->reloc_mutex); | ||
| 2417 | if (!list_empty(&reloc_roots)) | ||
| 2418 | free_reloc_roots(&reloc_roots); | ||
| 2391 | } | 2419 | } |
| 2392 | 2420 | ||
| 2393 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); | 2421 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); |
| @@ -4522,6 +4550,11 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, | |||
| 4522 | BUG_ON(rc->stage == UPDATE_DATA_PTRS && | 4550 | BUG_ON(rc->stage == UPDATE_DATA_PTRS && |
| 4523 | root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); | 4551 | root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); |
| 4524 | 4552 | ||
| 4553 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
| 4554 | if (buf == root->node) | ||
| 4555 | __update_reloc_root(root, cow->start); | ||
| 4556 | } | ||
| 4557 | |||
| 4525 | level = btrfs_header_level(buf); | 4558 | level = btrfs_header_level(buf); |
| 4526 | if (btrfs_header_generation(buf) <= | 4559 | if (btrfs_header_generation(buf) <= |
| 4527 | btrfs_root_last_snapshot(&root->root_item)) | 4560 | btrfs_root_last_snapshot(&root->root_item)) |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2544805544f0..1fd3f33c330a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
| @@ -208,7 +208,6 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | |||
| 208 | int is_metadata, int have_csum, | 208 | int is_metadata, int have_csum, |
| 209 | const u8 *csum, u64 generation, | 209 | const u8 *csum, u64 generation, |
| 210 | u16 csum_size); | 210 | u16 csum_size); |
| 211 | static void scrub_complete_bio_end_io(struct bio *bio, int err); | ||
| 212 | static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, | 211 | static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, |
| 213 | struct scrub_block *sblock_good, | 212 | struct scrub_block *sblock_good, |
| 214 | int force_write); | 213 | int force_write); |
| @@ -938,8 +937,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 938 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | 937 | BTRFS_DEV_STAT_CORRUPTION_ERRS); |
| 939 | } | 938 | } |
| 940 | 939 | ||
| 941 | if (sctx->readonly && !sctx->is_dev_replace) | 940 | if (sctx->readonly) { |
| 942 | goto did_not_correct_error; | 941 | ASSERT(!sctx->is_dev_replace); |
| 942 | goto out; | ||
| 943 | } | ||
| 943 | 944 | ||
| 944 | if (!is_metadata && !have_csum) { | 945 | if (!is_metadata && !have_csum) { |
| 945 | struct scrub_fixup_nodatasum *fixup_nodatasum; | 946 | struct scrub_fixup_nodatasum *fixup_nodatasum; |
| @@ -1292,7 +1293,6 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
| 1292 | for (page_num = 0; page_num < sblock->page_count; page_num++) { | 1293 | for (page_num = 0; page_num < sblock->page_count; page_num++) { |
| 1293 | struct bio *bio; | 1294 | struct bio *bio; |
| 1294 | struct scrub_page *page = sblock->pagev[page_num]; | 1295 | struct scrub_page *page = sblock->pagev[page_num]; |
| 1295 | DECLARE_COMPLETION_ONSTACK(complete); | ||
| 1296 | 1296 | ||
| 1297 | if (page->dev->bdev == NULL) { | 1297 | if (page->dev->bdev == NULL) { |
| 1298 | page->io_error = 1; | 1298 | page->io_error = 1; |
| @@ -1309,18 +1309,11 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
| 1309 | } | 1309 | } |
| 1310 | bio->bi_bdev = page->dev->bdev; | 1310 | bio->bi_bdev = page->dev->bdev; |
| 1311 | bio->bi_sector = page->physical >> 9; | 1311 | bio->bi_sector = page->physical >> 9; |
| 1312 | bio->bi_end_io = scrub_complete_bio_end_io; | ||
| 1313 | bio->bi_private = &complete; | ||
| 1314 | 1312 | ||
| 1315 | bio_add_page(bio, page->page, PAGE_SIZE, 0); | 1313 | bio_add_page(bio, page->page, PAGE_SIZE, 0); |
| 1316 | btrfsic_submit_bio(READ, bio); | 1314 | if (btrfsic_submit_bio_wait(READ, bio)) |
| 1317 | |||
| 1318 | /* this will also unplug the queue */ | ||
| 1319 | wait_for_completion(&complete); | ||
| 1320 | |||
| 1321 | page->io_error = !test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
| 1322 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
| 1323 | sblock->no_io_error_seen = 0; | 1315 | sblock->no_io_error_seen = 0; |
| 1316 | |||
| 1324 | bio_put(bio); | 1317 | bio_put(bio); |
| 1325 | } | 1318 | } |
| 1326 | 1319 | ||
| @@ -1389,11 +1382,6 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | |||
| 1389 | sblock->checksum_error = 1; | 1382 | sblock->checksum_error = 1; |
| 1390 | } | 1383 | } |
| 1391 | 1384 | ||
| 1392 | static void scrub_complete_bio_end_io(struct bio *bio, int err) | ||
| 1393 | { | ||
| 1394 | complete((struct completion *)bio->bi_private); | ||
| 1395 | } | ||
| 1396 | |||
| 1397 | static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, | 1385 | static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, |
| 1398 | struct scrub_block *sblock_good, | 1386 | struct scrub_block *sblock_good, |
| 1399 | int force_write) | 1387 | int force_write) |
| @@ -1428,7 +1416,6 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
| 1428 | sblock_bad->checksum_error || page_bad->io_error) { | 1416 | sblock_bad->checksum_error || page_bad->io_error) { |
| 1429 | struct bio *bio; | 1417 | struct bio *bio; |
| 1430 | int ret; | 1418 | int ret; |
| 1431 | DECLARE_COMPLETION_ONSTACK(complete); | ||
| 1432 | 1419 | ||
| 1433 | if (!page_bad->dev->bdev) { | 1420 | if (!page_bad->dev->bdev) { |
| 1434 | printk_ratelimited(KERN_WARNING | 1421 | printk_ratelimited(KERN_WARNING |
| @@ -1441,19 +1428,14 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
| 1441 | return -EIO; | 1428 | return -EIO; |
| 1442 | bio->bi_bdev = page_bad->dev->bdev; | 1429 | bio->bi_bdev = page_bad->dev->bdev; |
| 1443 | bio->bi_sector = page_bad->physical >> 9; | 1430 | bio->bi_sector = page_bad->physical >> 9; |
| 1444 | bio->bi_end_io = scrub_complete_bio_end_io; | ||
| 1445 | bio->bi_private = &complete; | ||
| 1446 | 1431 | ||
| 1447 | ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0); | 1432 | ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0); |
| 1448 | if (PAGE_SIZE != ret) { | 1433 | if (PAGE_SIZE != ret) { |
| 1449 | bio_put(bio); | 1434 | bio_put(bio); |
| 1450 | return -EIO; | 1435 | return -EIO; |
| 1451 | } | 1436 | } |
| 1452 | btrfsic_submit_bio(WRITE, bio); | ||
| 1453 | 1437 | ||
| 1454 | /* this will also unplug the queue */ | 1438 | if (btrfsic_submit_bio_wait(WRITE, bio)) { |
| 1455 | wait_for_completion(&complete); | ||
| 1456 | if (!bio_flagged(bio, BIO_UPTODATE)) { | ||
| 1457 | btrfs_dev_stat_inc_and_print(page_bad->dev, | 1439 | btrfs_dev_stat_inc_and_print(page_bad->dev, |
| 1458 | BTRFS_DEV_STAT_WRITE_ERRS); | 1440 | BTRFS_DEV_STAT_WRITE_ERRS); |
| 1459 | btrfs_dev_replace_stats_inc( | 1441 | btrfs_dev_replace_stats_inc( |
| @@ -3373,7 +3355,6 @@ static int write_page_nocow(struct scrub_ctx *sctx, | |||
| 3373 | struct bio *bio; | 3355 | struct bio *bio; |
| 3374 | struct btrfs_device *dev; | 3356 | struct btrfs_device *dev; |
| 3375 | int ret; | 3357 | int ret; |
| 3376 | DECLARE_COMPLETION_ONSTACK(compl); | ||
| 3377 | 3358 | ||
| 3378 | dev = sctx->wr_ctx.tgtdev; | 3359 | dev = sctx->wr_ctx.tgtdev; |
| 3379 | if (!dev) | 3360 | if (!dev) |
| @@ -3390,8 +3371,6 @@ static int write_page_nocow(struct scrub_ctx *sctx, | |||
| 3390 | spin_unlock(&sctx->stat_lock); | 3371 | spin_unlock(&sctx->stat_lock); |
| 3391 | return -ENOMEM; | 3372 | return -ENOMEM; |
| 3392 | } | 3373 | } |
| 3393 | bio->bi_private = &compl; | ||
| 3394 | bio->bi_end_io = scrub_complete_bio_end_io; | ||
| 3395 | bio->bi_size = 0; | 3374 | bio->bi_size = 0; |
| 3396 | bio->bi_sector = physical_for_dev_replace >> 9; | 3375 | bio->bi_sector = physical_for_dev_replace >> 9; |
| 3397 | bio->bi_bdev = dev->bdev; | 3376 | bio->bi_bdev = dev->bdev; |
| @@ -3402,10 +3381,8 @@ leave_with_eio: | |||
| 3402 | btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); | 3381 | btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); |
| 3403 | return -EIO; | 3382 | return -EIO; |
| 3404 | } | 3383 | } |
| 3405 | btrfsic_submit_bio(WRITE_SYNC, bio); | ||
| 3406 | wait_for_completion(&compl); | ||
| 3407 | 3384 | ||
| 3408 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | 3385 | if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) |
| 3409 | goto leave_with_eio; | 3386 | goto leave_with_eio; |
| 3410 | 3387 | ||
| 3411 | bio_put(bio); | 3388 | bio_put(bio); |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 6837fe87f3a6..945d1db98f26 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
| @@ -4723,8 +4723,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
| 4723 | } | 4723 | } |
| 4724 | 4724 | ||
| 4725 | if (!access_ok(VERIFY_READ, arg->clone_sources, | 4725 | if (!access_ok(VERIFY_READ, arg->clone_sources, |
| 4726 | sizeof(*arg->clone_sources * | 4726 | sizeof(*arg->clone_sources) * |
| 4727 | arg->clone_sources_count))) { | 4727 | arg->clone_sources_count)) { |
| 4728 | ret = -EFAULT; | 4728 | ret = -EFAULT; |
| 4729 | goto out; | 4729 | goto out; |
| 4730 | } | 4730 | } |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2d8ac1bf0cf9..d71a11d13dfa 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -432,7 +432,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 432 | } else { | 432 | } else { |
| 433 | printk(KERN_INFO "btrfs: setting nodatacow\n"); | 433 | printk(KERN_INFO "btrfs: setting nodatacow\n"); |
| 434 | } | 434 | } |
| 435 | info->compress_type = BTRFS_COMPRESS_NONE; | ||
| 436 | btrfs_clear_opt(info->mount_opt, COMPRESS); | 435 | btrfs_clear_opt(info->mount_opt, COMPRESS); |
| 437 | btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); | 436 | btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); |
| 438 | btrfs_set_opt(info->mount_opt, NODATACOW); | 437 | btrfs_set_opt(info->mount_opt, NODATACOW); |
| @@ -461,7 +460,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 461 | btrfs_set_fs_incompat(info, COMPRESS_LZO); | 460 | btrfs_set_fs_incompat(info, COMPRESS_LZO); |
| 462 | } else if (strncmp(args[0].from, "no", 2) == 0) { | 461 | } else if (strncmp(args[0].from, "no", 2) == 0) { |
| 463 | compress_type = "no"; | 462 | compress_type = "no"; |
| 464 | info->compress_type = BTRFS_COMPRESS_NONE; | ||
| 465 | btrfs_clear_opt(info->mount_opt, COMPRESS); | 463 | btrfs_clear_opt(info->mount_opt, COMPRESS); |
| 466 | btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); | 464 | btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); |
| 467 | compress_force = false; | 465 | compress_force = false; |
| @@ -474,9 +472,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 474 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); | 472 | btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); |
| 475 | pr_info("btrfs: force %s compression\n", | 473 | pr_info("btrfs: force %s compression\n", |
| 476 | compress_type); | 474 | compress_type); |
| 477 | } else | 475 | } else if (btrfs_test_opt(root, COMPRESS)) { |
| 478 | pr_info("btrfs: use %s compression\n", | 476 | pr_info("btrfs: use %s compression\n", |
| 479 | compress_type); | 477 | compress_type); |
| 478 | } | ||
| 480 | break; | 479 | break; |
| 481 | case Opt_ssd: | 480 | case Opt_ssd: |
| 482 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); | 481 | printk(KERN_INFO "btrfs: use ssd allocation scheme\n"); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 57c16b46afbd..c6a872a8a468 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -1480,7 +1480,7 @@ static void do_async_commit(struct work_struct *work) | |||
| 1480 | * We've got freeze protection passed with the transaction. | 1480 | * We've got freeze protection passed with the transaction. |
| 1481 | * Tell lockdep about it. | 1481 | * Tell lockdep about it. |
| 1482 | */ | 1482 | */ |
| 1483 | if (ac->newtrans->type < TRANS_JOIN_NOLOCK) | 1483 | if (ac->newtrans->type & __TRANS_FREEZABLE) |
| 1484 | rwsem_acquire_read( | 1484 | rwsem_acquire_read( |
| 1485 | &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], | 1485 | &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], |
| 1486 | 0, 1, _THIS_IP_); | 1486 | 0, 1, _THIS_IP_); |
| @@ -1521,7 +1521,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
| 1521 | * Tell lockdep we've released the freeze rwsem, since the | 1521 | * Tell lockdep we've released the freeze rwsem, since the |
| 1522 | * async commit thread will be the one to unlock it. | 1522 | * async commit thread will be the one to unlock it. |
| 1523 | */ | 1523 | */ |
| 1524 | if (trans->type < TRANS_JOIN_NOLOCK) | 1524 | if (ac->newtrans->type & __TRANS_FREEZABLE) |
| 1525 | rwsem_release( | 1525 | rwsem_release( |
| 1526 | &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], | 1526 | &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], |
| 1527 | 1, _THIS_IP_); | 1527 | 1, _THIS_IP_); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 744553c83fe2..9f7fc51ca334 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -3697,7 +3697,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 3697 | ret = btrfs_truncate_inode_items(trans, log, | 3697 | ret = btrfs_truncate_inode_items(trans, log, |
| 3698 | inode, 0, 0); | 3698 | inode, 0, 0); |
| 3699 | } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, | 3699 | } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, |
| 3700 | &BTRFS_I(inode)->runtime_flags)) { | 3700 | &BTRFS_I(inode)->runtime_flags) || |
| 3701 | inode_only == LOG_INODE_EXISTS) { | ||
| 3701 | if (inode_only == LOG_INODE_ALL) | 3702 | if (inode_only == LOG_INODE_ALL) |
| 3702 | fast_search = true; | 3703 | fast_search = true; |
| 3703 | max_key.type = BTRFS_XATTR_ITEM_KEY; | 3704 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
| @@ -3801,7 +3802,7 @@ log_extents: | |||
| 3801 | err = ret; | 3802 | err = ret; |
| 3802 | goto out_unlock; | 3803 | goto out_unlock; |
| 3803 | } | 3804 | } |
| 3804 | } else { | 3805 | } else if (inode_only == LOG_INODE_ALL) { |
| 3805 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; | 3806 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; |
| 3806 | struct extent_map *em, *n; | 3807 | struct extent_map *em, *n; |
| 3807 | 3808 | ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0db637097862..92303f42baaa 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -5394,7 +5394,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio, | |||
| 5394 | { | 5394 | { |
| 5395 | struct bio_vec *prev; | 5395 | struct bio_vec *prev; |
| 5396 | struct request_queue *q = bdev_get_queue(bdev); | 5396 | struct request_queue *q = bdev_get_queue(bdev); |
| 5397 | unsigned short max_sectors = queue_max_sectors(q); | 5397 | unsigned int max_sectors = queue_max_sectors(q); |
| 5398 | struct bvec_merge_data bvm = { | 5398 | struct bvec_merge_data bvm = { |
| 5399 | .bi_bdev = bdev, | 5399 | .bi_bdev = bdev, |
| 5400 | .bi_sector = sector, | 5400 | .bi_sector = sector, |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 6df8bd481425..1e561c059539 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
| @@ -216,7 +216,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) | |||
| 216 | } | 216 | } |
| 217 | SetPageUptodate(page); | 217 | SetPageUptodate(page); |
| 218 | 218 | ||
| 219 | if (err == 0) | 219 | if (err >= 0) |
| 220 | ceph_readpage_to_fscache(inode, page); | 220 | ceph_readpage_to_fscache(inode, page); |
| 221 | 221 | ||
| 222 | out: | 222 | out: |
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 7db2e6ca4b8f..8c44fdd4e1c3 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c | |||
| @@ -324,6 +324,9 @@ void ceph_invalidate_fscache_page(struct inode* inode, struct page *page) | |||
| 324 | { | 324 | { |
| 325 | struct ceph_inode_info *ci = ceph_inode(inode); | 325 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 326 | 326 | ||
| 327 | if (!PageFsCache(page)) | ||
| 328 | return; | ||
| 329 | |||
| 327 | fscache_wait_on_page_write(ci->fscache, page); | 330 | fscache_wait_on_page_write(ci->fscache, page); |
| 328 | fscache_uncache_page(ci->fscache, page); | 331 | fscache_uncache_page(ci->fscache, page); |
| 329 | } | 332 | } |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 13976c33332e..3c0a4bd74996 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
| @@ -897,7 +897,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci) | |||
| 897 | * caller should hold i_ceph_lock. | 897 | * caller should hold i_ceph_lock. |
| 898 | * caller will not hold session s_mutex if called from destroy_inode. | 898 | * caller will not hold session s_mutex if called from destroy_inode. |
| 899 | */ | 899 | */ |
| 900 | void __ceph_remove_cap(struct ceph_cap *cap) | 900 | void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) |
| 901 | { | 901 | { |
| 902 | struct ceph_mds_session *session = cap->session; | 902 | struct ceph_mds_session *session = cap->session; |
| 903 | struct ceph_inode_info *ci = cap->ci; | 903 | struct ceph_inode_info *ci = cap->ci; |
| @@ -909,6 +909,16 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
| 909 | 909 | ||
| 910 | /* remove from session list */ | 910 | /* remove from session list */ |
| 911 | spin_lock(&session->s_cap_lock); | 911 | spin_lock(&session->s_cap_lock); |
| 912 | /* | ||
| 913 | * s_cap_reconnect is protected by s_cap_lock. no one changes | ||
| 914 | * s_cap_gen while session is in the reconnect state. | ||
| 915 | */ | ||
| 916 | if (queue_release && | ||
| 917 | (!session->s_cap_reconnect || | ||
| 918 | cap->cap_gen == session->s_cap_gen)) | ||
| 919 | __queue_cap_release(session, ci->i_vino.ino, cap->cap_id, | ||
| 920 | cap->mseq, cap->issue_seq); | ||
| 921 | |||
| 912 | if (session->s_cap_iterator == cap) { | 922 | if (session->s_cap_iterator == cap) { |
| 913 | /* not yet, we are iterating over this very cap */ | 923 | /* not yet, we are iterating over this very cap */ |
| 914 | dout("__ceph_remove_cap delaying %p removal from session %p\n", | 924 | dout("__ceph_remove_cap delaying %p removal from session %p\n", |
| @@ -1023,7 +1033,6 @@ void __queue_cap_release(struct ceph_mds_session *session, | |||
| 1023 | struct ceph_mds_cap_release *head; | 1033 | struct ceph_mds_cap_release *head; |
| 1024 | struct ceph_mds_cap_item *item; | 1034 | struct ceph_mds_cap_item *item; |
| 1025 | 1035 | ||
| 1026 | spin_lock(&session->s_cap_lock); | ||
| 1027 | BUG_ON(!session->s_num_cap_releases); | 1036 | BUG_ON(!session->s_num_cap_releases); |
| 1028 | msg = list_first_entry(&session->s_cap_releases, | 1037 | msg = list_first_entry(&session->s_cap_releases, |
| 1029 | struct ceph_msg, list_head); | 1038 | struct ceph_msg, list_head); |
| @@ -1052,7 +1061,6 @@ void __queue_cap_release(struct ceph_mds_session *session, | |||
| 1052 | (int)CEPH_CAPS_PER_RELEASE, | 1061 | (int)CEPH_CAPS_PER_RELEASE, |
| 1053 | (int)msg->front.iov_len); | 1062 | (int)msg->front.iov_len); |
| 1054 | } | 1063 | } |
| 1055 | spin_unlock(&session->s_cap_lock); | ||
| 1056 | } | 1064 | } |
| 1057 | 1065 | ||
| 1058 | /* | 1066 | /* |
| @@ -1067,12 +1075,8 @@ void ceph_queue_caps_release(struct inode *inode) | |||
| 1067 | p = rb_first(&ci->i_caps); | 1075 | p = rb_first(&ci->i_caps); |
| 1068 | while (p) { | 1076 | while (p) { |
| 1069 | struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); | 1077 | struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); |
| 1070 | struct ceph_mds_session *session = cap->session; | ||
| 1071 | |||
| 1072 | __queue_cap_release(session, ceph_ino(inode), cap->cap_id, | ||
| 1073 | cap->mseq, cap->issue_seq); | ||
| 1074 | p = rb_next(p); | 1078 | p = rb_next(p); |
| 1075 | __ceph_remove_cap(cap); | 1079 | __ceph_remove_cap(cap, true); |
| 1076 | } | 1080 | } |
| 1077 | } | 1081 | } |
| 1078 | 1082 | ||
| @@ -2791,7 +2795,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, | |||
| 2791 | } | 2795 | } |
| 2792 | spin_unlock(&mdsc->cap_dirty_lock); | 2796 | spin_unlock(&mdsc->cap_dirty_lock); |
| 2793 | } | 2797 | } |
| 2794 | __ceph_remove_cap(cap); | 2798 | __ceph_remove_cap(cap, false); |
| 2795 | } | 2799 | } |
| 2796 | /* else, we already released it */ | 2800 | /* else, we already released it */ |
| 2797 | 2801 | ||
| @@ -2931,9 +2935,12 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
| 2931 | if (!inode) { | 2935 | if (!inode) { |
| 2932 | dout(" i don't have ino %llx\n", vino.ino); | 2936 | dout(" i don't have ino %llx\n", vino.ino); |
| 2933 | 2937 | ||
| 2934 | if (op == CEPH_CAP_OP_IMPORT) | 2938 | if (op == CEPH_CAP_OP_IMPORT) { |
| 2939 | spin_lock(&session->s_cap_lock); | ||
| 2935 | __queue_cap_release(session, vino.ino, cap_id, | 2940 | __queue_cap_release(session, vino.ino, cap_id, |
| 2936 | mseq, seq); | 2941 | mseq, seq); |
| 2942 | spin_unlock(&session->s_cap_lock); | ||
| 2943 | } | ||
| 2937 | goto flush_cap_releases; | 2944 | goto flush_cap_releases; |
| 2938 | } | 2945 | } |
| 2939 | 2946 | ||
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 868b61d56cac..2a0bcaeb189a 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
| @@ -352,8 +352,18 @@ more: | |||
| 352 | } | 352 | } |
| 353 | 353 | ||
| 354 | /* note next offset and last dentry name */ | 354 | /* note next offset and last dentry name */ |
| 355 | rinfo = &req->r_reply_info; | ||
| 356 | if (le32_to_cpu(rinfo->dir_dir->frag) != frag) { | ||
| 357 | frag = le32_to_cpu(rinfo->dir_dir->frag); | ||
| 358 | if (ceph_frag_is_leftmost(frag)) | ||
| 359 | fi->next_offset = 2; | ||
| 360 | else | ||
| 361 | fi->next_offset = 0; | ||
| 362 | off = fi->next_offset; | ||
| 363 | } | ||
| 355 | fi->offset = fi->next_offset; | 364 | fi->offset = fi->next_offset; |
| 356 | fi->last_readdir = req; | 365 | fi->last_readdir = req; |
| 366 | fi->frag = frag; | ||
| 357 | 367 | ||
| 358 | if (req->r_reply_info.dir_end) { | 368 | if (req->r_reply_info.dir_end) { |
| 359 | kfree(fi->last_name); | 369 | kfree(fi->last_name); |
| @@ -363,7 +373,6 @@ more: | |||
| 363 | else | 373 | else |
| 364 | fi->next_offset = 0; | 374 | fi->next_offset = 0; |
| 365 | } else { | 375 | } else { |
| 366 | rinfo = &req->r_reply_info; | ||
| 367 | err = note_last_dentry(fi, | 376 | err = note_last_dentry(fi, |
| 368 | rinfo->dir_dname[rinfo->dir_nr-1], | 377 | rinfo->dir_dname[rinfo->dir_nr-1], |
| 369 | rinfo->dir_dname_len[rinfo->dir_nr-1]); | 378 | rinfo->dir_dname_len[rinfo->dir_nr-1]); |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 8549a48115f7..9a8e396aed89 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
| @@ -577,6 +577,8 @@ static int fill_inode(struct inode *inode, | |||
| 577 | int issued = 0, implemented; | 577 | int issued = 0, implemented; |
| 578 | struct timespec mtime, atime, ctime; | 578 | struct timespec mtime, atime, ctime; |
| 579 | u32 nsplits; | 579 | u32 nsplits; |
| 580 | struct ceph_inode_frag *frag; | ||
| 581 | struct rb_node *rb_node; | ||
| 580 | struct ceph_buffer *xattr_blob = NULL; | 582 | struct ceph_buffer *xattr_blob = NULL; |
| 581 | int err = 0; | 583 | int err = 0; |
| 582 | int queue_trunc = 0; | 584 | int queue_trunc = 0; |
| @@ -751,15 +753,38 @@ no_change: | |||
| 751 | /* FIXME: move me up, if/when version reflects fragtree changes */ | 753 | /* FIXME: move me up, if/when version reflects fragtree changes */ |
| 752 | nsplits = le32_to_cpu(info->fragtree.nsplits); | 754 | nsplits = le32_to_cpu(info->fragtree.nsplits); |
| 753 | mutex_lock(&ci->i_fragtree_mutex); | 755 | mutex_lock(&ci->i_fragtree_mutex); |
| 756 | rb_node = rb_first(&ci->i_fragtree); | ||
| 754 | for (i = 0; i < nsplits; i++) { | 757 | for (i = 0; i < nsplits; i++) { |
| 755 | u32 id = le32_to_cpu(info->fragtree.splits[i].frag); | 758 | u32 id = le32_to_cpu(info->fragtree.splits[i].frag); |
| 756 | struct ceph_inode_frag *frag = __get_or_create_frag(ci, id); | 759 | frag = NULL; |
| 757 | 760 | while (rb_node) { | |
| 758 | if (IS_ERR(frag)) | 761 | frag = rb_entry(rb_node, struct ceph_inode_frag, node); |
| 759 | continue; | 762 | if (ceph_frag_compare(frag->frag, id) >= 0) { |
| 763 | if (frag->frag != id) | ||
| 764 | frag = NULL; | ||
| 765 | else | ||
| 766 | rb_node = rb_next(rb_node); | ||
| 767 | break; | ||
| 768 | } | ||
| 769 | rb_node = rb_next(rb_node); | ||
| 770 | rb_erase(&frag->node, &ci->i_fragtree); | ||
| 771 | kfree(frag); | ||
| 772 | frag = NULL; | ||
| 773 | } | ||
| 774 | if (!frag) { | ||
| 775 | frag = __get_or_create_frag(ci, id); | ||
| 776 | if (IS_ERR(frag)) | ||
| 777 | continue; | ||
| 778 | } | ||
| 760 | frag->split_by = le32_to_cpu(info->fragtree.splits[i].by); | 779 | frag->split_by = le32_to_cpu(info->fragtree.splits[i].by); |
| 761 | dout(" frag %x split by %d\n", frag->frag, frag->split_by); | 780 | dout(" frag %x split by %d\n", frag->frag, frag->split_by); |
| 762 | } | 781 | } |
| 782 | while (rb_node) { | ||
| 783 | frag = rb_entry(rb_node, struct ceph_inode_frag, node); | ||
| 784 | rb_node = rb_next(rb_node); | ||
| 785 | rb_erase(&frag->node, &ci->i_fragtree); | ||
| 786 | kfree(frag); | ||
| 787 | } | ||
| 763 | mutex_unlock(&ci->i_fragtree_mutex); | 788 | mutex_unlock(&ci->i_fragtree_mutex); |
| 764 | 789 | ||
| 765 | /* were we issued a capability? */ | 790 | /* were we issued a capability? */ |
| @@ -1250,8 +1275,20 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, | |||
| 1250 | int err = 0, i; | 1275 | int err = 0, i; |
| 1251 | struct inode *snapdir = NULL; | 1276 | struct inode *snapdir = NULL; |
| 1252 | struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; | 1277 | struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; |
| 1253 | u64 frag = le32_to_cpu(rhead->args.readdir.frag); | ||
| 1254 | struct ceph_dentry_info *di; | 1278 | struct ceph_dentry_info *di; |
| 1279 | u64 r_readdir_offset = req->r_readdir_offset; | ||
| 1280 | u32 frag = le32_to_cpu(rhead->args.readdir.frag); | ||
| 1281 | |||
| 1282 | if (rinfo->dir_dir && | ||
| 1283 | le32_to_cpu(rinfo->dir_dir->frag) != frag) { | ||
| 1284 | dout("readdir_prepopulate got new frag %x -> %x\n", | ||
| 1285 | frag, le32_to_cpu(rinfo->dir_dir->frag)); | ||
| 1286 | frag = le32_to_cpu(rinfo->dir_dir->frag); | ||
| 1287 | if (ceph_frag_is_leftmost(frag)) | ||
| 1288 | r_readdir_offset = 2; | ||
| 1289 | else | ||
| 1290 | r_readdir_offset = 0; | ||
| 1291 | } | ||
| 1255 | 1292 | ||
| 1256 | if (req->r_aborted) | 1293 | if (req->r_aborted) |
| 1257 | return readdir_prepopulate_inodes_only(req, session); | 1294 | return readdir_prepopulate_inodes_only(req, session); |
| @@ -1315,7 +1352,7 @@ retry_lookup: | |||
| 1315 | } | 1352 | } |
| 1316 | 1353 | ||
| 1317 | di = dn->d_fsdata; | 1354 | di = dn->d_fsdata; |
| 1318 | di->offset = ceph_make_fpos(frag, i + req->r_readdir_offset); | 1355 | di->offset = ceph_make_fpos(frag, i + r_readdir_offset); |
| 1319 | 1356 | ||
| 1320 | /* inode */ | 1357 | /* inode */ |
| 1321 | if (dn->d_inode) { | 1358 | if (dn->d_inode) { |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index b7bda5d9611d..d90861f45210 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
| @@ -43,6 +43,7 @@ | |||
| 43 | */ | 43 | */ |
| 44 | 44 | ||
| 45 | struct ceph_reconnect_state { | 45 | struct ceph_reconnect_state { |
| 46 | int nr_caps; | ||
| 46 | struct ceph_pagelist *pagelist; | 47 | struct ceph_pagelist *pagelist; |
| 47 | bool flock; | 48 | bool flock; |
| 48 | }; | 49 | }; |
| @@ -443,6 +444,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, | |||
| 443 | INIT_LIST_HEAD(&s->s_waiting); | 444 | INIT_LIST_HEAD(&s->s_waiting); |
| 444 | INIT_LIST_HEAD(&s->s_unsafe); | 445 | INIT_LIST_HEAD(&s->s_unsafe); |
| 445 | s->s_num_cap_releases = 0; | 446 | s->s_num_cap_releases = 0; |
| 447 | s->s_cap_reconnect = 0; | ||
| 446 | s->s_cap_iterator = NULL; | 448 | s->s_cap_iterator = NULL; |
| 447 | INIT_LIST_HEAD(&s->s_cap_releases); | 449 | INIT_LIST_HEAD(&s->s_cap_releases); |
| 448 | INIT_LIST_HEAD(&s->s_cap_releases_done); | 450 | INIT_LIST_HEAD(&s->s_cap_releases_done); |
| @@ -642,6 +644,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc, | |||
| 642 | req->r_unsafe_dir = NULL; | 644 | req->r_unsafe_dir = NULL; |
| 643 | } | 645 | } |
| 644 | 646 | ||
| 647 | complete_all(&req->r_safe_completion); | ||
| 648 | |||
| 645 | ceph_mdsc_put_request(req); | 649 | ceph_mdsc_put_request(req); |
| 646 | } | 650 | } |
| 647 | 651 | ||
| @@ -986,7 +990,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
| 986 | dout("removing cap %p, ci is %p, inode is %p\n", | 990 | dout("removing cap %p, ci is %p, inode is %p\n", |
| 987 | cap, ci, &ci->vfs_inode); | 991 | cap, ci, &ci->vfs_inode); |
| 988 | spin_lock(&ci->i_ceph_lock); | 992 | spin_lock(&ci->i_ceph_lock); |
| 989 | __ceph_remove_cap(cap); | 993 | __ceph_remove_cap(cap, false); |
| 990 | if (!__ceph_is_any_real_caps(ci)) { | 994 | if (!__ceph_is_any_real_caps(ci)) { |
| 991 | struct ceph_mds_client *mdsc = | 995 | struct ceph_mds_client *mdsc = |
| 992 | ceph_sb_to_client(inode->i_sb)->mdsc; | 996 | ceph_sb_to_client(inode->i_sb)->mdsc; |
| @@ -1231,9 +1235,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) | |||
| 1231 | session->s_trim_caps--; | 1235 | session->s_trim_caps--; |
| 1232 | if (oissued) { | 1236 | if (oissued) { |
| 1233 | /* we aren't the only cap.. just remove us */ | 1237 | /* we aren't the only cap.. just remove us */ |
| 1234 | __queue_cap_release(session, ceph_ino(inode), cap->cap_id, | 1238 | __ceph_remove_cap(cap, true); |
| 1235 | cap->mseq, cap->issue_seq); | ||
| 1236 | __ceph_remove_cap(cap); | ||
| 1237 | } else { | 1239 | } else { |
| 1238 | /* try to drop referring dentries */ | 1240 | /* try to drop referring dentries */ |
| 1239 | spin_unlock(&ci->i_ceph_lock); | 1241 | spin_unlock(&ci->i_ceph_lock); |
| @@ -1416,7 +1418,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc, | |||
| 1416 | unsigned num; | 1418 | unsigned num; |
| 1417 | 1419 | ||
| 1418 | dout("discard_cap_releases mds%d\n", session->s_mds); | 1420 | dout("discard_cap_releases mds%d\n", session->s_mds); |
| 1419 | spin_lock(&session->s_cap_lock); | ||
| 1420 | 1421 | ||
| 1421 | /* zero out the in-progress message */ | 1422 | /* zero out the in-progress message */ |
| 1422 | msg = list_first_entry(&session->s_cap_releases, | 1423 | msg = list_first_entry(&session->s_cap_releases, |
| @@ -1443,8 +1444,6 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc, | |||
| 1443 | msg->front.iov_len = sizeof(*head); | 1444 | msg->front.iov_len = sizeof(*head); |
| 1444 | list_add(&msg->list_head, &session->s_cap_releases); | 1445 | list_add(&msg->list_head, &session->s_cap_releases); |
| 1445 | } | 1446 | } |
| 1446 | |||
| 1447 | spin_unlock(&session->s_cap_lock); | ||
| 1448 | } | 1447 | } |
| 1449 | 1448 | ||
| 1450 | /* | 1449 | /* |
| @@ -1875,8 +1874,11 @@ static int __do_request(struct ceph_mds_client *mdsc, | |||
| 1875 | int mds = -1; | 1874 | int mds = -1; |
| 1876 | int err = -EAGAIN; | 1875 | int err = -EAGAIN; |
| 1877 | 1876 | ||
| 1878 | if (req->r_err || req->r_got_result) | 1877 | if (req->r_err || req->r_got_result) { |
| 1878 | if (req->r_aborted) | ||
| 1879 | __unregister_request(mdsc, req); | ||
| 1879 | goto out; | 1880 | goto out; |
| 1881 | } | ||
| 1880 | 1882 | ||
| 1881 | if (req->r_timeout && | 1883 | if (req->r_timeout && |
| 1882 | time_after_eq(jiffies, req->r_started + req->r_timeout)) { | 1884 | time_after_eq(jiffies, req->r_started + req->r_timeout)) { |
| @@ -2186,7 +2188,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
| 2186 | if (head->safe) { | 2188 | if (head->safe) { |
| 2187 | req->r_got_safe = true; | 2189 | req->r_got_safe = true; |
| 2188 | __unregister_request(mdsc, req); | 2190 | __unregister_request(mdsc, req); |
| 2189 | complete_all(&req->r_safe_completion); | ||
| 2190 | 2191 | ||
| 2191 | if (req->r_got_unsafe) { | 2192 | if (req->r_got_unsafe) { |
| 2192 | /* | 2193 | /* |
| @@ -2238,8 +2239,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
| 2238 | err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); | 2239 | err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); |
| 2239 | if (err == 0) { | 2240 | if (err == 0) { |
| 2240 | if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || | 2241 | if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || |
| 2241 | req->r_op == CEPH_MDS_OP_LSSNAP) && | 2242 | req->r_op == CEPH_MDS_OP_LSSNAP)) |
| 2242 | rinfo->dir_nr) | ||
| 2243 | ceph_readdir_prepopulate(req, req->r_session); | 2243 | ceph_readdir_prepopulate(req, req->r_session); |
| 2244 | ceph_unreserve_caps(mdsc, &req->r_caps_reservation); | 2244 | ceph_unreserve_caps(mdsc, &req->r_caps_reservation); |
| 2245 | } | 2245 | } |
| @@ -2490,6 +2490,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
| 2490 | cap->seq = 0; /* reset cap seq */ | 2490 | cap->seq = 0; /* reset cap seq */ |
| 2491 | cap->issue_seq = 0; /* and issue_seq */ | 2491 | cap->issue_seq = 0; /* and issue_seq */ |
| 2492 | cap->mseq = 0; /* and migrate_seq */ | 2492 | cap->mseq = 0; /* and migrate_seq */ |
| 2493 | cap->cap_gen = cap->session->s_cap_gen; | ||
| 2493 | 2494 | ||
| 2494 | if (recon_state->flock) { | 2495 | if (recon_state->flock) { |
| 2495 | rec.v2.cap_id = cpu_to_le64(cap->cap_id); | 2496 | rec.v2.cap_id = cpu_to_le64(cap->cap_id); |
| @@ -2552,6 +2553,8 @@ encode_again: | |||
| 2552 | } else { | 2553 | } else { |
| 2553 | err = ceph_pagelist_append(pagelist, &rec, reclen); | 2554 | err = ceph_pagelist_append(pagelist, &rec, reclen); |
| 2554 | } | 2555 | } |
| 2556 | |||
| 2557 | recon_state->nr_caps++; | ||
| 2555 | out_free: | 2558 | out_free: |
| 2556 | kfree(path); | 2559 | kfree(path); |
| 2557 | out_dput: | 2560 | out_dput: |
| @@ -2579,6 +2582,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
| 2579 | struct rb_node *p; | 2582 | struct rb_node *p; |
| 2580 | int mds = session->s_mds; | 2583 | int mds = session->s_mds; |
| 2581 | int err = -ENOMEM; | 2584 | int err = -ENOMEM; |
| 2585 | int s_nr_caps; | ||
| 2582 | struct ceph_pagelist *pagelist; | 2586 | struct ceph_pagelist *pagelist; |
| 2583 | struct ceph_reconnect_state recon_state; | 2587 | struct ceph_reconnect_state recon_state; |
| 2584 | 2588 | ||
| @@ -2610,20 +2614,38 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
| 2610 | dout("session %p state %s\n", session, | 2614 | dout("session %p state %s\n", session, |
| 2611 | session_state_name(session->s_state)); | 2615 | session_state_name(session->s_state)); |
| 2612 | 2616 | ||
| 2617 | spin_lock(&session->s_gen_ttl_lock); | ||
| 2618 | session->s_cap_gen++; | ||
| 2619 | spin_unlock(&session->s_gen_ttl_lock); | ||
| 2620 | |||
| 2621 | spin_lock(&session->s_cap_lock); | ||
| 2622 | /* | ||
| 2623 | * notify __ceph_remove_cap() that we are composing cap reconnect. | ||
| 2624 | * If a cap get released before being added to the cap reconnect, | ||
| 2625 | * __ceph_remove_cap() should skip queuing cap release. | ||
| 2626 | */ | ||
| 2627 | session->s_cap_reconnect = 1; | ||
| 2613 | /* drop old cap expires; we're about to reestablish that state */ | 2628 | /* drop old cap expires; we're about to reestablish that state */ |
| 2614 | discard_cap_releases(mdsc, session); | 2629 | discard_cap_releases(mdsc, session); |
| 2630 | spin_unlock(&session->s_cap_lock); | ||
| 2615 | 2631 | ||
| 2616 | /* traverse this session's caps */ | 2632 | /* traverse this session's caps */ |
| 2617 | err = ceph_pagelist_encode_32(pagelist, session->s_nr_caps); | 2633 | s_nr_caps = session->s_nr_caps; |
| 2634 | err = ceph_pagelist_encode_32(pagelist, s_nr_caps); | ||
| 2618 | if (err) | 2635 | if (err) |
| 2619 | goto fail; | 2636 | goto fail; |
| 2620 | 2637 | ||
| 2638 | recon_state.nr_caps = 0; | ||
| 2621 | recon_state.pagelist = pagelist; | 2639 | recon_state.pagelist = pagelist; |
| 2622 | recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK; | 2640 | recon_state.flock = session->s_con.peer_features & CEPH_FEATURE_FLOCK; |
| 2623 | err = iterate_session_caps(session, encode_caps_cb, &recon_state); | 2641 | err = iterate_session_caps(session, encode_caps_cb, &recon_state); |
| 2624 | if (err < 0) | 2642 | if (err < 0) |
| 2625 | goto fail; | 2643 | goto fail; |
| 2626 | 2644 | ||
| 2645 | spin_lock(&session->s_cap_lock); | ||
| 2646 | session->s_cap_reconnect = 0; | ||
| 2647 | spin_unlock(&session->s_cap_lock); | ||
| 2648 | |||
| 2627 | /* | 2649 | /* |
| 2628 | * snaprealms. we provide mds with the ino, seq (version), and | 2650 | * snaprealms. we provide mds with the ino, seq (version), and |
| 2629 | * parent for all of our realms. If the mds has any newer info, | 2651 | * parent for all of our realms. If the mds has any newer info, |
| @@ -2646,11 +2668,18 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, | |||
| 2646 | 2668 | ||
| 2647 | if (recon_state.flock) | 2669 | if (recon_state.flock) |
| 2648 | reply->hdr.version = cpu_to_le16(2); | 2670 | reply->hdr.version = cpu_to_le16(2); |
| 2649 | if (pagelist->length) { | 2671 | |
| 2650 | /* set up outbound data if we have any */ | 2672 | /* raced with cap release? */ |
| 2651 | reply->hdr.data_len = cpu_to_le32(pagelist->length); | 2673 | if (s_nr_caps != recon_state.nr_caps) { |
| 2652 | ceph_msg_data_add_pagelist(reply, pagelist); | 2674 | struct page *page = list_first_entry(&pagelist->head, |
| 2675 | struct page, lru); | ||
| 2676 | __le32 *addr = kmap_atomic(page); | ||
| 2677 | *addr = cpu_to_le32(recon_state.nr_caps); | ||
| 2678 | kunmap_atomic(addr); | ||
| 2653 | } | 2679 | } |
| 2680 | |||
| 2681 | reply->hdr.data_len = cpu_to_le32(pagelist->length); | ||
| 2682 | ceph_msg_data_add_pagelist(reply, pagelist); | ||
| 2654 | ceph_con_send(&session->s_con, reply); | 2683 | ceph_con_send(&session->s_con, reply); |
| 2655 | 2684 | ||
| 2656 | mutex_unlock(&session->s_mutex); | 2685 | mutex_unlock(&session->s_mutex); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index c2a19fbbe517..4c053d099ae4 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
| @@ -132,6 +132,7 @@ struct ceph_mds_session { | |||
| 132 | struct list_head s_caps; /* all caps issued by this session */ | 132 | struct list_head s_caps; /* all caps issued by this session */ |
| 133 | int s_nr_caps, s_trim_caps; | 133 | int s_nr_caps, s_trim_caps; |
| 134 | int s_num_cap_releases; | 134 | int s_num_cap_releases; |
| 135 | int s_cap_reconnect; | ||
| 135 | struct list_head s_cap_releases; /* waiting cap_release messages */ | 136 | struct list_head s_cap_releases; /* waiting cap_release messages */ |
| 136 | struct list_head s_cap_releases_done; /* ready to send */ | 137 | struct list_head s_cap_releases_done; /* ready to send */ |
| 137 | struct ceph_cap *s_cap_iterator; | 138 | struct ceph_cap *s_cap_iterator; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 6014b0a3c405..ef4ac38bb614 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
| @@ -741,13 +741,7 @@ extern int ceph_add_cap(struct inode *inode, | |||
| 741 | int fmode, unsigned issued, unsigned wanted, | 741 | int fmode, unsigned issued, unsigned wanted, |
| 742 | unsigned cap, unsigned seq, u64 realmino, int flags, | 742 | unsigned cap, unsigned seq, u64 realmino, int flags, |
| 743 | struct ceph_cap_reservation *caps_reservation); | 743 | struct ceph_cap_reservation *caps_reservation); |
| 744 | extern void __ceph_remove_cap(struct ceph_cap *cap); | 744 | extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); |
| 745 | static inline void ceph_remove_cap(struct ceph_cap *cap) | ||
| 746 | { | ||
| 747 | spin_lock(&cap->ci->i_ceph_lock); | ||
| 748 | __ceph_remove_cap(cap); | ||
| 749 | spin_unlock(&cap->ci->i_ceph_lock); | ||
| 750 | } | ||
| 751 | extern void ceph_put_cap(struct ceph_mds_client *mdsc, | 745 | extern void ceph_put_cap(struct ceph_mds_client *mdsc, |
| 752 | struct ceph_cap *cap); | 746 | struct ceph_cap *cap); |
| 753 | 747 | ||
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index d9ea7ada1378..f918a998a087 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
| @@ -384,6 +384,7 @@ struct smb_version_operations { | |||
| 384 | int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file, | 384 | int (*clone_range)(const unsigned int, struct cifsFileInfo *src_file, |
| 385 | struct cifsFileInfo *target_file, u64 src_off, u64 len, | 385 | struct cifsFileInfo *target_file, u64 src_off, u64 len, |
| 386 | u64 dest_off); | 386 | u64 dest_off); |
| 387 | int (*validate_negotiate)(const unsigned int, struct cifs_tcon *); | ||
| 387 | }; | 388 | }; |
| 388 | 389 | ||
| 389 | struct smb_version_values { | 390 | struct smb_version_values { |
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 409b45eefe70..77492301cc2b 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c | |||
| @@ -26,13 +26,15 @@ | |||
| 26 | #include <linux/mount.h> | 26 | #include <linux/mount.h> |
| 27 | #include <linux/mm.h> | 27 | #include <linux/mm.h> |
| 28 | #include <linux/pagemap.h> | 28 | #include <linux/pagemap.h> |
| 29 | #include <linux/btrfs.h> | ||
| 30 | #include "cifspdu.h" | 29 | #include "cifspdu.h" |
| 31 | #include "cifsglob.h" | 30 | #include "cifsglob.h" |
| 32 | #include "cifsproto.h" | 31 | #include "cifsproto.h" |
| 33 | #include "cifs_debug.h" | 32 | #include "cifs_debug.h" |
| 34 | #include "cifsfs.h" | 33 | #include "cifsfs.h" |
| 35 | 34 | ||
| 35 | #define CIFS_IOCTL_MAGIC 0xCF | ||
| 36 | #define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int) | ||
| 37 | |||
| 36 | static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, | 38 | static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file, |
| 37 | unsigned long srcfd, u64 off, u64 len, u64 destoff) | 39 | unsigned long srcfd, u64 off, u64 len, u64 destoff) |
| 38 | { | 40 | { |
| @@ -213,7 +215,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) | |||
| 213 | cifs_dbg(FYI, "set compress flag rc %d\n", rc); | 215 | cifs_dbg(FYI, "set compress flag rc %d\n", rc); |
| 214 | } | 216 | } |
| 215 | break; | 217 | break; |
| 216 | case BTRFS_IOC_CLONE: | 218 | case CIFS_IOC_COPYCHUNK_FILE: |
| 217 | rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0); | 219 | rc = cifs_ioctl_clone(xid, filep, arg, 0, 0, 0); |
| 218 | break; | 220 | break; |
| 219 | default: | 221 | default: |
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 11dde4b24f8a..757da3e54d3d 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c | |||
| @@ -532,7 +532,10 @@ smb2_clone_range(const unsigned int xid, | |||
| 532 | int rc; | 532 | int rc; |
| 533 | unsigned int ret_data_len; | 533 | unsigned int ret_data_len; |
| 534 | struct copychunk_ioctl *pcchunk; | 534 | struct copychunk_ioctl *pcchunk; |
| 535 | char *retbuf = NULL; | 535 | struct copychunk_ioctl_rsp *retbuf = NULL; |
| 536 | struct cifs_tcon *tcon; | ||
| 537 | int chunks_copied = 0; | ||
| 538 | bool chunk_sizes_updated = false; | ||
| 536 | 539 | ||
| 537 | pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL); | 540 | pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL); |
| 538 | 541 | ||
| @@ -547,27 +550,96 @@ smb2_clone_range(const unsigned int xid, | |||
| 547 | 550 | ||
| 548 | /* Note: request_res_key sets res_key null only if rc !=0 */ | 551 | /* Note: request_res_key sets res_key null only if rc !=0 */ |
| 549 | if (rc) | 552 | if (rc) |
| 550 | return rc; | 553 | goto cchunk_out; |
| 551 | 554 | ||
| 552 | /* For now array only one chunk long, will make more flexible later */ | 555 | /* For now array only one chunk long, will make more flexible later */ |
| 553 | pcchunk->ChunkCount = __constant_cpu_to_le32(1); | 556 | pcchunk->ChunkCount = __constant_cpu_to_le32(1); |
| 554 | pcchunk->Reserved = 0; | 557 | pcchunk->Reserved = 0; |
| 555 | pcchunk->SourceOffset = cpu_to_le64(src_off); | ||
| 556 | pcchunk->TargetOffset = cpu_to_le64(dest_off); | ||
| 557 | pcchunk->Length = cpu_to_le32(len); | ||
| 558 | pcchunk->Reserved2 = 0; | 558 | pcchunk->Reserved2 = 0; |
| 559 | 559 | ||
| 560 | /* Request that server copy to target from src file identified by key */ | 560 | tcon = tlink_tcon(trgtfile->tlink); |
| 561 | rc = SMB2_ioctl(xid, tlink_tcon(trgtfile->tlink), | ||
| 562 | trgtfile->fid.persistent_fid, | ||
| 563 | trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, | ||
| 564 | true /* is_fsctl */, (char *)pcchunk, | ||
| 565 | sizeof(struct copychunk_ioctl), &retbuf, &ret_data_len); | ||
| 566 | 561 | ||
| 567 | /* BB need to special case rc = EINVAL to alter chunk size */ | 562 | while (len > 0) { |
| 563 | pcchunk->SourceOffset = cpu_to_le64(src_off); | ||
| 564 | pcchunk->TargetOffset = cpu_to_le64(dest_off); | ||
| 565 | pcchunk->Length = | ||
| 566 | cpu_to_le32(min_t(u32, len, tcon->max_bytes_chunk)); | ||
| 568 | 567 | ||
| 569 | cifs_dbg(FYI, "rc %d data length out %d\n", rc, ret_data_len); | 568 | /* Request server copy to target from src identified by key */ |
| 569 | rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, | ||
| 570 | trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, | ||
| 571 | true /* is_fsctl */, (char *)pcchunk, | ||
| 572 | sizeof(struct copychunk_ioctl), (char **)&retbuf, | ||
| 573 | &ret_data_len); | ||
| 574 | if (rc == 0) { | ||
| 575 | if (ret_data_len != | ||
| 576 | sizeof(struct copychunk_ioctl_rsp)) { | ||
| 577 | cifs_dbg(VFS, "invalid cchunk response size\n"); | ||
| 578 | rc = -EIO; | ||
| 579 | goto cchunk_out; | ||
| 580 | } | ||
| 581 | if (retbuf->TotalBytesWritten == 0) { | ||
| 582 | cifs_dbg(FYI, "no bytes copied\n"); | ||
| 583 | rc = -EIO; | ||
| 584 | goto cchunk_out; | ||
| 585 | } | ||
| 586 | /* | ||
| 587 | * Check if server claimed to write more than we asked | ||
| 588 | */ | ||
| 589 | if (le32_to_cpu(retbuf->TotalBytesWritten) > | ||
| 590 | le32_to_cpu(pcchunk->Length)) { | ||
| 591 | cifs_dbg(VFS, "invalid copy chunk response\n"); | ||
| 592 | rc = -EIO; | ||
| 593 | goto cchunk_out; | ||
| 594 | } | ||
| 595 | if (le32_to_cpu(retbuf->ChunksWritten) != 1) { | ||
| 596 | cifs_dbg(VFS, "invalid num chunks written\n"); | ||
| 597 | rc = -EIO; | ||
| 598 | goto cchunk_out; | ||
| 599 | } | ||
| 600 | chunks_copied++; | ||
| 601 | |||
| 602 | src_off += le32_to_cpu(retbuf->TotalBytesWritten); | ||
| 603 | dest_off += le32_to_cpu(retbuf->TotalBytesWritten); | ||
| 604 | len -= le32_to_cpu(retbuf->TotalBytesWritten); | ||
| 605 | |||
| 606 | cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %d\n", | ||
| 607 | le32_to_cpu(retbuf->ChunksWritten), | ||
| 608 | le32_to_cpu(retbuf->ChunkBytesWritten), | ||
| 609 | le32_to_cpu(retbuf->TotalBytesWritten)); | ||
| 610 | } else if (rc == -EINVAL) { | ||
| 611 | if (ret_data_len != sizeof(struct copychunk_ioctl_rsp)) | ||
| 612 | goto cchunk_out; | ||
| 613 | |||
| 614 | cifs_dbg(FYI, "MaxChunks %d BytesChunk %d MaxCopy %d\n", | ||
| 615 | le32_to_cpu(retbuf->ChunksWritten), | ||
| 616 | le32_to_cpu(retbuf->ChunkBytesWritten), | ||
| 617 | le32_to_cpu(retbuf->TotalBytesWritten)); | ||
| 618 | |||
| 619 | /* | ||
| 620 | * Check if this is the first request using these sizes, | ||
| 621 | * (ie check if copy succeed once with original sizes | ||
| 622 | * and check if the server gave us different sizes after | ||
| 623 | * we already updated max sizes on previous request). | ||
| 624 | * if not then why is the server returning an error now | ||
| 625 | */ | ||
| 626 | if ((chunks_copied != 0) || chunk_sizes_updated) | ||
| 627 | goto cchunk_out; | ||
| 628 | |||
| 629 | /* Check that server is not asking us to grow size */ | ||
| 630 | if (le32_to_cpu(retbuf->ChunkBytesWritten) < | ||
| 631 | tcon->max_bytes_chunk) | ||
| 632 | tcon->max_bytes_chunk = | ||
| 633 | le32_to_cpu(retbuf->ChunkBytesWritten); | ||
| 634 | else | ||
| 635 | goto cchunk_out; /* server gave us bogus size */ | ||
| 636 | |||
| 637 | /* No need to change MaxChunks since already set to 1 */ | ||
| 638 | chunk_sizes_updated = true; | ||
| 639 | } | ||
| 640 | } | ||
| 570 | 641 | ||
| 642 | cchunk_out: | ||
| 571 | kfree(pcchunk); | 643 | kfree(pcchunk); |
| 572 | return rc; | 644 | return rc; |
| 573 | } | 645 | } |
| @@ -1247,6 +1319,7 @@ struct smb_version_operations smb30_operations = { | |||
| 1247 | .create_lease_buf = smb3_create_lease_buf, | 1319 | .create_lease_buf = smb3_create_lease_buf, |
| 1248 | .parse_lease_buf = smb3_parse_lease_buf, | 1320 | .parse_lease_buf = smb3_parse_lease_buf, |
| 1249 | .clone_range = smb2_clone_range, | 1321 | .clone_range = smb2_clone_range, |
| 1322 | .validate_negotiate = smb3_validate_negotiate, | ||
| 1250 | }; | 1323 | }; |
| 1251 | 1324 | ||
| 1252 | struct smb_version_values smb20_values = { | 1325 | struct smb_version_values smb20_values = { |
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index d65270c290a1..2013234b73ad 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c | |||
| @@ -454,6 +454,81 @@ neg_exit: | |||
| 454 | return rc; | 454 | return rc; |
| 455 | } | 455 | } |
| 456 | 456 | ||
| 457 | int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) | ||
| 458 | { | ||
| 459 | int rc = 0; | ||
| 460 | struct validate_negotiate_info_req vneg_inbuf; | ||
| 461 | struct validate_negotiate_info_rsp *pneg_rsp; | ||
| 462 | u32 rsplen; | ||
| 463 | |||
| 464 | cifs_dbg(FYI, "validate negotiate\n"); | ||
| 465 | |||
| 466 | /* | ||
| 467 | * validation ioctl must be signed, so no point sending this if we | ||
| 468 | * can not sign it. We could eventually change this to selectively | ||
| 469 | * sign just this, the first and only signed request on a connection. | ||
| 470 | * This is good enough for now since a user who wants better security | ||
| 471 | * would also enable signing on the mount. Having validation of | ||
| 472 | * negotiate info for signed connections helps reduce attack vectors | ||
| 473 | */ | ||
| 474 | if (tcon->ses->server->sign == false) | ||
| 475 | return 0; /* validation requires signing */ | ||
| 476 | |||
| 477 | vneg_inbuf.Capabilities = | ||
| 478 | cpu_to_le32(tcon->ses->server->vals->req_capabilities); | ||
| 479 | memcpy(vneg_inbuf.Guid, cifs_client_guid, SMB2_CLIENT_GUID_SIZE); | ||
| 480 | |||
| 481 | if (tcon->ses->sign) | ||
| 482 | vneg_inbuf.SecurityMode = | ||
| 483 | cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED); | ||
| 484 | else if (global_secflags & CIFSSEC_MAY_SIGN) | ||
| 485 | vneg_inbuf.SecurityMode = | ||
| 486 | cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED); | ||
| 487 | else | ||
| 488 | vneg_inbuf.SecurityMode = 0; | ||
| 489 | |||
| 490 | vneg_inbuf.DialectCount = cpu_to_le16(1); | ||
| 491 | vneg_inbuf.Dialects[0] = | ||
| 492 | cpu_to_le16(tcon->ses->server->vals->protocol_id); | ||
| 493 | |||
| 494 | rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, | ||
| 495 | FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, | ||
| 496 | (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req), | ||
| 497 | (char **)&pneg_rsp, &rsplen); | ||
| 498 | |||
| 499 | if (rc != 0) { | ||
| 500 | cifs_dbg(VFS, "validate protocol negotiate failed: %d\n", rc); | ||
| 501 | return -EIO; | ||
| 502 | } | ||
| 503 | |||
| 504 | if (rsplen != sizeof(struct validate_negotiate_info_rsp)) { | ||
| 505 | cifs_dbg(VFS, "invalid size of protocol negotiate response\n"); | ||
| 506 | return -EIO; | ||
| 507 | } | ||
| 508 | |||
| 509 | /* check validate negotiate info response matches what we got earlier */ | ||
| 510 | if (pneg_rsp->Dialect != | ||
| 511 | cpu_to_le16(tcon->ses->server->vals->protocol_id)) | ||
| 512 | goto vneg_out; | ||
| 513 | |||
| 514 | if (pneg_rsp->SecurityMode != cpu_to_le16(tcon->ses->server->sec_mode)) | ||
| 515 | goto vneg_out; | ||
| 516 | |||
| 517 | /* do not validate server guid because not saved at negprot time yet */ | ||
| 518 | |||
| 519 | if ((le32_to_cpu(pneg_rsp->Capabilities) | SMB2_NT_FIND | | ||
| 520 | SMB2_LARGE_FILES) != tcon->ses->server->capabilities) | ||
| 521 | goto vneg_out; | ||
| 522 | |||
| 523 | /* validate negotiate successful */ | ||
| 524 | cifs_dbg(FYI, "validate negotiate info successful\n"); | ||
| 525 | return 0; | ||
| 526 | |||
| 527 | vneg_out: | ||
| 528 | cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n"); | ||
| 529 | return -EIO; | ||
| 530 | } | ||
| 531 | |||
| 457 | int | 532 | int |
| 458 | SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, | 533 | SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, |
| 459 | const struct nls_table *nls_cp) | 534 | const struct nls_table *nls_cp) |
| @@ -829,6 +904,8 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, | |||
| 829 | ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0)) | 904 | ((tcon->share_flags & SHI1005_FLAGS_DFS) == 0)) |
| 830 | cifs_dbg(VFS, "DFS capability contradicts DFS flag\n"); | 905 | cifs_dbg(VFS, "DFS capability contradicts DFS flag\n"); |
| 831 | init_copy_chunk_defaults(tcon); | 906 | init_copy_chunk_defaults(tcon); |
| 907 | if (tcon->ses->server->ops->validate_negotiate) | ||
| 908 | rc = tcon->ses->server->ops->validate_negotiate(xid, tcon); | ||
| 832 | tcon_exit: | 909 | tcon_exit: |
| 833 | free_rsp_buf(resp_buftype, rsp); | 910 | free_rsp_buf(resp_buftype, rsp); |
| 834 | kfree(unc_path); | 911 | kfree(unc_path); |
| @@ -1214,10 +1291,17 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, | |||
| 1214 | rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); | 1291 | rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); |
| 1215 | rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base; | 1292 | rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base; |
| 1216 | 1293 | ||
| 1217 | if (rc != 0) { | 1294 | if ((rc != 0) && (rc != -EINVAL)) { |
| 1218 | if (tcon) | 1295 | if (tcon) |
| 1219 | cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); | 1296 | cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); |
| 1220 | goto ioctl_exit; | 1297 | goto ioctl_exit; |
| 1298 | } else if (rc == -EINVAL) { | ||
| 1299 | if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) && | ||
| 1300 | (opcode != FSCTL_SRV_COPYCHUNK)) { | ||
| 1301 | if (tcon) | ||
| 1302 | cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE); | ||
| 1303 | goto ioctl_exit; | ||
| 1304 | } | ||
| 1221 | } | 1305 | } |
| 1222 | 1306 | ||
| 1223 | /* check if caller wants to look at return data or just return rc */ | 1307 | /* check if caller wants to look at return data or just return rc */ |
| @@ -2154,11 +2238,9 @@ send_set_info(const unsigned int xid, struct cifs_tcon *tcon, | |||
| 2154 | rc = SendReceive2(xid, ses, iov, num, &resp_buftype, 0); | 2238 | rc = SendReceive2(xid, ses, iov, num, &resp_buftype, 0); |
| 2155 | rsp = (struct smb2_set_info_rsp *)iov[0].iov_base; | 2239 | rsp = (struct smb2_set_info_rsp *)iov[0].iov_base; |
| 2156 | 2240 | ||
| 2157 | if (rc != 0) { | 2241 | if (rc != 0) |
| 2158 | cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE); | 2242 | cifs_stats_fail_inc(tcon, SMB2_SET_INFO_HE); |
| 2159 | goto out; | 2243 | |
| 2160 | } | ||
| 2161 | out: | ||
| 2162 | free_rsp_buf(resp_buftype, rsp); | 2244 | free_rsp_buf(resp_buftype, rsp); |
| 2163 | kfree(iov); | 2245 | kfree(iov); |
| 2164 | return rc; | 2246 | return rc; |
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index f88320bbb477..2022c542ea3a 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h | |||
| @@ -577,13 +577,19 @@ struct copychunk_ioctl_rsp { | |||
| 577 | __le32 TotalBytesWritten; | 577 | __le32 TotalBytesWritten; |
| 578 | } __packed; | 578 | } __packed; |
| 579 | 579 | ||
| 580 | /* Response and Request are the same format */ | 580 | struct validate_negotiate_info_req { |
| 581 | struct validate_negotiate_info { | ||
| 582 | __le32 Capabilities; | 581 | __le32 Capabilities; |
| 583 | __u8 Guid[SMB2_CLIENT_GUID_SIZE]; | 582 | __u8 Guid[SMB2_CLIENT_GUID_SIZE]; |
| 584 | __le16 SecurityMode; | 583 | __le16 SecurityMode; |
| 585 | __le16 DialectCount; | 584 | __le16 DialectCount; |
| 586 | __le16 Dialect[1]; | 585 | __le16 Dialects[1]; /* dialect (someday maybe list) client asked for */ |
| 586 | } __packed; | ||
| 587 | |||
| 588 | struct validate_negotiate_info_rsp { | ||
| 589 | __le32 Capabilities; | ||
| 590 | __u8 Guid[SMB2_CLIENT_GUID_SIZE]; | ||
| 591 | __le16 SecurityMode; | ||
| 592 | __le16 Dialect; /* Dialect in use for the connection */ | ||
| 587 | } __packed; | 593 | } __packed; |
| 588 | 594 | ||
| 589 | #define RSS_CAPABLE 0x00000001 | 595 | #define RSS_CAPABLE 0x00000001 |
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index b4eea105b08c..93adc64666f3 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h | |||
| @@ -162,5 +162,6 @@ extern int smb2_lockv(const unsigned int xid, struct cifs_tcon *tcon, | |||
| 162 | struct smb2_lock_element *buf); | 162 | struct smb2_lock_element *buf); |
| 163 | extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, | 163 | extern int SMB2_lease_break(const unsigned int xid, struct cifs_tcon *tcon, |
| 164 | __u8 *lease_key, const __le32 lease_state); | 164 | __u8 *lease_key, const __le32 lease_state); |
| 165 | extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *); | ||
| 165 | 166 | ||
| 166 | #endif /* _SMB2PROTO_H */ | 167 | #endif /* _SMB2PROTO_H */ |
diff --git a/fs/cifs/smbfsctl.h b/fs/cifs/smbfsctl.h index a4b2391fe66e..0e538b5c9622 100644 --- a/fs/cifs/smbfsctl.h +++ b/fs/cifs/smbfsctl.h | |||
| @@ -90,7 +90,7 @@ | |||
| 90 | #define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 /* BB add struct */ | 90 | #define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4 /* BB add struct */ |
| 91 | #define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */ | 91 | #define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */ |
| 92 | #define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */ | 92 | #define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */ |
| 93 | #define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 /* BB add struct */ | 93 | #define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204 |
| 94 | /* Perform server-side data movement */ | 94 | /* Perform server-side data movement */ |
| 95 | #define FSCTL_SRV_COPYCHUNK 0x001440F2 | 95 | #define FSCTL_SRV_COPYCHUNK 0x001440F2 |
| 96 | #define FSCTL_SRV_COPYCHUNK_WRITE 0x001480F2 | 96 | #define FSCTL_SRV_COPYCHUNK_WRITE 0x001480F2 |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 277bd1be21fd..e081acbac2e7 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
| @@ -56,29 +56,28 @@ static void configfs_d_iput(struct dentry * dentry, | |||
| 56 | struct configfs_dirent *sd = dentry->d_fsdata; | 56 | struct configfs_dirent *sd = dentry->d_fsdata; |
| 57 | 57 | ||
| 58 | if (sd) { | 58 | if (sd) { |
| 59 | BUG_ON(sd->s_dentry != dentry); | ||
| 60 | /* Coordinate with configfs_readdir */ | 59 | /* Coordinate with configfs_readdir */ |
| 61 | spin_lock(&configfs_dirent_lock); | 60 | spin_lock(&configfs_dirent_lock); |
| 62 | sd->s_dentry = NULL; | 61 | /* Coordinate with configfs_attach_attr, which will increase |
| 62 | * sd->s_count and update sd->s_dentry to the newly allocated one. | ||
| 63 | * Only set sd->s_dentry to NULL when this dentry is the only | ||
| 64 | * sd owner. | ||
| 65 | * Otherwise, configfs_d_iput may run just after | ||
| 66 | * configfs_attach_attr and set sd->s_dentry to NULL | ||
| 67 | * even though it is still in use. | ||
| 68 | */ | ||
| 69 | if (atomic_read(&sd->s_count) <= 2) | ||
| 70 | sd->s_dentry = NULL; | ||
| 71 | |||
| 63 | spin_unlock(&configfs_dirent_lock); | 72 | spin_unlock(&configfs_dirent_lock); |
| 64 | configfs_put(sd); | 73 | configfs_put(sd); |
| 65 | } | 74 | } |
| 66 | iput(inode); | 75 | iput(inode); |
| 67 | } | 76 | } |
| 68 | 77 | ||
| 69 | /* | ||
| 70 | * We _must_ delete our dentries on last dput, as the chain-to-parent | ||
| 71 | * behavior is required to clear the parents of default_groups. | ||
| 72 | */ | ||
| 73 | static int configfs_d_delete(const struct dentry *dentry) | ||
| 74 | { | ||
| 75 | return 1; | ||
| 76 | } | ||
| 77 | |||
| 78 | const struct dentry_operations configfs_dentry_ops = { | 78 | const struct dentry_operations configfs_dentry_ops = { |
| 79 | .d_iput = configfs_d_iput, | 79 | .d_iput = configfs_d_iput, |
| 80 | /* simple_delete_dentry() isn't exported */ | 80 | .d_delete = always_delete_dentry, |
| 81 | .d_delete = configfs_d_delete, | ||
| 82 | }; | 81 | }; |
| 83 | 82 | ||
| 84 | #ifdef CONFIG_LOCKDEP | 83 | #ifdef CONFIG_LOCKDEP |
| @@ -426,8 +425,11 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den | |||
| 426 | struct configfs_attribute * attr = sd->s_element; | 425 | struct configfs_attribute * attr = sd->s_element; |
| 427 | int error; | 426 | int error; |
| 428 | 427 | ||
| 428 | spin_lock(&configfs_dirent_lock); | ||
| 429 | dentry->d_fsdata = configfs_get(sd); | 429 | dentry->d_fsdata = configfs_get(sd); |
| 430 | sd->s_dentry = dentry; | 430 | sd->s_dentry = dentry; |
| 431 | spin_unlock(&configfs_dirent_lock); | ||
| 432 | |||
| 431 | error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG, | 433 | error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG, |
| 432 | configfs_init_file); | 434 | configfs_init_file); |
| 433 | if (error) { | 435 | if (error) { |
diff --git a/fs/coredump.c b/fs/coredump.c index 62406b6959b6..bc3fbcd32558 100644 --- a/fs/coredump.c +++ b/fs/coredump.c | |||
| @@ -695,7 +695,7 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr) | |||
| 695 | while (nr) { | 695 | while (nr) { |
| 696 | if (dump_interrupted()) | 696 | if (dump_interrupted()) |
| 697 | return 0; | 697 | return 0; |
| 698 | n = vfs_write(file, addr, nr, &pos); | 698 | n = __kernel_write(file, addr, nr, &pos); |
| 699 | if (n <= 0) | 699 | if (n <= 0) |
| 700 | return 0; | 700 | return 0; |
| 701 | file->f_pos = pos; | 701 | file->f_pos = pos; |
| @@ -733,7 +733,7 @@ int dump_align(struct coredump_params *cprm, int align) | |||
| 733 | { | 733 | { |
| 734 | unsigned mod = cprm->written & (align - 1); | 734 | unsigned mod = cprm->written & (align - 1); |
| 735 | if (align & (align - 1)) | 735 | if (align & (align - 1)) |
| 736 | return -EINVAL; | 736 | return 0; |
| 737 | return mod ? dump_skip(cprm, align - mod) : 0; | 737 | return mod ? dump_skip(cprm, align - mod) : 1; |
| 738 | } | 738 | } |
| 739 | EXPORT_SYMBOL(dump_align); | 739 | EXPORT_SYMBOL(dump_align); |
diff --git a/fs/dcache.c b/fs/dcache.c index 0a38ef8d7f00..6055d61811d3 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -88,35 +88,6 @@ EXPORT_SYMBOL(rename_lock); | |||
| 88 | 88 | ||
| 89 | static struct kmem_cache *dentry_cache __read_mostly; | 89 | static struct kmem_cache *dentry_cache __read_mostly; |
| 90 | 90 | ||
| 91 | /** | ||
| 92 | * read_seqbegin_or_lock - begin a sequence number check or locking block | ||
| 93 | * @lock: sequence lock | ||
| 94 | * @seq : sequence number to be checked | ||
| 95 | * | ||
| 96 | * First try it once optimistically without taking the lock. If that fails, | ||
| 97 | * take the lock. The sequence number is also used as a marker for deciding | ||
| 98 | * whether to be a reader (even) or writer (odd). | ||
| 99 | * N.B. seq must be initialized to an even number to begin with. | ||
| 100 | */ | ||
| 101 | static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) | ||
| 102 | { | ||
| 103 | if (!(*seq & 1)) /* Even */ | ||
| 104 | *seq = read_seqbegin(lock); | ||
| 105 | else /* Odd */ | ||
| 106 | read_seqlock_excl(lock); | ||
| 107 | } | ||
| 108 | |||
| 109 | static inline int need_seqretry(seqlock_t *lock, int seq) | ||
| 110 | { | ||
| 111 | return !(seq & 1) && read_seqretry(lock, seq); | ||
| 112 | } | ||
| 113 | |||
| 114 | static inline void done_seqretry(seqlock_t *lock, int seq) | ||
| 115 | { | ||
| 116 | if (seq & 1) | ||
| 117 | read_sequnlock_excl(lock); | ||
| 118 | } | ||
| 119 | |||
| 120 | /* | 91 | /* |
| 121 | * This is the single most critical data structure when it comes | 92 | * This is the single most critical data structure when it comes |
| 122 | * to the dcache: the hashtable for lookups. Somebody should try | 93 | * to the dcache: the hashtable for lookups. Somebody should try |
| @@ -125,8 +96,6 @@ static inline void done_seqretry(seqlock_t *lock, int seq) | |||
| 125 | * This hash-function tries to avoid losing too many bits of hash | 96 | * This hash-function tries to avoid losing too many bits of hash |
| 126 | * information, yet avoid using a prime hash-size or similar. | 97 | * information, yet avoid using a prime hash-size or similar. |
| 127 | */ | 98 | */ |
| 128 | #define D_HASHBITS d_hash_shift | ||
| 129 | #define D_HASHMASK d_hash_mask | ||
| 130 | 99 | ||
| 131 | static unsigned int d_hash_mask __read_mostly; | 100 | static unsigned int d_hash_mask __read_mostly; |
| 132 | static unsigned int d_hash_shift __read_mostly; | 101 | static unsigned int d_hash_shift __read_mostly; |
| @@ -137,8 +106,8 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent, | |||
| 137 | unsigned int hash) | 106 | unsigned int hash) |
| 138 | { | 107 | { |
| 139 | hash += (unsigned long) parent / L1_CACHE_BYTES; | 108 | hash += (unsigned long) parent / L1_CACHE_BYTES; |
| 140 | hash = hash + (hash >> D_HASHBITS); | 109 | hash = hash + (hash >> d_hash_shift); |
| 141 | return dentry_hashtable + (hash & D_HASHMASK); | 110 | return dentry_hashtable + (hash & d_hash_mask); |
| 142 | } | 111 | } |
| 143 | 112 | ||
| 144 | /* Statistics gathering. */ | 113 | /* Statistics gathering. */ |
| @@ -223,7 +192,7 @@ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char | |||
| 223 | if (!tcount) | 192 | if (!tcount) |
| 224 | return 0; | 193 | return 0; |
| 225 | } | 194 | } |
| 226 | mask = ~(~0ul << tcount*8); | 195 | mask = bytemask_from_count(tcount); |
| 227 | return unlikely(!!((a ^ b) & mask)); | 196 | return unlikely(!!((a ^ b) & mask)); |
| 228 | } | 197 | } |
| 229 | 198 | ||
| @@ -469,7 +438,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) | |||
| 469 | { | 438 | { |
| 470 | list_del(&dentry->d_u.d_child); | 439 | list_del(&dentry->d_u.d_child); |
| 471 | /* | 440 | /* |
| 472 | * Inform try_to_ascend() that we are no longer attached to the | 441 | * Inform d_walk() that we are no longer attached to the |
| 473 | * dentry tree | 442 | * dentry tree |
| 474 | */ | 443 | */ |
| 475 | dentry->d_flags |= DCACHE_DENTRY_KILLED; | 444 | dentry->d_flags |= DCACHE_DENTRY_KILLED; |
| @@ -1069,34 +1038,6 @@ void shrink_dcache_sb(struct super_block *sb) | |||
| 1069 | } | 1038 | } |
| 1070 | EXPORT_SYMBOL(shrink_dcache_sb); | 1039 | EXPORT_SYMBOL(shrink_dcache_sb); |
| 1071 | 1040 | ||
| 1072 | /* | ||
| 1073 | * This tries to ascend one level of parenthood, but | ||
| 1074 | * we can race with renaming, so we need to re-check | ||
| 1075 | * the parenthood after dropping the lock and check | ||
| 1076 | * that the sequence number still matches. | ||
| 1077 | */ | ||
| 1078 | static struct dentry *try_to_ascend(struct dentry *old, unsigned seq) | ||
| 1079 | { | ||
| 1080 | struct dentry *new = old->d_parent; | ||
| 1081 | |||
| 1082 | rcu_read_lock(); | ||
| 1083 | spin_unlock(&old->d_lock); | ||
| 1084 | spin_lock(&new->d_lock); | ||
| 1085 | |||
| 1086 | /* | ||
| 1087 | * might go back up the wrong parent if we have had a rename | ||
| 1088 | * or deletion | ||
| 1089 | */ | ||
| 1090 | if (new != old->d_parent || | ||
| 1091 | (old->d_flags & DCACHE_DENTRY_KILLED) || | ||
| 1092 | need_seqretry(&rename_lock, seq)) { | ||
| 1093 | spin_unlock(&new->d_lock); | ||
| 1094 | new = NULL; | ||
| 1095 | } | ||
| 1096 | rcu_read_unlock(); | ||
| 1097 | return new; | ||
| 1098 | } | ||
| 1099 | |||
| 1100 | /** | 1041 | /** |
| 1101 | * enum d_walk_ret - action to take during tree walk | 1042 | * enum d_walk_ret - action to take during tree walk |
| 1102 | * @D_WALK_CONTINUE: continue walk | 1043 | * @D_WALK_CONTINUE: continue walk |
| @@ -1185,9 +1126,24 @@ resume: | |||
| 1185 | */ | 1126 | */ |
| 1186 | if (this_parent != parent) { | 1127 | if (this_parent != parent) { |
| 1187 | struct dentry *child = this_parent; | 1128 | struct dentry *child = this_parent; |
| 1188 | this_parent = try_to_ascend(this_parent, seq); | 1129 | this_parent = child->d_parent; |
| 1189 | if (!this_parent) | 1130 | |
| 1131 | rcu_read_lock(); | ||
| 1132 | spin_unlock(&child->d_lock); | ||
| 1133 | spin_lock(&this_parent->d_lock); | ||
| 1134 | |||
| 1135 | /* | ||
| 1136 | * might go back up the wrong parent if we have had a rename | ||
| 1137 | * or deletion | ||
| 1138 | */ | ||
| 1139 | if (this_parent != child->d_parent || | ||
| 1140 | (child->d_flags & DCACHE_DENTRY_KILLED) || | ||
| 1141 | need_seqretry(&rename_lock, seq)) { | ||
| 1142 | spin_unlock(&this_parent->d_lock); | ||
| 1143 | rcu_read_unlock(); | ||
| 1190 | goto rename_retry; | 1144 | goto rename_retry; |
| 1145 | } | ||
| 1146 | rcu_read_unlock(); | ||
| 1191 | next = child->d_u.d_child.next; | 1147 | next = child->d_u.d_child.next; |
| 1192 | goto resume; | 1148 | goto resume; |
| 1193 | } | 1149 | } |
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 2229a74aeeed..b1eaa7a1f82c 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
| @@ -313,11 +313,9 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag) | |||
| 313 | static long | 313 | static long |
| 314 | ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 314 | ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
| 315 | { | 315 | { |
| 316 | struct file *lower_file = NULL; | 316 | struct file *lower_file = ecryptfs_file_to_lower(file); |
| 317 | long rc = -ENOTTY; | 317 | long rc = -ENOTTY; |
| 318 | 318 | ||
| 319 | if (ecryptfs_file_to_private(file)) | ||
| 320 | lower_file = ecryptfs_file_to_lower(file); | ||
| 321 | if (lower_file->f_op->unlocked_ioctl) | 319 | if (lower_file->f_op->unlocked_ioctl) |
| 322 | rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); | 320 | rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg); |
| 323 | return rc; | 321 | return rc; |
| @@ -327,11 +325,9 @@ ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
| 327 | static long | 325 | static long |
| 328 | ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 326 | ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
| 329 | { | 327 | { |
| 330 | struct file *lower_file = NULL; | 328 | struct file *lower_file = ecryptfs_file_to_lower(file); |
| 331 | long rc = -ENOIOCTLCMD; | 329 | long rc = -ENOIOCTLCMD; |
| 332 | 330 | ||
| 333 | if (ecryptfs_file_to_private(file)) | ||
| 334 | lower_file = ecryptfs_file_to_lower(file); | ||
| 335 | if (lower_file->f_op && lower_file->f_op->compat_ioctl) | 331 | if (lower_file->f_op && lower_file->f_op->compat_ioctl) |
| 336 | rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); | 332 | rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg); |
| 337 | return rc; | 333 | return rc; |
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index a8766b880c07..becc725a1953 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c | |||
| @@ -83,19 +83,10 @@ static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr) | |||
| 83 | return 0; | 83 | return 0; |
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | /* | ||
| 87 | * Retaining negative dentries for an in-memory filesystem just wastes | ||
| 88 | * memory and lookup time: arrange for them to be deleted immediately. | ||
| 89 | */ | ||
| 90 | static int efivarfs_delete_dentry(const struct dentry *dentry) | ||
| 91 | { | ||
| 92 | return 1; | ||
| 93 | } | ||
| 94 | |||
| 95 | static struct dentry_operations efivarfs_d_ops = { | 86 | static struct dentry_operations efivarfs_d_ops = { |
| 96 | .d_compare = efivarfs_d_compare, | 87 | .d_compare = efivarfs_d_compare, |
| 97 | .d_hash = efivarfs_d_hash, | 88 | .d_hash = efivarfs_d_hash, |
| 98 | .d_delete = efivarfs_delete_dentry, | 89 | .d_delete = always_delete_dentry, |
| 99 | }; | 90 | }; |
| 100 | 91 | ||
| 101 | static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) | 92 | static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 79b65c3b9e87..8b5e2584c840 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
| @@ -1852,8 +1852,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, | |||
| 1852 | goto error_tgt_fput; | 1852 | goto error_tgt_fput; |
| 1853 | 1853 | ||
| 1854 | /* Check if EPOLLWAKEUP is allowed */ | 1854 | /* Check if EPOLLWAKEUP is allowed */ |
| 1855 | if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND)) | 1855 | ep_take_care_of_epollwakeup(&epds); |
| 1856 | epds.events &= ~EPOLLWAKEUP; | ||
| 1857 | 1856 | ||
| 1858 | /* | 1857 | /* |
| 1859 | * We have to check that the file structure underneath the file descriptor | 1858 | * We have to check that the file structure underneath the file descriptor |
| @@ -1380,10 +1380,6 @@ int search_binary_handler(struct linux_binprm *bprm) | |||
| 1380 | if (retval) | 1380 | if (retval) |
| 1381 | return retval; | 1381 | return retval; |
| 1382 | 1382 | ||
| 1383 | retval = audit_bprm(bprm); | ||
| 1384 | if (retval) | ||
| 1385 | return retval; | ||
| 1386 | |||
| 1387 | retval = -ENOENT; | 1383 | retval = -ENOENT; |
| 1388 | retry: | 1384 | retry: |
| 1389 | read_lock(&binfmt_lock); | 1385 | read_lock(&binfmt_lock); |
| @@ -1431,6 +1427,7 @@ static int exec_binprm(struct linux_binprm *bprm) | |||
| 1431 | 1427 | ||
| 1432 | ret = search_binary_handler(bprm); | 1428 | ret = search_binary_handler(bprm); |
| 1433 | if (ret >= 0) { | 1429 | if (ret >= 0) { |
| 1430 | audit_bprm(bprm); | ||
| 1434 | trace_sched_process_exec(current, old_pid, bprm); | 1431 | trace_sched_process_exec(current, old_pid, bprm); |
| 1435 | ptrace_event(PTRACE_EVENT_EXEC, old_vpid); | 1432 | ptrace_event(PTRACE_EVENT_EXEC, old_vpid); |
| 1436 | current->did_exec = 1; | 1433 | current->did_exec = 1; |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index e66a8009aff1..c8420f7e4db6 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
| @@ -1899,7 +1899,8 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) | |||
| 1899 | gi->nhash = 0; | 1899 | gi->nhash = 0; |
| 1900 | } | 1900 | } |
| 1901 | /* Skip entries for other sb and dead entries */ | 1901 | /* Skip entries for other sb and dead entries */ |
| 1902 | } while (gi->sdp != gi->gl->gl_sbd || __lockref_is_dead(&gl->gl_lockref)); | 1902 | } while (gi->sdp != gi->gl->gl_sbd || |
| 1903 | __lockref_is_dead(&gi->gl->gl_lockref)); | ||
| 1903 | 1904 | ||
| 1904 | return 0; | 1905 | return 0; |
| 1905 | } | 1906 | } |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 1615df16cf4e..7119504159f1 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
| @@ -1171,8 +1171,11 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, | |||
| 1171 | if (d != NULL) | 1171 | if (d != NULL) |
| 1172 | dentry = d; | 1172 | dentry = d; |
| 1173 | if (dentry->d_inode) { | 1173 | if (dentry->d_inode) { |
| 1174 | if (!(*opened & FILE_OPENED)) | 1174 | if (!(*opened & FILE_OPENED)) { |
| 1175 | if (d == NULL) | ||
| 1176 | dget(dentry); | ||
| 1175 | return finish_no_open(file, dentry); | 1177 | return finish_no_open(file, dentry); |
| 1178 | } | ||
| 1176 | dput(d); | 1179 | dput(d); |
| 1177 | return 0; | 1180 | return 0; |
| 1178 | } | 1181 | } |
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index c8423d6de6c3..2a6ba06bee6f 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c | |||
| @@ -466,19 +466,19 @@ static void gdlm_cancel(struct gfs2_glock *gl) | |||
| 466 | static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen, | 466 | static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen, |
| 467 | char *lvb_bits) | 467 | char *lvb_bits) |
| 468 | { | 468 | { |
| 469 | uint32_t gen; | 469 | __le32 gen; |
| 470 | memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE); | 470 | memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE); |
| 471 | memcpy(&gen, lvb_bits, sizeof(uint32_t)); | 471 | memcpy(&gen, lvb_bits, sizeof(__le32)); |
| 472 | *lvb_gen = le32_to_cpu(gen); | 472 | *lvb_gen = le32_to_cpu(gen); |
| 473 | } | 473 | } |
| 474 | 474 | ||
| 475 | static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen, | 475 | static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen, |
| 476 | char *lvb_bits) | 476 | char *lvb_bits) |
| 477 | { | 477 | { |
| 478 | uint32_t gen; | 478 | __le32 gen; |
| 479 | memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE); | 479 | memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE); |
| 480 | gen = cpu_to_le32(lvb_gen); | 480 | gen = cpu_to_le32(lvb_gen); |
| 481 | memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t)); | 481 | memcpy(ls->ls_control_lvb, &gen, sizeof(__le32)); |
| 482 | } | 482 | } |
| 483 | 483 | ||
| 484 | static int all_jid_bits_clear(char *lvb) | 484 | static int all_jid_bits_clear(char *lvb) |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 453b50eaddec..98236d0df3ca 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
| @@ -667,7 +667,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, | |||
| 667 | struct buffer_head *bh; | 667 | struct buffer_head *bh; |
| 668 | struct page *page; | 668 | struct page *page; |
| 669 | void *kaddr, *ptr; | 669 | void *kaddr, *ptr; |
| 670 | struct gfs2_quota q, *qp; | 670 | struct gfs2_quota q; |
| 671 | int err, nbytes; | 671 | int err, nbytes; |
| 672 | u64 size; | 672 | u64 size; |
| 673 | 673 | ||
| @@ -683,28 +683,25 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, | |||
| 683 | return err; | 683 | return err; |
| 684 | 684 | ||
| 685 | err = -EIO; | 685 | err = -EIO; |
| 686 | qp = &q; | 686 | be64_add_cpu(&q.qu_value, change); |
| 687 | qp->qu_value = be64_to_cpu(qp->qu_value); | 687 | qd->qd_qb.qb_value = q.qu_value; |
| 688 | qp->qu_value += change; | ||
| 689 | qp->qu_value = cpu_to_be64(qp->qu_value); | ||
| 690 | qd->qd_qb.qb_value = qp->qu_value; | ||
| 691 | if (fdq) { | 688 | if (fdq) { |
| 692 | if (fdq->d_fieldmask & FS_DQ_BSOFT) { | 689 | if (fdq->d_fieldmask & FS_DQ_BSOFT) { |
| 693 | qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); | 690 | q.qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); |
| 694 | qd->qd_qb.qb_warn = qp->qu_warn; | 691 | qd->qd_qb.qb_warn = q.qu_warn; |
| 695 | } | 692 | } |
| 696 | if (fdq->d_fieldmask & FS_DQ_BHARD) { | 693 | if (fdq->d_fieldmask & FS_DQ_BHARD) { |
| 697 | qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); | 694 | q.qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); |
| 698 | qd->qd_qb.qb_limit = qp->qu_limit; | 695 | qd->qd_qb.qb_limit = q.qu_limit; |
| 699 | } | 696 | } |
| 700 | if (fdq->d_fieldmask & FS_DQ_BCOUNT) { | 697 | if (fdq->d_fieldmask & FS_DQ_BCOUNT) { |
| 701 | qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); | 698 | q.qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); |
| 702 | qd->qd_qb.qb_value = qp->qu_value; | 699 | qd->qd_qb.qb_value = q.qu_value; |
| 703 | } | 700 | } |
| 704 | } | 701 | } |
| 705 | 702 | ||
| 706 | /* Write the quota into the quota file on disk */ | 703 | /* Write the quota into the quota file on disk */ |
| 707 | ptr = qp; | 704 | ptr = &q; |
| 708 | nbytes = sizeof(struct gfs2_quota); | 705 | nbytes = sizeof(struct gfs2_quota); |
| 709 | get_a_page: | 706 | get_a_page: |
| 710 | page = find_or_create_page(mapping, index, GFP_NOFS); | 707 | page = find_or_create_page(mapping, index, GFP_NOFS); |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 4d83abdd5635..c8d6161bd682 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
| @@ -1127,7 +1127,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) | |||
| 1127 | rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); | 1127 | rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); |
| 1128 | rgd->rd_free_clone = rgd->rd_free; | 1128 | rgd->rd_free_clone = rgd->rd_free; |
| 1129 | } | 1129 | } |
| 1130 | if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { | 1130 | if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { |
| 1131 | rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); | 1131 | rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); |
| 1132 | gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, | 1132 | gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, |
| 1133 | rgd->rd_bits[0].bi_bh->b_data); | 1133 | rgd->rd_bits[0].bi_bh->b_data); |
| @@ -1161,7 +1161,7 @@ int update_rgrp_lvb(struct gfs2_rgrpd *rgd) | |||
| 1161 | if (rgd->rd_flags & GFS2_RDF_UPTODATE) | 1161 | if (rgd->rd_flags & GFS2_RDF_UPTODATE) |
| 1162 | return 0; | 1162 | return 0; |
| 1163 | 1163 | ||
| 1164 | if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) | 1164 | if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) |
| 1165 | return gfs2_rgrp_bh_get(rgd); | 1165 | return gfs2_rgrp_bh_get(rgd); |
| 1166 | 1166 | ||
| 1167 | rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); | 1167 | rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); |
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index b51a6079108d..e9a97a0d4314 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c | |||
| @@ -24,13 +24,6 @@ struct hfsplus_wd { | |||
| 24 | u16 embed_count; | 24 | u16 embed_count; |
| 25 | }; | 25 | }; |
| 26 | 26 | ||
| 27 | static void hfsplus_end_io_sync(struct bio *bio, int err) | ||
| 28 | { | ||
| 29 | if (err) | ||
| 30 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
| 31 | complete(bio->bi_private); | ||
| 32 | } | ||
| 33 | |||
| 34 | /* | 27 | /* |
| 35 | * hfsplus_submit_bio - Perfrom block I/O | 28 | * hfsplus_submit_bio - Perfrom block I/O |
| 36 | * @sb: super block of volume for I/O | 29 | * @sb: super block of volume for I/O |
| @@ -53,7 +46,6 @@ static void hfsplus_end_io_sync(struct bio *bio, int err) | |||
| 53 | int hfsplus_submit_bio(struct super_block *sb, sector_t sector, | 46 | int hfsplus_submit_bio(struct super_block *sb, sector_t sector, |
| 54 | void *buf, void **data, int rw) | 47 | void *buf, void **data, int rw) |
| 55 | { | 48 | { |
| 56 | DECLARE_COMPLETION_ONSTACK(wait); | ||
| 57 | struct bio *bio; | 49 | struct bio *bio; |
| 58 | int ret = 0; | 50 | int ret = 0; |
| 59 | u64 io_size; | 51 | u64 io_size; |
| @@ -73,8 +65,6 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, | |||
| 73 | bio = bio_alloc(GFP_NOIO, 1); | 65 | bio = bio_alloc(GFP_NOIO, 1); |
| 74 | bio->bi_sector = sector; | 66 | bio->bi_sector = sector; |
| 75 | bio->bi_bdev = sb->s_bdev; | 67 | bio->bi_bdev = sb->s_bdev; |
| 76 | bio->bi_end_io = hfsplus_end_io_sync; | ||
| 77 | bio->bi_private = &wait; | ||
| 78 | 68 | ||
| 79 | if (!(rw & WRITE) && data) | 69 | if (!(rw & WRITE) && data) |
| 80 | *data = (u8 *)buf + offset; | 70 | *data = (u8 *)buf + offset; |
| @@ -93,12 +83,7 @@ int hfsplus_submit_bio(struct super_block *sb, sector_t sector, | |||
| 93 | buf = (u8 *)buf + len; | 83 | buf = (u8 *)buf + len; |
| 94 | } | 84 | } |
| 95 | 85 | ||
| 96 | submit_bio(rw, bio); | 86 | ret = submit_bio_wait(rw, bio); |
| 97 | wait_for_completion(&wait); | ||
| 98 | |||
| 99 | if (!bio_flagged(bio, BIO_UPTODATE)) | ||
| 100 | ret = -EIO; | ||
| 101 | |||
| 102 | out: | 87 | out: |
| 103 | bio_put(bio); | 88 | bio_put(bio); |
| 104 | return ret < 0 ? ret : 0; | 89 | return ret < 0 ? ret : 0; |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 25437280a207..db23ce1bd903 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
| @@ -33,15 +33,6 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) | |||
| 33 | 33 | ||
| 34 | #define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file)) | 34 | #define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file)) |
| 35 | 35 | ||
| 36 | static int hostfs_d_delete(const struct dentry *dentry) | ||
| 37 | { | ||
| 38 | return 1; | ||
| 39 | } | ||
| 40 | |||
| 41 | static const struct dentry_operations hostfs_dentry_ops = { | ||
| 42 | .d_delete = hostfs_d_delete, | ||
| 43 | }; | ||
| 44 | |||
| 45 | /* Changed in hostfs_args before the kernel starts running */ | 36 | /* Changed in hostfs_args before the kernel starts running */ |
| 46 | static char *root_ino = ""; | 37 | static char *root_ino = ""; |
| 47 | static int append = 0; | 38 | static int append = 0; |
| @@ -925,7 +916,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) | |||
| 925 | sb->s_blocksize_bits = 10; | 916 | sb->s_blocksize_bits = 10; |
| 926 | sb->s_magic = HOSTFS_SUPER_MAGIC; | 917 | sb->s_magic = HOSTFS_SUPER_MAGIC; |
| 927 | sb->s_op = &hostfs_sbops; | 918 | sb->s_op = &hostfs_sbops; |
| 928 | sb->s_d_op = &hostfs_dentry_ops; | 919 | sb->s_d_op = &simple_dentry_operations; |
| 929 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 920 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
| 930 | 921 | ||
| 931 | /* NULL is printed as <NULL> by sprintf: avoid that. */ | 922 | /* NULL is printed as <NULL> by sprintf: avoid that. */ |
diff --git a/fs/libfs.c b/fs/libfs.c index 5de06947ba5e..a1844244246f 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
| @@ -47,10 +47,16 @@ EXPORT_SYMBOL(simple_statfs); | |||
| 47 | * Retaining negative dentries for an in-memory filesystem just wastes | 47 | * Retaining negative dentries for an in-memory filesystem just wastes |
| 48 | * memory and lookup time: arrange for them to be deleted immediately. | 48 | * memory and lookup time: arrange for them to be deleted immediately. |
| 49 | */ | 49 | */ |
| 50 | static int simple_delete_dentry(const struct dentry *dentry) | 50 | int always_delete_dentry(const struct dentry *dentry) |
| 51 | { | 51 | { |
| 52 | return 1; | 52 | return 1; |
| 53 | } | 53 | } |
| 54 | EXPORT_SYMBOL(always_delete_dentry); | ||
| 55 | |||
| 56 | const struct dentry_operations simple_dentry_operations = { | ||
| 57 | .d_delete = always_delete_dentry, | ||
| 58 | }; | ||
| 59 | EXPORT_SYMBOL(simple_dentry_operations); | ||
| 54 | 60 | ||
| 55 | /* | 61 | /* |
| 56 | * Lookup the data. This is trivial - if the dentry didn't already | 62 | * Lookup the data. This is trivial - if the dentry didn't already |
| @@ -58,10 +64,6 @@ static int simple_delete_dentry(const struct dentry *dentry) | |||
| 58 | */ | 64 | */ |
| 59 | struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | 65 | struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) |
| 60 | { | 66 | { |
| 61 | static const struct dentry_operations simple_dentry_operations = { | ||
| 62 | .d_delete = simple_delete_dentry, | ||
| 63 | }; | ||
| 64 | |||
| 65 | if (dentry->d_name.len > NAME_MAX) | 67 | if (dentry->d_name.len > NAME_MAX) |
| 66 | return ERR_PTR(-ENAMETOOLONG); | 68 | return ERR_PTR(-ENAMETOOLONG); |
| 67 | if (!dentry->d_sb->s_d_op) | 69 | if (!dentry->d_sb->s_d_op) |
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index 550475ca6a0e..0f95f0d0b313 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c | |||
| @@ -14,16 +14,10 @@ | |||
| 14 | 14 | ||
| 15 | #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) | 15 | #define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1)) |
| 16 | 16 | ||
| 17 | static void request_complete(struct bio *bio, int err) | ||
| 18 | { | ||
| 19 | complete((struct completion *)bio->bi_private); | ||
| 20 | } | ||
| 21 | |||
| 22 | static int sync_request(struct page *page, struct block_device *bdev, int rw) | 17 | static int sync_request(struct page *page, struct block_device *bdev, int rw) |
| 23 | { | 18 | { |
| 24 | struct bio bio; | 19 | struct bio bio; |
| 25 | struct bio_vec bio_vec; | 20 | struct bio_vec bio_vec; |
| 26 | struct completion complete; | ||
| 27 | 21 | ||
| 28 | bio_init(&bio); | 22 | bio_init(&bio); |
| 29 | bio.bi_max_vecs = 1; | 23 | bio.bi_max_vecs = 1; |
| @@ -35,13 +29,8 @@ static int sync_request(struct page *page, struct block_device *bdev, int rw) | |||
| 35 | bio.bi_size = PAGE_SIZE; | 29 | bio.bi_size = PAGE_SIZE; |
| 36 | bio.bi_bdev = bdev; | 30 | bio.bi_bdev = bdev; |
| 37 | bio.bi_sector = page->index * (PAGE_SIZE >> 9); | 31 | bio.bi_sector = page->index * (PAGE_SIZE >> 9); |
| 38 | init_completion(&complete); | ||
| 39 | bio.bi_private = &complete; | ||
| 40 | bio.bi_end_io = request_complete; | ||
| 41 | 32 | ||
| 42 | submit_bio(rw, &bio); | 33 | return submit_bio_wait(rw, &bio); |
| 43 | wait_for_completion(&complete); | ||
| 44 | return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO; | ||
| 45 | } | 34 | } |
| 46 | 35 | ||
| 47 | static int bdev_readpage(void *_sb, struct page *page) | 36 | static int bdev_readpage(void *_sb, struct page *page) |
diff --git a/fs/namei.c b/fs/namei.c index e029a4cbff7d..3531deebad30 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
| @@ -513,8 +513,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) | |||
| 513 | 513 | ||
| 514 | if (!lockref_get_not_dead(&parent->d_lockref)) { | 514 | if (!lockref_get_not_dead(&parent->d_lockref)) { |
| 515 | nd->path.dentry = NULL; | 515 | nd->path.dentry = NULL; |
| 516 | rcu_read_unlock(); | 516 | goto out; |
| 517 | return -ECHILD; | ||
| 518 | } | 517 | } |
| 519 | 518 | ||
| 520 | /* | 519 | /* |
| @@ -1599,11 +1598,6 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd) | |||
| 1599 | * do a "get_unaligned()" if this helps and is sufficiently | 1598 | * do a "get_unaligned()" if this helps and is sufficiently |
| 1600 | * fast. | 1599 | * fast. |
| 1601 | * | 1600 | * |
| 1602 | * - Little-endian machines (so that we can generate the mask | ||
| 1603 | * of low bytes efficiently). Again, we *could* do a byte | ||
| 1604 | * swapping load on big-endian architectures if that is not | ||
| 1605 | * expensive enough to make the optimization worthless. | ||
| 1606 | * | ||
| 1607 | * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we | 1601 | * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we |
| 1608 | * do not trap on the (extremely unlikely) case of a page | 1602 | * do not trap on the (extremely unlikely) case of a page |
| 1609 | * crossing operation. | 1603 | * crossing operation. |
| @@ -1647,7 +1641,7 @@ unsigned int full_name_hash(const unsigned char *name, unsigned int len) | |||
| 1647 | if (!len) | 1641 | if (!len) |
| 1648 | goto done; | 1642 | goto done; |
| 1649 | } | 1643 | } |
| 1650 | mask = ~(~0ul << len*8); | 1644 | mask = bytemask_from_count(len); |
| 1651 | hash += mask & a; | 1645 | hash += mask & a; |
| 1652 | done: | 1646 | done: |
| 1653 | return fold_hash(hash); | 1647 | return fold_hash(hash); |
| @@ -2435,6 +2429,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) | |||
| 2435 | */ | 2429 | */ |
| 2436 | static inline int may_create(struct inode *dir, struct dentry *child) | 2430 | static inline int may_create(struct inode *dir, struct dentry *child) |
| 2437 | { | 2431 | { |
| 2432 | audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE); | ||
| 2438 | if (child->d_inode) | 2433 | if (child->d_inode) |
| 2439 | return -EEXIST; | 2434 | return -EEXIST; |
| 2440 | if (IS_DEADDIR(dir)) | 2435 | if (IS_DEADDIR(dir)) |
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index 8485978993e8..9838fb020473 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #include <linux/nfs_fs.h> | 36 | #include <linux/nfs_fs.h> |
| 37 | #include <linux/sunrpc/rpc_pipe_fs.h> | 37 | #include <linux/sunrpc/rpc_pipe_fs.h> |
| 38 | 38 | ||
| 39 | #include "../nfs4_fs.h" | ||
| 39 | #include "../pnfs.h" | 40 | #include "../pnfs.h" |
| 40 | #include "../netns.h" | 41 | #include "../netns.h" |
| 41 | 42 | ||
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 9c3e117c3ed1..4d0161442565 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c | |||
| @@ -44,7 +44,7 @@ | |||
| 44 | static inline sector_t normalize(sector_t s, int base) | 44 | static inline sector_t normalize(sector_t s, int base) |
| 45 | { | 45 | { |
| 46 | sector_t tmp = s; /* Since do_div modifies its argument */ | 46 | sector_t tmp = s; /* Since do_div modifies its argument */ |
| 47 | return s - do_div(tmp, base); | 47 | return s - sector_div(tmp, base); |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | static inline sector_t normalize_up(sector_t s, int base) | 50 | static inline sector_t normalize_up(sector_t s, int base) |
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index fc0f95ec7358..d25f10fb4926 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c | |||
| @@ -46,7 +46,9 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, | |||
| 46 | #include <linux/sunrpc/cache.h> | 46 | #include <linux/sunrpc/cache.h> |
| 47 | #include <linux/sunrpc/svcauth.h> | 47 | #include <linux/sunrpc/svcauth.h> |
| 48 | #include <linux/sunrpc/rpc_pipe_fs.h> | 48 | #include <linux/sunrpc/rpc_pipe_fs.h> |
| 49 | #include <linux/nfs_fs.h> | ||
| 49 | 50 | ||
| 51 | #include "nfs4_fs.h" | ||
| 50 | #include "dns_resolve.h" | 52 | #include "dns_resolve.h" |
| 51 | #include "cache_lib.h" | 53 | #include "cache_lib.h" |
| 52 | #include "netns.h" | 54 | #include "netns.h" |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 18ab2da4eeb6..00ad1c2b217d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
| @@ -312,7 +312,7 @@ struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) | |||
| 312 | } | 312 | } |
| 313 | EXPORT_SYMBOL_GPL(nfs4_label_alloc); | 313 | EXPORT_SYMBOL_GPL(nfs4_label_alloc); |
| 314 | #else | 314 | #else |
| 315 | void inline nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, | 315 | void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, |
| 316 | struct nfs4_label *label) | 316 | struct nfs4_label *label) |
| 317 | { | 317 | { |
| 318 | } | 318 | } |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index bca6a3e3c49c..8b5cc04a8611 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
| @@ -269,6 +269,21 @@ extern const u32 nfs41_maxgetdevinfo_overhead; | |||
| 269 | extern struct rpc_procinfo nfs4_procedures[]; | 269 | extern struct rpc_procinfo nfs4_procedures[]; |
| 270 | #endif | 270 | #endif |
| 271 | 271 | ||
| 272 | #ifdef CONFIG_NFS_V4_SECURITY_LABEL | ||
| 273 | extern struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags); | ||
| 274 | static inline void nfs4_label_free(struct nfs4_label *label) | ||
| 275 | { | ||
| 276 | if (label) { | ||
| 277 | kfree(label->label); | ||
| 278 | kfree(label); | ||
| 279 | } | ||
| 280 | return; | ||
| 281 | } | ||
| 282 | #else | ||
| 283 | static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; } | ||
| 284 | static inline void nfs4_label_free(void *label) {} | ||
| 285 | #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ | ||
| 286 | |||
| 272 | /* proc.c */ | 287 | /* proc.c */ |
| 273 | void nfs_close_context(struct nfs_open_context *ctx, int is_sync); | 288 | void nfs_close_context(struct nfs_open_context *ctx, int is_sync); |
| 274 | extern struct nfs_client *nfs_init_client(struct nfs_client *clp, | 289 | extern struct nfs_client *nfs_init_client(struct nfs_client *clp, |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 3ce79b04522e..5609edc742a0 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
| @@ -9,6 +9,14 @@ | |||
| 9 | #ifndef __LINUX_FS_NFS_NFS4_FS_H | 9 | #ifndef __LINUX_FS_NFS_NFS4_FS_H |
| 10 | #define __LINUX_FS_NFS_NFS4_FS_H | 10 | #define __LINUX_FS_NFS_NFS4_FS_H |
| 11 | 11 | ||
| 12 | #if defined(CONFIG_NFS_V4_2) | ||
| 13 | #define NFS4_MAX_MINOR_VERSION 2 | ||
| 14 | #elif defined(CONFIG_NFS_V4_1) | ||
| 15 | #define NFS4_MAX_MINOR_VERSION 1 | ||
| 16 | #else | ||
| 17 | #define NFS4_MAX_MINOR_VERSION 0 | ||
| 18 | #endif | ||
| 19 | |||
| 12 | #if IS_ENABLED(CONFIG_NFS_V4) | 20 | #if IS_ENABLED(CONFIG_NFS_V4) |
| 13 | 21 | ||
| 14 | #define NFS4_MAX_LOOP_ON_RECOVER (10) | 22 | #define NFS4_MAX_LOOP_ON_RECOVER (10) |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 659990c0109e..15052b81df42 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
| @@ -2518,9 +2518,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data) | |||
| 2518 | calldata->roc_barrier); | 2518 | calldata->roc_barrier); |
| 2519 | nfs_set_open_stateid(state, &calldata->res.stateid, 0); | 2519 | nfs_set_open_stateid(state, &calldata->res.stateid, 0); |
| 2520 | renew_lease(server, calldata->timestamp); | 2520 | renew_lease(server, calldata->timestamp); |
| 2521 | nfs4_close_clear_stateid_flags(state, | ||
| 2522 | calldata->arg.fmode); | ||
| 2523 | break; | 2521 | break; |
| 2522 | case -NFS4ERR_ADMIN_REVOKED: | ||
| 2524 | case -NFS4ERR_STALE_STATEID: | 2523 | case -NFS4ERR_STALE_STATEID: |
| 2525 | case -NFS4ERR_OLD_STATEID: | 2524 | case -NFS4ERR_OLD_STATEID: |
| 2526 | case -NFS4ERR_BAD_STATEID: | 2525 | case -NFS4ERR_BAD_STATEID: |
| @@ -2528,9 +2527,13 @@ static void nfs4_close_done(struct rpc_task *task, void *data) | |||
| 2528 | if (calldata->arg.fmode == 0) | 2527 | if (calldata->arg.fmode == 0) |
| 2529 | break; | 2528 | break; |
| 2530 | default: | 2529 | default: |
| 2531 | if (nfs4_async_handle_error(task, server, state) == -EAGAIN) | 2530 | if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { |
| 2532 | rpc_restart_call_prepare(task); | 2531 | rpc_restart_call_prepare(task); |
| 2532 | goto out_release; | ||
| 2533 | } | ||
| 2533 | } | 2534 | } |
| 2535 | nfs4_close_clear_stateid_flags(state, calldata->arg.fmode); | ||
| 2536 | out_release: | ||
| 2534 | nfs_release_seqid(calldata->arg.seqid); | 2537 | nfs_release_seqid(calldata->arg.seqid); |
| 2535 | nfs_refresh_inode(calldata->inode, calldata->res.fattr); | 2538 | nfs_refresh_inode(calldata->inode, calldata->res.fattr); |
| 2536 | dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); | 2539 | dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); |
| @@ -4802,7 +4805,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, | |||
| 4802 | dprintk("%s ERROR %d, Reset session\n", __func__, | 4805 | dprintk("%s ERROR %d, Reset session\n", __func__, |
| 4803 | task->tk_status); | 4806 | task->tk_status); |
| 4804 | nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); | 4807 | nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); |
| 4805 | goto restart_call; | 4808 | goto wait_on_recovery; |
| 4806 | #endif /* CONFIG_NFS_V4_1 */ | 4809 | #endif /* CONFIG_NFS_V4_1 */ |
| 4807 | case -NFS4ERR_DELAY: | 4810 | case -NFS4ERR_DELAY: |
| 4808 | nfs_inc_server_stats(server, NFSIOS_DELAY); | 4811 | nfs_inc_server_stats(server, NFSIOS_DELAY); |
| @@ -4987,11 +4990,17 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) | |||
| 4987 | 4990 | ||
| 4988 | trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status); | 4991 | trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status); |
| 4989 | switch (task->tk_status) { | 4992 | switch (task->tk_status) { |
| 4990 | case -NFS4ERR_STALE_STATEID: | ||
| 4991 | case -NFS4ERR_EXPIRED: | ||
| 4992 | case 0: | 4993 | case 0: |
| 4993 | renew_lease(data->res.server, data->timestamp); | 4994 | renew_lease(data->res.server, data->timestamp); |
| 4994 | break; | 4995 | break; |
| 4996 | case -NFS4ERR_ADMIN_REVOKED: | ||
| 4997 | case -NFS4ERR_DELEG_REVOKED: | ||
| 4998 | case -NFS4ERR_BAD_STATEID: | ||
| 4999 | case -NFS4ERR_OLD_STATEID: | ||
| 5000 | case -NFS4ERR_STALE_STATEID: | ||
| 5001 | case -NFS4ERR_EXPIRED: | ||
| 5002 | task->tk_status = 0; | ||
| 5003 | break; | ||
| 4995 | default: | 5004 | default: |
| 4996 | if (nfs4_async_handle_error(task, data->res.server, NULL) == | 5005 | if (nfs4_async_handle_error(task, data->res.server, NULL) == |
| 4997 | -EAGAIN) { | 5006 | -EAGAIN) { |
| @@ -7589,7 +7598,14 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) | |||
| 7589 | return; | 7598 | return; |
| 7590 | 7599 | ||
| 7591 | server = NFS_SERVER(lrp->args.inode); | 7600 | server = NFS_SERVER(lrp->args.inode); |
| 7592 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { | 7601 | switch (task->tk_status) { |
| 7602 | default: | ||
| 7603 | task->tk_status = 0; | ||
| 7604 | case 0: | ||
| 7605 | break; | ||
| 7606 | case -NFS4ERR_DELAY: | ||
| 7607 | if (nfs4_async_handle_error(task, server, NULL) != -EAGAIN) | ||
| 7608 | break; | ||
| 7593 | rpc_restart_call_prepare(task); | 7609 | rpc_restart_call_prepare(task); |
| 7594 | return; | 7610 | return; |
| 7595 | } | 7611 | } |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 088de1355e93..ee7237f99f54 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
| @@ -141,8 +141,8 @@ xdr_error: \ | |||
| 141 | 141 | ||
| 142 | static void next_decode_page(struct nfsd4_compoundargs *argp) | 142 | static void next_decode_page(struct nfsd4_compoundargs *argp) |
| 143 | { | 143 | { |
| 144 | argp->pagelist++; | ||
| 145 | argp->p = page_address(argp->pagelist[0]); | 144 | argp->p = page_address(argp->pagelist[0]); |
| 145 | argp->pagelist++; | ||
| 146 | if (argp->pagelen < PAGE_SIZE) { | 146 | if (argp->pagelen < PAGE_SIZE) { |
| 147 | argp->end = argp->p + (argp->pagelen>>2); | 147 | argp->end = argp->p + (argp->pagelen>>2); |
| 148 | argp->pagelen = 0; | 148 | argp->pagelen = 0; |
| @@ -1229,6 +1229,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) | |||
| 1229 | len -= pages * PAGE_SIZE; | 1229 | len -= pages * PAGE_SIZE; |
| 1230 | 1230 | ||
| 1231 | argp->p = (__be32 *)page_address(argp->pagelist[0]); | 1231 | argp->p = (__be32 *)page_address(argp->pagelist[0]); |
| 1232 | argp->pagelist++; | ||
| 1232 | argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); | 1233 | argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); |
| 1233 | } | 1234 | } |
| 1234 | argp->p += XDR_QUADLEN(len); | 1235 | argp->p += XDR_QUADLEN(len); |
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 9186c7ce0b14..b6af150c96b8 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c | |||
| @@ -132,6 +132,13 @@ nfsd_reply_cache_alloc(void) | |||
| 132 | } | 132 | } |
| 133 | 133 | ||
| 134 | static void | 134 | static void |
| 135 | nfsd_reply_cache_unhash(struct svc_cacherep *rp) | ||
| 136 | { | ||
| 137 | hlist_del_init(&rp->c_hash); | ||
| 138 | list_del_init(&rp->c_lru); | ||
| 139 | } | ||
| 140 | |||
| 141 | static void | ||
| 135 | nfsd_reply_cache_free_locked(struct svc_cacherep *rp) | 142 | nfsd_reply_cache_free_locked(struct svc_cacherep *rp) |
| 136 | { | 143 | { |
| 137 | if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { | 144 | if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { |
| @@ -417,7 +424,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) | |||
| 417 | rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru); | 424 | rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru); |
| 418 | if (nfsd_cache_entry_expired(rp) || | 425 | if (nfsd_cache_entry_expired(rp) || |
| 419 | num_drc_entries >= max_drc_entries) { | 426 | num_drc_entries >= max_drc_entries) { |
| 420 | lru_put_end(rp); | 427 | nfsd_reply_cache_unhash(rp); |
| 421 | prune_cache_entries(); | 428 | prune_cache_entries(); |
| 422 | goto search_cache; | 429 | goto search_cache; |
| 423 | } | 430 | } |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 94b5f5d2bfed..7eea63cada1d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
| @@ -298,41 +298,12 @@ commit_metadata(struct svc_fh *fhp) | |||
| 298 | } | 298 | } |
| 299 | 299 | ||
| 300 | /* | 300 | /* |
| 301 | * Set various file attributes. | 301 | * Go over the attributes and take care of the small differences between |
| 302 | * N.B. After this call fhp needs an fh_put | 302 | * NFS semantics and what Linux expects. |
| 303 | */ | 303 | */ |
| 304 | __be32 | 304 | static void |
| 305 | nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, | 305 | nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) |
| 306 | int check_guard, time_t guardtime) | ||
| 307 | { | 306 | { |
| 308 | struct dentry *dentry; | ||
| 309 | struct inode *inode; | ||
| 310 | int accmode = NFSD_MAY_SATTR; | ||
| 311 | umode_t ftype = 0; | ||
| 312 | __be32 err; | ||
| 313 | int host_err; | ||
| 314 | int size_change = 0; | ||
| 315 | |||
| 316 | if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) | ||
| 317 | accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; | ||
| 318 | if (iap->ia_valid & ATTR_SIZE) | ||
| 319 | ftype = S_IFREG; | ||
| 320 | |||
| 321 | /* Get inode */ | ||
| 322 | err = fh_verify(rqstp, fhp, ftype, accmode); | ||
| 323 | if (err) | ||
| 324 | goto out; | ||
| 325 | |||
| 326 | dentry = fhp->fh_dentry; | ||
| 327 | inode = dentry->d_inode; | ||
| 328 | |||
| 329 | /* Ignore any mode updates on symlinks */ | ||
| 330 | if (S_ISLNK(inode->i_mode)) | ||
| 331 | iap->ia_valid &= ~ATTR_MODE; | ||
| 332 | |||
| 333 | if (!iap->ia_valid) | ||
| 334 | goto out; | ||
| 335 | |||
| 336 | /* | 307 | /* |
| 337 | * NFSv2 does not differentiate between "set-[ac]time-to-now" | 308 | * NFSv2 does not differentiate between "set-[ac]time-to-now" |
| 338 | * which only requires access, and "set-[ac]time-to-X" which | 309 | * which only requires access, and "set-[ac]time-to-X" which |
| @@ -342,8 +313,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, | |||
| 342 | * convert to "set to now" instead of "set to explicit time" | 313 | * convert to "set to now" instead of "set to explicit time" |
| 343 | * | 314 | * |
| 344 | * We only call inode_change_ok as the last test as technically | 315 | * We only call inode_change_ok as the last test as technically |
| 345 | * it is not an interface that we should be using. It is only | 316 | * it is not an interface that we should be using. |
| 346 | * valid if the filesystem does not define it's own i_op->setattr. | ||
| 347 | */ | 317 | */ |
| 348 | #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) | 318 | #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) |
| 349 | #define MAX_TOUCH_TIME_ERROR (30*60) | 319 | #define MAX_TOUCH_TIME_ERROR (30*60) |
| @@ -369,30 +339,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, | |||
| 369 | iap->ia_valid &= ~BOTH_TIME_SET; | 339 | iap->ia_valid &= ~BOTH_TIME_SET; |
| 370 | } | 340 | } |
| 371 | } | 341 | } |
| 372 | |||
| 373 | /* | ||
| 374 | * The size case is special. | ||
| 375 | * It changes the file as well as the attributes. | ||
| 376 | */ | ||
| 377 | if (iap->ia_valid & ATTR_SIZE) { | ||
| 378 | if (iap->ia_size < inode->i_size) { | ||
| 379 | err = nfsd_permission(rqstp, fhp->fh_export, dentry, | ||
| 380 | NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE); | ||
| 381 | if (err) | ||
| 382 | goto out; | ||
| 383 | } | ||
| 384 | |||
| 385 | host_err = get_write_access(inode); | ||
| 386 | if (host_err) | ||
| 387 | goto out_nfserr; | ||
| 388 | |||
| 389 | size_change = 1; | ||
| 390 | host_err = locks_verify_truncate(inode, NULL, iap->ia_size); | ||
| 391 | if (host_err) { | ||
| 392 | put_write_access(inode); | ||
| 393 | goto out_nfserr; | ||
| 394 | } | ||
| 395 | } | ||
| 396 | 342 | ||
| 397 | /* sanitize the mode change */ | 343 | /* sanitize the mode change */ |
| 398 | if (iap->ia_valid & ATTR_MODE) { | 344 | if (iap->ia_valid & ATTR_MODE) { |
| @@ -415,32 +361,111 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, | |||
| 415 | iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); | 361 | iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); |
| 416 | } | 362 | } |
| 417 | } | 363 | } |
| 364 | } | ||
| 418 | 365 | ||
| 419 | /* Change the attributes. */ | 366 | static __be32 |
| 367 | nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, | ||
| 368 | struct iattr *iap) | ||
| 369 | { | ||
| 370 | struct inode *inode = fhp->fh_dentry->d_inode; | ||
| 371 | int host_err; | ||
| 420 | 372 | ||
| 421 | iap->ia_valid |= ATTR_CTIME; | 373 | if (iap->ia_size < inode->i_size) { |
| 374 | __be32 err; | ||
| 422 | 375 | ||
| 423 | err = nfserr_notsync; | 376 | err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, |
| 424 | if (!check_guard || guardtime == inode->i_ctime.tv_sec) { | 377 | NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); |
| 425 | host_err = nfsd_break_lease(inode); | 378 | if (err) |
| 426 | if (host_err) | 379 | return err; |
| 427 | goto out_nfserr; | 380 | } |
| 428 | fh_lock(fhp); | ||
| 429 | 381 | ||
| 430 | host_err = notify_change(dentry, iap, NULL); | 382 | host_err = get_write_access(inode); |
| 431 | err = nfserrno(host_err); | 383 | if (host_err) |
| 432 | fh_unlock(fhp); | 384 | goto out_nfserrno; |
| 385 | |||
| 386 | host_err = locks_verify_truncate(inode, NULL, iap->ia_size); | ||
| 387 | if (host_err) | ||
| 388 | goto out_put_write_access; | ||
| 389 | return 0; | ||
| 390 | |||
| 391 | out_put_write_access: | ||
| 392 | put_write_access(inode); | ||
| 393 | out_nfserrno: | ||
| 394 | return nfserrno(host_err); | ||
| 395 | } | ||
| 396 | |||
| 397 | /* | ||
| 398 | * Set various file attributes. After this call fhp needs an fh_put. | ||
| 399 | */ | ||
| 400 | __be32 | ||
| 401 | nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, | ||
| 402 | int check_guard, time_t guardtime) | ||
| 403 | { | ||
| 404 | struct dentry *dentry; | ||
| 405 | struct inode *inode; | ||
| 406 | int accmode = NFSD_MAY_SATTR; | ||
| 407 | umode_t ftype = 0; | ||
| 408 | __be32 err; | ||
| 409 | int host_err; | ||
| 410 | int size_change = 0; | ||
| 411 | |||
| 412 | if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) | ||
| 413 | accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; | ||
| 414 | if (iap->ia_valid & ATTR_SIZE) | ||
| 415 | ftype = S_IFREG; | ||
| 416 | |||
| 417 | /* Get inode */ | ||
| 418 | err = fh_verify(rqstp, fhp, ftype, accmode); | ||
| 419 | if (err) | ||
| 420 | goto out; | ||
| 421 | |||
| 422 | dentry = fhp->fh_dentry; | ||
| 423 | inode = dentry->d_inode; | ||
| 424 | |||
| 425 | /* Ignore any mode updates on symlinks */ | ||
| 426 | if (S_ISLNK(inode->i_mode)) | ||
| 427 | iap->ia_valid &= ~ATTR_MODE; | ||
| 428 | |||
| 429 | if (!iap->ia_valid) | ||
| 430 | goto out; | ||
| 431 | |||
| 432 | nfsd_sanitize_attrs(inode, iap); | ||
| 433 | |||
| 434 | /* | ||
| 435 | * The size case is special, it changes the file in addition to the | ||
| 436 | * attributes. | ||
| 437 | */ | ||
| 438 | if (iap->ia_valid & ATTR_SIZE) { | ||
| 439 | err = nfsd_get_write_access(rqstp, fhp, iap); | ||
| 440 | if (err) | ||
| 441 | goto out; | ||
| 442 | size_change = 1; | ||
| 433 | } | 443 | } |
| 444 | |||
| 445 | iap->ia_valid |= ATTR_CTIME; | ||
| 446 | |||
| 447 | if (check_guard && guardtime != inode->i_ctime.tv_sec) { | ||
| 448 | err = nfserr_notsync; | ||
| 449 | goto out_put_write_access; | ||
| 450 | } | ||
| 451 | |||
| 452 | host_err = nfsd_break_lease(inode); | ||
| 453 | if (host_err) | ||
| 454 | goto out_put_write_access_nfserror; | ||
| 455 | |||
| 456 | fh_lock(fhp); | ||
| 457 | host_err = notify_change(dentry, iap, NULL); | ||
| 458 | fh_unlock(fhp); | ||
| 459 | |||
| 460 | out_put_write_access_nfserror: | ||
| 461 | err = nfserrno(host_err); | ||
| 462 | out_put_write_access: | ||
| 434 | if (size_change) | 463 | if (size_change) |
| 435 | put_write_access(inode); | 464 | put_write_access(inode); |
| 436 | if (!err) | 465 | if (!err) |
| 437 | commit_metadata(fhp); | 466 | commit_metadata(fhp); |
| 438 | out: | 467 | out: |
| 439 | return err; | 468 | return err; |
| 440 | |||
| 441 | out_nfserr: | ||
| 442 | err = nfserrno(host_err); | ||
| 443 | goto out; | ||
| 444 | } | 469 | } |
| 445 | 470 | ||
| 446 | #if defined(CONFIG_NFSD_V2_ACL) || \ | 471 | #if defined(CONFIG_NFSD_V2_ACL) || \ |
| @@ -726,11 +726,25 @@ pipe_poll(struct file *filp, poll_table *wait) | |||
| 726 | return mask; | 726 | return mask; |
| 727 | } | 727 | } |
| 728 | 728 | ||
| 729 | static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe) | ||
| 730 | { | ||
| 731 | int kill = 0; | ||
| 732 | |||
| 733 | spin_lock(&inode->i_lock); | ||
| 734 | if (!--pipe->files) { | ||
| 735 | inode->i_pipe = NULL; | ||
| 736 | kill = 1; | ||
| 737 | } | ||
| 738 | spin_unlock(&inode->i_lock); | ||
| 739 | |||
| 740 | if (kill) | ||
| 741 | free_pipe_info(pipe); | ||
| 742 | } | ||
| 743 | |||
| 729 | static int | 744 | static int |
| 730 | pipe_release(struct inode *inode, struct file *file) | 745 | pipe_release(struct inode *inode, struct file *file) |
| 731 | { | 746 | { |
| 732 | struct pipe_inode_info *pipe = inode->i_pipe; | 747 | struct pipe_inode_info *pipe = file->private_data; |
| 733 | int kill = 0; | ||
| 734 | 748 | ||
| 735 | __pipe_lock(pipe); | 749 | __pipe_lock(pipe); |
| 736 | if (file->f_mode & FMODE_READ) | 750 | if (file->f_mode & FMODE_READ) |
| @@ -743,17 +757,9 @@ pipe_release(struct inode *inode, struct file *file) | |||
| 743 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | 757 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); |
| 744 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); | 758 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); |
| 745 | } | 759 | } |
| 746 | spin_lock(&inode->i_lock); | ||
| 747 | if (!--pipe->files) { | ||
| 748 | inode->i_pipe = NULL; | ||
| 749 | kill = 1; | ||
| 750 | } | ||
| 751 | spin_unlock(&inode->i_lock); | ||
| 752 | __pipe_unlock(pipe); | 760 | __pipe_unlock(pipe); |
| 753 | 761 | ||
| 754 | if (kill) | 762 | put_pipe_info(inode, pipe); |
| 755 | free_pipe_info(pipe); | ||
| 756 | |||
| 757 | return 0; | 763 | return 0; |
| 758 | } | 764 | } |
| 759 | 765 | ||
| @@ -1014,7 +1020,6 @@ static int fifo_open(struct inode *inode, struct file *filp) | |||
| 1014 | { | 1020 | { |
| 1015 | struct pipe_inode_info *pipe; | 1021 | struct pipe_inode_info *pipe; |
| 1016 | bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC; | 1022 | bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC; |
| 1017 | int kill = 0; | ||
| 1018 | int ret; | 1023 | int ret; |
| 1019 | 1024 | ||
| 1020 | filp->f_version = 0; | 1025 | filp->f_version = 0; |
| @@ -1130,15 +1135,9 @@ err_wr: | |||
| 1130 | goto err; | 1135 | goto err; |
| 1131 | 1136 | ||
| 1132 | err: | 1137 | err: |
| 1133 | spin_lock(&inode->i_lock); | ||
| 1134 | if (!--pipe->files) { | ||
| 1135 | inode->i_pipe = NULL; | ||
| 1136 | kill = 1; | ||
| 1137 | } | ||
| 1138 | spin_unlock(&inode->i_lock); | ||
| 1139 | __pipe_unlock(pipe); | 1138 | __pipe_unlock(pipe); |
| 1140 | if (kill) | 1139 | |
| 1141 | free_pipe_info(pipe); | 1140 | put_pipe_info(inode, pipe); |
| 1142 | return ret; | 1141 | return ret; |
| 1143 | } | 1142 | } |
| 1144 | 1143 | ||
diff --git a/fs/proc/base.c b/fs/proc/base.c index 1485e38daaa3..03c8d747be48 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
| @@ -1151,10 +1151,16 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
| 1151 | goto out_free_page; | 1151 | goto out_free_page; |
| 1152 | 1152 | ||
| 1153 | } | 1153 | } |
| 1154 | kloginuid = make_kuid(file->f_cred->user_ns, loginuid); | 1154 | |
| 1155 | if (!uid_valid(kloginuid)) { | 1155 | /* is userspace trying to explicitly UNSET the loginuid? */ |
| 1156 | length = -EINVAL; | 1156 | if (loginuid == AUDIT_UID_UNSET) { |
| 1157 | goto out_free_page; | 1157 | kloginuid = INVALID_UID; |
| 1158 | } else { | ||
| 1159 | kloginuid = make_kuid(file->f_cred->user_ns, loginuid); | ||
| 1160 | if (!uid_valid(kloginuid)) { | ||
| 1161 | length = -EINVAL; | ||
| 1162 | goto out_free_page; | ||
| 1163 | } | ||
| 1158 | } | 1164 | } |
| 1159 | 1165 | ||
| 1160 | length = audit_set_loginuid(kloginuid); | 1166 | length = audit_set_loginuid(kloginuid); |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 737e15615b04..cca93b6fb9a9 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
| @@ -175,22 +175,6 @@ static const struct inode_operations proc_link_inode_operations = { | |||
| 175 | }; | 175 | }; |
| 176 | 176 | ||
| 177 | /* | 177 | /* |
| 178 | * As some entries in /proc are volatile, we want to | ||
| 179 | * get rid of unused dentries. This could be made | ||
| 180 | * smarter: we could keep a "volatile" flag in the | ||
| 181 | * inode to indicate which ones to keep. | ||
| 182 | */ | ||
| 183 | static int proc_delete_dentry(const struct dentry * dentry) | ||
| 184 | { | ||
| 185 | return 1; | ||
| 186 | } | ||
| 187 | |||
| 188 | static const struct dentry_operations proc_dentry_operations = | ||
| 189 | { | ||
| 190 | .d_delete = proc_delete_dentry, | ||
| 191 | }; | ||
| 192 | |||
| 193 | /* | ||
| 194 | * Don't create negative dentries here, return -ENOENT by hand | 178 | * Don't create negative dentries here, return -ENOENT by hand |
| 195 | * instead. | 179 | * instead. |
| 196 | */ | 180 | */ |
| @@ -209,7 +193,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, | |||
| 209 | inode = proc_get_inode(dir->i_sb, de); | 193 | inode = proc_get_inode(dir->i_sb, de); |
| 210 | if (!inode) | 194 | if (!inode) |
| 211 | return ERR_PTR(-ENOMEM); | 195 | return ERR_PTR(-ENOMEM); |
| 212 | d_set_d_op(dentry, &proc_dentry_operations); | 196 | d_set_d_op(dentry, &simple_dentry_operations); |
| 213 | d_add(dentry, inode); | 197 | d_add(dentry, inode); |
| 214 | return NULL; | 198 | return NULL; |
| 215 | } | 199 | } |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 28955d4b7218..124fc43c7090 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
| @@ -292,16 +292,20 @@ proc_reg_get_unmapped_area(struct file *file, unsigned long orig_addr, | |||
| 292 | { | 292 | { |
| 293 | struct proc_dir_entry *pde = PDE(file_inode(file)); | 293 | struct proc_dir_entry *pde = PDE(file_inode(file)); |
| 294 | unsigned long rv = -EIO; | 294 | unsigned long rv = -EIO; |
| 295 | unsigned long (*get_area)(struct file *, unsigned long, unsigned long, | 295 | |
| 296 | unsigned long, unsigned long) = NULL; | ||
| 297 | if (use_pde(pde)) { | 296 | if (use_pde(pde)) { |
| 297 | typeof(proc_reg_get_unmapped_area) *get_area; | ||
| 298 | |||
| 299 | get_area = pde->proc_fops->get_unmapped_area; | ||
| 298 | #ifdef CONFIG_MMU | 300 | #ifdef CONFIG_MMU |
| 299 | get_area = current->mm->get_unmapped_area; | 301 | if (!get_area) |
| 302 | get_area = current->mm->get_unmapped_area; | ||
| 300 | #endif | 303 | #endif |
| 301 | if (pde->proc_fops->get_unmapped_area) | 304 | |
| 302 | get_area = pde->proc_fops->get_unmapped_area; | ||
| 303 | if (get_area) | 305 | if (get_area) |
| 304 | rv = get_area(file, orig_addr, len, pgoff, flags); | 306 | rv = get_area(file, orig_addr, len, pgoff, flags); |
| 307 | else | ||
| 308 | rv = orig_addr; | ||
| 305 | unuse_pde(pde); | 309 | unuse_pde(pde); |
| 306 | } | 310 | } |
| 307 | return rv; | 311 | return rv; |
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 49a7fff2e83a..9ae46b87470d 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c | |||
| @@ -42,12 +42,6 @@ static const struct inode_operations ns_inode_operations = { | |||
| 42 | .setattr = proc_setattr, | 42 | .setattr = proc_setattr, |
| 43 | }; | 43 | }; |
| 44 | 44 | ||
| 45 | static int ns_delete_dentry(const struct dentry *dentry) | ||
| 46 | { | ||
| 47 | /* Don't cache namespace inodes when not in use */ | ||
| 48 | return 1; | ||
| 49 | } | ||
| 50 | |||
| 51 | static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) | 45 | static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) |
| 52 | { | 46 | { |
| 53 | struct inode *inode = dentry->d_inode; | 47 | struct inode *inode = dentry->d_inode; |
| @@ -59,7 +53,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) | |||
| 59 | 53 | ||
| 60 | const struct dentry_operations ns_dentry_operations = | 54 | const struct dentry_operations ns_dentry_operations = |
| 61 | { | 55 | { |
| 62 | .d_delete = ns_delete_dentry, | 56 | .d_delete = always_delete_dentry, |
| 63 | .d_dname = ns_dname, | 57 | .d_dname = ns_dname, |
| 64 | }; | 58 | }; |
| 65 | 59 | ||
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index c70111ebefd4..b6fa8657dcbc 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig | |||
| @@ -25,6 +25,78 @@ config SQUASHFS | |||
| 25 | 25 | ||
| 26 | If unsure, say N. | 26 | If unsure, say N. |
| 27 | 27 | ||
| 28 | choice | ||
| 29 | prompt "File decompression options" | ||
| 30 | depends on SQUASHFS | ||
| 31 | help | ||
| 32 | Squashfs now supports two options for decompressing file | ||
| 33 | data. Traditionally Squashfs has decompressed into an | ||
| 34 | intermediate buffer and then memcopied it into the page cache. | ||
| 35 | Squashfs now supports the ability to decompress directly into | ||
| 36 | the page cache. | ||
| 37 | |||
| 38 | If unsure, select "Decompress file data into an intermediate buffer" | ||
| 39 | |||
| 40 | config SQUASHFS_FILE_CACHE | ||
| 41 | bool "Decompress file data into an intermediate buffer" | ||
| 42 | help | ||
| 43 | Decompress file data into an intermediate buffer and then | ||
| 44 | memcopy it into the page cache. | ||
| 45 | |||
| 46 | config SQUASHFS_FILE_DIRECT | ||
| 47 | bool "Decompress files directly into the page cache" | ||
| 48 | help | ||
| 49 | Directly decompress file data into the page cache. | ||
| 50 | Doing so can significantly improve performance because | ||
| 51 | it eliminates a memcpy and it also removes the lock contention | ||
| 52 | on the single buffer. | ||
| 53 | |||
| 54 | endchoice | ||
| 55 | |||
| 56 | choice | ||
| 57 | prompt "Decompressor parallelisation options" | ||
| 58 | depends on SQUASHFS | ||
| 59 | help | ||
| 60 | Squashfs now supports three parallelisation options for | ||
| 61 | decompression. Each one exhibits various trade-offs between | ||
| 62 | decompression performance and CPU and memory usage. | ||
| 63 | |||
| 64 | If in doubt, select "Single threaded compression" | ||
| 65 | |||
| 66 | config SQUASHFS_DECOMP_SINGLE | ||
| 67 | bool "Single threaded compression" | ||
| 68 | help | ||
| 69 | Traditionally Squashfs has used single-threaded decompression. | ||
| 70 | Only one block (data or metadata) can be decompressed at any | ||
| 71 | one time. This limits CPU and memory usage to a minimum. | ||
| 72 | |||
| 73 | config SQUASHFS_DECOMP_MULTI | ||
| 74 | bool "Use multiple decompressors for parallel I/O" | ||
| 75 | help | ||
| 76 | By default Squashfs uses a single decompressor but it gives | ||
| 77 | poor performance on parallel I/O workloads when using multiple CPU | ||
| 78 | machines due to waiting on decompressor availability. | ||
| 79 | |||
| 80 | If you have a parallel I/O workload and your system has enough memory, | ||
| 81 | using this option may improve overall I/O performance. | ||
| 82 | |||
| 83 | This decompressor implementation uses up to two parallel | ||
| 84 | decompressors per core. It dynamically allocates decompressors | ||
| 85 | on a demand basis. | ||
| 86 | |||
| 87 | config SQUASHFS_DECOMP_MULTI_PERCPU | ||
| 88 | bool "Use percpu multiple decompressors for parallel I/O" | ||
| 89 | help | ||
| 90 | By default Squashfs uses a single decompressor but it gives | ||
| 91 | poor performance on parallel I/O workloads when using multiple CPU | ||
| 92 | machines due to waiting on decompressor availability. | ||
| 93 | |||
| 94 | This decompressor implementation uses a maximum of one | ||
| 95 | decompressor per core. It uses percpu variables to ensure | ||
| 96 | decompression is load-balanced across the cores. | ||
| 97 | |||
| 98 | endchoice | ||
| 99 | |||
| 28 | config SQUASHFS_XATTR | 100 | config SQUASHFS_XATTR |
| 29 | bool "Squashfs XATTR support" | 101 | bool "Squashfs XATTR support" |
| 30 | depends on SQUASHFS | 102 | depends on SQUASHFS |
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 110b0476f3b4..4132520b4ff2 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile | |||
| @@ -5,6 +5,11 @@ | |||
| 5 | obj-$(CONFIG_SQUASHFS) += squashfs.o | 5 | obj-$(CONFIG_SQUASHFS) += squashfs.o |
| 6 | squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o | 6 | squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o |
| 7 | squashfs-y += namei.o super.o symlink.o decompressor.o | 7 | squashfs-y += namei.o super.o symlink.o decompressor.o |
| 8 | squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o | ||
| 9 | squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o | ||
| 10 | squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o | ||
| 11 | squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o | ||
| 12 | squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o | ||
| 8 | squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o | 13 | squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o |
| 9 | squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o | 14 | squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o |
| 10 | squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o | 15 | squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o |
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 41d108ecc9be..0cea9b9236d0 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #include "squashfs_fs_sb.h" | 36 | #include "squashfs_fs_sb.h" |
| 37 | #include "squashfs.h" | 37 | #include "squashfs.h" |
| 38 | #include "decompressor.h" | 38 | #include "decompressor.h" |
| 39 | #include "page_actor.h" | ||
| 39 | 40 | ||
| 40 | /* | 41 | /* |
| 41 | * Read the metadata block length, this is stored in the first two | 42 | * Read the metadata block length, this is stored in the first two |
| @@ -86,16 +87,16 @@ static struct buffer_head *get_block_length(struct super_block *sb, | |||
| 86 | * generated a larger block - this does occasionally happen with compression | 87 | * generated a larger block - this does occasionally happen with compression |
| 87 | * algorithms). | 88 | * algorithms). |
| 88 | */ | 89 | */ |
| 89 | int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, | 90 | int squashfs_read_data(struct super_block *sb, u64 index, int length, |
| 90 | int length, u64 *next_index, int srclength, int pages) | 91 | u64 *next_index, struct squashfs_page_actor *output) |
| 91 | { | 92 | { |
| 92 | struct squashfs_sb_info *msblk = sb->s_fs_info; | 93 | struct squashfs_sb_info *msblk = sb->s_fs_info; |
| 93 | struct buffer_head **bh; | 94 | struct buffer_head **bh; |
| 94 | int offset = index & ((1 << msblk->devblksize_log2) - 1); | 95 | int offset = index & ((1 << msblk->devblksize_log2) - 1); |
| 95 | u64 cur_index = index >> msblk->devblksize_log2; | 96 | u64 cur_index = index >> msblk->devblksize_log2; |
| 96 | int bytes, compressed, b = 0, k = 0, page = 0, avail; | 97 | int bytes, compressed, b = 0, k = 0, avail, i; |
| 97 | 98 | ||
| 98 | bh = kcalloc(((srclength + msblk->devblksize - 1) | 99 | bh = kcalloc(((output->length + msblk->devblksize - 1) |
| 99 | >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); | 100 | >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); |
| 100 | if (bh == NULL) | 101 | if (bh == NULL) |
| 101 | return -ENOMEM; | 102 | return -ENOMEM; |
| @@ -111,9 +112,9 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, | |||
| 111 | *next_index = index + length; | 112 | *next_index = index + length; |
| 112 | 113 | ||
| 113 | TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", | 114 | TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", |
| 114 | index, compressed ? "" : "un", length, srclength); | 115 | index, compressed ? "" : "un", length, output->length); |
| 115 | 116 | ||
| 116 | if (length < 0 || length > srclength || | 117 | if (length < 0 || length > output->length || |
| 117 | (index + length) > msblk->bytes_used) | 118 | (index + length) > msblk->bytes_used) |
| 118 | goto read_failure; | 119 | goto read_failure; |
| 119 | 120 | ||
| @@ -145,7 +146,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, | |||
| 145 | TRACE("Block @ 0x%llx, %scompressed size %d\n", index, | 146 | TRACE("Block @ 0x%llx, %scompressed size %d\n", index, |
| 146 | compressed ? "" : "un", length); | 147 | compressed ? "" : "un", length); |
| 147 | 148 | ||
| 148 | if (length < 0 || length > srclength || | 149 | if (length < 0 || length > output->length || |
| 149 | (index + length) > msblk->bytes_used) | 150 | (index + length) > msblk->bytes_used) |
| 150 | goto block_release; | 151 | goto block_release; |
| 151 | 152 | ||
| @@ -158,9 +159,15 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, | |||
| 158 | ll_rw_block(READ, b - 1, bh + 1); | 159 | ll_rw_block(READ, b - 1, bh + 1); |
| 159 | } | 160 | } |
| 160 | 161 | ||
| 162 | for (i = 0; i < b; i++) { | ||
| 163 | wait_on_buffer(bh[i]); | ||
| 164 | if (!buffer_uptodate(bh[i])) | ||
| 165 | goto block_release; | ||
| 166 | } | ||
| 167 | |||
| 161 | if (compressed) { | 168 | if (compressed) { |
| 162 | length = squashfs_decompress(msblk, buffer, bh, b, offset, | 169 | length = squashfs_decompress(msblk, bh, b, offset, length, |
| 163 | length, srclength, pages); | 170 | output); |
| 164 | if (length < 0) | 171 | if (length < 0) |
| 165 | goto read_failure; | 172 | goto read_failure; |
| 166 | } else { | 173 | } else { |
| @@ -168,22 +175,20 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, | |||
| 168 | * Block is uncompressed. | 175 | * Block is uncompressed. |
| 169 | */ | 176 | */ |
| 170 | int in, pg_offset = 0; | 177 | int in, pg_offset = 0; |
| 178 | void *data = squashfs_first_page(output); | ||
| 171 | 179 | ||
| 172 | for (bytes = length; k < b; k++) { | 180 | for (bytes = length; k < b; k++) { |
| 173 | in = min(bytes, msblk->devblksize - offset); | 181 | in = min(bytes, msblk->devblksize - offset); |
| 174 | bytes -= in; | 182 | bytes -= in; |
| 175 | wait_on_buffer(bh[k]); | ||
| 176 | if (!buffer_uptodate(bh[k])) | ||
| 177 | goto block_release; | ||
| 178 | while (in) { | 183 | while (in) { |
| 179 | if (pg_offset == PAGE_CACHE_SIZE) { | 184 | if (pg_offset == PAGE_CACHE_SIZE) { |
| 180 | page++; | 185 | data = squashfs_next_page(output); |
| 181 | pg_offset = 0; | 186 | pg_offset = 0; |
| 182 | } | 187 | } |
| 183 | avail = min_t(int, in, PAGE_CACHE_SIZE - | 188 | avail = min_t(int, in, PAGE_CACHE_SIZE - |
| 184 | pg_offset); | 189 | pg_offset); |
| 185 | memcpy(buffer[page] + pg_offset, | 190 | memcpy(data + pg_offset, bh[k]->b_data + offset, |
| 186 | bh[k]->b_data + offset, avail); | 191 | avail); |
| 187 | in -= avail; | 192 | in -= avail; |
| 188 | pg_offset += avail; | 193 | pg_offset += avail; |
| 189 | offset += avail; | 194 | offset += avail; |
| @@ -191,6 +196,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, | |||
| 191 | offset = 0; | 196 | offset = 0; |
| 192 | put_bh(bh[k]); | 197 | put_bh(bh[k]); |
| 193 | } | 198 | } |
| 199 | squashfs_finish_page(output); | ||
| 194 | } | 200 | } |
| 195 | 201 | ||
| 196 | kfree(bh); | 202 | kfree(bh); |
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index af0b73802592..1cb70a0b2168 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c | |||
| @@ -56,6 +56,7 @@ | |||
| 56 | #include "squashfs_fs.h" | 56 | #include "squashfs_fs.h" |
| 57 | #include "squashfs_fs_sb.h" | 57 | #include "squashfs_fs_sb.h" |
| 58 | #include "squashfs.h" | 58 | #include "squashfs.h" |
| 59 | #include "page_actor.h" | ||
| 59 | 60 | ||
| 60 | /* | 61 | /* |
| 61 | * Look-up block in cache, and increment usage count. If not in cache, read | 62 | * Look-up block in cache, and increment usage count. If not in cache, read |
| @@ -119,9 +120,8 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb, | |||
| 119 | entry->error = 0; | 120 | entry->error = 0; |
| 120 | spin_unlock(&cache->lock); | 121 | spin_unlock(&cache->lock); |
| 121 | 122 | ||
| 122 | entry->length = squashfs_read_data(sb, entry->data, | 123 | entry->length = squashfs_read_data(sb, block, length, |
| 123 | block, length, &entry->next_index, | 124 | &entry->next_index, entry->actor); |
| 124 | cache->block_size, cache->pages); | ||
| 125 | 125 | ||
| 126 | spin_lock(&cache->lock); | 126 | spin_lock(&cache->lock); |
| 127 | 127 | ||
| @@ -220,6 +220,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache) | |||
| 220 | kfree(cache->entry[i].data[j]); | 220 | kfree(cache->entry[i].data[j]); |
| 221 | kfree(cache->entry[i].data); | 221 | kfree(cache->entry[i].data); |
| 222 | } | 222 | } |
| 223 | kfree(cache->entry[i].actor); | ||
| 223 | } | 224 | } |
| 224 | 225 | ||
| 225 | kfree(cache->entry); | 226 | kfree(cache->entry); |
| @@ -280,6 +281,13 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, | |||
| 280 | goto cleanup; | 281 | goto cleanup; |
| 281 | } | 282 | } |
| 282 | } | 283 | } |
| 284 | |||
| 285 | entry->actor = squashfs_page_actor_init(entry->data, | ||
| 286 | cache->pages, 0); | ||
| 287 | if (entry->actor == NULL) { | ||
| 288 | ERROR("Failed to allocate %s cache entry\n", name); | ||
| 289 | goto cleanup; | ||
| 290 | } | ||
| 283 | } | 291 | } |
| 284 | 292 | ||
| 285 | return cache; | 293 | return cache; |
| @@ -410,6 +418,7 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length) | |||
| 410 | int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 418 | int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
| 411 | int i, res; | 419 | int i, res; |
| 412 | void *table, *buffer, **data; | 420 | void *table, *buffer, **data; |
| 421 | struct squashfs_page_actor *actor; | ||
| 413 | 422 | ||
| 414 | table = buffer = kmalloc(length, GFP_KERNEL); | 423 | table = buffer = kmalloc(length, GFP_KERNEL); |
| 415 | if (table == NULL) | 424 | if (table == NULL) |
| @@ -421,19 +430,28 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length) | |||
| 421 | goto failed; | 430 | goto failed; |
| 422 | } | 431 | } |
| 423 | 432 | ||
| 433 | actor = squashfs_page_actor_init(data, pages, length); | ||
| 434 | if (actor == NULL) { | ||
| 435 | res = -ENOMEM; | ||
| 436 | goto failed2; | ||
| 437 | } | ||
| 438 | |||
| 424 | for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) | 439 | for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) |
| 425 | data[i] = buffer; | 440 | data[i] = buffer; |
| 426 | 441 | ||
| 427 | res = squashfs_read_data(sb, data, block, length | | 442 | res = squashfs_read_data(sb, block, length | |
| 428 | SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages); | 443 | SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor); |
| 429 | 444 | ||
| 430 | kfree(data); | 445 | kfree(data); |
| 446 | kfree(actor); | ||
| 431 | 447 | ||
| 432 | if (res < 0) | 448 | if (res < 0) |
| 433 | goto failed; | 449 | goto failed; |
| 434 | 450 | ||
| 435 | return table; | 451 | return table; |
| 436 | 452 | ||
| 453 | failed2: | ||
| 454 | kfree(data); | ||
| 437 | failed: | 455 | failed: |
| 438 | kfree(table); | 456 | kfree(table); |
| 439 | return ERR_PTR(res); | 457 | return ERR_PTR(res); |
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 3f6271d86abc..ac22fe73b0ad 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include "squashfs_fs_sb.h" | 30 | #include "squashfs_fs_sb.h" |
| 31 | #include "decompressor.h" | 31 | #include "decompressor.h" |
| 32 | #include "squashfs.h" | 32 | #include "squashfs.h" |
| 33 | #include "page_actor.h" | ||
| 33 | 34 | ||
| 34 | /* | 35 | /* |
| 35 | * This file (and decompressor.h) implements a decompressor framework for | 36 | * This file (and decompressor.h) implements a decompressor framework for |
| @@ -37,29 +38,29 @@ | |||
| 37 | */ | 38 | */ |
| 38 | 39 | ||
| 39 | static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = { | 40 | static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = { |
| 40 | NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 | 41 | NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 |
| 41 | }; | 42 | }; |
| 42 | 43 | ||
| 43 | #ifndef CONFIG_SQUASHFS_LZO | 44 | #ifndef CONFIG_SQUASHFS_LZO |
| 44 | static const struct squashfs_decompressor squashfs_lzo_comp_ops = { | 45 | static const struct squashfs_decompressor squashfs_lzo_comp_ops = { |
| 45 | NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 | 46 | NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 |
| 46 | }; | 47 | }; |
| 47 | #endif | 48 | #endif |
| 48 | 49 | ||
| 49 | #ifndef CONFIG_SQUASHFS_XZ | 50 | #ifndef CONFIG_SQUASHFS_XZ |
| 50 | static const struct squashfs_decompressor squashfs_xz_comp_ops = { | 51 | static const struct squashfs_decompressor squashfs_xz_comp_ops = { |
| 51 | NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 | 52 | NULL, NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 |
| 52 | }; | 53 | }; |
| 53 | #endif | 54 | #endif |
| 54 | 55 | ||
| 55 | #ifndef CONFIG_SQUASHFS_ZLIB | 56 | #ifndef CONFIG_SQUASHFS_ZLIB |
| 56 | static const struct squashfs_decompressor squashfs_zlib_comp_ops = { | 57 | static const struct squashfs_decompressor squashfs_zlib_comp_ops = { |
| 57 | NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0 | 58 | NULL, NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0 |
| 58 | }; | 59 | }; |
| 59 | #endif | 60 | #endif |
| 60 | 61 | ||
| 61 | static const struct squashfs_decompressor squashfs_unknown_comp_ops = { | 62 | static const struct squashfs_decompressor squashfs_unknown_comp_ops = { |
| 62 | NULL, NULL, NULL, 0, "unknown", 0 | 63 | NULL, NULL, NULL, NULL, 0, "unknown", 0 |
| 63 | }; | 64 | }; |
| 64 | 65 | ||
| 65 | static const struct squashfs_decompressor *decompressor[] = { | 66 | static const struct squashfs_decompressor *decompressor[] = { |
| @@ -83,10 +84,11 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id) | |||
| 83 | } | 84 | } |
| 84 | 85 | ||
| 85 | 86 | ||
| 86 | void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags) | 87 | static void *get_comp_opts(struct super_block *sb, unsigned short flags) |
| 87 | { | 88 | { |
| 88 | struct squashfs_sb_info *msblk = sb->s_fs_info; | 89 | struct squashfs_sb_info *msblk = sb->s_fs_info; |
| 89 | void *strm, *buffer = NULL; | 90 | void *buffer = NULL, *comp_opts; |
| 91 | struct squashfs_page_actor *actor = NULL; | ||
| 90 | int length = 0; | 92 | int length = 0; |
| 91 | 93 | ||
| 92 | /* | 94 | /* |
| @@ -94,23 +96,46 @@ void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags) | |||
| 94 | */ | 96 | */ |
| 95 | if (SQUASHFS_COMP_OPTS(flags)) { | 97 | if (SQUASHFS_COMP_OPTS(flags)) { |
| 96 | buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); | 98 | buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); |
| 97 | if (buffer == NULL) | 99 | if (buffer == NULL) { |
| 98 | return ERR_PTR(-ENOMEM); | 100 | comp_opts = ERR_PTR(-ENOMEM); |
| 101 | goto out; | ||
| 102 | } | ||
| 103 | |||
| 104 | actor = squashfs_page_actor_init(&buffer, 1, 0); | ||
| 105 | if (actor == NULL) { | ||
| 106 | comp_opts = ERR_PTR(-ENOMEM); | ||
| 107 | goto out; | ||
| 108 | } | ||
| 99 | 109 | ||
| 100 | length = squashfs_read_data(sb, &buffer, | 110 | length = squashfs_read_data(sb, |
| 101 | sizeof(struct squashfs_super_block), 0, NULL, | 111 | sizeof(struct squashfs_super_block), 0, NULL, actor); |
| 102 | PAGE_CACHE_SIZE, 1); | ||
| 103 | 112 | ||
| 104 | if (length < 0) { | 113 | if (length < 0) { |
| 105 | strm = ERR_PTR(length); | 114 | comp_opts = ERR_PTR(length); |
| 106 | goto finished; | 115 | goto out; |
| 107 | } | 116 | } |
| 108 | } | 117 | } |
| 109 | 118 | ||
| 110 | strm = msblk->decompressor->init(msblk, buffer, length); | 119 | comp_opts = squashfs_comp_opts(msblk, buffer, length); |
| 111 | 120 | ||
| 112 | finished: | 121 | out: |
| 122 | kfree(actor); | ||
| 113 | kfree(buffer); | 123 | kfree(buffer); |
| 124 | return comp_opts; | ||
| 125 | } | ||
| 126 | |||
| 127 | |||
| 128 | void *squashfs_decompressor_setup(struct super_block *sb, unsigned short flags) | ||
| 129 | { | ||
| 130 | struct squashfs_sb_info *msblk = sb->s_fs_info; | ||
| 131 | void *stream, *comp_opts = get_comp_opts(sb, flags); | ||
| 132 | |||
| 133 | if (IS_ERR(comp_opts)) | ||
| 134 | return comp_opts; | ||
| 135 | |||
| 136 | stream = squashfs_decompressor_create(msblk, comp_opts); | ||
| 137 | if (IS_ERR(stream)) | ||
| 138 | kfree(comp_opts); | ||
| 114 | 139 | ||
| 115 | return strm; | 140 | return stream; |
| 116 | } | 141 | } |
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index 330073e29029..af0985321808 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h | |||
| @@ -24,28 +24,22 @@ | |||
| 24 | */ | 24 | */ |
| 25 | 25 | ||
| 26 | struct squashfs_decompressor { | 26 | struct squashfs_decompressor { |
| 27 | void *(*init)(struct squashfs_sb_info *, void *, int); | 27 | void *(*init)(struct squashfs_sb_info *, void *); |
| 28 | void *(*comp_opts)(struct squashfs_sb_info *, void *, int); | ||
| 28 | void (*free)(void *); | 29 | void (*free)(void *); |
| 29 | int (*decompress)(struct squashfs_sb_info *, void **, | 30 | int (*decompress)(struct squashfs_sb_info *, void *, |
| 30 | struct buffer_head **, int, int, int, int, int); | 31 | struct buffer_head **, int, int, int, |
| 32 | struct squashfs_page_actor *); | ||
| 31 | int id; | 33 | int id; |
| 32 | char *name; | 34 | char *name; |
| 33 | int supported; | 35 | int supported; |
| 34 | }; | 36 | }; |
| 35 | 37 | ||
| 36 | static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk, | 38 | static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk, |
| 37 | void *s) | 39 | void *buff, int length) |
| 38 | { | 40 | { |
| 39 | if (msblk->decompressor) | 41 | return msblk->decompressor->comp_opts ? |
| 40 | msblk->decompressor->free(s); | 42 | msblk->decompressor->comp_opts(msblk, buff, length) : NULL; |
| 41 | } | ||
| 42 | |||
| 43 | static inline int squashfs_decompress(struct squashfs_sb_info *msblk, | ||
| 44 | void **buffer, struct buffer_head **bh, int b, int offset, int length, | ||
| 45 | int srclength, int pages) | ||
| 46 | { | ||
| 47 | return msblk->decompressor->decompress(msblk, buffer, bh, b, offset, | ||
| 48 | length, srclength, pages); | ||
| 49 | } | 43 | } |
| 50 | 44 | ||
| 51 | #ifdef CONFIG_SQUASHFS_XZ | 45 | #ifdef CONFIG_SQUASHFS_XZ |
diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c new file mode 100644 index 000000000000..d6008a636479 --- /dev/null +++ b/fs/squashfs/decompressor_multi.c | |||
| @@ -0,0 +1,198 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2013 | ||
| 3 | * Minchan Kim <minchan@kernel.org> | ||
| 4 | * | ||
| 5 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 6 | * the COPYING file in the top-level directory. | ||
| 7 | */ | ||
| 8 | #include <linux/types.h> | ||
| 9 | #include <linux/mutex.h> | ||
| 10 | #include <linux/slab.h> | ||
| 11 | #include <linux/buffer_head.h> | ||
| 12 | #include <linux/sched.h> | ||
| 13 | #include <linux/wait.h> | ||
| 14 | #include <linux/cpumask.h> | ||
| 15 | |||
| 16 | #include "squashfs_fs.h" | ||
| 17 | #include "squashfs_fs_sb.h" | ||
| 18 | #include "decompressor.h" | ||
| 19 | #include "squashfs.h" | ||
| 20 | |||
| 21 | /* | ||
| 22 | * This file implements multi-threaded decompression in the | ||
| 23 | * decompressor framework | ||
| 24 | */ | ||
| 25 | |||
| 26 | |||
| 27 | /* | ||
| 28 | * The reason that multiply two is that a CPU can request new I/O | ||
| 29 | * while it is waiting previous request. | ||
| 30 | */ | ||
| 31 | #define MAX_DECOMPRESSOR (num_online_cpus() * 2) | ||
| 32 | |||
| 33 | |||
| 34 | int squashfs_max_decompressors(void) | ||
| 35 | { | ||
| 36 | return MAX_DECOMPRESSOR; | ||
| 37 | } | ||
| 38 | |||
| 39 | |||
| 40 | struct squashfs_stream { | ||
| 41 | void *comp_opts; | ||
| 42 | struct list_head strm_list; | ||
| 43 | struct mutex mutex; | ||
| 44 | int avail_decomp; | ||
| 45 | wait_queue_head_t wait; | ||
| 46 | }; | ||
| 47 | |||
| 48 | |||
| 49 | struct decomp_stream { | ||
| 50 | void *stream; | ||
| 51 | struct list_head list; | ||
| 52 | }; | ||
| 53 | |||
| 54 | |||
| 55 | static void put_decomp_stream(struct decomp_stream *decomp_strm, | ||
| 56 | struct squashfs_stream *stream) | ||
| 57 | { | ||
| 58 | mutex_lock(&stream->mutex); | ||
| 59 | list_add(&decomp_strm->list, &stream->strm_list); | ||
| 60 | mutex_unlock(&stream->mutex); | ||
| 61 | wake_up(&stream->wait); | ||
| 62 | } | ||
| 63 | |||
| 64 | void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, | ||
| 65 | void *comp_opts) | ||
| 66 | { | ||
| 67 | struct squashfs_stream *stream; | ||
| 68 | struct decomp_stream *decomp_strm = NULL; | ||
| 69 | int err = -ENOMEM; | ||
| 70 | |||
| 71 | stream = kzalloc(sizeof(*stream), GFP_KERNEL); | ||
| 72 | if (!stream) | ||
| 73 | goto out; | ||
| 74 | |||
| 75 | stream->comp_opts = comp_opts; | ||
| 76 | mutex_init(&stream->mutex); | ||
| 77 | INIT_LIST_HEAD(&stream->strm_list); | ||
| 78 | init_waitqueue_head(&stream->wait); | ||
| 79 | |||
| 80 | /* | ||
| 81 | * We should have a decompressor at least as default | ||
| 82 | * so if we fail to allocate new decompressor dynamically, | ||
| 83 | * we could always fall back to default decompressor and | ||
| 84 | * file system works. | ||
| 85 | */ | ||
| 86 | decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL); | ||
| 87 | if (!decomp_strm) | ||
| 88 | goto out; | ||
| 89 | |||
| 90 | decomp_strm->stream = msblk->decompressor->init(msblk, | ||
| 91 | stream->comp_opts); | ||
| 92 | if (IS_ERR(decomp_strm->stream)) { | ||
| 93 | err = PTR_ERR(decomp_strm->stream); | ||
| 94 | goto out; | ||
| 95 | } | ||
| 96 | |||
| 97 | list_add(&decomp_strm->list, &stream->strm_list); | ||
| 98 | stream->avail_decomp = 1; | ||
| 99 | return stream; | ||
| 100 | |||
| 101 | out: | ||
| 102 | kfree(decomp_strm); | ||
| 103 | kfree(stream); | ||
| 104 | return ERR_PTR(err); | ||
| 105 | } | ||
| 106 | |||
| 107 | |||
| 108 | void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) | ||
| 109 | { | ||
| 110 | struct squashfs_stream *stream = msblk->stream; | ||
| 111 | if (stream) { | ||
| 112 | struct decomp_stream *decomp_strm; | ||
| 113 | |||
| 114 | while (!list_empty(&stream->strm_list)) { | ||
| 115 | decomp_strm = list_entry(stream->strm_list.prev, | ||
| 116 | struct decomp_stream, list); | ||
| 117 | list_del(&decomp_strm->list); | ||
| 118 | msblk->decompressor->free(decomp_strm->stream); | ||
| 119 | kfree(decomp_strm); | ||
| 120 | stream->avail_decomp--; | ||
| 121 | } | ||
| 122 | WARN_ON(stream->avail_decomp); | ||
| 123 | kfree(stream->comp_opts); | ||
| 124 | kfree(stream); | ||
| 125 | } | ||
| 126 | } | ||
| 127 | |||
| 128 | |||
| 129 | static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk, | ||
| 130 | struct squashfs_stream *stream) | ||
| 131 | { | ||
| 132 | struct decomp_stream *decomp_strm; | ||
| 133 | |||
| 134 | while (1) { | ||
| 135 | mutex_lock(&stream->mutex); | ||
| 136 | |||
| 137 | /* There is available decomp_stream */ | ||
| 138 | if (!list_empty(&stream->strm_list)) { | ||
| 139 | decomp_strm = list_entry(stream->strm_list.prev, | ||
| 140 | struct decomp_stream, list); | ||
| 141 | list_del(&decomp_strm->list); | ||
| 142 | mutex_unlock(&stream->mutex); | ||
| 143 | break; | ||
| 144 | } | ||
| 145 | |||
| 146 | /* | ||
| 147 | * If there is no available decomp and already full, | ||
| 148 | * let's wait for releasing decomp from other users. | ||
| 149 | */ | ||
| 150 | if (stream->avail_decomp >= MAX_DECOMPRESSOR) | ||
| 151 | goto wait; | ||
| 152 | |||
| 153 | /* Let's allocate new decomp */ | ||
| 154 | decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL); | ||
| 155 | if (!decomp_strm) | ||
| 156 | goto wait; | ||
| 157 | |||
| 158 | decomp_strm->stream = msblk->decompressor->init(msblk, | ||
| 159 | stream->comp_opts); | ||
| 160 | if (IS_ERR(decomp_strm->stream)) { | ||
| 161 | kfree(decomp_strm); | ||
| 162 | goto wait; | ||
| 163 | } | ||
| 164 | |||
| 165 | stream->avail_decomp++; | ||
| 166 | WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR); | ||
| 167 | |||
| 168 | mutex_unlock(&stream->mutex); | ||
| 169 | break; | ||
| 170 | wait: | ||
| 171 | /* | ||
| 172 | * If system memory is tough, let's for other's | ||
| 173 | * releasing instead of hurting VM because it could | ||
| 174 | * make page cache thrashing. | ||
| 175 | */ | ||
| 176 | mutex_unlock(&stream->mutex); | ||
| 177 | wait_event(stream->wait, | ||
| 178 | !list_empty(&stream->strm_list)); | ||
| 179 | } | ||
| 180 | |||
| 181 | return decomp_strm; | ||
| 182 | } | ||
| 183 | |||
| 184 | |||
| 185 | int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, | ||
| 186 | int b, int offset, int length, struct squashfs_page_actor *output) | ||
| 187 | { | ||
| 188 | int res; | ||
| 189 | struct squashfs_stream *stream = msblk->stream; | ||
| 190 | struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream); | ||
| 191 | res = msblk->decompressor->decompress(msblk, decomp_stream->stream, | ||
| 192 | bh, b, offset, length, output); | ||
| 193 | put_decomp_stream(decomp_stream, stream); | ||
| 194 | if (res < 0) | ||
| 195 | ERROR("%s decompression failed, data probably corrupt\n", | ||
| 196 | msblk->decompressor->name); | ||
| 197 | return res; | ||
| 198 | } | ||
diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c new file mode 100644 index 000000000000..23a9c28ad8ea --- /dev/null +++ b/fs/squashfs/decompressor_multi_percpu.c | |||
| @@ -0,0 +1,97 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2013 | ||
| 3 | * Phillip Lougher <phillip@squashfs.org.uk> | ||
| 4 | * | ||
| 5 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 6 | * the COPYING file in the top-level directory. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <linux/types.h> | ||
| 10 | #include <linux/slab.h> | ||
| 11 | #include <linux/percpu.h> | ||
| 12 | #include <linux/buffer_head.h> | ||
| 13 | |||
| 14 | #include "squashfs_fs.h" | ||
| 15 | #include "squashfs_fs_sb.h" | ||
| 16 | #include "decompressor.h" | ||
| 17 | #include "squashfs.h" | ||
| 18 | |||
| 19 | /* | ||
| 20 | * This file implements multi-threaded decompression using percpu | ||
| 21 | * variables, one thread per cpu core. | ||
| 22 | */ | ||
| 23 | |||
| 24 | struct squashfs_stream { | ||
| 25 | void *stream; | ||
| 26 | }; | ||
| 27 | |||
| 28 | void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, | ||
| 29 | void *comp_opts) | ||
| 30 | { | ||
| 31 | struct squashfs_stream *stream; | ||
| 32 | struct squashfs_stream __percpu *percpu; | ||
| 33 | int err, cpu; | ||
| 34 | |||
| 35 | percpu = alloc_percpu(struct squashfs_stream); | ||
| 36 | if (percpu == NULL) | ||
| 37 | return ERR_PTR(-ENOMEM); | ||
| 38 | |||
| 39 | for_each_possible_cpu(cpu) { | ||
| 40 | stream = per_cpu_ptr(percpu, cpu); | ||
| 41 | stream->stream = msblk->decompressor->init(msblk, comp_opts); | ||
| 42 | if (IS_ERR(stream->stream)) { | ||
| 43 | err = PTR_ERR(stream->stream); | ||
| 44 | goto out; | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | kfree(comp_opts); | ||
| 49 | return (__force void *) percpu; | ||
| 50 | |||
| 51 | out: | ||
| 52 | for_each_possible_cpu(cpu) { | ||
| 53 | stream = per_cpu_ptr(percpu, cpu); | ||
| 54 | if (!IS_ERR_OR_NULL(stream->stream)) | ||
| 55 | msblk->decompressor->free(stream->stream); | ||
| 56 | } | ||
| 57 | free_percpu(percpu); | ||
| 58 | return ERR_PTR(err); | ||
| 59 | } | ||
| 60 | |||
| 61 | void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) | ||
| 62 | { | ||
| 63 | struct squashfs_stream __percpu *percpu = | ||
| 64 | (struct squashfs_stream __percpu *) msblk->stream; | ||
| 65 | struct squashfs_stream *stream; | ||
| 66 | int cpu; | ||
| 67 | |||
| 68 | if (msblk->stream) { | ||
| 69 | for_each_possible_cpu(cpu) { | ||
| 70 | stream = per_cpu_ptr(percpu, cpu); | ||
| 71 | msblk->decompressor->free(stream->stream); | ||
| 72 | } | ||
| 73 | free_percpu(percpu); | ||
| 74 | } | ||
| 75 | } | ||
| 76 | |||
| 77 | int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, | ||
| 78 | int b, int offset, int length, struct squashfs_page_actor *output) | ||
| 79 | { | ||
| 80 | struct squashfs_stream __percpu *percpu = | ||
| 81 | (struct squashfs_stream __percpu *) msblk->stream; | ||
| 82 | struct squashfs_stream *stream = get_cpu_ptr(percpu); | ||
| 83 | int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, | ||
| 84 | offset, length, output); | ||
| 85 | put_cpu_ptr(stream); | ||
| 86 | |||
| 87 | if (res < 0) | ||
| 88 | ERROR("%s decompression failed, data probably corrupt\n", | ||
| 89 | msblk->decompressor->name); | ||
| 90 | |||
| 91 | return res; | ||
| 92 | } | ||
| 93 | |||
| 94 | int squashfs_max_decompressors(void) | ||
| 95 | { | ||
| 96 | return num_possible_cpus(); | ||
| 97 | } | ||
diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c new file mode 100644 index 000000000000..a6c75929a00e --- /dev/null +++ b/fs/squashfs/decompressor_single.c | |||
| @@ -0,0 +1,85 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2013 | ||
| 3 | * Phillip Lougher <phillip@squashfs.org.uk> | ||
| 4 | * | ||
| 5 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 6 | * the COPYING file in the top-level directory. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <linux/types.h> | ||
| 10 | #include <linux/mutex.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/buffer_head.h> | ||
| 13 | |||
| 14 | #include "squashfs_fs.h" | ||
| 15 | #include "squashfs_fs_sb.h" | ||
| 16 | #include "decompressor.h" | ||
| 17 | #include "squashfs.h" | ||
| 18 | |||
| 19 | /* | ||
| 20 | * This file implements single-threaded decompression in the | ||
| 21 | * decompressor framework | ||
| 22 | */ | ||
| 23 | |||
| 24 | struct squashfs_stream { | ||
| 25 | void *stream; | ||
| 26 | struct mutex mutex; | ||
| 27 | }; | ||
| 28 | |||
| 29 | void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, | ||
| 30 | void *comp_opts) | ||
| 31 | { | ||
| 32 | struct squashfs_stream *stream; | ||
| 33 | int err = -ENOMEM; | ||
| 34 | |||
| 35 | stream = kmalloc(sizeof(*stream), GFP_KERNEL); | ||
| 36 | if (stream == NULL) | ||
| 37 | goto out; | ||
| 38 | |||
| 39 | stream->stream = msblk->decompressor->init(msblk, comp_opts); | ||
| 40 | if (IS_ERR(stream->stream)) { | ||
| 41 | err = PTR_ERR(stream->stream); | ||
| 42 | goto out; | ||
| 43 | } | ||
| 44 | |||
| 45 | kfree(comp_opts); | ||
| 46 | mutex_init(&stream->mutex); | ||
| 47 | return stream; | ||
| 48 | |||
| 49 | out: | ||
| 50 | kfree(stream); | ||
| 51 | return ERR_PTR(err); | ||
| 52 | } | ||
| 53 | |||
| 54 | void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) | ||
| 55 | { | ||
| 56 | struct squashfs_stream *stream = msblk->stream; | ||
| 57 | |||
| 58 | if (stream) { | ||
| 59 | msblk->decompressor->free(stream->stream); | ||
| 60 | kfree(stream); | ||
| 61 | } | ||
| 62 | } | ||
| 63 | |||
| 64 | int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, | ||
| 65 | int b, int offset, int length, struct squashfs_page_actor *output) | ||
| 66 | { | ||
| 67 | int res; | ||
| 68 | struct squashfs_stream *stream = msblk->stream; | ||
| 69 | |||
| 70 | mutex_lock(&stream->mutex); | ||
| 71 | res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, | ||
| 72 | offset, length, output); | ||
| 73 | mutex_unlock(&stream->mutex); | ||
| 74 | |||
| 75 | if (res < 0) | ||
| 76 | ERROR("%s decompression failed, data probably corrupt\n", | ||
| 77 | msblk->decompressor->name); | ||
| 78 | |||
| 79 | return res; | ||
| 80 | } | ||
| 81 | |||
| 82 | int squashfs_max_decompressors(void) | ||
| 83 | { | ||
| 84 | return 1; | ||
| 85 | } | ||
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 8ca62c28fe12..e5c9689062ba 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c | |||
| @@ -370,77 +370,15 @@ static int read_blocklist(struct inode *inode, int index, u64 *block) | |||
| 370 | return le32_to_cpu(size); | 370 | return le32_to_cpu(size); |
| 371 | } | 371 | } |
| 372 | 372 | ||
| 373 | 373 | /* Copy data into page cache */ | |
| 374 | static int squashfs_readpage(struct file *file, struct page *page) | 374 | void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, |
| 375 | int bytes, int offset) | ||
| 375 | { | 376 | { |
| 376 | struct inode *inode = page->mapping->host; | 377 | struct inode *inode = page->mapping->host; |
| 377 | struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; | 378 | struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; |
| 378 | int bytes, i, offset = 0, sparse = 0; | ||
| 379 | struct squashfs_cache_entry *buffer = NULL; | ||
| 380 | void *pageaddr; | 379 | void *pageaddr; |
| 381 | 380 | int i, mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; | |
| 382 | int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; | 381 | int start_index = page->index & ~mask, end_index = start_index | mask; |
| 383 | int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); | ||
| 384 | int start_index = page->index & ~mask; | ||
| 385 | int end_index = start_index | mask; | ||
| 386 | int file_end = i_size_read(inode) >> msblk->block_log; | ||
| 387 | |||
| 388 | TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", | ||
| 389 | page->index, squashfs_i(inode)->start); | ||
| 390 | |||
| 391 | if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | ||
| 392 | PAGE_CACHE_SHIFT)) | ||
| 393 | goto out; | ||
| 394 | |||
| 395 | if (index < file_end || squashfs_i(inode)->fragment_block == | ||
| 396 | SQUASHFS_INVALID_BLK) { | ||
| 397 | /* | ||
| 398 | * Reading a datablock from disk. Need to read block list | ||
| 399 | * to get location and block size. | ||
| 400 | */ | ||
| 401 | u64 block = 0; | ||
| 402 | int bsize = read_blocklist(inode, index, &block); | ||
| 403 | if (bsize < 0) | ||
| 404 | goto error_out; | ||
| 405 | |||
| 406 | if (bsize == 0) { /* hole */ | ||
| 407 | bytes = index == file_end ? | ||
| 408 | (i_size_read(inode) & (msblk->block_size - 1)) : | ||
| 409 | msblk->block_size; | ||
| 410 | sparse = 1; | ||
| 411 | } else { | ||
| 412 | /* | ||
| 413 | * Read and decompress datablock. | ||
| 414 | */ | ||
| 415 | buffer = squashfs_get_datablock(inode->i_sb, | ||
| 416 | block, bsize); | ||
| 417 | if (buffer->error) { | ||
| 418 | ERROR("Unable to read page, block %llx, size %x" | ||
| 419 | "\n", block, bsize); | ||
| 420 | squashfs_cache_put(buffer); | ||
| 421 | goto error_out; | ||
| 422 | } | ||
| 423 | bytes = buffer->length; | ||
| 424 | } | ||
| 425 | } else { | ||
| 426 | /* | ||
| 427 | * Datablock is stored inside a fragment (tail-end packed | ||
| 428 | * block). | ||
| 429 | */ | ||
| 430 | buffer = squashfs_get_fragment(inode->i_sb, | ||
| 431 | squashfs_i(inode)->fragment_block, | ||
| 432 | squashfs_i(inode)->fragment_size); | ||
| 433 | |||
| 434 | if (buffer->error) { | ||
| 435 | ERROR("Unable to read page, block %llx, size %x\n", | ||
| 436 | squashfs_i(inode)->fragment_block, | ||
| 437 | squashfs_i(inode)->fragment_size); | ||
| 438 | squashfs_cache_put(buffer); | ||
| 439 | goto error_out; | ||
| 440 | } | ||
| 441 | bytes = i_size_read(inode) & (msblk->block_size - 1); | ||
| 442 | offset = squashfs_i(inode)->fragment_offset; | ||
| 443 | } | ||
| 444 | 382 | ||
| 445 | /* | 383 | /* |
| 446 | * Loop copying datablock into pages. As the datablock likely covers | 384 | * Loop copying datablock into pages. As the datablock likely covers |
| @@ -451,7 +389,7 @@ static int squashfs_readpage(struct file *file, struct page *page) | |||
| 451 | for (i = start_index; i <= end_index && bytes > 0; i++, | 389 | for (i = start_index; i <= end_index && bytes > 0; i++, |
| 452 | bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { | 390 | bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { |
| 453 | struct page *push_page; | 391 | struct page *push_page; |
| 454 | int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE); | 392 | int avail = buffer ? min_t(int, bytes, PAGE_CACHE_SIZE) : 0; |
| 455 | 393 | ||
| 456 | TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); | 394 | TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); |
| 457 | 395 | ||
| @@ -475,11 +413,75 @@ skip_page: | |||
| 475 | if (i != page->index) | 413 | if (i != page->index) |
| 476 | page_cache_release(push_page); | 414 | page_cache_release(push_page); |
| 477 | } | 415 | } |
| 416 | } | ||
| 417 | |||
| 418 | /* Read datablock stored packed inside a fragment (tail-end packed block) */ | ||
| 419 | static int squashfs_readpage_fragment(struct page *page) | ||
| 420 | { | ||
| 421 | struct inode *inode = page->mapping->host; | ||
| 422 | struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; | ||
| 423 | struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, | ||
| 424 | squashfs_i(inode)->fragment_block, | ||
| 425 | squashfs_i(inode)->fragment_size); | ||
| 426 | int res = buffer->error; | ||
| 427 | |||
| 428 | if (res) | ||
| 429 | ERROR("Unable to read page, block %llx, size %x\n", | ||
| 430 | squashfs_i(inode)->fragment_block, | ||
| 431 | squashfs_i(inode)->fragment_size); | ||
| 432 | else | ||
| 433 | squashfs_copy_cache(page, buffer, i_size_read(inode) & | ||
| 434 | (msblk->block_size - 1), | ||
| 435 | squashfs_i(inode)->fragment_offset); | ||
| 436 | |||
| 437 | squashfs_cache_put(buffer); | ||
| 438 | return res; | ||
| 439 | } | ||
| 478 | 440 | ||
| 479 | if (!sparse) | 441 | static int squashfs_readpage_sparse(struct page *page, int index, int file_end) |
| 480 | squashfs_cache_put(buffer); | 442 | { |
| 443 | struct inode *inode = page->mapping->host; | ||
| 444 | struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; | ||
| 445 | int bytes = index == file_end ? | ||
| 446 | (i_size_read(inode) & (msblk->block_size - 1)) : | ||
| 447 | msblk->block_size; | ||
| 481 | 448 | ||
| 449 | squashfs_copy_cache(page, NULL, bytes, 0); | ||
| 482 | return 0; | 450 | return 0; |
| 451 | } | ||
| 452 | |||
| 453 | static int squashfs_readpage(struct file *file, struct page *page) | ||
| 454 | { | ||
| 455 | struct inode *inode = page->mapping->host; | ||
| 456 | struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; | ||
| 457 | int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); | ||
| 458 | int file_end = i_size_read(inode) >> msblk->block_log; | ||
| 459 | int res; | ||
| 460 | void *pageaddr; | ||
| 461 | |||
| 462 | TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", | ||
| 463 | page->index, squashfs_i(inode)->start); | ||
| 464 | |||
| 465 | if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | ||
| 466 | PAGE_CACHE_SHIFT)) | ||
| 467 | goto out; | ||
| 468 | |||
| 469 | if (index < file_end || squashfs_i(inode)->fragment_block == | ||
| 470 | SQUASHFS_INVALID_BLK) { | ||
| 471 | u64 block = 0; | ||
| 472 | int bsize = read_blocklist(inode, index, &block); | ||
| 473 | if (bsize < 0) | ||
| 474 | goto error_out; | ||
| 475 | |||
| 476 | if (bsize == 0) | ||
| 477 | res = squashfs_readpage_sparse(page, index, file_end); | ||
| 478 | else | ||
| 479 | res = squashfs_readpage_block(page, block, bsize); | ||
| 480 | } else | ||
| 481 | res = squashfs_readpage_fragment(page); | ||
| 482 | |||
| 483 | if (!res) | ||
| 484 | return 0; | ||
| 483 | 485 | ||
| 484 | error_out: | 486 | error_out: |
| 485 | SetPageError(page); | 487 | SetPageError(page); |
diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c new file mode 100644 index 000000000000..f2310d2a2019 --- /dev/null +++ b/fs/squashfs/file_cache.c | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2013 | ||
| 3 | * Phillip Lougher <phillip@squashfs.org.uk> | ||
| 4 | * | ||
| 5 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 6 | * the COPYING file in the top-level directory. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <linux/fs.h> | ||
| 10 | #include <linux/vfs.h> | ||
| 11 | #include <linux/kernel.h> | ||
| 12 | #include <linux/slab.h> | ||
| 13 | #include <linux/string.h> | ||
| 14 | #include <linux/pagemap.h> | ||
| 15 | #include <linux/mutex.h> | ||
| 16 | |||
| 17 | #include "squashfs_fs.h" | ||
| 18 | #include "squashfs_fs_sb.h" | ||
| 19 | #include "squashfs_fs_i.h" | ||
| 20 | #include "squashfs.h" | ||
| 21 | |||
| 22 | /* Read separately compressed datablock and memcopy into page cache */ | ||
| 23 | int squashfs_readpage_block(struct page *page, u64 block, int bsize) | ||
| 24 | { | ||
| 25 | struct inode *i = page->mapping->host; | ||
| 26 | struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, | ||
| 27 | block, bsize); | ||
| 28 | int res = buffer->error; | ||
| 29 | |||
| 30 | if (res) | ||
| 31 | ERROR("Unable to read page, block %llx, size %x\n", block, | ||
| 32 | bsize); | ||
| 33 | else | ||
| 34 | squashfs_copy_cache(page, buffer, buffer->length, 0); | ||
| 35 | |||
| 36 | squashfs_cache_put(buffer); | ||
| 37 | return res; | ||
| 38 | } | ||
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c new file mode 100644 index 000000000000..62a0de6632e1 --- /dev/null +++ b/fs/squashfs/file_direct.c | |||
| @@ -0,0 +1,176 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2013 | ||
| 3 | * Phillip Lougher <phillip@squashfs.org.uk> | ||
| 4 | * | ||
| 5 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 6 | * the COPYING file in the top-level directory. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <linux/fs.h> | ||
| 10 | #include <linux/vfs.h> | ||
| 11 | #include <linux/kernel.h> | ||
| 12 | #include <linux/slab.h> | ||
| 13 | #include <linux/string.h> | ||
| 14 | #include <linux/pagemap.h> | ||
| 15 | #include <linux/mutex.h> | ||
| 16 | |||
| 17 | #include "squashfs_fs.h" | ||
| 18 | #include "squashfs_fs_sb.h" | ||
| 19 | #include "squashfs_fs_i.h" | ||
| 20 | #include "squashfs.h" | ||
| 21 | #include "page_actor.h" | ||
| 22 | |||
| 23 | static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, | ||
| 24 | int pages, struct page **page); | ||
| 25 | |||
| 26 | /* Read separately compressed datablock directly into page cache */ | ||
| 27 | int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) | ||
| 28 | |||
| 29 | { | ||
| 30 | struct inode *inode = target_page->mapping->host; | ||
| 31 | struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; | ||
| 32 | |||
| 33 | int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; | ||
| 34 | int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; | ||
| 35 | int start_index = target_page->index & ~mask; | ||
| 36 | int end_index = start_index | mask; | ||
| 37 | int i, n, pages, missing_pages, bytes, res = -ENOMEM; | ||
| 38 | struct page **page; | ||
| 39 | struct squashfs_page_actor *actor; | ||
| 40 | void *pageaddr; | ||
| 41 | |||
| 42 | if (end_index > file_end) | ||
| 43 | end_index = file_end; | ||
| 44 | |||
| 45 | pages = end_index - start_index + 1; | ||
| 46 | |||
| 47 | page = kmalloc(sizeof(void *) * pages, GFP_KERNEL); | ||
| 48 | if (page == NULL) | ||
| 49 | return res; | ||
| 50 | |||
| 51 | /* | ||
| 52 | * Create a "page actor" which will kmap and kunmap the | ||
| 53 | * page cache pages appropriately within the decompressor | ||
| 54 | */ | ||
| 55 | actor = squashfs_page_actor_init_special(page, pages, 0); | ||
| 56 | if (actor == NULL) | ||
| 57 | goto out; | ||
| 58 | |||
| 59 | /* Try to grab all the pages covered by the Squashfs block */ | ||
| 60 | for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) { | ||
| 61 | page[i] = (n == target_page->index) ? target_page : | ||
| 62 | grab_cache_page_nowait(target_page->mapping, n); | ||
| 63 | |||
| 64 | if (page[i] == NULL) { | ||
| 65 | missing_pages++; | ||
| 66 | continue; | ||
| 67 | } | ||
| 68 | |||
| 69 | if (PageUptodate(page[i])) { | ||
| 70 | unlock_page(page[i]); | ||
| 71 | page_cache_release(page[i]); | ||
| 72 | page[i] = NULL; | ||
| 73 | missing_pages++; | ||
| 74 | } | ||
| 75 | } | ||
| 76 | |||
| 77 | if (missing_pages) { | ||
| 78 | /* | ||
| 79 | * Couldn't get one or more pages, this page has either | ||
| 80 | * been VM reclaimed, but others are still in the page cache | ||
| 81 | * and uptodate, or we're racing with another thread in | ||
| 82 | * squashfs_readpage also trying to grab them. Fall back to | ||
| 83 | * using an intermediate buffer. | ||
| 84 | */ | ||
| 85 | res = squashfs_read_cache(target_page, block, bsize, pages, | ||
| 86 | page); | ||
| 87 | if (res < 0) | ||
| 88 | goto mark_errored; | ||
| 89 | |||
| 90 | goto out; | ||
| 91 | } | ||
| 92 | |||
| 93 | /* Decompress directly into the page cache buffers */ | ||
| 94 | res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); | ||
| 95 | if (res < 0) | ||
| 96 | goto mark_errored; | ||
| 97 | |||
| 98 | /* Last page may have trailing bytes not filled */ | ||
| 99 | bytes = res % PAGE_CACHE_SIZE; | ||
| 100 | if (bytes) { | ||
| 101 | pageaddr = kmap_atomic(page[pages - 1]); | ||
| 102 | memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); | ||
| 103 | kunmap_atomic(pageaddr); | ||
| 104 | } | ||
| 105 | |||
| 106 | /* Mark pages as uptodate, unlock and release */ | ||
| 107 | for (i = 0; i < pages; i++) { | ||
| 108 | flush_dcache_page(page[i]); | ||
| 109 | SetPageUptodate(page[i]); | ||
| 110 | unlock_page(page[i]); | ||
| 111 | if (page[i] != target_page) | ||
| 112 | page_cache_release(page[i]); | ||
| 113 | } | ||
| 114 | |||
| 115 | kfree(actor); | ||
| 116 | kfree(page); | ||
| 117 | |||
| 118 | return 0; | ||
| 119 | |||
| 120 | mark_errored: | ||
| 121 | /* Decompression failed, mark pages as errored. Target_page is | ||
| 122 | * dealt with by the caller | ||
| 123 | */ | ||
| 124 | for (i = 0; i < pages; i++) { | ||
| 125 | if (page[i] == NULL || page[i] == target_page) | ||
| 126 | continue; | ||
| 127 | flush_dcache_page(page[i]); | ||
| 128 | SetPageError(page[i]); | ||
| 129 | unlock_page(page[i]); | ||
| 130 | page_cache_release(page[i]); | ||
| 131 | } | ||
| 132 | |||
| 133 | out: | ||
| 134 | kfree(actor); | ||
| 135 | kfree(page); | ||
| 136 | return res; | ||
| 137 | } | ||
| 138 | |||
| 139 | |||
| 140 | static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, | ||
| 141 | int pages, struct page **page) | ||
| 142 | { | ||
| 143 | struct inode *i = target_page->mapping->host; | ||
| 144 | struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, | ||
| 145 | block, bsize); | ||
| 146 | int bytes = buffer->length, res = buffer->error, n, offset = 0; | ||
| 147 | void *pageaddr; | ||
| 148 | |||
| 149 | if (res) { | ||
| 150 | ERROR("Unable to read page, block %llx, size %x\n", block, | ||
| 151 | bsize); | ||
| 152 | goto out; | ||
| 153 | } | ||
| 154 | |||
| 155 | for (n = 0; n < pages && bytes > 0; n++, | ||
| 156 | bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { | ||
| 157 | int avail = min_t(int, bytes, PAGE_CACHE_SIZE); | ||
| 158 | |||
| 159 | if (page[n] == NULL) | ||
| 160 | continue; | ||
| 161 | |||
| 162 | pageaddr = kmap_atomic(page[n]); | ||
| 163 | squashfs_copy_data(pageaddr, buffer, offset, avail); | ||
| 164 | memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); | ||
| 165 | kunmap_atomic(pageaddr); | ||
| 166 | flush_dcache_page(page[n]); | ||
| 167 | SetPageUptodate(page[n]); | ||
| 168 | unlock_page(page[n]); | ||
| 169 | if (page[n] != target_page) | ||
| 170 | page_cache_release(page[n]); | ||
| 171 | } | ||
| 172 | |||
| 173 | out: | ||
| 174 | squashfs_cache_put(buffer); | ||
| 175 | return res; | ||
| 176 | } | ||
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 00f4dfc5f088..244b9fbfff7b 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c | |||
| @@ -31,13 +31,14 @@ | |||
| 31 | #include "squashfs_fs_sb.h" | 31 | #include "squashfs_fs_sb.h" |
| 32 | #include "squashfs.h" | 32 | #include "squashfs.h" |
| 33 | #include "decompressor.h" | 33 | #include "decompressor.h" |
| 34 | #include "page_actor.h" | ||
| 34 | 35 | ||
| 35 | struct squashfs_lzo { | 36 | struct squashfs_lzo { |
| 36 | void *input; | 37 | void *input; |
| 37 | void *output; | 38 | void *output; |
| 38 | }; | 39 | }; |
| 39 | 40 | ||
| 40 | static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len) | 41 | static void *lzo_init(struct squashfs_sb_info *msblk, void *buff) |
| 41 | { | 42 | { |
| 42 | int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); | 43 | int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); |
| 43 | 44 | ||
| @@ -74,22 +75,16 @@ static void lzo_free(void *strm) | |||
| 74 | } | 75 | } |
| 75 | 76 | ||
| 76 | 77 | ||
| 77 | static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer, | 78 | static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, |
| 78 | struct buffer_head **bh, int b, int offset, int length, int srclength, | 79 | struct buffer_head **bh, int b, int offset, int length, |
| 79 | int pages) | 80 | struct squashfs_page_actor *output) |
| 80 | { | 81 | { |
| 81 | struct squashfs_lzo *stream = msblk->stream; | 82 | struct squashfs_lzo *stream = strm; |
| 82 | void *buff = stream->input; | 83 | void *buff = stream->input, *data; |
| 83 | int avail, i, bytes = length, res; | 84 | int avail, i, bytes = length, res; |
| 84 | size_t out_len = srclength; | 85 | size_t out_len = output->length; |
| 85 | |||
| 86 | mutex_lock(&msblk->read_data_mutex); | ||
| 87 | 86 | ||
| 88 | for (i = 0; i < b; i++) { | 87 | for (i = 0; i < b; i++) { |
| 89 | wait_on_buffer(bh[i]); | ||
| 90 | if (!buffer_uptodate(bh[i])) | ||
| 91 | goto block_release; | ||
| 92 | |||
| 93 | avail = min(bytes, msblk->devblksize - offset); | 88 | avail = min(bytes, msblk->devblksize - offset); |
| 94 | memcpy(buff, bh[i]->b_data + offset, avail); | 89 | memcpy(buff, bh[i]->b_data + offset, avail); |
| 95 | buff += avail; | 90 | buff += avail; |
| @@ -104,24 +99,24 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer, | |||
| 104 | goto failed; | 99 | goto failed; |
| 105 | 100 | ||
| 106 | res = bytes = (int)out_len; | 101 | res = bytes = (int)out_len; |
| 107 | for (i = 0, buff = stream->output; bytes && i < pages; i++) { | 102 | data = squashfs_first_page(output); |
| 108 | avail = min_t(int, bytes, PAGE_CACHE_SIZE); | 103 | buff = stream->output; |
| 109 | memcpy(buffer[i], buff, avail); | 104 | while (data) { |
| 110 | buff += avail; | 105 | if (bytes <= PAGE_CACHE_SIZE) { |
| 111 | bytes -= avail; | 106 | memcpy(data, buff, bytes); |
| 107 | break; | ||
| 108 | } else { | ||
| 109 | memcpy(data, buff, PAGE_CACHE_SIZE); | ||
| 110 | buff += PAGE_CACHE_SIZE; | ||
| 111 | bytes -= PAGE_CACHE_SIZE; | ||
| 112 | data = squashfs_next_page(output); | ||
| 113 | } | ||
| 112 | } | 114 | } |
| 115 | squashfs_finish_page(output); | ||
| 113 | 116 | ||
| 114 | mutex_unlock(&msblk->read_data_mutex); | ||
| 115 | return res; | 117 | return res; |
| 116 | 118 | ||
| 117 | block_release: | ||
| 118 | for (; i < b; i++) | ||
| 119 | put_bh(bh[i]); | ||
| 120 | |||
| 121 | failed: | 119 | failed: |
| 122 | mutex_unlock(&msblk->read_data_mutex); | ||
| 123 | |||
| 124 | ERROR("lzo decompression failed, data probably corrupt\n"); | ||
| 125 | return -EIO; | 120 | return -EIO; |
| 126 | } | 121 | } |
| 127 | 122 | ||
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c new file mode 100644 index 000000000000..5a1c11f56441 --- /dev/null +++ b/fs/squashfs/page_actor.c | |||
| @@ -0,0 +1,100 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2013 | ||
| 3 | * Phillip Lougher <phillip@squashfs.org.uk> | ||
| 4 | * | ||
| 5 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 6 | * the COPYING file in the top-level directory. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <linux/kernel.h> | ||
| 10 | #include <linux/slab.h> | ||
| 11 | #include <linux/pagemap.h> | ||
| 12 | #include "page_actor.h" | ||
| 13 | |||
| 14 | /* | ||
| 15 | * This file contains implementations of page_actor for decompressing into | ||
| 16 | * an intermediate buffer, and for decompressing directly into the | ||
| 17 | * page cache. | ||
| 18 | * | ||
| 19 | * Calling code should avoid sleeping between calls to squashfs_first_page() | ||
| 20 | * and squashfs_finish_page(). | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* Implementation of page_actor for decompressing into intermediate buffer */ | ||
| 24 | static void *cache_first_page(struct squashfs_page_actor *actor) | ||
| 25 | { | ||
| 26 | actor->next_page = 1; | ||
| 27 | return actor->buffer[0]; | ||
| 28 | } | ||
| 29 | |||
| 30 | static void *cache_next_page(struct squashfs_page_actor *actor) | ||
| 31 | { | ||
| 32 | if (actor->next_page == actor->pages) | ||
| 33 | return NULL; | ||
| 34 | |||
| 35 | return actor->buffer[actor->next_page++]; | ||
| 36 | } | ||
| 37 | |||
| 38 | static void cache_finish_page(struct squashfs_page_actor *actor) | ||
| 39 | { | ||
| 40 | /* empty */ | ||
| 41 | } | ||
| 42 | |||
| 43 | struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, | ||
| 44 | int pages, int length) | ||
| 45 | { | ||
| 46 | struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); | ||
| 47 | |||
| 48 | if (actor == NULL) | ||
| 49 | return NULL; | ||
| 50 | |||
| 51 | actor->length = length ? : pages * PAGE_CACHE_SIZE; | ||
| 52 | actor->buffer = buffer; | ||
| 53 | actor->pages = pages; | ||
| 54 | actor->next_page = 0; | ||
| 55 | actor->squashfs_first_page = cache_first_page; | ||
| 56 | actor->squashfs_next_page = cache_next_page; | ||
| 57 | actor->squashfs_finish_page = cache_finish_page; | ||
| 58 | return actor; | ||
| 59 | } | ||
| 60 | |||
| 61 | /* Implementation of page_actor for decompressing directly into page cache. */ | ||
| 62 | static void *direct_first_page(struct squashfs_page_actor *actor) | ||
| 63 | { | ||
| 64 | actor->next_page = 1; | ||
| 65 | return actor->pageaddr = kmap_atomic(actor->page[0]); | ||
| 66 | } | ||
| 67 | |||
| 68 | static void *direct_next_page(struct squashfs_page_actor *actor) | ||
| 69 | { | ||
| 70 | if (actor->pageaddr) | ||
| 71 | kunmap_atomic(actor->pageaddr); | ||
| 72 | |||
| 73 | return actor->pageaddr = actor->next_page == actor->pages ? NULL : | ||
| 74 | kmap_atomic(actor->page[actor->next_page++]); | ||
| 75 | } | ||
| 76 | |||
| 77 | static void direct_finish_page(struct squashfs_page_actor *actor) | ||
| 78 | { | ||
| 79 | if (actor->pageaddr) | ||
| 80 | kunmap_atomic(actor->pageaddr); | ||
| 81 | } | ||
| 82 | |||
| 83 | struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page, | ||
| 84 | int pages, int length) | ||
| 85 | { | ||
| 86 | struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); | ||
| 87 | |||
| 88 | if (actor == NULL) | ||
| 89 | return NULL; | ||
| 90 | |||
| 91 | actor->length = length ? : pages * PAGE_CACHE_SIZE; | ||
| 92 | actor->page = page; | ||
| 93 | actor->pages = pages; | ||
| 94 | actor->next_page = 0; | ||
| 95 | actor->pageaddr = NULL; | ||
| 96 | actor->squashfs_first_page = direct_first_page; | ||
| 97 | actor->squashfs_next_page = direct_next_page; | ||
| 98 | actor->squashfs_finish_page = direct_finish_page; | ||
| 99 | return actor; | ||
| 100 | } | ||
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h new file mode 100644 index 000000000000..26dd82008b82 --- /dev/null +++ b/fs/squashfs/page_actor.h | |||
| @@ -0,0 +1,81 @@ | |||
| 1 | #ifndef PAGE_ACTOR_H | ||
| 2 | #define PAGE_ACTOR_H | ||
| 3 | /* | ||
| 4 | * Copyright (c) 2013 | ||
| 5 | * Phillip Lougher <phillip@squashfs.org.uk> | ||
| 6 | * | ||
| 7 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 8 | * the COPYING file in the top-level directory. | ||
| 9 | */ | ||
| 10 | |||
| 11 | #ifndef CONFIG_SQUASHFS_FILE_DIRECT | ||
| 12 | struct squashfs_page_actor { | ||
| 13 | void **page; | ||
| 14 | int pages; | ||
| 15 | int length; | ||
| 16 | int next_page; | ||
| 17 | }; | ||
| 18 | |||
| 19 | static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page, | ||
| 20 | int pages, int length) | ||
| 21 | { | ||
| 22 | struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); | ||
| 23 | |||
| 24 | if (actor == NULL) | ||
| 25 | return NULL; | ||
| 26 | |||
| 27 | actor->length = length ? : pages * PAGE_CACHE_SIZE; | ||
| 28 | actor->page = page; | ||
| 29 | actor->pages = pages; | ||
| 30 | actor->next_page = 0; | ||
| 31 | return actor; | ||
| 32 | } | ||
| 33 | |||
| 34 | static inline void *squashfs_first_page(struct squashfs_page_actor *actor) | ||
| 35 | { | ||
| 36 | actor->next_page = 1; | ||
| 37 | return actor->page[0]; | ||
| 38 | } | ||
| 39 | |||
| 40 | static inline void *squashfs_next_page(struct squashfs_page_actor *actor) | ||
| 41 | { | ||
| 42 | return actor->next_page == actor->pages ? NULL : | ||
| 43 | actor->page[actor->next_page++]; | ||
| 44 | } | ||
| 45 | |||
| 46 | static inline void squashfs_finish_page(struct squashfs_page_actor *actor) | ||
| 47 | { | ||
| 48 | /* empty */ | ||
| 49 | } | ||
| 50 | #else | ||
| 51 | struct squashfs_page_actor { | ||
| 52 | union { | ||
| 53 | void **buffer; | ||
| 54 | struct page **page; | ||
| 55 | }; | ||
| 56 | void *pageaddr; | ||
| 57 | void *(*squashfs_first_page)(struct squashfs_page_actor *); | ||
| 58 | void *(*squashfs_next_page)(struct squashfs_page_actor *); | ||
| 59 | void (*squashfs_finish_page)(struct squashfs_page_actor *); | ||
| 60 | int pages; | ||
| 61 | int length; | ||
| 62 | int next_page; | ||
| 63 | }; | ||
| 64 | |||
| 65 | extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int); | ||
| 66 | extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page | ||
| 67 | **, int, int); | ||
| 68 | static inline void *squashfs_first_page(struct squashfs_page_actor *actor) | ||
| 69 | { | ||
| 70 | return actor->squashfs_first_page(actor); | ||
| 71 | } | ||
| 72 | static inline void *squashfs_next_page(struct squashfs_page_actor *actor) | ||
| 73 | { | ||
| 74 | return actor->squashfs_next_page(actor); | ||
| 75 | } | ||
| 76 | static inline void squashfs_finish_page(struct squashfs_page_actor *actor) | ||
| 77 | { | ||
| 78 | actor->squashfs_finish_page(actor); | ||
| 79 | } | ||
| 80 | #endif | ||
| 81 | #endif | ||
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index d1266516ed08..9e1bb79f7e6f 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h | |||
| @@ -28,8 +28,8 @@ | |||
| 28 | #define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args) | 28 | #define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args) |
| 29 | 29 | ||
| 30 | /* block.c */ | 30 | /* block.c */ |
| 31 | extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *, | 31 | extern int squashfs_read_data(struct super_block *, u64, int, u64 *, |
| 32 | int, int); | 32 | struct squashfs_page_actor *); |
| 33 | 33 | ||
| 34 | /* cache.c */ | 34 | /* cache.c */ |
| 35 | extern struct squashfs_cache *squashfs_cache_init(char *, int, int); | 35 | extern struct squashfs_cache *squashfs_cache_init(char *, int, int); |
| @@ -48,7 +48,14 @@ extern void *squashfs_read_table(struct super_block *, u64, int); | |||
| 48 | 48 | ||
| 49 | /* decompressor.c */ | 49 | /* decompressor.c */ |
| 50 | extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); | 50 | extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); |
| 51 | extern void *squashfs_decompressor_init(struct super_block *, unsigned short); | 51 | extern void *squashfs_decompressor_setup(struct super_block *, unsigned short); |
| 52 | |||
| 53 | /* decompressor_xxx.c */ | ||
| 54 | extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *); | ||
| 55 | extern void squashfs_decompressor_destroy(struct squashfs_sb_info *); | ||
| 56 | extern int squashfs_decompress(struct squashfs_sb_info *, struct buffer_head **, | ||
| 57 | int, int, int, struct squashfs_page_actor *); | ||
| 58 | extern int squashfs_max_decompressors(void); | ||
| 52 | 59 | ||
| 53 | /* export.c */ | 60 | /* export.c */ |
| 54 | extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64, | 61 | extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64, |
| @@ -59,6 +66,13 @@ extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *); | |||
| 59 | extern __le64 *squashfs_read_fragment_index_table(struct super_block *, | 66 | extern __le64 *squashfs_read_fragment_index_table(struct super_block *, |
| 60 | u64, u64, unsigned int); | 67 | u64, u64, unsigned int); |
| 61 | 68 | ||
| 69 | /* file.c */ | ||
| 70 | void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, | ||
| 71 | int); | ||
| 72 | |||
| 73 | /* file_xxx.c */ | ||
| 74 | extern int squashfs_readpage_block(struct page *, u64, int); | ||
| 75 | |||
| 62 | /* id.c */ | 76 | /* id.c */ |
| 63 | extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); | 77 | extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); |
| 64 | extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64, | 78 | extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64, |
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 52934a22f296..1da565cb50c3 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h | |||
| @@ -50,6 +50,7 @@ struct squashfs_cache_entry { | |||
| 50 | wait_queue_head_t wait_queue; | 50 | wait_queue_head_t wait_queue; |
| 51 | struct squashfs_cache *cache; | 51 | struct squashfs_cache *cache; |
| 52 | void **data; | 52 | void **data; |
| 53 | struct squashfs_page_actor *actor; | ||
| 53 | }; | 54 | }; |
| 54 | 55 | ||
| 55 | struct squashfs_sb_info { | 56 | struct squashfs_sb_info { |
| @@ -63,10 +64,9 @@ struct squashfs_sb_info { | |||
| 63 | __le64 *id_table; | 64 | __le64 *id_table; |
| 64 | __le64 *fragment_index; | 65 | __le64 *fragment_index; |
| 65 | __le64 *xattr_id_table; | 66 | __le64 *xattr_id_table; |
| 66 | struct mutex read_data_mutex; | ||
| 67 | struct mutex meta_index_mutex; | 67 | struct mutex meta_index_mutex; |
| 68 | struct meta_index *meta_index; | 68 | struct meta_index *meta_index; |
| 69 | void *stream; | 69 | struct squashfs_stream *stream; |
| 70 | __le64 *inode_lookup_table; | 70 | __le64 *inode_lookup_table; |
| 71 | u64 inode_table; | 71 | u64 inode_table; |
| 72 | u64 directory_table; | 72 | u64 directory_table; |
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 60553a9053ca..202df6312d4e 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c | |||
| @@ -98,7 +98,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 98 | msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); | 98 | msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); |
| 99 | msblk->devblksize_log2 = ffz(~msblk->devblksize); | 99 | msblk->devblksize_log2 = ffz(~msblk->devblksize); |
| 100 | 100 | ||
| 101 | mutex_init(&msblk->read_data_mutex); | ||
| 102 | mutex_init(&msblk->meta_index_mutex); | 101 | mutex_init(&msblk->meta_index_mutex); |
| 103 | 102 | ||
| 104 | /* | 103 | /* |
| @@ -206,13 +205,14 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 206 | goto failed_mount; | 205 | goto failed_mount; |
| 207 | 206 | ||
| 208 | /* Allocate read_page block */ | 207 | /* Allocate read_page block */ |
| 209 | msblk->read_page = squashfs_cache_init("data", 1, msblk->block_size); | 208 | msblk->read_page = squashfs_cache_init("data", |
| 209 | squashfs_max_decompressors(), msblk->block_size); | ||
| 210 | if (msblk->read_page == NULL) { | 210 | if (msblk->read_page == NULL) { |
| 211 | ERROR("Failed to allocate read_page block\n"); | 211 | ERROR("Failed to allocate read_page block\n"); |
| 212 | goto failed_mount; | 212 | goto failed_mount; |
| 213 | } | 213 | } |
| 214 | 214 | ||
| 215 | msblk->stream = squashfs_decompressor_init(sb, flags); | 215 | msblk->stream = squashfs_decompressor_setup(sb, flags); |
| 216 | if (IS_ERR(msblk->stream)) { | 216 | if (IS_ERR(msblk->stream)) { |
| 217 | err = PTR_ERR(msblk->stream); | 217 | err = PTR_ERR(msblk->stream); |
| 218 | msblk->stream = NULL; | 218 | msblk->stream = NULL; |
| @@ -336,7 +336,7 @@ failed_mount: | |||
| 336 | squashfs_cache_delete(msblk->block_cache); | 336 | squashfs_cache_delete(msblk->block_cache); |
| 337 | squashfs_cache_delete(msblk->fragment_cache); | 337 | squashfs_cache_delete(msblk->fragment_cache); |
| 338 | squashfs_cache_delete(msblk->read_page); | 338 | squashfs_cache_delete(msblk->read_page); |
| 339 | squashfs_decompressor_free(msblk, msblk->stream); | 339 | squashfs_decompressor_destroy(msblk); |
| 340 | kfree(msblk->inode_lookup_table); | 340 | kfree(msblk->inode_lookup_table); |
| 341 | kfree(msblk->fragment_index); | 341 | kfree(msblk->fragment_index); |
| 342 | kfree(msblk->id_table); | 342 | kfree(msblk->id_table); |
| @@ -383,7 +383,7 @@ static void squashfs_put_super(struct super_block *sb) | |||
| 383 | squashfs_cache_delete(sbi->block_cache); | 383 | squashfs_cache_delete(sbi->block_cache); |
| 384 | squashfs_cache_delete(sbi->fragment_cache); | 384 | squashfs_cache_delete(sbi->fragment_cache); |
| 385 | squashfs_cache_delete(sbi->read_page); | 385 | squashfs_cache_delete(sbi->read_page); |
| 386 | squashfs_decompressor_free(sbi, sbi->stream); | 386 | squashfs_decompressor_destroy(sbi); |
| 387 | kfree(sbi->id_table); | 387 | kfree(sbi->id_table); |
| 388 | kfree(sbi->fragment_index); | 388 | kfree(sbi->fragment_index); |
| 389 | kfree(sbi->meta_index); | 389 | kfree(sbi->meta_index); |
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 1760b7d108f6..c609624e4b8a 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c | |||
| @@ -32,44 +32,70 @@ | |||
| 32 | #include "squashfs_fs_sb.h" | 32 | #include "squashfs_fs_sb.h" |
| 33 | #include "squashfs.h" | 33 | #include "squashfs.h" |
| 34 | #include "decompressor.h" | 34 | #include "decompressor.h" |
| 35 | #include "page_actor.h" | ||
| 35 | 36 | ||
| 36 | struct squashfs_xz { | 37 | struct squashfs_xz { |
| 37 | struct xz_dec *state; | 38 | struct xz_dec *state; |
| 38 | struct xz_buf buf; | 39 | struct xz_buf buf; |
| 39 | }; | 40 | }; |
| 40 | 41 | ||
| 41 | struct comp_opts { | 42 | struct disk_comp_opts { |
| 42 | __le32 dictionary_size; | 43 | __le32 dictionary_size; |
| 43 | __le32 flags; | 44 | __le32 flags; |
| 44 | }; | 45 | }; |
| 45 | 46 | ||
| 46 | static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff, | 47 | struct comp_opts { |
| 47 | int len) | 48 | int dict_size; |
| 49 | }; | ||
| 50 | |||
| 51 | static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk, | ||
| 52 | void *buff, int len) | ||
| 48 | { | 53 | { |
| 49 | struct comp_opts *comp_opts = buff; | 54 | struct disk_comp_opts *comp_opts = buff; |
| 50 | struct squashfs_xz *stream; | 55 | struct comp_opts *opts; |
| 51 | int dict_size = msblk->block_size; | 56 | int err = 0, n; |
| 52 | int err, n; | 57 | |
| 58 | opts = kmalloc(sizeof(*opts), GFP_KERNEL); | ||
| 59 | if (opts == NULL) { | ||
| 60 | err = -ENOMEM; | ||
| 61 | goto out2; | ||
| 62 | } | ||
| 53 | 63 | ||
| 54 | if (comp_opts) { | 64 | if (comp_opts) { |
| 55 | /* check compressor options are the expected length */ | 65 | /* check compressor options are the expected length */ |
| 56 | if (len < sizeof(*comp_opts)) { | 66 | if (len < sizeof(*comp_opts)) { |
| 57 | err = -EIO; | 67 | err = -EIO; |
| 58 | goto failed; | 68 | goto out; |
| 59 | } | 69 | } |
| 60 | 70 | ||
| 61 | dict_size = le32_to_cpu(comp_opts->dictionary_size); | 71 | opts->dict_size = le32_to_cpu(comp_opts->dictionary_size); |
| 62 | 72 | ||
| 63 | /* the dictionary size should be 2^n or 2^n+2^(n+1) */ | 73 | /* the dictionary size should be 2^n or 2^n+2^(n+1) */ |
| 64 | n = ffs(dict_size) - 1; | 74 | n = ffs(opts->dict_size) - 1; |
| 65 | if (dict_size != (1 << n) && dict_size != (1 << n) + | 75 | if (opts->dict_size != (1 << n) && opts->dict_size != (1 << n) + |
| 66 | (1 << (n + 1))) { | 76 | (1 << (n + 1))) { |
| 67 | err = -EIO; | 77 | err = -EIO; |
| 68 | goto failed; | 78 | goto out; |
| 69 | } | 79 | } |
| 70 | } | 80 | } else |
| 81 | /* use defaults */ | ||
| 82 | opts->dict_size = max_t(int, msblk->block_size, | ||
| 83 | SQUASHFS_METADATA_SIZE); | ||
| 84 | |||
| 85 | return opts; | ||
| 86 | |||
| 87 | out: | ||
| 88 | kfree(opts); | ||
| 89 | out2: | ||
| 90 | return ERR_PTR(err); | ||
| 91 | } | ||
| 92 | |||
| 71 | 93 | ||
| 72 | dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE); | 94 | static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff) |
| 95 | { | ||
| 96 | struct comp_opts *comp_opts = buff; | ||
| 97 | struct squashfs_xz *stream; | ||
| 98 | int err; | ||
| 73 | 99 | ||
| 74 | stream = kmalloc(sizeof(*stream), GFP_KERNEL); | 100 | stream = kmalloc(sizeof(*stream), GFP_KERNEL); |
| 75 | if (stream == NULL) { | 101 | if (stream == NULL) { |
| @@ -77,7 +103,7 @@ static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff, | |||
| 77 | goto failed; | 103 | goto failed; |
| 78 | } | 104 | } |
| 79 | 105 | ||
| 80 | stream->state = xz_dec_init(XZ_PREALLOC, dict_size); | 106 | stream->state = xz_dec_init(XZ_PREALLOC, comp_opts->dict_size); |
| 81 | if (stream->state == NULL) { | 107 | if (stream->state == NULL) { |
| 82 | kfree(stream); | 108 | kfree(stream); |
| 83 | err = -ENOMEM; | 109 | err = -ENOMEM; |
| @@ -103,42 +129,37 @@ static void squashfs_xz_free(void *strm) | |||
| 103 | } | 129 | } |
| 104 | 130 | ||
| 105 | 131 | ||
| 106 | static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, | 132 | static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, |
| 107 | struct buffer_head **bh, int b, int offset, int length, int srclength, | 133 | struct buffer_head **bh, int b, int offset, int length, |
| 108 | int pages) | 134 | struct squashfs_page_actor *output) |
| 109 | { | 135 | { |
| 110 | enum xz_ret xz_err; | 136 | enum xz_ret xz_err; |
| 111 | int avail, total = 0, k = 0, page = 0; | 137 | int avail, total = 0, k = 0; |
| 112 | struct squashfs_xz *stream = msblk->stream; | 138 | struct squashfs_xz *stream = strm; |
| 113 | |||
| 114 | mutex_lock(&msblk->read_data_mutex); | ||
| 115 | 139 | ||
| 116 | xz_dec_reset(stream->state); | 140 | xz_dec_reset(stream->state); |
| 117 | stream->buf.in_pos = 0; | 141 | stream->buf.in_pos = 0; |
| 118 | stream->buf.in_size = 0; | 142 | stream->buf.in_size = 0; |
| 119 | stream->buf.out_pos = 0; | 143 | stream->buf.out_pos = 0; |
| 120 | stream->buf.out_size = PAGE_CACHE_SIZE; | 144 | stream->buf.out_size = PAGE_CACHE_SIZE; |
| 121 | stream->buf.out = buffer[page++]; | 145 | stream->buf.out = squashfs_first_page(output); |
| 122 | 146 | ||
| 123 | do { | 147 | do { |
| 124 | if (stream->buf.in_pos == stream->buf.in_size && k < b) { | 148 | if (stream->buf.in_pos == stream->buf.in_size && k < b) { |
| 125 | avail = min(length, msblk->devblksize - offset); | 149 | avail = min(length, msblk->devblksize - offset); |
| 126 | length -= avail; | 150 | length -= avail; |
| 127 | wait_on_buffer(bh[k]); | ||
| 128 | if (!buffer_uptodate(bh[k])) | ||
| 129 | goto release_mutex; | ||
| 130 | |||
| 131 | stream->buf.in = bh[k]->b_data + offset; | 151 | stream->buf.in = bh[k]->b_data + offset; |
| 132 | stream->buf.in_size = avail; | 152 | stream->buf.in_size = avail; |
| 133 | stream->buf.in_pos = 0; | 153 | stream->buf.in_pos = 0; |
| 134 | offset = 0; | 154 | offset = 0; |
| 135 | } | 155 | } |
| 136 | 156 | ||
| 137 | if (stream->buf.out_pos == stream->buf.out_size | 157 | if (stream->buf.out_pos == stream->buf.out_size) { |
| 138 | && page < pages) { | 158 | stream->buf.out = squashfs_next_page(output); |
| 139 | stream->buf.out = buffer[page++]; | 159 | if (stream->buf.out != NULL) { |
| 140 | stream->buf.out_pos = 0; | 160 | stream->buf.out_pos = 0; |
| 141 | total += PAGE_CACHE_SIZE; | 161 | total += PAGE_CACHE_SIZE; |
| 162 | } | ||
| 142 | } | 163 | } |
| 143 | 164 | ||
| 144 | xz_err = xz_dec_run(stream->state, &stream->buf); | 165 | xz_err = xz_dec_run(stream->state, &stream->buf); |
| @@ -147,23 +168,14 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, | |||
| 147 | put_bh(bh[k++]); | 168 | put_bh(bh[k++]); |
| 148 | } while (xz_err == XZ_OK); | 169 | } while (xz_err == XZ_OK); |
| 149 | 170 | ||
| 150 | if (xz_err != XZ_STREAM_END) { | 171 | squashfs_finish_page(output); |
| 151 | ERROR("xz_dec_run error, data probably corrupt\n"); | ||
| 152 | goto release_mutex; | ||
| 153 | } | ||
| 154 | |||
| 155 | if (k < b) { | ||
| 156 | ERROR("xz_uncompress error, input remaining\n"); | ||
| 157 | goto release_mutex; | ||
| 158 | } | ||
| 159 | 172 | ||
| 160 | total += stream->buf.out_pos; | 173 | if (xz_err != XZ_STREAM_END || k < b) |
| 161 | mutex_unlock(&msblk->read_data_mutex); | 174 | goto out; |
| 162 | return total; | ||
| 163 | 175 | ||
| 164 | release_mutex: | 176 | return total + stream->buf.out_pos; |
| 165 | mutex_unlock(&msblk->read_data_mutex); | ||
| 166 | 177 | ||
| 178 | out: | ||
| 167 | for (; k < b; k++) | 179 | for (; k < b; k++) |
| 168 | put_bh(bh[k]); | 180 | put_bh(bh[k]); |
| 169 | 181 | ||
| @@ -172,6 +184,7 @@ release_mutex: | |||
| 172 | 184 | ||
| 173 | const struct squashfs_decompressor squashfs_xz_comp_ops = { | 185 | const struct squashfs_decompressor squashfs_xz_comp_ops = { |
| 174 | .init = squashfs_xz_init, | 186 | .init = squashfs_xz_init, |
| 187 | .comp_opts = squashfs_xz_comp_opts, | ||
| 175 | .free = squashfs_xz_free, | 188 | .free = squashfs_xz_free, |
| 176 | .decompress = squashfs_xz_uncompress, | 189 | .decompress = squashfs_xz_uncompress, |
| 177 | .id = XZ_COMPRESSION, | 190 | .id = XZ_COMPRESSION, |
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 55d918fd2d86..8727caba6882 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c | |||
| @@ -32,8 +32,9 @@ | |||
| 32 | #include "squashfs_fs_sb.h" | 32 | #include "squashfs_fs_sb.h" |
| 33 | #include "squashfs.h" | 33 | #include "squashfs.h" |
| 34 | #include "decompressor.h" | 34 | #include "decompressor.h" |
| 35 | #include "page_actor.h" | ||
| 35 | 36 | ||
| 36 | static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len) | 37 | static void *zlib_init(struct squashfs_sb_info *dummy, void *buff) |
| 37 | { | 38 | { |
| 38 | z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL); | 39 | z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL); |
| 39 | if (stream == NULL) | 40 | if (stream == NULL) |
| @@ -61,44 +62,37 @@ static void zlib_free(void *strm) | |||
| 61 | } | 62 | } |
| 62 | 63 | ||
| 63 | 64 | ||
| 64 | static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, | 65 | static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, |
| 65 | struct buffer_head **bh, int b, int offset, int length, int srclength, | 66 | struct buffer_head **bh, int b, int offset, int length, |
| 66 | int pages) | 67 | struct squashfs_page_actor *output) |
| 67 | { | 68 | { |
| 68 | int zlib_err, zlib_init = 0; | 69 | int zlib_err, zlib_init = 0, k = 0; |
| 69 | int k = 0, page = 0; | 70 | z_stream *stream = strm; |
| 70 | z_stream *stream = msblk->stream; | ||
| 71 | |||
| 72 | mutex_lock(&msblk->read_data_mutex); | ||
| 73 | 71 | ||
| 74 | stream->avail_out = 0; | 72 | stream->avail_out = PAGE_CACHE_SIZE; |
| 73 | stream->next_out = squashfs_first_page(output); | ||
| 75 | stream->avail_in = 0; | 74 | stream->avail_in = 0; |
| 76 | 75 | ||
| 77 | do { | 76 | do { |
| 78 | if (stream->avail_in == 0 && k < b) { | 77 | if (stream->avail_in == 0 && k < b) { |
| 79 | int avail = min(length, msblk->devblksize - offset); | 78 | int avail = min(length, msblk->devblksize - offset); |
| 80 | length -= avail; | 79 | length -= avail; |
| 81 | wait_on_buffer(bh[k]); | ||
| 82 | if (!buffer_uptodate(bh[k])) | ||
| 83 | goto release_mutex; | ||
| 84 | |||
| 85 | stream->next_in = bh[k]->b_data + offset; | 80 | stream->next_in = bh[k]->b_data + offset; |
| 86 | stream->avail_in = avail; | 81 | stream->avail_in = avail; |
| 87 | offset = 0; | 82 | offset = 0; |
| 88 | } | 83 | } |
| 89 | 84 | ||
| 90 | if (stream->avail_out == 0 && page < pages) { | 85 | if (stream->avail_out == 0) { |
| 91 | stream->next_out = buffer[page++]; | 86 | stream->next_out = squashfs_next_page(output); |
| 92 | stream->avail_out = PAGE_CACHE_SIZE; | 87 | if (stream->next_out != NULL) |
| 88 | stream->avail_out = PAGE_CACHE_SIZE; | ||
| 93 | } | 89 | } |
| 94 | 90 | ||
| 95 | if (!zlib_init) { | 91 | if (!zlib_init) { |
| 96 | zlib_err = zlib_inflateInit(stream); | 92 | zlib_err = zlib_inflateInit(stream); |
| 97 | if (zlib_err != Z_OK) { | 93 | if (zlib_err != Z_OK) { |
| 98 | ERROR("zlib_inflateInit returned unexpected " | 94 | squashfs_finish_page(output); |
| 99 | "result 0x%x, srclength %d\n", | 95 | goto out; |
| 100 | zlib_err, srclength); | ||
| 101 | goto release_mutex; | ||
| 102 | } | 96 | } |
| 103 | zlib_init = 1; | 97 | zlib_init = 1; |
| 104 | } | 98 | } |
| @@ -109,29 +103,21 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, | |||
| 109 | put_bh(bh[k++]); | 103 | put_bh(bh[k++]); |
| 110 | } while (zlib_err == Z_OK); | 104 | } while (zlib_err == Z_OK); |
| 111 | 105 | ||
| 112 | if (zlib_err != Z_STREAM_END) { | 106 | squashfs_finish_page(output); |
| 113 | ERROR("zlib_inflate error, data probably corrupt\n"); | ||
| 114 | goto release_mutex; | ||
| 115 | } | ||
| 116 | 107 | ||
| 117 | zlib_err = zlib_inflateEnd(stream); | 108 | if (zlib_err != Z_STREAM_END) |
| 118 | if (zlib_err != Z_OK) { | 109 | goto out; |
| 119 | ERROR("zlib_inflate error, data probably corrupt\n"); | ||
| 120 | goto release_mutex; | ||
| 121 | } | ||
| 122 | 110 | ||
| 123 | if (k < b) { | 111 | zlib_err = zlib_inflateEnd(stream); |
| 124 | ERROR("zlib_uncompress error, data remaining\n"); | 112 | if (zlib_err != Z_OK) |
| 125 | goto release_mutex; | 113 | goto out; |
| 126 | } | ||
| 127 | 114 | ||
| 128 | length = stream->total_out; | 115 | if (k < b) |
| 129 | mutex_unlock(&msblk->read_data_mutex); | 116 | goto out; |
| 130 | return length; | ||
| 131 | 117 | ||
| 132 | release_mutex: | 118 | return stream->total_out; |
| 133 | mutex_unlock(&msblk->read_data_mutex); | ||
| 134 | 119 | ||
| 120 | out: | ||
| 135 | for (; k < b; k++) | 121 | for (; k < b; k++) |
| 136 | put_bh(bh[k]); | 122 | put_bh(bh[k]); |
| 137 | 123 | ||
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 79b5da2acbe1..b94f93685093 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
| @@ -609,7 +609,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) | |||
| 609 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; | 609 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; |
| 610 | struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; | 610 | struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; |
| 611 | struct sysfs_open_file *of; | 611 | struct sysfs_open_file *of; |
| 612 | bool has_read, has_write; | 612 | bool has_read, has_write, has_mmap; |
| 613 | int error = -EACCES; | 613 | int error = -EACCES; |
| 614 | 614 | ||
| 615 | /* need attr_sd for attr and ops, its parent for kobj */ | 615 | /* need attr_sd for attr and ops, its parent for kobj */ |
| @@ -621,6 +621,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) | |||
| 621 | 621 | ||
| 622 | has_read = battr->read || battr->mmap; | 622 | has_read = battr->read || battr->mmap; |
| 623 | has_write = battr->write || battr->mmap; | 623 | has_write = battr->write || battr->mmap; |
| 624 | has_mmap = battr->mmap; | ||
| 624 | } else { | 625 | } else { |
| 625 | const struct sysfs_ops *ops = sysfs_file_ops(attr_sd); | 626 | const struct sysfs_ops *ops = sysfs_file_ops(attr_sd); |
| 626 | 627 | ||
| @@ -632,6 +633,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file) | |||
| 632 | 633 | ||
| 633 | has_read = ops->show; | 634 | has_read = ops->show; |
| 634 | has_write = ops->store; | 635 | has_write = ops->store; |
| 636 | has_mmap = false; | ||
| 635 | } | 637 | } |
| 636 | 638 | ||
| 637 | /* check perms and supported operations */ | 639 | /* check perms and supported operations */ |
| @@ -649,7 +651,23 @@ static int sysfs_open_file(struct inode *inode, struct file *file) | |||
| 649 | if (!of) | 651 | if (!of) |
| 650 | goto err_out; | 652 | goto err_out; |
| 651 | 653 | ||
| 652 | mutex_init(&of->mutex); | 654 | /* |
| 655 | * The following is done to give a different lockdep key to | ||
| 656 | * @of->mutex for files which implement mmap. This is a rather | ||
| 657 | * crude way to avoid false positive lockdep warning around | ||
| 658 | * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and | ||
| 659 | * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under | ||
| 660 | * which mm->mmap_sem nests, while holding @of->mutex. As each | ||
| 661 | * open file has a separate mutex, it's okay as long as those don't | ||
| 662 | * happen on the same file. At this point, we can't easily give | ||
| 663 | * each file a separate locking class. Let's differentiate on | ||
| 664 | * whether the file has mmap or not for now. | ||
| 665 | */ | ||
| 666 | if (has_mmap) | ||
| 667 | mutex_init(&of->mutex); | ||
| 668 | else | ||
| 669 | mutex_init(&of->mutex); | ||
| 670 | |||
| 653 | of->sd = attr_sd; | 671 | of->sd = attr_sd; |
| 654 | of->file = file; | 672 | of->file = file; |
| 655 | 673 | ||
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 1c02da8bb7df..3ef11b22e750 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
| @@ -1137,6 +1137,7 @@ xfs_bmap_add_attrfork( | |||
| 1137 | int committed; /* xaction was committed */ | 1137 | int committed; /* xaction was committed */ |
| 1138 | int logflags; /* logging flags */ | 1138 | int logflags; /* logging flags */ |
| 1139 | int error; /* error return value */ | 1139 | int error; /* error return value */ |
| 1140 | int cancel_flags = 0; | ||
| 1140 | 1141 | ||
| 1141 | ASSERT(XFS_IFORK_Q(ip) == 0); | 1142 | ASSERT(XFS_IFORK_Q(ip) == 0); |
| 1142 | 1143 | ||
| @@ -1147,19 +1148,20 @@ xfs_bmap_add_attrfork( | |||
| 1147 | if (rsvd) | 1148 | if (rsvd) |
| 1148 | tp->t_flags |= XFS_TRANS_RESERVE; | 1149 | tp->t_flags |= XFS_TRANS_RESERVE; |
| 1149 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); | 1150 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); |
| 1150 | if (error) | 1151 | if (error) { |
| 1151 | goto error0; | 1152 | xfs_trans_cancel(tp, 0); |
| 1153 | return error; | ||
| 1154 | } | ||
| 1155 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | ||
| 1152 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 1156 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
| 1153 | error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? | 1157 | error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? |
| 1154 | XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : | 1158 | XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : |
| 1155 | XFS_QMOPT_RES_REGBLKS); | 1159 | XFS_QMOPT_RES_REGBLKS); |
| 1156 | if (error) { | 1160 | if (error) |
| 1157 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 1161 | goto trans_cancel; |
| 1158 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); | 1162 | cancel_flags |= XFS_TRANS_ABORT; |
| 1159 | return error; | ||
| 1160 | } | ||
| 1161 | if (XFS_IFORK_Q(ip)) | 1163 | if (XFS_IFORK_Q(ip)) |
| 1162 | goto error1; | 1164 | goto trans_cancel; |
| 1163 | if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { | 1165 | if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { |
| 1164 | /* | 1166 | /* |
| 1165 | * For inodes coming from pre-6.2 filesystems. | 1167 | * For inodes coming from pre-6.2 filesystems. |
| @@ -1169,7 +1171,7 @@ xfs_bmap_add_attrfork( | |||
| 1169 | } | 1171 | } |
| 1170 | ASSERT(ip->i_d.di_anextents == 0); | 1172 | ASSERT(ip->i_d.di_anextents == 0); |
| 1171 | 1173 | ||
| 1172 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | 1174 | xfs_trans_ijoin(tp, ip, 0); |
| 1173 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 1175 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
| 1174 | 1176 | ||
| 1175 | switch (ip->i_d.di_format) { | 1177 | switch (ip->i_d.di_format) { |
| @@ -1191,7 +1193,7 @@ xfs_bmap_add_attrfork( | |||
| 1191 | default: | 1193 | default: |
| 1192 | ASSERT(0); | 1194 | ASSERT(0); |
| 1193 | error = XFS_ERROR(EINVAL); | 1195 | error = XFS_ERROR(EINVAL); |
| 1194 | goto error1; | 1196 | goto trans_cancel; |
| 1195 | } | 1197 | } |
| 1196 | 1198 | ||
| 1197 | ASSERT(ip->i_afp == NULL); | 1199 | ASSERT(ip->i_afp == NULL); |
| @@ -1219,7 +1221,7 @@ xfs_bmap_add_attrfork( | |||
| 1219 | if (logflags) | 1221 | if (logflags) |
| 1220 | xfs_trans_log_inode(tp, ip, logflags); | 1222 | xfs_trans_log_inode(tp, ip, logflags); |
| 1221 | if (error) | 1223 | if (error) |
| 1222 | goto error2; | 1224 | goto bmap_cancel; |
| 1223 | if (!xfs_sb_version_hasattr(&mp->m_sb) || | 1225 | if (!xfs_sb_version_hasattr(&mp->m_sb) || |
| 1224 | (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { | 1226 | (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { |
| 1225 | __int64_t sbfields = 0; | 1227 | __int64_t sbfields = 0; |
| @@ -1242,14 +1244,16 @@ xfs_bmap_add_attrfork( | |||
| 1242 | 1244 | ||
| 1243 | error = xfs_bmap_finish(&tp, &flist, &committed); | 1245 | error = xfs_bmap_finish(&tp, &flist, &committed); |
| 1244 | if (error) | 1246 | if (error) |
| 1245 | goto error2; | 1247 | goto bmap_cancel; |
| 1246 | return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 1248 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
| 1247 | error2: | 1249 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
| 1250 | return error; | ||
| 1251 | |||
| 1252 | bmap_cancel: | ||
| 1248 | xfs_bmap_cancel(&flist); | 1253 | xfs_bmap_cancel(&flist); |
| 1249 | error1: | 1254 | trans_cancel: |
| 1255 | xfs_trans_cancel(tp, cancel_flags); | ||
| 1250 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 1256 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
| 1251 | error0: | ||
| 1252 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); | ||
| 1253 | return error; | 1257 | return error; |
| 1254 | } | 1258 | } |
| 1255 | 1259 | ||
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 8367d6dc18c9..4f11ef011139 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c | |||
| @@ -157,7 +157,7 @@ xfs_ioc_trim( | |||
| 157 | struct xfs_mount *mp, | 157 | struct xfs_mount *mp, |
| 158 | struct fstrim_range __user *urange) | 158 | struct fstrim_range __user *urange) |
| 159 | { | 159 | { |
| 160 | struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; | 160 | struct request_queue *q = bdev_get_queue(mp->m_ddev_targp->bt_bdev); |
| 161 | unsigned int granularity = q->limits.discard_granularity; | 161 | unsigned int granularity = q->limits.discard_granularity; |
| 162 | struct fstrim_range range; | 162 | struct fstrim_range range; |
| 163 | xfs_daddr_t start, end, minlen; | 163 | xfs_daddr_t start, end, minlen; |
| @@ -180,7 +180,8 @@ xfs_ioc_trim( | |||
| 180 | * matter as trimming blocks is an advisory interface. | 180 | * matter as trimming blocks is an advisory interface. |
| 181 | */ | 181 | */ |
| 182 | if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || | 182 | if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || |
| 183 | range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp))) | 183 | range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) || |
| 184 | range.len < mp->m_sb.sb_blocksize) | ||
| 184 | return -XFS_ERROR(EINVAL); | 185 | return -XFS_ERROR(EINVAL); |
| 185 | 186 | ||
| 186 | start = BTOBB(range.start); | 187 | start = BTOBB(range.start); |
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index a6e54b3319bd..02fb943cbf22 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
| @@ -220,6 +220,8 @@ xfs_growfs_data_private( | |||
| 220 | */ | 220 | */ |
| 221 | nfree = 0; | 221 | nfree = 0; |
| 222 | for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) { | 222 | for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) { |
| 223 | __be32 *agfl_bno; | ||
| 224 | |||
| 223 | /* | 225 | /* |
| 224 | * AG freespace header block | 226 | * AG freespace header block |
| 225 | */ | 227 | */ |
| @@ -279,8 +281,10 @@ xfs_growfs_data_private( | |||
| 279 | agfl->agfl_seqno = cpu_to_be32(agno); | 281 | agfl->agfl_seqno = cpu_to_be32(agno); |
| 280 | uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid); | 282 | uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid); |
| 281 | } | 283 | } |
| 284 | |||
| 285 | agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp); | ||
| 282 | for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++) | 286 | for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++) |
| 283 | agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); | 287 | agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); |
| 284 | 288 | ||
| 285 | error = xfs_bwrite(bp); | 289 | error = xfs_bwrite(bp); |
| 286 | xfs_buf_relse(bp); | 290 | xfs_buf_relse(bp); |
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 4d613401a5e0..33ad9a77791f 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
| @@ -442,7 +442,8 @@ xfs_attrlist_by_handle( | |||
| 442 | return -XFS_ERROR(EPERM); | 442 | return -XFS_ERROR(EPERM); |
| 443 | if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) | 443 | if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) |
| 444 | return -XFS_ERROR(EFAULT); | 444 | return -XFS_ERROR(EFAULT); |
| 445 | if (al_hreq.buflen > XATTR_LIST_MAX) | 445 | if (al_hreq.buflen < sizeof(struct attrlist) || |
| 446 | al_hreq.buflen > XATTR_LIST_MAX) | ||
| 446 | return -XFS_ERROR(EINVAL); | 447 | return -XFS_ERROR(EINVAL); |
| 447 | 448 | ||
| 448 | /* | 449 | /* |
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index e8fb1231db81..a7992f8de9d3 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c | |||
| @@ -356,7 +356,8 @@ xfs_compat_attrlist_by_handle( | |||
| 356 | if (copy_from_user(&al_hreq, arg, | 356 | if (copy_from_user(&al_hreq, arg, |
| 357 | sizeof(compat_xfs_fsop_attrlist_handlereq_t))) | 357 | sizeof(compat_xfs_fsop_attrlist_handlereq_t))) |
| 358 | return -XFS_ERROR(EFAULT); | 358 | return -XFS_ERROR(EFAULT); |
| 359 | if (al_hreq.buflen > XATTR_LIST_MAX) | 359 | if (al_hreq.buflen < sizeof(struct attrlist) || |
| 360 | al_hreq.buflen > XATTR_LIST_MAX) | ||
| 360 | return -XFS_ERROR(EINVAL); | 361 | return -XFS_ERROR(EINVAL); |
| 361 | 362 | ||
| 362 | /* | 363 | /* |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index da88f167af78..02df7b408a26 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
| @@ -41,6 +41,7 @@ | |||
| 41 | #include "xfs_fsops.h" | 41 | #include "xfs_fsops.h" |
| 42 | #include "xfs_trace.h" | 42 | #include "xfs_trace.h" |
| 43 | #include "xfs_icache.h" | 43 | #include "xfs_icache.h" |
| 44 | #include "xfs_dinode.h" | ||
| 44 | 45 | ||
| 45 | 46 | ||
| 46 | #ifdef HAVE_PERCPU_SB | 47 | #ifdef HAVE_PERCPU_SB |
| @@ -718,8 +719,22 @@ xfs_mountfs( | |||
| 718 | * Set the inode cluster size. | 719 | * Set the inode cluster size. |
| 719 | * This may still be overridden by the file system | 720 | * This may still be overridden by the file system |
| 720 | * block size if it is larger than the chosen cluster size. | 721 | * block size if it is larger than the chosen cluster size. |
| 722 | * | ||
| 723 | * For v5 filesystems, scale the cluster size with the inode size to | ||
| 724 | * keep a constant ratio of inode per cluster buffer, but only if mkfs | ||
| 725 | * has set the inode alignment value appropriately for larger cluster | ||
| 726 | * sizes. | ||
| 721 | */ | 727 | */ |
| 722 | mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; | 728 | mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; |
| 729 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
| 730 | int new_size = mp->m_inode_cluster_size; | ||
| 731 | |||
| 732 | new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; | ||
| 733 | if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) | ||
| 734 | mp->m_inode_cluster_size = new_size; | ||
| 735 | xfs_info(mp, "Using inode cluster size of %d bytes", | ||
| 736 | mp->m_inode_cluster_size); | ||
| 737 | } | ||
| 723 | 738 | ||
| 724 | /* | 739 | /* |
| 725 | * Set inode alignment fields | 740 | * Set inode alignment fields |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 1d8101a10d8e..a466c5e5826e 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
| @@ -112,7 +112,7 @@ typedef struct xfs_mount { | |||
| 112 | __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ | 112 | __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ |
| 113 | __uint8_t m_agno_log; /* log #ag's */ | 113 | __uint8_t m_agno_log; /* log #ag's */ |
| 114 | __uint8_t m_agino_log; /* #bits for agino in inum */ | 114 | __uint8_t m_agino_log; /* #bits for agino in inum */ |
| 115 | __uint16_t m_inode_cluster_size;/* min inode buf size */ | 115 | uint m_inode_cluster_size;/* min inode buf size */ |
| 116 | uint m_blockmask; /* sb_blocksize-1 */ | 116 | uint m_blockmask; /* sb_blocksize-1 */ |
| 117 | uint m_blockwsize; /* sb_blocksize in words */ | 117 | uint m_blockwsize; /* sb_blocksize in words */ |
| 118 | uint m_blockwmask; /* blockwsize-1 */ | 118 | uint m_blockwmask; /* blockwsize-1 */ |
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 1bba7f60d94c..50c3f5614288 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c | |||
| @@ -111,12 +111,14 @@ xfs_trans_log_inode( | |||
| 111 | 111 | ||
| 112 | /* | 112 | /* |
| 113 | * First time we log the inode in a transaction, bump the inode change | 113 | * First time we log the inode in a transaction, bump the inode change |
| 114 | * counter if it is configured for this to occur. | 114 | * counter if it is configured for this to occur. We don't use |
| 115 | * inode_inc_iversion() because there is no need for extra locking around | ||
| 116 | * i_version as we already hold the inode locked exclusively for | ||
| 117 | * metadata modification. | ||
| 115 | */ | 118 | */ |
| 116 | if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) && | 119 | if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) && |
| 117 | IS_I_VERSION(VFS_I(ip))) { | 120 | IS_I_VERSION(VFS_I(ip))) { |
| 118 | inode_inc_iversion(VFS_I(ip)); | 121 | ip->i_d.di_changecount = ++VFS_I(ip)->i_version; |
| 119 | ip->i_d.di_changecount = VFS_I(ip)->i_version; | ||
| 120 | flags |= XFS_ILOG_CORE; | 122 | flags |= XFS_ILOG_CORE; |
| 121 | } | 123 | } |
| 122 | 124 | ||
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c index d53d9f0627a7..2fd59c0dae66 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/xfs_trans_resv.c | |||
| @@ -385,8 +385,7 @@ xfs_calc_ifree_reservation( | |||
| 385 | xfs_calc_inode_res(mp, 1) + | 385 | xfs_calc_inode_res(mp, 1) + |
| 386 | xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + | 386 | xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + |
| 387 | xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + | 387 | xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + |
| 388 | MAX((__uint16_t)XFS_FSB_TO_B(mp, 1), | 388 | max_t(uint, XFS_FSB_TO_B(mp, 1), XFS_INODE_CLUSTER_SIZE(mp)) + |
| 389 | XFS_INODE_CLUSTER_SIZE(mp)) + | ||
| 390 | xfs_calc_buf_res(1, 0) + | 389 | xfs_calc_buf_res(1, 0) + |
| 391 | xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + | 390 | xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + |
| 392 | mp->m_in_maxlevels, 0) + | 391 | mp->m_in_maxlevels, 0) + |
