diff options
Diffstat (limited to 'fs')
57 files changed, 918 insertions, 752 deletions
diff --git a/fs/affs/file.c b/fs/affs/file.c index 0548c53f41d5..22fc7c802d69 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c | |||
@@ -511,8 +511,6 @@ affs_do_readpage_ofs(struct page *page, unsigned to) | |||
511 | pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino, | 511 | pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino, |
512 | page->index, to); | 512 | page->index, to); |
513 | BUG_ON(to > PAGE_CACHE_SIZE); | 513 | BUG_ON(to > PAGE_CACHE_SIZE); |
514 | kmap(page); | ||
515 | data = page_address(page); | ||
516 | bsize = AFFS_SB(sb)->s_data_blksize; | 514 | bsize = AFFS_SB(sb)->s_data_blksize; |
517 | tmp = page->index << PAGE_CACHE_SHIFT; | 515 | tmp = page->index << PAGE_CACHE_SHIFT; |
518 | bidx = tmp / bsize; | 516 | bidx = tmp / bsize; |
@@ -524,14 +522,15 @@ affs_do_readpage_ofs(struct page *page, unsigned to) | |||
524 | return PTR_ERR(bh); | 522 | return PTR_ERR(bh); |
525 | tmp = min(bsize - boff, to - pos); | 523 | tmp = min(bsize - boff, to - pos); |
526 | BUG_ON(pos + tmp > to || tmp > bsize); | 524 | BUG_ON(pos + tmp > to || tmp > bsize); |
525 | data = kmap_atomic(page); | ||
527 | memcpy(data + pos, AFFS_DATA(bh) + boff, tmp); | 526 | memcpy(data + pos, AFFS_DATA(bh) + boff, tmp); |
527 | kunmap_atomic(data); | ||
528 | affs_brelse(bh); | 528 | affs_brelse(bh); |
529 | bidx++; | 529 | bidx++; |
530 | pos += tmp; | 530 | pos += tmp; |
531 | boff = 0; | 531 | boff = 0; |
532 | } | 532 | } |
533 | flush_dcache_page(page); | 533 | flush_dcache_page(page); |
534 | kunmap(page); | ||
535 | return 0; | 534 | return 0; |
536 | } | 535 | } |
537 | 536 | ||
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index c6d7d3dbd52a..75dd739ac3e6 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
@@ -537,8 +537,6 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, u | |||
537 | ino->dentry = dentry; | 537 | ino->dentry = dentry; |
538 | 538 | ||
539 | autofs4_add_active(dentry); | 539 | autofs4_add_active(dentry); |
540 | |||
541 | d_instantiate(dentry, NULL); | ||
542 | } | 540 | } |
543 | return NULL; | 541 | return NULL; |
544 | } | 542 | } |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 7b9cd49622b1..39b3a174a425 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -1730,43 +1730,25 @@ static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1730 | return __dax_fault(vma, vmf, blkdev_get_block, NULL); | 1730 | return __dax_fault(vma, vmf, blkdev_get_block, NULL); |
1731 | } | 1731 | } |
1732 | 1732 | ||
1733 | static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, | 1733 | static int blkdev_dax_pfn_mkwrite(struct vm_area_struct *vma, |
1734 | pmd_t *pmd, unsigned int flags) | 1734 | struct vm_fault *vmf) |
1735 | { | ||
1736 | return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL); | ||
1737 | } | ||
1738 | |||
1739 | static void blkdev_vm_open(struct vm_area_struct *vma) | ||
1740 | { | 1735 | { |
1741 | struct inode *bd_inode = bdev_file_inode(vma->vm_file); | 1736 | return dax_pfn_mkwrite(vma, vmf); |
1742 | struct block_device *bdev = I_BDEV(bd_inode); | ||
1743 | |||
1744 | inode_lock(bd_inode); | ||
1745 | bdev->bd_map_count++; | ||
1746 | inode_unlock(bd_inode); | ||
1747 | } | 1737 | } |
1748 | 1738 | ||
1749 | static void blkdev_vm_close(struct vm_area_struct *vma) | 1739 | static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, |
1740 | pmd_t *pmd, unsigned int flags) | ||
1750 | { | 1741 | { |
1751 | struct inode *bd_inode = bdev_file_inode(vma->vm_file); | 1742 | return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL); |
1752 | struct block_device *bdev = I_BDEV(bd_inode); | ||
1753 | |||
1754 | inode_lock(bd_inode); | ||
1755 | bdev->bd_map_count--; | ||
1756 | inode_unlock(bd_inode); | ||
1757 | } | 1743 | } |
1758 | 1744 | ||
1759 | static const struct vm_operations_struct blkdev_dax_vm_ops = { | 1745 | static const struct vm_operations_struct blkdev_dax_vm_ops = { |
1760 | .open = blkdev_vm_open, | ||
1761 | .close = blkdev_vm_close, | ||
1762 | .fault = blkdev_dax_fault, | 1746 | .fault = blkdev_dax_fault, |
1763 | .pmd_fault = blkdev_dax_pmd_fault, | 1747 | .pmd_fault = blkdev_dax_pmd_fault, |
1764 | .pfn_mkwrite = blkdev_dax_fault, | 1748 | .pfn_mkwrite = blkdev_dax_pfn_mkwrite, |
1765 | }; | 1749 | }; |
1766 | 1750 | ||
1767 | static const struct vm_operations_struct blkdev_default_vm_ops = { | 1751 | static const struct vm_operations_struct blkdev_default_vm_ops = { |
1768 | .open = blkdev_vm_open, | ||
1769 | .close = blkdev_vm_close, | ||
1770 | .fault = filemap_fault, | 1752 | .fault = filemap_fault, |
1771 | .map_pages = filemap_map_pages, | 1753 | .map_pages = filemap_map_pages, |
1772 | }; | 1754 | }; |
@@ -1774,18 +1756,14 @@ static const struct vm_operations_struct blkdev_default_vm_ops = { | |||
1774 | static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) | 1756 | static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) |
1775 | { | 1757 | { |
1776 | struct inode *bd_inode = bdev_file_inode(file); | 1758 | struct inode *bd_inode = bdev_file_inode(file); |
1777 | struct block_device *bdev = I_BDEV(bd_inode); | ||
1778 | 1759 | ||
1779 | file_accessed(file); | 1760 | file_accessed(file); |
1780 | inode_lock(bd_inode); | ||
1781 | bdev->bd_map_count++; | ||
1782 | if (IS_DAX(bd_inode)) { | 1761 | if (IS_DAX(bd_inode)) { |
1783 | vma->vm_ops = &blkdev_dax_vm_ops; | 1762 | vma->vm_ops = &blkdev_dax_vm_ops; |
1784 | vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; | 1763 | vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; |
1785 | } else { | 1764 | } else { |
1786 | vma->vm_ops = &blkdev_default_vm_ops; | 1765 | vma->vm_ops = &blkdev_default_vm_ops; |
1787 | } | 1766 | } |
1788 | inode_unlock(bd_inode); | ||
1789 | 1767 | ||
1790 | return 0; | 1768 | return 0; |
1791 | } | 1769 | } |
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 88d9af3d4581..5fb60ea7eee2 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
@@ -328,8 +328,8 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq, | |||
328 | list_add_tail(&work->ordered_list, &wq->ordered_list); | 328 | list_add_tail(&work->ordered_list, &wq->ordered_list); |
329 | spin_unlock_irqrestore(&wq->list_lock, flags); | 329 | spin_unlock_irqrestore(&wq->list_lock, flags); |
330 | } | 330 | } |
331 | queue_work(wq->normal_wq, &work->normal_work); | ||
332 | trace_btrfs_work_queued(work); | 331 | trace_btrfs_work_queued(work); |
332 | queue_work(wq->normal_wq, &work->normal_work); | ||
333 | } | 333 | } |
334 | 334 | ||
335 | void btrfs_queue_work(struct btrfs_workqueue *wq, | 335 | void btrfs_queue_work(struct btrfs_workqueue *wq, |
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index b90cd3776f8e..f6dac40f87ff 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
@@ -1406,7 +1406,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | |||
1406 | read_extent_buffer(eb, dest + bytes_left, | 1406 | read_extent_buffer(eb, dest + bytes_left, |
1407 | name_off, name_len); | 1407 | name_off, name_len); |
1408 | if (eb != eb_in) { | 1408 | if (eb != eb_in) { |
1409 | btrfs_tree_read_unlock_blocking(eb); | 1409 | if (!path->skip_locking) |
1410 | btrfs_tree_read_unlock_blocking(eb); | ||
1410 | free_extent_buffer(eb); | 1411 | free_extent_buffer(eb); |
1411 | } | 1412 | } |
1412 | ret = btrfs_find_item(fs_root, path, parent, 0, | 1413 | ret = btrfs_find_item(fs_root, path, parent, 0, |
@@ -1426,9 +1427,10 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, | |||
1426 | eb = path->nodes[0]; | 1427 | eb = path->nodes[0]; |
1427 | /* make sure we can use eb after releasing the path */ | 1428 | /* make sure we can use eb after releasing the path */ |
1428 | if (eb != eb_in) { | 1429 | if (eb != eb_in) { |
1429 | atomic_inc(&eb->refs); | 1430 | if (!path->skip_locking) |
1430 | btrfs_tree_read_lock(eb); | 1431 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); |
1431 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); | 1432 | path->nodes[0] = NULL; |
1433 | path->locks[0] = 0; | ||
1432 | } | 1434 | } |
1433 | btrfs_release_path(path); | 1435 | btrfs_release_path(path); |
1434 | iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); | 1436 | iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index c473c42d7d6c..3346cd8f9910 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -637,11 +637,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
637 | faili = nr_pages - 1; | 637 | faili = nr_pages - 1; |
638 | cb->nr_pages = nr_pages; | 638 | cb->nr_pages = nr_pages; |
639 | 639 | ||
640 | /* In the parent-locked case, we only locked the range we are | 640 | add_ra_bio_pages(inode, em_start + em_len, cb); |
641 | * interested in. In all other cases, we can opportunistically | ||
642 | * cache decompressed data that goes beyond the requested range. */ | ||
643 | if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED)) | ||
644 | add_ra_bio_pages(inode, em_start + em_len, cb); | ||
645 | 641 | ||
646 | /* include any pages we added in add_ra-bio_pages */ | 642 | /* include any pages we added in add_ra-bio_pages */ |
647 | uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; | 643 | uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 0be47e4b8136..b57daa895cea 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -1689,7 +1689,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list, | |||
1689 | * | 1689 | * |
1690 | */ | 1690 | */ |
1691 | int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, | 1691 | int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, |
1692 | struct list_head *ins_list) | 1692 | struct list_head *ins_list, bool *emitted) |
1693 | { | 1693 | { |
1694 | struct btrfs_dir_item *di; | 1694 | struct btrfs_dir_item *di; |
1695 | struct btrfs_delayed_item *curr, *next; | 1695 | struct btrfs_delayed_item *curr, *next; |
@@ -1733,6 +1733,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, | |||
1733 | 1733 | ||
1734 | if (over) | 1734 | if (over) |
1735 | return 1; | 1735 | return 1; |
1736 | *emitted = true; | ||
1736 | } | 1737 | } |
1737 | return 0; | 1738 | return 0; |
1738 | } | 1739 | } |
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index f70119f25421..0167853c84ae 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h | |||
@@ -144,7 +144,7 @@ void btrfs_put_delayed_items(struct list_head *ins_list, | |||
144 | int btrfs_should_delete_dir_index(struct list_head *del_list, | 144 | int btrfs_should_delete_dir_index(struct list_head *del_list, |
145 | u64 index); | 145 | u64 index); |
146 | int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, | 146 | int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, |
147 | struct list_head *ins_list); | 147 | struct list_head *ins_list, bool *emitted); |
148 | 148 | ||
149 | /* for init */ | 149 | /* for init */ |
150 | int __init btrfs_delayed_inode_init(void); | 150 | int __init btrfs_delayed_inode_init(void); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dd08e29f5117..4545e2e2ad45 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -182,6 +182,7 @@ static struct btrfs_lockdep_keyset { | |||
182 | { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, | 182 | { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, |
183 | { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, | 183 | { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, |
184 | { .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" }, | 184 | { .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" }, |
185 | { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" }, | ||
185 | { .id = 0, .name_stem = "tree" }, | 186 | { .id = 0, .name_stem = "tree" }, |
186 | }; | 187 | }; |
187 | 188 | ||
@@ -1787,7 +1788,6 @@ static int cleaner_kthread(void *arg) | |||
1787 | int again; | 1788 | int again; |
1788 | struct btrfs_trans_handle *trans; | 1789 | struct btrfs_trans_handle *trans; |
1789 | 1790 | ||
1790 | set_freezable(); | ||
1791 | do { | 1791 | do { |
1792 | again = 0; | 1792 | again = 0; |
1793 | 1793 | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 2e7c97a3f344..392592dc7010 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -2897,12 +2897,11 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
2897 | struct block_device *bdev; | 2897 | struct block_device *bdev; |
2898 | int ret; | 2898 | int ret; |
2899 | int nr = 0; | 2899 | int nr = 0; |
2900 | int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED; | ||
2901 | size_t pg_offset = 0; | 2900 | size_t pg_offset = 0; |
2902 | size_t iosize; | 2901 | size_t iosize; |
2903 | size_t disk_io_size; | 2902 | size_t disk_io_size; |
2904 | size_t blocksize = inode->i_sb->s_blocksize; | 2903 | size_t blocksize = inode->i_sb->s_blocksize; |
2905 | unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED; | 2904 | unsigned long this_bio_flag = 0; |
2906 | 2905 | ||
2907 | set_page_extent_mapped(page); | 2906 | set_page_extent_mapped(page); |
2908 | 2907 | ||
@@ -2942,18 +2941,16 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
2942 | kunmap_atomic(userpage); | 2941 | kunmap_atomic(userpage); |
2943 | set_extent_uptodate(tree, cur, cur + iosize - 1, | 2942 | set_extent_uptodate(tree, cur, cur + iosize - 1, |
2944 | &cached, GFP_NOFS); | 2943 | &cached, GFP_NOFS); |
2945 | if (!parent_locked) | 2944 | unlock_extent_cached(tree, cur, |
2946 | unlock_extent_cached(tree, cur, | 2945 | cur + iosize - 1, |
2947 | cur + iosize - 1, | 2946 | &cached, GFP_NOFS); |
2948 | &cached, GFP_NOFS); | ||
2949 | break; | 2947 | break; |
2950 | } | 2948 | } |
2951 | em = __get_extent_map(inode, page, pg_offset, cur, | 2949 | em = __get_extent_map(inode, page, pg_offset, cur, |
2952 | end - cur + 1, get_extent, em_cached); | 2950 | end - cur + 1, get_extent, em_cached); |
2953 | if (IS_ERR_OR_NULL(em)) { | 2951 | if (IS_ERR_OR_NULL(em)) { |
2954 | SetPageError(page); | 2952 | SetPageError(page); |
2955 | if (!parent_locked) | 2953 | unlock_extent(tree, cur, end); |
2956 | unlock_extent(tree, cur, end); | ||
2957 | break; | 2954 | break; |
2958 | } | 2955 | } |
2959 | extent_offset = cur - em->start; | 2956 | extent_offset = cur - em->start; |
@@ -3038,12 +3035,9 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
3038 | 3035 | ||
3039 | set_extent_uptodate(tree, cur, cur + iosize - 1, | 3036 | set_extent_uptodate(tree, cur, cur + iosize - 1, |
3040 | &cached, GFP_NOFS); | 3037 | &cached, GFP_NOFS); |
3041 | if (parent_locked) | 3038 | unlock_extent_cached(tree, cur, |
3042 | free_extent_state(cached); | 3039 | cur + iosize - 1, |
3043 | else | 3040 | &cached, GFP_NOFS); |
3044 | unlock_extent_cached(tree, cur, | ||
3045 | cur + iosize - 1, | ||
3046 | &cached, GFP_NOFS); | ||
3047 | cur = cur + iosize; | 3041 | cur = cur + iosize; |
3048 | pg_offset += iosize; | 3042 | pg_offset += iosize; |
3049 | continue; | 3043 | continue; |
@@ -3052,8 +3046,7 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
3052 | if (test_range_bit(tree, cur, cur_end, | 3046 | if (test_range_bit(tree, cur, cur_end, |
3053 | EXTENT_UPTODATE, 1, NULL)) { | 3047 | EXTENT_UPTODATE, 1, NULL)) { |
3054 | check_page_uptodate(tree, page); | 3048 | check_page_uptodate(tree, page); |
3055 | if (!parent_locked) | 3049 | unlock_extent(tree, cur, cur + iosize - 1); |
3056 | unlock_extent(tree, cur, cur + iosize - 1); | ||
3057 | cur = cur + iosize; | 3050 | cur = cur + iosize; |
3058 | pg_offset += iosize; | 3051 | pg_offset += iosize; |
3059 | continue; | 3052 | continue; |
@@ -3063,8 +3056,7 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
3063 | */ | 3056 | */ |
3064 | if (block_start == EXTENT_MAP_INLINE) { | 3057 | if (block_start == EXTENT_MAP_INLINE) { |
3065 | SetPageError(page); | 3058 | SetPageError(page); |
3066 | if (!parent_locked) | 3059 | unlock_extent(tree, cur, cur + iosize - 1); |
3067 | unlock_extent(tree, cur, cur + iosize - 1); | ||
3068 | cur = cur + iosize; | 3060 | cur = cur + iosize; |
3069 | pg_offset += iosize; | 3061 | pg_offset += iosize; |
3070 | continue; | 3062 | continue; |
@@ -3083,8 +3075,7 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
3083 | *bio_flags = this_bio_flag; | 3075 | *bio_flags = this_bio_flag; |
3084 | } else { | 3076 | } else { |
3085 | SetPageError(page); | 3077 | SetPageError(page); |
3086 | if (!parent_locked) | 3078 | unlock_extent(tree, cur, cur + iosize - 1); |
3087 | unlock_extent(tree, cur, cur + iosize - 1); | ||
3088 | } | 3079 | } |
3089 | cur = cur + iosize; | 3080 | cur = cur + iosize; |
3090 | pg_offset += iosize; | 3081 | pg_offset += iosize; |
@@ -3213,20 +3204,6 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page, | |||
3213 | return ret; | 3204 | return ret; |
3214 | } | 3205 | } |
3215 | 3206 | ||
3216 | int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page, | ||
3217 | get_extent_t *get_extent, int mirror_num) | ||
3218 | { | ||
3219 | struct bio *bio = NULL; | ||
3220 | unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED; | ||
3221 | int ret; | ||
3222 | |||
3223 | ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num, | ||
3224 | &bio_flags, READ, NULL); | ||
3225 | if (bio) | ||
3226 | ret = submit_one_bio(READ, bio, mirror_num, bio_flags); | ||
3227 | return ret; | ||
3228 | } | ||
3229 | |||
3230 | static noinline void update_nr_written(struct page *page, | 3207 | static noinline void update_nr_written(struct page *page, |
3231 | struct writeback_control *wbc, | 3208 | struct writeback_control *wbc, |
3232 | unsigned long nr_written) | 3209 | unsigned long nr_written) |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 0377413bd4b9..880d5292e972 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -29,7 +29,6 @@ | |||
29 | */ | 29 | */ |
30 | #define EXTENT_BIO_COMPRESSED 1 | 30 | #define EXTENT_BIO_COMPRESSED 1 |
31 | #define EXTENT_BIO_TREE_LOG 2 | 31 | #define EXTENT_BIO_TREE_LOG 2 |
32 | #define EXTENT_BIO_PARENT_LOCKED 4 | ||
33 | #define EXTENT_BIO_FLAG_SHIFT 16 | 32 | #define EXTENT_BIO_FLAG_SHIFT 16 |
34 | 33 | ||
35 | /* these are bit numbers for test/set bit */ | 34 | /* these are bit numbers for test/set bit */ |
@@ -210,8 +209,6 @@ static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end) | |||
210 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end); | 209 | int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end); |
211 | int extent_read_full_page(struct extent_io_tree *tree, struct page *page, | 210 | int extent_read_full_page(struct extent_io_tree *tree, struct page *page, |
212 | get_extent_t *get_extent, int mirror_num); | 211 | get_extent_t *get_extent, int mirror_num); |
213 | int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page, | ||
214 | get_extent_t *get_extent, int mirror_num); | ||
215 | int __init extent_io_init(void); | 212 | int __init extent_io_init(void); |
216 | void extent_io_exit(void); | 213 | void extent_io_exit(void); |
217 | 214 | ||
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 393e36bd5845..53dbeaf6ce94 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c | |||
@@ -153,6 +153,20 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize) | |||
153 | 153 | ||
154 | static unsigned long *alloc_bitmap(u32 bitmap_size) | 154 | static unsigned long *alloc_bitmap(u32 bitmap_size) |
155 | { | 155 | { |
156 | void *mem; | ||
157 | |||
158 | /* | ||
159 | * The allocation size varies, observed numbers were < 4K up to 16K. | ||
160 | * Using vmalloc unconditionally would be too heavy, we'll try | ||
161 | * contiguous allocations first. | ||
162 | */ | ||
163 | if (bitmap_size <= PAGE_SIZE) | ||
164 | return kzalloc(bitmap_size, GFP_NOFS); | ||
165 | |||
166 | mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN); | ||
167 | if (mem) | ||
168 | return mem; | ||
169 | |||
156 | return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO, | 170 | return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO, |
157 | PAGE_KERNEL); | 171 | PAGE_KERNEL); |
158 | } | 172 | } |
@@ -289,7 +303,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, | |||
289 | 303 | ||
290 | ret = 0; | 304 | ret = 0; |
291 | out: | 305 | out: |
292 | vfree(bitmap); | 306 | kvfree(bitmap); |
293 | if (ret) | 307 | if (ret) |
294 | btrfs_abort_transaction(trans, root, ret); | 308 | btrfs_abort_transaction(trans, root, ret); |
295 | return ret; | 309 | return ret; |
@@ -438,7 +452,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans, | |||
438 | 452 | ||
439 | ret = 0; | 453 | ret = 0; |
440 | out: | 454 | out: |
441 | vfree(bitmap); | 455 | kvfree(bitmap); |
442 | if (ret) | 456 | if (ret) |
443 | btrfs_abort_transaction(trans, root, ret); | 457 | btrfs_abort_transaction(trans, root, ret); |
444 | return ret; | 458 | return ret; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e28f3d4691af..151b7c71b868 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -5717,6 +5717,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) | |||
5717 | char *name_ptr; | 5717 | char *name_ptr; |
5718 | int name_len; | 5718 | int name_len; |
5719 | int is_curr = 0; /* ctx->pos points to the current index? */ | 5719 | int is_curr = 0; /* ctx->pos points to the current index? */ |
5720 | bool emitted; | ||
5720 | 5721 | ||
5721 | /* FIXME, use a real flag for deciding about the key type */ | 5722 | /* FIXME, use a real flag for deciding about the key type */ |
5722 | if (root->fs_info->tree_root == root) | 5723 | if (root->fs_info->tree_root == root) |
@@ -5745,6 +5746,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) | |||
5745 | if (ret < 0) | 5746 | if (ret < 0) |
5746 | goto err; | 5747 | goto err; |
5747 | 5748 | ||
5749 | emitted = false; | ||
5748 | while (1) { | 5750 | while (1) { |
5749 | leaf = path->nodes[0]; | 5751 | leaf = path->nodes[0]; |
5750 | slot = path->slots[0]; | 5752 | slot = path->slots[0]; |
@@ -5824,6 +5826,7 @@ skip: | |||
5824 | 5826 | ||
5825 | if (over) | 5827 | if (over) |
5826 | goto nopos; | 5828 | goto nopos; |
5829 | emitted = true; | ||
5827 | di_len = btrfs_dir_name_len(leaf, di) + | 5830 | di_len = btrfs_dir_name_len(leaf, di) + |
5828 | btrfs_dir_data_len(leaf, di) + sizeof(*di); | 5831 | btrfs_dir_data_len(leaf, di) + sizeof(*di); |
5829 | di_cur += di_len; | 5832 | di_cur += di_len; |
@@ -5836,11 +5839,20 @@ next: | |||
5836 | if (key_type == BTRFS_DIR_INDEX_KEY) { | 5839 | if (key_type == BTRFS_DIR_INDEX_KEY) { |
5837 | if (is_curr) | 5840 | if (is_curr) |
5838 | ctx->pos++; | 5841 | ctx->pos++; |
5839 | ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list); | 5842 | ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list, &emitted); |
5840 | if (ret) | 5843 | if (ret) |
5841 | goto nopos; | 5844 | goto nopos; |
5842 | } | 5845 | } |
5843 | 5846 | ||
5847 | /* | ||
5848 | * If we haven't emitted any dir entry, we must not touch ctx->pos as | ||
5849 | * it was was set to the termination value in previous call. We assume | ||
5850 | * that "." and ".." were emitted if we reach this point and set the | ||
5851 | * termination value as well for an empty directory. | ||
5852 | */ | ||
5853 | if (ctx->pos > 2 && !emitted) | ||
5854 | goto nopos; | ||
5855 | |||
5844 | /* Reached end of directory/root. Bump pos past the last item. */ | 5856 | /* Reached end of directory/root. Bump pos past the last item. */ |
5845 | ctx->pos++; | 5857 | ctx->pos++; |
5846 | 5858 | ||
@@ -7116,21 +7128,41 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
7116 | if (ret) | 7128 | if (ret) |
7117 | return ERR_PTR(ret); | 7129 | return ERR_PTR(ret); |
7118 | 7130 | ||
7119 | em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, | 7131 | /* |
7120 | ins.offset, ins.offset, ins.offset, 0); | 7132 | * Create the ordered extent before the extent map. This is to avoid |
7121 | if (IS_ERR(em)) { | 7133 | * races with the fast fsync path that would lead to it logging file |
7122 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); | 7134 | * extent items that point to disk extents that were not yet written to. |
7123 | return em; | 7135 | * The fast fsync path collects ordered extents into a local list and |
7124 | } | 7136 | * then collects all the new extent maps, so we must create the ordered |
7125 | 7137 | * extent first and make sure the fast fsync path collects any new | |
7138 | * ordered extents after collecting new extent maps as well. | ||
7139 | * The fsync path simply can not rely on inode_dio_wait() because it | ||
7140 | * causes deadlock with AIO. | ||
7141 | */ | ||
7126 | ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, | 7142 | ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, |
7127 | ins.offset, ins.offset, 0); | 7143 | ins.offset, ins.offset, 0); |
7128 | if (ret) { | 7144 | if (ret) { |
7129 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); | 7145 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); |
7130 | free_extent_map(em); | ||
7131 | return ERR_PTR(ret); | 7146 | return ERR_PTR(ret); |
7132 | } | 7147 | } |
7133 | 7148 | ||
7149 | em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, | ||
7150 | ins.offset, ins.offset, ins.offset, 0); | ||
7151 | if (IS_ERR(em)) { | ||
7152 | struct btrfs_ordered_extent *oe; | ||
7153 | |||
7154 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); | ||
7155 | oe = btrfs_lookup_ordered_extent(inode, start); | ||
7156 | ASSERT(oe); | ||
7157 | if (WARN_ON(!oe)) | ||
7158 | return em; | ||
7159 | set_bit(BTRFS_ORDERED_IOERR, &oe->flags); | ||
7160 | set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags); | ||
7161 | btrfs_remove_ordered_extent(inode, oe); | ||
7162 | /* Once for our lookup and once for the ordered extents tree. */ | ||
7163 | btrfs_put_ordered_extent(oe); | ||
7164 | btrfs_put_ordered_extent(oe); | ||
7165 | } | ||
7134 | return em; | 7166 | return em; |
7135 | } | 7167 | } |
7136 | 7168 | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 952172ca7e45..48aee9846329 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -2794,24 +2794,29 @@ out: | |||
2794 | static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) | 2794 | static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) |
2795 | { | 2795 | { |
2796 | struct page *page; | 2796 | struct page *page; |
2797 | struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; | ||
2798 | 2797 | ||
2799 | page = grab_cache_page(inode->i_mapping, index); | 2798 | page = grab_cache_page(inode->i_mapping, index); |
2800 | if (!page) | 2799 | if (!page) |
2801 | return NULL; | 2800 | return ERR_PTR(-ENOMEM); |
2802 | 2801 | ||
2803 | if (!PageUptodate(page)) { | 2802 | if (!PageUptodate(page)) { |
2804 | if (extent_read_full_page_nolock(tree, page, btrfs_get_extent, | 2803 | int ret; |
2805 | 0)) | 2804 | |
2806 | return NULL; | 2805 | ret = btrfs_readpage(NULL, page); |
2806 | if (ret) | ||
2807 | return ERR_PTR(ret); | ||
2807 | lock_page(page); | 2808 | lock_page(page); |
2808 | if (!PageUptodate(page)) { | 2809 | if (!PageUptodate(page)) { |
2809 | unlock_page(page); | 2810 | unlock_page(page); |
2810 | page_cache_release(page); | 2811 | page_cache_release(page); |
2811 | return NULL; | 2812 | return ERR_PTR(-EIO); |
2813 | } | ||
2814 | if (page->mapping != inode->i_mapping) { | ||
2815 | unlock_page(page); | ||
2816 | page_cache_release(page); | ||
2817 | return ERR_PTR(-EAGAIN); | ||
2812 | } | 2818 | } |
2813 | } | 2819 | } |
2814 | unlock_page(page); | ||
2815 | 2820 | ||
2816 | return page; | 2821 | return page; |
2817 | } | 2822 | } |
@@ -2823,17 +2828,31 @@ static int gather_extent_pages(struct inode *inode, struct page **pages, | |||
2823 | pgoff_t index = off >> PAGE_CACHE_SHIFT; | 2828 | pgoff_t index = off >> PAGE_CACHE_SHIFT; |
2824 | 2829 | ||
2825 | for (i = 0; i < num_pages; i++) { | 2830 | for (i = 0; i < num_pages; i++) { |
2831 | again: | ||
2826 | pages[i] = extent_same_get_page(inode, index + i); | 2832 | pages[i] = extent_same_get_page(inode, index + i); |
2827 | if (!pages[i]) | 2833 | if (IS_ERR(pages[i])) { |
2828 | return -ENOMEM; | 2834 | int err = PTR_ERR(pages[i]); |
2835 | |||
2836 | if (err == -EAGAIN) | ||
2837 | goto again; | ||
2838 | pages[i] = NULL; | ||
2839 | return err; | ||
2840 | } | ||
2829 | } | 2841 | } |
2830 | return 0; | 2842 | return 0; |
2831 | } | 2843 | } |
2832 | 2844 | ||
2833 | static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) | 2845 | static int lock_extent_range(struct inode *inode, u64 off, u64 len, |
2846 | bool retry_range_locking) | ||
2834 | { | 2847 | { |
2835 | /* do any pending delalloc/csum calc on src, one way or | 2848 | /* |
2836 | another, and lock file content */ | 2849 | * Do any pending delalloc/csum calculations on inode, one way or |
2850 | * another, and lock file content. | ||
2851 | * The locking order is: | ||
2852 | * | ||
2853 | * 1) pages | ||
2854 | * 2) range in the inode's io tree | ||
2855 | */ | ||
2837 | while (1) { | 2856 | while (1) { |
2838 | struct btrfs_ordered_extent *ordered; | 2857 | struct btrfs_ordered_extent *ordered; |
2839 | lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); | 2858 | lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); |
@@ -2851,8 +2870,11 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) | |||
2851 | unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); | 2870 | unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); |
2852 | if (ordered) | 2871 | if (ordered) |
2853 | btrfs_put_ordered_extent(ordered); | 2872 | btrfs_put_ordered_extent(ordered); |
2873 | if (!retry_range_locking) | ||
2874 | return -EAGAIN; | ||
2854 | btrfs_wait_ordered_range(inode, off, len); | 2875 | btrfs_wait_ordered_range(inode, off, len); |
2855 | } | 2876 | } |
2877 | return 0; | ||
2856 | } | 2878 | } |
2857 | 2879 | ||
2858 | static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) | 2880 | static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) |
@@ -2877,15 +2899,24 @@ static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, | |||
2877 | unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); | 2899 | unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); |
2878 | } | 2900 | } |
2879 | 2901 | ||
2880 | static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, | 2902 | static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1, |
2881 | struct inode *inode2, u64 loff2, u64 len) | 2903 | struct inode *inode2, u64 loff2, u64 len, |
2904 | bool retry_range_locking) | ||
2882 | { | 2905 | { |
2906 | int ret; | ||
2907 | |||
2883 | if (inode1 < inode2) { | 2908 | if (inode1 < inode2) { |
2884 | swap(inode1, inode2); | 2909 | swap(inode1, inode2); |
2885 | swap(loff1, loff2); | 2910 | swap(loff1, loff2); |
2886 | } | 2911 | } |
2887 | lock_extent_range(inode1, loff1, len); | 2912 | ret = lock_extent_range(inode1, loff1, len, retry_range_locking); |
2888 | lock_extent_range(inode2, loff2, len); | 2913 | if (ret) |
2914 | return ret; | ||
2915 | ret = lock_extent_range(inode2, loff2, len, retry_range_locking); | ||
2916 | if (ret) | ||
2917 | unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, | ||
2918 | loff1 + len - 1); | ||
2919 | return ret; | ||
2889 | } | 2920 | } |
2890 | 2921 | ||
2891 | struct cmp_pages { | 2922 | struct cmp_pages { |
@@ -2901,11 +2932,15 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp) | |||
2901 | 2932 | ||
2902 | for (i = 0; i < cmp->num_pages; i++) { | 2933 | for (i = 0; i < cmp->num_pages; i++) { |
2903 | pg = cmp->src_pages[i]; | 2934 | pg = cmp->src_pages[i]; |
2904 | if (pg) | 2935 | if (pg) { |
2936 | unlock_page(pg); | ||
2905 | page_cache_release(pg); | 2937 | page_cache_release(pg); |
2938 | } | ||
2906 | pg = cmp->dst_pages[i]; | 2939 | pg = cmp->dst_pages[i]; |
2907 | if (pg) | 2940 | if (pg) { |
2941 | unlock_page(pg); | ||
2908 | page_cache_release(pg); | 2942 | page_cache_release(pg); |
2943 | } | ||
2909 | } | 2944 | } |
2910 | kfree(cmp->src_pages); | 2945 | kfree(cmp->src_pages); |
2911 | kfree(cmp->dst_pages); | 2946 | kfree(cmp->dst_pages); |
@@ -2966,6 +3001,8 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, | |||
2966 | 3001 | ||
2967 | src_page = cmp->src_pages[i]; | 3002 | src_page = cmp->src_pages[i]; |
2968 | dst_page = cmp->dst_pages[i]; | 3003 | dst_page = cmp->dst_pages[i]; |
3004 | ASSERT(PageLocked(src_page)); | ||
3005 | ASSERT(PageLocked(dst_page)); | ||
2969 | 3006 | ||
2970 | addr = kmap_atomic(src_page); | 3007 | addr = kmap_atomic(src_page); |
2971 | dst_addr = kmap_atomic(dst_page); | 3008 | dst_addr = kmap_atomic(dst_page); |
@@ -3078,14 +3115,46 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, | |||
3078 | goto out_unlock; | 3115 | goto out_unlock; |
3079 | } | 3116 | } |
3080 | 3117 | ||
3118 | again: | ||
3081 | ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); | 3119 | ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); |
3082 | if (ret) | 3120 | if (ret) |
3083 | goto out_unlock; | 3121 | goto out_unlock; |
3084 | 3122 | ||
3085 | if (same_inode) | 3123 | if (same_inode) |
3086 | lock_extent_range(src, same_lock_start, same_lock_len); | 3124 | ret = lock_extent_range(src, same_lock_start, same_lock_len, |
3125 | false); | ||
3087 | else | 3126 | else |
3088 | btrfs_double_extent_lock(src, loff, dst, dst_loff, len); | 3127 | ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len, |
3128 | false); | ||
3129 | /* | ||
3130 | * If one of the inodes has dirty pages in the respective range or | ||
3131 | * ordered extents, we need to flush delalloc and wait for all ordered | ||
3132 | * extents in the range. We must unlock the pages and the ranges in the | ||
3133 | * io trees to avoid deadlocks when flushing delalloc (requires locking | ||
3134 | * pages) and when waiting for ordered extents to complete (they require | ||
3135 | * range locking). | ||
3136 | */ | ||
3137 | if (ret == -EAGAIN) { | ||
3138 | /* | ||
3139 | * Ranges in the io trees already unlocked. Now unlock all | ||
3140 | * pages before waiting for all IO to complete. | ||
3141 | */ | ||
3142 | btrfs_cmp_data_free(&cmp); | ||
3143 | if (same_inode) { | ||
3144 | btrfs_wait_ordered_range(src, same_lock_start, | ||
3145 | same_lock_len); | ||
3146 | } else { | ||
3147 | btrfs_wait_ordered_range(src, loff, len); | ||
3148 | btrfs_wait_ordered_range(dst, dst_loff, len); | ||
3149 | } | ||
3150 | goto again; | ||
3151 | } | ||
3152 | ASSERT(ret == 0); | ||
3153 | if (WARN_ON(ret)) { | ||
3154 | /* ranges in the io trees already unlocked */ | ||
3155 | btrfs_cmp_data_free(&cmp); | ||
3156 | return ret; | ||
3157 | } | ||
3089 | 3158 | ||
3090 | /* pass original length for comparison so we stay within i_size */ | 3159 | /* pass original length for comparison so we stay within i_size */ |
3091 | ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); | 3160 | ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); |
@@ -3795,9 +3864,15 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, | |||
3795 | u64 lock_start = min_t(u64, off, destoff); | 3864 | u64 lock_start = min_t(u64, off, destoff); |
3796 | u64 lock_len = max_t(u64, off, destoff) + len - lock_start; | 3865 | u64 lock_len = max_t(u64, off, destoff) + len - lock_start; |
3797 | 3866 | ||
3798 | lock_extent_range(src, lock_start, lock_len); | 3867 | ret = lock_extent_range(src, lock_start, lock_len, true); |
3799 | } else { | 3868 | } else { |
3800 | btrfs_double_extent_lock(src, off, inode, destoff, len); | 3869 | ret = btrfs_double_extent_lock(src, off, inode, destoff, len, |
3870 | true); | ||
3871 | } | ||
3872 | ASSERT(ret == 0); | ||
3873 | if (WARN_ON(ret)) { | ||
3874 | /* ranges in the io trees already unlocked */ | ||
3875 | goto out_unlock; | ||
3801 | } | 3876 | } |
3802 | 3877 | ||
3803 | ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); | 3878 | ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index fd1c4d982463..2bd0011450df 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -575,7 +575,8 @@ static int is_cowonly_root(u64 root_objectid) | |||
575 | root_objectid == BTRFS_TREE_LOG_OBJECTID || | 575 | root_objectid == BTRFS_TREE_LOG_OBJECTID || |
576 | root_objectid == BTRFS_CSUM_TREE_OBJECTID || | 576 | root_objectid == BTRFS_CSUM_TREE_OBJECTID || |
577 | root_objectid == BTRFS_UUID_TREE_OBJECTID || | 577 | root_objectid == BTRFS_UUID_TREE_OBJECTID || |
578 | root_objectid == BTRFS_QUOTA_TREE_OBJECTID) | 578 | root_objectid == BTRFS_QUOTA_TREE_OBJECTID || |
579 | root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) | ||
579 | return 1; | 580 | return 1; |
580 | return 0; | 581 | return 0; |
581 | } | 582 | } |
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index e0ac85949067..539e7b5e3f86 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
@@ -202,6 +202,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF); | |||
202 | BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56); | 202 | BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56); |
203 | BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA); | 203 | BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA); |
204 | BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES); | 204 | BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES); |
205 | BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE); | ||
205 | 206 | ||
206 | static struct attribute *btrfs_supported_feature_attrs[] = { | 207 | static struct attribute *btrfs_supported_feature_attrs[] = { |
207 | BTRFS_FEAT_ATTR_PTR(mixed_backref), | 208 | BTRFS_FEAT_ATTR_PTR(mixed_backref), |
@@ -213,6 +214,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = { | |||
213 | BTRFS_FEAT_ATTR_PTR(raid56), | 214 | BTRFS_FEAT_ATTR_PTR(raid56), |
214 | BTRFS_FEAT_ATTR_PTR(skinny_metadata), | 215 | BTRFS_FEAT_ATTR_PTR(skinny_metadata), |
215 | BTRFS_FEAT_ATTR_PTR(no_holes), | 216 | BTRFS_FEAT_ATTR_PTR(no_holes), |
217 | BTRFS_FEAT_ATTR_PTR(free_space_tree), | ||
216 | NULL | 218 | NULL |
217 | }; | 219 | }; |
218 | 220 | ||
@@ -780,6 +782,39 @@ failure: | |||
780 | return error; | 782 | return error; |
781 | } | 783 | } |
782 | 784 | ||
785 | |||
786 | /* | ||
787 | * Change per-fs features in /sys/fs/btrfs/UUID/features to match current | ||
788 | * values in superblock. Call after any changes to incompat/compat_ro flags | ||
789 | */ | ||
790 | void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info, | ||
791 | u64 bit, enum btrfs_feature_set set) | ||
792 | { | ||
793 | struct btrfs_fs_devices *fs_devs; | ||
794 | struct kobject *fsid_kobj; | ||
795 | u64 features; | ||
796 | int ret; | ||
797 | |||
798 | if (!fs_info) | ||
799 | return; | ||
800 | |||
801 | features = get_features(fs_info, set); | ||
802 | ASSERT(bit & supported_feature_masks[set]); | ||
803 | |||
804 | fs_devs = fs_info->fs_devices; | ||
805 | fsid_kobj = &fs_devs->fsid_kobj; | ||
806 | |||
807 | if (!fsid_kobj->state_initialized) | ||
808 | return; | ||
809 | |||
810 | /* | ||
811 | * FIXME: this is too heavy to update just one value, ideally we'd like | ||
812 | * to use sysfs_update_group but some refactoring is needed first. | ||
813 | */ | ||
814 | sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group); | ||
815 | ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group); | ||
816 | } | ||
817 | |||
783 | static int btrfs_init_debugfs(void) | 818 | static int btrfs_init_debugfs(void) |
784 | { | 819 | { |
785 | #ifdef CONFIG_DEBUG_FS | 820 | #ifdef CONFIG_DEBUG_FS |
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index 9c09522125a6..d7da1a4c2f6c 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h | |||
@@ -56,7 +56,7 @@ static struct btrfs_feature_attr btrfs_attr_##_name = { \ | |||
56 | #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \ | 56 | #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \ |
57 | BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature) | 57 | BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature) |
58 | #define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \ | 58 | #define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \ |
59 | BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature) | 59 | BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature) |
60 | #define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \ | 60 | #define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \ |
61 | BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature) | 61 | BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature) |
62 | 62 | ||
@@ -90,4 +90,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs, | |||
90 | struct kobject *parent); | 90 | struct kobject *parent); |
91 | int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs); | 91 | int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs); |
92 | void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs); | 92 | void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs); |
93 | void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info, | ||
94 | u64 bit, enum btrfs_feature_set set); | ||
95 | |||
93 | #endif /* _BTRFS_SYSFS_H_ */ | 96 | #endif /* _BTRFS_SYSFS_H_ */ |
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index b1d920b30070..0e1e61a7ec23 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c | |||
@@ -82,18 +82,18 @@ void btrfs_destroy_test_fs(void) | |||
82 | struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void) | 82 | struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void) |
83 | { | 83 | { |
84 | struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info), | 84 | struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info), |
85 | GFP_NOFS); | 85 | GFP_KERNEL); |
86 | 86 | ||
87 | if (!fs_info) | 87 | if (!fs_info) |
88 | return fs_info; | 88 | return fs_info; |
89 | fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices), | 89 | fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices), |
90 | GFP_NOFS); | 90 | GFP_KERNEL); |
91 | if (!fs_info->fs_devices) { | 91 | if (!fs_info->fs_devices) { |
92 | kfree(fs_info); | 92 | kfree(fs_info); |
93 | return NULL; | 93 | return NULL; |
94 | } | 94 | } |
95 | fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block), | 95 | fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block), |
96 | GFP_NOFS); | 96 | GFP_KERNEL); |
97 | if (!fs_info->super_copy) { | 97 | if (!fs_info->super_copy) { |
98 | kfree(fs_info->fs_devices); | 98 | kfree(fs_info->fs_devices); |
99 | kfree(fs_info); | 99 | kfree(fs_info); |
@@ -180,11 +180,11 @@ btrfs_alloc_dummy_block_group(unsigned long length) | |||
180 | { | 180 | { |
181 | struct btrfs_block_group_cache *cache; | 181 | struct btrfs_block_group_cache *cache; |
182 | 182 | ||
183 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | 183 | cache = kzalloc(sizeof(*cache), GFP_KERNEL); |
184 | if (!cache) | 184 | if (!cache) |
185 | return NULL; | 185 | return NULL; |
186 | cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), | 186 | cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), |
187 | GFP_NOFS); | 187 | GFP_KERNEL); |
188 | if (!cache->free_space_ctl) { | 188 | if (!cache->free_space_ctl) { |
189 | kfree(cache); | 189 | kfree(cache); |
190 | return NULL; | 190 | return NULL; |
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index e29fa297e053..669b58201e36 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c | |||
@@ -94,7 +94,7 @@ static int test_find_delalloc(void) | |||
94 | * test. | 94 | * test. |
95 | */ | 95 | */ |
96 | for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) { | 96 | for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) { |
97 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); | 97 | page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL); |
98 | if (!page) { | 98 | if (!page) { |
99 | test_msg("Failed to allocate test page\n"); | 99 | test_msg("Failed to allocate test page\n"); |
100 | ret = -ENOMEM; | 100 | ret = -ENOMEM; |
@@ -113,7 +113,7 @@ static int test_find_delalloc(void) | |||
113 | * |--- delalloc ---| | 113 | * |--- delalloc ---| |
114 | * |--- search ---| | 114 | * |--- search ---| |
115 | */ | 115 | */ |
116 | set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS); | 116 | set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL); |
117 | start = 0; | 117 | start = 0; |
118 | end = 0; | 118 | end = 0; |
119 | found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, | 119 | found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, |
@@ -144,7 +144,7 @@ static int test_find_delalloc(void) | |||
144 | test_msg("Couldn't find the locked page\n"); | 144 | test_msg("Couldn't find the locked page\n"); |
145 | goto out_bits; | 145 | goto out_bits; |
146 | } | 146 | } |
147 | set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS); | 147 | set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL); |
148 | start = test_start; | 148 | start = test_start; |
149 | end = 0; | 149 | end = 0; |
150 | found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, | 150 | found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, |
@@ -199,7 +199,7 @@ static int test_find_delalloc(void) | |||
199 | * | 199 | * |
200 | * We are re-using our test_start from above since it works out well. | 200 | * We are re-using our test_start from above since it works out well. |
201 | */ | 201 | */ |
202 | set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS); | 202 | set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL); |
203 | start = test_start; | 203 | start = test_start; |
204 | end = 0; | 204 | end = 0; |
205 | found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, | 205 | found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, |
@@ -262,7 +262,7 @@ static int test_find_delalloc(void) | |||
262 | } | 262 | } |
263 | ret = 0; | 263 | ret = 0; |
264 | out_bits: | 264 | out_bits: |
265 | clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS); | 265 | clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL); |
266 | out: | 266 | out: |
267 | if (locked_page) | 267 | if (locked_page) |
268 | page_cache_release(locked_page); | 268 | page_cache_release(locked_page); |
@@ -360,7 +360,7 @@ static int test_eb_bitmaps(void) | |||
360 | 360 | ||
361 | test_msg("Running extent buffer bitmap tests\n"); | 361 | test_msg("Running extent buffer bitmap tests\n"); |
362 | 362 | ||
363 | bitmap = kmalloc(len, GFP_NOFS); | 363 | bitmap = kmalloc(len, GFP_KERNEL); |
364 | if (!bitmap) { | 364 | if (!bitmap) { |
365 | test_msg("Couldn't allocate test bitmap\n"); | 365 | test_msg("Couldn't allocate test bitmap\n"); |
366 | return -ENOMEM; | 366 | return -ENOMEM; |
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 5de55fdd28bc..e2d3da02deee 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c | |||
@@ -974,7 +974,7 @@ static int test_extent_accounting(void) | |||
974 | (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, | 974 | (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, |
975 | EXTENT_DELALLOC | EXTENT_DIRTY | | 975 | EXTENT_DELALLOC | EXTENT_DIRTY | |
976 | EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0, | 976 | EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0, |
977 | NULL, GFP_NOFS); | 977 | NULL, GFP_KERNEL); |
978 | if (ret) { | 978 | if (ret) { |
979 | test_msg("clear_extent_bit returned %d\n", ret); | 979 | test_msg("clear_extent_bit returned %d\n", ret); |
980 | goto out; | 980 | goto out; |
@@ -1045,7 +1045,7 @@ static int test_extent_accounting(void) | |||
1045 | BTRFS_MAX_EXTENT_SIZE+8191, | 1045 | BTRFS_MAX_EXTENT_SIZE+8191, |
1046 | EXTENT_DIRTY | EXTENT_DELALLOC | | 1046 | EXTENT_DIRTY | EXTENT_DELALLOC | |
1047 | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, | 1047 | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, |
1048 | NULL, GFP_NOFS); | 1048 | NULL, GFP_KERNEL); |
1049 | if (ret) { | 1049 | if (ret) { |
1050 | test_msg("clear_extent_bit returned %d\n", ret); | 1050 | test_msg("clear_extent_bit returned %d\n", ret); |
1051 | goto out; | 1051 | goto out; |
@@ -1079,7 +1079,7 @@ static int test_extent_accounting(void) | |||
1079 | ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, | 1079 | ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, |
1080 | EXTENT_DIRTY | EXTENT_DELALLOC | | 1080 | EXTENT_DIRTY | EXTENT_DELALLOC | |
1081 | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, | 1081 | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, |
1082 | NULL, GFP_NOFS); | 1082 | NULL, GFP_KERNEL); |
1083 | if (ret) { | 1083 | if (ret) { |
1084 | test_msg("clear_extent_bit returned %d\n", ret); | 1084 | test_msg("clear_extent_bit returned %d\n", ret); |
1085 | goto out; | 1085 | goto out; |
@@ -1096,7 +1096,7 @@ out: | |||
1096 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, | 1096 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, |
1097 | EXTENT_DIRTY | EXTENT_DELALLOC | | 1097 | EXTENT_DIRTY | EXTENT_DELALLOC | |
1098 | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, | 1098 | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, |
1099 | NULL, GFP_NOFS); | 1099 | NULL, GFP_KERNEL); |
1100 | iput(inode); | 1100 | iput(inode); |
1101 | btrfs_free_dummy_root(root); | 1101 | btrfs_free_dummy_root(root); |
1102 | return ret; | 1102 | return ret; |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 323e12cc9d2f..978c3a810893 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -4127,7 +4127,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | |||
4127 | struct inode *inode, | 4127 | struct inode *inode, |
4128 | struct btrfs_path *path, | 4128 | struct btrfs_path *path, |
4129 | struct list_head *logged_list, | 4129 | struct list_head *logged_list, |
4130 | struct btrfs_log_ctx *ctx) | 4130 | struct btrfs_log_ctx *ctx, |
4131 | const u64 start, | ||
4132 | const u64 end) | ||
4131 | { | 4133 | { |
4132 | struct extent_map *em, *n; | 4134 | struct extent_map *em, *n; |
4133 | struct list_head extents; | 4135 | struct list_head extents; |
@@ -4166,7 +4168,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | |||
4166 | } | 4168 | } |
4167 | 4169 | ||
4168 | list_sort(NULL, &extents, extent_cmp); | 4170 | list_sort(NULL, &extents, extent_cmp); |
4169 | 4171 | /* | |
4172 | * Collect any new ordered extents within the range. This is to | ||
4173 | * prevent logging file extent items without waiting for the disk | ||
4174 | * location they point to being written. We do this only to deal | ||
4175 | * with races against concurrent lockless direct IO writes. | ||
4176 | */ | ||
4177 | btrfs_get_logged_extents(inode, logged_list, start, end); | ||
4170 | process: | 4178 | process: |
4171 | while (!list_empty(&extents)) { | 4179 | while (!list_empty(&extents)) { |
4172 | em = list_entry(extents.next, struct extent_map, list); | 4180 | em = list_entry(extents.next, struct extent_map, list); |
@@ -4701,7 +4709,7 @@ log_extents: | |||
4701 | goto out_unlock; | 4709 | goto out_unlock; |
4702 | } | 4710 | } |
4703 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path, | 4711 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path, |
4704 | &logged_list, ctx); | 4712 | &logged_list, ctx, start, end); |
4705 | if (ret) { | 4713 | if (ret) { |
4706 | err = ret; | 4714 | err = ret; |
4707 | goto out_unlock; | 4715 | goto out_unlock; |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 86a9c383955e..eb9028e8cfc5 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -698,8 +698,8 @@ static void ceph_aio_retry_work(struct work_struct *work) | |||
698 | 698 | ||
699 | req = ceph_osdc_alloc_request(orig_req->r_osdc, snapc, 2, | 699 | req = ceph_osdc_alloc_request(orig_req->r_osdc, snapc, 2, |
700 | false, GFP_NOFS); | 700 | false, GFP_NOFS); |
701 | if (IS_ERR(req)) { | 701 | if (!req) { |
702 | ret = PTR_ERR(req); | 702 | ret = -ENOMEM; |
703 | req = orig_req; | 703 | req = orig_req; |
704 | goto out; | 704 | goto out; |
705 | } | 705 | } |
@@ -716,7 +716,6 @@ static void ceph_aio_retry_work(struct work_struct *work) | |||
716 | ceph_osdc_build_request(req, req->r_ops[0].extent.offset, | 716 | ceph_osdc_build_request(req, req->r_ops[0].extent.offset, |
717 | snapc, CEPH_NOSNAP, &aio_req->mtime); | 717 | snapc, CEPH_NOSNAP, &aio_req->mtime); |
718 | 718 | ||
719 | ceph_put_snap_context(snapc); | ||
720 | ceph_osdc_put_request(orig_req); | 719 | ceph_osdc_put_request(orig_req); |
721 | 720 | ||
722 | req->r_callback = ceph_aio_complete_req; | 721 | req->r_callback = ceph_aio_complete_req; |
@@ -731,6 +730,7 @@ out: | |||
731 | ceph_aio_complete_req(req, NULL); | 730 | ceph_aio_complete_req(req, NULL); |
732 | } | 731 | } |
733 | 732 | ||
733 | ceph_put_snap_context(snapc); | ||
734 | kfree(aio_work); | 734 | kfree(aio_work); |
735 | } | 735 | } |
736 | 736 | ||
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index fb4ba2e4e2a5..be2d87f33177 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -975,13 +975,8 @@ out_unlock: | |||
975 | /* | 975 | /* |
976 | * splice a dentry to an inode. | 976 | * splice a dentry to an inode. |
977 | * caller must hold directory i_mutex for this to be safe. | 977 | * caller must hold directory i_mutex for this to be safe. |
978 | * | ||
979 | * we will only rehash the resulting dentry if @prehash is | ||
980 | * true; @prehash will be set to false (for the benefit of | ||
981 | * the caller) if we fail. | ||
982 | */ | 978 | */ |
983 | static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | 979 | static struct dentry *splice_dentry(struct dentry *dn, struct inode *in) |
984 | bool *prehash) | ||
985 | { | 980 | { |
986 | struct dentry *realdn; | 981 | struct dentry *realdn; |
987 | 982 | ||
@@ -994,8 +989,6 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | |||
994 | if (IS_ERR(realdn)) { | 989 | if (IS_ERR(realdn)) { |
995 | pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", | 990 | pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", |
996 | PTR_ERR(realdn), dn, in, ceph_vinop(in)); | 991 | PTR_ERR(realdn), dn, in, ceph_vinop(in)); |
997 | if (prehash) | ||
998 | *prehash = false; /* don't rehash on error */ | ||
999 | dn = realdn; /* note realdn contains the error */ | 992 | dn = realdn; /* note realdn contains the error */ |
1000 | goto out; | 993 | goto out; |
1001 | } else if (realdn) { | 994 | } else if (realdn) { |
@@ -1011,8 +1004,6 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | |||
1011 | dout("dn %p attached to %p ino %llx.%llx\n", | 1004 | dout("dn %p attached to %p ino %llx.%llx\n", |
1012 | dn, d_inode(dn), ceph_vinop(d_inode(dn))); | 1005 | dn, d_inode(dn), ceph_vinop(d_inode(dn))); |
1013 | } | 1006 | } |
1014 | if ((!prehash || *prehash) && d_unhashed(dn)) | ||
1015 | d_rehash(dn); | ||
1016 | out: | 1007 | out: |
1017 | return dn; | 1008 | return dn; |
1018 | } | 1009 | } |
@@ -1245,10 +1236,8 @@ retry_lookup: | |||
1245 | dout("d_delete %p\n", dn); | 1236 | dout("d_delete %p\n", dn); |
1246 | d_delete(dn); | 1237 | d_delete(dn); |
1247 | } else { | 1238 | } else { |
1248 | dout("d_instantiate %p NULL\n", dn); | ||
1249 | d_instantiate(dn, NULL); | ||
1250 | if (have_lease && d_unhashed(dn)) | 1239 | if (have_lease && d_unhashed(dn)) |
1251 | d_rehash(dn); | 1240 | d_add(dn, NULL); |
1252 | update_dentry_lease(dn, rinfo->dlease, | 1241 | update_dentry_lease(dn, rinfo->dlease, |
1253 | session, | 1242 | session, |
1254 | req->r_request_started); | 1243 | req->r_request_started); |
@@ -1260,7 +1249,7 @@ retry_lookup: | |||
1260 | if (d_really_is_negative(dn)) { | 1249 | if (d_really_is_negative(dn)) { |
1261 | ceph_dir_clear_ordered(dir); | 1250 | ceph_dir_clear_ordered(dir); |
1262 | ihold(in); | 1251 | ihold(in); |
1263 | dn = splice_dentry(dn, in, &have_lease); | 1252 | dn = splice_dentry(dn, in); |
1264 | if (IS_ERR(dn)) { | 1253 | if (IS_ERR(dn)) { |
1265 | err = PTR_ERR(dn); | 1254 | err = PTR_ERR(dn); |
1266 | goto done; | 1255 | goto done; |
@@ -1290,7 +1279,7 @@ retry_lookup: | |||
1290 | dout(" linking snapped dir %p to dn %p\n", in, dn); | 1279 | dout(" linking snapped dir %p to dn %p\n", in, dn); |
1291 | ceph_dir_clear_ordered(dir); | 1280 | ceph_dir_clear_ordered(dir); |
1292 | ihold(in); | 1281 | ihold(in); |
1293 | dn = splice_dentry(dn, in, NULL); | 1282 | dn = splice_dentry(dn, in); |
1294 | if (IS_ERR(dn)) { | 1283 | if (IS_ERR(dn)) { |
1295 | err = PTR_ERR(dn); | 1284 | err = PTR_ERR(dn); |
1296 | goto done; | 1285 | goto done; |
@@ -1501,7 +1490,7 @@ retry_lookup: | |||
1501 | } | 1490 | } |
1502 | 1491 | ||
1503 | if (d_really_is_negative(dn)) { | 1492 | if (d_really_is_negative(dn)) { |
1504 | struct dentry *realdn = splice_dentry(dn, in, NULL); | 1493 | struct dentry *realdn = splice_dentry(dn, in); |
1505 | if (IS_ERR(realdn)) { | 1494 | if (IS_ERR(realdn)) { |
1506 | err = PTR_ERR(realdn); | 1495 | err = PTR_ERR(realdn); |
1507 | d_drop(dn); | 1496 | d_drop(dn); |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index c48ca13673e3..09b1db2cac31 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -642,9 +642,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) | |||
642 | while (*s && *s != sep) | 642 | while (*s && *s != sep) |
643 | s++; | 643 | s++; |
644 | 644 | ||
645 | inode_lock(dir); | 645 | child = lookup_one_len_unlocked(p, dentry, s - p); |
646 | child = lookup_one_len(p, dentry, s - p); | ||
647 | inode_unlock(dir); | ||
648 | dput(dentry); | 646 | dput(dentry); |
649 | dentry = child; | 647 | dentry = child; |
650 | } while (!IS_ERR(dentry)); | 648 | } while (!IS_ERR(dentry)); |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index a5b8eb69a8f4..6402eaf8ab95 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -1261,6 +1261,9 @@ COMPATIBLE_IOCTL(HCIUNBLOCKADDR) | |||
1261 | COMPATIBLE_IOCTL(HCIINQUIRY) | 1261 | COMPATIBLE_IOCTL(HCIINQUIRY) |
1262 | COMPATIBLE_IOCTL(HCIUARTSETPROTO) | 1262 | COMPATIBLE_IOCTL(HCIUARTSETPROTO) |
1263 | COMPATIBLE_IOCTL(HCIUARTGETPROTO) | 1263 | COMPATIBLE_IOCTL(HCIUARTGETPROTO) |
1264 | COMPATIBLE_IOCTL(HCIUARTGETDEVICE) | ||
1265 | COMPATIBLE_IOCTL(HCIUARTSETFLAGS) | ||
1266 | COMPATIBLE_IOCTL(HCIUARTGETFLAGS) | ||
1264 | COMPATIBLE_IOCTL(RFCOMMCREATEDEV) | 1267 | COMPATIBLE_IOCTL(RFCOMMCREATEDEV) |
1265 | COMPATIBLE_IOCTL(RFCOMMRELEASEDEV) | 1268 | COMPATIBLE_IOCTL(RFCOMMRELEASEDEV) |
1266 | COMPATIBLE_IOCTL(RFCOMMGETDEVLIST) | 1269 | COMPATIBLE_IOCTL(RFCOMMGETDEVLIST) |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index f419519ec41f..214ec14149d9 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
@@ -432,14 +432,9 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den | |||
432 | (sd->s_type & CONFIGFS_ITEM_BIN_ATTR) ? | 432 | (sd->s_type & CONFIGFS_ITEM_BIN_ATTR) ? |
433 | configfs_init_bin_file : | 433 | configfs_init_bin_file : |
434 | configfs_init_file); | 434 | configfs_init_file); |
435 | if (error) { | 435 | if (error) |
436 | configfs_put(sd); | 436 | configfs_put(sd); |
437 | return error; | 437 | return error; |
438 | } | ||
439 | |||
440 | d_rehash(dentry); | ||
441 | |||
442 | return 0; | ||
443 | } | 438 | } |
444 | 439 | ||
445 | static struct dentry * configfs_lookup(struct inode *dir, | 440 | static struct dentry * configfs_lookup(struct inode *dir, |
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index cee087d8f7e0..45811ea3fd87 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c | |||
@@ -199,9 +199,17 @@ int configfs_create(struct dentry * dentry, umode_t mode, void (*init)(struct in | |||
199 | configfs_set_inode_lock_class(sd, inode); | 199 | configfs_set_inode_lock_class(sd, inode); |
200 | 200 | ||
201 | init(inode); | 201 | init(inode); |
202 | d_instantiate(dentry, inode); | 202 | if (S_ISDIR(mode) || S_ISLNK(mode)) { |
203 | if (S_ISDIR(mode) || S_ISLNK(mode)) | 203 | /* |
204 | * ->symlink(), ->mkdir(), configfs_register_subsystem() or | ||
205 | * create_default_group() - already hashed. | ||
206 | */ | ||
207 | d_instantiate(dentry, inode); | ||
204 | dget(dentry); /* pin link and directory dentries in core */ | 208 | dget(dentry); /* pin link and directory dentries in core */ |
209 | } else { | ||
210 | /* ->lookup() */ | ||
211 | d_add(dentry, inode); | ||
212 | } | ||
205 | return error; | 213 | return error; |
206 | } | 214 | } |
207 | 215 | ||
@@ -58,6 +58,26 @@ static void dax_unmap_atomic(struct block_device *bdev, | |||
58 | blk_queue_exit(bdev->bd_queue); | 58 | blk_queue_exit(bdev->bd_queue); |
59 | } | 59 | } |
60 | 60 | ||
61 | struct page *read_dax_sector(struct block_device *bdev, sector_t n) | ||
62 | { | ||
63 | struct page *page = alloc_pages(GFP_KERNEL, 0); | ||
64 | struct blk_dax_ctl dax = { | ||
65 | .size = PAGE_SIZE, | ||
66 | .sector = n & ~((((int) PAGE_SIZE) / 512) - 1), | ||
67 | }; | ||
68 | long rc; | ||
69 | |||
70 | if (!page) | ||
71 | return ERR_PTR(-ENOMEM); | ||
72 | |||
73 | rc = dax_map_atomic(bdev, &dax); | ||
74 | if (rc < 0) | ||
75 | return ERR_PTR(rc); | ||
76 | memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE); | ||
77 | dax_unmap_atomic(bdev, &dax); | ||
78 | return page; | ||
79 | } | ||
80 | |||
61 | /* | 81 | /* |
62 | * dax_clear_blocks() is called from within transaction context from XFS, | 82 | * dax_clear_blocks() is called from within transaction context from XFS, |
63 | * and hence this means the stack from this point must follow GFP_NOFS | 83 | * and hence this means the stack from this point must follow GFP_NOFS |
@@ -338,7 +358,8 @@ static int dax_radix_entry(struct address_space *mapping, pgoff_t index, | |||
338 | void *entry; | 358 | void *entry; |
339 | 359 | ||
340 | WARN_ON_ONCE(pmd_entry && !dirty); | 360 | WARN_ON_ONCE(pmd_entry && !dirty); |
341 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | 361 | if (dirty) |
362 | __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); | ||
342 | 363 | ||
343 | spin_lock_irq(&mapping->tree_lock); | 364 | spin_lock_irq(&mapping->tree_lock); |
344 | 365 | ||
diff --git a/fs/dcache.c b/fs/dcache.c index 92d5140de851..32ceae3e6112 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -269,9 +269,6 @@ static inline int dname_external(const struct dentry *dentry) | |||
269 | return dentry->d_name.name != dentry->d_iname; | 269 | return dentry->d_name.name != dentry->d_iname; |
270 | } | 270 | } |
271 | 271 | ||
272 | /* | ||
273 | * Make sure other CPUs see the inode attached before the type is set. | ||
274 | */ | ||
275 | static inline void __d_set_inode_and_type(struct dentry *dentry, | 272 | static inline void __d_set_inode_and_type(struct dentry *dentry, |
276 | struct inode *inode, | 273 | struct inode *inode, |
277 | unsigned type_flags) | 274 | unsigned type_flags) |
@@ -279,28 +276,18 @@ static inline void __d_set_inode_and_type(struct dentry *dentry, | |||
279 | unsigned flags; | 276 | unsigned flags; |
280 | 277 | ||
281 | dentry->d_inode = inode; | 278 | dentry->d_inode = inode; |
282 | smp_wmb(); | ||
283 | flags = READ_ONCE(dentry->d_flags); | 279 | flags = READ_ONCE(dentry->d_flags); |
284 | flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); | 280 | flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); |
285 | flags |= type_flags; | 281 | flags |= type_flags; |
286 | WRITE_ONCE(dentry->d_flags, flags); | 282 | WRITE_ONCE(dentry->d_flags, flags); |
287 | } | 283 | } |
288 | 284 | ||
289 | /* | ||
290 | * Ideally, we want to make sure that other CPUs see the flags cleared before | ||
291 | * the inode is detached, but this is really a violation of RCU principles | ||
292 | * since the ordering suggests we should always set inode before flags. | ||
293 | * | ||
294 | * We should instead replace or discard the entire dentry - but that sucks | ||
295 | * performancewise on mass deletion/rename. | ||
296 | */ | ||
297 | static inline void __d_clear_type_and_inode(struct dentry *dentry) | 285 | static inline void __d_clear_type_and_inode(struct dentry *dentry) |
298 | { | 286 | { |
299 | unsigned flags = READ_ONCE(dentry->d_flags); | 287 | unsigned flags = READ_ONCE(dentry->d_flags); |
300 | 288 | ||
301 | flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); | 289 | flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); |
302 | WRITE_ONCE(dentry->d_flags, flags); | 290 | WRITE_ONCE(dentry->d_flags, flags); |
303 | smp_wmb(); | ||
304 | dentry->d_inode = NULL; | 291 | dentry->d_inode = NULL; |
305 | } | 292 | } |
306 | 293 | ||
@@ -370,9 +357,11 @@ static void dentry_unlink_inode(struct dentry * dentry) | |||
370 | __releases(dentry->d_inode->i_lock) | 357 | __releases(dentry->d_inode->i_lock) |
371 | { | 358 | { |
372 | struct inode *inode = dentry->d_inode; | 359 | struct inode *inode = dentry->d_inode; |
360 | |||
361 | raw_write_seqcount_begin(&dentry->d_seq); | ||
373 | __d_clear_type_and_inode(dentry); | 362 | __d_clear_type_and_inode(dentry); |
374 | hlist_del_init(&dentry->d_u.d_alias); | 363 | hlist_del_init(&dentry->d_u.d_alias); |
375 | dentry_rcuwalk_invalidate(dentry); | 364 | raw_write_seqcount_end(&dentry->d_seq); |
376 | spin_unlock(&dentry->d_lock); | 365 | spin_unlock(&dentry->d_lock); |
377 | spin_unlock(&inode->i_lock); | 366 | spin_unlock(&inode->i_lock); |
378 | if (!inode->i_nlink) | 367 | if (!inode->i_nlink) |
@@ -1756,12 +1745,12 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) | |||
1756 | unsigned add_flags = d_flags_for_inode(inode); | 1745 | unsigned add_flags = d_flags_for_inode(inode); |
1757 | 1746 | ||
1758 | spin_lock(&dentry->d_lock); | 1747 | spin_lock(&dentry->d_lock); |
1759 | if (inode) | 1748 | hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); |
1760 | hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); | 1749 | raw_write_seqcount_begin(&dentry->d_seq); |
1761 | __d_set_inode_and_type(dentry, inode, add_flags); | 1750 | __d_set_inode_and_type(dentry, inode, add_flags); |
1762 | dentry_rcuwalk_invalidate(dentry); | 1751 | raw_write_seqcount_end(&dentry->d_seq); |
1752 | __fsnotify_d_instantiate(dentry); | ||
1763 | spin_unlock(&dentry->d_lock); | 1753 | spin_unlock(&dentry->d_lock); |
1764 | fsnotify_d_instantiate(dentry, inode); | ||
1765 | } | 1754 | } |
1766 | 1755 | ||
1767 | /** | 1756 | /** |
@@ -1782,91 +1771,16 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) | |||
1782 | void d_instantiate(struct dentry *entry, struct inode * inode) | 1771 | void d_instantiate(struct dentry *entry, struct inode * inode) |
1783 | { | 1772 | { |
1784 | BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); | 1773 | BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); |
1785 | if (inode) | 1774 | if (inode) { |
1786 | spin_lock(&inode->i_lock); | 1775 | spin_lock(&inode->i_lock); |
1787 | __d_instantiate(entry, inode); | 1776 | __d_instantiate(entry, inode); |
1788 | if (inode) | ||
1789 | spin_unlock(&inode->i_lock); | 1777 | spin_unlock(&inode->i_lock); |
1778 | } | ||
1790 | security_d_instantiate(entry, inode); | 1779 | security_d_instantiate(entry, inode); |
1791 | } | 1780 | } |
1792 | EXPORT_SYMBOL(d_instantiate); | 1781 | EXPORT_SYMBOL(d_instantiate); |
1793 | 1782 | ||
1794 | /** | 1783 | /** |
1795 | * d_instantiate_unique - instantiate a non-aliased dentry | ||
1796 | * @entry: dentry to instantiate | ||
1797 | * @inode: inode to attach to this dentry | ||
1798 | * | ||
1799 | * Fill in inode information in the entry. On success, it returns NULL. | ||
1800 | * If an unhashed alias of "entry" already exists, then we return the | ||
1801 | * aliased dentry instead and drop one reference to inode. | ||
1802 | * | ||
1803 | * Note that in order to avoid conflicts with rename() etc, the caller | ||
1804 | * had better be holding the parent directory semaphore. | ||
1805 | * | ||
1806 | * This also assumes that the inode count has been incremented | ||
1807 | * (or otherwise set) by the caller to indicate that it is now | ||
1808 | * in use by the dcache. | ||
1809 | */ | ||
1810 | static struct dentry *__d_instantiate_unique(struct dentry *entry, | ||
1811 | struct inode *inode) | ||
1812 | { | ||
1813 | struct dentry *alias; | ||
1814 | int len = entry->d_name.len; | ||
1815 | const char *name = entry->d_name.name; | ||
1816 | unsigned int hash = entry->d_name.hash; | ||
1817 | |||
1818 | if (!inode) { | ||
1819 | __d_instantiate(entry, NULL); | ||
1820 | return NULL; | ||
1821 | } | ||
1822 | |||
1823 | hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { | ||
1824 | /* | ||
1825 | * Don't need alias->d_lock here, because aliases with | ||
1826 | * d_parent == entry->d_parent are not subject to name or | ||
1827 | * parent changes, because the parent inode i_mutex is held. | ||
1828 | */ | ||
1829 | if (alias->d_name.hash != hash) | ||
1830 | continue; | ||
1831 | if (alias->d_parent != entry->d_parent) | ||
1832 | continue; | ||
1833 | if (alias->d_name.len != len) | ||
1834 | continue; | ||
1835 | if (dentry_cmp(alias, name, len)) | ||
1836 | continue; | ||
1837 | __dget(alias); | ||
1838 | return alias; | ||
1839 | } | ||
1840 | |||
1841 | __d_instantiate(entry, inode); | ||
1842 | return NULL; | ||
1843 | } | ||
1844 | |||
1845 | struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) | ||
1846 | { | ||
1847 | struct dentry *result; | ||
1848 | |||
1849 | BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); | ||
1850 | |||
1851 | if (inode) | ||
1852 | spin_lock(&inode->i_lock); | ||
1853 | result = __d_instantiate_unique(entry, inode); | ||
1854 | if (inode) | ||
1855 | spin_unlock(&inode->i_lock); | ||
1856 | |||
1857 | if (!result) { | ||
1858 | security_d_instantiate(entry, inode); | ||
1859 | return NULL; | ||
1860 | } | ||
1861 | |||
1862 | BUG_ON(!d_unhashed(result)); | ||
1863 | iput(inode); | ||
1864 | return result; | ||
1865 | } | ||
1866 | |||
1867 | EXPORT_SYMBOL(d_instantiate_unique); | ||
1868 | |||
1869 | /** | ||
1870 | * d_instantiate_no_diralias - instantiate a non-aliased dentry | 1784 | * d_instantiate_no_diralias - instantiate a non-aliased dentry |
1871 | * @entry: dentry to complete | 1785 | * @entry: dentry to complete |
1872 | * @inode: inode to attach to this dentry | 1786 | * @inode: inode to attach to this dentry |
@@ -2446,6 +2360,86 @@ void d_rehash(struct dentry * entry) | |||
2446 | } | 2360 | } |
2447 | EXPORT_SYMBOL(d_rehash); | 2361 | EXPORT_SYMBOL(d_rehash); |
2448 | 2362 | ||
2363 | |||
2364 | /* inode->i_lock held if inode is non-NULL */ | ||
2365 | |||
2366 | static inline void __d_add(struct dentry *dentry, struct inode *inode) | ||
2367 | { | ||
2368 | if (inode) { | ||
2369 | __d_instantiate(dentry, inode); | ||
2370 | spin_unlock(&inode->i_lock); | ||
2371 | } | ||
2372 | security_d_instantiate(dentry, inode); | ||
2373 | d_rehash(dentry); | ||
2374 | } | ||
2375 | |||
2376 | /** | ||
2377 | * d_add - add dentry to hash queues | ||
2378 | * @entry: dentry to add | ||
2379 | * @inode: The inode to attach to this dentry | ||
2380 | * | ||
2381 | * This adds the entry to the hash queues and initializes @inode. | ||
2382 | * The entry was actually filled in earlier during d_alloc(). | ||
2383 | */ | ||
2384 | |||
2385 | void d_add(struct dentry *entry, struct inode *inode) | ||
2386 | { | ||
2387 | if (inode) | ||
2388 | spin_lock(&inode->i_lock); | ||
2389 | __d_add(entry, inode); | ||
2390 | } | ||
2391 | EXPORT_SYMBOL(d_add); | ||
2392 | |||
2393 | /** | ||
2394 | * d_exact_alias - find and hash an exact unhashed alias | ||
2395 | * @entry: dentry to add | ||
2396 | * @inode: The inode to go with this dentry | ||
2397 | * | ||
2398 | * If an unhashed dentry with the same name/parent and desired | ||
2399 | * inode already exists, hash and return it. Otherwise, return | ||
2400 | * NULL. | ||
2401 | * | ||
2402 | * Parent directory should be locked. | ||
2403 | */ | ||
2404 | struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode) | ||
2405 | { | ||
2406 | struct dentry *alias; | ||
2407 | int len = entry->d_name.len; | ||
2408 | const char *name = entry->d_name.name; | ||
2409 | unsigned int hash = entry->d_name.hash; | ||
2410 | |||
2411 | spin_lock(&inode->i_lock); | ||
2412 | hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { | ||
2413 | /* | ||
2414 | * Don't need alias->d_lock here, because aliases with | ||
2415 | * d_parent == entry->d_parent are not subject to name or | ||
2416 | * parent changes, because the parent inode i_mutex is held. | ||
2417 | */ | ||
2418 | if (alias->d_name.hash != hash) | ||
2419 | continue; | ||
2420 | if (alias->d_parent != entry->d_parent) | ||
2421 | continue; | ||
2422 | if (alias->d_name.len != len) | ||
2423 | continue; | ||
2424 | if (dentry_cmp(alias, name, len)) | ||
2425 | continue; | ||
2426 | spin_lock(&alias->d_lock); | ||
2427 | if (!d_unhashed(alias)) { | ||
2428 | spin_unlock(&alias->d_lock); | ||
2429 | alias = NULL; | ||
2430 | } else { | ||
2431 | __dget_dlock(alias); | ||
2432 | _d_rehash(alias); | ||
2433 | spin_unlock(&alias->d_lock); | ||
2434 | } | ||
2435 | spin_unlock(&inode->i_lock); | ||
2436 | return alias; | ||
2437 | } | ||
2438 | spin_unlock(&inode->i_lock); | ||
2439 | return NULL; | ||
2440 | } | ||
2441 | EXPORT_SYMBOL(d_exact_alias); | ||
2442 | |||
2449 | /** | 2443 | /** |
2450 | * dentry_update_name_case - update case insensitive dentry with a new name | 2444 | * dentry_update_name_case - update case insensitive dentry with a new name |
2451 | * @dentry: dentry to be updated | 2445 | * @dentry: dentry to be updated |
@@ -2782,10 +2776,9 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | |||
2782 | 2776 | ||
2783 | BUG_ON(!d_unhashed(dentry)); | 2777 | BUG_ON(!d_unhashed(dentry)); |
2784 | 2778 | ||
2785 | if (!inode) { | 2779 | if (!inode) |
2786 | __d_instantiate(dentry, NULL); | ||
2787 | goto out; | 2780 | goto out; |
2788 | } | 2781 | |
2789 | spin_lock(&inode->i_lock); | 2782 | spin_lock(&inode->i_lock); |
2790 | if (S_ISDIR(inode->i_mode)) { | 2783 | if (S_ISDIR(inode->i_mode)) { |
2791 | struct dentry *new = __d_find_any_alias(inode); | 2784 | struct dentry *new = __d_find_any_alias(inode); |
@@ -2819,12 +2812,8 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | |||
2819 | return new; | 2812 | return new; |
2820 | } | 2813 | } |
2821 | } | 2814 | } |
2822 | /* already taking inode->i_lock, so d_add() by hand */ | ||
2823 | __d_instantiate(dentry, inode); | ||
2824 | spin_unlock(&inode->i_lock); | ||
2825 | out: | 2815 | out: |
2826 | security_d_instantiate(dentry, inode); | 2816 | __d_add(dentry, inode); |
2827 | d_rehash(dentry); | ||
2828 | return NULL; | 2817 | return NULL; |
2829 | } | 2818 | } |
2830 | EXPORT_SYMBOL(d_splice_alias); | 2819 | EXPORT_SYMBOL(d_splice_alias); |
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 1f107fd51328..655f21f99160 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c | |||
@@ -575,6 +575,26 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx) | |||
575 | mutex_unlock(&allocated_ptys_lock); | 575 | mutex_unlock(&allocated_ptys_lock); |
576 | } | 576 | } |
577 | 577 | ||
578 | /* | ||
579 | * pty code needs to hold extra references in case of last /dev/tty close | ||
580 | */ | ||
581 | |||
582 | void devpts_add_ref(struct inode *ptmx_inode) | ||
583 | { | ||
584 | struct super_block *sb = pts_sb_from_inode(ptmx_inode); | ||
585 | |||
586 | atomic_inc(&sb->s_active); | ||
587 | ihold(ptmx_inode); | ||
588 | } | ||
589 | |||
590 | void devpts_del_ref(struct inode *ptmx_inode) | ||
591 | { | ||
592 | struct super_block *sb = pts_sb_from_inode(ptmx_inode); | ||
593 | |||
594 | iput(ptmx_inode); | ||
595 | deactivate_super(sb); | ||
596 | } | ||
597 | |||
578 | /** | 598 | /** |
579 | * devpts_pty_new -- create a new inode in /dev/pts/ | 599 | * devpts_pty_new -- create a new inode in /dev/pts/ |
580 | * @ptmx_inode: inode of the master | 600 | * @ptmx_inode: inode of the master |
diff --git a/fs/direct-io.c b/fs/direct-io.c index 1b2f7ffc8b84..85463171053b 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -445,7 +445,8 @@ static struct bio *dio_await_one(struct dio *dio) | |||
445 | __set_current_state(TASK_UNINTERRUPTIBLE); | 445 | __set_current_state(TASK_UNINTERRUPTIBLE); |
446 | dio->waiter = current; | 446 | dio->waiter = current; |
447 | spin_unlock_irqrestore(&dio->bio_lock, flags); | 447 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
448 | if (!blk_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie)) | 448 | if (!(dio->iocb->ki_flags & IOCB_HIPRI) || |
449 | !blk_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie)) | ||
449 | io_schedule(); | 450 | io_schedule(); |
450 | /* wake up sets us TASK_RUNNING */ | 451 | /* wake up sets us TASK_RUNNING */ |
451 | spin_lock_irqsave(&dio->bio_lock, flags); | 452 | spin_lock_irqsave(&dio->bio_lock, flags); |
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 80d6901493cf..87dbdd4881ab 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c | |||
@@ -1499,16 +1499,14 @@ out: | |||
1499 | */ | 1499 | */ |
1500 | static int | 1500 | static int |
1501 | ecryptfs_encrypt_filename(struct ecryptfs_filename *filename, | 1501 | ecryptfs_encrypt_filename(struct ecryptfs_filename *filename, |
1502 | struct ecryptfs_crypt_stat *crypt_stat, | ||
1503 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat) | 1502 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat) |
1504 | { | 1503 | { |
1505 | int rc = 0; | 1504 | int rc = 0; |
1506 | 1505 | ||
1507 | filename->encrypted_filename = NULL; | 1506 | filename->encrypted_filename = NULL; |
1508 | filename->encrypted_filename_size = 0; | 1507 | filename->encrypted_filename_size = 0; |
1509 | if ((crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCFN_USE_MOUNT_FNEK)) | 1508 | if (mount_crypt_stat && (mount_crypt_stat->flags |
1510 | || (mount_crypt_stat && (mount_crypt_stat->flags | 1509 | & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK)) { |
1511 | & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK))) { | ||
1512 | size_t packet_size; | 1510 | size_t packet_size; |
1513 | size_t remaining_bytes; | 1511 | size_t remaining_bytes; |
1514 | 1512 | ||
@@ -1944,7 +1942,6 @@ out: | |||
1944 | int ecryptfs_encrypt_and_encode_filename( | 1942 | int ecryptfs_encrypt_and_encode_filename( |
1945 | char **encoded_name, | 1943 | char **encoded_name, |
1946 | size_t *encoded_name_size, | 1944 | size_t *encoded_name_size, |
1947 | struct ecryptfs_crypt_stat *crypt_stat, | ||
1948 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat, | 1945 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat, |
1949 | const char *name, size_t name_size) | 1946 | const char *name, size_t name_size) |
1950 | { | 1947 | { |
@@ -1953,9 +1950,8 @@ int ecryptfs_encrypt_and_encode_filename( | |||
1953 | 1950 | ||
1954 | (*encoded_name) = NULL; | 1951 | (*encoded_name) = NULL; |
1955 | (*encoded_name_size) = 0; | 1952 | (*encoded_name_size) = 0; |
1956 | if ((crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCRYPT_FILENAMES)) | 1953 | if (mount_crypt_stat && (mount_crypt_stat->flags |
1957 | || (mount_crypt_stat && (mount_crypt_stat->flags | 1954 | & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)) { |
1958 | & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES))) { | ||
1959 | struct ecryptfs_filename *filename; | 1955 | struct ecryptfs_filename *filename; |
1960 | 1956 | ||
1961 | filename = kzalloc(sizeof(*filename), GFP_KERNEL); | 1957 | filename = kzalloc(sizeof(*filename), GFP_KERNEL); |
@@ -1968,8 +1964,7 @@ int ecryptfs_encrypt_and_encode_filename( | |||
1968 | } | 1964 | } |
1969 | filename->filename = (char *)name; | 1965 | filename->filename = (char *)name; |
1970 | filename->filename_size = name_size; | 1966 | filename->filename_size = name_size; |
1971 | rc = ecryptfs_encrypt_filename(filename, crypt_stat, | 1967 | rc = ecryptfs_encrypt_filename(filename, mount_crypt_stat); |
1972 | mount_crypt_stat); | ||
1973 | if (rc) { | 1968 | if (rc) { |
1974 | printk(KERN_ERR "%s: Error attempting to encrypt " | 1969 | printk(KERN_ERR "%s: Error attempting to encrypt " |
1975 | "filename; rc = [%d]\n", __func__, rc); | 1970 | "filename; rc = [%d]\n", __func__, rc); |
@@ -1980,11 +1975,9 @@ int ecryptfs_encrypt_and_encode_filename( | |||
1980 | NULL, &encoded_name_no_prefix_size, | 1975 | NULL, &encoded_name_no_prefix_size, |
1981 | filename->encrypted_filename, | 1976 | filename->encrypted_filename, |
1982 | filename->encrypted_filename_size); | 1977 | filename->encrypted_filename_size); |
1983 | if ((crypt_stat && (crypt_stat->flags | 1978 | if (mount_crypt_stat |
1984 | & ECRYPTFS_ENCFN_USE_MOUNT_FNEK)) | ||
1985 | || (mount_crypt_stat | ||
1986 | && (mount_crypt_stat->flags | 1979 | && (mount_crypt_stat->flags |
1987 | & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK))) | 1980 | & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK)) |
1988 | (*encoded_name_size) = | 1981 | (*encoded_name_size) = |
1989 | (ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE | 1982 | (ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE |
1990 | + encoded_name_no_prefix_size); | 1983 | + encoded_name_no_prefix_size); |
@@ -2002,11 +1995,9 @@ int ecryptfs_encrypt_and_encode_filename( | |||
2002 | kfree(filename); | 1995 | kfree(filename); |
2003 | goto out; | 1996 | goto out; |
2004 | } | 1997 | } |
2005 | if ((crypt_stat && (crypt_stat->flags | 1998 | if (mount_crypt_stat |
2006 | & ECRYPTFS_ENCFN_USE_MOUNT_FNEK)) | ||
2007 | || (mount_crypt_stat | ||
2008 | && (mount_crypt_stat->flags | 1999 | && (mount_crypt_stat->flags |
2009 | & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK))) { | 2000 | & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK)) { |
2010 | memcpy((*encoded_name), | 2001 | memcpy((*encoded_name), |
2011 | ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX, | 2002 | ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX, |
2012 | ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE); | 2003 | ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE); |
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 7b39260c7bba..67e16128c572 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
@@ -569,7 +569,6 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length); | |||
569 | int ecryptfs_encrypt_and_encode_filename( | 569 | int ecryptfs_encrypt_and_encode_filename( |
570 | char **encoded_name, | 570 | char **encoded_name, |
571 | size_t *encoded_name_size, | 571 | size_t *encoded_name_size, |
572 | struct ecryptfs_crypt_stat *crypt_stat, | ||
573 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat, | 572 | struct ecryptfs_mount_crypt_stat *mount_crypt_stat, |
574 | const char *name, size_t name_size); | 573 | const char *name, size_t name_size); |
575 | struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry); | 574 | struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry); |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 4e685ac1024d..26651636cd1d 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -397,11 +397,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
397 | int rc = 0; | 397 | int rc = 0; |
398 | 398 | ||
399 | lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); | 399 | lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); |
400 | inode_lock(d_inode(lower_dir_dentry)); | 400 | lower_dentry = lookup_one_len_unlocked(ecryptfs_dentry->d_name.name, |
401 | lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name, | ||
402 | lower_dir_dentry, | 401 | lower_dir_dentry, |
403 | ecryptfs_dentry->d_name.len); | 402 | ecryptfs_dentry->d_name.len); |
404 | inode_unlock(d_inode(lower_dir_dentry)); | ||
405 | if (IS_ERR(lower_dentry)) { | 403 | if (IS_ERR(lower_dentry)) { |
406 | rc = PTR_ERR(lower_dentry); | 404 | rc = PTR_ERR(lower_dentry); |
407 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " | 405 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " |
@@ -419,18 +417,16 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, | |||
419 | dput(lower_dentry); | 417 | dput(lower_dentry); |
420 | rc = ecryptfs_encrypt_and_encode_filename( | 418 | rc = ecryptfs_encrypt_and_encode_filename( |
421 | &encrypted_and_encoded_name, &encrypted_and_encoded_name_size, | 419 | &encrypted_and_encoded_name, &encrypted_and_encoded_name_size, |
422 | NULL, mount_crypt_stat, ecryptfs_dentry->d_name.name, | 420 | mount_crypt_stat, ecryptfs_dentry->d_name.name, |
423 | ecryptfs_dentry->d_name.len); | 421 | ecryptfs_dentry->d_name.len); |
424 | if (rc) { | 422 | if (rc) { |
425 | printk(KERN_ERR "%s: Error attempting to encrypt and encode " | 423 | printk(KERN_ERR "%s: Error attempting to encrypt and encode " |
426 | "filename; rc = [%d]\n", __func__, rc); | 424 | "filename; rc = [%d]\n", __func__, rc); |
427 | goto out; | 425 | goto out; |
428 | } | 426 | } |
429 | inode_lock(d_inode(lower_dir_dentry)); | 427 | lower_dentry = lookup_one_len_unlocked(encrypted_and_encoded_name, |
430 | lower_dentry = lookup_one_len(encrypted_and_encoded_name, | ||
431 | lower_dir_dentry, | 428 | lower_dir_dentry, |
432 | encrypted_and_encoded_name_size); | 429 | encrypted_and_encoded_name_size); |
433 | inode_unlock(d_inode(lower_dir_dentry)); | ||
434 | if (IS_ERR(lower_dentry)) { | 430 | if (IS_ERR(lower_dentry)) { |
435 | rc = PTR_ERR(lower_dentry); | 431 | rc = PTR_ERR(lower_dentry); |
436 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " | 432 | ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " |
@@ -502,7 +498,6 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, | |||
502 | dir->i_sb)->mount_crypt_stat; | 498 | dir->i_sb)->mount_crypt_stat; |
503 | rc = ecryptfs_encrypt_and_encode_filename(&encoded_symname, | 499 | rc = ecryptfs_encrypt_and_encode_filename(&encoded_symname, |
504 | &encoded_symlen, | 500 | &encoded_symlen, |
505 | NULL, | ||
506 | mount_crypt_stat, symname, | 501 | mount_crypt_stat, symname, |
507 | strlen(symname)); | 502 | strlen(symname)); |
508 | if (rc) | 503 | if (rc) |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index ae1dbcf47e97..cde60741cad2 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -94,6 +94,11 @@ | |||
94 | /* Epoll private bits inside the event mask */ | 94 | /* Epoll private bits inside the event mask */ |
95 | #define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE) | 95 | #define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE) |
96 | 96 | ||
97 | #define EPOLLINOUT_BITS (POLLIN | POLLOUT) | ||
98 | |||
99 | #define EPOLLEXCLUSIVE_OK_BITS (EPOLLINOUT_BITS | POLLERR | POLLHUP | \ | ||
100 | EPOLLWAKEUP | EPOLLET | EPOLLEXCLUSIVE) | ||
101 | |||
97 | /* Maximum number of nesting allowed inside epoll sets */ | 102 | /* Maximum number of nesting allowed inside epoll sets */ |
98 | #define EP_MAX_NESTS 4 | 103 | #define EP_MAX_NESTS 4 |
99 | 104 | ||
@@ -1068,7 +1073,22 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k | |||
1068 | * wait list. | 1073 | * wait list. |
1069 | */ | 1074 | */ |
1070 | if (waitqueue_active(&ep->wq)) { | 1075 | if (waitqueue_active(&ep->wq)) { |
1071 | ewake = 1; | 1076 | if ((epi->event.events & EPOLLEXCLUSIVE) && |
1077 | !((unsigned long)key & POLLFREE)) { | ||
1078 | switch ((unsigned long)key & EPOLLINOUT_BITS) { | ||
1079 | case POLLIN: | ||
1080 | if (epi->event.events & POLLIN) | ||
1081 | ewake = 1; | ||
1082 | break; | ||
1083 | case POLLOUT: | ||
1084 | if (epi->event.events & POLLOUT) | ||
1085 | ewake = 1; | ||
1086 | break; | ||
1087 | case 0: | ||
1088 | ewake = 1; | ||
1089 | break; | ||
1090 | } | ||
1091 | } | ||
1072 | wake_up_locked(&ep->wq); | 1092 | wake_up_locked(&ep->wq); |
1073 | } | 1093 | } |
1074 | if (waitqueue_active(&ep->poll_wait)) | 1094 | if (waitqueue_active(&ep->poll_wait)) |
@@ -1875,9 +1895,13 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, | |||
1875 | * so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation. | 1895 | * so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation. |
1876 | * Also, we do not currently supported nested exclusive wakeups. | 1896 | * Also, we do not currently supported nested exclusive wakeups. |
1877 | */ | 1897 | */ |
1878 | if ((epds.events & EPOLLEXCLUSIVE) && (op == EPOLL_CTL_MOD || | 1898 | if (epds.events & EPOLLEXCLUSIVE) { |
1879 | (op == EPOLL_CTL_ADD && is_file_epoll(tf.file)))) | 1899 | if (op == EPOLL_CTL_MOD) |
1880 | goto error_tgt_fput; | 1900 | goto error_tgt_fput; |
1901 | if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) || | ||
1902 | (epds.events & ~EPOLLEXCLUSIVE_OK_BITS))) | ||
1903 | goto error_tgt_fput; | ||
1904 | } | ||
1881 | 1905 | ||
1882 | /* | 1906 | /* |
1883 | * At this point it is safe to assume that the "private_data" contains | 1907 | * At this point it is safe to assume that the "private_data" contains |
@@ -1950,8 +1974,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, | |||
1950 | break; | 1974 | break; |
1951 | case EPOLL_CTL_MOD: | 1975 | case EPOLL_CTL_MOD: |
1952 | if (epi) { | 1976 | if (epi) { |
1953 | epds.events |= POLLERR | POLLHUP; | 1977 | if (!(epi->event.events & EPOLLEXCLUSIVE)) { |
1954 | error = ep_modify(ep, epi, &epds); | 1978 | epds.events |= POLLERR | POLLHUP; |
1979 | error = ep_modify(ep, epi, &epds); | ||
1980 | } | ||
1955 | } else | 1981 | } else |
1956 | error = -ENOENT; | 1982 | error = -ENOENT; |
1957 | break; | 1983 | break; |
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 506765afa1a3..bb8d67e2740a 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c | |||
@@ -376,12 +376,11 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry) | |||
376 | struct inode *inode = d_inode(dentry); | 376 | struct inode *inode = d_inode(dentry); |
377 | dnode_secno dno; | 377 | dnode_secno dno; |
378 | int r; | 378 | int r; |
379 | int rep = 0; | ||
380 | int err; | 379 | int err; |
381 | 380 | ||
382 | hpfs_lock(dir->i_sb); | 381 | hpfs_lock(dir->i_sb); |
383 | hpfs_adjust_length(name, &len); | 382 | hpfs_adjust_length(name, &len); |
384 | again: | 383 | |
385 | err = -ENOENT; | 384 | err = -ENOENT; |
386 | de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh); | 385 | de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh); |
387 | if (!de) | 386 | if (!de) |
@@ -401,33 +400,9 @@ again: | |||
401 | hpfs_error(dir->i_sb, "there was error when removing dirent"); | 400 | hpfs_error(dir->i_sb, "there was error when removing dirent"); |
402 | err = -EFSERROR; | 401 | err = -EFSERROR; |
403 | break; | 402 | break; |
404 | case 2: /* no space for deleting, try to truncate file */ | 403 | case 2: /* no space for deleting */ |
405 | |||
406 | err = -ENOSPC; | 404 | err = -ENOSPC; |
407 | if (rep++) | 405 | break; |
408 | break; | ||
409 | |||
410 | dentry_unhash(dentry); | ||
411 | if (!d_unhashed(dentry)) { | ||
412 | hpfs_unlock(dir->i_sb); | ||
413 | return -ENOSPC; | ||
414 | } | ||
415 | if (generic_permission(inode, MAY_WRITE) || | ||
416 | !S_ISREG(inode->i_mode) || | ||
417 | get_write_access(inode)) { | ||
418 | d_rehash(dentry); | ||
419 | } else { | ||
420 | struct iattr newattrs; | ||
421 | /*pr_info("truncating file before delete.\n");*/ | ||
422 | newattrs.ia_size = 0; | ||
423 | newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; | ||
424 | err = notify_change(dentry, &newattrs, NULL); | ||
425 | put_write_access(inode); | ||
426 | if (!err) | ||
427 | goto again; | ||
428 | } | ||
429 | hpfs_unlock(dir->i_sb); | ||
430 | return -ENOSPC; | ||
431 | default: | 406 | default: |
432 | drop_nlink(inode); | 407 | drop_nlink(inode); |
433 | err = 0; | 408 | err = 0; |
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index d211b8e18566..30c4c9ebb693 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c | |||
@@ -843,9 +843,14 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, | |||
843 | 843 | ||
844 | pr_notice("%s(): Link succeeded, unlink failed (err %d). You now have a hard link\n", | 844 | pr_notice("%s(): Link succeeded, unlink failed (err %d). You now have a hard link\n", |
845 | __func__, ret); | 845 | __func__, ret); |
846 | /* Might as well let the VFS know */ | 846 | /* |
847 | d_instantiate(new_dentry, d_inode(old_dentry)); | 847 | * We can't keep the target in dcache after that. |
848 | ihold(d_inode(old_dentry)); | 848 | * For one thing, we can't afford dentry aliases for directories. |
849 | * For another, if there was a victim, we _can't_ set new inode | ||
850 | * for that sucker and we have to trigger mount eviction - the | ||
851 | * caller won't do it on its own since we are returning an error. | ||
852 | */ | ||
853 | d_invalidate(new_dentry); | ||
849 | new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now); | 854 | new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now); |
850 | return ret; | 855 | return ret; |
851 | } | 856 | } |
diff --git a/fs/namei.c b/fs/namei.c index f624d132e01e..794f81dce766 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1220,8 +1220,8 @@ static int follow_managed(struct path *path, struct nameidata *nd) | |||
1220 | 1220 | ||
1221 | if (need_mntput && path->mnt == mnt) | 1221 | if (need_mntput && path->mnt == mnt) |
1222 | mntput(path->mnt); | 1222 | mntput(path->mnt); |
1223 | if (ret == -EISDIR) | 1223 | if (ret == -EISDIR || !ret) |
1224 | ret = 0; | 1224 | ret = 1; |
1225 | if (need_mntput) | 1225 | if (need_mntput) |
1226 | nd->flags |= LOOKUP_JUMPED; | 1226 | nd->flags |= LOOKUP_JUMPED; |
1227 | if (unlikely(ret < 0)) | 1227 | if (unlikely(ret < 0)) |
@@ -1444,40 +1444,26 @@ static int follow_dotdot(struct nameidata *nd) | |||
1444 | * This looks up the name in dcache, possibly revalidates the old dentry and | 1444 | * This looks up the name in dcache, possibly revalidates the old dentry and |
1445 | * allocates a new one if not found or not valid. In the need_lookup argument | 1445 | * allocates a new one if not found or not valid. In the need_lookup argument |
1446 | * returns whether i_op->lookup is necessary. | 1446 | * returns whether i_op->lookup is necessary. |
1447 | * | ||
1448 | * dir->d_inode->i_mutex must be held | ||
1449 | */ | 1447 | */ |
1450 | static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, | 1448 | static struct dentry *lookup_dcache(const struct qstr *name, |
1451 | unsigned int flags, bool *need_lookup) | 1449 | struct dentry *dir, |
1450 | unsigned int flags) | ||
1452 | { | 1451 | { |
1453 | struct dentry *dentry; | 1452 | struct dentry *dentry; |
1454 | int error; | 1453 | int error; |
1455 | 1454 | ||
1456 | *need_lookup = false; | ||
1457 | dentry = d_lookup(dir, name); | 1455 | dentry = d_lookup(dir, name); |
1458 | if (dentry) { | 1456 | if (dentry) { |
1459 | if (dentry->d_flags & DCACHE_OP_REVALIDATE) { | 1457 | if (dentry->d_flags & DCACHE_OP_REVALIDATE) { |
1460 | error = d_revalidate(dentry, flags); | 1458 | error = d_revalidate(dentry, flags); |
1461 | if (unlikely(error <= 0)) { | 1459 | if (unlikely(error <= 0)) { |
1462 | if (error < 0) { | 1460 | if (!error) |
1463 | dput(dentry); | ||
1464 | return ERR_PTR(error); | ||
1465 | } else { | ||
1466 | d_invalidate(dentry); | 1461 | d_invalidate(dentry); |
1467 | dput(dentry); | 1462 | dput(dentry); |
1468 | dentry = NULL; | 1463 | return ERR_PTR(error); |
1469 | } | ||
1470 | } | 1464 | } |
1471 | } | 1465 | } |
1472 | } | 1466 | } |
1473 | |||
1474 | if (!dentry) { | ||
1475 | dentry = d_alloc(dir, name); | ||
1476 | if (unlikely(!dentry)) | ||
1477 | return ERR_PTR(-ENOMEM); | ||
1478 | |||
1479 | *need_lookup = true; | ||
1480 | } | ||
1481 | return dentry; | 1467 | return dentry; |
1482 | } | 1468 | } |
1483 | 1469 | ||
@@ -1506,45 +1492,44 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, | |||
1506 | return dentry; | 1492 | return dentry; |
1507 | } | 1493 | } |
1508 | 1494 | ||
1509 | static struct dentry *__lookup_hash(struct qstr *name, | 1495 | static struct dentry *__lookup_hash(const struct qstr *name, |
1510 | struct dentry *base, unsigned int flags) | 1496 | struct dentry *base, unsigned int flags) |
1511 | { | 1497 | { |
1512 | bool need_lookup; | 1498 | struct dentry *dentry = lookup_dcache(name, base, flags); |
1513 | struct dentry *dentry; | ||
1514 | 1499 | ||
1515 | dentry = lookup_dcache(name, base, flags, &need_lookup); | 1500 | if (dentry) |
1516 | if (!need_lookup) | ||
1517 | return dentry; | 1501 | return dentry; |
1518 | 1502 | ||
1503 | dentry = d_alloc(base, name); | ||
1504 | if (unlikely(!dentry)) | ||
1505 | return ERR_PTR(-ENOMEM); | ||
1506 | |||
1519 | return lookup_real(base->d_inode, dentry, flags); | 1507 | return lookup_real(base->d_inode, dentry, flags); |
1520 | } | 1508 | } |
1521 | 1509 | ||
1522 | /* | ||
1523 | * It's more convoluted than I'd like it to be, but... it's still fairly | ||
1524 | * small and for now I'd prefer to have fast path as straight as possible. | ||
1525 | * It _is_ time-critical. | ||
1526 | */ | ||
1527 | static int lookup_fast(struct nameidata *nd, | 1510 | static int lookup_fast(struct nameidata *nd, |
1528 | struct path *path, struct inode **inode, | 1511 | struct path *path, struct inode **inode, |
1529 | unsigned *seqp) | 1512 | unsigned *seqp) |
1530 | { | 1513 | { |
1531 | struct vfsmount *mnt = nd->path.mnt; | 1514 | struct vfsmount *mnt = nd->path.mnt; |
1532 | struct dentry *dentry, *parent = nd->path.dentry; | 1515 | struct dentry *dentry, *parent = nd->path.dentry; |
1533 | int need_reval = 1; | ||
1534 | int status = 1; | 1516 | int status = 1; |
1535 | int err; | 1517 | int err; |
1536 | 1518 | ||
1537 | /* | 1519 | /* |
1538 | * Rename seqlock is not required here because in the off chance | 1520 | * Rename seqlock is not required here because in the off chance |
1539 | * of a false negative due to a concurrent rename, we're going to | 1521 | * of a false negative due to a concurrent rename, the caller is |
1540 | * do the non-racy lookup, below. | 1522 | * going to fall back to non-racy lookup. |
1541 | */ | 1523 | */ |
1542 | if (nd->flags & LOOKUP_RCU) { | 1524 | if (nd->flags & LOOKUP_RCU) { |
1543 | unsigned seq; | 1525 | unsigned seq; |
1544 | bool negative; | 1526 | bool negative; |
1545 | dentry = __d_lookup_rcu(parent, &nd->last, &seq); | 1527 | dentry = __d_lookup_rcu(parent, &nd->last, &seq); |
1546 | if (!dentry) | 1528 | if (unlikely(!dentry)) { |
1547 | goto unlazy; | 1529 | if (unlazy_walk(nd, NULL, 0)) |
1530 | return -ECHILD; | ||
1531 | return 0; | ||
1532 | } | ||
1548 | 1533 | ||
1549 | /* | 1534 | /* |
1550 | * This sequence count validates that the inode matches | 1535 | * This sequence count validates that the inode matches |
@@ -1552,7 +1537,7 @@ static int lookup_fast(struct nameidata *nd, | |||
1552 | */ | 1537 | */ |
1553 | *inode = d_backing_inode(dentry); | 1538 | *inode = d_backing_inode(dentry); |
1554 | negative = d_is_negative(dentry); | 1539 | negative = d_is_negative(dentry); |
1555 | if (read_seqcount_retry(&dentry->d_seq, seq)) | 1540 | if (unlikely(read_seqcount_retry(&dentry->d_seq, seq))) |
1556 | return -ECHILD; | 1541 | return -ECHILD; |
1557 | 1542 | ||
1558 | /* | 1543 | /* |
@@ -1562,81 +1547,89 @@ static int lookup_fast(struct nameidata *nd, | |||
1562 | * The memory barrier in read_seqcount_begin of child is | 1547 | * The memory barrier in read_seqcount_begin of child is |
1563 | * enough, we can use __read_seqcount_retry here. | 1548 | * enough, we can use __read_seqcount_retry here. |
1564 | */ | 1549 | */ |
1565 | if (__read_seqcount_retry(&parent->d_seq, nd->seq)) | 1550 | if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq))) |
1566 | return -ECHILD; | 1551 | return -ECHILD; |
1567 | 1552 | ||
1568 | *seqp = seq; | 1553 | *seqp = seq; |
1569 | if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { | 1554 | if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) |
1570 | status = d_revalidate(dentry, nd->flags); | 1555 | status = d_revalidate(dentry, nd->flags); |
1571 | if (unlikely(status <= 0)) { | 1556 | if (unlikely(status <= 0)) { |
1572 | if (status != -ECHILD) | 1557 | if (unlazy_walk(nd, dentry, seq)) |
1573 | need_reval = 0; | 1558 | return -ECHILD; |
1574 | goto unlazy; | 1559 | if (status == -ECHILD) |
1575 | } | 1560 | status = d_revalidate(dentry, nd->flags); |
1561 | } else { | ||
1562 | /* | ||
1563 | * Note: do negative dentry check after revalidation in | ||
1564 | * case that drops it. | ||
1565 | */ | ||
1566 | if (unlikely(negative)) | ||
1567 | return -ENOENT; | ||
1568 | path->mnt = mnt; | ||
1569 | path->dentry = dentry; | ||
1570 | if (likely(__follow_mount_rcu(nd, path, inode, seqp))) | ||
1571 | return 1; | ||
1572 | if (unlazy_walk(nd, dentry, seq)) | ||
1573 | return -ECHILD; | ||
1576 | } | 1574 | } |
1577 | /* | ||
1578 | * Note: do negative dentry check after revalidation in | ||
1579 | * case that drops it. | ||
1580 | */ | ||
1581 | if (negative) | ||
1582 | return -ENOENT; | ||
1583 | path->mnt = mnt; | ||
1584 | path->dentry = dentry; | ||
1585 | if (likely(__follow_mount_rcu(nd, path, inode, seqp))) | ||
1586 | return 0; | ||
1587 | unlazy: | ||
1588 | if (unlazy_walk(nd, dentry, seq)) | ||
1589 | return -ECHILD; | ||
1590 | } else { | 1575 | } else { |
1591 | dentry = __d_lookup(parent, &nd->last); | 1576 | dentry = __d_lookup(parent, &nd->last); |
1577 | if (unlikely(!dentry)) | ||
1578 | return 0; | ||
1579 | if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) | ||
1580 | status = d_revalidate(dentry, nd->flags); | ||
1592 | } | 1581 | } |
1593 | |||
1594 | if (unlikely(!dentry)) | ||
1595 | goto need_lookup; | ||
1596 | |||
1597 | if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval) | ||
1598 | status = d_revalidate(dentry, nd->flags); | ||
1599 | if (unlikely(status <= 0)) { | 1582 | if (unlikely(status <= 0)) { |
1600 | if (status < 0) { | 1583 | if (!status) |
1601 | dput(dentry); | 1584 | d_invalidate(dentry); |
1602 | return status; | ||
1603 | } | ||
1604 | d_invalidate(dentry); | ||
1605 | dput(dentry); | 1585 | dput(dentry); |
1606 | goto need_lookup; | 1586 | return status; |
1607 | } | 1587 | } |
1608 | |||
1609 | if (unlikely(d_is_negative(dentry))) { | 1588 | if (unlikely(d_is_negative(dentry))) { |
1610 | dput(dentry); | 1589 | dput(dentry); |
1611 | return -ENOENT; | 1590 | return -ENOENT; |
1612 | } | 1591 | } |
1592 | |||
1613 | path->mnt = mnt; | 1593 | path->mnt = mnt; |
1614 | path->dentry = dentry; | 1594 | path->dentry = dentry; |
1615 | err = follow_managed(path, nd); | 1595 | err = follow_managed(path, nd); |
1616 | if (likely(!err)) | 1596 | if (likely(err > 0)) |
1617 | *inode = d_backing_inode(path->dentry); | 1597 | *inode = d_backing_inode(path->dentry); |
1618 | return err; | 1598 | return err; |
1619 | |||
1620 | need_lookup: | ||
1621 | return 1; | ||
1622 | } | 1599 | } |
1623 | 1600 | ||
1624 | /* Fast lookup failed, do it the slow way */ | 1601 | /* Fast lookup failed, do it the slow way */ |
1625 | static int lookup_slow(struct nameidata *nd, struct path *path) | 1602 | static struct dentry *lookup_slow(const struct qstr *name, |
1603 | struct dentry *dir, | ||
1604 | unsigned int flags) | ||
1626 | { | 1605 | { |
1627 | struct dentry *dentry, *parent; | 1606 | struct dentry *dentry; |
1628 | 1607 | inode_lock(dir->d_inode); | |
1629 | parent = nd->path.dentry; | 1608 | dentry = d_lookup(dir, name); |
1630 | BUG_ON(nd->inode != parent->d_inode); | 1609 | if (unlikely(dentry)) { |
1631 | 1610 | if ((dentry->d_flags & DCACHE_OP_REVALIDATE) && | |
1632 | inode_lock(parent->d_inode); | 1611 | !(flags & LOOKUP_NO_REVAL)) { |
1633 | dentry = __lookup_hash(&nd->last, parent, nd->flags); | 1612 | int error = d_revalidate(dentry, flags); |
1634 | inode_unlock(parent->d_inode); | 1613 | if (unlikely(error <= 0)) { |
1635 | if (IS_ERR(dentry)) | 1614 | if (!error) |
1636 | return PTR_ERR(dentry); | 1615 | d_invalidate(dentry); |
1637 | path->mnt = nd->path.mnt; | 1616 | dput(dentry); |
1638 | path->dentry = dentry; | 1617 | dentry = ERR_PTR(error); |
1639 | return follow_managed(path, nd); | 1618 | } |
1619 | } | ||
1620 | if (dentry) { | ||
1621 | inode_unlock(dir->d_inode); | ||
1622 | return dentry; | ||
1623 | } | ||
1624 | } | ||
1625 | dentry = d_alloc(dir, name); | ||
1626 | if (unlikely(!dentry)) { | ||
1627 | inode_unlock(dir->d_inode); | ||
1628 | return ERR_PTR(-ENOMEM); | ||
1629 | } | ||
1630 | dentry = lookup_real(dir->d_inode, dentry, flags); | ||
1631 | inode_unlock(dir->d_inode); | ||
1632 | return dentry; | ||
1640 | } | 1633 | } |
1641 | 1634 | ||
1642 | static inline int may_lookup(struct nameidata *nd) | 1635 | static inline int may_lookup(struct nameidata *nd) |
@@ -1712,6 +1705,11 @@ static inline int should_follow_link(struct nameidata *nd, struct path *link, | |||
1712 | return 0; | 1705 | return 0; |
1713 | if (!follow) | 1706 | if (!follow) |
1714 | return 0; | 1707 | return 0; |
1708 | /* make sure that d_is_symlink above matches inode */ | ||
1709 | if (nd->flags & LOOKUP_RCU) { | ||
1710 | if (read_seqcount_retry(&link->dentry->d_seq, seq)) | ||
1711 | return -ECHILD; | ||
1712 | } | ||
1715 | return pick_link(nd, link, inode, seq); | 1713 | return pick_link(nd, link, inode, seq); |
1716 | } | 1714 | } |
1717 | 1715 | ||
@@ -1735,19 +1733,24 @@ static int walk_component(struct nameidata *nd, int flags) | |||
1735 | return err; | 1733 | return err; |
1736 | } | 1734 | } |
1737 | err = lookup_fast(nd, &path, &inode, &seq); | 1735 | err = lookup_fast(nd, &path, &inode, &seq); |
1738 | if (unlikely(err)) { | 1736 | if (unlikely(err <= 0)) { |
1739 | if (err < 0) | 1737 | if (err < 0) |
1740 | return err; | 1738 | return err; |
1741 | 1739 | path.dentry = lookup_slow(&nd->last, nd->path.dentry, | |
1742 | err = lookup_slow(nd, &path); | 1740 | nd->flags); |
1743 | if (err < 0) | 1741 | if (IS_ERR(path.dentry)) |
1742 | return PTR_ERR(path.dentry); | ||
1743 | if (unlikely(d_is_negative(path.dentry))) { | ||
1744 | dput(path.dentry); | ||
1745 | return -ENOENT; | ||
1746 | } | ||
1747 | path.mnt = nd->path.mnt; | ||
1748 | err = follow_managed(&path, nd); | ||
1749 | if (unlikely(err < 0)) | ||
1744 | return err; | 1750 | return err; |
1745 | 1751 | ||
1746 | inode = d_backing_inode(path.dentry); | ||
1747 | seq = 0; /* we are already out of RCU mode */ | 1752 | seq = 0; /* we are already out of RCU mode */ |
1748 | err = -ENOENT; | 1753 | inode = d_backing_inode(path.dentry); |
1749 | if (d_is_negative(path.dentry)) | ||
1750 | goto out_path_put; | ||
1751 | } | 1754 | } |
1752 | 1755 | ||
1753 | if (flags & WALK_PUT) | 1756 | if (flags & WALK_PUT) |
@@ -1759,10 +1762,6 @@ static int walk_component(struct nameidata *nd, int flags) | |||
1759 | nd->inode = inode; | 1762 | nd->inode = inode; |
1760 | nd->seq = seq; | 1763 | nd->seq = seq; |
1761 | return 0; | 1764 | return 0; |
1762 | |||
1763 | out_path_put: | ||
1764 | path_to_nameidata(&path, nd); | ||
1765 | return err; | ||
1766 | } | 1765 | } |
1767 | 1766 | ||
1768 | /* | 1767 | /* |
@@ -2368,21 +2367,9 @@ struct dentry *lookup_one_len_unlocked(const char *name, | |||
2368 | if (err) | 2367 | if (err) |
2369 | return ERR_PTR(err); | 2368 | return ERR_PTR(err); |
2370 | 2369 | ||
2371 | /* | 2370 | ret = lookup_dcache(&this, base, 0); |
2372 | * __d_lookup() is used to try to get a quick answer and avoid the | 2371 | if (!ret) |
2373 | * mutex. A false-negative does no harm. | 2372 | ret = lookup_slow(&this, base, 0); |
2374 | */ | ||
2375 | ret = __d_lookup(base, &this); | ||
2376 | if (ret && unlikely(ret->d_flags & DCACHE_OP_REVALIDATE)) { | ||
2377 | dput(ret); | ||
2378 | ret = NULL; | ||
2379 | } | ||
2380 | if (ret) | ||
2381 | return ret; | ||
2382 | |||
2383 | inode_lock(base->d_inode); | ||
2384 | ret = __lookup_hash(&this, base, 0); | ||
2385 | inode_unlock(base->d_inode); | ||
2386 | return ret; | 2373 | return ret; |
2387 | } | 2374 | } |
2388 | EXPORT_SYMBOL(lookup_one_len_unlocked); | 2375 | EXPORT_SYMBOL(lookup_one_len_unlocked); |
@@ -2460,31 +2447,21 @@ mountpoint_last(struct nameidata *nd, struct path *path) | |||
2460 | if (error) | 2447 | if (error) |
2461 | return error; | 2448 | return error; |
2462 | dentry = dget(nd->path.dentry); | 2449 | dentry = dget(nd->path.dentry); |
2463 | goto done; | 2450 | } else { |
2464 | } | 2451 | dentry = d_lookup(dir, &nd->last); |
2465 | |||
2466 | inode_lock(dir->d_inode); | ||
2467 | dentry = d_lookup(dir, &nd->last); | ||
2468 | if (!dentry) { | ||
2469 | /* | ||
2470 | * No cached dentry. Mounted dentries are pinned in the cache, | ||
2471 | * so that means that this dentry is probably a symlink or the | ||
2472 | * path doesn't actually point to a mounted dentry. | ||
2473 | */ | ||
2474 | dentry = d_alloc(dir, &nd->last); | ||
2475 | if (!dentry) { | 2452 | if (!dentry) { |
2476 | inode_unlock(dir->d_inode); | 2453 | /* |
2477 | return -ENOMEM; | 2454 | * No cached dentry. Mounted dentries are pinned in the |
2478 | } | 2455 | * cache, so that means that this dentry is probably |
2479 | dentry = lookup_real(dir->d_inode, dentry, nd->flags); | 2456 | * a symlink or the path doesn't actually point |
2480 | if (IS_ERR(dentry)) { | 2457 | * to a mounted dentry. |
2481 | inode_unlock(dir->d_inode); | 2458 | */ |
2482 | return PTR_ERR(dentry); | 2459 | dentry = lookup_slow(&nd->last, dir, |
2460 | nd->flags | LOOKUP_NO_REVAL); | ||
2461 | if (IS_ERR(dentry)) | ||
2462 | return PTR_ERR(dentry); | ||
2483 | } | 2463 | } |
2484 | } | 2464 | } |
2485 | inode_unlock(dir->d_inode); | ||
2486 | |||
2487 | done: | ||
2488 | if (d_is_negative(dentry)) { | 2465 | if (d_is_negative(dentry)) { |
2489 | dput(dentry); | 2466 | dput(dentry); |
2490 | return -ENOENT; | 2467 | return -ENOENT; |
@@ -3013,16 +2990,22 @@ static int lookup_open(struct nameidata *nd, struct path *path, | |||
3013 | struct inode *dir_inode = dir->d_inode; | 2990 | struct inode *dir_inode = dir->d_inode; |
3014 | struct dentry *dentry; | 2991 | struct dentry *dentry; |
3015 | int error; | 2992 | int error; |
3016 | bool need_lookup; | 2993 | bool need_lookup = false; |
3017 | 2994 | ||
3018 | *opened &= ~FILE_CREATED; | 2995 | *opened &= ~FILE_CREATED; |
3019 | dentry = lookup_dcache(&nd->last, dir, nd->flags, &need_lookup); | 2996 | dentry = lookup_dcache(&nd->last, dir, nd->flags); |
3020 | if (IS_ERR(dentry)) | 2997 | if (IS_ERR(dentry)) |
3021 | return PTR_ERR(dentry); | 2998 | return PTR_ERR(dentry); |
3022 | 2999 | ||
3023 | /* Cached positive dentry: will open in f_op->open */ | 3000 | if (!dentry) { |
3024 | if (!need_lookup && dentry->d_inode) | 3001 | dentry = d_alloc(dir, &nd->last); |
3002 | if (unlikely(!dentry)) | ||
3003 | return -ENOMEM; | ||
3004 | need_lookup = true; | ||
3005 | } else if (dentry->d_inode) { | ||
3006 | /* Cached positive dentry: will open in f_op->open */ | ||
3025 | goto out_no_open; | 3007 | goto out_no_open; |
3008 | } | ||
3026 | 3009 | ||
3027 | if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { | 3010 | if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { |
3028 | return atomic_open(nd, dentry, path, file, op, got_write, | 3011 | return atomic_open(nd, dentry, path, file, op, got_write, |
@@ -3106,13 +3089,14 @@ static int do_last(struct nameidata *nd, | |||
3106 | nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; | 3089 | nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; |
3107 | /* we _can_ be in RCU mode here */ | 3090 | /* we _can_ be in RCU mode here */ |
3108 | error = lookup_fast(nd, &path, &inode, &seq); | 3091 | error = lookup_fast(nd, &path, &inode, &seq); |
3109 | if (likely(!error)) | 3092 | if (likely(error > 0)) |
3110 | goto finish_lookup; | 3093 | goto finish_lookup; |
3111 | 3094 | ||
3112 | if (error < 0) | 3095 | if (error < 0) |
3113 | return error; | 3096 | return error; |
3114 | 3097 | ||
3115 | BUG_ON(nd->inode != dir->d_inode); | 3098 | BUG_ON(nd->inode != dir->d_inode); |
3099 | BUG_ON(nd->flags & LOOKUP_RCU); | ||
3116 | } else { | 3100 | } else { |
3117 | /* create side of things */ | 3101 | /* create side of things */ |
3118 | /* | 3102 | /* |
@@ -3167,12 +3151,6 @@ retry_lookup: | |||
3167 | } | 3151 | } |
3168 | 3152 | ||
3169 | /* | 3153 | /* |
3170 | * create/update audit record if it already exists. | ||
3171 | */ | ||
3172 | if (d_is_positive(path.dentry)) | ||
3173 | audit_inode(nd->name, path.dentry, 0); | ||
3174 | |||
3175 | /* | ||
3176 | * If atomic_open() acquired write access it is dropped now due to | 3154 | * If atomic_open() acquired write access it is dropped now due to |
3177 | * possible mount and symlink following (this might be optimized away if | 3155 | * possible mount and symlink following (this might be optimized away if |
3178 | * necessary...) | 3156 | * necessary...) |
@@ -3182,6 +3160,16 @@ retry_lookup: | |||
3182 | got_write = false; | 3160 | got_write = false; |
3183 | } | 3161 | } |
3184 | 3162 | ||
3163 | if (unlikely(d_is_negative(path.dentry))) { | ||
3164 | path_to_nameidata(&path, nd); | ||
3165 | return -ENOENT; | ||
3166 | } | ||
3167 | |||
3168 | /* | ||
3169 | * create/update audit record if it already exists. | ||
3170 | */ | ||
3171 | audit_inode(nd->name, path.dentry, 0); | ||
3172 | |||
3185 | if (unlikely((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))) { | 3173 | if (unlikely((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))) { |
3186 | path_to_nameidata(&path, nd); | 3174 | path_to_nameidata(&path, nd); |
3187 | return -EEXIST; | 3175 | return -EEXIST; |
@@ -3191,13 +3179,8 @@ retry_lookup: | |||
3191 | if (unlikely(error < 0)) | 3179 | if (unlikely(error < 0)) |
3192 | return error; | 3180 | return error; |
3193 | 3181 | ||
3194 | BUG_ON(nd->flags & LOOKUP_RCU); | ||
3195 | inode = d_backing_inode(path.dentry); | ||
3196 | seq = 0; /* out of RCU mode, so the value doesn't matter */ | 3182 | seq = 0; /* out of RCU mode, so the value doesn't matter */ |
3197 | if (unlikely(d_is_negative(path.dentry))) { | 3183 | inode = d_backing_inode(path.dentry); |
3198 | path_to_nameidata(&path, nd); | ||
3199 | return -ENOENT; | ||
3200 | } | ||
3201 | finish_lookup: | 3184 | finish_lookup: |
3202 | if (nd->depth) | 3185 | if (nd->depth) |
3203 | put_link(nd); | 3186 | put_link(nd); |
@@ -3206,11 +3189,6 @@ finish_lookup: | |||
3206 | if (unlikely(error)) | 3189 | if (unlikely(error)) |
3207 | return error; | 3190 | return error; |
3208 | 3191 | ||
3209 | if (unlikely(d_is_symlink(path.dentry)) && !(open_flag & O_PATH)) { | ||
3210 | path_to_nameidata(&path, nd); | ||
3211 | return -ELOOP; | ||
3212 | } | ||
3213 | |||
3214 | if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) { | 3192 | if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) { |
3215 | path_to_nameidata(&path, nd); | 3193 | path_to_nameidata(&path, nd); |
3216 | } else { | 3194 | } else { |
@@ -3229,6 +3207,10 @@ finish_open: | |||
3229 | return error; | 3207 | return error; |
3230 | } | 3208 | } |
3231 | audit_inode(nd->name, nd->path.dentry, 0); | 3209 | audit_inode(nd->name, nd->path.dentry, 0); |
3210 | if (unlikely(d_is_symlink(nd->path.dentry)) && !(open_flag & O_PATH)) { | ||
3211 | error = -ELOOP; | ||
3212 | goto out; | ||
3213 | } | ||
3232 | error = -EISDIR; | 3214 | error = -EISDIR; |
3233 | if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) | 3215 | if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) |
3234 | goto out; | 3216 | goto out; |
@@ -3273,6 +3255,10 @@ opened: | |||
3273 | goto exit_fput; | 3255 | goto exit_fput; |
3274 | } | 3256 | } |
3275 | out: | 3257 | out: |
3258 | if (unlikely(error > 0)) { | ||
3259 | WARN_ON(1); | ||
3260 | error = -EINVAL; | ||
3261 | } | ||
3276 | if (got_write) | 3262 | if (got_write) |
3277 | mnt_drop_write(nd->path.mnt); | 3263 | mnt_drop_write(nd->path.mnt); |
3278 | path_put(&save_parent); | 3264 | path_put(&save_parent); |
@@ -3699,31 +3685,6 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) | |||
3699 | return sys_mkdirat(AT_FDCWD, pathname, mode); | 3685 | return sys_mkdirat(AT_FDCWD, pathname, mode); |
3700 | } | 3686 | } |
3701 | 3687 | ||
3702 | /* | ||
3703 | * The dentry_unhash() helper will try to drop the dentry early: we | ||
3704 | * should have a usage count of 1 if we're the only user of this | ||
3705 | * dentry, and if that is true (possibly after pruning the dcache), | ||
3706 | * then we drop the dentry now. | ||
3707 | * | ||
3708 | * A low-level filesystem can, if it choses, legally | ||
3709 | * do a | ||
3710 | * | ||
3711 | * if (!d_unhashed(dentry)) | ||
3712 | * return -EBUSY; | ||
3713 | * | ||
3714 | * if it cannot handle the case of removing a directory | ||
3715 | * that is still in use by something else.. | ||
3716 | */ | ||
3717 | void dentry_unhash(struct dentry *dentry) | ||
3718 | { | ||
3719 | shrink_dcache_parent(dentry); | ||
3720 | spin_lock(&dentry->d_lock); | ||
3721 | if (dentry->d_lockref.count == 1) | ||
3722 | __d_drop(dentry); | ||
3723 | spin_unlock(&dentry->d_lock); | ||
3724 | } | ||
3725 | EXPORT_SYMBOL(dentry_unhash); | ||
3726 | |||
3727 | int vfs_rmdir(struct inode *dir, struct dentry *dentry) | 3688 | int vfs_rmdir(struct inode *dir, struct dentry *dentry) |
3728 | { | 3689 | { |
3729 | int error = may_delete(dir, dentry, 1); | 3690 | int error = may_delete(dir, dentry, 1); |
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 26c2de2de13f..b7f8eaeea5d8 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c | |||
@@ -633,7 +633,7 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx, | |||
633 | d_rehash(newdent); | 633 | d_rehash(newdent); |
634 | } else { | 634 | } else { |
635 | spin_lock(&dentry->d_lock); | 635 | spin_lock(&dentry->d_lock); |
636 | NCP_FINFO(inode)->flags &= ~NCPI_DIR_CACHE; | 636 | NCP_FINFO(dir)->flags &= ~NCPI_DIR_CACHE; |
637 | spin_unlock(&dentry->d_lock); | 637 | spin_unlock(&dentry->d_lock); |
638 | } | 638 | } |
639 | } else { | 639 | } else { |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9cce67043f92..4bfa7d8bcade 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -1360,19 +1360,15 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in | |||
1360 | dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry); | 1360 | dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry); |
1361 | nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); | 1361 | nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); |
1362 | 1362 | ||
1363 | res = ERR_PTR(-ENAMETOOLONG); | 1363 | if (unlikely(dentry->d_name.len > NFS_SERVER(dir)->namelen)) |
1364 | if (dentry->d_name.len > NFS_SERVER(dir)->namelen) | 1364 | return ERR_PTR(-ENAMETOOLONG); |
1365 | goto out; | ||
1366 | 1365 | ||
1367 | /* | 1366 | /* |
1368 | * If we're doing an exclusive create, optimize away the lookup | 1367 | * If we're doing an exclusive create, optimize away the lookup |
1369 | * but don't hash the dentry. | 1368 | * but don't hash the dentry. |
1370 | */ | 1369 | */ |
1371 | if (nfs_is_exclusive_create(dir, flags)) { | 1370 | if (nfs_is_exclusive_create(dir, flags)) |
1372 | d_instantiate(dentry, NULL); | 1371 | return NULL; |
1373 | res = NULL; | ||
1374 | goto out; | ||
1375 | } | ||
1376 | 1372 | ||
1377 | res = ERR_PTR(-ENOMEM); | 1373 | res = ERR_PTR(-ENOMEM); |
1378 | fhandle = nfs_alloc_fhandle(); | 1374 | fhandle = nfs_alloc_fhandle(); |
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 5bcd92d50e82..0cb1abd535e3 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c | |||
@@ -1215,7 +1215,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task, | |||
1215 | hdr->pgio_mirror_idx + 1, | 1215 | hdr->pgio_mirror_idx + 1, |
1216 | &hdr->pgio_mirror_idx)) | 1216 | &hdr->pgio_mirror_idx)) |
1217 | goto out_eagain; | 1217 | goto out_eagain; |
1218 | set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, | 1218 | set_bit(NFS_LAYOUT_RETURN_REQUESTED, |
1219 | &hdr->lseg->pls_layout->plh_flags); | 1219 | &hdr->lseg->pls_layout->plh_flags); |
1220 | pnfs_read_resend_pnfs(hdr); | 1220 | pnfs_read_resend_pnfs(hdr); |
1221 | return task->tk_status; | 1221 | return task->tk_status; |
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 29898a9550fa..eb370460ce20 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c | |||
@@ -412,7 +412,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, | |||
412 | OP_ILLEGAL, GFP_NOIO); | 412 | OP_ILLEGAL, GFP_NOIO); |
413 | if (!fail_return) { | 413 | if (!fail_return) { |
414 | if (ff_layout_has_available_ds(lseg)) | 414 | if (ff_layout_has_available_ds(lseg)) |
415 | set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, | 415 | set_bit(NFS_LAYOUT_RETURN_REQUESTED, |
416 | &lseg->pls_layout->plh_flags); | 416 | &lseg->pls_layout->plh_flags); |
417 | else | 417 | else |
418 | pnfs_error_mark_layout_for_return(ino, lseg); | 418 | pnfs_error_mark_layout_for_return(ino, lseg); |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4bfc33ad0563..400a70b3be7b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -2461,14 +2461,15 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, | |||
2461 | 2461 | ||
2462 | dentry = opendata->dentry; | 2462 | dentry = opendata->dentry; |
2463 | if (d_really_is_negative(dentry)) { | 2463 | if (d_really_is_negative(dentry)) { |
2464 | /* FIXME: Is this d_drop() ever needed? */ | 2464 | struct dentry *alias; |
2465 | d_drop(dentry); | 2465 | d_drop(dentry); |
2466 | dentry = d_add_unique(dentry, igrab(state->inode)); | 2466 | alias = d_exact_alias(dentry, state->inode); |
2467 | if (dentry == NULL) { | 2467 | if (!alias) |
2468 | dentry = opendata->dentry; | 2468 | alias = d_splice_alias(igrab(state->inode), dentry); |
2469 | } else if (dentry != ctx->dentry) { | 2469 | /* d_splice_alias() can't fail here - it's a non-directory */ |
2470 | if (alias) { | ||
2470 | dput(ctx->dentry); | 2471 | dput(ctx->dentry); |
2471 | ctx->dentry = dget(dentry); | 2472 | ctx->dentry = dentry = alias; |
2472 | } | 2473 | } |
2473 | nfs_set_verifier(dentry, | 2474 | nfs_set_verifier(dentry, |
2474 | nfs_save_change_attribute(d_inode(opendata->dir))); | 2475 | nfs_save_change_attribute(d_inode(opendata->dir))); |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a3592cc34a20..482b6e94bb37 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -52,9 +52,7 @@ static DEFINE_SPINLOCK(pnfs_spinlock); | |||
52 | */ | 52 | */ |
53 | static LIST_HEAD(pnfs_modules_tbl); | 53 | static LIST_HEAD(pnfs_modules_tbl); |
54 | 54 | ||
55 | static int | 55 | static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo); |
56 | pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, | ||
57 | enum pnfs_iomode iomode, bool sync); | ||
58 | 56 | ||
59 | /* Return the registered pnfs layout driver module matching given id */ | 57 | /* Return the registered pnfs layout driver module matching given id */ |
60 | static struct pnfs_layoutdriver_type * | 58 | static struct pnfs_layoutdriver_type * |
@@ -243,6 +241,8 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) | |||
243 | { | 241 | { |
244 | struct inode *inode = lo->plh_inode; | 242 | struct inode *inode = lo->plh_inode; |
245 | 243 | ||
244 | pnfs_layoutreturn_before_put_layout_hdr(lo); | ||
245 | |||
246 | if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { | 246 | if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { |
247 | if (!list_empty(&lo->plh_segs)) | 247 | if (!list_empty(&lo->plh_segs)) |
248 | WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); | 248 | WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); |
@@ -345,58 +345,6 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, | |||
345 | rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); | 345 | rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); |
346 | } | 346 | } |
347 | 347 | ||
348 | /* Return true if layoutreturn is needed */ | ||
349 | static bool | ||
350 | pnfs_layout_need_return(struct pnfs_layout_hdr *lo, | ||
351 | struct pnfs_layout_segment *lseg) | ||
352 | { | ||
353 | struct pnfs_layout_segment *s; | ||
354 | |||
355 | if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) | ||
356 | return false; | ||
357 | |||
358 | list_for_each_entry(s, &lo->plh_segs, pls_list) | ||
359 | if (s != lseg && test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags)) | ||
360 | return false; | ||
361 | |||
362 | return true; | ||
363 | } | ||
364 | |||
365 | static bool | ||
366 | pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo) | ||
367 | { | ||
368 | if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) | ||
369 | return false; | ||
370 | lo->plh_return_iomode = 0; | ||
371 | pnfs_get_layout_hdr(lo); | ||
372 | clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); | ||
373 | return true; | ||
374 | } | ||
375 | |||
376 | static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, | ||
377 | struct pnfs_layout_hdr *lo, struct inode *inode) | ||
378 | { | ||
379 | lo = lseg->pls_layout; | ||
380 | inode = lo->plh_inode; | ||
381 | |||
382 | spin_lock(&inode->i_lock); | ||
383 | if (pnfs_layout_need_return(lo, lseg)) { | ||
384 | nfs4_stateid stateid; | ||
385 | enum pnfs_iomode iomode; | ||
386 | bool send; | ||
387 | |||
388 | nfs4_stateid_copy(&stateid, &lo->plh_stateid); | ||
389 | iomode = lo->plh_return_iomode; | ||
390 | send = pnfs_prepare_layoutreturn(lo); | ||
391 | spin_unlock(&inode->i_lock); | ||
392 | if (send) { | ||
393 | /* Send an async layoutreturn so we dont deadlock */ | ||
394 | pnfs_send_layoutreturn(lo, &stateid, iomode, false); | ||
395 | } | ||
396 | } else | ||
397 | spin_unlock(&inode->i_lock); | ||
398 | } | ||
399 | |||
400 | void | 348 | void |
401 | pnfs_put_lseg(struct pnfs_layout_segment *lseg) | 349 | pnfs_put_lseg(struct pnfs_layout_segment *lseg) |
402 | { | 350 | { |
@@ -410,15 +358,8 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) | |||
410 | atomic_read(&lseg->pls_refcount), | 358 | atomic_read(&lseg->pls_refcount), |
411 | test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); | 359 | test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); |
412 | 360 | ||
413 | /* Handle the case where refcount != 1 */ | ||
414 | if (atomic_add_unless(&lseg->pls_refcount, -1, 1)) | ||
415 | return; | ||
416 | |||
417 | lo = lseg->pls_layout; | 361 | lo = lseg->pls_layout; |
418 | inode = lo->plh_inode; | 362 | inode = lo->plh_inode; |
419 | /* Do we need a layoutreturn? */ | ||
420 | if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) | ||
421 | pnfs_layoutreturn_before_put_lseg(lseg, lo, inode); | ||
422 | 363 | ||
423 | if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { | 364 | if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { |
424 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { | 365 | if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { |
@@ -937,6 +878,17 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) | |||
937 | rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); | 878 | rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); |
938 | } | 879 | } |
939 | 880 | ||
881 | static bool | ||
882 | pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo) | ||
883 | { | ||
884 | if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) | ||
885 | return false; | ||
886 | lo->plh_return_iomode = 0; | ||
887 | pnfs_get_layout_hdr(lo); | ||
888 | clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); | ||
889 | return true; | ||
890 | } | ||
891 | |||
940 | static int | 892 | static int |
941 | pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, | 893 | pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, |
942 | enum pnfs_iomode iomode, bool sync) | 894 | enum pnfs_iomode iomode, bool sync) |
@@ -971,6 +923,48 @@ out: | |||
971 | return status; | 923 | return status; |
972 | } | 924 | } |
973 | 925 | ||
926 | /* Return true if layoutreturn is needed */ | ||
927 | static bool | ||
928 | pnfs_layout_need_return(struct pnfs_layout_hdr *lo) | ||
929 | { | ||
930 | struct pnfs_layout_segment *s; | ||
931 | |||
932 | if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) | ||
933 | return false; | ||
934 | |||
935 | /* Defer layoutreturn until all lsegs are done */ | ||
936 | list_for_each_entry(s, &lo->plh_segs, pls_list) { | ||
937 | if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags)) | ||
938 | return false; | ||
939 | } | ||
940 | |||
941 | return true; | ||
942 | } | ||
943 | |||
944 | static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo) | ||
945 | { | ||
946 | struct inode *inode= lo->plh_inode; | ||
947 | |||
948 | if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) | ||
949 | return; | ||
950 | spin_lock(&inode->i_lock); | ||
951 | if (pnfs_layout_need_return(lo)) { | ||
952 | nfs4_stateid stateid; | ||
953 | enum pnfs_iomode iomode; | ||
954 | bool send; | ||
955 | |||
956 | nfs4_stateid_copy(&stateid, &lo->plh_stateid); | ||
957 | iomode = lo->plh_return_iomode; | ||
958 | send = pnfs_prepare_layoutreturn(lo); | ||
959 | spin_unlock(&inode->i_lock); | ||
960 | if (send) { | ||
961 | /* Send an async layoutreturn so we dont deadlock */ | ||
962 | pnfs_send_layoutreturn(lo, &stateid, iomode, false); | ||
963 | } | ||
964 | } else | ||
965 | spin_unlock(&inode->i_lock); | ||
966 | } | ||
967 | |||
974 | /* | 968 | /* |
975 | * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr | 969 | * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr |
976 | * when the layout segment list is empty. | 970 | * when the layout segment list is empty. |
@@ -1091,7 +1085,7 @@ bool pnfs_roc(struct inode *ino) | |||
1091 | 1085 | ||
1092 | nfs4_stateid_copy(&stateid, &lo->plh_stateid); | 1086 | nfs4_stateid_copy(&stateid, &lo->plh_stateid); |
1093 | /* always send layoutreturn if being marked so */ | 1087 | /* always send layoutreturn if being marked so */ |
1094 | if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, | 1088 | if (test_and_clear_bit(NFS_LAYOUT_RETURN_REQUESTED, |
1095 | &lo->plh_flags)) | 1089 | &lo->plh_flags)) |
1096 | layoutreturn = pnfs_prepare_layoutreturn(lo); | 1090 | layoutreturn = pnfs_prepare_layoutreturn(lo); |
1097 | 1091 | ||
@@ -1772,7 +1766,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, | |||
1772 | pnfs_set_plh_return_iomode(lo, return_range->iomode); | 1766 | pnfs_set_plh_return_iomode(lo, return_range->iomode); |
1773 | if (!mark_lseg_invalid(lseg, tmp_list)) | 1767 | if (!mark_lseg_invalid(lseg, tmp_list)) |
1774 | remaining++; | 1768 | remaining++; |
1775 | set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, | 1769 | set_bit(NFS_LAYOUT_RETURN_REQUESTED, |
1776 | &lo->plh_flags); | 1770 | &lo->plh_flags); |
1777 | } | 1771 | } |
1778 | return remaining; | 1772 | return remaining; |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 9f4e2a47f4aa..1ac1db5f6dad 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -94,8 +94,8 @@ enum { | |||
94 | NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ | 94 | NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ |
95 | NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ | 95 | NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ |
96 | NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ | 96 | NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ |
97 | NFS_LAYOUT_RETURN, /* Return this layout ASAP */ | 97 | NFS_LAYOUT_RETURN, /* layoutreturn in progress */ |
98 | NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */ | 98 | NFS_LAYOUT_RETURN_REQUESTED, /* Return this layout ASAP */ |
99 | NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ | 99 | NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ |
100 | NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ | 100 | NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ |
101 | }; | 101 | }; |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 5d2a57e4c03a..d40010e4f1a9 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -870,7 +870,7 @@ __be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, | |||
870 | 870 | ||
871 | oldfs = get_fs(); | 871 | oldfs = get_fs(); |
872 | set_fs(KERNEL_DS); | 872 | set_fs(KERNEL_DS); |
873 | host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); | 873 | host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset, 0); |
874 | set_fs(oldfs); | 874 | set_fs(oldfs); |
875 | return nfsd_finish_read(file, count, host_err); | 875 | return nfsd_finish_read(file, count, host_err); |
876 | } | 876 | } |
@@ -957,7 +957,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, | |||
957 | 957 | ||
958 | /* Write the data. */ | 958 | /* Write the data. */ |
959 | oldfs = get_fs(); set_fs(KERNEL_DS); | 959 | oldfs = get_fs(); set_fs(KERNEL_DS); |
960 | host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos); | 960 | host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos, 0); |
961 | set_fs(oldfs); | 961 | set_fs(oldfs); |
962 | if (host_err < 0) | 962 | if (host_err < 0) |
963 | goto out_nfserr; | 963 | goto out_nfserr; |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index a3cc6d2fc896..a76b9ea7722e 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -1254,15 +1254,15 @@ static const struct file_operations o2hb_debug_fops = { | |||
1254 | 1254 | ||
1255 | void o2hb_exit(void) | 1255 | void o2hb_exit(void) |
1256 | { | 1256 | { |
1257 | kfree(o2hb_db_livenodes); | ||
1258 | kfree(o2hb_db_liveregions); | ||
1259 | kfree(o2hb_db_quorumregions); | ||
1260 | kfree(o2hb_db_failedregions); | ||
1261 | debugfs_remove(o2hb_debug_failedregions); | 1257 | debugfs_remove(o2hb_debug_failedregions); |
1262 | debugfs_remove(o2hb_debug_quorumregions); | 1258 | debugfs_remove(o2hb_debug_quorumregions); |
1263 | debugfs_remove(o2hb_debug_liveregions); | 1259 | debugfs_remove(o2hb_debug_liveregions); |
1264 | debugfs_remove(o2hb_debug_livenodes); | 1260 | debugfs_remove(o2hb_debug_livenodes); |
1265 | debugfs_remove(o2hb_debug_dir); | 1261 | debugfs_remove(o2hb_debug_dir); |
1262 | kfree(o2hb_db_livenodes); | ||
1263 | kfree(o2hb_db_liveregions); | ||
1264 | kfree(o2hb_db_quorumregions); | ||
1265 | kfree(o2hb_db_failedregions); | ||
1266 | } | 1266 | } |
1267 | 1267 | ||
1268 | static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir, | 1268 | static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir, |
@@ -1438,13 +1438,15 @@ static void o2hb_region_release(struct config_item *item) | |||
1438 | 1438 | ||
1439 | kfree(reg->hr_slots); | 1439 | kfree(reg->hr_slots); |
1440 | 1440 | ||
1441 | kfree(reg->hr_db_regnum); | ||
1442 | kfree(reg->hr_db_livenodes); | ||
1443 | debugfs_remove(reg->hr_debug_livenodes); | 1441 | debugfs_remove(reg->hr_debug_livenodes); |
1444 | debugfs_remove(reg->hr_debug_regnum); | 1442 | debugfs_remove(reg->hr_debug_regnum); |
1445 | debugfs_remove(reg->hr_debug_elapsed_time); | 1443 | debugfs_remove(reg->hr_debug_elapsed_time); |
1446 | debugfs_remove(reg->hr_debug_pinned); | 1444 | debugfs_remove(reg->hr_debug_pinned); |
1447 | debugfs_remove(reg->hr_debug_dir); | 1445 | debugfs_remove(reg->hr_debug_dir); |
1446 | kfree(reg->hr_db_livenodes); | ||
1447 | kfree(reg->hr_db_regnum); | ||
1448 | kfree(reg->hr_debug_elapsed_time); | ||
1449 | kfree(reg->hr_debug_pinned); | ||
1448 | 1450 | ||
1449 | spin_lock(&o2hb_live_lock); | 1451 | spin_lock(&o2hb_live_lock); |
1450 | list_del(&reg->hr_all_item); | 1452 | list_del(&reg->hr_all_item); |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index c5bdf02c213b..b94a425f0175 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -2367,6 +2367,8 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) | |||
2367 | break; | 2367 | break; |
2368 | } | 2368 | } |
2369 | } | 2369 | } |
2370 | dlm_lockres_clear_refmap_bit(dlm, res, | ||
2371 | dead_node); | ||
2370 | spin_unlock(&res->spinlock); | 2372 | spin_unlock(&res->spinlock); |
2371 | continue; | 2373 | continue; |
2372 | } | 2374 | } |
diff --git a/fs/pnode.c b/fs/pnode.c index 6367e1e435c6..c524fdddc7fb 100644 --- a/fs/pnode.c +++ b/fs/pnode.c | |||
@@ -202,6 +202,11 @@ static struct mount *last_dest, *last_source, *dest_master; | |||
202 | static struct mountpoint *mp; | 202 | static struct mountpoint *mp; |
203 | static struct hlist_head *list; | 203 | static struct hlist_head *list; |
204 | 204 | ||
205 | static inline bool peers(struct mount *m1, struct mount *m2) | ||
206 | { | ||
207 | return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id; | ||
208 | } | ||
209 | |||
205 | static int propagate_one(struct mount *m) | 210 | static int propagate_one(struct mount *m) |
206 | { | 211 | { |
207 | struct mount *child; | 212 | struct mount *child; |
@@ -212,7 +217,7 @@ static int propagate_one(struct mount *m) | |||
212 | /* skip if mountpoint isn't covered by it */ | 217 | /* skip if mountpoint isn't covered by it */ |
213 | if (!is_subdir(mp->m_dentry, m->mnt.mnt_root)) | 218 | if (!is_subdir(mp->m_dentry, m->mnt.mnt_root)) |
214 | return 0; | 219 | return 0; |
215 | if (m->mnt_group_id == last_dest->mnt_group_id) { | 220 | if (peers(m, last_dest)) { |
216 | type = CL_MAKE_SHARED; | 221 | type = CL_MAKE_SHARED; |
217 | } else { | 222 | } else { |
218 | struct mount *n, *p; | 223 | struct mount *n, *p; |
@@ -223,7 +228,7 @@ static int propagate_one(struct mount *m) | |||
223 | last_source = last_source->mnt_master; | 228 | last_source = last_source->mnt_master; |
224 | last_dest = last_source->mnt_parent; | 229 | last_dest = last_source->mnt_parent; |
225 | } | 230 | } |
226 | if (n->mnt_group_id != last_dest->mnt_group_id) { | 231 | if (!peers(n, last_dest)) { |
227 | last_source = last_source->mnt_master; | 232 | last_source = last_source->mnt_master; |
228 | last_dest = last_source->mnt_parent; | 233 | last_dest = last_source->mnt_parent; |
229 | } | 234 | } |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 85d16c67c33e..fa95ab2d3674 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -259,23 +259,29 @@ static int do_maps_open(struct inode *inode, struct file *file, | |||
259 | sizeof(struct proc_maps_private)); | 259 | sizeof(struct proc_maps_private)); |
260 | } | 260 | } |
261 | 261 | ||
262 | static pid_t pid_of_stack(struct proc_maps_private *priv, | 262 | /* |
263 | struct vm_area_struct *vma, bool is_pid) | 263 | * Indicate if the VMA is a stack for the given task; for |
264 | * /proc/PID/maps that is the stack of the main task. | ||
265 | */ | ||
266 | static int is_stack(struct proc_maps_private *priv, | ||
267 | struct vm_area_struct *vma, int is_pid) | ||
264 | { | 268 | { |
265 | struct inode *inode = priv->inode; | 269 | int stack = 0; |
266 | struct task_struct *task; | 270 | |
267 | pid_t ret = 0; | 271 | if (is_pid) { |
272 | stack = vma->vm_start <= vma->vm_mm->start_stack && | ||
273 | vma->vm_end >= vma->vm_mm->start_stack; | ||
274 | } else { | ||
275 | struct inode *inode = priv->inode; | ||
276 | struct task_struct *task; | ||
268 | 277 | ||
269 | rcu_read_lock(); | 278 | rcu_read_lock(); |
270 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | 279 | task = pid_task(proc_pid(inode), PIDTYPE_PID); |
271 | if (task) { | ||
272 | task = task_of_stack(task, vma, is_pid); | ||
273 | if (task) | 280 | if (task) |
274 | ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); | 281 | stack = vma_is_stack_for_task(vma, task); |
282 | rcu_read_unlock(); | ||
275 | } | 283 | } |
276 | rcu_read_unlock(); | 284 | return stack; |
277 | |||
278 | return ret; | ||
279 | } | 285 | } |
280 | 286 | ||
281 | static void | 287 | static void |
@@ -335,8 +341,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) | |||
335 | 341 | ||
336 | name = arch_vma_name(vma); | 342 | name = arch_vma_name(vma); |
337 | if (!name) { | 343 | if (!name) { |
338 | pid_t tid; | ||
339 | |||
340 | if (!mm) { | 344 | if (!mm) { |
341 | name = "[vdso]"; | 345 | name = "[vdso]"; |
342 | goto done; | 346 | goto done; |
@@ -348,21 +352,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) | |||
348 | goto done; | 352 | goto done; |
349 | } | 353 | } |
350 | 354 | ||
351 | tid = pid_of_stack(priv, vma, is_pid); | 355 | if (is_stack(priv, vma, is_pid)) |
352 | if (tid != 0) { | 356 | name = "[stack]"; |
353 | /* | ||
354 | * Thread stack in /proc/PID/task/TID/maps or | ||
355 | * the main process stack. | ||
356 | */ | ||
357 | if (!is_pid || (vma->vm_start <= mm->start_stack && | ||
358 | vma->vm_end >= mm->start_stack)) { | ||
359 | name = "[stack]"; | ||
360 | } else { | ||
361 | /* Thread stack in /proc/PID/maps */ | ||
362 | seq_pad(m, ' '); | ||
363 | seq_printf(m, "[stack:%d]", tid); | ||
364 | } | ||
365 | } | ||
366 | } | 357 | } |
367 | 358 | ||
368 | done: | 359 | done: |
@@ -1552,18 +1543,19 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, | |||
1552 | static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, | 1543 | static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, |
1553 | unsigned long addr, unsigned long end, struct mm_walk *walk) | 1544 | unsigned long addr, unsigned long end, struct mm_walk *walk) |
1554 | { | 1545 | { |
1546 | pte_t huge_pte = huge_ptep_get(pte); | ||
1555 | struct numa_maps *md; | 1547 | struct numa_maps *md; |
1556 | struct page *page; | 1548 | struct page *page; |
1557 | 1549 | ||
1558 | if (!pte_present(*pte)) | 1550 | if (!pte_present(huge_pte)) |
1559 | return 0; | 1551 | return 0; |
1560 | 1552 | ||
1561 | page = pte_page(*pte); | 1553 | page = pte_page(huge_pte); |
1562 | if (!page) | 1554 | if (!page) |
1563 | return 0; | 1555 | return 0; |
1564 | 1556 | ||
1565 | md = walk->private; | 1557 | md = walk->private; |
1566 | gather_stats(page, md, pte_dirty(*pte), 1); | 1558 | gather_stats(page, md, pte_dirty(huge_pte), 1); |
1567 | return 0; | 1559 | return 0; |
1568 | } | 1560 | } |
1569 | 1561 | ||
@@ -1617,19 +1609,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) | |||
1617 | seq_file_path(m, file, "\n\t= "); | 1609 | seq_file_path(m, file, "\n\t= "); |
1618 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { | 1610 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { |
1619 | seq_puts(m, " heap"); | 1611 | seq_puts(m, " heap"); |
1620 | } else { | 1612 | } else if (is_stack(proc_priv, vma, is_pid)) { |
1621 | pid_t tid = pid_of_stack(proc_priv, vma, is_pid); | 1613 | seq_puts(m, " stack"); |
1622 | if (tid != 0) { | ||
1623 | /* | ||
1624 | * Thread stack in /proc/PID/task/TID/maps or | ||
1625 | * the main process stack. | ||
1626 | */ | ||
1627 | if (!is_pid || (vma->vm_start <= mm->start_stack && | ||
1628 | vma->vm_end >= mm->start_stack)) | ||
1629 | seq_puts(m, " stack"); | ||
1630 | else | ||
1631 | seq_printf(m, " stack:%d", tid); | ||
1632 | } | ||
1633 | } | 1614 | } |
1634 | 1615 | ||
1635 | if (is_vm_hugetlb_page(vma)) | 1616 | if (is_vm_hugetlb_page(vma)) |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index e0d64c92e4f6..faacb0c0d857 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
@@ -123,23 +123,26 @@ unsigned long task_statm(struct mm_struct *mm, | |||
123 | return size; | 123 | return size; |
124 | } | 124 | } |
125 | 125 | ||
126 | static pid_t pid_of_stack(struct proc_maps_private *priv, | 126 | static int is_stack(struct proc_maps_private *priv, |
127 | struct vm_area_struct *vma, bool is_pid) | 127 | struct vm_area_struct *vma, int is_pid) |
128 | { | 128 | { |
129 | struct inode *inode = priv->inode; | 129 | struct mm_struct *mm = vma->vm_mm; |
130 | struct task_struct *task; | 130 | int stack = 0; |
131 | pid_t ret = 0; | 131 | |
132 | 132 | if (is_pid) { | |
133 | rcu_read_lock(); | 133 | stack = vma->vm_start <= mm->start_stack && |
134 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | 134 | vma->vm_end >= mm->start_stack; |
135 | if (task) { | 135 | } else { |
136 | task = task_of_stack(task, vma, is_pid); | 136 | struct inode *inode = priv->inode; |
137 | struct task_struct *task; | ||
138 | |||
139 | rcu_read_lock(); | ||
140 | task = pid_task(proc_pid(inode), PIDTYPE_PID); | ||
137 | if (task) | 141 | if (task) |
138 | ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); | 142 | stack = vma_is_stack_for_task(vma, task); |
143 | rcu_read_unlock(); | ||
139 | } | 144 | } |
140 | rcu_read_unlock(); | 145 | return stack; |
141 | |||
142 | return ret; | ||
143 | } | 146 | } |
144 | 147 | ||
145 | /* | 148 | /* |
@@ -181,21 +184,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, | |||
181 | if (file) { | 184 | if (file) { |
182 | seq_pad(m, ' '); | 185 | seq_pad(m, ' '); |
183 | seq_file_path(m, file, ""); | 186 | seq_file_path(m, file, ""); |
184 | } else if (mm) { | 187 | } else if (mm && is_stack(priv, vma, is_pid)) { |
185 | pid_t tid = pid_of_stack(priv, vma, is_pid); | 188 | seq_pad(m, ' '); |
186 | 189 | seq_printf(m, "[stack]"); | |
187 | if (tid != 0) { | ||
188 | seq_pad(m, ' '); | ||
189 | /* | ||
190 | * Thread stack in /proc/PID/task/TID/maps or | ||
191 | * the main process stack. | ||
192 | */ | ||
193 | if (!is_pid || (vma->vm_start <= mm->start_stack && | ||
194 | vma->vm_end >= mm->start_stack)) | ||
195 | seq_printf(m, "[stack]"); | ||
196 | else | ||
197 | seq_printf(m, "[stack:%d]", tid); | ||
198 | } | ||
199 | } | 190 | } |
200 | 191 | ||
201 | seq_putc(m, '\n'); | 192 | seq_putc(m, '\n'); |
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 3c3b81bb6dfe..04ca0cc6d065 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -2430,9 +2430,7 @@ int dquot_quota_on_mount(struct super_block *sb, char *qf_name, | |||
2430 | struct dentry *dentry; | 2430 | struct dentry *dentry; |
2431 | int error; | 2431 | int error; |
2432 | 2432 | ||
2433 | inode_lock(d_inode(sb->s_root)); | 2433 | dentry = lookup_one_len_unlocked(qf_name, sb->s_root, strlen(qf_name)); |
2434 | dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name)); | ||
2435 | inode_unlock(d_inode(sb->s_root)); | ||
2436 | if (IS_ERR(dentry)) | 2434 | if (IS_ERR(dentry)) |
2437 | return PTR_ERR(dentry); | 2435 | return PTR_ERR(dentry); |
2438 | 2436 | ||
diff --git a/fs/read_write.c b/fs/read_write.c index 324ec271cc4e..cf377cf9dfe3 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/splice.h> | 17 | #include <linux/splice.h> |
18 | #include <linux/compat.h> | 18 | #include <linux/compat.h> |
19 | #include <linux/mount.h> | 19 | #include <linux/mount.h> |
20 | #include <linux/fs.h> | ||
20 | #include "internal.h" | 21 | #include "internal.h" |
21 | 22 | ||
22 | #include <asm/uaccess.h> | 23 | #include <asm/uaccess.h> |
@@ -183,7 +184,7 @@ loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence) | |||
183 | switch (whence) { | 184 | switch (whence) { |
184 | case SEEK_SET: case SEEK_CUR: | 185 | case SEEK_SET: case SEEK_CUR: |
185 | return generic_file_llseek_size(file, offset, whence, | 186 | return generic_file_llseek_size(file, offset, whence, |
186 | ~0ULL, 0); | 187 | OFFSET_MAX, 0); |
187 | default: | 188 | default: |
188 | return -EINVAL; | 189 | return -EINVAL; |
189 | } | 190 | } |
@@ -692,12 +693,17 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) | |||
692 | EXPORT_SYMBOL(iov_shorten); | 693 | EXPORT_SYMBOL(iov_shorten); |
693 | 694 | ||
694 | static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, | 695 | static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, |
695 | loff_t *ppos, iter_fn_t fn) | 696 | loff_t *ppos, iter_fn_t fn, int flags) |
696 | { | 697 | { |
697 | struct kiocb kiocb; | 698 | struct kiocb kiocb; |
698 | ssize_t ret; | 699 | ssize_t ret; |
699 | 700 | ||
701 | if (flags & ~RWF_HIPRI) | ||
702 | return -EOPNOTSUPP; | ||
703 | |||
700 | init_sync_kiocb(&kiocb, filp); | 704 | init_sync_kiocb(&kiocb, filp); |
705 | if (flags & RWF_HIPRI) | ||
706 | kiocb.ki_flags |= IOCB_HIPRI; | ||
701 | kiocb.ki_pos = *ppos; | 707 | kiocb.ki_pos = *ppos; |
702 | 708 | ||
703 | ret = fn(&kiocb, iter); | 709 | ret = fn(&kiocb, iter); |
@@ -708,10 +714,13 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, | |||
708 | 714 | ||
709 | /* Do it by hand, with file-ops */ | 715 | /* Do it by hand, with file-ops */ |
710 | static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, | 716 | static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, |
711 | loff_t *ppos, io_fn_t fn) | 717 | loff_t *ppos, io_fn_t fn, int flags) |
712 | { | 718 | { |
713 | ssize_t ret = 0; | 719 | ssize_t ret = 0; |
714 | 720 | ||
721 | if (flags & ~RWF_HIPRI) | ||
722 | return -EOPNOTSUPP; | ||
723 | |||
715 | while (iov_iter_count(iter)) { | 724 | while (iov_iter_count(iter)) { |
716 | struct iovec iovec = iov_iter_iovec(iter); | 725 | struct iovec iovec = iov_iter_iovec(iter); |
717 | ssize_t nr; | 726 | ssize_t nr; |
@@ -812,7 +821,8 @@ out: | |||
812 | 821 | ||
813 | static ssize_t do_readv_writev(int type, struct file *file, | 822 | static ssize_t do_readv_writev(int type, struct file *file, |
814 | const struct iovec __user * uvector, | 823 | const struct iovec __user * uvector, |
815 | unsigned long nr_segs, loff_t *pos) | 824 | unsigned long nr_segs, loff_t *pos, |
825 | int flags) | ||
816 | { | 826 | { |
817 | size_t tot_len; | 827 | size_t tot_len; |
818 | struct iovec iovstack[UIO_FASTIOV]; | 828 | struct iovec iovstack[UIO_FASTIOV]; |
@@ -844,9 +854,9 @@ static ssize_t do_readv_writev(int type, struct file *file, | |||
844 | } | 854 | } |
845 | 855 | ||
846 | if (iter_fn) | 856 | if (iter_fn) |
847 | ret = do_iter_readv_writev(file, &iter, pos, iter_fn); | 857 | ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags); |
848 | else | 858 | else |
849 | ret = do_loop_readv_writev(file, &iter, pos, fn); | 859 | ret = do_loop_readv_writev(file, &iter, pos, fn, flags); |
850 | 860 | ||
851 | if (type != READ) | 861 | if (type != READ) |
852 | file_end_write(file); | 862 | file_end_write(file); |
@@ -863,40 +873,40 @@ out: | |||
863 | } | 873 | } |
864 | 874 | ||
865 | ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, | 875 | ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, |
866 | unsigned long vlen, loff_t *pos) | 876 | unsigned long vlen, loff_t *pos, int flags) |
867 | { | 877 | { |
868 | if (!(file->f_mode & FMODE_READ)) | 878 | if (!(file->f_mode & FMODE_READ)) |
869 | return -EBADF; | 879 | return -EBADF; |
870 | if (!(file->f_mode & FMODE_CAN_READ)) | 880 | if (!(file->f_mode & FMODE_CAN_READ)) |
871 | return -EINVAL; | 881 | return -EINVAL; |
872 | 882 | ||
873 | return do_readv_writev(READ, file, vec, vlen, pos); | 883 | return do_readv_writev(READ, file, vec, vlen, pos, flags); |
874 | } | 884 | } |
875 | 885 | ||
876 | EXPORT_SYMBOL(vfs_readv); | 886 | EXPORT_SYMBOL(vfs_readv); |
877 | 887 | ||
878 | ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, | 888 | ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, |
879 | unsigned long vlen, loff_t *pos) | 889 | unsigned long vlen, loff_t *pos, int flags) |
880 | { | 890 | { |
881 | if (!(file->f_mode & FMODE_WRITE)) | 891 | if (!(file->f_mode & FMODE_WRITE)) |
882 | return -EBADF; | 892 | return -EBADF; |
883 | if (!(file->f_mode & FMODE_CAN_WRITE)) | 893 | if (!(file->f_mode & FMODE_CAN_WRITE)) |
884 | return -EINVAL; | 894 | return -EINVAL; |
885 | 895 | ||
886 | return do_readv_writev(WRITE, file, vec, vlen, pos); | 896 | return do_readv_writev(WRITE, file, vec, vlen, pos, flags); |
887 | } | 897 | } |
888 | 898 | ||
889 | EXPORT_SYMBOL(vfs_writev); | 899 | EXPORT_SYMBOL(vfs_writev); |
890 | 900 | ||
891 | SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, | 901 | static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec, |
892 | unsigned long, vlen) | 902 | unsigned long vlen, int flags) |
893 | { | 903 | { |
894 | struct fd f = fdget_pos(fd); | 904 | struct fd f = fdget_pos(fd); |
895 | ssize_t ret = -EBADF; | 905 | ssize_t ret = -EBADF; |
896 | 906 | ||
897 | if (f.file) { | 907 | if (f.file) { |
898 | loff_t pos = file_pos_read(f.file); | 908 | loff_t pos = file_pos_read(f.file); |
899 | ret = vfs_readv(f.file, vec, vlen, &pos); | 909 | ret = vfs_readv(f.file, vec, vlen, &pos, flags); |
900 | if (ret >= 0) | 910 | if (ret >= 0) |
901 | file_pos_write(f.file, pos); | 911 | file_pos_write(f.file, pos); |
902 | fdput_pos(f); | 912 | fdput_pos(f); |
@@ -908,15 +918,15 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, | |||
908 | return ret; | 918 | return ret; |
909 | } | 919 | } |
910 | 920 | ||
911 | SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, | 921 | static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec, |
912 | unsigned long, vlen) | 922 | unsigned long vlen, int flags) |
913 | { | 923 | { |
914 | struct fd f = fdget_pos(fd); | 924 | struct fd f = fdget_pos(fd); |
915 | ssize_t ret = -EBADF; | 925 | ssize_t ret = -EBADF; |
916 | 926 | ||
917 | if (f.file) { | 927 | if (f.file) { |
918 | loff_t pos = file_pos_read(f.file); | 928 | loff_t pos = file_pos_read(f.file); |
919 | ret = vfs_writev(f.file, vec, vlen, &pos); | 929 | ret = vfs_writev(f.file, vec, vlen, &pos, flags); |
920 | if (ret >= 0) | 930 | if (ret >= 0) |
921 | file_pos_write(f.file, pos); | 931 | file_pos_write(f.file, pos); |
922 | fdput_pos(f); | 932 | fdput_pos(f); |
@@ -934,10 +944,9 @@ static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) | |||
934 | return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; | 944 | return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; |
935 | } | 945 | } |
936 | 946 | ||
937 | SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, | 947 | static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec, |
938 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) | 948 | unsigned long vlen, loff_t pos, int flags) |
939 | { | 949 | { |
940 | loff_t pos = pos_from_hilo(pos_h, pos_l); | ||
941 | struct fd f; | 950 | struct fd f; |
942 | ssize_t ret = -EBADF; | 951 | ssize_t ret = -EBADF; |
943 | 952 | ||
@@ -948,7 +957,7 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, | |||
948 | if (f.file) { | 957 | if (f.file) { |
949 | ret = -ESPIPE; | 958 | ret = -ESPIPE; |
950 | if (f.file->f_mode & FMODE_PREAD) | 959 | if (f.file->f_mode & FMODE_PREAD) |
951 | ret = vfs_readv(f.file, vec, vlen, &pos); | 960 | ret = vfs_readv(f.file, vec, vlen, &pos, flags); |
952 | fdput(f); | 961 | fdput(f); |
953 | } | 962 | } |
954 | 963 | ||
@@ -958,10 +967,9 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, | |||
958 | return ret; | 967 | return ret; |
959 | } | 968 | } |
960 | 969 | ||
961 | SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, | 970 | static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec, |
962 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) | 971 | unsigned long vlen, loff_t pos, int flags) |
963 | { | 972 | { |
964 | loff_t pos = pos_from_hilo(pos_h, pos_l); | ||
965 | struct fd f; | 973 | struct fd f; |
966 | ssize_t ret = -EBADF; | 974 | ssize_t ret = -EBADF; |
967 | 975 | ||
@@ -972,7 +980,7 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, | |||
972 | if (f.file) { | 980 | if (f.file) { |
973 | ret = -ESPIPE; | 981 | ret = -ESPIPE; |
974 | if (f.file->f_mode & FMODE_PWRITE) | 982 | if (f.file->f_mode & FMODE_PWRITE) |
975 | ret = vfs_writev(f.file, vec, vlen, &pos); | 983 | ret = vfs_writev(f.file, vec, vlen, &pos, flags); |
976 | fdput(f); | 984 | fdput(f); |
977 | } | 985 | } |
978 | 986 | ||
@@ -982,11 +990,64 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, | |||
982 | return ret; | 990 | return ret; |
983 | } | 991 | } |
984 | 992 | ||
993 | SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, | ||
994 | unsigned long, vlen) | ||
995 | { | ||
996 | return do_readv(fd, vec, vlen, 0); | ||
997 | } | ||
998 | |||
999 | SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, | ||
1000 | unsigned long, vlen) | ||
1001 | { | ||
1002 | return do_writev(fd, vec, vlen, 0); | ||
1003 | } | ||
1004 | |||
1005 | SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, | ||
1006 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) | ||
1007 | { | ||
1008 | loff_t pos = pos_from_hilo(pos_h, pos_l); | ||
1009 | |||
1010 | return do_preadv(fd, vec, vlen, pos, 0); | ||
1011 | } | ||
1012 | |||
1013 | SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec, | ||
1014 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, | ||
1015 | int, flags) | ||
1016 | { | ||
1017 | loff_t pos = pos_from_hilo(pos_h, pos_l); | ||
1018 | |||
1019 | if (pos == -1) | ||
1020 | return do_readv(fd, vec, vlen, flags); | ||
1021 | |||
1022 | return do_preadv(fd, vec, vlen, pos, flags); | ||
1023 | } | ||
1024 | |||
1025 | SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, | ||
1026 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) | ||
1027 | { | ||
1028 | loff_t pos = pos_from_hilo(pos_h, pos_l); | ||
1029 | |||
1030 | return do_pwritev(fd, vec, vlen, pos, 0); | ||
1031 | } | ||
1032 | |||
1033 | SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec, | ||
1034 | unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, | ||
1035 | int, flags) | ||
1036 | { | ||
1037 | loff_t pos = pos_from_hilo(pos_h, pos_l); | ||
1038 | |||
1039 | if (pos == -1) | ||
1040 | return do_writev(fd, vec, vlen, flags); | ||
1041 | |||
1042 | return do_pwritev(fd, vec, vlen, pos, flags); | ||
1043 | } | ||
1044 | |||
985 | #ifdef CONFIG_COMPAT | 1045 | #ifdef CONFIG_COMPAT |
986 | 1046 | ||
987 | static ssize_t compat_do_readv_writev(int type, struct file *file, | 1047 | static ssize_t compat_do_readv_writev(int type, struct file *file, |
988 | const struct compat_iovec __user *uvector, | 1048 | const struct compat_iovec __user *uvector, |
989 | unsigned long nr_segs, loff_t *pos) | 1049 | unsigned long nr_segs, loff_t *pos, |
1050 | int flags) | ||
990 | { | 1051 | { |
991 | compat_ssize_t tot_len; | 1052 | compat_ssize_t tot_len; |
992 | struct iovec iovstack[UIO_FASTIOV]; | 1053 | struct iovec iovstack[UIO_FASTIOV]; |
@@ -1018,9 +1079,9 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, | |||
1018 | } | 1079 | } |
1019 | 1080 | ||
1020 | if (iter_fn) | 1081 | if (iter_fn) |
1021 | ret = do_iter_readv_writev(file, &iter, pos, iter_fn); | 1082 | ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags); |
1022 | else | 1083 | else |
1023 | ret = do_loop_readv_writev(file, &iter, pos, fn); | 1084 | ret = do_loop_readv_writev(file, &iter, pos, fn, flags); |
1024 | 1085 | ||
1025 | if (type != READ) | 1086 | if (type != READ) |
1026 | file_end_write(file); | 1087 | file_end_write(file); |
@@ -1038,7 +1099,7 @@ out: | |||
1038 | 1099 | ||
1039 | static size_t compat_readv(struct file *file, | 1100 | static size_t compat_readv(struct file *file, |
1040 | const struct compat_iovec __user *vec, | 1101 | const struct compat_iovec __user *vec, |
1041 | unsigned long vlen, loff_t *pos) | 1102 | unsigned long vlen, loff_t *pos, int flags) |
1042 | { | 1103 | { |
1043 | ssize_t ret = -EBADF; | 1104 | ssize_t ret = -EBADF; |
1044 | 1105 | ||
@@ -1049,7 +1110,7 @@ static size_t compat_readv(struct file *file, | |||
1049 | if (!(file->f_mode & FMODE_CAN_READ)) | 1110 | if (!(file->f_mode & FMODE_CAN_READ)) |
1050 | goto out; | 1111 | goto out; |
1051 | 1112 | ||
1052 | ret = compat_do_readv_writev(READ, file, vec, vlen, pos); | 1113 | ret = compat_do_readv_writev(READ, file, vec, vlen, pos, flags); |
1053 | 1114 | ||
1054 | out: | 1115 | out: |
1055 | if (ret > 0) | 1116 | if (ret > 0) |
@@ -1058,9 +1119,9 @@ out: | |||
1058 | return ret; | 1119 | return ret; |
1059 | } | 1120 | } |
1060 | 1121 | ||
1061 | COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, | 1122 | static size_t do_compat_readv(compat_ulong_t fd, |
1062 | const struct compat_iovec __user *,vec, | 1123 | const struct compat_iovec __user *vec, |
1063 | compat_ulong_t, vlen) | 1124 | compat_ulong_t vlen, int flags) |
1064 | { | 1125 | { |
1065 | struct fd f = fdget_pos(fd); | 1126 | struct fd f = fdget_pos(fd); |
1066 | ssize_t ret; | 1127 | ssize_t ret; |
@@ -1069,16 +1130,24 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, | |||
1069 | if (!f.file) | 1130 | if (!f.file) |
1070 | return -EBADF; | 1131 | return -EBADF; |
1071 | pos = f.file->f_pos; | 1132 | pos = f.file->f_pos; |
1072 | ret = compat_readv(f.file, vec, vlen, &pos); | 1133 | ret = compat_readv(f.file, vec, vlen, &pos, flags); |
1073 | if (ret >= 0) | 1134 | if (ret >= 0) |
1074 | f.file->f_pos = pos; | 1135 | f.file->f_pos = pos; |
1075 | fdput_pos(f); | 1136 | fdput_pos(f); |
1076 | return ret; | 1137 | return ret; |
1138 | |||
1077 | } | 1139 | } |
1078 | 1140 | ||
1079 | static long __compat_sys_preadv64(unsigned long fd, | 1141 | COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, |
1142 | const struct compat_iovec __user *,vec, | ||
1143 | compat_ulong_t, vlen) | ||
1144 | { | ||
1145 | return do_compat_readv(fd, vec, vlen, 0); | ||
1146 | } | ||
1147 | |||
1148 | static long do_compat_preadv64(unsigned long fd, | ||
1080 | const struct compat_iovec __user *vec, | 1149 | const struct compat_iovec __user *vec, |
1081 | unsigned long vlen, loff_t pos) | 1150 | unsigned long vlen, loff_t pos, int flags) |
1082 | { | 1151 | { |
1083 | struct fd f; | 1152 | struct fd f; |
1084 | ssize_t ret; | 1153 | ssize_t ret; |
@@ -1090,7 +1159,7 @@ static long __compat_sys_preadv64(unsigned long fd, | |||
1090 | return -EBADF; | 1159 | return -EBADF; |
1091 | ret = -ESPIPE; | 1160 | ret = -ESPIPE; |
1092 | if (f.file->f_mode & FMODE_PREAD) | 1161 | if (f.file->f_mode & FMODE_PREAD) |
1093 | ret = compat_readv(f.file, vec, vlen, &pos); | 1162 | ret = compat_readv(f.file, vec, vlen, &pos, flags); |
1094 | fdput(f); | 1163 | fdput(f); |
1095 | return ret; | 1164 | return ret; |
1096 | } | 1165 | } |
@@ -1100,7 +1169,7 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, | |||
1100 | const struct compat_iovec __user *,vec, | 1169 | const struct compat_iovec __user *,vec, |
1101 | unsigned long, vlen, loff_t, pos) | 1170 | unsigned long, vlen, loff_t, pos) |
1102 | { | 1171 | { |
1103 | return __compat_sys_preadv64(fd, vec, vlen, pos); | 1172 | return do_compat_preadv64(fd, vec, vlen, pos, 0); |
1104 | } | 1173 | } |
1105 | #endif | 1174 | #endif |
1106 | 1175 | ||
@@ -1110,12 +1179,25 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, | |||
1110 | { | 1179 | { |
1111 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | 1180 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; |
1112 | 1181 | ||
1113 | return __compat_sys_preadv64(fd, vec, vlen, pos); | 1182 | return do_compat_preadv64(fd, vec, vlen, pos, 0); |
1183 | } | ||
1184 | |||
1185 | COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, | ||
1186 | const struct compat_iovec __user *,vec, | ||
1187 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high, | ||
1188 | int, flags) | ||
1189 | { | ||
1190 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | ||
1191 | |||
1192 | if (pos == -1) | ||
1193 | return do_compat_readv(fd, vec, vlen, flags); | ||
1194 | |||
1195 | return do_compat_preadv64(fd, vec, vlen, pos, flags); | ||
1114 | } | 1196 | } |
1115 | 1197 | ||
1116 | static size_t compat_writev(struct file *file, | 1198 | static size_t compat_writev(struct file *file, |
1117 | const struct compat_iovec __user *vec, | 1199 | const struct compat_iovec __user *vec, |
1118 | unsigned long vlen, loff_t *pos) | 1200 | unsigned long vlen, loff_t *pos, int flags) |
1119 | { | 1201 | { |
1120 | ssize_t ret = -EBADF; | 1202 | ssize_t ret = -EBADF; |
1121 | 1203 | ||
@@ -1126,7 +1208,7 @@ static size_t compat_writev(struct file *file, | |||
1126 | if (!(file->f_mode & FMODE_CAN_WRITE)) | 1208 | if (!(file->f_mode & FMODE_CAN_WRITE)) |
1127 | goto out; | 1209 | goto out; |
1128 | 1210 | ||
1129 | ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); | 1211 | ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, 0); |
1130 | 1212 | ||
1131 | out: | 1213 | out: |
1132 | if (ret > 0) | 1214 | if (ret > 0) |
@@ -1135,9 +1217,9 @@ out: | |||
1135 | return ret; | 1217 | return ret; |
1136 | } | 1218 | } |
1137 | 1219 | ||
1138 | COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, | 1220 | static size_t do_compat_writev(compat_ulong_t fd, |
1139 | const struct compat_iovec __user *, vec, | 1221 | const struct compat_iovec __user* vec, |
1140 | compat_ulong_t, vlen) | 1222 | compat_ulong_t vlen, int flags) |
1141 | { | 1223 | { |
1142 | struct fd f = fdget_pos(fd); | 1224 | struct fd f = fdget_pos(fd); |
1143 | ssize_t ret; | 1225 | ssize_t ret; |
@@ -1146,16 +1228,23 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, | |||
1146 | if (!f.file) | 1228 | if (!f.file) |
1147 | return -EBADF; | 1229 | return -EBADF; |
1148 | pos = f.file->f_pos; | 1230 | pos = f.file->f_pos; |
1149 | ret = compat_writev(f.file, vec, vlen, &pos); | 1231 | ret = compat_writev(f.file, vec, vlen, &pos, flags); |
1150 | if (ret >= 0) | 1232 | if (ret >= 0) |
1151 | f.file->f_pos = pos; | 1233 | f.file->f_pos = pos; |
1152 | fdput_pos(f); | 1234 | fdput_pos(f); |
1153 | return ret; | 1235 | return ret; |
1154 | } | 1236 | } |
1155 | 1237 | ||
1156 | static long __compat_sys_pwritev64(unsigned long fd, | 1238 | COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, |
1239 | const struct compat_iovec __user *, vec, | ||
1240 | compat_ulong_t, vlen) | ||
1241 | { | ||
1242 | return do_compat_writev(fd, vec, vlen, 0); | ||
1243 | } | ||
1244 | |||
1245 | static long do_compat_pwritev64(unsigned long fd, | ||
1157 | const struct compat_iovec __user *vec, | 1246 | const struct compat_iovec __user *vec, |
1158 | unsigned long vlen, loff_t pos) | 1247 | unsigned long vlen, loff_t pos, int flags) |
1159 | { | 1248 | { |
1160 | struct fd f; | 1249 | struct fd f; |
1161 | ssize_t ret; | 1250 | ssize_t ret; |
@@ -1167,7 +1256,7 @@ static long __compat_sys_pwritev64(unsigned long fd, | |||
1167 | return -EBADF; | 1256 | return -EBADF; |
1168 | ret = -ESPIPE; | 1257 | ret = -ESPIPE; |
1169 | if (f.file->f_mode & FMODE_PWRITE) | 1258 | if (f.file->f_mode & FMODE_PWRITE) |
1170 | ret = compat_writev(f.file, vec, vlen, &pos); | 1259 | ret = compat_writev(f.file, vec, vlen, &pos, flags); |
1171 | fdput(f); | 1260 | fdput(f); |
1172 | return ret; | 1261 | return ret; |
1173 | } | 1262 | } |
@@ -1177,7 +1266,7 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, | |||
1177 | const struct compat_iovec __user *,vec, | 1266 | const struct compat_iovec __user *,vec, |
1178 | unsigned long, vlen, loff_t, pos) | 1267 | unsigned long, vlen, loff_t, pos) |
1179 | { | 1268 | { |
1180 | return __compat_sys_pwritev64(fd, vec, vlen, pos); | 1269 | return do_compat_pwritev64(fd, vec, vlen, pos, 0); |
1181 | } | 1270 | } |
1182 | #endif | 1271 | #endif |
1183 | 1272 | ||
@@ -1187,8 +1276,21 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, | |||
1187 | { | 1276 | { |
1188 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | 1277 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; |
1189 | 1278 | ||
1190 | return __compat_sys_pwritev64(fd, vec, vlen, pos); | 1279 | return do_compat_pwritev64(fd, vec, vlen, pos, 0); |
1191 | } | 1280 | } |
1281 | |||
1282 | COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, | ||
1283 | const struct compat_iovec __user *,vec, | ||
1284 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags) | ||
1285 | { | ||
1286 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | ||
1287 | |||
1288 | if (pos == -1) | ||
1289 | return do_compat_writev(fd, vec, vlen, flags); | ||
1290 | |||
1291 | return do_compat_pwritev64(fd, vec, vlen, pos, flags); | ||
1292 | } | ||
1293 | |||
1192 | #endif | 1294 | #endif |
1193 | 1295 | ||
1194 | static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | 1296 | static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, |
@@ -1532,10 +1634,12 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in, | |||
1532 | 1634 | ||
1533 | if (!(file_in->f_mode & FMODE_READ) || | 1635 | if (!(file_in->f_mode & FMODE_READ) || |
1534 | !(file_out->f_mode & FMODE_WRITE) || | 1636 | !(file_out->f_mode & FMODE_WRITE) || |
1535 | (file_out->f_flags & O_APPEND) || | 1637 | (file_out->f_flags & O_APPEND)) |
1536 | !file_in->f_op->clone_file_range) | ||
1537 | return -EBADF; | 1638 | return -EBADF; |
1538 | 1639 | ||
1640 | if (!file_in->f_op->clone_file_range) | ||
1641 | return -EOPNOTSUPP; | ||
1642 | |||
1539 | ret = clone_verify_area(file_in, pos_in, len, false); | 1643 | ret = clone_verify_area(file_in, pos_in, len, false); |
1540 | if (ret) | 1644 | if (ret) |
1541 | return ret; | 1645 | return ret; |
diff --git a/fs/splice.c b/fs/splice.c index 19e0b103d253..9947b5c69664 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -580,7 +580,7 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec, | |||
580 | old_fs = get_fs(); | 580 | old_fs = get_fs(); |
581 | set_fs(get_ds()); | 581 | set_fs(get_ds()); |
582 | /* The cast to a user pointer is valid due to the set_fs() */ | 582 | /* The cast to a user pointer is valid due to the set_fs() */ |
583 | res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos); | 583 | res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos, 0); |
584 | set_fs(old_fs); | 584 | set_fs(old_fs); |
585 | 585 | ||
586 | return res; | 586 | return res; |
diff --git a/fs/timerfd.c b/fs/timerfd.c index b94fa6c3c6eb..053818dd6c18 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c | |||
@@ -153,7 +153,7 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) | |||
153 | if (isalarm(ctx)) | 153 | if (isalarm(ctx)) |
154 | remaining = alarm_expires_remaining(&ctx->t.alarm); | 154 | remaining = alarm_expires_remaining(&ctx->t.alarm); |
155 | else | 155 | else |
156 | remaining = hrtimer_expires_remaining(&ctx->t.tmr); | 156 | remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr); |
157 | 157 | ||
158 | return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; | 158 | return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; |
159 | } | 159 | } |
diff --git a/fs/xattr.c b/fs/xattr.c index 07d0e47f6a7f..4861322e28e8 100644 --- a/fs/xattr.c +++ b/fs/xattr.c | |||
@@ -940,7 +940,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, | |||
940 | bool trusted = capable(CAP_SYS_ADMIN); | 940 | bool trusted = capable(CAP_SYS_ADMIN); |
941 | struct simple_xattr *xattr; | 941 | struct simple_xattr *xattr; |
942 | ssize_t remaining_size = size; | 942 | ssize_t remaining_size = size; |
943 | int err; | 943 | int err = 0; |
944 | 944 | ||
945 | #ifdef CONFIG_FS_POSIX_ACL | 945 | #ifdef CONFIG_FS_POSIX_ACL |
946 | if (inode->i_acl) { | 946 | if (inode->i_acl) { |
@@ -965,11 +965,11 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, | |||
965 | 965 | ||
966 | err = xattr_list_one(&buffer, &remaining_size, xattr->name); | 966 | err = xattr_list_one(&buffer, &remaining_size, xattr->name); |
967 | if (err) | 967 | if (err) |
968 | return err; | 968 | break; |
969 | } | 969 | } |
970 | spin_unlock(&xattrs->lock); | 970 | spin_unlock(&xattrs->lock); |
971 | 971 | ||
972 | return size - remaining_size; | 972 | return err ? err : size - remaining_size; |
973 | } | 973 | } |
974 | 974 | ||
975 | /* | 975 | /* |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index da37beb76f6e..594f7e63b432 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -4491,7 +4491,7 @@ xlog_recover_process( | |||
4491 | * know precisely what failed. | 4491 | * know precisely what failed. |
4492 | */ | 4492 | */ |
4493 | if (pass == XLOG_RECOVER_CRCPASS) { | 4493 | if (pass == XLOG_RECOVER_CRCPASS) { |
4494 | if (rhead->h_crc && crc != le32_to_cpu(rhead->h_crc)) | 4494 | if (rhead->h_crc && crc != rhead->h_crc) |
4495 | return -EFSBADCRC; | 4495 | return -EFSBADCRC; |
4496 | return 0; | 4496 | return 0; |
4497 | } | 4497 | } |
@@ -4502,7 +4502,7 @@ xlog_recover_process( | |||
4502 | * zero CRC check prevents warnings from being emitted when upgrading | 4502 | * zero CRC check prevents warnings from being emitted when upgrading |
4503 | * the kernel from one that does not add CRCs by default. | 4503 | * the kernel from one that does not add CRCs by default. |
4504 | */ | 4504 | */ |
4505 | if (crc != le32_to_cpu(rhead->h_crc)) { | 4505 | if (crc != rhead->h_crc) { |
4506 | if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) { | 4506 | if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) { |
4507 | xfs_alert(log->l_mp, | 4507 | xfs_alert(log->l_mp, |
4508 | "log record CRC mismatch: found 0x%x, expected 0x%x.", | 4508 | "log record CRC mismatch: found 0x%x, expected 0x%x.", |