summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/affs/file.c5
-rw-r--r--fs/autofs4/root.c2
-rw-r--r--fs/block_dev.c36
-rw-r--r--fs/btrfs/async-thread.c2
-rw-r--r--fs/btrfs/backref.c10
-rw-r--r--fs/btrfs/compression.c6
-rw-r--r--fs/btrfs/delayed-inode.c3
-rw-r--r--fs/btrfs/delayed-inode.h2
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent_io.c45
-rw-r--r--fs/btrfs/extent_io.h3
-rw-r--r--fs/btrfs/free-space-tree.c18
-rw-r--r--fs/btrfs/inode.c50
-rw-r--r--fs/btrfs/ioctl.c119
-rw-r--r--fs/btrfs/relocation.c3
-rw-r--r--fs/btrfs/sysfs.c35
-rw-r--r--fs/btrfs/sysfs.h5
-rw-r--r--fs/btrfs/tests/btrfs-tests.c10
-rw-r--r--fs/btrfs/tests/extent-io-tests.c12
-rw-r--r--fs/btrfs/tests/inode-tests.c8
-rw-r--r--fs/btrfs/tree-log.c14
-rw-r--r--fs/ceph/file.c6
-rw-r--r--fs/ceph/inode.c21
-rw-r--r--fs/cifs/cifsfs.c4
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/configfs/dir.c9
-rw-r--r--fs/configfs/inode.c12
-rw-r--r--fs/dax.c23
-rw-r--r--fs/dcache.c197
-rw-r--r--fs/devpts/inode.c20
-rw-r--r--fs/direct-io.c3
-rw-r--r--fs/ecryptfs/crypto.c27
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h1
-rw-r--r--fs/ecryptfs/inode.c11
-rw-r--r--fs/eventpoll.c38
-rw-r--r--fs/hpfs/namei.c31
-rw-r--r--fs/jffs2/dir.c11
-rw-r--r--fs/namei.c331
-rw-r--r--fs/ncpfs/dir.c2
-rw-r--r--fs/nfs/dir.c12
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c2
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c2
-rw-r--r--fs/nfs/nfs4proc.c13
-rw-r--r--fs/nfs/pnfs.c122
-rw-r--r--fs/nfs/pnfs.h4
-rw-r--r--fs/nfsd/vfs.c4
-rw-r--r--fs/ocfs2/cluster/heartbeat.c14
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2
-rw-r--r--fs/pnode.c9
-rw-r--r--fs/proc/task_mmu.c73
-rw-r--r--fs/proc/task_nommu.c49
-rw-r--r--fs/quota/dquot.c4
-rw-r--r--fs/read_write.c206
-rw-r--r--fs/splice.c2
-rw-r--r--fs/timerfd.c2
-rw-r--r--fs/xattr.c6
-rw-r--r--fs/xfs/xfs_log_recover.c4
57 files changed, 918 insertions, 752 deletions
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 0548c53f41d5..22fc7c802d69 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -511,8 +511,6 @@ affs_do_readpage_ofs(struct page *page, unsigned to)
511 pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino, 511 pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino,
512 page->index, to); 512 page->index, to);
513 BUG_ON(to > PAGE_CACHE_SIZE); 513 BUG_ON(to > PAGE_CACHE_SIZE);
514 kmap(page);
515 data = page_address(page);
516 bsize = AFFS_SB(sb)->s_data_blksize; 514 bsize = AFFS_SB(sb)->s_data_blksize;
517 tmp = page->index << PAGE_CACHE_SHIFT; 515 tmp = page->index << PAGE_CACHE_SHIFT;
518 bidx = tmp / bsize; 516 bidx = tmp / bsize;
@@ -524,14 +522,15 @@ affs_do_readpage_ofs(struct page *page, unsigned to)
524 return PTR_ERR(bh); 522 return PTR_ERR(bh);
525 tmp = min(bsize - boff, to - pos); 523 tmp = min(bsize - boff, to - pos);
526 BUG_ON(pos + tmp > to || tmp > bsize); 524 BUG_ON(pos + tmp > to || tmp > bsize);
525 data = kmap_atomic(page);
527 memcpy(data + pos, AFFS_DATA(bh) + boff, tmp); 526 memcpy(data + pos, AFFS_DATA(bh) + boff, tmp);
527 kunmap_atomic(data);
528 affs_brelse(bh); 528 affs_brelse(bh);
529 bidx++; 529 bidx++;
530 pos += tmp; 530 pos += tmp;
531 boff = 0; 531 boff = 0;
532 } 532 }
533 flush_dcache_page(page); 533 flush_dcache_page(page);
534 kunmap(page);
535 return 0; 534 return 0;
536} 535}
537 536
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index c6d7d3dbd52a..75dd739ac3e6 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -537,8 +537,6 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, u
537 ino->dentry = dentry; 537 ino->dentry = dentry;
538 538
539 autofs4_add_active(dentry); 539 autofs4_add_active(dentry);
540
541 d_instantiate(dentry, NULL);
542 } 540 }
543 return NULL; 541 return NULL;
544} 542}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7b9cd49622b1..39b3a174a425 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1730,43 +1730,25 @@ static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1730 return __dax_fault(vma, vmf, blkdev_get_block, NULL); 1730 return __dax_fault(vma, vmf, blkdev_get_block, NULL);
1731} 1731}
1732 1732
1733static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, 1733static int blkdev_dax_pfn_mkwrite(struct vm_area_struct *vma,
1734 pmd_t *pmd, unsigned int flags) 1734 struct vm_fault *vmf)
1735{
1736 return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
1737}
1738
1739static void blkdev_vm_open(struct vm_area_struct *vma)
1740{ 1735{
1741 struct inode *bd_inode = bdev_file_inode(vma->vm_file); 1736 return dax_pfn_mkwrite(vma, vmf);
1742 struct block_device *bdev = I_BDEV(bd_inode);
1743
1744 inode_lock(bd_inode);
1745 bdev->bd_map_count++;
1746 inode_unlock(bd_inode);
1747} 1737}
1748 1738
1749static void blkdev_vm_close(struct vm_area_struct *vma) 1739static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
1740 pmd_t *pmd, unsigned int flags)
1750{ 1741{
1751 struct inode *bd_inode = bdev_file_inode(vma->vm_file); 1742 return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
1752 struct block_device *bdev = I_BDEV(bd_inode);
1753
1754 inode_lock(bd_inode);
1755 bdev->bd_map_count--;
1756 inode_unlock(bd_inode);
1757} 1743}
1758 1744
1759static const struct vm_operations_struct blkdev_dax_vm_ops = { 1745static const struct vm_operations_struct blkdev_dax_vm_ops = {
1760 .open = blkdev_vm_open,
1761 .close = blkdev_vm_close,
1762 .fault = blkdev_dax_fault, 1746 .fault = blkdev_dax_fault,
1763 .pmd_fault = blkdev_dax_pmd_fault, 1747 .pmd_fault = blkdev_dax_pmd_fault,
1764 .pfn_mkwrite = blkdev_dax_fault, 1748 .pfn_mkwrite = blkdev_dax_pfn_mkwrite,
1765}; 1749};
1766 1750
1767static const struct vm_operations_struct blkdev_default_vm_ops = { 1751static const struct vm_operations_struct blkdev_default_vm_ops = {
1768 .open = blkdev_vm_open,
1769 .close = blkdev_vm_close,
1770 .fault = filemap_fault, 1752 .fault = filemap_fault,
1771 .map_pages = filemap_map_pages, 1753 .map_pages = filemap_map_pages,
1772}; 1754};
@@ -1774,18 +1756,14 @@ static const struct vm_operations_struct blkdev_default_vm_ops = {
1774static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) 1756static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
1775{ 1757{
1776 struct inode *bd_inode = bdev_file_inode(file); 1758 struct inode *bd_inode = bdev_file_inode(file);
1777 struct block_device *bdev = I_BDEV(bd_inode);
1778 1759
1779 file_accessed(file); 1760 file_accessed(file);
1780 inode_lock(bd_inode);
1781 bdev->bd_map_count++;
1782 if (IS_DAX(bd_inode)) { 1761 if (IS_DAX(bd_inode)) {
1783 vma->vm_ops = &blkdev_dax_vm_ops; 1762 vma->vm_ops = &blkdev_dax_vm_ops;
1784 vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; 1763 vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
1785 } else { 1764 } else {
1786 vma->vm_ops = &blkdev_default_vm_ops; 1765 vma->vm_ops = &blkdev_default_vm_ops;
1787 } 1766 }
1788 inode_unlock(bd_inode);
1789 1767
1790 return 0; 1768 return 0;
1791} 1769}
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 88d9af3d4581..5fb60ea7eee2 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -328,8 +328,8 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
328 list_add_tail(&work->ordered_list, &wq->ordered_list); 328 list_add_tail(&work->ordered_list, &wq->ordered_list);
329 spin_unlock_irqrestore(&wq->list_lock, flags); 329 spin_unlock_irqrestore(&wq->list_lock, flags);
330 } 330 }
331 queue_work(wq->normal_wq, &work->normal_work);
332 trace_btrfs_work_queued(work); 331 trace_btrfs_work_queued(work);
332 queue_work(wq->normal_wq, &work->normal_work);
333} 333}
334 334
335void btrfs_queue_work(struct btrfs_workqueue *wq, 335void btrfs_queue_work(struct btrfs_workqueue *wq,
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index b90cd3776f8e..f6dac40f87ff 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1406,7 +1406,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
1406 read_extent_buffer(eb, dest + bytes_left, 1406 read_extent_buffer(eb, dest + bytes_left,
1407 name_off, name_len); 1407 name_off, name_len);
1408 if (eb != eb_in) { 1408 if (eb != eb_in) {
1409 btrfs_tree_read_unlock_blocking(eb); 1409 if (!path->skip_locking)
1410 btrfs_tree_read_unlock_blocking(eb);
1410 free_extent_buffer(eb); 1411 free_extent_buffer(eb);
1411 } 1412 }
1412 ret = btrfs_find_item(fs_root, path, parent, 0, 1413 ret = btrfs_find_item(fs_root, path, parent, 0,
@@ -1426,9 +1427,10 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
1426 eb = path->nodes[0]; 1427 eb = path->nodes[0];
1427 /* make sure we can use eb after releasing the path */ 1428 /* make sure we can use eb after releasing the path */
1428 if (eb != eb_in) { 1429 if (eb != eb_in) {
1429 atomic_inc(&eb->refs); 1430 if (!path->skip_locking)
1430 btrfs_tree_read_lock(eb); 1431 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
1431 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); 1432 path->nodes[0] = NULL;
1433 path->locks[0] = 0;
1432 } 1434 }
1433 btrfs_release_path(path); 1435 btrfs_release_path(path);
1434 iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); 1436 iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index c473c42d7d6c..3346cd8f9910 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -637,11 +637,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
637 faili = nr_pages - 1; 637 faili = nr_pages - 1;
638 cb->nr_pages = nr_pages; 638 cb->nr_pages = nr_pages;
639 639
640 /* In the parent-locked case, we only locked the range we are 640 add_ra_bio_pages(inode, em_start + em_len, cb);
641 * interested in. In all other cases, we can opportunistically
642 * cache decompressed data that goes beyond the requested range. */
643 if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED))
644 add_ra_bio_pages(inode, em_start + em_len, cb);
645 641
646 /* include any pages we added in add_ra-bio_pages */ 642 /* include any pages we added in add_ra-bio_pages */
647 uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; 643 uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 0be47e4b8136..b57daa895cea 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1689,7 +1689,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
1689 * 1689 *
1690 */ 1690 */
1691int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, 1691int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1692 struct list_head *ins_list) 1692 struct list_head *ins_list, bool *emitted)
1693{ 1693{
1694 struct btrfs_dir_item *di; 1694 struct btrfs_dir_item *di;
1695 struct btrfs_delayed_item *curr, *next; 1695 struct btrfs_delayed_item *curr, *next;
@@ -1733,6 +1733,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1733 1733
1734 if (over) 1734 if (over)
1735 return 1; 1735 return 1;
1736 *emitted = true;
1736 } 1737 }
1737 return 0; 1738 return 0;
1738} 1739}
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index f70119f25421..0167853c84ae 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -144,7 +144,7 @@ void btrfs_put_delayed_items(struct list_head *ins_list,
144int btrfs_should_delete_dir_index(struct list_head *del_list, 144int btrfs_should_delete_dir_index(struct list_head *del_list,
145 u64 index); 145 u64 index);
146int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, 146int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
147 struct list_head *ins_list); 147 struct list_head *ins_list, bool *emitted);
148 148
149/* for init */ 149/* for init */
150int __init btrfs_delayed_inode_init(void); 150int __init btrfs_delayed_inode_init(void);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index dd08e29f5117..4545e2e2ad45 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -182,6 +182,7 @@ static struct btrfs_lockdep_keyset {
182 { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, 182 { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
183 { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, 183 { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
184 { .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" }, 184 { .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" },
185 { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, .name_stem = "free-space" },
185 { .id = 0, .name_stem = "tree" }, 186 { .id = 0, .name_stem = "tree" },
186}; 187};
187 188
@@ -1787,7 +1788,6 @@ static int cleaner_kthread(void *arg)
1787 int again; 1788 int again;
1788 struct btrfs_trans_handle *trans; 1789 struct btrfs_trans_handle *trans;
1789 1790
1790 set_freezable();
1791 do { 1791 do {
1792 again = 0; 1792 again = 0;
1793 1793
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2e7c97a3f344..392592dc7010 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2897,12 +2897,11 @@ static int __do_readpage(struct extent_io_tree *tree,
2897 struct block_device *bdev; 2897 struct block_device *bdev;
2898 int ret; 2898 int ret;
2899 int nr = 0; 2899 int nr = 0;
2900 int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
2901 size_t pg_offset = 0; 2900 size_t pg_offset = 0;
2902 size_t iosize; 2901 size_t iosize;
2903 size_t disk_io_size; 2902 size_t disk_io_size;
2904 size_t blocksize = inode->i_sb->s_blocksize; 2903 size_t blocksize = inode->i_sb->s_blocksize;
2905 unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED; 2904 unsigned long this_bio_flag = 0;
2906 2905
2907 set_page_extent_mapped(page); 2906 set_page_extent_mapped(page);
2908 2907
@@ -2942,18 +2941,16 @@ static int __do_readpage(struct extent_io_tree *tree,
2942 kunmap_atomic(userpage); 2941 kunmap_atomic(userpage);
2943 set_extent_uptodate(tree, cur, cur + iosize - 1, 2942 set_extent_uptodate(tree, cur, cur + iosize - 1,
2944 &cached, GFP_NOFS); 2943 &cached, GFP_NOFS);
2945 if (!parent_locked) 2944 unlock_extent_cached(tree, cur,
2946 unlock_extent_cached(tree, cur, 2945 cur + iosize - 1,
2947 cur + iosize - 1, 2946 &cached, GFP_NOFS);
2948 &cached, GFP_NOFS);
2949 break; 2947 break;
2950 } 2948 }
2951 em = __get_extent_map(inode, page, pg_offset, cur, 2949 em = __get_extent_map(inode, page, pg_offset, cur,
2952 end - cur + 1, get_extent, em_cached); 2950 end - cur + 1, get_extent, em_cached);
2953 if (IS_ERR_OR_NULL(em)) { 2951 if (IS_ERR_OR_NULL(em)) {
2954 SetPageError(page); 2952 SetPageError(page);
2955 if (!parent_locked) 2953 unlock_extent(tree, cur, end);
2956 unlock_extent(tree, cur, end);
2957 break; 2954 break;
2958 } 2955 }
2959 extent_offset = cur - em->start; 2956 extent_offset = cur - em->start;
@@ -3038,12 +3035,9 @@ static int __do_readpage(struct extent_io_tree *tree,
3038 3035
3039 set_extent_uptodate(tree, cur, cur + iosize - 1, 3036 set_extent_uptodate(tree, cur, cur + iosize - 1,
3040 &cached, GFP_NOFS); 3037 &cached, GFP_NOFS);
3041 if (parent_locked) 3038 unlock_extent_cached(tree, cur,
3042 free_extent_state(cached); 3039 cur + iosize - 1,
3043 else 3040 &cached, GFP_NOFS);
3044 unlock_extent_cached(tree, cur,
3045 cur + iosize - 1,
3046 &cached, GFP_NOFS);
3047 cur = cur + iosize; 3041 cur = cur + iosize;
3048 pg_offset += iosize; 3042 pg_offset += iosize;
3049 continue; 3043 continue;
@@ -3052,8 +3046,7 @@ static int __do_readpage(struct extent_io_tree *tree,
3052 if (test_range_bit(tree, cur, cur_end, 3046 if (test_range_bit(tree, cur, cur_end,
3053 EXTENT_UPTODATE, 1, NULL)) { 3047 EXTENT_UPTODATE, 1, NULL)) {
3054 check_page_uptodate(tree, page); 3048 check_page_uptodate(tree, page);
3055 if (!parent_locked) 3049 unlock_extent(tree, cur, cur + iosize - 1);
3056 unlock_extent(tree, cur, cur + iosize - 1);
3057 cur = cur + iosize; 3050 cur = cur + iosize;
3058 pg_offset += iosize; 3051 pg_offset += iosize;
3059 continue; 3052 continue;
@@ -3063,8 +3056,7 @@ static int __do_readpage(struct extent_io_tree *tree,
3063 */ 3056 */
3064 if (block_start == EXTENT_MAP_INLINE) { 3057 if (block_start == EXTENT_MAP_INLINE) {
3065 SetPageError(page); 3058 SetPageError(page);
3066 if (!parent_locked) 3059 unlock_extent(tree, cur, cur + iosize - 1);
3067 unlock_extent(tree, cur, cur + iosize - 1);
3068 cur = cur + iosize; 3060 cur = cur + iosize;
3069 pg_offset += iosize; 3061 pg_offset += iosize;
3070 continue; 3062 continue;
@@ -3083,8 +3075,7 @@ static int __do_readpage(struct extent_io_tree *tree,
3083 *bio_flags = this_bio_flag; 3075 *bio_flags = this_bio_flag;
3084 } else { 3076 } else {
3085 SetPageError(page); 3077 SetPageError(page);
3086 if (!parent_locked) 3078 unlock_extent(tree, cur, cur + iosize - 1);
3087 unlock_extent(tree, cur, cur + iosize - 1);
3088 } 3079 }
3089 cur = cur + iosize; 3080 cur = cur + iosize;
3090 pg_offset += iosize; 3081 pg_offset += iosize;
@@ -3213,20 +3204,6 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
3213 return ret; 3204 return ret;
3214} 3205}
3215 3206
3216int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
3217 get_extent_t *get_extent, int mirror_num)
3218{
3219 struct bio *bio = NULL;
3220 unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
3221 int ret;
3222
3223 ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
3224 &bio_flags, READ, NULL);
3225 if (bio)
3226 ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
3227 return ret;
3228}
3229
3230static noinline void update_nr_written(struct page *page, 3207static noinline void update_nr_written(struct page *page,
3231 struct writeback_control *wbc, 3208 struct writeback_control *wbc,
3232 unsigned long nr_written) 3209 unsigned long nr_written)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 0377413bd4b9..880d5292e972 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -29,7 +29,6 @@
29 */ 29 */
30#define EXTENT_BIO_COMPRESSED 1 30#define EXTENT_BIO_COMPRESSED 1
31#define EXTENT_BIO_TREE_LOG 2 31#define EXTENT_BIO_TREE_LOG 2
32#define EXTENT_BIO_PARENT_LOCKED 4
33#define EXTENT_BIO_FLAG_SHIFT 16 32#define EXTENT_BIO_FLAG_SHIFT 16
34 33
35/* these are bit numbers for test/set bit */ 34/* these are bit numbers for test/set bit */
@@ -210,8 +209,6 @@ static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
210int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end); 209int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
211int extent_read_full_page(struct extent_io_tree *tree, struct page *page, 210int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
212 get_extent_t *get_extent, int mirror_num); 211 get_extent_t *get_extent, int mirror_num);
213int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
214 get_extent_t *get_extent, int mirror_num);
215int __init extent_io_init(void); 212int __init extent_io_init(void);
216void extent_io_exit(void); 213void extent_io_exit(void);
217 214
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 393e36bd5845..53dbeaf6ce94 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -153,6 +153,20 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
153 153
154static unsigned long *alloc_bitmap(u32 bitmap_size) 154static unsigned long *alloc_bitmap(u32 bitmap_size)
155{ 155{
156 void *mem;
157
158 /*
159 * The allocation size varies, observed numbers were < 4K up to 16K.
160 * Using vmalloc unconditionally would be too heavy, we'll try
161 * contiguous allocations first.
162 */
163 if (bitmap_size <= PAGE_SIZE)
164 return kzalloc(bitmap_size, GFP_NOFS);
165
166 mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
167 if (mem)
168 return mem;
169
156 return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO, 170 return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
157 PAGE_KERNEL); 171 PAGE_KERNEL);
158} 172}
@@ -289,7 +303,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
289 303
290 ret = 0; 304 ret = 0;
291out: 305out:
292 vfree(bitmap); 306 kvfree(bitmap);
293 if (ret) 307 if (ret)
294 btrfs_abort_transaction(trans, root, ret); 308 btrfs_abort_transaction(trans, root, ret);
295 return ret; 309 return ret;
@@ -438,7 +452,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
438 452
439 ret = 0; 453 ret = 0;
440out: 454out:
441 vfree(bitmap); 455 kvfree(bitmap);
442 if (ret) 456 if (ret)
443 btrfs_abort_transaction(trans, root, ret); 457 btrfs_abort_transaction(trans, root, ret);
444 return ret; 458 return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e28f3d4691af..151b7c71b868 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5717,6 +5717,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
5717 char *name_ptr; 5717 char *name_ptr;
5718 int name_len; 5718 int name_len;
5719 int is_curr = 0; /* ctx->pos points to the current index? */ 5719 int is_curr = 0; /* ctx->pos points to the current index? */
5720 bool emitted;
5720 5721
5721 /* FIXME, use a real flag for deciding about the key type */ 5722 /* FIXME, use a real flag for deciding about the key type */
5722 if (root->fs_info->tree_root == root) 5723 if (root->fs_info->tree_root == root)
@@ -5745,6 +5746,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
5745 if (ret < 0) 5746 if (ret < 0)
5746 goto err; 5747 goto err;
5747 5748
5749 emitted = false;
5748 while (1) { 5750 while (1) {
5749 leaf = path->nodes[0]; 5751 leaf = path->nodes[0];
5750 slot = path->slots[0]; 5752 slot = path->slots[0];
@@ -5824,6 +5826,7 @@ skip:
5824 5826
5825 if (over) 5827 if (over)
5826 goto nopos; 5828 goto nopos;
5829 emitted = true;
5827 di_len = btrfs_dir_name_len(leaf, di) + 5830 di_len = btrfs_dir_name_len(leaf, di) +
5828 btrfs_dir_data_len(leaf, di) + sizeof(*di); 5831 btrfs_dir_data_len(leaf, di) + sizeof(*di);
5829 di_cur += di_len; 5832 di_cur += di_len;
@@ -5836,11 +5839,20 @@ next:
5836 if (key_type == BTRFS_DIR_INDEX_KEY) { 5839 if (key_type == BTRFS_DIR_INDEX_KEY) {
5837 if (is_curr) 5840 if (is_curr)
5838 ctx->pos++; 5841 ctx->pos++;
5839 ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list); 5842 ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list, &emitted);
5840 if (ret) 5843 if (ret)
5841 goto nopos; 5844 goto nopos;
5842 } 5845 }
5843 5846
5847 /*
5848 * If we haven't emitted any dir entry, we must not touch ctx->pos as
5849 * it was was set to the termination value in previous call. We assume
5850 * that "." and ".." were emitted if we reach this point and set the
5851 * termination value as well for an empty directory.
5852 */
5853 if (ctx->pos > 2 && !emitted)
5854 goto nopos;
5855
5844 /* Reached end of directory/root. Bump pos past the last item. */ 5856 /* Reached end of directory/root. Bump pos past the last item. */
5845 ctx->pos++; 5857 ctx->pos++;
5846 5858
@@ -7116,21 +7128,41 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
7116 if (ret) 7128 if (ret)
7117 return ERR_PTR(ret); 7129 return ERR_PTR(ret);
7118 7130
7119 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, 7131 /*
7120 ins.offset, ins.offset, ins.offset, 0); 7132 * Create the ordered extent before the extent map. This is to avoid
7121 if (IS_ERR(em)) { 7133 * races with the fast fsync path that would lead to it logging file
7122 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); 7134 * extent items that point to disk extents that were not yet written to.
7123 return em; 7135 * The fast fsync path collects ordered extents into a local list and
7124 } 7136 * then collects all the new extent maps, so we must create the ordered
7125 7137 * extent first and make sure the fast fsync path collects any new
7138 * ordered extents after collecting new extent maps as well.
7139 * The fsync path simply can not rely on inode_dio_wait() because it
7140 * causes deadlock with AIO.
7141 */
7126 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, 7142 ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
7127 ins.offset, ins.offset, 0); 7143 ins.offset, ins.offset, 0);
7128 if (ret) { 7144 if (ret) {
7129 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); 7145 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
7130 free_extent_map(em);
7131 return ERR_PTR(ret); 7146 return ERR_PTR(ret);
7132 } 7147 }
7133 7148
7149 em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
7150 ins.offset, ins.offset, ins.offset, 0);
7151 if (IS_ERR(em)) {
7152 struct btrfs_ordered_extent *oe;
7153
7154 btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
7155 oe = btrfs_lookup_ordered_extent(inode, start);
7156 ASSERT(oe);
7157 if (WARN_ON(!oe))
7158 return em;
7159 set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
7160 set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
7161 btrfs_remove_ordered_extent(inode, oe);
7162 /* Once for our lookup and once for the ordered extents tree. */
7163 btrfs_put_ordered_extent(oe);
7164 btrfs_put_ordered_extent(oe);
7165 }
7134 return em; 7166 return em;
7135} 7167}
7136 7168
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 952172ca7e45..48aee9846329 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2794,24 +2794,29 @@ out:
2794static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) 2794static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
2795{ 2795{
2796 struct page *page; 2796 struct page *page;
2797 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
2798 2797
2799 page = grab_cache_page(inode->i_mapping, index); 2798 page = grab_cache_page(inode->i_mapping, index);
2800 if (!page) 2799 if (!page)
2801 return NULL; 2800 return ERR_PTR(-ENOMEM);
2802 2801
2803 if (!PageUptodate(page)) { 2802 if (!PageUptodate(page)) {
2804 if (extent_read_full_page_nolock(tree, page, btrfs_get_extent, 2803 int ret;
2805 0)) 2804
2806 return NULL; 2805 ret = btrfs_readpage(NULL, page);
2806 if (ret)
2807 return ERR_PTR(ret);
2807 lock_page(page); 2808 lock_page(page);
2808 if (!PageUptodate(page)) { 2809 if (!PageUptodate(page)) {
2809 unlock_page(page); 2810 unlock_page(page);
2810 page_cache_release(page); 2811 page_cache_release(page);
2811 return NULL; 2812 return ERR_PTR(-EIO);
2813 }
2814 if (page->mapping != inode->i_mapping) {
2815 unlock_page(page);
2816 page_cache_release(page);
2817 return ERR_PTR(-EAGAIN);
2812 } 2818 }
2813 } 2819 }
2814 unlock_page(page);
2815 2820
2816 return page; 2821 return page;
2817} 2822}
@@ -2823,17 +2828,31 @@ static int gather_extent_pages(struct inode *inode, struct page **pages,
2823 pgoff_t index = off >> PAGE_CACHE_SHIFT; 2828 pgoff_t index = off >> PAGE_CACHE_SHIFT;
2824 2829
2825 for (i = 0; i < num_pages; i++) { 2830 for (i = 0; i < num_pages; i++) {
2831again:
2826 pages[i] = extent_same_get_page(inode, index + i); 2832 pages[i] = extent_same_get_page(inode, index + i);
2827 if (!pages[i]) 2833 if (IS_ERR(pages[i])) {
2828 return -ENOMEM; 2834 int err = PTR_ERR(pages[i]);
2835
2836 if (err == -EAGAIN)
2837 goto again;
2838 pages[i] = NULL;
2839 return err;
2840 }
2829 } 2841 }
2830 return 0; 2842 return 0;
2831} 2843}
2832 2844
2833static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) 2845static int lock_extent_range(struct inode *inode, u64 off, u64 len,
2846 bool retry_range_locking)
2834{ 2847{
2835 /* do any pending delalloc/csum calc on src, one way or 2848 /*
2836 another, and lock file content */ 2849 * Do any pending delalloc/csum calculations on inode, one way or
2850 * another, and lock file content.
2851 * The locking order is:
2852 *
2853 * 1) pages
2854 * 2) range in the inode's io tree
2855 */
2837 while (1) { 2856 while (1) {
2838 struct btrfs_ordered_extent *ordered; 2857 struct btrfs_ordered_extent *ordered;
2839 lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); 2858 lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
@@ -2851,8 +2870,11 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
2851 unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); 2870 unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
2852 if (ordered) 2871 if (ordered)
2853 btrfs_put_ordered_extent(ordered); 2872 btrfs_put_ordered_extent(ordered);
2873 if (!retry_range_locking)
2874 return -EAGAIN;
2854 btrfs_wait_ordered_range(inode, off, len); 2875 btrfs_wait_ordered_range(inode, off, len);
2855 } 2876 }
2877 return 0;
2856} 2878}
2857 2879
2858static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) 2880static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
@@ -2877,15 +2899,24 @@ static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
2877 unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); 2899 unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
2878} 2900}
2879 2901
2880static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, 2902static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
2881 struct inode *inode2, u64 loff2, u64 len) 2903 struct inode *inode2, u64 loff2, u64 len,
2904 bool retry_range_locking)
2882{ 2905{
2906 int ret;
2907
2883 if (inode1 < inode2) { 2908 if (inode1 < inode2) {
2884 swap(inode1, inode2); 2909 swap(inode1, inode2);
2885 swap(loff1, loff2); 2910 swap(loff1, loff2);
2886 } 2911 }
2887 lock_extent_range(inode1, loff1, len); 2912 ret = lock_extent_range(inode1, loff1, len, retry_range_locking);
2888 lock_extent_range(inode2, loff2, len); 2913 if (ret)
2914 return ret;
2915 ret = lock_extent_range(inode2, loff2, len, retry_range_locking);
2916 if (ret)
2917 unlock_extent(&BTRFS_I(inode1)->io_tree, loff1,
2918 loff1 + len - 1);
2919 return ret;
2889} 2920}
2890 2921
2891struct cmp_pages { 2922struct cmp_pages {
@@ -2901,11 +2932,15 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp)
2901 2932
2902 for (i = 0; i < cmp->num_pages; i++) { 2933 for (i = 0; i < cmp->num_pages; i++) {
2903 pg = cmp->src_pages[i]; 2934 pg = cmp->src_pages[i];
2904 if (pg) 2935 if (pg) {
2936 unlock_page(pg);
2905 page_cache_release(pg); 2937 page_cache_release(pg);
2938 }
2906 pg = cmp->dst_pages[i]; 2939 pg = cmp->dst_pages[i];
2907 if (pg) 2940 if (pg) {
2941 unlock_page(pg);
2908 page_cache_release(pg); 2942 page_cache_release(pg);
2943 }
2909 } 2944 }
2910 kfree(cmp->src_pages); 2945 kfree(cmp->src_pages);
2911 kfree(cmp->dst_pages); 2946 kfree(cmp->dst_pages);
@@ -2966,6 +3001,8 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
2966 3001
2967 src_page = cmp->src_pages[i]; 3002 src_page = cmp->src_pages[i];
2968 dst_page = cmp->dst_pages[i]; 3003 dst_page = cmp->dst_pages[i];
3004 ASSERT(PageLocked(src_page));
3005 ASSERT(PageLocked(dst_page));
2969 3006
2970 addr = kmap_atomic(src_page); 3007 addr = kmap_atomic(src_page);
2971 dst_addr = kmap_atomic(dst_page); 3008 dst_addr = kmap_atomic(dst_page);
@@ -3078,14 +3115,46 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
3078 goto out_unlock; 3115 goto out_unlock;
3079 } 3116 }
3080 3117
3118again:
3081 ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); 3119 ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
3082 if (ret) 3120 if (ret)
3083 goto out_unlock; 3121 goto out_unlock;
3084 3122
3085 if (same_inode) 3123 if (same_inode)
3086 lock_extent_range(src, same_lock_start, same_lock_len); 3124 ret = lock_extent_range(src, same_lock_start, same_lock_len,
3125 false);
3087 else 3126 else
3088 btrfs_double_extent_lock(src, loff, dst, dst_loff, len); 3127 ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len,
3128 false);
3129 /*
3130 * If one of the inodes has dirty pages in the respective range or
3131 * ordered extents, we need to flush delalloc and wait for all ordered
3132 * extents in the range. We must unlock the pages and the ranges in the
3133 * io trees to avoid deadlocks when flushing delalloc (requires locking
3134 * pages) and when waiting for ordered extents to complete (they require
3135 * range locking).
3136 */
3137 if (ret == -EAGAIN) {
3138 /*
3139 * Ranges in the io trees already unlocked. Now unlock all
3140 * pages before waiting for all IO to complete.
3141 */
3142 btrfs_cmp_data_free(&cmp);
3143 if (same_inode) {
3144 btrfs_wait_ordered_range(src, same_lock_start,
3145 same_lock_len);
3146 } else {
3147 btrfs_wait_ordered_range(src, loff, len);
3148 btrfs_wait_ordered_range(dst, dst_loff, len);
3149 }
3150 goto again;
3151 }
3152 ASSERT(ret == 0);
3153 if (WARN_ON(ret)) {
3154 /* ranges in the io trees already unlocked */
3155 btrfs_cmp_data_free(&cmp);
3156 return ret;
3157 }
3089 3158
3090 /* pass original length for comparison so we stay within i_size */ 3159 /* pass original length for comparison so we stay within i_size */
3091 ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); 3160 ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
@@ -3795,9 +3864,15 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
3795 u64 lock_start = min_t(u64, off, destoff); 3864 u64 lock_start = min_t(u64, off, destoff);
3796 u64 lock_len = max_t(u64, off, destoff) + len - lock_start; 3865 u64 lock_len = max_t(u64, off, destoff) + len - lock_start;
3797 3866
3798 lock_extent_range(src, lock_start, lock_len); 3867 ret = lock_extent_range(src, lock_start, lock_len, true);
3799 } else { 3868 } else {
3800 btrfs_double_extent_lock(src, off, inode, destoff, len); 3869 ret = btrfs_double_extent_lock(src, off, inode, destoff, len,
3870 true);
3871 }
3872 ASSERT(ret == 0);
3873 if (WARN_ON(ret)) {
3874 /* ranges in the io trees already unlocked */
3875 goto out_unlock;
3801 } 3876 }
3802 3877
3803 ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); 3878 ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index fd1c4d982463..2bd0011450df 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -575,7 +575,8 @@ static int is_cowonly_root(u64 root_objectid)
575 root_objectid == BTRFS_TREE_LOG_OBJECTID || 575 root_objectid == BTRFS_TREE_LOG_OBJECTID ||
576 root_objectid == BTRFS_CSUM_TREE_OBJECTID || 576 root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
577 root_objectid == BTRFS_UUID_TREE_OBJECTID || 577 root_objectid == BTRFS_UUID_TREE_OBJECTID ||
578 root_objectid == BTRFS_QUOTA_TREE_OBJECTID) 578 root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
579 root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
579 return 1; 580 return 1;
580 return 0; 581 return 0;
581} 582}
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index e0ac85949067..539e7b5e3f86 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -202,6 +202,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
202BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56); 202BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
203BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA); 203BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
204BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES); 204BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
205BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
205 206
206static struct attribute *btrfs_supported_feature_attrs[] = { 207static struct attribute *btrfs_supported_feature_attrs[] = {
207 BTRFS_FEAT_ATTR_PTR(mixed_backref), 208 BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -213,6 +214,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
213 BTRFS_FEAT_ATTR_PTR(raid56), 214 BTRFS_FEAT_ATTR_PTR(raid56),
214 BTRFS_FEAT_ATTR_PTR(skinny_metadata), 215 BTRFS_FEAT_ATTR_PTR(skinny_metadata),
215 BTRFS_FEAT_ATTR_PTR(no_holes), 216 BTRFS_FEAT_ATTR_PTR(no_holes),
217 BTRFS_FEAT_ATTR_PTR(free_space_tree),
216 NULL 218 NULL
217}; 219};
218 220
@@ -780,6 +782,39 @@ failure:
780 return error; 782 return error;
781} 783}
782 784
785
786/*
787 * Change per-fs features in /sys/fs/btrfs/UUID/features to match current
788 * values in superblock. Call after any changes to incompat/compat_ro flags
789 */
790void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
791 u64 bit, enum btrfs_feature_set set)
792{
793 struct btrfs_fs_devices *fs_devs;
794 struct kobject *fsid_kobj;
795 u64 features;
796 int ret;
797
798 if (!fs_info)
799 return;
800
801 features = get_features(fs_info, set);
802 ASSERT(bit & supported_feature_masks[set]);
803
804 fs_devs = fs_info->fs_devices;
805 fsid_kobj = &fs_devs->fsid_kobj;
806
807 if (!fsid_kobj->state_initialized)
808 return;
809
810 /*
811 * FIXME: this is too heavy to update just one value, ideally we'd like
812 * to use sysfs_update_group but some refactoring is needed first.
813 */
814 sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
815 ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
816}
817
783static int btrfs_init_debugfs(void) 818static int btrfs_init_debugfs(void)
784{ 819{
785#ifdef CONFIG_DEBUG_FS 820#ifdef CONFIG_DEBUG_FS
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 9c09522125a6..d7da1a4c2f6c 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -56,7 +56,7 @@ static struct btrfs_feature_attr btrfs_attr_##_name = { \
56#define BTRFS_FEAT_ATTR_COMPAT(name, feature) \ 56#define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
57 BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature) 57 BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
58#define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \ 58#define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
59 BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature) 59 BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
60#define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \ 60#define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
61 BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature) 61 BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
62 62
@@ -90,4 +90,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
90 struct kobject *parent); 90 struct kobject *parent);
91int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs); 91int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
92void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs); 92void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
93void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
94 u64 bit, enum btrfs_feature_set set);
95
93#endif /* _BTRFS_SYSFS_H_ */ 96#endif /* _BTRFS_SYSFS_H_ */
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index b1d920b30070..0e1e61a7ec23 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -82,18 +82,18 @@ void btrfs_destroy_test_fs(void)
82struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void) 82struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
83{ 83{
84 struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info), 84 struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
85 GFP_NOFS); 85 GFP_KERNEL);
86 86
87 if (!fs_info) 87 if (!fs_info)
88 return fs_info; 88 return fs_info;
89 fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices), 89 fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
90 GFP_NOFS); 90 GFP_KERNEL);
91 if (!fs_info->fs_devices) { 91 if (!fs_info->fs_devices) {
92 kfree(fs_info); 92 kfree(fs_info);
93 return NULL; 93 return NULL;
94 } 94 }
95 fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block), 95 fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
96 GFP_NOFS); 96 GFP_KERNEL);
97 if (!fs_info->super_copy) { 97 if (!fs_info->super_copy) {
98 kfree(fs_info->fs_devices); 98 kfree(fs_info->fs_devices);
99 kfree(fs_info); 99 kfree(fs_info);
@@ -180,11 +180,11 @@ btrfs_alloc_dummy_block_group(unsigned long length)
180{ 180{
181 struct btrfs_block_group_cache *cache; 181 struct btrfs_block_group_cache *cache;
182 182
183 cache = kzalloc(sizeof(*cache), GFP_NOFS); 183 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
184 if (!cache) 184 if (!cache)
185 return NULL; 185 return NULL;
186 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), 186 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
187 GFP_NOFS); 187 GFP_KERNEL);
188 if (!cache->free_space_ctl) { 188 if (!cache->free_space_ctl) {
189 kfree(cache); 189 kfree(cache);
190 return NULL; 190 return NULL;
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index e29fa297e053..669b58201e36 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -94,7 +94,7 @@ static int test_find_delalloc(void)
94 * test. 94 * test.
95 */ 95 */
96 for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) { 96 for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
97 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); 97 page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
98 if (!page) { 98 if (!page) {
99 test_msg("Failed to allocate test page\n"); 99 test_msg("Failed to allocate test page\n");
100 ret = -ENOMEM; 100 ret = -ENOMEM;
@@ -113,7 +113,7 @@ static int test_find_delalloc(void)
113 * |--- delalloc ---| 113 * |--- delalloc ---|
114 * |--- search ---| 114 * |--- search ---|
115 */ 115 */
116 set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS); 116 set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL);
117 start = 0; 117 start = 0;
118 end = 0; 118 end = 0;
119 found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, 119 found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -144,7 +144,7 @@ static int test_find_delalloc(void)
144 test_msg("Couldn't find the locked page\n"); 144 test_msg("Couldn't find the locked page\n");
145 goto out_bits; 145 goto out_bits;
146 } 146 }
147 set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS); 147 set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL);
148 start = test_start; 148 start = test_start;
149 end = 0; 149 end = 0;
150 found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, 150 found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -199,7 +199,7 @@ static int test_find_delalloc(void)
199 * 199 *
200 * We are re-using our test_start from above since it works out well. 200 * We are re-using our test_start from above since it works out well.
201 */ 201 */
202 set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS); 202 set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL);
203 start = test_start; 203 start = test_start;
204 end = 0; 204 end = 0;
205 found = find_lock_delalloc_range(inode, &tmp, locked_page, &start, 205 found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -262,7 +262,7 @@ static int test_find_delalloc(void)
262 } 262 }
263 ret = 0; 263 ret = 0;
264out_bits: 264out_bits:
265 clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS); 265 clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
266out: 266out:
267 if (locked_page) 267 if (locked_page)
268 page_cache_release(locked_page); 268 page_cache_release(locked_page);
@@ -360,7 +360,7 @@ static int test_eb_bitmaps(void)
360 360
361 test_msg("Running extent buffer bitmap tests\n"); 361 test_msg("Running extent buffer bitmap tests\n");
362 362
363 bitmap = kmalloc(len, GFP_NOFS); 363 bitmap = kmalloc(len, GFP_KERNEL);
364 if (!bitmap) { 364 if (!bitmap) {
365 test_msg("Couldn't allocate test bitmap\n"); 365 test_msg("Couldn't allocate test bitmap\n");
366 return -ENOMEM; 366 return -ENOMEM;
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 5de55fdd28bc..e2d3da02deee 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -974,7 +974,7 @@ static int test_extent_accounting(void)
974 (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095, 974 (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
975 EXTENT_DELALLOC | EXTENT_DIRTY | 975 EXTENT_DELALLOC | EXTENT_DIRTY |
976 EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0, 976 EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
977 NULL, GFP_NOFS); 977 NULL, GFP_KERNEL);
978 if (ret) { 978 if (ret) {
979 test_msg("clear_extent_bit returned %d\n", ret); 979 test_msg("clear_extent_bit returned %d\n", ret);
980 goto out; 980 goto out;
@@ -1045,7 +1045,7 @@ static int test_extent_accounting(void)
1045 BTRFS_MAX_EXTENT_SIZE+8191, 1045 BTRFS_MAX_EXTENT_SIZE+8191,
1046 EXTENT_DIRTY | EXTENT_DELALLOC | 1046 EXTENT_DIRTY | EXTENT_DELALLOC |
1047 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, 1047 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1048 NULL, GFP_NOFS); 1048 NULL, GFP_KERNEL);
1049 if (ret) { 1049 if (ret) {
1050 test_msg("clear_extent_bit returned %d\n", ret); 1050 test_msg("clear_extent_bit returned %d\n", ret);
1051 goto out; 1051 goto out;
@@ -1079,7 +1079,7 @@ static int test_extent_accounting(void)
1079 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, 1079 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
1080 EXTENT_DIRTY | EXTENT_DELALLOC | 1080 EXTENT_DIRTY | EXTENT_DELALLOC |
1081 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, 1081 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1082 NULL, GFP_NOFS); 1082 NULL, GFP_KERNEL);
1083 if (ret) { 1083 if (ret) {
1084 test_msg("clear_extent_bit returned %d\n", ret); 1084 test_msg("clear_extent_bit returned %d\n", ret);
1085 goto out; 1085 goto out;
@@ -1096,7 +1096,7 @@ out:
1096 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, 1096 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
1097 EXTENT_DIRTY | EXTENT_DELALLOC | 1097 EXTENT_DIRTY | EXTENT_DELALLOC |
1098 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, 1098 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1099 NULL, GFP_NOFS); 1099 NULL, GFP_KERNEL);
1100 iput(inode); 1100 iput(inode);
1101 btrfs_free_dummy_root(root); 1101 btrfs_free_dummy_root(root);
1102 return ret; 1102 return ret;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 323e12cc9d2f..978c3a810893 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4127,7 +4127,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
4127 struct inode *inode, 4127 struct inode *inode,
4128 struct btrfs_path *path, 4128 struct btrfs_path *path,
4129 struct list_head *logged_list, 4129 struct list_head *logged_list,
4130 struct btrfs_log_ctx *ctx) 4130 struct btrfs_log_ctx *ctx,
4131 const u64 start,
4132 const u64 end)
4131{ 4133{
4132 struct extent_map *em, *n; 4134 struct extent_map *em, *n;
4133 struct list_head extents; 4135 struct list_head extents;
@@ -4166,7 +4168,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
4166 } 4168 }
4167 4169
4168 list_sort(NULL, &extents, extent_cmp); 4170 list_sort(NULL, &extents, extent_cmp);
4169 4171 /*
4172 * Collect any new ordered extents within the range. This is to
4173 * prevent logging file extent items without waiting for the disk
4174 * location they point to being written. We do this only to deal
4175 * with races against concurrent lockless direct IO writes.
4176 */
4177 btrfs_get_logged_extents(inode, logged_list, start, end);
4170process: 4178process:
4171 while (!list_empty(&extents)) { 4179 while (!list_empty(&extents)) {
4172 em = list_entry(extents.next, struct extent_map, list); 4180 em = list_entry(extents.next, struct extent_map, list);
@@ -4701,7 +4709,7 @@ log_extents:
4701 goto out_unlock; 4709 goto out_unlock;
4702 } 4710 }
4703 ret = btrfs_log_changed_extents(trans, root, inode, dst_path, 4711 ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
4704 &logged_list, ctx); 4712 &logged_list, ctx, start, end);
4705 if (ret) { 4713 if (ret) {
4706 err = ret; 4714 err = ret;
4707 goto out_unlock; 4715 goto out_unlock;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 86a9c383955e..eb9028e8cfc5 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -698,8 +698,8 @@ static void ceph_aio_retry_work(struct work_struct *work)
698 698
699 req = ceph_osdc_alloc_request(orig_req->r_osdc, snapc, 2, 699 req = ceph_osdc_alloc_request(orig_req->r_osdc, snapc, 2,
700 false, GFP_NOFS); 700 false, GFP_NOFS);
701 if (IS_ERR(req)) { 701 if (!req) {
702 ret = PTR_ERR(req); 702 ret = -ENOMEM;
703 req = orig_req; 703 req = orig_req;
704 goto out; 704 goto out;
705 } 705 }
@@ -716,7 +716,6 @@ static void ceph_aio_retry_work(struct work_struct *work)
716 ceph_osdc_build_request(req, req->r_ops[0].extent.offset, 716 ceph_osdc_build_request(req, req->r_ops[0].extent.offset,
717 snapc, CEPH_NOSNAP, &aio_req->mtime); 717 snapc, CEPH_NOSNAP, &aio_req->mtime);
718 718
719 ceph_put_snap_context(snapc);
720 ceph_osdc_put_request(orig_req); 719 ceph_osdc_put_request(orig_req);
721 720
722 req->r_callback = ceph_aio_complete_req; 721 req->r_callback = ceph_aio_complete_req;
@@ -731,6 +730,7 @@ out:
731 ceph_aio_complete_req(req, NULL); 730 ceph_aio_complete_req(req, NULL);
732 } 731 }
733 732
733 ceph_put_snap_context(snapc);
734 kfree(aio_work); 734 kfree(aio_work);
735} 735}
736 736
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index fb4ba2e4e2a5..be2d87f33177 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -975,13 +975,8 @@ out_unlock:
975/* 975/*
976 * splice a dentry to an inode. 976 * splice a dentry to an inode.
977 * caller must hold directory i_mutex for this to be safe. 977 * caller must hold directory i_mutex for this to be safe.
978 *
979 * we will only rehash the resulting dentry if @prehash is
980 * true; @prehash will be set to false (for the benefit of
981 * the caller) if we fail.
982 */ 978 */
983static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, 979static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
984 bool *prehash)
985{ 980{
986 struct dentry *realdn; 981 struct dentry *realdn;
987 982
@@ -994,8 +989,6 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
994 if (IS_ERR(realdn)) { 989 if (IS_ERR(realdn)) {
995 pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", 990 pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
996 PTR_ERR(realdn), dn, in, ceph_vinop(in)); 991 PTR_ERR(realdn), dn, in, ceph_vinop(in));
997 if (prehash)
998 *prehash = false; /* don't rehash on error */
999 dn = realdn; /* note realdn contains the error */ 992 dn = realdn; /* note realdn contains the error */
1000 goto out; 993 goto out;
1001 } else if (realdn) { 994 } else if (realdn) {
@@ -1011,8 +1004,6 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
1011 dout("dn %p attached to %p ino %llx.%llx\n", 1004 dout("dn %p attached to %p ino %llx.%llx\n",
1012 dn, d_inode(dn), ceph_vinop(d_inode(dn))); 1005 dn, d_inode(dn), ceph_vinop(d_inode(dn)));
1013 } 1006 }
1014 if ((!prehash || *prehash) && d_unhashed(dn))
1015 d_rehash(dn);
1016out: 1007out:
1017 return dn; 1008 return dn;
1018} 1009}
@@ -1245,10 +1236,8 @@ retry_lookup:
1245 dout("d_delete %p\n", dn); 1236 dout("d_delete %p\n", dn);
1246 d_delete(dn); 1237 d_delete(dn);
1247 } else { 1238 } else {
1248 dout("d_instantiate %p NULL\n", dn);
1249 d_instantiate(dn, NULL);
1250 if (have_lease && d_unhashed(dn)) 1239 if (have_lease && d_unhashed(dn))
1251 d_rehash(dn); 1240 d_add(dn, NULL);
1252 update_dentry_lease(dn, rinfo->dlease, 1241 update_dentry_lease(dn, rinfo->dlease,
1253 session, 1242 session,
1254 req->r_request_started); 1243 req->r_request_started);
@@ -1260,7 +1249,7 @@ retry_lookup:
1260 if (d_really_is_negative(dn)) { 1249 if (d_really_is_negative(dn)) {
1261 ceph_dir_clear_ordered(dir); 1250 ceph_dir_clear_ordered(dir);
1262 ihold(in); 1251 ihold(in);
1263 dn = splice_dentry(dn, in, &have_lease); 1252 dn = splice_dentry(dn, in);
1264 if (IS_ERR(dn)) { 1253 if (IS_ERR(dn)) {
1265 err = PTR_ERR(dn); 1254 err = PTR_ERR(dn);
1266 goto done; 1255 goto done;
@@ -1290,7 +1279,7 @@ retry_lookup:
1290 dout(" linking snapped dir %p to dn %p\n", in, dn); 1279 dout(" linking snapped dir %p to dn %p\n", in, dn);
1291 ceph_dir_clear_ordered(dir); 1280 ceph_dir_clear_ordered(dir);
1292 ihold(in); 1281 ihold(in);
1293 dn = splice_dentry(dn, in, NULL); 1282 dn = splice_dentry(dn, in);
1294 if (IS_ERR(dn)) { 1283 if (IS_ERR(dn)) {
1295 err = PTR_ERR(dn); 1284 err = PTR_ERR(dn);
1296 goto done; 1285 goto done;
@@ -1501,7 +1490,7 @@ retry_lookup:
1501 } 1490 }
1502 1491
1503 if (d_really_is_negative(dn)) { 1492 if (d_really_is_negative(dn)) {
1504 struct dentry *realdn = splice_dentry(dn, in, NULL); 1493 struct dentry *realdn = splice_dentry(dn, in);
1505 if (IS_ERR(realdn)) { 1494 if (IS_ERR(realdn)) {
1506 err = PTR_ERR(realdn); 1495 err = PTR_ERR(realdn);
1507 d_drop(dn); 1496 d_drop(dn);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c48ca13673e3..09b1db2cac31 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -642,9 +642,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
642 while (*s && *s != sep) 642 while (*s && *s != sep)
643 s++; 643 s++;
644 644
645 inode_lock(dir); 645 child = lookup_one_len_unlocked(p, dentry, s - p);
646 child = lookup_one_len(p, dentry, s - p);
647 inode_unlock(dir);
648 dput(dentry); 646 dput(dentry);
649 dentry = child; 647 dentry = child;
650 } while (!IS_ERR(dentry)); 648 } while (!IS_ERR(dentry));
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a5b8eb69a8f4..6402eaf8ab95 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1261,6 +1261,9 @@ COMPATIBLE_IOCTL(HCIUNBLOCKADDR)
1261COMPATIBLE_IOCTL(HCIINQUIRY) 1261COMPATIBLE_IOCTL(HCIINQUIRY)
1262COMPATIBLE_IOCTL(HCIUARTSETPROTO) 1262COMPATIBLE_IOCTL(HCIUARTSETPROTO)
1263COMPATIBLE_IOCTL(HCIUARTGETPROTO) 1263COMPATIBLE_IOCTL(HCIUARTGETPROTO)
1264COMPATIBLE_IOCTL(HCIUARTGETDEVICE)
1265COMPATIBLE_IOCTL(HCIUARTSETFLAGS)
1266COMPATIBLE_IOCTL(HCIUARTGETFLAGS)
1264COMPATIBLE_IOCTL(RFCOMMCREATEDEV) 1267COMPATIBLE_IOCTL(RFCOMMCREATEDEV)
1265COMPATIBLE_IOCTL(RFCOMMRELEASEDEV) 1268COMPATIBLE_IOCTL(RFCOMMRELEASEDEV)
1266COMPATIBLE_IOCTL(RFCOMMGETDEVLIST) 1269COMPATIBLE_IOCTL(RFCOMMGETDEVLIST)
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index f419519ec41f..214ec14149d9 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -432,14 +432,9 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
432 (sd->s_type & CONFIGFS_ITEM_BIN_ATTR) ? 432 (sd->s_type & CONFIGFS_ITEM_BIN_ATTR) ?
433 configfs_init_bin_file : 433 configfs_init_bin_file :
434 configfs_init_file); 434 configfs_init_file);
435 if (error) { 435 if (error)
436 configfs_put(sd); 436 configfs_put(sd);
437 return error; 437 return error;
438 }
439
440 d_rehash(dentry);
441
442 return 0;
443} 438}
444 439
445static struct dentry * configfs_lookup(struct inode *dir, 440static struct dentry * configfs_lookup(struct inode *dir,
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index cee087d8f7e0..45811ea3fd87 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -199,9 +199,17 @@ int configfs_create(struct dentry * dentry, umode_t mode, void (*init)(struct in
199 configfs_set_inode_lock_class(sd, inode); 199 configfs_set_inode_lock_class(sd, inode);
200 200
201 init(inode); 201 init(inode);
202 d_instantiate(dentry, inode); 202 if (S_ISDIR(mode) || S_ISLNK(mode)) {
203 if (S_ISDIR(mode) || S_ISLNK(mode)) 203 /*
204 * ->symlink(), ->mkdir(), configfs_register_subsystem() or
205 * create_default_group() - already hashed.
206 */
207 d_instantiate(dentry, inode);
204 dget(dentry); /* pin link and directory dentries in core */ 208 dget(dentry); /* pin link and directory dentries in core */
209 } else {
210 /* ->lookup() */
211 d_add(dentry, inode);
212 }
205 return error; 213 return error;
206} 214}
207 215
diff --git a/fs/dax.c b/fs/dax.c
index 4fd6b0c5c6b5..fc2e3141138b 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -58,6 +58,26 @@ static void dax_unmap_atomic(struct block_device *bdev,
58 blk_queue_exit(bdev->bd_queue); 58 blk_queue_exit(bdev->bd_queue);
59} 59}
60 60
61struct page *read_dax_sector(struct block_device *bdev, sector_t n)
62{
63 struct page *page = alloc_pages(GFP_KERNEL, 0);
64 struct blk_dax_ctl dax = {
65 .size = PAGE_SIZE,
66 .sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
67 };
68 long rc;
69
70 if (!page)
71 return ERR_PTR(-ENOMEM);
72
73 rc = dax_map_atomic(bdev, &dax);
74 if (rc < 0)
75 return ERR_PTR(rc);
76 memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
77 dax_unmap_atomic(bdev, &dax);
78 return page;
79}
80
61/* 81/*
62 * dax_clear_blocks() is called from within transaction context from XFS, 82 * dax_clear_blocks() is called from within transaction context from XFS,
63 * and hence this means the stack from this point must follow GFP_NOFS 83 * and hence this means the stack from this point must follow GFP_NOFS
@@ -338,7 +358,8 @@ static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
338 void *entry; 358 void *entry;
339 359
340 WARN_ON_ONCE(pmd_entry && !dirty); 360 WARN_ON_ONCE(pmd_entry && !dirty);
341 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 361 if (dirty)
362 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
342 363
343 spin_lock_irq(&mapping->tree_lock); 364 spin_lock_irq(&mapping->tree_lock);
344 365
diff --git a/fs/dcache.c b/fs/dcache.c
index 92d5140de851..32ceae3e6112 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -269,9 +269,6 @@ static inline int dname_external(const struct dentry *dentry)
269 return dentry->d_name.name != dentry->d_iname; 269 return dentry->d_name.name != dentry->d_iname;
270} 270}
271 271
272/*
273 * Make sure other CPUs see the inode attached before the type is set.
274 */
275static inline void __d_set_inode_and_type(struct dentry *dentry, 272static inline void __d_set_inode_and_type(struct dentry *dentry,
276 struct inode *inode, 273 struct inode *inode,
277 unsigned type_flags) 274 unsigned type_flags)
@@ -279,28 +276,18 @@ static inline void __d_set_inode_and_type(struct dentry *dentry,
279 unsigned flags; 276 unsigned flags;
280 277
281 dentry->d_inode = inode; 278 dentry->d_inode = inode;
282 smp_wmb();
283 flags = READ_ONCE(dentry->d_flags); 279 flags = READ_ONCE(dentry->d_flags);
284 flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); 280 flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
285 flags |= type_flags; 281 flags |= type_flags;
286 WRITE_ONCE(dentry->d_flags, flags); 282 WRITE_ONCE(dentry->d_flags, flags);
287} 283}
288 284
289/*
290 * Ideally, we want to make sure that other CPUs see the flags cleared before
291 * the inode is detached, but this is really a violation of RCU principles
292 * since the ordering suggests we should always set inode before flags.
293 *
294 * We should instead replace or discard the entire dentry - but that sucks
295 * performancewise on mass deletion/rename.
296 */
297static inline void __d_clear_type_and_inode(struct dentry *dentry) 285static inline void __d_clear_type_and_inode(struct dentry *dentry)
298{ 286{
299 unsigned flags = READ_ONCE(dentry->d_flags); 287 unsigned flags = READ_ONCE(dentry->d_flags);
300 288
301 flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU); 289 flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
302 WRITE_ONCE(dentry->d_flags, flags); 290 WRITE_ONCE(dentry->d_flags, flags);
303 smp_wmb();
304 dentry->d_inode = NULL; 291 dentry->d_inode = NULL;
305} 292}
306 293
@@ -370,9 +357,11 @@ static void dentry_unlink_inode(struct dentry * dentry)
370 __releases(dentry->d_inode->i_lock) 357 __releases(dentry->d_inode->i_lock)
371{ 358{
372 struct inode *inode = dentry->d_inode; 359 struct inode *inode = dentry->d_inode;
360
361 raw_write_seqcount_begin(&dentry->d_seq);
373 __d_clear_type_and_inode(dentry); 362 __d_clear_type_and_inode(dentry);
374 hlist_del_init(&dentry->d_u.d_alias); 363 hlist_del_init(&dentry->d_u.d_alias);
375 dentry_rcuwalk_invalidate(dentry); 364 raw_write_seqcount_end(&dentry->d_seq);
376 spin_unlock(&dentry->d_lock); 365 spin_unlock(&dentry->d_lock);
377 spin_unlock(&inode->i_lock); 366 spin_unlock(&inode->i_lock);
378 if (!inode->i_nlink) 367 if (!inode->i_nlink)
@@ -1756,12 +1745,12 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1756 unsigned add_flags = d_flags_for_inode(inode); 1745 unsigned add_flags = d_flags_for_inode(inode);
1757 1746
1758 spin_lock(&dentry->d_lock); 1747 spin_lock(&dentry->d_lock);
1759 if (inode) 1748 hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
1760 hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); 1749 raw_write_seqcount_begin(&dentry->d_seq);
1761 __d_set_inode_and_type(dentry, inode, add_flags); 1750 __d_set_inode_and_type(dentry, inode, add_flags);
1762 dentry_rcuwalk_invalidate(dentry); 1751 raw_write_seqcount_end(&dentry->d_seq);
1752 __fsnotify_d_instantiate(dentry);
1763 spin_unlock(&dentry->d_lock); 1753 spin_unlock(&dentry->d_lock);
1764 fsnotify_d_instantiate(dentry, inode);
1765} 1754}
1766 1755
1767/** 1756/**
@@ -1782,91 +1771,16 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1782void d_instantiate(struct dentry *entry, struct inode * inode) 1771void d_instantiate(struct dentry *entry, struct inode * inode)
1783{ 1772{
1784 BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); 1773 BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
1785 if (inode) 1774 if (inode) {
1786 spin_lock(&inode->i_lock); 1775 spin_lock(&inode->i_lock);
1787 __d_instantiate(entry, inode); 1776 __d_instantiate(entry, inode);
1788 if (inode)
1789 spin_unlock(&inode->i_lock); 1777 spin_unlock(&inode->i_lock);
1778 }
1790 security_d_instantiate(entry, inode); 1779 security_d_instantiate(entry, inode);
1791} 1780}
1792EXPORT_SYMBOL(d_instantiate); 1781EXPORT_SYMBOL(d_instantiate);
1793 1782
1794/** 1783/**
1795 * d_instantiate_unique - instantiate a non-aliased dentry
1796 * @entry: dentry to instantiate
1797 * @inode: inode to attach to this dentry
1798 *
1799 * Fill in inode information in the entry. On success, it returns NULL.
1800 * If an unhashed alias of "entry" already exists, then we return the
1801 * aliased dentry instead and drop one reference to inode.
1802 *
1803 * Note that in order to avoid conflicts with rename() etc, the caller
1804 * had better be holding the parent directory semaphore.
1805 *
1806 * This also assumes that the inode count has been incremented
1807 * (or otherwise set) by the caller to indicate that it is now
1808 * in use by the dcache.
1809 */
1810static struct dentry *__d_instantiate_unique(struct dentry *entry,
1811 struct inode *inode)
1812{
1813 struct dentry *alias;
1814 int len = entry->d_name.len;
1815 const char *name = entry->d_name.name;
1816 unsigned int hash = entry->d_name.hash;
1817
1818 if (!inode) {
1819 __d_instantiate(entry, NULL);
1820 return NULL;
1821 }
1822
1823 hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
1824 /*
1825 * Don't need alias->d_lock here, because aliases with
1826 * d_parent == entry->d_parent are not subject to name or
1827 * parent changes, because the parent inode i_mutex is held.
1828 */
1829 if (alias->d_name.hash != hash)
1830 continue;
1831 if (alias->d_parent != entry->d_parent)
1832 continue;
1833 if (alias->d_name.len != len)
1834 continue;
1835 if (dentry_cmp(alias, name, len))
1836 continue;
1837 __dget(alias);
1838 return alias;
1839 }
1840
1841 __d_instantiate(entry, inode);
1842 return NULL;
1843}
1844
1845struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1846{
1847 struct dentry *result;
1848
1849 BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
1850
1851 if (inode)
1852 spin_lock(&inode->i_lock);
1853 result = __d_instantiate_unique(entry, inode);
1854 if (inode)
1855 spin_unlock(&inode->i_lock);
1856
1857 if (!result) {
1858 security_d_instantiate(entry, inode);
1859 return NULL;
1860 }
1861
1862 BUG_ON(!d_unhashed(result));
1863 iput(inode);
1864 return result;
1865}
1866
1867EXPORT_SYMBOL(d_instantiate_unique);
1868
1869/**
1870 * d_instantiate_no_diralias - instantiate a non-aliased dentry 1784 * d_instantiate_no_diralias - instantiate a non-aliased dentry
1871 * @entry: dentry to complete 1785 * @entry: dentry to complete
1872 * @inode: inode to attach to this dentry 1786 * @inode: inode to attach to this dentry
@@ -2446,6 +2360,86 @@ void d_rehash(struct dentry * entry)
2446} 2360}
2447EXPORT_SYMBOL(d_rehash); 2361EXPORT_SYMBOL(d_rehash);
2448 2362
2363
2364/* inode->i_lock held if inode is non-NULL */
2365
2366static inline void __d_add(struct dentry *dentry, struct inode *inode)
2367{
2368 if (inode) {
2369 __d_instantiate(dentry, inode);
2370 spin_unlock(&inode->i_lock);
2371 }
2372 security_d_instantiate(dentry, inode);
2373 d_rehash(dentry);
2374}
2375
2376/**
2377 * d_add - add dentry to hash queues
2378 * @entry: dentry to add
2379 * @inode: The inode to attach to this dentry
2380 *
2381 * This adds the entry to the hash queues and initializes @inode.
2382 * The entry was actually filled in earlier during d_alloc().
2383 */
2384
2385void d_add(struct dentry *entry, struct inode *inode)
2386{
2387 if (inode)
2388 spin_lock(&inode->i_lock);
2389 __d_add(entry, inode);
2390}
2391EXPORT_SYMBOL(d_add);
2392
2393/**
2394 * d_exact_alias - find and hash an exact unhashed alias
2395 * @entry: dentry to add
2396 * @inode: The inode to go with this dentry
2397 *
2398 * If an unhashed dentry with the same name/parent and desired
2399 * inode already exists, hash and return it. Otherwise, return
2400 * NULL.
2401 *
2402 * Parent directory should be locked.
2403 */
2404struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode)
2405{
2406 struct dentry *alias;
2407 int len = entry->d_name.len;
2408 const char *name = entry->d_name.name;
2409 unsigned int hash = entry->d_name.hash;
2410
2411 spin_lock(&inode->i_lock);
2412 hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
2413 /*
2414 * Don't need alias->d_lock here, because aliases with
2415 * d_parent == entry->d_parent are not subject to name or
2416 * parent changes, because the parent inode i_mutex is held.
2417 */
2418 if (alias->d_name.hash != hash)
2419 continue;
2420 if (alias->d_parent != entry->d_parent)
2421 continue;
2422 if (alias->d_name.len != len)
2423 continue;
2424 if (dentry_cmp(alias, name, len))
2425 continue;
2426 spin_lock(&alias->d_lock);
2427 if (!d_unhashed(alias)) {
2428 spin_unlock(&alias->d_lock);
2429 alias = NULL;
2430 } else {
2431 __dget_dlock(alias);
2432 _d_rehash(alias);
2433 spin_unlock(&alias->d_lock);
2434 }
2435 spin_unlock(&inode->i_lock);
2436 return alias;
2437 }
2438 spin_unlock(&inode->i_lock);
2439 return NULL;
2440}
2441EXPORT_SYMBOL(d_exact_alias);
2442
2449/** 2443/**
2450 * dentry_update_name_case - update case insensitive dentry with a new name 2444 * dentry_update_name_case - update case insensitive dentry with a new name
2451 * @dentry: dentry to be updated 2445 * @dentry: dentry to be updated
@@ -2782,10 +2776,9 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
2782 2776
2783 BUG_ON(!d_unhashed(dentry)); 2777 BUG_ON(!d_unhashed(dentry));
2784 2778
2785 if (!inode) { 2779 if (!inode)
2786 __d_instantiate(dentry, NULL);
2787 goto out; 2780 goto out;
2788 } 2781
2789 spin_lock(&inode->i_lock); 2782 spin_lock(&inode->i_lock);
2790 if (S_ISDIR(inode->i_mode)) { 2783 if (S_ISDIR(inode->i_mode)) {
2791 struct dentry *new = __d_find_any_alias(inode); 2784 struct dentry *new = __d_find_any_alias(inode);
@@ -2819,12 +2812,8 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
2819 return new; 2812 return new;
2820 } 2813 }
2821 } 2814 }
2822 /* already taking inode->i_lock, so d_add() by hand */
2823 __d_instantiate(dentry, inode);
2824 spin_unlock(&inode->i_lock);
2825out: 2815out:
2826 security_d_instantiate(dentry, inode); 2816 __d_add(dentry, inode);
2827 d_rehash(dentry);
2828 return NULL; 2817 return NULL;
2829} 2818}
2830EXPORT_SYMBOL(d_splice_alias); 2819EXPORT_SYMBOL(d_splice_alias);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 1f107fd51328..655f21f99160 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -575,6 +575,26 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
575 mutex_unlock(&allocated_ptys_lock); 575 mutex_unlock(&allocated_ptys_lock);
576} 576}
577 577
578/*
579 * pty code needs to hold extra references in case of last /dev/tty close
580 */
581
582void devpts_add_ref(struct inode *ptmx_inode)
583{
584 struct super_block *sb = pts_sb_from_inode(ptmx_inode);
585
586 atomic_inc(&sb->s_active);
587 ihold(ptmx_inode);
588}
589
590void devpts_del_ref(struct inode *ptmx_inode)
591{
592 struct super_block *sb = pts_sb_from_inode(ptmx_inode);
593
594 iput(ptmx_inode);
595 deactivate_super(sb);
596}
597
578/** 598/**
579 * devpts_pty_new -- create a new inode in /dev/pts/ 599 * devpts_pty_new -- create a new inode in /dev/pts/
580 * @ptmx_inode: inode of the master 600 * @ptmx_inode: inode of the master
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1b2f7ffc8b84..85463171053b 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -445,7 +445,8 @@ static struct bio *dio_await_one(struct dio *dio)
445 __set_current_state(TASK_UNINTERRUPTIBLE); 445 __set_current_state(TASK_UNINTERRUPTIBLE);
446 dio->waiter = current; 446 dio->waiter = current;
447 spin_unlock_irqrestore(&dio->bio_lock, flags); 447 spin_unlock_irqrestore(&dio->bio_lock, flags);
448 if (!blk_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie)) 448 if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
449 !blk_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie))
449 io_schedule(); 450 io_schedule();
450 /* wake up sets us TASK_RUNNING */ 451 /* wake up sets us TASK_RUNNING */
451 spin_lock_irqsave(&dio->bio_lock, flags); 452 spin_lock_irqsave(&dio->bio_lock, flags);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 80d6901493cf..87dbdd4881ab 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1499,16 +1499,14 @@ out:
1499 */ 1499 */
1500static int 1500static int
1501ecryptfs_encrypt_filename(struct ecryptfs_filename *filename, 1501ecryptfs_encrypt_filename(struct ecryptfs_filename *filename,
1502 struct ecryptfs_crypt_stat *crypt_stat,
1503 struct ecryptfs_mount_crypt_stat *mount_crypt_stat) 1502 struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
1504{ 1503{
1505 int rc = 0; 1504 int rc = 0;
1506 1505
1507 filename->encrypted_filename = NULL; 1506 filename->encrypted_filename = NULL;
1508 filename->encrypted_filename_size = 0; 1507 filename->encrypted_filename_size = 0;
1509 if ((crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCFN_USE_MOUNT_FNEK)) 1508 if (mount_crypt_stat && (mount_crypt_stat->flags
1510 || (mount_crypt_stat && (mount_crypt_stat->flags 1509 & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK)) {
1511 & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK))) {
1512 size_t packet_size; 1510 size_t packet_size;
1513 size_t remaining_bytes; 1511 size_t remaining_bytes;
1514 1512
@@ -1944,7 +1942,6 @@ out:
1944int ecryptfs_encrypt_and_encode_filename( 1942int ecryptfs_encrypt_and_encode_filename(
1945 char **encoded_name, 1943 char **encoded_name,
1946 size_t *encoded_name_size, 1944 size_t *encoded_name_size,
1947 struct ecryptfs_crypt_stat *crypt_stat,
1948 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, 1945 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
1949 const char *name, size_t name_size) 1946 const char *name, size_t name_size)
1950{ 1947{
@@ -1953,9 +1950,8 @@ int ecryptfs_encrypt_and_encode_filename(
1953 1950
1954 (*encoded_name) = NULL; 1951 (*encoded_name) = NULL;
1955 (*encoded_name_size) = 0; 1952 (*encoded_name_size) = 0;
1956 if ((crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCRYPT_FILENAMES)) 1953 if (mount_crypt_stat && (mount_crypt_stat->flags
1957 || (mount_crypt_stat && (mount_crypt_stat->flags 1954 & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)) {
1958 & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES))) {
1959 struct ecryptfs_filename *filename; 1955 struct ecryptfs_filename *filename;
1960 1956
1961 filename = kzalloc(sizeof(*filename), GFP_KERNEL); 1957 filename = kzalloc(sizeof(*filename), GFP_KERNEL);
@@ -1968,8 +1964,7 @@ int ecryptfs_encrypt_and_encode_filename(
1968 } 1964 }
1969 filename->filename = (char *)name; 1965 filename->filename = (char *)name;
1970 filename->filename_size = name_size; 1966 filename->filename_size = name_size;
1971 rc = ecryptfs_encrypt_filename(filename, crypt_stat, 1967 rc = ecryptfs_encrypt_filename(filename, mount_crypt_stat);
1972 mount_crypt_stat);
1973 if (rc) { 1968 if (rc) {
1974 printk(KERN_ERR "%s: Error attempting to encrypt " 1969 printk(KERN_ERR "%s: Error attempting to encrypt "
1975 "filename; rc = [%d]\n", __func__, rc); 1970 "filename; rc = [%d]\n", __func__, rc);
@@ -1980,11 +1975,9 @@ int ecryptfs_encrypt_and_encode_filename(
1980 NULL, &encoded_name_no_prefix_size, 1975 NULL, &encoded_name_no_prefix_size,
1981 filename->encrypted_filename, 1976 filename->encrypted_filename,
1982 filename->encrypted_filename_size); 1977 filename->encrypted_filename_size);
1983 if ((crypt_stat && (crypt_stat->flags 1978 if (mount_crypt_stat
1984 & ECRYPTFS_ENCFN_USE_MOUNT_FNEK))
1985 || (mount_crypt_stat
1986 && (mount_crypt_stat->flags 1979 && (mount_crypt_stat->flags
1987 & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK))) 1980 & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK))
1988 (*encoded_name_size) = 1981 (*encoded_name_size) =
1989 (ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE 1982 (ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE
1990 + encoded_name_no_prefix_size); 1983 + encoded_name_no_prefix_size);
@@ -2002,11 +1995,9 @@ int ecryptfs_encrypt_and_encode_filename(
2002 kfree(filename); 1995 kfree(filename);
2003 goto out; 1996 goto out;
2004 } 1997 }
2005 if ((crypt_stat && (crypt_stat->flags 1998 if (mount_crypt_stat
2006 & ECRYPTFS_ENCFN_USE_MOUNT_FNEK))
2007 || (mount_crypt_stat
2008 && (mount_crypt_stat->flags 1999 && (mount_crypt_stat->flags
2009 & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK))) { 2000 & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK)) {
2010 memcpy((*encoded_name), 2001 memcpy((*encoded_name),
2011 ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX, 2002 ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX,
2012 ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE); 2003 ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 7b39260c7bba..67e16128c572 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -569,7 +569,6 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length);
569int ecryptfs_encrypt_and_encode_filename( 569int ecryptfs_encrypt_and_encode_filename(
570 char **encoded_name, 570 char **encoded_name,
571 size_t *encoded_name_size, 571 size_t *encoded_name_size,
572 struct ecryptfs_crypt_stat *crypt_stat,
573 struct ecryptfs_mount_crypt_stat *mount_crypt_stat, 572 struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
574 const char *name, size_t name_size); 573 const char *name, size_t name_size);
575struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry); 574struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 4e685ac1024d..26651636cd1d 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -397,11 +397,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
397 int rc = 0; 397 int rc = 0;
398 398
399 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent); 399 lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
400 inode_lock(d_inode(lower_dir_dentry)); 400 lower_dentry = lookup_one_len_unlocked(ecryptfs_dentry->d_name.name,
401 lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
402 lower_dir_dentry, 401 lower_dir_dentry,
403 ecryptfs_dentry->d_name.len); 402 ecryptfs_dentry->d_name.len);
404 inode_unlock(d_inode(lower_dir_dentry));
405 if (IS_ERR(lower_dentry)) { 403 if (IS_ERR(lower_dentry)) {
406 rc = PTR_ERR(lower_dentry); 404 rc = PTR_ERR(lower_dentry);
407 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " 405 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -419,18 +417,16 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
419 dput(lower_dentry); 417 dput(lower_dentry);
420 rc = ecryptfs_encrypt_and_encode_filename( 418 rc = ecryptfs_encrypt_and_encode_filename(
421 &encrypted_and_encoded_name, &encrypted_and_encoded_name_size, 419 &encrypted_and_encoded_name, &encrypted_and_encoded_name_size,
422 NULL, mount_crypt_stat, ecryptfs_dentry->d_name.name, 420 mount_crypt_stat, ecryptfs_dentry->d_name.name,
423 ecryptfs_dentry->d_name.len); 421 ecryptfs_dentry->d_name.len);
424 if (rc) { 422 if (rc) {
425 printk(KERN_ERR "%s: Error attempting to encrypt and encode " 423 printk(KERN_ERR "%s: Error attempting to encrypt and encode "
426 "filename; rc = [%d]\n", __func__, rc); 424 "filename; rc = [%d]\n", __func__, rc);
427 goto out; 425 goto out;
428 } 426 }
429 inode_lock(d_inode(lower_dir_dentry)); 427 lower_dentry = lookup_one_len_unlocked(encrypted_and_encoded_name,
430 lower_dentry = lookup_one_len(encrypted_and_encoded_name,
431 lower_dir_dentry, 428 lower_dir_dentry,
432 encrypted_and_encoded_name_size); 429 encrypted_and_encoded_name_size);
433 inode_unlock(d_inode(lower_dir_dentry));
434 if (IS_ERR(lower_dentry)) { 430 if (IS_ERR(lower_dentry)) {
435 rc = PTR_ERR(lower_dentry); 431 rc = PTR_ERR(lower_dentry);
436 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned " 432 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -502,7 +498,6 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
502 dir->i_sb)->mount_crypt_stat; 498 dir->i_sb)->mount_crypt_stat;
503 rc = ecryptfs_encrypt_and_encode_filename(&encoded_symname, 499 rc = ecryptfs_encrypt_and_encode_filename(&encoded_symname,
504 &encoded_symlen, 500 &encoded_symlen,
505 NULL,
506 mount_crypt_stat, symname, 501 mount_crypt_stat, symname,
507 strlen(symname)); 502 strlen(symname));
508 if (rc) 503 if (rc)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index ae1dbcf47e97..cde60741cad2 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -94,6 +94,11 @@
94/* Epoll private bits inside the event mask */ 94/* Epoll private bits inside the event mask */
95#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE) 95#define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE)
96 96
97#define EPOLLINOUT_BITS (POLLIN | POLLOUT)
98
99#define EPOLLEXCLUSIVE_OK_BITS (EPOLLINOUT_BITS | POLLERR | POLLHUP | \
100 EPOLLWAKEUP | EPOLLET | EPOLLEXCLUSIVE)
101
97/* Maximum number of nesting allowed inside epoll sets */ 102/* Maximum number of nesting allowed inside epoll sets */
98#define EP_MAX_NESTS 4 103#define EP_MAX_NESTS 4
99 104
@@ -1068,7 +1073,22 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
1068 * wait list. 1073 * wait list.
1069 */ 1074 */
1070 if (waitqueue_active(&ep->wq)) { 1075 if (waitqueue_active(&ep->wq)) {
1071 ewake = 1; 1076 if ((epi->event.events & EPOLLEXCLUSIVE) &&
1077 !((unsigned long)key & POLLFREE)) {
1078 switch ((unsigned long)key & EPOLLINOUT_BITS) {
1079 case POLLIN:
1080 if (epi->event.events & POLLIN)
1081 ewake = 1;
1082 break;
1083 case POLLOUT:
1084 if (epi->event.events & POLLOUT)
1085 ewake = 1;
1086 break;
1087 case 0:
1088 ewake = 1;
1089 break;
1090 }
1091 }
1072 wake_up_locked(&ep->wq); 1092 wake_up_locked(&ep->wq);
1073 } 1093 }
1074 if (waitqueue_active(&ep->poll_wait)) 1094 if (waitqueue_active(&ep->poll_wait))
@@ -1875,9 +1895,13 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1875 * so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation. 1895 * so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation.
1876 * Also, we do not currently supported nested exclusive wakeups. 1896 * Also, we do not currently supported nested exclusive wakeups.
1877 */ 1897 */
1878 if ((epds.events & EPOLLEXCLUSIVE) && (op == EPOLL_CTL_MOD || 1898 if (epds.events & EPOLLEXCLUSIVE) {
1879 (op == EPOLL_CTL_ADD && is_file_epoll(tf.file)))) 1899 if (op == EPOLL_CTL_MOD)
1880 goto error_tgt_fput; 1900 goto error_tgt_fput;
1901 if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) ||
1902 (epds.events & ~EPOLLEXCLUSIVE_OK_BITS)))
1903 goto error_tgt_fput;
1904 }
1881 1905
1882 /* 1906 /*
1883 * At this point it is safe to assume that the "private_data" contains 1907 * At this point it is safe to assume that the "private_data" contains
@@ -1950,8 +1974,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1950 break; 1974 break;
1951 case EPOLL_CTL_MOD: 1975 case EPOLL_CTL_MOD:
1952 if (epi) { 1976 if (epi) {
1953 epds.events |= POLLERR | POLLHUP; 1977 if (!(epi->event.events & EPOLLEXCLUSIVE)) {
1954 error = ep_modify(ep, epi, &epds); 1978 epds.events |= POLLERR | POLLHUP;
1979 error = ep_modify(ep, epi, &epds);
1980 }
1955 } else 1981 } else
1956 error = -ENOENT; 1982 error = -ENOENT;
1957 break; 1983 break;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 506765afa1a3..bb8d67e2740a 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -376,12 +376,11 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
376 struct inode *inode = d_inode(dentry); 376 struct inode *inode = d_inode(dentry);
377 dnode_secno dno; 377 dnode_secno dno;
378 int r; 378 int r;
379 int rep = 0;
380 int err; 379 int err;
381 380
382 hpfs_lock(dir->i_sb); 381 hpfs_lock(dir->i_sb);
383 hpfs_adjust_length(name, &len); 382 hpfs_adjust_length(name, &len);
384again: 383
385 err = -ENOENT; 384 err = -ENOENT;
386 de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh); 385 de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
387 if (!de) 386 if (!de)
@@ -401,33 +400,9 @@ again:
401 hpfs_error(dir->i_sb, "there was error when removing dirent"); 400 hpfs_error(dir->i_sb, "there was error when removing dirent");
402 err = -EFSERROR; 401 err = -EFSERROR;
403 break; 402 break;
404 case 2: /* no space for deleting, try to truncate file */ 403 case 2: /* no space for deleting */
405
406 err = -ENOSPC; 404 err = -ENOSPC;
407 if (rep++) 405 break;
408 break;
409
410 dentry_unhash(dentry);
411 if (!d_unhashed(dentry)) {
412 hpfs_unlock(dir->i_sb);
413 return -ENOSPC;
414 }
415 if (generic_permission(inode, MAY_WRITE) ||
416 !S_ISREG(inode->i_mode) ||
417 get_write_access(inode)) {
418 d_rehash(dentry);
419 } else {
420 struct iattr newattrs;
421 /*pr_info("truncating file before delete.\n");*/
422 newattrs.ia_size = 0;
423 newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
424 err = notify_change(dentry, &newattrs, NULL);
425 put_write_access(inode);
426 if (!err)
427 goto again;
428 }
429 hpfs_unlock(dir->i_sb);
430 return -ENOSPC;
431 default: 406 default:
432 drop_nlink(inode); 407 drop_nlink(inode);
433 err = 0; 408 err = 0;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index d211b8e18566..30c4c9ebb693 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -843,9 +843,14 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
843 843
844 pr_notice("%s(): Link succeeded, unlink failed (err %d). You now have a hard link\n", 844 pr_notice("%s(): Link succeeded, unlink failed (err %d). You now have a hard link\n",
845 __func__, ret); 845 __func__, ret);
846 /* Might as well let the VFS know */ 846 /*
847 d_instantiate(new_dentry, d_inode(old_dentry)); 847 * We can't keep the target in dcache after that.
848 ihold(d_inode(old_dentry)); 848 * For one thing, we can't afford dentry aliases for directories.
849 * For another, if there was a victim, we _can't_ set new inode
850 * for that sucker and we have to trigger mount eviction - the
851 * caller won't do it on its own since we are returning an error.
852 */
853 d_invalidate(new_dentry);
849 new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now); 854 new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
850 return ret; 855 return ret;
851 } 856 }
diff --git a/fs/namei.c b/fs/namei.c
index f624d132e01e..794f81dce766 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1220,8 +1220,8 @@ static int follow_managed(struct path *path, struct nameidata *nd)
1220 1220
1221 if (need_mntput && path->mnt == mnt) 1221 if (need_mntput && path->mnt == mnt)
1222 mntput(path->mnt); 1222 mntput(path->mnt);
1223 if (ret == -EISDIR) 1223 if (ret == -EISDIR || !ret)
1224 ret = 0; 1224 ret = 1;
1225 if (need_mntput) 1225 if (need_mntput)
1226 nd->flags |= LOOKUP_JUMPED; 1226 nd->flags |= LOOKUP_JUMPED;
1227 if (unlikely(ret < 0)) 1227 if (unlikely(ret < 0))
@@ -1444,40 +1444,26 @@ static int follow_dotdot(struct nameidata *nd)
1444 * This looks up the name in dcache, possibly revalidates the old dentry and 1444 * This looks up the name in dcache, possibly revalidates the old dentry and
1445 * allocates a new one if not found or not valid. In the need_lookup argument 1445 * allocates a new one if not found or not valid. In the need_lookup argument
1446 * returns whether i_op->lookup is necessary. 1446 * returns whether i_op->lookup is necessary.
1447 *
1448 * dir->d_inode->i_mutex must be held
1449 */ 1447 */
1450static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, 1448static struct dentry *lookup_dcache(const struct qstr *name,
1451 unsigned int flags, bool *need_lookup) 1449 struct dentry *dir,
1450 unsigned int flags)
1452{ 1451{
1453 struct dentry *dentry; 1452 struct dentry *dentry;
1454 int error; 1453 int error;
1455 1454
1456 *need_lookup = false;
1457 dentry = d_lookup(dir, name); 1455 dentry = d_lookup(dir, name);
1458 if (dentry) { 1456 if (dentry) {
1459 if (dentry->d_flags & DCACHE_OP_REVALIDATE) { 1457 if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
1460 error = d_revalidate(dentry, flags); 1458 error = d_revalidate(dentry, flags);
1461 if (unlikely(error <= 0)) { 1459 if (unlikely(error <= 0)) {
1462 if (error < 0) { 1460 if (!error)
1463 dput(dentry);
1464 return ERR_PTR(error);
1465 } else {
1466 d_invalidate(dentry); 1461 d_invalidate(dentry);
1467 dput(dentry); 1462 dput(dentry);
1468 dentry = NULL; 1463 return ERR_PTR(error);
1469 }
1470 } 1464 }
1471 } 1465 }
1472 } 1466 }
1473
1474 if (!dentry) {
1475 dentry = d_alloc(dir, name);
1476 if (unlikely(!dentry))
1477 return ERR_PTR(-ENOMEM);
1478
1479 *need_lookup = true;
1480 }
1481 return dentry; 1467 return dentry;
1482} 1468}
1483 1469
@@ -1506,45 +1492,44 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
1506 return dentry; 1492 return dentry;
1507} 1493}
1508 1494
1509static struct dentry *__lookup_hash(struct qstr *name, 1495static struct dentry *__lookup_hash(const struct qstr *name,
1510 struct dentry *base, unsigned int flags) 1496 struct dentry *base, unsigned int flags)
1511{ 1497{
1512 bool need_lookup; 1498 struct dentry *dentry = lookup_dcache(name, base, flags);
1513 struct dentry *dentry;
1514 1499
1515 dentry = lookup_dcache(name, base, flags, &need_lookup); 1500 if (dentry)
1516 if (!need_lookup)
1517 return dentry; 1501 return dentry;
1518 1502
1503 dentry = d_alloc(base, name);
1504 if (unlikely(!dentry))
1505 return ERR_PTR(-ENOMEM);
1506
1519 return lookup_real(base->d_inode, dentry, flags); 1507 return lookup_real(base->d_inode, dentry, flags);
1520} 1508}
1521 1509
1522/*
1523 * It's more convoluted than I'd like it to be, but... it's still fairly
1524 * small and for now I'd prefer to have fast path as straight as possible.
1525 * It _is_ time-critical.
1526 */
1527static int lookup_fast(struct nameidata *nd, 1510static int lookup_fast(struct nameidata *nd,
1528 struct path *path, struct inode **inode, 1511 struct path *path, struct inode **inode,
1529 unsigned *seqp) 1512 unsigned *seqp)
1530{ 1513{
1531 struct vfsmount *mnt = nd->path.mnt; 1514 struct vfsmount *mnt = nd->path.mnt;
1532 struct dentry *dentry, *parent = nd->path.dentry; 1515 struct dentry *dentry, *parent = nd->path.dentry;
1533 int need_reval = 1;
1534 int status = 1; 1516 int status = 1;
1535 int err; 1517 int err;
1536 1518
1537 /* 1519 /*
1538 * Rename seqlock is not required here because in the off chance 1520 * Rename seqlock is not required here because in the off chance
1539 * of a false negative due to a concurrent rename, we're going to 1521 * of a false negative due to a concurrent rename, the caller is
1540 * do the non-racy lookup, below. 1522 * going to fall back to non-racy lookup.
1541 */ 1523 */
1542 if (nd->flags & LOOKUP_RCU) { 1524 if (nd->flags & LOOKUP_RCU) {
1543 unsigned seq; 1525 unsigned seq;
1544 bool negative; 1526 bool negative;
1545 dentry = __d_lookup_rcu(parent, &nd->last, &seq); 1527 dentry = __d_lookup_rcu(parent, &nd->last, &seq);
1546 if (!dentry) 1528 if (unlikely(!dentry)) {
1547 goto unlazy; 1529 if (unlazy_walk(nd, NULL, 0))
1530 return -ECHILD;
1531 return 0;
1532 }
1548 1533
1549 /* 1534 /*
1550 * This sequence count validates that the inode matches 1535 * This sequence count validates that the inode matches
@@ -1552,7 +1537,7 @@ static int lookup_fast(struct nameidata *nd,
1552 */ 1537 */
1553 *inode = d_backing_inode(dentry); 1538 *inode = d_backing_inode(dentry);
1554 negative = d_is_negative(dentry); 1539 negative = d_is_negative(dentry);
1555 if (read_seqcount_retry(&dentry->d_seq, seq)) 1540 if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
1556 return -ECHILD; 1541 return -ECHILD;
1557 1542
1558 /* 1543 /*
@@ -1562,81 +1547,89 @@ static int lookup_fast(struct nameidata *nd,
1562 * The memory barrier in read_seqcount_begin of child is 1547 * The memory barrier in read_seqcount_begin of child is
1563 * enough, we can use __read_seqcount_retry here. 1548 * enough, we can use __read_seqcount_retry here.
1564 */ 1549 */
1565 if (__read_seqcount_retry(&parent->d_seq, nd->seq)) 1550 if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq)))
1566 return -ECHILD; 1551 return -ECHILD;
1567 1552
1568 *seqp = seq; 1553 *seqp = seq;
1569 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1554 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
1570 status = d_revalidate(dentry, nd->flags); 1555 status = d_revalidate(dentry, nd->flags);
1571 if (unlikely(status <= 0)) { 1556 if (unlikely(status <= 0)) {
1572 if (status != -ECHILD) 1557 if (unlazy_walk(nd, dentry, seq))
1573 need_reval = 0; 1558 return -ECHILD;
1574 goto unlazy; 1559 if (status == -ECHILD)
1575 } 1560 status = d_revalidate(dentry, nd->flags);
1561 } else {
1562 /*
1563 * Note: do negative dentry check after revalidation in
1564 * case that drops it.
1565 */
1566 if (unlikely(negative))
1567 return -ENOENT;
1568 path->mnt = mnt;
1569 path->dentry = dentry;
1570 if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
1571 return 1;
1572 if (unlazy_walk(nd, dentry, seq))
1573 return -ECHILD;
1576 } 1574 }
1577 /*
1578 * Note: do negative dentry check after revalidation in
1579 * case that drops it.
1580 */
1581 if (negative)
1582 return -ENOENT;
1583 path->mnt = mnt;
1584 path->dentry = dentry;
1585 if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
1586 return 0;
1587unlazy:
1588 if (unlazy_walk(nd, dentry, seq))
1589 return -ECHILD;
1590 } else { 1575 } else {
1591 dentry = __d_lookup(parent, &nd->last); 1576 dentry = __d_lookup(parent, &nd->last);
1577 if (unlikely(!dentry))
1578 return 0;
1579 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
1580 status = d_revalidate(dentry, nd->flags);
1592 } 1581 }
1593
1594 if (unlikely(!dentry))
1595 goto need_lookup;
1596
1597 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
1598 status = d_revalidate(dentry, nd->flags);
1599 if (unlikely(status <= 0)) { 1582 if (unlikely(status <= 0)) {
1600 if (status < 0) { 1583 if (!status)
1601 dput(dentry); 1584 d_invalidate(dentry);
1602 return status;
1603 }
1604 d_invalidate(dentry);
1605 dput(dentry); 1585 dput(dentry);
1606 goto need_lookup; 1586 return status;
1607 } 1587 }
1608
1609 if (unlikely(d_is_negative(dentry))) { 1588 if (unlikely(d_is_negative(dentry))) {
1610 dput(dentry); 1589 dput(dentry);
1611 return -ENOENT; 1590 return -ENOENT;
1612 } 1591 }
1592
1613 path->mnt = mnt; 1593 path->mnt = mnt;
1614 path->dentry = dentry; 1594 path->dentry = dentry;
1615 err = follow_managed(path, nd); 1595 err = follow_managed(path, nd);
1616 if (likely(!err)) 1596 if (likely(err > 0))
1617 *inode = d_backing_inode(path->dentry); 1597 *inode = d_backing_inode(path->dentry);
1618 return err; 1598 return err;
1619
1620need_lookup:
1621 return 1;
1622} 1599}
1623 1600
1624/* Fast lookup failed, do it the slow way */ 1601/* Fast lookup failed, do it the slow way */
1625static int lookup_slow(struct nameidata *nd, struct path *path) 1602static struct dentry *lookup_slow(const struct qstr *name,
1603 struct dentry *dir,
1604 unsigned int flags)
1626{ 1605{
1627 struct dentry *dentry, *parent; 1606 struct dentry *dentry;
1628 1607 inode_lock(dir->d_inode);
1629 parent = nd->path.dentry; 1608 dentry = d_lookup(dir, name);
1630 BUG_ON(nd->inode != parent->d_inode); 1609 if (unlikely(dentry)) {
1631 1610 if ((dentry->d_flags & DCACHE_OP_REVALIDATE) &&
1632 inode_lock(parent->d_inode); 1611 !(flags & LOOKUP_NO_REVAL)) {
1633 dentry = __lookup_hash(&nd->last, parent, nd->flags); 1612 int error = d_revalidate(dentry, flags);
1634 inode_unlock(parent->d_inode); 1613 if (unlikely(error <= 0)) {
1635 if (IS_ERR(dentry)) 1614 if (!error)
1636 return PTR_ERR(dentry); 1615 d_invalidate(dentry);
1637 path->mnt = nd->path.mnt; 1616 dput(dentry);
1638 path->dentry = dentry; 1617 dentry = ERR_PTR(error);
1639 return follow_managed(path, nd); 1618 }
1619 }
1620 if (dentry) {
1621 inode_unlock(dir->d_inode);
1622 return dentry;
1623 }
1624 }
1625 dentry = d_alloc(dir, name);
1626 if (unlikely(!dentry)) {
1627 inode_unlock(dir->d_inode);
1628 return ERR_PTR(-ENOMEM);
1629 }
1630 dentry = lookup_real(dir->d_inode, dentry, flags);
1631 inode_unlock(dir->d_inode);
1632 return dentry;
1640} 1633}
1641 1634
1642static inline int may_lookup(struct nameidata *nd) 1635static inline int may_lookup(struct nameidata *nd)
@@ -1712,6 +1705,11 @@ static inline int should_follow_link(struct nameidata *nd, struct path *link,
1712 return 0; 1705 return 0;
1713 if (!follow) 1706 if (!follow)
1714 return 0; 1707 return 0;
1708 /* make sure that d_is_symlink above matches inode */
1709 if (nd->flags & LOOKUP_RCU) {
1710 if (read_seqcount_retry(&link->dentry->d_seq, seq))
1711 return -ECHILD;
1712 }
1715 return pick_link(nd, link, inode, seq); 1713 return pick_link(nd, link, inode, seq);
1716} 1714}
1717 1715
@@ -1735,19 +1733,24 @@ static int walk_component(struct nameidata *nd, int flags)
1735 return err; 1733 return err;
1736 } 1734 }
1737 err = lookup_fast(nd, &path, &inode, &seq); 1735 err = lookup_fast(nd, &path, &inode, &seq);
1738 if (unlikely(err)) { 1736 if (unlikely(err <= 0)) {
1739 if (err < 0) 1737 if (err < 0)
1740 return err; 1738 return err;
1741 1739 path.dentry = lookup_slow(&nd->last, nd->path.dentry,
1742 err = lookup_slow(nd, &path); 1740 nd->flags);
1743 if (err < 0) 1741 if (IS_ERR(path.dentry))
1742 return PTR_ERR(path.dentry);
1743 if (unlikely(d_is_negative(path.dentry))) {
1744 dput(path.dentry);
1745 return -ENOENT;
1746 }
1747 path.mnt = nd->path.mnt;
1748 err = follow_managed(&path, nd);
1749 if (unlikely(err < 0))
1744 return err; 1750 return err;
1745 1751
1746 inode = d_backing_inode(path.dentry);
1747 seq = 0; /* we are already out of RCU mode */ 1752 seq = 0; /* we are already out of RCU mode */
1748 err = -ENOENT; 1753 inode = d_backing_inode(path.dentry);
1749 if (d_is_negative(path.dentry))
1750 goto out_path_put;
1751 } 1754 }
1752 1755
1753 if (flags & WALK_PUT) 1756 if (flags & WALK_PUT)
@@ -1759,10 +1762,6 @@ static int walk_component(struct nameidata *nd, int flags)
1759 nd->inode = inode; 1762 nd->inode = inode;
1760 nd->seq = seq; 1763 nd->seq = seq;
1761 return 0; 1764 return 0;
1762
1763out_path_put:
1764 path_to_nameidata(&path, nd);
1765 return err;
1766} 1765}
1767 1766
1768/* 1767/*
@@ -2368,21 +2367,9 @@ struct dentry *lookup_one_len_unlocked(const char *name,
2368 if (err) 2367 if (err)
2369 return ERR_PTR(err); 2368 return ERR_PTR(err);
2370 2369
2371 /* 2370 ret = lookup_dcache(&this, base, 0);
2372 * __d_lookup() is used to try to get a quick answer and avoid the 2371 if (!ret)
2373 * mutex. A false-negative does no harm. 2372 ret = lookup_slow(&this, base, 0);
2374 */
2375 ret = __d_lookup(base, &this);
2376 if (ret && unlikely(ret->d_flags & DCACHE_OP_REVALIDATE)) {
2377 dput(ret);
2378 ret = NULL;
2379 }
2380 if (ret)
2381 return ret;
2382
2383 inode_lock(base->d_inode);
2384 ret = __lookup_hash(&this, base, 0);
2385 inode_unlock(base->d_inode);
2386 return ret; 2373 return ret;
2387} 2374}
2388EXPORT_SYMBOL(lookup_one_len_unlocked); 2375EXPORT_SYMBOL(lookup_one_len_unlocked);
@@ -2460,31 +2447,21 @@ mountpoint_last(struct nameidata *nd, struct path *path)
2460 if (error) 2447 if (error)
2461 return error; 2448 return error;
2462 dentry = dget(nd->path.dentry); 2449 dentry = dget(nd->path.dentry);
2463 goto done; 2450 } else {
2464 } 2451 dentry = d_lookup(dir, &nd->last);
2465
2466 inode_lock(dir->d_inode);
2467 dentry = d_lookup(dir, &nd->last);
2468 if (!dentry) {
2469 /*
2470 * No cached dentry. Mounted dentries are pinned in the cache,
2471 * so that means that this dentry is probably a symlink or the
2472 * path doesn't actually point to a mounted dentry.
2473 */
2474 dentry = d_alloc(dir, &nd->last);
2475 if (!dentry) { 2452 if (!dentry) {
2476 inode_unlock(dir->d_inode); 2453 /*
2477 return -ENOMEM; 2454 * No cached dentry. Mounted dentries are pinned in the
2478 } 2455 * cache, so that means that this dentry is probably
2479 dentry = lookup_real(dir->d_inode, dentry, nd->flags); 2456 * a symlink or the path doesn't actually point
2480 if (IS_ERR(dentry)) { 2457 * to a mounted dentry.
2481 inode_unlock(dir->d_inode); 2458 */
2482 return PTR_ERR(dentry); 2459 dentry = lookup_slow(&nd->last, dir,
2460 nd->flags | LOOKUP_NO_REVAL);
2461 if (IS_ERR(dentry))
2462 return PTR_ERR(dentry);
2483 } 2463 }
2484 } 2464 }
2485 inode_unlock(dir->d_inode);
2486
2487done:
2488 if (d_is_negative(dentry)) { 2465 if (d_is_negative(dentry)) {
2489 dput(dentry); 2466 dput(dentry);
2490 return -ENOENT; 2467 return -ENOENT;
@@ -3013,16 +2990,22 @@ static int lookup_open(struct nameidata *nd, struct path *path,
3013 struct inode *dir_inode = dir->d_inode; 2990 struct inode *dir_inode = dir->d_inode;
3014 struct dentry *dentry; 2991 struct dentry *dentry;
3015 int error; 2992 int error;
3016 bool need_lookup; 2993 bool need_lookup = false;
3017 2994
3018 *opened &= ~FILE_CREATED; 2995 *opened &= ~FILE_CREATED;
3019 dentry = lookup_dcache(&nd->last, dir, nd->flags, &need_lookup); 2996 dentry = lookup_dcache(&nd->last, dir, nd->flags);
3020 if (IS_ERR(dentry)) 2997 if (IS_ERR(dentry))
3021 return PTR_ERR(dentry); 2998 return PTR_ERR(dentry);
3022 2999
3023 /* Cached positive dentry: will open in f_op->open */ 3000 if (!dentry) {
3024 if (!need_lookup && dentry->d_inode) 3001 dentry = d_alloc(dir, &nd->last);
3002 if (unlikely(!dentry))
3003 return -ENOMEM;
3004 need_lookup = true;
3005 } else if (dentry->d_inode) {
3006 /* Cached positive dentry: will open in f_op->open */
3025 goto out_no_open; 3007 goto out_no_open;
3008 }
3026 3009
3027 if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { 3010 if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
3028 return atomic_open(nd, dentry, path, file, op, got_write, 3011 return atomic_open(nd, dentry, path, file, op, got_write,
@@ -3106,13 +3089,14 @@ static int do_last(struct nameidata *nd,
3106 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 3089 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
3107 /* we _can_ be in RCU mode here */ 3090 /* we _can_ be in RCU mode here */
3108 error = lookup_fast(nd, &path, &inode, &seq); 3091 error = lookup_fast(nd, &path, &inode, &seq);
3109 if (likely(!error)) 3092 if (likely(error > 0))
3110 goto finish_lookup; 3093 goto finish_lookup;
3111 3094
3112 if (error < 0) 3095 if (error < 0)
3113 return error; 3096 return error;
3114 3097
3115 BUG_ON(nd->inode != dir->d_inode); 3098 BUG_ON(nd->inode != dir->d_inode);
3099 BUG_ON(nd->flags & LOOKUP_RCU);
3116 } else { 3100 } else {
3117 /* create side of things */ 3101 /* create side of things */
3118 /* 3102 /*
@@ -3167,12 +3151,6 @@ retry_lookup:
3167 } 3151 }
3168 3152
3169 /* 3153 /*
3170 * create/update audit record if it already exists.
3171 */
3172 if (d_is_positive(path.dentry))
3173 audit_inode(nd->name, path.dentry, 0);
3174
3175 /*
3176 * If atomic_open() acquired write access it is dropped now due to 3154 * If atomic_open() acquired write access it is dropped now due to
3177 * possible mount and symlink following (this might be optimized away if 3155 * possible mount and symlink following (this might be optimized away if
3178 * necessary...) 3156 * necessary...)
@@ -3182,6 +3160,16 @@ retry_lookup:
3182 got_write = false; 3160 got_write = false;
3183 } 3161 }
3184 3162
3163 if (unlikely(d_is_negative(path.dentry))) {
3164 path_to_nameidata(&path, nd);
3165 return -ENOENT;
3166 }
3167
3168 /*
3169 * create/update audit record if it already exists.
3170 */
3171 audit_inode(nd->name, path.dentry, 0);
3172
3185 if (unlikely((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))) { 3173 if (unlikely((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))) {
3186 path_to_nameidata(&path, nd); 3174 path_to_nameidata(&path, nd);
3187 return -EEXIST; 3175 return -EEXIST;
@@ -3191,13 +3179,8 @@ retry_lookup:
3191 if (unlikely(error < 0)) 3179 if (unlikely(error < 0))
3192 return error; 3180 return error;
3193 3181
3194 BUG_ON(nd->flags & LOOKUP_RCU);
3195 inode = d_backing_inode(path.dentry);
3196 seq = 0; /* out of RCU mode, so the value doesn't matter */ 3182 seq = 0; /* out of RCU mode, so the value doesn't matter */
3197 if (unlikely(d_is_negative(path.dentry))) { 3183 inode = d_backing_inode(path.dentry);
3198 path_to_nameidata(&path, nd);
3199 return -ENOENT;
3200 }
3201finish_lookup: 3184finish_lookup:
3202 if (nd->depth) 3185 if (nd->depth)
3203 put_link(nd); 3186 put_link(nd);
@@ -3206,11 +3189,6 @@ finish_lookup:
3206 if (unlikely(error)) 3189 if (unlikely(error))
3207 return error; 3190 return error;
3208 3191
3209 if (unlikely(d_is_symlink(path.dentry)) && !(open_flag & O_PATH)) {
3210 path_to_nameidata(&path, nd);
3211 return -ELOOP;
3212 }
3213
3214 if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) { 3192 if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) {
3215 path_to_nameidata(&path, nd); 3193 path_to_nameidata(&path, nd);
3216 } else { 3194 } else {
@@ -3229,6 +3207,10 @@ finish_open:
3229 return error; 3207 return error;
3230 } 3208 }
3231 audit_inode(nd->name, nd->path.dentry, 0); 3209 audit_inode(nd->name, nd->path.dentry, 0);
3210 if (unlikely(d_is_symlink(nd->path.dentry)) && !(open_flag & O_PATH)) {
3211 error = -ELOOP;
3212 goto out;
3213 }
3232 error = -EISDIR; 3214 error = -EISDIR;
3233 if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) 3215 if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
3234 goto out; 3216 goto out;
@@ -3273,6 +3255,10 @@ opened:
3273 goto exit_fput; 3255 goto exit_fput;
3274 } 3256 }
3275out: 3257out:
3258 if (unlikely(error > 0)) {
3259 WARN_ON(1);
3260 error = -EINVAL;
3261 }
3276 if (got_write) 3262 if (got_write)
3277 mnt_drop_write(nd->path.mnt); 3263 mnt_drop_write(nd->path.mnt);
3278 path_put(&save_parent); 3264 path_put(&save_parent);
@@ -3699,31 +3685,6 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
3699 return sys_mkdirat(AT_FDCWD, pathname, mode); 3685 return sys_mkdirat(AT_FDCWD, pathname, mode);
3700} 3686}
3701 3687
3702/*
3703 * The dentry_unhash() helper will try to drop the dentry early: we
3704 * should have a usage count of 1 if we're the only user of this
3705 * dentry, and if that is true (possibly after pruning the dcache),
3706 * then we drop the dentry now.
3707 *
3708 * A low-level filesystem can, if it choses, legally
3709 * do a
3710 *
3711 * if (!d_unhashed(dentry))
3712 * return -EBUSY;
3713 *
3714 * if it cannot handle the case of removing a directory
3715 * that is still in use by something else..
3716 */
3717void dentry_unhash(struct dentry *dentry)
3718{
3719 shrink_dcache_parent(dentry);
3720 spin_lock(&dentry->d_lock);
3721 if (dentry->d_lockref.count == 1)
3722 __d_drop(dentry);
3723 spin_unlock(&dentry->d_lock);
3724}
3725EXPORT_SYMBOL(dentry_unhash);
3726
3727int vfs_rmdir(struct inode *dir, struct dentry *dentry) 3688int vfs_rmdir(struct inode *dir, struct dentry *dentry)
3728{ 3689{
3729 int error = may_delete(dir, dentry, 1); 3690 int error = may_delete(dir, dentry, 1);
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 26c2de2de13f..b7f8eaeea5d8 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -633,7 +633,7 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx,
633 d_rehash(newdent); 633 d_rehash(newdent);
634 } else { 634 } else {
635 spin_lock(&dentry->d_lock); 635 spin_lock(&dentry->d_lock);
636 NCP_FINFO(inode)->flags &= ~NCPI_DIR_CACHE; 636 NCP_FINFO(dir)->flags &= ~NCPI_DIR_CACHE;
637 spin_unlock(&dentry->d_lock); 637 spin_unlock(&dentry->d_lock);
638 } 638 }
639 } else { 639 } else {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 9cce67043f92..4bfa7d8bcade 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1360,19 +1360,15 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
1360 dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry); 1360 dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
1361 nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); 1361 nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
1362 1362
1363 res = ERR_PTR(-ENAMETOOLONG); 1363 if (unlikely(dentry->d_name.len > NFS_SERVER(dir)->namelen))
1364 if (dentry->d_name.len > NFS_SERVER(dir)->namelen) 1364 return ERR_PTR(-ENAMETOOLONG);
1365 goto out;
1366 1365
1367 /* 1366 /*
1368 * If we're doing an exclusive create, optimize away the lookup 1367 * If we're doing an exclusive create, optimize away the lookup
1369 * but don't hash the dentry. 1368 * but don't hash the dentry.
1370 */ 1369 */
1371 if (nfs_is_exclusive_create(dir, flags)) { 1370 if (nfs_is_exclusive_create(dir, flags))
1372 d_instantiate(dentry, NULL); 1371 return NULL;
1373 res = NULL;
1374 goto out;
1375 }
1376 1372
1377 res = ERR_PTR(-ENOMEM); 1373 res = ERR_PTR(-ENOMEM);
1378 fhandle = nfs_alloc_fhandle(); 1374 fhandle = nfs_alloc_fhandle();
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 5bcd92d50e82..0cb1abd535e3 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1215,7 +1215,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
1215 hdr->pgio_mirror_idx + 1, 1215 hdr->pgio_mirror_idx + 1,
1216 &hdr->pgio_mirror_idx)) 1216 &hdr->pgio_mirror_idx))
1217 goto out_eagain; 1217 goto out_eagain;
1218 set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, 1218 set_bit(NFS_LAYOUT_RETURN_REQUESTED,
1219 &hdr->lseg->pls_layout->plh_flags); 1219 &hdr->lseg->pls_layout->plh_flags);
1220 pnfs_read_resend_pnfs(hdr); 1220 pnfs_read_resend_pnfs(hdr);
1221 return task->tk_status; 1221 return task->tk_status;
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 29898a9550fa..eb370460ce20 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -412,7 +412,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
412 OP_ILLEGAL, GFP_NOIO); 412 OP_ILLEGAL, GFP_NOIO);
413 if (!fail_return) { 413 if (!fail_return) {
414 if (ff_layout_has_available_ds(lseg)) 414 if (ff_layout_has_available_ds(lseg))
415 set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, 415 set_bit(NFS_LAYOUT_RETURN_REQUESTED,
416 &lseg->pls_layout->plh_flags); 416 &lseg->pls_layout->plh_flags);
417 else 417 else
418 pnfs_error_mark_layout_for_return(ino, lseg); 418 pnfs_error_mark_layout_for_return(ino, lseg);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4bfc33ad0563..400a70b3be7b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2461,14 +2461,15 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
2461 2461
2462 dentry = opendata->dentry; 2462 dentry = opendata->dentry;
2463 if (d_really_is_negative(dentry)) { 2463 if (d_really_is_negative(dentry)) {
2464 /* FIXME: Is this d_drop() ever needed? */ 2464 struct dentry *alias;
2465 d_drop(dentry); 2465 d_drop(dentry);
2466 dentry = d_add_unique(dentry, igrab(state->inode)); 2466 alias = d_exact_alias(dentry, state->inode);
2467 if (dentry == NULL) { 2467 if (!alias)
2468 dentry = opendata->dentry; 2468 alias = d_splice_alias(igrab(state->inode), dentry);
2469 } else if (dentry != ctx->dentry) { 2469 /* d_splice_alias() can't fail here - it's a non-directory */
2470 if (alias) {
2470 dput(ctx->dentry); 2471 dput(ctx->dentry);
2471 ctx->dentry = dget(dentry); 2472 ctx->dentry = dentry = alias;
2472 } 2473 }
2473 nfs_set_verifier(dentry, 2474 nfs_set_verifier(dentry,
2474 nfs_save_change_attribute(d_inode(opendata->dir))); 2475 nfs_save_change_attribute(d_inode(opendata->dir)));
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a3592cc34a20..482b6e94bb37 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -52,9 +52,7 @@ static DEFINE_SPINLOCK(pnfs_spinlock);
52 */ 52 */
53static LIST_HEAD(pnfs_modules_tbl); 53static LIST_HEAD(pnfs_modules_tbl);
54 54
55static int 55static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo);
56pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
57 enum pnfs_iomode iomode, bool sync);
58 56
59/* Return the registered pnfs layout driver module matching given id */ 57/* Return the registered pnfs layout driver module matching given id */
60static struct pnfs_layoutdriver_type * 58static struct pnfs_layoutdriver_type *
@@ -243,6 +241,8 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
243{ 241{
244 struct inode *inode = lo->plh_inode; 242 struct inode *inode = lo->plh_inode;
245 243
244 pnfs_layoutreturn_before_put_layout_hdr(lo);
245
246 if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { 246 if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
247 if (!list_empty(&lo->plh_segs)) 247 if (!list_empty(&lo->plh_segs))
248 WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); 248 WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
@@ -345,58 +345,6 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
345 rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); 345 rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
346} 346}
347 347
348/* Return true if layoutreturn is needed */
349static bool
350pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
351 struct pnfs_layout_segment *lseg)
352{
353 struct pnfs_layout_segment *s;
354
355 if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
356 return false;
357
358 list_for_each_entry(s, &lo->plh_segs, pls_list)
359 if (s != lseg && test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
360 return false;
361
362 return true;
363}
364
365static bool
366pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
367{
368 if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
369 return false;
370 lo->plh_return_iomode = 0;
371 pnfs_get_layout_hdr(lo);
372 clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
373 return true;
374}
375
376static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
377 struct pnfs_layout_hdr *lo, struct inode *inode)
378{
379 lo = lseg->pls_layout;
380 inode = lo->plh_inode;
381
382 spin_lock(&inode->i_lock);
383 if (pnfs_layout_need_return(lo, lseg)) {
384 nfs4_stateid stateid;
385 enum pnfs_iomode iomode;
386 bool send;
387
388 nfs4_stateid_copy(&stateid, &lo->plh_stateid);
389 iomode = lo->plh_return_iomode;
390 send = pnfs_prepare_layoutreturn(lo);
391 spin_unlock(&inode->i_lock);
392 if (send) {
393 /* Send an async layoutreturn so we dont deadlock */
394 pnfs_send_layoutreturn(lo, &stateid, iomode, false);
395 }
396 } else
397 spin_unlock(&inode->i_lock);
398}
399
400void 348void
401pnfs_put_lseg(struct pnfs_layout_segment *lseg) 349pnfs_put_lseg(struct pnfs_layout_segment *lseg)
402{ 350{
@@ -410,15 +358,8 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
410 atomic_read(&lseg->pls_refcount), 358 atomic_read(&lseg->pls_refcount),
411 test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 359 test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
412 360
413 /* Handle the case where refcount != 1 */
414 if (atomic_add_unless(&lseg->pls_refcount, -1, 1))
415 return;
416
417 lo = lseg->pls_layout; 361 lo = lseg->pls_layout;
418 inode = lo->plh_inode; 362 inode = lo->plh_inode;
419 /* Do we need a layoutreturn? */
420 if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
421 pnfs_layoutreturn_before_put_lseg(lseg, lo, inode);
422 363
423 if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { 364 if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
424 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { 365 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
@@ -937,6 +878,17 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
937 rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); 878 rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
938} 879}
939 880
881static bool
882pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
883{
884 if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
885 return false;
886 lo->plh_return_iomode = 0;
887 pnfs_get_layout_hdr(lo);
888 clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
889 return true;
890}
891
940static int 892static int
941pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, 893pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
942 enum pnfs_iomode iomode, bool sync) 894 enum pnfs_iomode iomode, bool sync)
@@ -971,6 +923,48 @@ out:
971 return status; 923 return status;
972} 924}
973 925
926/* Return true if layoutreturn is needed */
927static bool
928pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
929{
930 struct pnfs_layout_segment *s;
931
932 if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
933 return false;
934
935 /* Defer layoutreturn until all lsegs are done */
936 list_for_each_entry(s, &lo->plh_segs, pls_list) {
937 if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
938 return false;
939 }
940
941 return true;
942}
943
944static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
945{
946 struct inode *inode= lo->plh_inode;
947
948 if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
949 return;
950 spin_lock(&inode->i_lock);
951 if (pnfs_layout_need_return(lo)) {
952 nfs4_stateid stateid;
953 enum pnfs_iomode iomode;
954 bool send;
955
956 nfs4_stateid_copy(&stateid, &lo->plh_stateid);
957 iomode = lo->plh_return_iomode;
958 send = pnfs_prepare_layoutreturn(lo);
959 spin_unlock(&inode->i_lock);
960 if (send) {
961 /* Send an async layoutreturn so we dont deadlock */
962 pnfs_send_layoutreturn(lo, &stateid, iomode, false);
963 }
964 } else
965 spin_unlock(&inode->i_lock);
966}
967
974/* 968/*
975 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr 969 * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
976 * when the layout segment list is empty. 970 * when the layout segment list is empty.
@@ -1091,7 +1085,7 @@ bool pnfs_roc(struct inode *ino)
1091 1085
1092 nfs4_stateid_copy(&stateid, &lo->plh_stateid); 1086 nfs4_stateid_copy(&stateid, &lo->plh_stateid);
1093 /* always send layoutreturn if being marked so */ 1087 /* always send layoutreturn if being marked so */
1094 if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, 1088 if (test_and_clear_bit(NFS_LAYOUT_RETURN_REQUESTED,
1095 &lo->plh_flags)) 1089 &lo->plh_flags))
1096 layoutreturn = pnfs_prepare_layoutreturn(lo); 1090 layoutreturn = pnfs_prepare_layoutreturn(lo);
1097 1091
@@ -1772,7 +1766,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
1772 pnfs_set_plh_return_iomode(lo, return_range->iomode); 1766 pnfs_set_plh_return_iomode(lo, return_range->iomode);
1773 if (!mark_lseg_invalid(lseg, tmp_list)) 1767 if (!mark_lseg_invalid(lseg, tmp_list))
1774 remaining++; 1768 remaining++;
1775 set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, 1769 set_bit(NFS_LAYOUT_RETURN_REQUESTED,
1776 &lo->plh_flags); 1770 &lo->plh_flags);
1777 } 1771 }
1778 return remaining; 1772 return remaining;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 9f4e2a47f4aa..1ac1db5f6dad 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -94,8 +94,8 @@ enum {
94 NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ 94 NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
95 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ 95 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
96 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ 96 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
97 NFS_LAYOUT_RETURN, /* Return this layout ASAP */ 97 NFS_LAYOUT_RETURN, /* layoutreturn in progress */
98 NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */ 98 NFS_LAYOUT_RETURN_REQUESTED, /* Return this layout ASAP */
99 NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ 99 NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */
100 NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ 100 NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */
101}; 101};
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 5d2a57e4c03a..d40010e4f1a9 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -870,7 +870,7 @@ __be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
870 870
871 oldfs = get_fs(); 871 oldfs = get_fs();
872 set_fs(KERNEL_DS); 872 set_fs(KERNEL_DS);
873 host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); 873 host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset, 0);
874 set_fs(oldfs); 874 set_fs(oldfs);
875 return nfsd_finish_read(file, count, host_err); 875 return nfsd_finish_read(file, count, host_err);
876} 876}
@@ -957,7 +957,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
957 957
958 /* Write the data. */ 958 /* Write the data. */
959 oldfs = get_fs(); set_fs(KERNEL_DS); 959 oldfs = get_fs(); set_fs(KERNEL_DS);
960 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos); 960 host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos, 0);
961 set_fs(oldfs); 961 set_fs(oldfs);
962 if (host_err < 0) 962 if (host_err < 0)
963 goto out_nfserr; 963 goto out_nfserr;
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index a3cc6d2fc896..a76b9ea7722e 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1254,15 +1254,15 @@ static const struct file_operations o2hb_debug_fops = {
1254 1254
1255void o2hb_exit(void) 1255void o2hb_exit(void)
1256{ 1256{
1257 kfree(o2hb_db_livenodes);
1258 kfree(o2hb_db_liveregions);
1259 kfree(o2hb_db_quorumregions);
1260 kfree(o2hb_db_failedregions);
1261 debugfs_remove(o2hb_debug_failedregions); 1257 debugfs_remove(o2hb_debug_failedregions);
1262 debugfs_remove(o2hb_debug_quorumregions); 1258 debugfs_remove(o2hb_debug_quorumregions);
1263 debugfs_remove(o2hb_debug_liveregions); 1259 debugfs_remove(o2hb_debug_liveregions);
1264 debugfs_remove(o2hb_debug_livenodes); 1260 debugfs_remove(o2hb_debug_livenodes);
1265 debugfs_remove(o2hb_debug_dir); 1261 debugfs_remove(o2hb_debug_dir);
1262 kfree(o2hb_db_livenodes);
1263 kfree(o2hb_db_liveregions);
1264 kfree(o2hb_db_quorumregions);
1265 kfree(o2hb_db_failedregions);
1266} 1266}
1267 1267
1268static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir, 1268static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir,
@@ -1438,13 +1438,15 @@ static void o2hb_region_release(struct config_item *item)
1438 1438
1439 kfree(reg->hr_slots); 1439 kfree(reg->hr_slots);
1440 1440
1441 kfree(reg->hr_db_regnum);
1442 kfree(reg->hr_db_livenodes);
1443 debugfs_remove(reg->hr_debug_livenodes); 1441 debugfs_remove(reg->hr_debug_livenodes);
1444 debugfs_remove(reg->hr_debug_regnum); 1442 debugfs_remove(reg->hr_debug_regnum);
1445 debugfs_remove(reg->hr_debug_elapsed_time); 1443 debugfs_remove(reg->hr_debug_elapsed_time);
1446 debugfs_remove(reg->hr_debug_pinned); 1444 debugfs_remove(reg->hr_debug_pinned);
1447 debugfs_remove(reg->hr_debug_dir); 1445 debugfs_remove(reg->hr_debug_dir);
1446 kfree(reg->hr_db_livenodes);
1447 kfree(reg->hr_db_regnum);
1448 kfree(reg->hr_debug_elapsed_time);
1449 kfree(reg->hr_debug_pinned);
1448 1450
1449 spin_lock(&o2hb_live_lock); 1451 spin_lock(&o2hb_live_lock);
1450 list_del(&reg->hr_all_item); 1452 list_del(&reg->hr_all_item);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index c5bdf02c213b..b94a425f0175 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2367,6 +2367,8 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
2367 break; 2367 break;
2368 } 2368 }
2369 } 2369 }
2370 dlm_lockres_clear_refmap_bit(dlm, res,
2371 dead_node);
2370 spin_unlock(&res->spinlock); 2372 spin_unlock(&res->spinlock);
2371 continue; 2373 continue;
2372 } 2374 }
diff --git a/fs/pnode.c b/fs/pnode.c
index 6367e1e435c6..c524fdddc7fb 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -202,6 +202,11 @@ static struct mount *last_dest, *last_source, *dest_master;
202static struct mountpoint *mp; 202static struct mountpoint *mp;
203static struct hlist_head *list; 203static struct hlist_head *list;
204 204
205static inline bool peers(struct mount *m1, struct mount *m2)
206{
207 return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
208}
209
205static int propagate_one(struct mount *m) 210static int propagate_one(struct mount *m)
206{ 211{
207 struct mount *child; 212 struct mount *child;
@@ -212,7 +217,7 @@ static int propagate_one(struct mount *m)
212 /* skip if mountpoint isn't covered by it */ 217 /* skip if mountpoint isn't covered by it */
213 if (!is_subdir(mp->m_dentry, m->mnt.mnt_root)) 218 if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
214 return 0; 219 return 0;
215 if (m->mnt_group_id == last_dest->mnt_group_id) { 220 if (peers(m, last_dest)) {
216 type = CL_MAKE_SHARED; 221 type = CL_MAKE_SHARED;
217 } else { 222 } else {
218 struct mount *n, *p; 223 struct mount *n, *p;
@@ -223,7 +228,7 @@ static int propagate_one(struct mount *m)
223 last_source = last_source->mnt_master; 228 last_source = last_source->mnt_master;
224 last_dest = last_source->mnt_parent; 229 last_dest = last_source->mnt_parent;
225 } 230 }
226 if (n->mnt_group_id != last_dest->mnt_group_id) { 231 if (!peers(n, last_dest)) {
227 last_source = last_source->mnt_master; 232 last_source = last_source->mnt_master;
228 last_dest = last_source->mnt_parent; 233 last_dest = last_source->mnt_parent;
229 } 234 }
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 85d16c67c33e..fa95ab2d3674 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -259,23 +259,29 @@ static int do_maps_open(struct inode *inode, struct file *file,
259 sizeof(struct proc_maps_private)); 259 sizeof(struct proc_maps_private));
260} 260}
261 261
262static pid_t pid_of_stack(struct proc_maps_private *priv, 262/*
263 struct vm_area_struct *vma, bool is_pid) 263 * Indicate if the VMA is a stack for the given task; for
264 * /proc/PID/maps that is the stack of the main task.
265 */
266static int is_stack(struct proc_maps_private *priv,
267 struct vm_area_struct *vma, int is_pid)
264{ 268{
265 struct inode *inode = priv->inode; 269 int stack = 0;
266 struct task_struct *task; 270
267 pid_t ret = 0; 271 if (is_pid) {
272 stack = vma->vm_start <= vma->vm_mm->start_stack &&
273 vma->vm_end >= vma->vm_mm->start_stack;
274 } else {
275 struct inode *inode = priv->inode;
276 struct task_struct *task;
268 277
269 rcu_read_lock(); 278 rcu_read_lock();
270 task = pid_task(proc_pid(inode), PIDTYPE_PID); 279 task = pid_task(proc_pid(inode), PIDTYPE_PID);
271 if (task) {
272 task = task_of_stack(task, vma, is_pid);
273 if (task) 280 if (task)
274 ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); 281 stack = vma_is_stack_for_task(vma, task);
282 rcu_read_unlock();
275 } 283 }
276 rcu_read_unlock(); 284 return stack;
277
278 return ret;
279} 285}
280 286
281static void 287static void
@@ -335,8 +341,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
335 341
336 name = arch_vma_name(vma); 342 name = arch_vma_name(vma);
337 if (!name) { 343 if (!name) {
338 pid_t tid;
339
340 if (!mm) { 344 if (!mm) {
341 name = "[vdso]"; 345 name = "[vdso]";
342 goto done; 346 goto done;
@@ -348,21 +352,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
348 goto done; 352 goto done;
349 } 353 }
350 354
351 tid = pid_of_stack(priv, vma, is_pid); 355 if (is_stack(priv, vma, is_pid))
352 if (tid != 0) { 356 name = "[stack]";
353 /*
354 * Thread stack in /proc/PID/task/TID/maps or
355 * the main process stack.
356 */
357 if (!is_pid || (vma->vm_start <= mm->start_stack &&
358 vma->vm_end >= mm->start_stack)) {
359 name = "[stack]";
360 } else {
361 /* Thread stack in /proc/PID/maps */
362 seq_pad(m, ' ');
363 seq_printf(m, "[stack:%d]", tid);
364 }
365 }
366 } 357 }
367 358
368done: 359done:
@@ -1552,18 +1543,19 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
1552static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, 1543static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
1553 unsigned long addr, unsigned long end, struct mm_walk *walk) 1544 unsigned long addr, unsigned long end, struct mm_walk *walk)
1554{ 1545{
1546 pte_t huge_pte = huge_ptep_get(pte);
1555 struct numa_maps *md; 1547 struct numa_maps *md;
1556 struct page *page; 1548 struct page *page;
1557 1549
1558 if (!pte_present(*pte)) 1550 if (!pte_present(huge_pte))
1559 return 0; 1551 return 0;
1560 1552
1561 page = pte_page(*pte); 1553 page = pte_page(huge_pte);
1562 if (!page) 1554 if (!page)
1563 return 0; 1555 return 0;
1564 1556
1565 md = walk->private; 1557 md = walk->private;
1566 gather_stats(page, md, pte_dirty(*pte), 1); 1558 gather_stats(page, md, pte_dirty(huge_pte), 1);
1567 return 0; 1559 return 0;
1568} 1560}
1569 1561
@@ -1617,19 +1609,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
1617 seq_file_path(m, file, "\n\t= "); 1609 seq_file_path(m, file, "\n\t= ");
1618 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { 1610 } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
1619 seq_puts(m, " heap"); 1611 seq_puts(m, " heap");
1620 } else { 1612 } else if (is_stack(proc_priv, vma, is_pid)) {
1621 pid_t tid = pid_of_stack(proc_priv, vma, is_pid); 1613 seq_puts(m, " stack");
1622 if (tid != 0) {
1623 /*
1624 * Thread stack in /proc/PID/task/TID/maps or
1625 * the main process stack.
1626 */
1627 if (!is_pid || (vma->vm_start <= mm->start_stack &&
1628 vma->vm_end >= mm->start_stack))
1629 seq_puts(m, " stack");
1630 else
1631 seq_printf(m, " stack:%d", tid);
1632 }
1633 } 1614 }
1634 1615
1635 if (is_vm_hugetlb_page(vma)) 1616 if (is_vm_hugetlb_page(vma))
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index e0d64c92e4f6..faacb0c0d857 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -123,23 +123,26 @@ unsigned long task_statm(struct mm_struct *mm,
123 return size; 123 return size;
124} 124}
125 125
126static pid_t pid_of_stack(struct proc_maps_private *priv, 126static int is_stack(struct proc_maps_private *priv,
127 struct vm_area_struct *vma, bool is_pid) 127 struct vm_area_struct *vma, int is_pid)
128{ 128{
129 struct inode *inode = priv->inode; 129 struct mm_struct *mm = vma->vm_mm;
130 struct task_struct *task; 130 int stack = 0;
131 pid_t ret = 0; 131
132 132 if (is_pid) {
133 rcu_read_lock(); 133 stack = vma->vm_start <= mm->start_stack &&
134 task = pid_task(proc_pid(inode), PIDTYPE_PID); 134 vma->vm_end >= mm->start_stack;
135 if (task) { 135 } else {
136 task = task_of_stack(task, vma, is_pid); 136 struct inode *inode = priv->inode;
137 struct task_struct *task;
138
139 rcu_read_lock();
140 task = pid_task(proc_pid(inode), PIDTYPE_PID);
137 if (task) 141 if (task)
138 ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info); 142 stack = vma_is_stack_for_task(vma, task);
143 rcu_read_unlock();
139 } 144 }
140 rcu_read_unlock(); 145 return stack;
141
142 return ret;
143} 146}
144 147
145/* 148/*
@@ -181,21 +184,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
181 if (file) { 184 if (file) {
182 seq_pad(m, ' '); 185 seq_pad(m, ' ');
183 seq_file_path(m, file, ""); 186 seq_file_path(m, file, "");
184 } else if (mm) { 187 } else if (mm && is_stack(priv, vma, is_pid)) {
185 pid_t tid = pid_of_stack(priv, vma, is_pid); 188 seq_pad(m, ' ');
186 189 seq_printf(m, "[stack]");
187 if (tid != 0) {
188 seq_pad(m, ' ');
189 /*
190 * Thread stack in /proc/PID/task/TID/maps or
191 * the main process stack.
192 */
193 if (!is_pid || (vma->vm_start <= mm->start_stack &&
194 vma->vm_end >= mm->start_stack))
195 seq_printf(m, "[stack]");
196 else
197 seq_printf(m, "[stack:%d]", tid);
198 }
199 } 190 }
200 191
201 seq_putc(m, '\n'); 192 seq_putc(m, '\n');
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 3c3b81bb6dfe..04ca0cc6d065 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2430,9 +2430,7 @@ int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
2430 struct dentry *dentry; 2430 struct dentry *dentry;
2431 int error; 2431 int error;
2432 2432
2433 inode_lock(d_inode(sb->s_root)); 2433 dentry = lookup_one_len_unlocked(qf_name, sb->s_root, strlen(qf_name));
2434 dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
2435 inode_unlock(d_inode(sb->s_root));
2436 if (IS_ERR(dentry)) 2434 if (IS_ERR(dentry))
2437 return PTR_ERR(dentry); 2435 return PTR_ERR(dentry);
2438 2436
diff --git a/fs/read_write.c b/fs/read_write.c
index 324ec271cc4e..cf377cf9dfe3 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -17,6 +17,7 @@
17#include <linux/splice.h> 17#include <linux/splice.h>
18#include <linux/compat.h> 18#include <linux/compat.h>
19#include <linux/mount.h> 19#include <linux/mount.h>
20#include <linux/fs.h>
20#include "internal.h" 21#include "internal.h"
21 22
22#include <asm/uaccess.h> 23#include <asm/uaccess.h>
@@ -183,7 +184,7 @@ loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
183 switch (whence) { 184 switch (whence) {
184 case SEEK_SET: case SEEK_CUR: 185 case SEEK_SET: case SEEK_CUR:
185 return generic_file_llseek_size(file, offset, whence, 186 return generic_file_llseek_size(file, offset, whence,
186 ~0ULL, 0); 187 OFFSET_MAX, 0);
187 default: 188 default:
188 return -EINVAL; 189 return -EINVAL;
189 } 190 }
@@ -692,12 +693,17 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
692EXPORT_SYMBOL(iov_shorten); 693EXPORT_SYMBOL(iov_shorten);
693 694
694static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, 695static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
695 loff_t *ppos, iter_fn_t fn) 696 loff_t *ppos, iter_fn_t fn, int flags)
696{ 697{
697 struct kiocb kiocb; 698 struct kiocb kiocb;
698 ssize_t ret; 699 ssize_t ret;
699 700
701 if (flags & ~RWF_HIPRI)
702 return -EOPNOTSUPP;
703
700 init_sync_kiocb(&kiocb, filp); 704 init_sync_kiocb(&kiocb, filp);
705 if (flags & RWF_HIPRI)
706 kiocb.ki_flags |= IOCB_HIPRI;
701 kiocb.ki_pos = *ppos; 707 kiocb.ki_pos = *ppos;
702 708
703 ret = fn(&kiocb, iter); 709 ret = fn(&kiocb, iter);
@@ -708,10 +714,13 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
708 714
709/* Do it by hand, with file-ops */ 715/* Do it by hand, with file-ops */
710static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, 716static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
711 loff_t *ppos, io_fn_t fn) 717 loff_t *ppos, io_fn_t fn, int flags)
712{ 718{
713 ssize_t ret = 0; 719 ssize_t ret = 0;
714 720
721 if (flags & ~RWF_HIPRI)
722 return -EOPNOTSUPP;
723
715 while (iov_iter_count(iter)) { 724 while (iov_iter_count(iter)) {
716 struct iovec iovec = iov_iter_iovec(iter); 725 struct iovec iovec = iov_iter_iovec(iter);
717 ssize_t nr; 726 ssize_t nr;
@@ -812,7 +821,8 @@ out:
812 821
813static ssize_t do_readv_writev(int type, struct file *file, 822static ssize_t do_readv_writev(int type, struct file *file,
814 const struct iovec __user * uvector, 823 const struct iovec __user * uvector,
815 unsigned long nr_segs, loff_t *pos) 824 unsigned long nr_segs, loff_t *pos,
825 int flags)
816{ 826{
817 size_t tot_len; 827 size_t tot_len;
818 struct iovec iovstack[UIO_FASTIOV]; 828 struct iovec iovstack[UIO_FASTIOV];
@@ -844,9 +854,9 @@ static ssize_t do_readv_writev(int type, struct file *file,
844 } 854 }
845 855
846 if (iter_fn) 856 if (iter_fn)
847 ret = do_iter_readv_writev(file, &iter, pos, iter_fn); 857 ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags);
848 else 858 else
849 ret = do_loop_readv_writev(file, &iter, pos, fn); 859 ret = do_loop_readv_writev(file, &iter, pos, fn, flags);
850 860
851 if (type != READ) 861 if (type != READ)
852 file_end_write(file); 862 file_end_write(file);
@@ -863,40 +873,40 @@ out:
863} 873}
864 874
865ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 875ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
866 unsigned long vlen, loff_t *pos) 876 unsigned long vlen, loff_t *pos, int flags)
867{ 877{
868 if (!(file->f_mode & FMODE_READ)) 878 if (!(file->f_mode & FMODE_READ))
869 return -EBADF; 879 return -EBADF;
870 if (!(file->f_mode & FMODE_CAN_READ)) 880 if (!(file->f_mode & FMODE_CAN_READ))
871 return -EINVAL; 881 return -EINVAL;
872 882
873 return do_readv_writev(READ, file, vec, vlen, pos); 883 return do_readv_writev(READ, file, vec, vlen, pos, flags);
874} 884}
875 885
876EXPORT_SYMBOL(vfs_readv); 886EXPORT_SYMBOL(vfs_readv);
877 887
878ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 888ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
879 unsigned long vlen, loff_t *pos) 889 unsigned long vlen, loff_t *pos, int flags)
880{ 890{
881 if (!(file->f_mode & FMODE_WRITE)) 891 if (!(file->f_mode & FMODE_WRITE))
882 return -EBADF; 892 return -EBADF;
883 if (!(file->f_mode & FMODE_CAN_WRITE)) 893 if (!(file->f_mode & FMODE_CAN_WRITE))
884 return -EINVAL; 894 return -EINVAL;
885 895
886 return do_readv_writev(WRITE, file, vec, vlen, pos); 896 return do_readv_writev(WRITE, file, vec, vlen, pos, flags);
887} 897}
888 898
889EXPORT_SYMBOL(vfs_writev); 899EXPORT_SYMBOL(vfs_writev);
890 900
891SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 901static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
892 unsigned long, vlen) 902 unsigned long vlen, int flags)
893{ 903{
894 struct fd f = fdget_pos(fd); 904 struct fd f = fdget_pos(fd);
895 ssize_t ret = -EBADF; 905 ssize_t ret = -EBADF;
896 906
897 if (f.file) { 907 if (f.file) {
898 loff_t pos = file_pos_read(f.file); 908 loff_t pos = file_pos_read(f.file);
899 ret = vfs_readv(f.file, vec, vlen, &pos); 909 ret = vfs_readv(f.file, vec, vlen, &pos, flags);
900 if (ret >= 0) 910 if (ret >= 0)
901 file_pos_write(f.file, pos); 911 file_pos_write(f.file, pos);
902 fdput_pos(f); 912 fdput_pos(f);
@@ -908,15 +918,15 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
908 return ret; 918 return ret;
909} 919}
910 920
911SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 921static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
912 unsigned long, vlen) 922 unsigned long vlen, int flags)
913{ 923{
914 struct fd f = fdget_pos(fd); 924 struct fd f = fdget_pos(fd);
915 ssize_t ret = -EBADF; 925 ssize_t ret = -EBADF;
916 926
917 if (f.file) { 927 if (f.file) {
918 loff_t pos = file_pos_read(f.file); 928 loff_t pos = file_pos_read(f.file);
919 ret = vfs_writev(f.file, vec, vlen, &pos); 929 ret = vfs_writev(f.file, vec, vlen, &pos, flags);
920 if (ret >= 0) 930 if (ret >= 0)
921 file_pos_write(f.file, pos); 931 file_pos_write(f.file, pos);
922 fdput_pos(f); 932 fdput_pos(f);
@@ -934,10 +944,9 @@ static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
934 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; 944 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
935} 945}
936 946
937SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, 947static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
938 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 948 unsigned long vlen, loff_t pos, int flags)
939{ 949{
940 loff_t pos = pos_from_hilo(pos_h, pos_l);
941 struct fd f; 950 struct fd f;
942 ssize_t ret = -EBADF; 951 ssize_t ret = -EBADF;
943 952
@@ -948,7 +957,7 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
948 if (f.file) { 957 if (f.file) {
949 ret = -ESPIPE; 958 ret = -ESPIPE;
950 if (f.file->f_mode & FMODE_PREAD) 959 if (f.file->f_mode & FMODE_PREAD)
951 ret = vfs_readv(f.file, vec, vlen, &pos); 960 ret = vfs_readv(f.file, vec, vlen, &pos, flags);
952 fdput(f); 961 fdput(f);
953 } 962 }
954 963
@@ -958,10 +967,9 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
958 return ret; 967 return ret;
959} 968}
960 969
961SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, 970static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
962 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 971 unsigned long vlen, loff_t pos, int flags)
963{ 972{
964 loff_t pos = pos_from_hilo(pos_h, pos_l);
965 struct fd f; 973 struct fd f;
966 ssize_t ret = -EBADF; 974 ssize_t ret = -EBADF;
967 975
@@ -972,7 +980,7 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
972 if (f.file) { 980 if (f.file) {
973 ret = -ESPIPE; 981 ret = -ESPIPE;
974 if (f.file->f_mode & FMODE_PWRITE) 982 if (f.file->f_mode & FMODE_PWRITE)
975 ret = vfs_writev(f.file, vec, vlen, &pos); 983 ret = vfs_writev(f.file, vec, vlen, &pos, flags);
976 fdput(f); 984 fdput(f);
977 } 985 }
978 986
@@ -982,11 +990,64 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
982 return ret; 990 return ret;
983} 991}
984 992
993SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
994 unsigned long, vlen)
995{
996 return do_readv(fd, vec, vlen, 0);
997}
998
999SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
1000 unsigned long, vlen)
1001{
1002 return do_writev(fd, vec, vlen, 0);
1003}
1004
1005SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
1006 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
1007{
1008 loff_t pos = pos_from_hilo(pos_h, pos_l);
1009
1010 return do_preadv(fd, vec, vlen, pos, 0);
1011}
1012
1013SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
1014 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
1015 int, flags)
1016{
1017 loff_t pos = pos_from_hilo(pos_h, pos_l);
1018
1019 if (pos == -1)
1020 return do_readv(fd, vec, vlen, flags);
1021
1022 return do_preadv(fd, vec, vlen, pos, flags);
1023}
1024
1025SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
1026 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
1027{
1028 loff_t pos = pos_from_hilo(pos_h, pos_l);
1029
1030 return do_pwritev(fd, vec, vlen, pos, 0);
1031}
1032
1033SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
1034 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
1035 int, flags)
1036{
1037 loff_t pos = pos_from_hilo(pos_h, pos_l);
1038
1039 if (pos == -1)
1040 return do_writev(fd, vec, vlen, flags);
1041
1042 return do_pwritev(fd, vec, vlen, pos, flags);
1043}
1044
985#ifdef CONFIG_COMPAT 1045#ifdef CONFIG_COMPAT
986 1046
987static ssize_t compat_do_readv_writev(int type, struct file *file, 1047static ssize_t compat_do_readv_writev(int type, struct file *file,
988 const struct compat_iovec __user *uvector, 1048 const struct compat_iovec __user *uvector,
989 unsigned long nr_segs, loff_t *pos) 1049 unsigned long nr_segs, loff_t *pos,
1050 int flags)
990{ 1051{
991 compat_ssize_t tot_len; 1052 compat_ssize_t tot_len;
992 struct iovec iovstack[UIO_FASTIOV]; 1053 struct iovec iovstack[UIO_FASTIOV];
@@ -1018,9 +1079,9 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
1018 } 1079 }
1019 1080
1020 if (iter_fn) 1081 if (iter_fn)
1021 ret = do_iter_readv_writev(file, &iter, pos, iter_fn); 1082 ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags);
1022 else 1083 else
1023 ret = do_loop_readv_writev(file, &iter, pos, fn); 1084 ret = do_loop_readv_writev(file, &iter, pos, fn, flags);
1024 1085
1025 if (type != READ) 1086 if (type != READ)
1026 file_end_write(file); 1087 file_end_write(file);
@@ -1038,7 +1099,7 @@ out:
1038 1099
1039static size_t compat_readv(struct file *file, 1100static size_t compat_readv(struct file *file,
1040 const struct compat_iovec __user *vec, 1101 const struct compat_iovec __user *vec,
1041 unsigned long vlen, loff_t *pos) 1102 unsigned long vlen, loff_t *pos, int flags)
1042{ 1103{
1043 ssize_t ret = -EBADF; 1104 ssize_t ret = -EBADF;
1044 1105
@@ -1049,7 +1110,7 @@ static size_t compat_readv(struct file *file,
1049 if (!(file->f_mode & FMODE_CAN_READ)) 1110 if (!(file->f_mode & FMODE_CAN_READ))
1050 goto out; 1111 goto out;
1051 1112
1052 ret = compat_do_readv_writev(READ, file, vec, vlen, pos); 1113 ret = compat_do_readv_writev(READ, file, vec, vlen, pos, flags);
1053 1114
1054out: 1115out:
1055 if (ret > 0) 1116 if (ret > 0)
@@ -1058,9 +1119,9 @@ out:
1058 return ret; 1119 return ret;
1059} 1120}
1060 1121
1061COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, 1122static size_t do_compat_readv(compat_ulong_t fd,
1062 const struct compat_iovec __user *,vec, 1123 const struct compat_iovec __user *vec,
1063 compat_ulong_t, vlen) 1124 compat_ulong_t vlen, int flags)
1064{ 1125{
1065 struct fd f = fdget_pos(fd); 1126 struct fd f = fdget_pos(fd);
1066 ssize_t ret; 1127 ssize_t ret;
@@ -1069,16 +1130,24 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
1069 if (!f.file) 1130 if (!f.file)
1070 return -EBADF; 1131 return -EBADF;
1071 pos = f.file->f_pos; 1132 pos = f.file->f_pos;
1072 ret = compat_readv(f.file, vec, vlen, &pos); 1133 ret = compat_readv(f.file, vec, vlen, &pos, flags);
1073 if (ret >= 0) 1134 if (ret >= 0)
1074 f.file->f_pos = pos; 1135 f.file->f_pos = pos;
1075 fdput_pos(f); 1136 fdput_pos(f);
1076 return ret; 1137 return ret;
1138
1077} 1139}
1078 1140
1079static long __compat_sys_preadv64(unsigned long fd, 1141COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
1142 const struct compat_iovec __user *,vec,
1143 compat_ulong_t, vlen)
1144{
1145 return do_compat_readv(fd, vec, vlen, 0);
1146}
1147
1148static long do_compat_preadv64(unsigned long fd,
1080 const struct compat_iovec __user *vec, 1149 const struct compat_iovec __user *vec,
1081 unsigned long vlen, loff_t pos) 1150 unsigned long vlen, loff_t pos, int flags)
1082{ 1151{
1083 struct fd f; 1152 struct fd f;
1084 ssize_t ret; 1153 ssize_t ret;
@@ -1090,7 +1159,7 @@ static long __compat_sys_preadv64(unsigned long fd,
1090 return -EBADF; 1159 return -EBADF;
1091 ret = -ESPIPE; 1160 ret = -ESPIPE;
1092 if (f.file->f_mode & FMODE_PREAD) 1161 if (f.file->f_mode & FMODE_PREAD)
1093 ret = compat_readv(f.file, vec, vlen, &pos); 1162 ret = compat_readv(f.file, vec, vlen, &pos, flags);
1094 fdput(f); 1163 fdput(f);
1095 return ret; 1164 return ret;
1096} 1165}
@@ -1100,7 +1169,7 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
1100 const struct compat_iovec __user *,vec, 1169 const struct compat_iovec __user *,vec,
1101 unsigned long, vlen, loff_t, pos) 1170 unsigned long, vlen, loff_t, pos)
1102{ 1171{
1103 return __compat_sys_preadv64(fd, vec, vlen, pos); 1172 return do_compat_preadv64(fd, vec, vlen, pos, 0);
1104} 1173}
1105#endif 1174#endif
1106 1175
@@ -1110,12 +1179,25 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
1110{ 1179{
1111 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1180 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1112 1181
1113 return __compat_sys_preadv64(fd, vec, vlen, pos); 1182 return do_compat_preadv64(fd, vec, vlen, pos, 0);
1183}
1184
1185COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
1186 const struct compat_iovec __user *,vec,
1187 compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
1188 int, flags)
1189{
1190 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1191
1192 if (pos == -1)
1193 return do_compat_readv(fd, vec, vlen, flags);
1194
1195 return do_compat_preadv64(fd, vec, vlen, pos, flags);
1114} 1196}
1115 1197
1116static size_t compat_writev(struct file *file, 1198static size_t compat_writev(struct file *file,
1117 const struct compat_iovec __user *vec, 1199 const struct compat_iovec __user *vec,
1118 unsigned long vlen, loff_t *pos) 1200 unsigned long vlen, loff_t *pos, int flags)
1119{ 1201{
1120 ssize_t ret = -EBADF; 1202 ssize_t ret = -EBADF;
1121 1203
@@ -1126,7 +1208,7 @@ static size_t compat_writev(struct file *file,
1126 if (!(file->f_mode & FMODE_CAN_WRITE)) 1208 if (!(file->f_mode & FMODE_CAN_WRITE))
1127 goto out; 1209 goto out;
1128 1210
1129 ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); 1211 ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, 0);
1130 1212
1131out: 1213out:
1132 if (ret > 0) 1214 if (ret > 0)
@@ -1135,9 +1217,9 @@ out:
1135 return ret; 1217 return ret;
1136} 1218}
1137 1219
1138COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, 1220static size_t do_compat_writev(compat_ulong_t fd,
1139 const struct compat_iovec __user *, vec, 1221 const struct compat_iovec __user* vec,
1140 compat_ulong_t, vlen) 1222 compat_ulong_t vlen, int flags)
1141{ 1223{
1142 struct fd f = fdget_pos(fd); 1224 struct fd f = fdget_pos(fd);
1143 ssize_t ret; 1225 ssize_t ret;
@@ -1146,16 +1228,23 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
1146 if (!f.file) 1228 if (!f.file)
1147 return -EBADF; 1229 return -EBADF;
1148 pos = f.file->f_pos; 1230 pos = f.file->f_pos;
1149 ret = compat_writev(f.file, vec, vlen, &pos); 1231 ret = compat_writev(f.file, vec, vlen, &pos, flags);
1150 if (ret >= 0) 1232 if (ret >= 0)
1151 f.file->f_pos = pos; 1233 f.file->f_pos = pos;
1152 fdput_pos(f); 1234 fdput_pos(f);
1153 return ret; 1235 return ret;
1154} 1236}
1155 1237
1156static long __compat_sys_pwritev64(unsigned long fd, 1238COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
1239 const struct compat_iovec __user *, vec,
1240 compat_ulong_t, vlen)
1241{
1242 return do_compat_writev(fd, vec, vlen, 0);
1243}
1244
1245static long do_compat_pwritev64(unsigned long fd,
1157 const struct compat_iovec __user *vec, 1246 const struct compat_iovec __user *vec,
1158 unsigned long vlen, loff_t pos) 1247 unsigned long vlen, loff_t pos, int flags)
1159{ 1248{
1160 struct fd f; 1249 struct fd f;
1161 ssize_t ret; 1250 ssize_t ret;
@@ -1167,7 +1256,7 @@ static long __compat_sys_pwritev64(unsigned long fd,
1167 return -EBADF; 1256 return -EBADF;
1168 ret = -ESPIPE; 1257 ret = -ESPIPE;
1169 if (f.file->f_mode & FMODE_PWRITE) 1258 if (f.file->f_mode & FMODE_PWRITE)
1170 ret = compat_writev(f.file, vec, vlen, &pos); 1259 ret = compat_writev(f.file, vec, vlen, &pos, flags);
1171 fdput(f); 1260 fdput(f);
1172 return ret; 1261 return ret;
1173} 1262}
@@ -1177,7 +1266,7 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
1177 const struct compat_iovec __user *,vec, 1266 const struct compat_iovec __user *,vec,
1178 unsigned long, vlen, loff_t, pos) 1267 unsigned long, vlen, loff_t, pos)
1179{ 1268{
1180 return __compat_sys_pwritev64(fd, vec, vlen, pos); 1269 return do_compat_pwritev64(fd, vec, vlen, pos, 0);
1181} 1270}
1182#endif 1271#endif
1183 1272
@@ -1187,8 +1276,21 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
1187{ 1276{
1188 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1277 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1189 1278
1190 return __compat_sys_pwritev64(fd, vec, vlen, pos); 1279 return do_compat_pwritev64(fd, vec, vlen, pos, 0);
1191} 1280}
1281
1282COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
1283 const struct compat_iovec __user *,vec,
1284 compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags)
1285{
1286 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1287
1288 if (pos == -1)
1289 return do_compat_writev(fd, vec, vlen, flags);
1290
1291 return do_compat_pwritev64(fd, vec, vlen, pos, flags);
1292}
1293
1192#endif 1294#endif
1193 1295
1194static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 1296static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
@@ -1532,10 +1634,12 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
1532 1634
1533 if (!(file_in->f_mode & FMODE_READ) || 1635 if (!(file_in->f_mode & FMODE_READ) ||
1534 !(file_out->f_mode & FMODE_WRITE) || 1636 !(file_out->f_mode & FMODE_WRITE) ||
1535 (file_out->f_flags & O_APPEND) || 1637 (file_out->f_flags & O_APPEND))
1536 !file_in->f_op->clone_file_range)
1537 return -EBADF; 1638 return -EBADF;
1538 1639
1640 if (!file_in->f_op->clone_file_range)
1641 return -EOPNOTSUPP;
1642
1539 ret = clone_verify_area(file_in, pos_in, len, false); 1643 ret = clone_verify_area(file_in, pos_in, len, false);
1540 if (ret) 1644 if (ret)
1541 return ret; 1645 return ret;
diff --git a/fs/splice.c b/fs/splice.c
index 19e0b103d253..9947b5c69664 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -580,7 +580,7 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
580 old_fs = get_fs(); 580 old_fs = get_fs();
581 set_fs(get_ds()); 581 set_fs(get_ds());
582 /* The cast to a user pointer is valid due to the set_fs() */ 582 /* The cast to a user pointer is valid due to the set_fs() */
583 res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos); 583 res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos, 0);
584 set_fs(old_fs); 584 set_fs(old_fs);
585 585
586 return res; 586 return res;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index b94fa6c3c6eb..053818dd6c18 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -153,7 +153,7 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
153 if (isalarm(ctx)) 153 if (isalarm(ctx))
154 remaining = alarm_expires_remaining(&ctx->t.alarm); 154 remaining = alarm_expires_remaining(&ctx->t.alarm);
155 else 155 else
156 remaining = hrtimer_expires_remaining(&ctx->t.tmr); 156 remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr);
157 157
158 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; 158 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
159} 159}
diff --git a/fs/xattr.c b/fs/xattr.c
index 07d0e47f6a7f..4861322e28e8 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -940,7 +940,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
940 bool trusted = capable(CAP_SYS_ADMIN); 940 bool trusted = capable(CAP_SYS_ADMIN);
941 struct simple_xattr *xattr; 941 struct simple_xattr *xattr;
942 ssize_t remaining_size = size; 942 ssize_t remaining_size = size;
943 int err; 943 int err = 0;
944 944
945#ifdef CONFIG_FS_POSIX_ACL 945#ifdef CONFIG_FS_POSIX_ACL
946 if (inode->i_acl) { 946 if (inode->i_acl) {
@@ -965,11 +965,11 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
965 965
966 err = xattr_list_one(&buffer, &remaining_size, xattr->name); 966 err = xattr_list_one(&buffer, &remaining_size, xattr->name);
967 if (err) 967 if (err)
968 return err; 968 break;
969 } 969 }
970 spin_unlock(&xattrs->lock); 970 spin_unlock(&xattrs->lock);
971 971
972 return size - remaining_size; 972 return err ? err : size - remaining_size;
973} 973}
974 974
975/* 975/*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index da37beb76f6e..594f7e63b432 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4491,7 +4491,7 @@ xlog_recover_process(
4491 * know precisely what failed. 4491 * know precisely what failed.
4492 */ 4492 */
4493 if (pass == XLOG_RECOVER_CRCPASS) { 4493 if (pass == XLOG_RECOVER_CRCPASS) {
4494 if (rhead->h_crc && crc != le32_to_cpu(rhead->h_crc)) 4494 if (rhead->h_crc && crc != rhead->h_crc)
4495 return -EFSBADCRC; 4495 return -EFSBADCRC;
4496 return 0; 4496 return 0;
4497 } 4497 }
@@ -4502,7 +4502,7 @@ xlog_recover_process(
4502 * zero CRC check prevents warnings from being emitted when upgrading 4502 * zero CRC check prevents warnings from being emitted when upgrading
4503 * the kernel from one that does not add CRCs by default. 4503 * the kernel from one that does not add CRCs by default.
4504 */ 4504 */
4505 if (crc != le32_to_cpu(rhead->h_crc)) { 4505 if (crc != rhead->h_crc) {
4506 if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) { 4506 if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
4507 xfs_alert(log->l_mp, 4507 xfs_alert(log->l_mp,
4508 "log record CRC mismatch: found 0x%x, expected 0x%x.", 4508 "log record CRC mismatch: found 0x%x, expected 0x%x.",