author    Ingo Molnar <mingo@kernel.org>  2016-02-18 03:28:03 -0500
committer Ingo Molnar <mingo@kernel.org>  2016-02-18 03:28:03 -0500
commit    3a2f2ac9b96f9a9f5538396a212d3b9fb543bfc5 (patch)
tree      294c2f340b11584e58cea90adfc4182ac8742348 /fs
parent    4e79e182b419172e35936a47f098509092d69817 (diff)
parent    f4eafd8bcd5229e998aa252627703b8462c3b90f (diff)
Merge branch 'x86/urgent' into x86/asm, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'fs')
 fs/block_dev.c                            |  36
 fs/btrfs/async-thread.c                   |   2
 fs/btrfs/backref.c                        |  10
 fs/btrfs/compression.c                    |   6
 fs/btrfs/delayed-inode.c                  |   3
 fs/btrfs/delayed-inode.h                  |   2
 fs/btrfs/disk-io.c                        |   2
 fs/btrfs/extent_io.c                      |  45
 fs/btrfs/extent_io.h                      |   3
 fs/btrfs/free-space-tree.c                |  18
 fs/btrfs/inode.c                          |  50
 fs/btrfs/ioctl.c                          | 119
 fs/btrfs/relocation.c                     |   3
 fs/btrfs/sysfs.c                          |  35
 fs/btrfs/sysfs.h                          |   5
 fs/btrfs/tests/btrfs-tests.c              |  10
 fs/btrfs/tests/extent-io-tests.c          |  12
 fs/btrfs/tests/inode-tests.c              |   8
 fs/btrfs/tree-log.c                       |  14
 fs/ceph/file.c                            |   6
 fs/compat_ioctl.c                         |   3
 fs/dax.c                                  |  23
 fs/devpts/inode.c                         |  20
 fs/efivarfs/file.c                        |  70
 fs/efivarfs/inode.c                       |  30
 fs/efivarfs/internal.h                    |   3
 fs/efivarfs/super.c                       |  16
 fs/eventpoll.c                            |  38
 fs/nfs/flexfilelayout/flexfilelayout.c    |   2
 fs/nfs/flexfilelayout/flexfilelayoutdev.c |   2
 fs/nfs/pnfs.c                             | 122
 fs/nfs/pnfs.h                             |   4
 fs/ocfs2/cluster/heartbeat.c              |  14
 fs/ocfs2/dlm/dlmrecovery.c                |   2
 fs/proc/task_mmu.c                        |  73
 fs/proc/task_nommu.c                      |  49
 fs/timerfd.c                              |   2
 fs/xfs/xfs_log_recover.c                  |   4
 38 files changed, 554 insertions(+), 312 deletions(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7b9cd49622b1..39b3a174a425 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1730,43 +1730,25 @@ static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	return __dax_fault(vma, vmf, blkdev_get_block, NULL);
 }
 
-static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
-		pmd_t *pmd, unsigned int flags)
-{
-	return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
-}
-
-static void blkdev_vm_open(struct vm_area_struct *vma)
+static int blkdev_dax_pfn_mkwrite(struct vm_area_struct *vma,
+		struct vm_fault *vmf)
 {
-	struct inode *bd_inode = bdev_file_inode(vma->vm_file);
-	struct block_device *bdev = I_BDEV(bd_inode);
-
-	inode_lock(bd_inode);
-	bdev->bd_map_count++;
-	inode_unlock(bd_inode);
+	return dax_pfn_mkwrite(vma, vmf);
 }
 
-static void blkdev_vm_close(struct vm_area_struct *vma)
+static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
+		pmd_t *pmd, unsigned int flags)
 {
-	struct inode *bd_inode = bdev_file_inode(vma->vm_file);
-	struct block_device *bdev = I_BDEV(bd_inode);
-
-	inode_lock(bd_inode);
-	bdev->bd_map_count--;
-	inode_unlock(bd_inode);
+	return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL);
 }
 
 static const struct vm_operations_struct blkdev_dax_vm_ops = {
-	.open		= blkdev_vm_open,
-	.close		= blkdev_vm_close,
 	.fault		= blkdev_dax_fault,
 	.pmd_fault	= blkdev_dax_pmd_fault,
-	.pfn_mkwrite	= blkdev_dax_fault,
+	.pfn_mkwrite	= blkdev_dax_pfn_mkwrite,
 };
 
 static const struct vm_operations_struct blkdev_default_vm_ops = {
-	.open		= blkdev_vm_open,
-	.close		= blkdev_vm_close,
 	.fault		= filemap_fault,
 	.map_pages	= filemap_map_pages,
 };
@@ -1774,18 +1756,14 @@ static const struct vm_operations_struct blkdev_default_vm_ops = {
 static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *bd_inode = bdev_file_inode(file);
-	struct block_device *bdev = I_BDEV(bd_inode);
 
 	file_accessed(file);
-	inode_lock(bd_inode);
-	bdev->bd_map_count++;
 	if (IS_DAX(bd_inode)) {
 		vma->vm_ops = &blkdev_dax_vm_ops;
 		vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
 	} else {
 		vma->vm_ops = &blkdev_default_vm_ops;
 	}
-	inode_unlock(bd_inode);
 
 	return 0;
 }
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 88d9af3d4581..5fb60ea7eee2 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -328,8 +328,8 @@ static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq,
 		list_add_tail(&work->ordered_list, &wq->ordered_list);
 		spin_unlock_irqrestore(&wq->list_lock, flags);
 	}
-	queue_work(wq->normal_wq, &work->normal_work);
 	trace_btrfs_work_queued(work);
+	queue_work(wq->normal_wq, &work->normal_work);
 }
 
 void btrfs_queue_work(struct btrfs_workqueue *wq,
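
The one-line swap above exists because queue_work() hands the item to another worker, which may run and free it immediately; anything that still reads the work item (here, the tracepoint) has to happen before the handoff. A minimal userspace analog of that rule, with illustrative names (slot, worker) and pthreads standing in for the kernel workqueue:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct work { int id; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static struct work *slot; /* one-item "queue" */

static void *worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (!slot)
		pthread_cond_wait(&cond, &lock);
	struct work *w = slot;
	slot = NULL;
	pthread_mutex_unlock(&lock);
	printf("worker: running work %d\n", w->id);
	free(w); /* the consumer owns and frees the item */
	return NULL;
}

int main(void)
{
	pthread_t t;
	pthread_create(&t, NULL, worker, NULL);

	struct work *w = malloc(sizeof(*w));
	w->id = 42;

	/* Touch the item (tracing, logging) BEFORE publishing it... */
	printf("producer: queuing work %d\n", w->id);

	pthread_mutex_lock(&lock);
	slot = w; /* ...because after this point the worker may free it */
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);

	/* dereferencing w here would be a use-after-free race */
	pthread_join(t, NULL);
	return 0;
}
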
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index b90cd3776f8e..f6dac40f87ff 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1406,7 +1406,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 		read_extent_buffer(eb, dest + bytes_left,
 				   name_off, name_len);
 		if (eb != eb_in) {
-			btrfs_tree_read_unlock_blocking(eb);
+			if (!path->skip_locking)
+				btrfs_tree_read_unlock_blocking(eb);
 			free_extent_buffer(eb);
 		}
 		ret = btrfs_find_item(fs_root, path, parent, 0,
@@ -1426,9 +1427,10 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 		eb = path->nodes[0];
 		/* make sure we can use eb after releasing the path */
 		if (eb != eb_in) {
-			atomic_inc(&eb->refs);
-			btrfs_tree_read_lock(eb);
-			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+			if (!path->skip_locking)
+				btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+			path->nodes[0] = NULL;
+			path->locks[0] = 0;
 		}
 		btrfs_release_path(path);
 		iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index c473c42d7d6c..3346cd8f9910 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -637,11 +637,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 	faili = nr_pages - 1;
 	cb->nr_pages = nr_pages;
 
-	/* In the parent-locked case, we only locked the range we are
-	 * interested in. In all other cases, we can opportunistically
-	 * cache decompressed data that goes beyond the requested range. */
-	if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED))
-		add_ra_bio_pages(inode, em_start + em_len, cb);
+	add_ra_bio_pages(inode, em_start + em_len, cb);
 
 	/* include any pages we added in add_ra-bio_pages */
 	uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 0be47e4b8136..b57daa895cea 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1689,7 +1689,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
  *
  */
 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
-				    struct list_head *ins_list)
+				    struct list_head *ins_list, bool *emitted)
 {
 	struct btrfs_dir_item *di;
 	struct btrfs_delayed_item *curr, *next;
@@ -1733,6 +1733,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
 
 		if (over)
 			return 1;
+		*emitted = true;
 	}
 	return 0;
 }
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index f70119f25421..0167853c84ae 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -144,7 +144,7 @@ void btrfs_put_delayed_items(struct list_head *ins_list,
 int btrfs_should_delete_dir_index(struct list_head *del_list,
 				  u64 index);
 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
-				    struct list_head *ins_list);
+				    struct list_head *ins_list, bool *emitted);
 
 /* for init */
 int __init btrfs_delayed_inode_init(void);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d9286497924f..5699bbc23feb 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -182,6 +182,7 @@ static struct btrfs_lockdep_keyset {
 	{ .id = BTRFS_TREE_RELOC_OBJECTID,	.name_stem = "treloc"	},
 	{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID,	.name_stem = "dreloc"	},
 	{ .id = BTRFS_UUID_TREE_OBJECTID,	.name_stem = "uuid"	},
+	{ .id = BTRFS_FREE_SPACE_TREE_OBJECTID,	.name_stem = "free-space" },
 	{ .id = 0,				.name_stem = "tree"	},
 };
 
@@ -1787,7 +1788,6 @@ static int cleaner_kthread(void *arg)
 	int again;
 	struct btrfs_trans_handle *trans;
 
-	set_freezable();
 	do {
 		again = 0;
 
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2e7c97a3f344..392592dc7010 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2897,12 +2897,11 @@ static int __do_readpage(struct extent_io_tree *tree,
 	struct block_device *bdev;
 	int ret;
 	int nr = 0;
-	int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
 	size_t pg_offset = 0;
 	size_t iosize;
 	size_t disk_io_size;
 	size_t blocksize = inode->i_sb->s_blocksize;
-	unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
+	unsigned long this_bio_flag = 0;
 
 	set_page_extent_mapped(page);
 
@@ -2942,18 +2941,16 @@ static int __do_readpage(struct extent_io_tree *tree,
 			kunmap_atomic(userpage);
 			set_extent_uptodate(tree, cur, cur + iosize - 1,
 					    &cached, GFP_NOFS);
-			if (!parent_locked)
-				unlock_extent_cached(tree, cur,
-						     cur + iosize - 1,
-						     &cached, GFP_NOFS);
+			unlock_extent_cached(tree, cur,
+					     cur + iosize - 1,
+					     &cached, GFP_NOFS);
 			break;
 		}
 		em = __get_extent_map(inode, page, pg_offset, cur,
 				      end - cur + 1, get_extent, em_cached);
 		if (IS_ERR_OR_NULL(em)) {
 			SetPageError(page);
-			if (!parent_locked)
-				unlock_extent(tree, cur, end);
+			unlock_extent(tree, cur, end);
 			break;
 		}
 		extent_offset = cur - em->start;
@@ -3038,12 +3035,9 @@ static int __do_readpage(struct extent_io_tree *tree,
 
 			set_extent_uptodate(tree, cur, cur + iosize - 1,
 					    &cached, GFP_NOFS);
-			if (parent_locked)
-				free_extent_state(cached);
-			else
-				unlock_extent_cached(tree, cur,
-						     cur + iosize - 1,
-						     &cached, GFP_NOFS);
+			unlock_extent_cached(tree, cur,
+					     cur + iosize - 1,
+					     &cached, GFP_NOFS);
 			cur = cur + iosize;
 			pg_offset += iosize;
 			continue;
@@ -3052,8 +3046,7 @@ static int __do_readpage(struct extent_io_tree *tree,
 		if (test_range_bit(tree, cur, cur_end,
 				   EXTENT_UPTODATE, 1, NULL)) {
 			check_page_uptodate(tree, page);
-			if (!parent_locked)
-				unlock_extent(tree, cur, cur + iosize - 1);
+			unlock_extent(tree, cur, cur + iosize - 1);
 			cur = cur + iosize;
 			pg_offset += iosize;
 			continue;
@@ -3063,8 +3056,7 @@ static int __do_readpage(struct extent_io_tree *tree,
 		 */
 		if (block_start == EXTENT_MAP_INLINE) {
 			SetPageError(page);
-			if (!parent_locked)
-				unlock_extent(tree, cur, cur + iosize - 1);
+			unlock_extent(tree, cur, cur + iosize - 1);
 			cur = cur + iosize;
 			pg_offset += iosize;
 			continue;
@@ -3083,8 +3075,7 @@ static int __do_readpage(struct extent_io_tree *tree,
 			*bio_flags = this_bio_flag;
 		} else {
 			SetPageError(page);
-			if (!parent_locked)
-				unlock_extent(tree, cur, cur + iosize - 1);
+			unlock_extent(tree, cur, cur + iosize - 1);
 		}
 		cur = cur + iosize;
 		pg_offset += iosize;
@@ -3213,20 +3204,6 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
 	return ret;
 }
 
-int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
-				 get_extent_t *get_extent, int mirror_num)
-{
-	struct bio *bio = NULL;
-	unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
-	int ret;
-
-	ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
-			    &bio_flags, READ, NULL);
-	if (bio)
-		ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
-	return ret;
-}
-
 static noinline void update_nr_written(struct page *page,
 				       struct writeback_control *wbc,
 				       unsigned long nr_written)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 0377413bd4b9..880d5292e972 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -29,7 +29,6 @@
  */
 #define EXTENT_BIO_COMPRESSED 1
 #define EXTENT_BIO_TREE_LOG 2
-#define EXTENT_BIO_PARENT_LOCKED 4
 #define EXTENT_BIO_FLAG_SHIFT 16
 
 /* these are bit numbers for test/set bit */
@@ -210,8 +209,6 @@ static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
 int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
 int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
 			  get_extent_t *get_extent, int mirror_num);
-int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
-				 get_extent_t *get_extent, int mirror_num);
 int __init extent_io_init(void);
 void extent_io_exit(void);
 
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 393e36bd5845..53dbeaf6ce94 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -153,6 +153,20 @@ static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
 
 static unsigned long *alloc_bitmap(u32 bitmap_size)
 {
+	void *mem;
+
+	/*
+	 * The allocation size varies, observed numbers were < 4K up to 16K.
+	 * Using vmalloc unconditionally would be too heavy, we'll try
+	 * contiguous allocations first.
+	 */
+	if (bitmap_size <= PAGE_SIZE)
+		return kzalloc(bitmap_size, GFP_NOFS);
+
+	mem = kzalloc(bitmap_size, GFP_NOFS | __GFP_NOWARN);
+	if (mem)
+		return mem;
+
 	return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO,
 			 PAGE_KERNEL);
 }
@@ -289,7 +303,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
 
 	ret = 0;
 out:
-	vfree(bitmap);
+	kvfree(bitmap);
 	if (ret)
 		btrfs_abort_transaction(trans, root, ret);
 	return ret;
@@ -438,7 +452,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
 
 	ret = 0;
 out:
-	vfree(bitmap);
+	kvfree(bitmap);
 	if (ret)
 		btrfs_abort_transaction(trans, root, ret);
 	return ret;
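
alloc_bitmap() above tries a cheap contiguous allocation first and only falls back to vmalloc for large or fragmented cases, with kvfree() as the single free path for both. A rough userspace analog of the same pattern, assuming malloc for small buffers and anonymous mmap for large ones (buf_alloc/buf_free and SMALL_LIMIT are invented names):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

#define SMALL_LIMIT 4096UL

struct buf { void *p; size_t size; int mmapped; };

/* Prefer the cheap contiguous allocator, fall back for big buffers. */
static int buf_alloc(struct buf *b, size_t size)
{
	b->size = size;
	if (size <= SMALL_LIMIT) {
		b->p = calloc(1, size);
		b->mmapped = 0;
		return b->p ? 0 : -1;
	}
	b->p = mmap(NULL, size, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	b->mmapped = 1;
	return b->p == MAP_FAILED ? -1 : 0;
}

/* One free path that copes with both origins, loosely like kvfree()
 * (the kernel can tell vmalloc addresses apart; here we keep a flag). */
static void buf_free(struct buf *b)
{
	if (!b->p)
		return;
	if (b->mmapped)
		munmap(b->p, b->size);
	else
		free(b->p);
}

int main(void)
{
	struct buf small, big;

	if (buf_alloc(&small, 512) || buf_alloc(&big, 1 << 20))
		return 1;
	memset(big.p, 0xff, big.size);
	buf_free(&big);
	buf_free(&small);
	puts("ok");
	return 0;
}
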
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e28f3d4691af..151b7c71b868 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5717,6 +5717,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
 	char *name_ptr;
 	int name_len;
 	int is_curr = 0;	/* ctx->pos points to the current index? */
+	bool emitted;
 
 	/* FIXME, use a real flag for deciding about the key type */
 	if (root->fs_info->tree_root == root)
@@ -5745,6 +5746,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
 	if (ret < 0)
 		goto err;
 
+	emitted = false;
 	while (1) {
 		leaf = path->nodes[0];
 		slot = path->slots[0];
@@ -5824,6 +5826,7 @@ skip:
 
 		if (over)
 			goto nopos;
+		emitted = true;
 		di_len = btrfs_dir_name_len(leaf, di) +
 			 btrfs_dir_data_len(leaf, di) + sizeof(*di);
 		di_cur += di_len;
@@ -5836,11 +5839,20 @@ next:
 	if (key_type == BTRFS_DIR_INDEX_KEY) {
 		if (is_curr)
 			ctx->pos++;
-		ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
+		ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list, &emitted);
 		if (ret)
 			goto nopos;
 	}
 
+	/*
+	 * If we haven't emitted any dir entry, we must not touch ctx->pos as
+	 * it was set to the termination value in the previous call. We assume
+	 * that "." and ".." were emitted if we reach this point and set the
+	 * termination value as well for an empty directory.
+	 */
+	if (ctx->pos > 2 && !emitted)
+		goto nopos;
+
 	/* Reached end of directory/root. Bump pos past the last item. */
 	ctx->pos++;
 
@@ -7116,21 +7128,41 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
 	if (ret)
 		return ERR_PTR(ret);
 
-	em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
-			      ins.offset, ins.offset, ins.offset, 0);
-	if (IS_ERR(em)) {
-		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-		return em;
-	}
-
+	/*
+	 * Create the ordered extent before the extent map. This is to avoid
+	 * races with the fast fsync path that would lead to it logging file
+	 * extent items that point to disk extents that were not yet written to.
+	 * The fast fsync path collects ordered extents into a local list and
+	 * then collects all the new extent maps, so we must create the ordered
+	 * extent first and make sure the fast fsync path collects any new
	 * ordered extents after collecting new extent maps as well.
+	 * The fsync path simply can not rely on inode_dio_wait() because it
+	 * causes deadlock with AIO.
+	 */
 	ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
 					   ins.offset, ins.offset, 0);
 	if (ret) {
 		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
-		free_extent_map(em);
 		return ERR_PTR(ret);
 	}
 
+	em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+			      ins.offset, ins.offset, ins.offset, 0);
+	if (IS_ERR(em)) {
+		struct btrfs_ordered_extent *oe;
+
+		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
+		oe = btrfs_lookup_ordered_extent(inode, start);
+		ASSERT(oe);
+		if (WARN_ON(!oe))
+			return em;
+		set_bit(BTRFS_ORDERED_IOERR, &oe->flags);
+		set_bit(BTRFS_ORDERED_IO_DONE, &oe->flags);
+		btrfs_remove_ordered_extent(inode, oe);
+		/* Once for our lookup and once for the ordered extents tree. */
+		btrfs_put_ordered_extent(oe);
+		btrfs_put_ordered_extent(oe);
+	}
 	return em;
 }
 
7136 7168
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 952172ca7e45..48aee9846329 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2794,24 +2794,29 @@ out:
 static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
 {
 	struct page *page;
-	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 
 	page = grab_cache_page(inode->i_mapping, index);
 	if (!page)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	if (!PageUptodate(page)) {
-		if (extent_read_full_page_nolock(tree, page, btrfs_get_extent,
-						 0))
-			return NULL;
+		int ret;
+
+		ret = btrfs_readpage(NULL, page);
+		if (ret)
+			return ERR_PTR(ret);
 		lock_page(page);
 		if (!PageUptodate(page)) {
 			unlock_page(page);
 			page_cache_release(page);
-			return NULL;
+			return ERR_PTR(-EIO);
+		}
+		if (page->mapping != inode->i_mapping) {
+			unlock_page(page);
+			page_cache_release(page);
+			return ERR_PTR(-EAGAIN);
 		}
 	}
-	unlock_page(page);
 
 	return page;
 }
@@ -2823,17 +2828,31 @@ static int gather_extent_pages(struct inode *inode, struct page **pages,
 	pgoff_t index = off >> PAGE_CACHE_SHIFT;
 
 	for (i = 0; i < num_pages; i++) {
+again:
 		pages[i] = extent_same_get_page(inode, index + i);
-		if (!pages[i])
-			return -ENOMEM;
+		if (IS_ERR(pages[i])) {
+			int err = PTR_ERR(pages[i]);
+
+			if (err == -EAGAIN)
+				goto again;
+			pages[i] = NULL;
+			return err;
+		}
 	}
 	return 0;
 }
 
-static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
+static int lock_extent_range(struct inode *inode, u64 off, u64 len,
+			     bool retry_range_locking)
 {
-	/* do any pending delalloc/csum calc on src, one way or
-	   another, and lock file content */
+	/*
+	 * Do any pending delalloc/csum calculations on inode, one way or
+	 * another, and lock file content.
+	 * The locking order is:
+	 *
+	 * 1) pages
+	 * 2) range in the inode's io tree
+	 */
 	while (1) {
 		struct btrfs_ordered_extent *ordered;
 		lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
@@ -2851,8 +2870,11 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
 		unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
 		if (ordered)
 			btrfs_put_ordered_extent(ordered);
+		if (!retry_range_locking)
+			return -EAGAIN;
 		btrfs_wait_ordered_range(inode, off, len);
 	}
+	return 0;
 }
 
 static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
@@ -2877,15 +2899,24 @@ static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
 	unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
 }
 
-static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
-				     struct inode *inode2, u64 loff2, u64 len)
+static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
+				    struct inode *inode2, u64 loff2, u64 len,
+				    bool retry_range_locking)
 {
+	int ret;
+
 	if (inode1 < inode2) {
 		swap(inode1, inode2);
 		swap(loff1, loff2);
 	}
-	lock_extent_range(inode1, loff1, len);
-	lock_extent_range(inode2, loff2, len);
+	ret = lock_extent_range(inode1, loff1, len, retry_range_locking);
+	if (ret)
+		return ret;
+	ret = lock_extent_range(inode2, loff2, len, retry_range_locking);
+	if (ret)
+		unlock_extent(&BTRFS_I(inode1)->io_tree, loff1,
+			      loff1 + len - 1);
+	return ret;
 }
 
 struct cmp_pages {
@@ -2901,11 +2932,15 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp)
 
 	for (i = 0; i < cmp->num_pages; i++) {
 		pg = cmp->src_pages[i];
-		if (pg)
+		if (pg) {
+			unlock_page(pg);
 			page_cache_release(pg);
+		}
 		pg = cmp->dst_pages[i];
-		if (pg)
+		if (pg) {
+			unlock_page(pg);
 			page_cache_release(pg);
+		}
 	}
 	kfree(cmp->src_pages);
 	kfree(cmp->dst_pages);
@@ -2966,6 +3001,8 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
 
 		src_page = cmp->src_pages[i];
 		dst_page = cmp->dst_pages[i];
+		ASSERT(PageLocked(src_page));
+		ASSERT(PageLocked(dst_page));
 
 		addr = kmap_atomic(src_page);
 		dst_addr = kmap_atomic(dst_page);
@@ -3078,14 +3115,46 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
 		goto out_unlock;
 	}
 
+again:
 	ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
 	if (ret)
 		goto out_unlock;
 
 	if (same_inode)
-		lock_extent_range(src, same_lock_start, same_lock_len);
+		ret = lock_extent_range(src, same_lock_start, same_lock_len,
+					false);
 	else
-		btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
+		ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len,
+					       false);
+	/*
+	 * If one of the inodes has dirty pages in the respective range or
+	 * ordered extents, we need to flush delalloc and wait for all ordered
+	 * extents in the range. We must unlock the pages and the ranges in the
+	 * io trees to avoid deadlocks when flushing delalloc (requires locking
+	 * pages) and when waiting for ordered extents to complete (they require
+	 * range locking).
+	 */
+	if (ret == -EAGAIN) {
+		/*
+		 * Ranges in the io trees already unlocked. Now unlock all
+		 * pages before waiting for all IO to complete.
+		 */
+		btrfs_cmp_data_free(&cmp);
+		if (same_inode) {
+			btrfs_wait_ordered_range(src, same_lock_start,
+						 same_lock_len);
+		} else {
+			btrfs_wait_ordered_range(src, loff, len);
+			btrfs_wait_ordered_range(dst, dst_loff, len);
+		}
+		goto again;
+	}
+	ASSERT(ret == 0);
+	if (WARN_ON(ret)) {
+		/* ranges in the io trees already unlocked */
+		btrfs_cmp_data_free(&cmp);
+		return ret;
+	}
 
 	/* pass original length for comparison so we stay within i_size */
 	ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
@@ -3795,9 +3864,15 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
 		u64 lock_start = min_t(u64, off, destoff);
 		u64 lock_len = max_t(u64, off, destoff) + len - lock_start;
 
-		lock_extent_range(src, lock_start, lock_len);
+		ret = lock_extent_range(src, lock_start, lock_len, true);
 	} else {
-		btrfs_double_extent_lock(src, off, inode, destoff, len);
+		ret = btrfs_double_extent_lock(src, off, inode, destoff, len,
+					       true);
+	}
+	ASSERT(ret == 0);
+	if (WARN_ON(ret)) {
+		/* ranges in the io trees already unlocked */
+		goto out_unlock;
 	}
 
 	ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
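
The -EAGAIN handling added above is a standard deadlock-avoidance scheme: acquire the resources in a fixed order, and if the second acquisition would block while the first is held, drop everything, wait out the contention, and retry. A minimal pthreads sketch of the same idea (pages_lock/range_lock are illustrative stand-ins, and sched_yield() stands in for btrfs_wait_ordered_range()):

#include <pthread.h>
#include <stdio.h>
#include <sched.h>

static pthread_mutex_t pages_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t range_lock = PTHREAD_MUTEX_INITIALIZER;

/* Lock "pages" then "range"; back off and retry instead of blocking
 * on the second lock while holding the first (mirrors -EAGAIN + wait). */
static void lock_both(void)
{
	for (;;) {
		pthread_mutex_lock(&pages_lock);
		if (pthread_mutex_trylock(&range_lock) == 0)
			return; /* both held, in the required order */
		/* contention: release, let the other side finish, retry */
		pthread_mutex_unlock(&pages_lock);
		sched_yield();
	}
}

static void unlock_both(void)
{
	pthread_mutex_unlock(&range_lock);
	pthread_mutex_unlock(&pages_lock);
}

int main(void)
{
	lock_both();
	puts("both locks held without risking an ABBA deadlock");
	unlock_both();
	return 0;
}
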
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index fd1c4d982463..2bd0011450df 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -575,7 +575,8 @@ static int is_cowonly_root(u64 root_objectid)
 	       root_objectid == BTRFS_TREE_LOG_OBJECTID ||
 	       root_objectid == BTRFS_CSUM_TREE_OBJECTID ||
 	       root_objectid == BTRFS_UUID_TREE_OBJECTID ||
-	       root_objectid == BTRFS_QUOTA_TREE_OBJECTID)
+	       root_objectid == BTRFS_QUOTA_TREE_OBJECTID ||
+	       root_objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
 		return 1;
 	return 0;
 }
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index e0ac85949067..539e7b5e3f86 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -202,6 +202,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
 BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
 BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
 BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
+BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
 
 static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -213,6 +214,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(raid56),
 	BTRFS_FEAT_ATTR_PTR(skinny_metadata),
 	BTRFS_FEAT_ATTR_PTR(no_holes),
+	BTRFS_FEAT_ATTR_PTR(free_space_tree),
 	NULL
 };
 
@@ -780,6 +782,39 @@ failure:
 	return error;
 }
 
+
+/*
+ * Change per-fs features in /sys/fs/btrfs/UUID/features to match current
+ * values in superblock. Call after any changes to incompat/compat_ro flags
+ */
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+		u64 bit, enum btrfs_feature_set set)
+{
+	struct btrfs_fs_devices *fs_devs;
+	struct kobject *fsid_kobj;
+	u64 features;
+	int ret;
+
+	if (!fs_info)
+		return;
+
+	features = get_features(fs_info, set);
+	ASSERT(bit & supported_feature_masks[set]);
+
+	fs_devs = fs_info->fs_devices;
+	fsid_kobj = &fs_devs->fsid_kobj;
+
+	if (!fsid_kobj->state_initialized)
+		return;
+
+	/*
+	 * FIXME: this is too heavy to update just one value, ideally we'd like
+	 * to use sysfs_update_group but some refactoring is needed first.
+	 */
+	sysfs_remove_group(fsid_kobj, &btrfs_feature_attr_group);
+	ret = sysfs_create_group(fsid_kobj, &btrfs_feature_attr_group);
+}
+
 static int btrfs_init_debugfs(void)
 {
 #ifdef CONFIG_DEBUG_FS
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 9c09522125a6..d7da1a4c2f6c 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -56,7 +56,7 @@ static struct btrfs_feature_attr btrfs_attr_##_name = { \
 #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \
 	BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature)
 #define BTRFS_FEAT_ATTR_COMPAT_RO(name, feature) \
-	BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT, feature)
+	BTRFS_FEAT_ATTR(name, FEAT_COMPAT_RO, BTRFS_FEATURE_COMPAT_RO, feature)
 #define BTRFS_FEAT_ATTR_INCOMPAT(name, feature) \
 	BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
 
@@ -90,4 +90,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
 		struct kobject *parent);
 int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
 void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
+void btrfs_sysfs_feature_update(struct btrfs_fs_info *fs_info,
+		u64 bit, enum btrfs_feature_set set);
+
 #endif /* _BTRFS_SYSFS_H_ */
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index b1d920b30070..0e1e61a7ec23 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -82,18 +82,18 @@ void btrfs_destroy_test_fs(void)
 struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
 {
 	struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
-						GFP_NOFS);
+						GFP_KERNEL);
 
 	if (!fs_info)
 		return fs_info;
 	fs_info->fs_devices = kzalloc(sizeof(struct btrfs_fs_devices),
-				      GFP_NOFS);
+				      GFP_KERNEL);
 	if (!fs_info->fs_devices) {
 		kfree(fs_info);
 		return NULL;
 	}
 	fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
-				      GFP_NOFS);
+				      GFP_KERNEL);
 	if (!fs_info->super_copy) {
 		kfree(fs_info->fs_devices);
 		kfree(fs_info);
@@ -180,11 +180,11 @@ btrfs_alloc_dummy_block_group(unsigned long length)
 {
 	struct btrfs_block_group_cache *cache;
 
-	cache = kzalloc(sizeof(*cache), GFP_NOFS);
+	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
 	if (!cache)
 		return NULL;
 	cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
-					GFP_NOFS);
+					GFP_KERNEL);
 	if (!cache->free_space_ctl) {
 		kfree(cache);
 		return NULL;
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index e29fa297e053..669b58201e36 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -94,7 +94,7 @@ static int test_find_delalloc(void)
 	 * test.
 	 */
 	for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
-		page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+		page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
 		if (!page) {
 			test_msg("Failed to allocate test page\n");
 			ret = -ENOMEM;
@@ -113,7 +113,7 @@ static int test_find_delalloc(void)
 	 * |--- delalloc ---|
 	 * |---  search  ---|
 	 */
-	set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, 0, 4095, NULL, GFP_KERNEL);
 	start = 0;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -144,7 +144,7 @@ static int test_find_delalloc(void)
 		test_msg("Couldn't find the locked page\n");
 		goto out_bits;
 	}
-	set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, 4096, max_bytes - 1, NULL, GFP_KERNEL);
 	start = test_start;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -199,7 +199,7 @@ static int test_find_delalloc(void)
 	 *
 	 * We are re-using our test_start from above since it works out well.
 	 */
-	set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_NOFS);
+	set_extent_delalloc(&tmp, max_bytes, total_dirty - 1, NULL, GFP_KERNEL);
 	start = test_start;
 	end = 0;
 	found = find_lock_delalloc_range(inode, &tmp, locked_page, &start,
@@ -262,7 +262,7 @@ static int test_find_delalloc(void)
 	}
 	ret = 0;
 out_bits:
-	clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS);
+	clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
 out:
 	if (locked_page)
 		page_cache_release(locked_page);
@@ -360,7 +360,7 @@ static int test_eb_bitmaps(void)
 
 	test_msg("Running extent buffer bitmap tests\n");
 
-	bitmap = kmalloc(len, GFP_NOFS);
+	bitmap = kmalloc(len, GFP_KERNEL);
 	if (!bitmap) {
 		test_msg("Couldn't allocate test bitmap\n");
 		return -ENOMEM;
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 5de55fdd28bc..e2d3da02deee 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -974,7 +974,7 @@ static int test_extent_accounting(void)
 			       (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
 			       EXTENT_DELALLOC | EXTENT_DIRTY |
 			       EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
@@ -1045,7 +1045,7 @@ static int test_extent_accounting(void)
 			       BTRFS_MAX_EXTENT_SIZE+8191,
 			       EXTENT_DIRTY | EXTENT_DELALLOC |
 			       EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
@@ -1079,7 +1079,7 @@ static int test_extent_accounting(void)
 	ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
 			       EXTENT_DIRTY | EXTENT_DELALLOC |
 			       EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-			       NULL, GFP_NOFS);
+			       NULL, GFP_KERNEL);
 	if (ret) {
 		test_msg("clear_extent_bit returned %d\n", ret);
 		goto out;
@@ -1096,7 +1096,7 @@ out:
 	clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
 			 EXTENT_DIRTY | EXTENT_DELALLOC |
 			 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
-			 NULL, GFP_NOFS);
+			 NULL, GFP_KERNEL);
 	iput(inode);
 	btrfs_free_dummy_root(root);
 	return ret;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 323e12cc9d2f..978c3a810893 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4127,7 +4127,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 				     struct inode *inode,
 				     struct btrfs_path *path,
 				     struct list_head *logged_list,
-				     struct btrfs_log_ctx *ctx)
+				     struct btrfs_log_ctx *ctx,
+				     const u64 start,
+				     const u64 end)
 {
 	struct extent_map *em, *n;
 	struct list_head extents;
@@ -4166,7 +4168,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 	}
 
 	list_sort(NULL, &extents, extent_cmp);
-
+	/*
+	 * Collect any new ordered extents within the range. This is to
+	 * prevent logging file extent items without waiting for the disk
+	 * location they point to being written. We do this only to deal
+	 * with races against concurrent lockless direct IO writes.
+	 */
+	btrfs_get_logged_extents(inode, logged_list, start, end);
 process:
 	while (!list_empty(&extents)) {
 		em = list_entry(extents.next, struct extent_map, list);
@@ -4701,7 +4709,7 @@ log_extents:
 		goto out_unlock;
 	}
 	ret = btrfs_log_changed_extents(trans, root, inode, dst_path,
					&logged_list, ctx, start, end);
 	if (ret) {
 		err = ret;
 		goto out_unlock;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 86a9c383955e..eb9028e8cfc5 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -698,8 +698,8 @@ static void ceph_aio_retry_work(struct work_struct *work)
 
 	req = ceph_osdc_alloc_request(orig_req->r_osdc, snapc, 2,
 			false, GFP_NOFS);
-	if (IS_ERR(req)) {
-		ret = PTR_ERR(req);
+	if (!req) {
+		ret = -ENOMEM;
 		req = orig_req;
 		goto out;
 	}
@@ -716,7 +716,6 @@ static void ceph_aio_retry_work(struct work_struct *work)
 	ceph_osdc_build_request(req, req->r_ops[0].extent.offset,
 				snapc, CEPH_NOSNAP, &aio_req->mtime);
 
-	ceph_put_snap_context(snapc);
 	ceph_osdc_put_request(orig_req);
 
 	req->r_callback = ceph_aio_complete_req;
@@ -731,6 +730,7 @@ out:
 		ceph_aio_complete_req(req, NULL);
 	}
 
+	ceph_put_snap_context(snapc);
 	kfree(aio_work);
 }
 
736 736
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index a5b8eb69a8f4..6402eaf8ab95 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1261,6 +1261,9 @@ COMPATIBLE_IOCTL(HCIUNBLOCKADDR)
 COMPATIBLE_IOCTL(HCIINQUIRY)
 COMPATIBLE_IOCTL(HCIUARTSETPROTO)
 COMPATIBLE_IOCTL(HCIUARTGETPROTO)
+COMPATIBLE_IOCTL(HCIUARTGETDEVICE)
+COMPATIBLE_IOCTL(HCIUARTSETFLAGS)
+COMPATIBLE_IOCTL(HCIUARTGETFLAGS)
 COMPATIBLE_IOCTL(RFCOMMCREATEDEV)
 COMPATIBLE_IOCTL(RFCOMMRELEASEDEV)
 COMPATIBLE_IOCTL(RFCOMMGETDEVLIST)
diff --git a/fs/dax.c b/fs/dax.c
index 4fd6b0c5c6b5..fc2e3141138b 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -58,6 +58,26 @@ static void dax_unmap_atomic(struct block_device *bdev,
 	blk_queue_exit(bdev->bd_queue);
 }
 
+struct page *read_dax_sector(struct block_device *bdev, sector_t n)
+{
+	struct page *page = alloc_pages(GFP_KERNEL, 0);
+	struct blk_dax_ctl dax = {
+		.size = PAGE_SIZE,
+		.sector = n & ~((((int) PAGE_SIZE) / 512) - 1),
+	};
+	long rc;
+
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+
+	rc = dax_map_atomic(bdev, &dax);
+	if (rc < 0)
+		return ERR_PTR(rc);
+	memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE);
+	dax_unmap_atomic(bdev, &dax);
+	return page;
+}
+
 /*
  * dax_clear_blocks() is called from within transaction context from XFS,
  * and hence this means the stack from this point must follow GFP_NOFS
@@ -338,7 +358,8 @@ static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
 	void *entry;
 
 	WARN_ON_ONCE(pmd_entry && !dirty);
-	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+	if (dirty)
+		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 
 	spin_lock_irq(&mapping->tree_lock);
 
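
read_dax_sector() above rounds the 512-byte sector number down to a page boundary before mapping: with 4 KiB pages there are eight sectors per page, so the low three bits are cleared. The arithmetic, checked in plain C (a PAGE_SIZE of 4096 is assumed):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096
#define SECTOR_SIZE 512

int main(void)
{
	/* sectors per page = 4096 / 512 = 8, so the mask is ~7 */
	uint64_t mask = ~(uint64_t)((PAGE_SIZE / SECTOR_SIZE) - 1);

	for (uint64_t n = 0; n < 20; n += 7) {
		uint64_t aligned = n & mask;
		printf("sector %2llu -> page-aligned sector %2llu (byte offset %llu)\n",
		       (unsigned long long)n,
		       (unsigned long long)aligned,
		       (unsigned long long)(aligned * SECTOR_SIZE));
	}
	return 0;
}
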
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 1f107fd51328..655f21f99160 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -575,6 +575,26 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
 	mutex_unlock(&allocated_ptys_lock);
 }
 
+/*
+ * pty code needs to hold extra references in case of last /dev/tty close
+ */
+
+void devpts_add_ref(struct inode *ptmx_inode)
+{
+	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+
+	atomic_inc(&sb->s_active);
+	ihold(ptmx_inode);
+}
+
+void devpts_del_ref(struct inode *ptmx_inode)
+{
+	struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+
+	iput(ptmx_inode);
+	deactivate_super(sb);
+}
+
 /**
  * devpts_pty_new -- create a new inode in /dev/pts/
  * @ptmx_inode: inode of the master
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index c424e4813ec8..d48e0d261d78 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -10,6 +10,7 @@
 #include <linux/efi.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/mount.h>
 
 #include "internal.h"
 
@@ -103,9 +104,78 @@ out_free:
 	return size;
 }
 
+static int
+efivarfs_ioc_getxflags(struct file *file, void __user *arg)
+{
+	struct inode *inode = file->f_mapping->host;
+	unsigned int i_flags;
+	unsigned int flags = 0;
+
+	i_flags = inode->i_flags;
+	if (i_flags & S_IMMUTABLE)
+		flags |= FS_IMMUTABLE_FL;
+
+	if (copy_to_user(arg, &flags, sizeof(flags)))
+		return -EFAULT;
+	return 0;
+}
+
+static int
+efivarfs_ioc_setxflags(struct file *file, void __user *arg)
+{
+	struct inode *inode = file->f_mapping->host;
+	unsigned int flags;
+	unsigned int i_flags = 0;
+	int error;
+
+	if (!inode_owner_or_capable(inode))
+		return -EACCES;
+
+	if (copy_from_user(&flags, arg, sizeof(flags)))
+		return -EFAULT;
+
+	if (flags & ~FS_IMMUTABLE_FL)
+		return -EOPNOTSUPP;
+
+	if (!capable(CAP_LINUX_IMMUTABLE))
+		return -EPERM;
+
+	if (flags & FS_IMMUTABLE_FL)
+		i_flags |= S_IMMUTABLE;
+
+
+	error = mnt_want_write_file(file);
+	if (error)
+		return error;
+
+	inode_lock(inode);
+	inode_set_flags(inode, i_flags, S_IMMUTABLE);
+	inode_unlock(inode);
+
+	mnt_drop_write_file(file);
+
+	return 0;
+}
+
+long
+efivarfs_file_ioctl(struct file *file, unsigned int cmd, unsigned long p)
+{
+	void __user *arg = (void __user *)p;
+
+	switch (cmd) {
+	case FS_IOC_GETFLAGS:
+		return efivarfs_ioc_getxflags(file, arg);
+	case FS_IOC_SETFLAGS:
+		return efivarfs_ioc_setxflags(file, arg);
+	}
+
+	return -ENOTTY;
+}
+
 const struct file_operations efivarfs_file_operations = {
 	.open	= simple_open,
 	.read	= efivarfs_file_read,
 	.write	= efivarfs_file_write,
 	.llseek	= no_llseek,
+	.unlocked_ioctl = efivarfs_file_ioctl,
 };
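
With the handler above, efivarfs files answer the same FS_IOC_GETFLAGS/FS_IOC_SETFLAGS ioctls that chattr(1) uses, which is how the S_IMMUTABLE default set in the inode changes below can be cleared before writing a variable. A minimal userspace sketch; the variable path is only an example (Boot0000 under the EFI global-variable GUID):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1]
		: "/sys/firmware/efi/efivars/Boot0000-8be4df61-93ca-11d2-aa0d-00e098032b8c";
	unsigned int flags;
	int fd = open(path, O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) {
		perror("FS_IOC_GETFLAGS");
		close(fd);
		return 1;
	}
	printf("%s is %simmutable\n", path,
	       (flags & FS_IMMUTABLE_FL) ? "" : "not ");

	/* Clearing the flag needs CAP_LINUX_IMMUTABLE, as in the hunk above. */
	flags &= ~FS_IMMUTABLE_FL;
	if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0)
		perror("FS_IOC_SETFLAGS");

	close(fd);
	return 0;
}
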
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 3381b9da9ee6..e2ab6d0497f2 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -15,7 +15,8 @@
 #include "internal.h"
 
 struct inode *efivarfs_get_inode(struct super_block *sb,
-				const struct inode *dir, int mode, dev_t dev)
+				const struct inode *dir, int mode,
+				dev_t dev, bool is_removable)
 {
 	struct inode *inode = new_inode(sb);
 
@@ -23,6 +24,7 @@ struct inode *efivarfs_get_inode(struct super_block *sb,
 		inode->i_ino = get_next_ino();
 		inode->i_mode = mode;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		inode->i_flags = is_removable ? 0 : S_IMMUTABLE;
 		switch (mode & S_IFMT) {
 		case S_IFREG:
 			inode->i_fop = &efivarfs_file_operations;
@@ -102,22 +104,17 @@ static void efivarfs_hex_to_guid(const char *str, efi_guid_t *guid)
 static int efivarfs_create(struct inode *dir, struct dentry *dentry,
 			   umode_t mode, bool excl)
 {
-	struct inode *inode;
+	struct inode *inode = NULL;
 	struct efivar_entry *var;
 	int namelen, i = 0, err = 0;
+	bool is_removable = false;
 
 	if (!efivarfs_valid_name(dentry->d_name.name, dentry->d_name.len))
 		return -EINVAL;
 
-	inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0);
-	if (!inode)
-		return -ENOMEM;
-
 	var = kzalloc(sizeof(struct efivar_entry), GFP_KERNEL);
-	if (!var) {
-		err = -ENOMEM;
-		goto out;
-	}
+	if (!var)
+		return -ENOMEM;
 
 	/* length of the variable name itself: remove GUID and separator */
 	namelen = dentry->d_name.len - EFI_VARIABLE_GUID_LEN - 1;
@@ -125,6 +122,16 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry,
 	efivarfs_hex_to_guid(dentry->d_name.name + namelen + 1,
 			     &var->var.VendorGuid);
 
+	if (efivar_variable_is_removable(var->var.VendorGuid,
+					 dentry->d_name.name, namelen))
+		is_removable = true;
+
+	inode = efivarfs_get_inode(dir->i_sb, dir, mode, 0, is_removable);
+	if (!inode) {
+		err = -ENOMEM;
+		goto out;
+	}
+
 	for (i = 0; i < namelen; i++)
 		var->var.VariableName[i] = dentry->d_name.name[i];
 
@@ -138,7 +145,8 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry,
 out:
 	if (err) {
 		kfree(var);
-		iput(inode);
+		if (inode)
+			iput(inode);
 	}
 	return err;
 }
diff --git a/fs/efivarfs/internal.h b/fs/efivarfs/internal.h
index b5ff16addb7c..b4505188e799 100644
--- a/fs/efivarfs/internal.h
+++ b/fs/efivarfs/internal.h
@@ -15,7 +15,8 @@ extern const struct file_operations efivarfs_file_operations;
 extern const struct inode_operations efivarfs_dir_inode_operations;
 extern bool efivarfs_valid_name(const char *str, int len);
 extern struct inode *efivarfs_get_inode(struct super_block *sb,
-			const struct inode *dir, int mode, dev_t dev);
+			const struct inode *dir, int mode, dev_t dev,
+			bool is_removable);
 
 extern struct list_head efivarfs_list;
 
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index b8a564f29107..dd029d13ea61 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -118,8 +118,9 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
 	struct dentry *dentry, *root = sb->s_root;
 	unsigned long size = 0;
 	char *name;
-	int len, i;
+	int len;
 	int err = -ENOMEM;
+	bool is_removable = false;
 
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 	if (!entry)
@@ -128,15 +129,17 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
 	memcpy(entry->var.VariableName, name16, name_size);
 	memcpy(&(entry->var.VendorGuid), &vendor, sizeof(efi_guid_t));
 
-	len = ucs2_strlen(entry->var.VariableName);
+	len = ucs2_utf8size(entry->var.VariableName);
 
 	/* name, plus '-', plus GUID, plus NUL*/
 	name = kmalloc(len + 1 + EFI_VARIABLE_GUID_LEN + 1, GFP_KERNEL);
 	if (!name)
 		goto fail;
 
-	for (i = 0; i < len; i++)
-		name[i] = entry->var.VariableName[i] & 0xFF;
+	ucs2_as_utf8(name, entry->var.VariableName, len);
+
+	if (efivar_variable_is_removable(entry->var.VendorGuid, name, len))
+		is_removable = true;
 
 	name[len] = '-';
 
@@ -144,7 +147,8 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
 
 	name[len + EFI_VARIABLE_GUID_LEN+1] = '\0';
 
-	inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0);
+	inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0,
+				   is_removable);
 	if (!inode)
 		goto fail_name;
 
@@ -200,7 +204,7 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_d_op = &efivarfs_d_ops;
 	sb->s_time_gran = 1;
 
-	inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0);
+	inode = efivarfs_get_inode(sb, NULL, S_IFDIR | 0755, 0, true);
 	if (!inode)
 		return -ENOMEM;
 	inode->i_op = &efivarfs_dir_inode_operations;
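
Because non-whitelisted variables are now created S_IMMUTABLE, deleting one
from userspace becomes a two-step operation: clear the immutable attribute
(what "chattr -i" does; needs CAP_LINUX_IMMUTABLE), then unlink the file. A
sketch of that sequence, with an illustrative path:

    #include <fcntl.h>
    #include <linux/fs.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    int main(void)
    {
        const char *path = "/sys/firmware/efi/efivars/"
                           "Example-12345678-1234-1234-1234-123456789abc";
        int flags, fd = open(path, O_RDONLY);

        if (fd < 0) {
            perror("open");
            return 1;
        }
        /* equivalent of "chattr -i": drop FS_IMMUTABLE_FL */
        if (ioctl(fd, FS_IOC_GETFLAGS, &flags) == 0) {
            flags &= ~FS_IMMUTABLE_FL;
            ioctl(fd, FS_IOC_SETFLAGS, &flags);
        }
        close(fd);

        if (unlink(path) != 0)
            perror("unlink");
        return 0;
    }
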
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index ae1dbcf47e97..cde60741cad2 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -94,6 +94,11 @@
 /* Epoll private bits inside the event mask */
 #define EP_PRIVATE_BITS (EPOLLWAKEUP | EPOLLONESHOT | EPOLLET | EPOLLEXCLUSIVE)
 
+#define EPOLLINOUT_BITS (POLLIN | POLLOUT)
+
+#define EPOLLEXCLUSIVE_OK_BITS (EPOLLINOUT_BITS | POLLERR | POLLHUP | \
+				EPOLLWAKEUP | EPOLLET | EPOLLEXCLUSIVE)
+
 /* Maximum number of nesting allowed inside epoll sets */
 #define EP_MAX_NESTS 4
 
@@ -1068,7 +1073,22 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 	 * wait list.
 	 */
 	if (waitqueue_active(&ep->wq)) {
-		ewake = 1;
+		if ((epi->event.events & EPOLLEXCLUSIVE) &&
+					!((unsigned long)key & POLLFREE)) {
+			switch ((unsigned long)key & EPOLLINOUT_BITS) {
+			case POLLIN:
+				if (epi->event.events & POLLIN)
+					ewake = 1;
+				break;
+			case POLLOUT:
+				if (epi->event.events & POLLOUT)
+					ewake = 1;
+				break;
+			case 0:
+				ewake = 1;
+				break;
+			}
+		}
 		wake_up_locked(&ep->wq);
 	}
 	if (waitqueue_active(&ep->poll_wait))
@@ -1875,9 +1895,13 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	 * so EPOLLEXCLUSIVE is not allowed for a EPOLL_CTL_MOD operation.
 	 * Also, we do not currently supported nested exclusive wakeups.
 	 */
-	if ((epds.events & EPOLLEXCLUSIVE) && (op == EPOLL_CTL_MOD ||
-		(op == EPOLL_CTL_ADD && is_file_epoll(tf.file))))
-		goto error_tgt_fput;
+	if (epds.events & EPOLLEXCLUSIVE) {
+		if (op == EPOLL_CTL_MOD)
+			goto error_tgt_fput;
+		if (op == EPOLL_CTL_ADD && (is_file_epoll(tf.file) ||
+				(epds.events & ~EPOLLEXCLUSIVE_OK_BITS)))
+			goto error_tgt_fput;
+	}
 
 	/*
 	 * At this point it is safe to assume that the "private_data" contains
@@ -1950,8 +1974,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 		break;
 	case EPOLL_CTL_MOD:
 		if (epi) {
-			epds.events |= POLLERR | POLLHUP;
-			error = ep_modify(ep, epi, &epds);
+			if (!(epi->event.events & EPOLLEXCLUSIVE)) {
+				epds.events |= POLLERR | POLLHUP;
+				error = ep_modify(ep, epi, &epds);
+			}
 		} else
 			error = -ENOENT;
 		break;
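
Taken together, the epoll hunks tighten the new EPOLLEXCLUSIVE flag: it is
refused for EPOLL_CTL_MOD and for nested epoll targets, only the bits in
EPOLLEXCLUSIVE_OK_BITS may accompany it, and the wakeup path now wakes an
exclusive waiter only when the ready events overlap the mask it registered.
The intended use is one epoll instance per worker, all watching the same
listening socket, so a new connection wakes a subset of the workers instead
of all of them. A sketch (listen_fd is assumed to be set up elsewhere; the
fallback define matches the value this kernel uses):

    #include <sys/epoll.h>

    #ifndef EPOLLEXCLUSIVE
    #define EPOLLEXCLUSIVE (1u << 28)
    #endif

    void worker_loop(int listen_fd)
    {
        struct epoll_event ev = {
            /* must stay within EPOLLEXCLUSIVE_OK_BITS after this patch */
            .events = EPOLLIN | EPOLLEXCLUSIVE,
            .data.fd = listen_fd,
        };
        int epfd = epoll_create1(0);

        epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev);
        for (;;) {
            struct epoll_event out;

            if (epoll_wait(epfd, &out, 1, -1) == 1 && (out.events & EPOLLIN))
                ; /* accept(listen_fd, ...) and handle the connection */
        }
    }
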
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 5bcd92d50e82..0cb1abd535e3 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1215,7 +1215,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
 					   hdr->pgio_mirror_idx + 1,
 					   &hdr->pgio_mirror_idx))
 			goto out_eagain;
-		set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+		set_bit(NFS_LAYOUT_RETURN_REQUESTED,
 			&hdr->lseg->pls_layout->plh_flags);
 		pnfs_read_resend_pnfs(hdr);
 		return task->tk_status;
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 29898a9550fa..eb370460ce20 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -412,7 +412,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
 					 OP_ILLEGAL, GFP_NOIO);
 		if (!fail_return) {
 			if (ff_layout_has_available_ds(lseg))
-				set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+				set_bit(NFS_LAYOUT_RETURN_REQUESTED,
 					&lseg->pls_layout->plh_flags);
 			else
 				pnfs_error_mark_layout_for_return(ino, lseg);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index a3592cc34a20..482b6e94bb37 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -52,9 +52,7 @@ static DEFINE_SPINLOCK(pnfs_spinlock);
  */
 static LIST_HEAD(pnfs_modules_tbl);
 
-static int
-pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
-		       enum pnfs_iomode iomode, bool sync);
+static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo);
 
 /* Return the registered pnfs layout driver module matching given id */
 static struct pnfs_layoutdriver_type *
@@ -243,6 +241,8 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
 {
 	struct inode *inode = lo->plh_inode;
 
+	pnfs_layoutreturn_before_put_layout_hdr(lo);
+
 	if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
 		if (!list_empty(&lo->plh_segs))
 			WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
@@ -345,58 +345,6 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
 		rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
 }
 
-/* Return true if layoutreturn is needed */
-static bool
-pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
-			struct pnfs_layout_segment *lseg)
-{
-	struct pnfs_layout_segment *s;
-
-	if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
-		return false;
-
-	list_for_each_entry(s, &lo->plh_segs, pls_list)
-		if (s != lseg && test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
-			return false;
-
-	return true;
-}
-
-static bool
-pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
-{
-	if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
-		return false;
-	lo->plh_return_iomode = 0;
-	pnfs_get_layout_hdr(lo);
-	clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
-	return true;
-}
-
-static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
-		struct pnfs_layout_hdr *lo, struct inode *inode)
-{
-	lo = lseg->pls_layout;
-	inode = lo->plh_inode;
-
-	spin_lock(&inode->i_lock);
-	if (pnfs_layout_need_return(lo, lseg)) {
-		nfs4_stateid stateid;
-		enum pnfs_iomode iomode;
-		bool send;
-
-		nfs4_stateid_copy(&stateid, &lo->plh_stateid);
-		iomode = lo->plh_return_iomode;
-		send = pnfs_prepare_layoutreturn(lo);
-		spin_unlock(&inode->i_lock);
-		if (send) {
-			/* Send an async layoutreturn so we dont deadlock */
-			pnfs_send_layoutreturn(lo, &stateid, iomode, false);
-		}
-	} else
-		spin_unlock(&inode->i_lock);
-}
-
 void
 pnfs_put_lseg(struct pnfs_layout_segment *lseg)
 {
@@ -410,15 +358,8 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
 		atomic_read(&lseg->pls_refcount),
 		test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
 
-	/* Handle the case where refcount != 1 */
-	if (atomic_add_unless(&lseg->pls_refcount, -1, 1))
-		return;
-
 	lo = lseg->pls_layout;
 	inode = lo->plh_inode;
-	/* Do we need a layoutreturn? */
-	if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
-		pnfs_layoutreturn_before_put_lseg(lseg, lo, inode);
 
 	if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
 		if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
@@ -937,6 +878,17 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
 	rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
 }
 
+static bool
+pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
+{
+	if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+		return false;
+	lo->plh_return_iomode = 0;
+	pnfs_get_layout_hdr(lo);
+	clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
+	return true;
+}
+
 static int
 pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
 		       enum pnfs_iomode iomode, bool sync)
@@ -971,6 +923,48 @@ out:
 	return status;
 }
 
+/* Return true if layoutreturn is needed */
+static bool
+pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
+{
+	struct pnfs_layout_segment *s;
+
+	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+		return false;
+
+	/* Defer layoutreturn until all lsegs are done */
+	list_for_each_entry(s, &lo->plh_segs, pls_list) {
+		if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
+			return false;
+	}
+
+	return true;
+}
+
+static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+	struct inode *inode = lo->plh_inode;
+
+	if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+		return;
+	spin_lock(&inode->i_lock);
+	if (pnfs_layout_need_return(lo)) {
+		nfs4_stateid stateid;
+		enum pnfs_iomode iomode;
+		bool send;
+
+		nfs4_stateid_copy(&stateid, &lo->plh_stateid);
+		iomode = lo->plh_return_iomode;
+		send = pnfs_prepare_layoutreturn(lo);
+		spin_unlock(&inode->i_lock);
+		if (send) {
+			/* Send an async layoutreturn so we dont deadlock */
+			pnfs_send_layoutreturn(lo, &stateid, iomode, false);
+		}
+	} else
+		spin_unlock(&inode->i_lock);
+}
+
 /*
  * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
  * when the layout segment list is empty.
@@ -1091,7 +1085,7 @@ bool pnfs_roc(struct inode *ino)
 
 	nfs4_stateid_copy(&stateid, &lo->plh_stateid);
 	/* always send layoutreturn if being marked so */
-	if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+	if (test_and_clear_bit(NFS_LAYOUT_RETURN_REQUESTED,
 			&lo->plh_flags))
 		layoutreturn = pnfs_prepare_layoutreturn(lo);
 
@@ -1772,7 +1766,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
 			pnfs_set_plh_return_iomode(lo, return_range->iomode);
 			if (!mark_lseg_invalid(lseg, tmp_list))
 				remaining++;
-			set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+			set_bit(NFS_LAYOUT_RETURN_REQUESTED,
 				&lo->plh_flags);
 		}
 	return remaining;
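
The net effect of the pnfs.c changes is that the layoutreturn decision moves
from pnfs_put_lseg() to pnfs_put_layout_hdr(), with
pnfs_prepare_layoutreturn() still using test_and_set_bit() on
NFS_LAYOUT_RETURN so that exactly one caller wins the right to issue the RPC
while everyone else backs off. A userspace analogue of that once-only idiom,
with invented names and C11 atomics standing in for the kernel bitops:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define LAYOUT_RETURN_BIT (1u << 0)

    struct layout {
        _Atomic unsigned int flags;
    };

    /* Mirrors the shape of pnfs_prepare_layoutreturn(). */
    static bool prepare_return(struct layout *lo)
    {
        unsigned int old = atomic_fetch_or(&lo->flags, LAYOUT_RETURN_BIT);

        /* Bit already set: a return is in progress, lose the race. */
        return !(old & LAYOUT_RETURN_BIT);
    }

    int main(void)
    {
        struct layout lo = { .flags = 0 };

        printf("first caller sends: %d\n", prepare_return(&lo));  /* 1 */
        printf("second caller sends: %d\n", prepare_return(&lo)); /* 0 */
        return 0;
    }
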
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 9f4e2a47f4aa..1ac1db5f6dad 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -94,8 +94,8 @@ enum {
 	NFS_LAYOUT_RO_FAILED = 0,	/* get ro layout failed stop trying */
 	NFS_LAYOUT_RW_FAILED,		/* get rw layout failed stop trying */
 	NFS_LAYOUT_BULK_RECALL,		/* bulk recall affecting layout */
-	NFS_LAYOUT_RETURN,		/* Return this layout ASAP */
-	NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */
+	NFS_LAYOUT_RETURN,		/* layoutreturn in progress */
+	NFS_LAYOUT_RETURN_REQUESTED,	/* Return this layout ASAP */
 	NFS_LAYOUT_INVALID_STID,	/* layout stateid id is invalid */
 	NFS_LAYOUT_FIRST_LAYOUTGET,	/* Serialize first layoutget */
 };
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index a3cc6d2fc896..a76b9ea7722e 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1254,15 +1254,15 @@ static const struct file_operations o2hb_debug_fops = {
 
 void o2hb_exit(void)
 {
-	kfree(o2hb_db_livenodes);
-	kfree(o2hb_db_liveregions);
-	kfree(o2hb_db_quorumregions);
-	kfree(o2hb_db_failedregions);
 	debugfs_remove(o2hb_debug_failedregions);
 	debugfs_remove(o2hb_debug_quorumregions);
 	debugfs_remove(o2hb_debug_liveregions);
 	debugfs_remove(o2hb_debug_livenodes);
 	debugfs_remove(o2hb_debug_dir);
+	kfree(o2hb_db_livenodes);
+	kfree(o2hb_db_liveregions);
+	kfree(o2hb_db_quorumregions);
+	kfree(o2hb_db_failedregions);
 }
 
 static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir,
@@ -1438,13 +1438,15 @@ static void o2hb_region_release(struct config_item *item)
 
 	kfree(reg->hr_slots);
 
-	kfree(reg->hr_db_regnum);
-	kfree(reg->hr_db_livenodes);
 	debugfs_remove(reg->hr_debug_livenodes);
 	debugfs_remove(reg->hr_debug_regnum);
 	debugfs_remove(reg->hr_debug_elapsed_time);
 	debugfs_remove(reg->hr_debug_pinned);
 	debugfs_remove(reg->hr_debug_dir);
+	kfree(reg->hr_db_livenodes);
+	kfree(reg->hr_db_regnum);
+	kfree(reg->hr_debug_elapsed_time);
+	kfree(reg->hr_debug_pinned);
 
 	spin_lock(&o2hb_live_lock);
 	list_del(&reg->hr_all_item);
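
Both ocfs2 hunks fix the same ordering bug: the backing buffers were freed
while the debugfs files exposing them still existed, leaving a window where a
reader could dereference freed memory. The rule the new order enforces is
"unpublish first, free second". A compressed, single-threaded userspace
illustration of the pattern, with invented names ("registry" stands in for
debugfs):

    #include <stdlib.h>

    struct entry {
        const char *name;
        void *data;                /* what a reader would dereference */
    };

    static struct entry *registry; /* one published slot, for brevity */

    static void publish(struct entry *e) { registry = e; }
    static void unpublish(void)          { registry = NULL; }

    static void teardown(struct entry *e)
    {
        /*
         * The buggy order was free(e->data) before unpublish(): a
         * concurrent reader could still reach registry->data in the
         * window between the two calls.
         */
        unpublish();   /* debugfs_remove(...) */
        free(e->data); /* kfree(...) */
        free(e);
    }

    int main(void)
    {
        struct entry *e = malloc(sizeof(*e));

        e->name = "livenodes";
        e->data = malloc(16);
        publish(e);
        teardown(e);
        return 0;
    }
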
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index c5bdf02c213b..b94a425f0175 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2367,6 +2367,8 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
 						break;
 					}
 				}
+				dlm_lockres_clear_refmap_bit(dlm, res,
+						dead_node);
 				spin_unlock(&res->spinlock);
 				continue;
 			}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 85d16c67c33e..fa95ab2d3674 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -259,23 +259,29 @@ static int do_maps_open(struct inode *inode, struct file *file,
 				sizeof(struct proc_maps_private));
 }
 
-static pid_t pid_of_stack(struct proc_maps_private *priv,
-				struct vm_area_struct *vma, bool is_pid)
+/*
+ * Indicate if the VMA is a stack for the given task; for
+ * /proc/PID/maps that is the stack of the main task.
+ */
+static int is_stack(struct proc_maps_private *priv,
+		    struct vm_area_struct *vma, int is_pid)
 {
-	struct inode *inode = priv->inode;
-	struct task_struct *task;
-	pid_t ret = 0;
+	int stack = 0;
+
+	if (is_pid) {
+		stack = vma->vm_start <= vma->vm_mm->start_stack &&
+			vma->vm_end >= vma->vm_mm->start_stack;
+	} else {
+		struct inode *inode = priv->inode;
+		struct task_struct *task;
 
-	rcu_read_lock();
-	task = pid_task(proc_pid(inode), PIDTYPE_PID);
-	if (task) {
-		task = task_of_stack(task, vma, is_pid);
+		rcu_read_lock();
+		task = pid_task(proc_pid(inode), PIDTYPE_PID);
 		if (task)
-			ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+			stack = vma_is_stack_for_task(vma, task);
+		rcu_read_unlock();
 	}
-	rcu_read_unlock();
-
-	return ret;
+	return stack;
 }
 
 static void
@@ -335,8 +341,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 
 	name = arch_vma_name(vma);
 	if (!name) {
-		pid_t tid;
-
 		if (!mm) {
 			name = "[vdso]";
 			goto done;
@@ -348,21 +352,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 			goto done;
 		}
 
-		tid = pid_of_stack(priv, vma, is_pid);
-		if (tid != 0) {
-			/*
-			 * Thread stack in /proc/PID/task/TID/maps or
-			 * the main process stack.
-			 */
-			if (!is_pid || (vma->vm_start <= mm->start_stack &&
-			    vma->vm_end >= mm->start_stack)) {
-				name = "[stack]";
-			} else {
-				/* Thread stack in /proc/PID/maps */
-				seq_pad(m, ' ');
-				seq_printf(m, "[stack:%d]", tid);
-			}
-		}
+		if (is_stack(priv, vma, is_pid))
+			name = "[stack]";
 	}
 
 done:
@@ -1552,18 +1543,19 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
 		unsigned long addr, unsigned long end, struct mm_walk *walk)
 {
+	pte_t huge_pte = huge_ptep_get(pte);
 	struct numa_maps *md;
 	struct page *page;
 
-	if (!pte_present(*pte))
+	if (!pte_present(huge_pte))
 		return 0;
 
-	page = pte_page(*pte);
+	page = pte_page(huge_pte);
 	if (!page)
 		return 0;
 
 	md = walk->private;
-	gather_stats(page, md, pte_dirty(*pte), 1);
+	gather_stats(page, md, pte_dirty(huge_pte), 1);
 	return 0;
 }
 
@@ -1617,19 +1609,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 		seq_file_path(m, file, "\n\t= ");
 	} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
 		seq_puts(m, " heap");
-	} else {
-		pid_t tid = pid_of_stack(proc_priv, vma, is_pid);
-		if (tid != 0) {
-			/*
-			 * Thread stack in /proc/PID/task/TID/maps or
-			 * the main process stack.
-			 */
-			if (!is_pid || (vma->vm_start <= mm->start_stack &&
-			    vma->vm_end >= mm->start_stack))
-				seq_puts(m, " stack");
-			else
-				seq_printf(m, " stack:%d", tid);
-		}
+	} else if (is_stack(proc_priv, vma, is_pid)) {
+		seq_puts(m, " stack");
 	}
 
 	if (is_vm_hugetlb_page(vma))
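
After this change a stack VMA is always reported as plain "[stack]"; the
"[stack:TID]" form for thread stacks is gone, and for /proc/PID/maps the check
is reduced to a range comparison against mm->start_stack. A quick way to look
at the resulting output:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        FILE *f = fopen("/proc/self/maps", "r");
        char line[512];

        if (!f) {
            perror("fopen");
            return 1;
        }
        /* Print any mapping annotated as a stack. */
        while (fgets(line, sizeof(line), f))
            if (strstr(line, "[stack"))
                fputs(line, stdout);
        fclose(f);
        return 0;
    }
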
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index e0d64c92e4f6..faacb0c0d857 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -123,23 +123,26 @@ unsigned long task_statm(struct mm_struct *mm,
 	return size;
 }
 
-static pid_t pid_of_stack(struct proc_maps_private *priv,
-				struct vm_area_struct *vma, bool is_pid)
+static int is_stack(struct proc_maps_private *priv,
+		    struct vm_area_struct *vma, int is_pid)
 {
-	struct inode *inode = priv->inode;
-	struct task_struct *task;
-	pid_t ret = 0;
-
-	rcu_read_lock();
-	task = pid_task(proc_pid(inode), PIDTYPE_PID);
-	if (task) {
-		task = task_of_stack(task, vma, is_pid);
+	struct mm_struct *mm = vma->vm_mm;
+	int stack = 0;
+
+	if (is_pid) {
+		stack = vma->vm_start <= mm->start_stack &&
+			vma->vm_end >= mm->start_stack;
+	} else {
+		struct inode *inode = priv->inode;
+		struct task_struct *task;
+
+		rcu_read_lock();
+		task = pid_task(proc_pid(inode), PIDTYPE_PID);
 		if (task)
-			ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+			stack = vma_is_stack_for_task(vma, task);
+		rcu_read_unlock();
 	}
-	rcu_read_unlock();
-
-	return ret;
+	return stack;
 }
 
 /*
@@ -181,21 +184,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
 	if (file) {
 		seq_pad(m, ' ');
 		seq_file_path(m, file, "");
-	} else if (mm) {
-		pid_t tid = pid_of_stack(priv, vma, is_pid);
-
-		if (tid != 0) {
-			seq_pad(m, ' ');
-			/*
-			 * Thread stack in /proc/PID/task/TID/maps or
-			 * the main process stack.
-			 */
-			if (!is_pid || (vma->vm_start <= mm->start_stack &&
-			    vma->vm_end >= mm->start_stack))
-				seq_printf(m, "[stack]");
-			else
-				seq_printf(m, "[stack:%d]", tid);
-		}
+	} else if (mm && is_stack(priv, vma, is_pid)) {
+		seq_pad(m, ' ');
+		seq_printf(m, "[stack]");
 	}
 
 	seq_putc(m, '\n');
diff --git a/fs/timerfd.c b/fs/timerfd.c
index b94fa6c3c6eb..053818dd6c18 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -153,7 +153,7 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
 	if (isalarm(ctx))
 		remaining = alarm_expires_remaining(&ctx->t.alarm);
 	else
-		remaining = hrtimer_expires_remaining(&ctx->t.tmr);
+		remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr);
 
 	return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
 }
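
timerfd_get_remaining() is what backs timerfd_gettime(), so switching to
hrtimer_expires_remaining_adjusted() means the remaining time reported to
userspace accounts for the slack applied to the underlying hrtimer. A small
program exercising that path, error handling abbreviated:

    #include <stdio.h>
    #include <sys/timerfd.h>

    int main(void)
    {
        struct itimerspec in = {
            .it_value = { .tv_sec = 5, .tv_nsec = 0 },
        };
        struct itimerspec out;
        int fd = timerfd_create(CLOCK_MONOTONIC, 0);

        if (fd < 0) {
            perror("timerfd_create");
            return 1;
        }
        timerfd_settime(fd, 0, &in, NULL);
        timerfd_gettime(fd, &out);   /* hits timerfd_get_remaining() */
        printf("remaining: %lld.%09lds\n",
               (long long)out.it_value.tv_sec, out.it_value.tv_nsec);
        return 0;
    }
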
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index da37beb76f6e..594f7e63b432 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4491,7 +4491,7 @@ xlog_recover_process(
 	 * know precisely what failed.
 	 */
 	if (pass == XLOG_RECOVER_CRCPASS) {
-		if (rhead->h_crc && crc != le32_to_cpu(rhead->h_crc))
+		if (rhead->h_crc && crc != rhead->h_crc)
 			return -EFSBADCRC;
 		return 0;
 	}
@@ -4502,7 +4502,7 @@ xlog_recover_process(
 	 * zero CRC check prevents warnings from being emitted when upgrading
 	 * the kernel from one that does not add CRCs by default.
 	 */
-	if (crc != le32_to_cpu(rhead->h_crc)) {
+	if (crc != rhead->h_crc) {
 		if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
 			xfs_alert(log->l_mp,
 				"log record CRC mismatch: found 0x%x, expected 0x%x.",