aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS4
-rw-r--r--arch/arm/mm/dma-mapping.c11
-rw-r--r--arch/x86/include/asm/pmem.h11
-rw-r--r--drivers/acpi/apei/erst.c6
-rw-r--r--drivers/block/drbd/drbd_bitmap.c26
-rw-r--r--drivers/block/drbd/drbd_int.h3
-rw-r--r--drivers/char/mspec.c15
-rw-r--r--drivers/gpu/drm/drm_hashtab.c5
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_private.h8
-rw-r--r--fs/block_dev.c2
-rw-r--r--fs/coda/coda_linux.h3
-rw-r--r--fs/dax.c274
-rw-r--r--fs/ext2/file.c4
-rw-r--r--fs/ext4/file.c4
-rw-r--r--fs/inode.c2
-rw-r--r--fs/jffs2/build.c8
-rw-r--r--fs/jffs2/fs.c5
-rw-r--r--fs/jffs2/super.c5
-rw-r--r--fs/udf/super.c7
-rw-r--r--fs/xfs/xfs_file.c7
-rw-r--r--include/linux/dax.h7
-rw-r--r--include/linux/fs.h3
-rw-r--r--include/linux/pagemap.h3
-rw-r--r--include/linux/pmem.h22
-rw-r--r--include/linux/radix-tree.h9
-rw-r--r--ipc/sem.c2
-rw-r--r--ipc/util.c11
-rw-r--r--ipc/util.h2
-rw-r--r--mm/filemap.c91
-rw-r--r--mm/percpu.c18
-rw-r--r--mm/truncate.c69
-rw-r--r--mm/vmscan.c9
-rw-r--r--mm/workingset.c4
-rw-r--r--net/ipv4/fib_trie.c4
34 files changed, 493 insertions, 171 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index b8a717c4f863..fc1718500181 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10453,9 +10453,11 @@ S: Maintained
10453F: drivers/net/ethernet/dlink/sundance.c 10453F: drivers/net/ethernet/dlink/sundance.c
10454 10454
10455SUPERH 10455SUPERH
10456M: Yoshinori Sato <ysato@users.sourceforge.jp>
10457M: Rich Felker <dalias@libc.org>
10456L: linux-sh@vger.kernel.org 10458L: linux-sh@vger.kernel.org
10457Q: http://patchwork.kernel.org/project/linux-sh/list/ 10459Q: http://patchwork.kernel.org/project/linux-sh/list/
10458S: Orphan 10460S: Maintained
10459F: Documentation/sh/ 10461F: Documentation/sh/
10460F: arch/sh/ 10462F: arch/sh/
10461F: drivers/sh/ 10463F: drivers/sh/
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 534a60ae282e..0eca3812527e 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1200,10 +1200,7 @@ error:
1200 while (i--) 1200 while (i--)
1201 if (pages[i]) 1201 if (pages[i])
1202 __free_pages(pages[i], 0); 1202 __free_pages(pages[i], 0);
1203 if (array_size <= PAGE_SIZE) 1203 kvfree(pages);
1204 kfree(pages);
1205 else
1206 vfree(pages);
1207 return NULL; 1204 return NULL;
1208} 1205}
1209 1206
@@ -1211,7 +1208,6 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages,
1211 size_t size, struct dma_attrs *attrs) 1208 size_t size, struct dma_attrs *attrs)
1212{ 1209{
1213 int count = size >> PAGE_SHIFT; 1210 int count = size >> PAGE_SHIFT;
1214 int array_size = count * sizeof(struct page *);
1215 int i; 1211 int i;
1216 1212
1217 if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) { 1213 if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) {
@@ -1222,10 +1218,7 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages,
1222 __free_pages(pages[i], 0); 1218 __free_pages(pages[i], 0);
1223 } 1219 }
1224 1220
1225 if (array_size <= PAGE_SIZE) 1221 kvfree(pages);
1226 kfree(pages);
1227 else
1228 vfree(pages);
1229 return 0; 1222 return 0;
1230} 1223}
1231 1224
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 1544fabcd7f9..c57fd1ea9689 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -67,18 +67,19 @@ static inline void arch_wmb_pmem(void)
67} 67}
68 68
69/** 69/**
70 * __arch_wb_cache_pmem - write back a cache range with CLWB 70 * arch_wb_cache_pmem - write back a cache range with CLWB
71 * @vaddr: virtual start address 71 * @vaddr: virtual start address
72 * @size: number of bytes to write back 72 * @size: number of bytes to write back
73 * 73 *
74 * Write back a cache range using the CLWB (cache line write back) 74 * Write back a cache range using the CLWB (cache line write back)
75 * instruction. This function requires explicit ordering with an 75 * instruction. This function requires explicit ordering with an
76 * arch_wmb_pmem() call. This API is internal to the x86 PMEM implementation. 76 * arch_wmb_pmem() call.
77 */ 77 */
78static inline void __arch_wb_cache_pmem(void *vaddr, size_t size) 78static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
79{ 79{
80 u16 x86_clflush_size = boot_cpu_data.x86_clflush_size; 80 u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
81 unsigned long clflush_mask = x86_clflush_size - 1; 81 unsigned long clflush_mask = x86_clflush_size - 1;
82 void *vaddr = (void __force *)addr;
82 void *vend = vaddr + size; 83 void *vend = vaddr + size;
83 void *p; 84 void *p;
84 85
@@ -115,7 +116,7 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
115 len = copy_from_iter_nocache(vaddr, bytes, i); 116 len = copy_from_iter_nocache(vaddr, bytes, i);
116 117
117 if (__iter_needs_pmem_wb(i)) 118 if (__iter_needs_pmem_wb(i))
118 __arch_wb_cache_pmem(vaddr, bytes); 119 arch_wb_cache_pmem(addr, bytes);
119 120
120 return len; 121 return len;
121} 122}
@@ -133,7 +134,7 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size)
133 void *vaddr = (void __force *)addr; 134 void *vaddr = (void __force *)addr;
134 135
135 memset(vaddr, 0, size); 136 memset(vaddr, 0, size);
136 __arch_wb_cache_pmem(vaddr, size); 137 arch_wb_cache_pmem(addr, size);
137} 138}
138 139
139static inline bool __arch_has_wmb_pmem(void) 140static inline bool __arch_has_wmb_pmem(void)
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 6682c5daf742..6e6bc1059301 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -32,6 +32,7 @@
32#include <linux/hardirq.h> 32#include <linux/hardirq.h>
33#include <linux/pstore.h> 33#include <linux/pstore.h>
34#include <linux/vmalloc.h> 34#include <linux/vmalloc.h>
35#include <linux/mm.h> /* kvfree() */
35#include <acpi/apei.h> 36#include <acpi/apei.h>
36 37
37#include "apei-internal.h" 38#include "apei-internal.h"
@@ -532,10 +533,7 @@ retry:
532 return -ENOMEM; 533 return -ENOMEM;
533 memcpy(new_entries, entries, 534 memcpy(new_entries, entries,
534 erst_record_id_cache.len * sizeof(entries[0])); 535 erst_record_id_cache.len * sizeof(entries[0]));
535 if (erst_record_id_cache.size < PAGE_SIZE) 536 kvfree(entries);
536 kfree(entries);
537 else
538 vfree(entries);
539 erst_record_id_cache.entries = entries = new_entries; 537 erst_record_id_cache.entries = entries = new_entries;
540 erst_record_id_cache.size = new_size; 538 erst_record_id_cache.size = new_size;
541 } 539 }
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 0dabc9b93725..92d6fc020a65 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -364,12 +364,9 @@ static void bm_free_pages(struct page **pages, unsigned long number)
364 } 364 }
365} 365}
366 366
367static void bm_vk_free(void *ptr, int v) 367static inline void bm_vk_free(void *ptr)
368{ 368{
369 if (v) 369 kvfree(ptr);
370 vfree(ptr);
371 else
372 kfree(ptr);
373} 370}
374 371
375/* 372/*
@@ -379,7 +376,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
379{ 376{
380 struct page **old_pages = b->bm_pages; 377 struct page **old_pages = b->bm_pages;
381 struct page **new_pages, *page; 378 struct page **new_pages, *page;
382 unsigned int i, bytes, vmalloced = 0; 379 unsigned int i, bytes;
383 unsigned long have = b->bm_number_of_pages; 380 unsigned long have = b->bm_number_of_pages;
384 381
385 BUG_ON(have == 0 && old_pages != NULL); 382 BUG_ON(have == 0 && old_pages != NULL);
@@ -401,7 +398,6 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
401 PAGE_KERNEL); 398 PAGE_KERNEL);
402 if (!new_pages) 399 if (!new_pages)
403 return NULL; 400 return NULL;
404 vmalloced = 1;
405 } 401 }
406 402
407 if (want >= have) { 403 if (want >= have) {
@@ -411,7 +407,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
411 page = alloc_page(GFP_NOIO | __GFP_HIGHMEM); 407 page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
412 if (!page) { 408 if (!page) {
413 bm_free_pages(new_pages + have, i - have); 409 bm_free_pages(new_pages + have, i - have);
414 bm_vk_free(new_pages, vmalloced); 410 bm_vk_free(new_pages);
415 return NULL; 411 return NULL;
416 } 412 }
417 /* we want to know which page it is 413 /* we want to know which page it is
@@ -427,11 +423,6 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
427 */ 423 */
428 } 424 }
429 425
430 if (vmalloced)
431 b->bm_flags |= BM_P_VMALLOCED;
432 else
433 b->bm_flags &= ~BM_P_VMALLOCED;
434
435 return new_pages; 426 return new_pages;
436} 427}
437 428
@@ -469,7 +460,7 @@ void drbd_bm_cleanup(struct drbd_device *device)
469 if (!expect(device->bitmap)) 460 if (!expect(device->bitmap))
470 return; 461 return;
471 bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages); 462 bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages);
472 bm_vk_free(device->bitmap->bm_pages, (BM_P_VMALLOCED & device->bitmap->bm_flags)); 463 bm_vk_free(device->bitmap->bm_pages);
473 kfree(device->bitmap); 464 kfree(device->bitmap);
474 device->bitmap = NULL; 465 device->bitmap = NULL;
475} 466}
@@ -643,7 +634,6 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
643 unsigned long want, have, onpages; /* number of pages */ 634 unsigned long want, have, onpages; /* number of pages */
644 struct page **npages, **opages = NULL; 635 struct page **npages, **opages = NULL;
645 int err = 0, growing; 636 int err = 0, growing;
646 int opages_vmalloced;
647 637
648 if (!expect(b)) 638 if (!expect(b))
649 return -ENOMEM; 639 return -ENOMEM;
@@ -656,8 +646,6 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
656 if (capacity == b->bm_dev_capacity) 646 if (capacity == b->bm_dev_capacity)
657 goto out; 647 goto out;
658 648
659 opages_vmalloced = (BM_P_VMALLOCED & b->bm_flags);
660
661 if (capacity == 0) { 649 if (capacity == 0) {
662 spin_lock_irq(&b->bm_lock); 650 spin_lock_irq(&b->bm_lock);
663 opages = b->bm_pages; 651 opages = b->bm_pages;
@@ -671,7 +659,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
671 b->bm_dev_capacity = 0; 659 b->bm_dev_capacity = 0;
672 spin_unlock_irq(&b->bm_lock); 660 spin_unlock_irq(&b->bm_lock);
673 bm_free_pages(opages, onpages); 661 bm_free_pages(opages, onpages);
674 bm_vk_free(opages, opages_vmalloced); 662 bm_vk_free(opages);
675 goto out; 663 goto out;
676 } 664 }
677 bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT)); 665 bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));
@@ -744,7 +732,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
744 732
745 spin_unlock_irq(&b->bm_lock); 733 spin_unlock_irq(&b->bm_lock);
746 if (opages != npages) 734 if (opages != npages)
747 bm_vk_free(opages, opages_vmalloced); 735 bm_vk_free(opages);
748 if (!growing) 736 if (!growing)
749 b->bm_set = bm_count_bits(b); 737 b->bm_set = bm_count_bits(b);
750 drbd_info(device, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want); 738 drbd_info(device, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index b6844feb9f9b..34bc84efc29e 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -536,9 +536,6 @@ struct drbd_bitmap; /* opaque for drbd_device */
536/* definition of bits in bm_flags to be used in drbd_bm_lock 536/* definition of bits in bm_flags to be used in drbd_bm_lock
537 * and drbd_bitmap_io and friends. */ 537 * and drbd_bitmap_io and friends. */
538enum bm_flag { 538enum bm_flag {
539 /* do we need to kfree, or vfree bm_pages? */
540 BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */
541
542 /* currently locked for bulk operation */ 539 /* currently locked for bulk operation */
543 BM_LOCKED_MASK = 0xf, 540 BM_LOCKED_MASK = 0xf,
544 541
diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c
index f1d7fa45c275..f3f92d5fcda0 100644
--- a/drivers/char/mspec.c
+++ b/drivers/char/mspec.c
@@ -93,14 +93,11 @@ struct vma_data {
93 spinlock_t lock; /* Serialize access to this structure. */ 93 spinlock_t lock; /* Serialize access to this structure. */
94 int count; /* Number of pages allocated. */ 94 int count; /* Number of pages allocated. */
95 enum mspec_page_type type; /* Type of pages allocated. */ 95 enum mspec_page_type type; /* Type of pages allocated. */
96 int flags; /* See VMD_xxx below. */
97 unsigned long vm_start; /* Original (unsplit) base. */ 96 unsigned long vm_start; /* Original (unsplit) base. */
98 unsigned long vm_end; /* Original (unsplit) end. */ 97 unsigned long vm_end; /* Original (unsplit) end. */
99 unsigned long maddr[0]; /* Array of MSPEC addresses. */ 98 unsigned long maddr[0]; /* Array of MSPEC addresses. */
100}; 99};
101 100
102#define VMD_VMALLOCED 0x1 /* vmalloc'd rather than kmalloc'd */
103
104/* used on shub2 to clear FOP cache in the HUB */ 101/* used on shub2 to clear FOP cache in the HUB */
105static unsigned long scratch_page[MAX_NUMNODES]; 102static unsigned long scratch_page[MAX_NUMNODES];
106#define SH2_AMO_CACHE_ENTRIES 4 103#define SH2_AMO_CACHE_ENTRIES 4
@@ -185,10 +182,7 @@ mspec_close(struct vm_area_struct *vma)
185 "failed to zero page %ld\n", my_page); 182 "failed to zero page %ld\n", my_page);
186 } 183 }
187 184
188 if (vdata->flags & VMD_VMALLOCED) 185 kvfree(vdata);
189 vfree(vdata);
190 else
191 kfree(vdata);
192} 186}
193 187
194/* 188/*
@@ -256,7 +250,7 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma,
256 enum mspec_page_type type) 250 enum mspec_page_type type)
257{ 251{
258 struct vma_data *vdata; 252 struct vma_data *vdata;
259 int pages, vdata_size, flags = 0; 253 int pages, vdata_size;
260 254
261 if (vma->vm_pgoff != 0) 255 if (vma->vm_pgoff != 0)
262 return -EINVAL; 256 return -EINVAL;
@@ -271,16 +265,13 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma,
271 vdata_size = sizeof(struct vma_data) + pages * sizeof(long); 265 vdata_size = sizeof(struct vma_data) + pages * sizeof(long);
272 if (vdata_size <= PAGE_SIZE) 266 if (vdata_size <= PAGE_SIZE)
273 vdata = kzalloc(vdata_size, GFP_KERNEL); 267 vdata = kzalloc(vdata_size, GFP_KERNEL);
274 else { 268 else
275 vdata = vzalloc(vdata_size); 269 vdata = vzalloc(vdata_size);
276 flags = VMD_VMALLOCED;
277 }
278 if (!vdata) 270 if (!vdata)
279 return -ENOMEM; 271 return -ENOMEM;
280 272
281 vdata->vm_start = vma->vm_start; 273 vdata->vm_start = vma->vm_start;
282 vdata->vm_end = vma->vm_end; 274 vdata->vm_end = vma->vm_end;
283 vdata->flags = flags;
284 vdata->type = type; 275 vdata->type = type;
285 spin_lock_init(&vdata->lock); 276 spin_lock_init(&vdata->lock);
286 atomic_set(&vdata->refcnt, 1); 277 atomic_set(&vdata->refcnt, 1);
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index c3b80fd65d62..7b30b307674b 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -198,10 +198,7 @@ EXPORT_SYMBOL(drm_ht_remove_item);
198void drm_ht_remove(struct drm_open_hash *ht) 198void drm_ht_remove(struct drm_open_hash *ht)
199{ 199{
200 if (ht->table) { 200 if (ht->table) {
201 if ((PAGE_SIZE / sizeof(*ht->table)) >> ht->order) 201 kvfree(ht->table);
202 kfree(ht->table);
203 else
204 vfree(ht->table);
205 ht->table = NULL; 202 ht->table = NULL;
206 } 203 }
207} 204}
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
index d6273e143324..a80d993b882e 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
@@ -151,16 +151,12 @@ do { \
151 151
152#define LIBCFS_FREE(ptr, size) \ 152#define LIBCFS_FREE(ptr, size) \
153do { \ 153do { \
154 int s = (size); \
155 if (unlikely((ptr) == NULL)) { \ 154 if (unlikely((ptr) == NULL)) { \
156 CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \ 155 CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \
157 "%s:%d\n", s, __FILE__, __LINE__); \ 156 "%s:%d\n", (int)(size), __FILE__, __LINE__); \
158 break; \ 157 break; \
159 } \ 158 } \
160 if (unlikely(s > LIBCFS_VMALLOC_SIZE)) \ 159 kvfree(ptr); \
161 vfree(ptr); \
162 else \
163 kfree(ptr); \
164} while (0) 160} while (0)
165 161
166/******************************************************************************/ 162/******************************************************************************/
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ba762ea07f67..60895e500e15 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -75,7 +75,7 @@ void kill_bdev(struct block_device *bdev)
75{ 75{
76 struct address_space *mapping = bdev->bd_inode->i_mapping; 76 struct address_space *mapping = bdev->bd_inode->i_mapping;
77 77
78 if (mapping->nrpages == 0 && mapping->nrshadows == 0) 78 if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
79 return; 79 return;
80 80
81 invalidate_bh_lrus(); 81 invalidate_bh_lrus();
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index f829fe963f5b..5104d84c4f64 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -72,8 +72,7 @@ void coda_sysctl_clean(void);
72} while (0) 72} while (0)
73 73
74 74
75#define CODA_FREE(ptr,size) \ 75#define CODA_FREE(ptr, size) kvfree((ptr))
76 do { if (size < PAGE_SIZE) kfree((ptr)); else vfree((ptr)); } while (0)
77 76
78/* inode to cnode access functions */ 77/* inode to cnode access functions */
79 78
diff --git a/fs/dax.c b/fs/dax.c
index 7af879759064..206650f82762 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -24,6 +24,7 @@
24#include <linux/memcontrol.h> 24#include <linux/memcontrol.h>
25#include <linux/mm.h> 25#include <linux/mm.h>
26#include <linux/mutex.h> 26#include <linux/mutex.h>
27#include <linux/pagevec.h>
27#include <linux/pmem.h> 28#include <linux/pmem.h>
28#include <linux/sched.h> 29#include <linux/sched.h>
29#include <linux/uio.h> 30#include <linux/uio.h>
@@ -245,6 +246,7 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
245 loff_t end = pos + iov_iter_count(iter); 246 loff_t end = pos + iov_iter_count(iter);
246 247
247 memset(&bh, 0, sizeof(bh)); 248 memset(&bh, 0, sizeof(bh));
249 bh.b_bdev = inode->i_sb->s_bdev;
248 250
249 if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) { 251 if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) {
250 struct address_space *mapping = inode->i_mapping; 252 struct address_space *mapping = inode->i_mapping;
@@ -324,6 +326,199 @@ static int copy_user_bh(struct page *to, struct inode *inode,
324 return 0; 326 return 0;
325} 327}
326 328
329#define NO_SECTOR -1
330#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_CACHE_SHIFT))
331
332static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
333 sector_t sector, bool pmd_entry, bool dirty)
334{
335 struct radix_tree_root *page_tree = &mapping->page_tree;
336 pgoff_t pmd_index = DAX_PMD_INDEX(index);
337 int type, error = 0;
338 void *entry;
339
340 WARN_ON_ONCE(pmd_entry && !dirty);
341 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
342
343 spin_lock_irq(&mapping->tree_lock);
344
345 entry = radix_tree_lookup(page_tree, pmd_index);
346 if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) {
347 index = pmd_index;
348 goto dirty;
349 }
350
351 entry = radix_tree_lookup(page_tree, index);
352 if (entry) {
353 type = RADIX_DAX_TYPE(entry);
354 if (WARN_ON_ONCE(type != RADIX_DAX_PTE &&
355 type != RADIX_DAX_PMD)) {
356 error = -EIO;
357 goto unlock;
358 }
359
360 if (!pmd_entry || type == RADIX_DAX_PMD)
361 goto dirty;
362
363 /*
364 * We only insert dirty PMD entries into the radix tree. This
365 * means we don't need to worry about removing a dirty PTE
366 * entry and inserting a clean PMD entry, thus reducing the
367 * range we would flush with a follow-up fsync/msync call.
368 */
369 radix_tree_delete(&mapping->page_tree, index);
370 mapping->nrexceptional--;
371 }
372
373 if (sector == NO_SECTOR) {
374 /*
375 * This can happen during correct operation if our pfn_mkwrite
376 * fault raced against a hole punch operation. If this
377 * happens the pte that was hole punched will have been
378 * unmapped and the radix tree entry will have been removed by
379 * the time we are called, but the call will still happen. We
380 * will return all the way up to wp_pfn_shared(), where the
381 * pte_same() check will fail, eventually causing page fault
382 * to be retried by the CPU.
383 */
384 goto unlock;
385 }
386
387 error = radix_tree_insert(page_tree, index,
388 RADIX_DAX_ENTRY(sector, pmd_entry));
389 if (error)
390 goto unlock;
391
392 mapping->nrexceptional++;
393 dirty:
394 if (dirty)
395 radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
396 unlock:
397 spin_unlock_irq(&mapping->tree_lock);
398 return error;
399}
400
401static int dax_writeback_one(struct block_device *bdev,
402 struct address_space *mapping, pgoff_t index, void *entry)
403{
404 struct radix_tree_root *page_tree = &mapping->page_tree;
405 int type = RADIX_DAX_TYPE(entry);
406 struct radix_tree_node *node;
407 struct blk_dax_ctl dax;
408 void **slot;
409 int ret = 0;
410
411 spin_lock_irq(&mapping->tree_lock);
412 /*
413 * Regular page slots are stabilized by the page lock even
414 * without the tree itself locked. These unlocked entries
415 * need verification under the tree lock.
416 */
417 if (!__radix_tree_lookup(page_tree, index, &node, &slot))
418 goto unlock;
419 if (*slot != entry)
420 goto unlock;
421
422 /* another fsync thread may have already written back this entry */
423 if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
424 goto unlock;
425
426 if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
427 ret = -EIO;
428 goto unlock;
429 }
430
431 dax.sector = RADIX_DAX_SECTOR(entry);
432 dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
433 spin_unlock_irq(&mapping->tree_lock);
434
435 /*
436 * We cannot hold tree_lock while calling dax_map_atomic() because it
437 * eventually calls cond_resched().
438 */
439 ret = dax_map_atomic(bdev, &dax);
440 if (ret < 0)
441 return ret;
442
443 if (WARN_ON_ONCE(ret < dax.size)) {
444 ret = -EIO;
445 goto unmap;
446 }
447
448 wb_cache_pmem(dax.addr, dax.size);
449
450 spin_lock_irq(&mapping->tree_lock);
451 radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
452 spin_unlock_irq(&mapping->tree_lock);
453 unmap:
454 dax_unmap_atomic(bdev, &dax);
455 return ret;
456
457 unlock:
458 spin_unlock_irq(&mapping->tree_lock);
459 return ret;
460}
461
462/*
463 * Flush the mapping to the persistent domain within the byte range of [start,
464 * end]. This is required by data integrity operations to ensure file data is
465 * on persistent storage prior to completion of the operation.
466 */
467int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
468 loff_t end)
469{
470 struct inode *inode = mapping->host;
471 struct block_device *bdev = inode->i_sb->s_bdev;
472 pgoff_t start_index, end_index, pmd_index;
473 pgoff_t indices[PAGEVEC_SIZE];
474 struct pagevec pvec;
475 bool done = false;
476 int i, ret = 0;
477 void *entry;
478
479 if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
480 return -EIO;
481
482 start_index = start >> PAGE_CACHE_SHIFT;
483 end_index = end >> PAGE_CACHE_SHIFT;
484 pmd_index = DAX_PMD_INDEX(start_index);
485
486 rcu_read_lock();
487 entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
488 rcu_read_unlock();
489
490 /* see if the start of our range is covered by a PMD entry */
491 if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
492 start_index = pmd_index;
493
494 tag_pages_for_writeback(mapping, start_index, end_index);
495
496 pagevec_init(&pvec, 0);
497 while (!done) {
498 pvec.nr = find_get_entries_tag(mapping, start_index,
499 PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
500 pvec.pages, indices);
501
502 if (pvec.nr == 0)
503 break;
504
505 for (i = 0; i < pvec.nr; i++) {
506 if (indices[i] > end_index) {
507 done = true;
508 break;
509 }
510
511 ret = dax_writeback_one(bdev, mapping, indices[i],
512 pvec.pages[i]);
513 if (ret < 0)
514 return ret;
515 }
516 }
517 wmb_pmem();
518 return 0;
519}
520EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
521
327static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, 522static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
328 struct vm_area_struct *vma, struct vm_fault *vmf) 523 struct vm_area_struct *vma, struct vm_fault *vmf)
329{ 524{
@@ -363,6 +558,11 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
363 } 558 }
364 dax_unmap_atomic(bdev, &dax); 559 dax_unmap_atomic(bdev, &dax);
365 560
561 error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
562 vmf->flags & FAULT_FLAG_WRITE);
563 if (error)
564 goto out;
565
366 error = vm_insert_mixed(vma, vaddr, dax.pfn); 566 error = vm_insert_mixed(vma, vaddr, dax.pfn);
367 567
368 out: 568 out:
@@ -408,6 +608,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
408 608
409 memset(&bh, 0, sizeof(bh)); 609 memset(&bh, 0, sizeof(bh));
410 block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits); 610 block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
611 bh.b_bdev = inode->i_sb->s_bdev;
411 bh.b_size = PAGE_SIZE; 612 bh.b_size = PAGE_SIZE;
412 613
413 repeat: 614 repeat:
@@ -487,6 +688,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
487 delete_from_page_cache(page); 688 delete_from_page_cache(page);
488 unlock_page(page); 689 unlock_page(page);
489 page_cache_release(page); 690 page_cache_release(page);
691 page = NULL;
490 } 692 }
491 693
492 /* 694 /*
@@ -590,7 +792,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
590 struct block_device *bdev; 792 struct block_device *bdev;
591 pgoff_t size, pgoff; 793 pgoff_t size, pgoff;
592 sector_t block; 794 sector_t block;
593 int result = 0; 795 int error, result = 0;
796 bool alloc = false;
594 797
595 /* dax pmd mappings require pfn_t_devmap() */ 798 /* dax pmd mappings require pfn_t_devmap() */
596 if (!IS_ENABLED(CONFIG_FS_DAX_PMD)) 799 if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
@@ -624,13 +827,21 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
624 } 827 }
625 828
626 memset(&bh, 0, sizeof(bh)); 829 memset(&bh, 0, sizeof(bh));
830 bh.b_bdev = inode->i_sb->s_bdev;
627 block = (sector_t)pgoff << (PAGE_SHIFT - blkbits); 831 block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
628 832
629 bh.b_size = PMD_SIZE; 833 bh.b_size = PMD_SIZE;
630 if (get_block(inode, block, &bh, write) != 0) 834
835 if (get_block(inode, block, &bh, 0) != 0)
631 return VM_FAULT_SIGBUS; 836 return VM_FAULT_SIGBUS;
837
838 if (!buffer_mapped(&bh) && write) {
839 if (get_block(inode, block, &bh, 1) != 0)
840 return VM_FAULT_SIGBUS;
841 alloc = true;
842 }
843
632 bdev = bh.b_bdev; 844 bdev = bh.b_bdev;
633 i_mmap_lock_read(mapping);
634 845
635 /* 846 /*
636 * If the filesystem isn't willing to tell us the length of a hole, 847 * If the filesystem isn't willing to tell us the length of a hole,
@@ -639,19 +850,22 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
639 */ 850 */
640 if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) { 851 if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) {
641 dax_pmd_dbg(&bh, address, "allocated block too small"); 852 dax_pmd_dbg(&bh, address, "allocated block too small");
642 goto fallback; 853 return VM_FAULT_FALLBACK;
643 } 854 }
644 855
645 /* 856 /*
646 * If we allocated new storage, make sure no process has any 857 * If we allocated new storage, make sure no process has any
647 * zero pages covering this hole 858 * zero pages covering this hole
648 */ 859 */
649 if (buffer_new(&bh)) { 860 if (alloc) {
650 i_mmap_unlock_read(mapping); 861 loff_t lstart = pgoff << PAGE_SHIFT;
651 unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0); 862 loff_t lend = lstart + PMD_SIZE - 1; /* inclusive */
652 i_mmap_lock_read(mapping); 863
864 truncate_pagecache_range(inode, lstart, lend);
653 } 865 }
654 866
867 i_mmap_lock_read(mapping);
868
655 /* 869 /*
656 * If a truncate happened while we were allocating blocks, we may 870 * If a truncate happened while we were allocating blocks, we may
657 * leave blocks allocated to the file that are beyond EOF. We can't 871 * leave blocks allocated to the file that are beyond EOF. We can't
@@ -664,7 +878,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
664 goto out; 878 goto out;
665 } 879 }
666 if ((pgoff | PG_PMD_COLOUR) >= size) { 880 if ((pgoff | PG_PMD_COLOUR) >= size) {
667 dax_pmd_dbg(&bh, address, "pgoff unaligned"); 881 dax_pmd_dbg(&bh, address,
882 "offset + huge page size > file size");
668 goto fallback; 883 goto fallback;
669 } 884 }
670 885
@@ -732,6 +947,31 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
732 } 947 }
733 dax_unmap_atomic(bdev, &dax); 948 dax_unmap_atomic(bdev, &dax);
734 949
950 /*
951 * For PTE faults we insert a radix tree entry for reads, and
952 * leave it clean. Then on the first write we dirty the radix
953 * tree entry via the dax_pfn_mkwrite() path. This sequence
954 * allows the dax_pfn_mkwrite() call to be simpler and avoid a
955 * call into get_block() to translate the pgoff to a sector in
956 * order to be able to create a new radix tree entry.
957 *
958 * The PMD path doesn't have an equivalent to
959 * dax_pfn_mkwrite(), though, so for a read followed by a
960 * write we traverse all the way through __dax_pmd_fault()
961 * twice. This means we can just skip inserting a radix tree
962 * entry completely on the initial read and just wait until
963 * the write to insert a dirty entry.
964 */
965 if (write) {
966 error = dax_radix_entry(mapping, pgoff, dax.sector,
967 true, true);
968 if (error) {
969 dax_pmd_dbg(&bh, address,
970 "PMD radix insertion failed");
971 goto fallback;
972 }
973 }
974
735 dev_dbg(part_to_dev(bdev->bd_part), 975 dev_dbg(part_to_dev(bdev->bd_part),
736 "%s: %s addr: %lx pfn: %lx sect: %llx\n", 976 "%s: %s addr: %lx pfn: %lx sect: %llx\n",
737 __func__, current->comm, address, 977 __func__, current->comm, address,
@@ -790,15 +1030,20 @@ EXPORT_SYMBOL_GPL(dax_pmd_fault);
790 * dax_pfn_mkwrite - handle first write to DAX page 1030 * dax_pfn_mkwrite - handle first write to DAX page
791 * @vma: The virtual memory area where the fault occurred 1031 * @vma: The virtual memory area where the fault occurred
792 * @vmf: The description of the fault 1032 * @vmf: The description of the fault
793 *
794 */ 1033 */
795int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 1034int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
796{ 1035{
797 struct super_block *sb = file_inode(vma->vm_file)->i_sb; 1036 struct file *file = vma->vm_file;
798 1037
799 sb_start_pagefault(sb); 1038 /*
800 file_update_time(vma->vm_file); 1039 * We pass NO_SECTOR to dax_radix_entry() because we expect that a
801 sb_end_pagefault(sb); 1040 * RADIX_DAX_PTE entry already exists in the radix tree from a
1041 * previous call to __dax_fault(). We just want to look up that PTE
1042 * entry using vmf->pgoff and make sure the dirty tag is set. This
1043 * saves us from having to make a call to get_block() here to look
1044 * up the sector.
1045 */
1046 dax_radix_entry(file->f_mapping, vmf->pgoff, NO_SECTOR, false, true);
802 return VM_FAULT_NOPAGE; 1047 return VM_FAULT_NOPAGE;
803} 1048}
804EXPORT_SYMBOL_GPL(dax_pfn_mkwrite); 1049EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
@@ -835,6 +1080,7 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
835 BUG_ON((offset + length) > PAGE_CACHE_SIZE); 1080 BUG_ON((offset + length) > PAGE_CACHE_SIZE);
836 1081
837 memset(&bh, 0, sizeof(bh)); 1082 memset(&bh, 0, sizeof(bh));
1083 bh.b_bdev = inode->i_sb->s_bdev;
838 bh.b_size = PAGE_CACHE_SIZE; 1084 bh.b_size = PAGE_CACHE_SIZE;
839 err = get_block(inode, index, &bh, 0); 1085 err = get_block(inode, index, &bh, 0);
840 if (err < 0) 1086 if (err < 0)
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 11a42c5a09ae..2c88d683cd91 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -102,8 +102,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
102{ 102{
103 struct inode *inode = file_inode(vma->vm_file); 103 struct inode *inode = file_inode(vma->vm_file);
104 struct ext2_inode_info *ei = EXT2_I(inode); 104 struct ext2_inode_info *ei = EXT2_I(inode);
105 int ret = VM_FAULT_NOPAGE;
106 loff_t size; 105 loff_t size;
106 int ret;
107 107
108 sb_start_pagefault(inode->i_sb); 108 sb_start_pagefault(inode->i_sb);
109 file_update_time(vma->vm_file); 109 file_update_time(vma->vm_file);
@@ -113,6 +113,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
113 size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; 113 size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
114 if (vmf->pgoff >= size) 114 if (vmf->pgoff >= size)
115 ret = VM_FAULT_SIGBUS; 115 ret = VM_FAULT_SIGBUS;
116 else
117 ret = dax_pfn_mkwrite(vma, vmf);
116 118
117 up_read(&ei->dax_sem); 119 up_read(&ei->dax_sem);
118 sb_end_pagefault(inode->i_sb); 120 sb_end_pagefault(inode->i_sb);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 749b222e6498..8c8965cc4aab 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -291,8 +291,8 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
291{ 291{
292 struct inode *inode = file_inode(vma->vm_file); 292 struct inode *inode = file_inode(vma->vm_file);
293 struct super_block *sb = inode->i_sb; 293 struct super_block *sb = inode->i_sb;
294 int ret = VM_FAULT_NOPAGE;
295 loff_t size; 294 loff_t size;
295 int ret;
296 296
297 sb_start_pagefault(sb); 297 sb_start_pagefault(sb);
298 file_update_time(vma->vm_file); 298 file_update_time(vma->vm_file);
@@ -300,6 +300,8 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
300 size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; 300 size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
301 if (vmf->pgoff >= size) 301 if (vmf->pgoff >= size)
302 ret = VM_FAULT_SIGBUS; 302 ret = VM_FAULT_SIGBUS;
303 else
304 ret = dax_pfn_mkwrite(vma, vmf);
303 up_read(&EXT4_I(inode)->i_mmap_sem); 305 up_read(&EXT4_I(inode)->i_mmap_sem);
304 sb_end_pagefault(sb); 306 sb_end_pagefault(sb);
305 307
diff --git a/fs/inode.c b/fs/inode.c
index e491e54d2430..1e6dd388ba7f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -495,7 +495,7 @@ void clear_inode(struct inode *inode)
495 */ 495 */
496 spin_lock_irq(&inode->i_data.tree_lock); 496 spin_lock_irq(&inode->i_data.tree_lock);
497 BUG_ON(inode->i_data.nrpages); 497 BUG_ON(inode->i_data.nrpages);
498 BUG_ON(inode->i_data.nrshadows); 498 BUG_ON(inode->i_data.nrexceptional);
499 spin_unlock_irq(&inode->i_data.tree_lock); 499 spin_unlock_irq(&inode->i_data.tree_lock);
500 BUG_ON(!list_empty(&inode->i_data.private_list)); 500 BUG_ON(!list_empty(&inode->i_data.private_list));
501 BUG_ON(!(inode->i_state & I_FREEING)); 501 BUG_ON(!(inode->i_state & I_FREEING));
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index a3750f902adc..0ae91ad6df2d 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -17,6 +17,7 @@
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/vmalloc.h> 18#include <linux/vmalloc.h>
19#include <linux/mtd/mtd.h> 19#include <linux/mtd/mtd.h>
20#include <linux/mm.h> /* kvfree() */
20#include "nodelist.h" 21#include "nodelist.h"
21 22
22static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *, 23static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
@@ -383,12 +384,7 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
383 return 0; 384 return 0;
384 385
385 out_free: 386 out_free:
386#ifndef __ECOS 387 kvfree(c->blocks);
387 if (jffs2_blocks_use_vmalloc(c))
388 vfree(c->blocks);
389 else
390#endif
391 kfree(c->blocks);
392 388
393 return ret; 389 return ret;
394} 390}
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 2caf1682036d..bead25ae8fe4 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -596,10 +596,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
596out_root: 596out_root:
597 jffs2_free_ino_caches(c); 597 jffs2_free_ino_caches(c);
598 jffs2_free_raw_node_refs(c); 598 jffs2_free_raw_node_refs(c);
599 if (jffs2_blocks_use_vmalloc(c)) 599 kvfree(c->blocks);
600 vfree(c->blocks);
601 else
602 kfree(c->blocks);
603 out_inohash: 600 out_inohash:
604 jffs2_clear_xattr_subsystem(c); 601 jffs2_clear_xattr_subsystem(c);
605 kfree(c->inocache_list); 602 kfree(c->inocache_list);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index bb080c272149..0a9a114bb9d1 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -331,10 +331,7 @@ static void jffs2_put_super (struct super_block *sb)
331 331
332 jffs2_free_ino_caches(c); 332 jffs2_free_ino_caches(c);
333 jffs2_free_raw_node_refs(c); 333 jffs2_free_raw_node_refs(c);
334 if (jffs2_blocks_use_vmalloc(c)) 334 kvfree(c->blocks);
335 vfree(c->blocks);
336 else
337 kfree(c->blocks);
338 jffs2_flash_cleanup(c); 335 jffs2_flash_cleanup(c);
339 kfree(c->inocache_list); 336 kfree(c->inocache_list);
340 jffs2_clear_xattr_subsystem(c); 337 jffs2_clear_xattr_subsystem(c);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 0fbb4c7c72e8..a522c15a0bfd 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -279,17 +279,12 @@ static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
279{ 279{
280 int i; 280 int i;
281 int nr_groups = bitmap->s_nr_groups; 281 int nr_groups = bitmap->s_nr_groups;
282 int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) *
283 nr_groups);
284 282
285 for (i = 0; i < nr_groups; i++) 283 for (i = 0; i < nr_groups; i++)
286 if (bitmap->s_block_bitmap[i]) 284 if (bitmap->s_block_bitmap[i])
287 brelse(bitmap->s_block_bitmap[i]); 285 brelse(bitmap->s_block_bitmap[i]);
288 286
289 if (size <= PAGE_SIZE) 287 kvfree(bitmap);
290 kfree(bitmap);
291 else
292 vfree(bitmap);
293} 288}
294 289
295static void udf_free_partition(struct udf_part_map *map) 290static void udf_free_partition(struct udf_part_map *map)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ebe9b8290a70..55e16e2402a7 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1610,9 +1610,8 @@ xfs_filemap_pmd_fault(
1610/* 1610/*
1611 * pfn_mkwrite was originally intended to ensure we capture time stamp 1611 * pfn_mkwrite was originally intended to ensure we capture time stamp
1612 * updates on write faults. In reality, it needs to serialise against 1612 * updates on write faults. In reality, it needs to serialise against
1613 * truncate similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite() 1613 * truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED
1614 * here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault 1614 * to ensure we serialise the fault barrier in place.
1615 * barrier in place.
1616 */ 1615 */
1617static int 1616static int
1618xfs_filemap_pfn_mkwrite( 1617xfs_filemap_pfn_mkwrite(
@@ -1635,6 +1634,8 @@ xfs_filemap_pfn_mkwrite(
1635 size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; 1634 size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
1636 if (vmf->pgoff >= size) 1635 if (vmf->pgoff >= size)
1637 ret = VM_FAULT_SIGBUS; 1636 ret = VM_FAULT_SIGBUS;
1637 else if (IS_DAX(inode))
1638 ret = dax_pfn_mkwrite(vma, vmf);
1638 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); 1639 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
1639 sb_end_pagefault(inode->i_sb); 1640 sb_end_pagefault(inode->i_sb);
1640 return ret; 1641 return ret;
diff --git a/include/linux/dax.h b/include/linux/dax.h
index b415e521528d..8204c3dc3800 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -36,4 +36,11 @@ static inline bool vma_is_dax(struct vm_area_struct *vma)
36{ 36{
37 return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); 37 return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
38} 38}
39
40static inline bool dax_mapping(struct address_space *mapping)
41{
42 return mapping->host && IS_DAX(mapping->host);
43}
44int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
45 loff_t end);
39#endif 46#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index eb73d74ed992..0d7570320d63 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -433,7 +433,8 @@ struct address_space {
433 struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */ 433 struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */
434 /* Protected by tree_lock together with the radix tree */ 434 /* Protected by tree_lock together with the radix tree */
435 unsigned long nrpages; /* number of total pages */ 435 unsigned long nrpages; /* number of total pages */
436 unsigned long nrshadows; /* number of shadow entries */ 436 /* number of shadow or DAX exceptional entries */
437 unsigned long nrexceptional;
437 pgoff_t writeback_index;/* writeback starts here */ 438 pgoff_t writeback_index;/* writeback starts here */
438 const struct address_space_operations *a_ops; /* methods */ 439 const struct address_space_operations *a_ops; /* methods */
439 unsigned long flags; /* error bits/gfp mask */ 440 unsigned long flags; /* error bits/gfp mask */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 4d08b6c33557..92395a0a7dc5 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -361,6 +361,9 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
361 unsigned int nr_pages, struct page **pages); 361 unsigned int nr_pages, struct page **pages);
362unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, 362unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
363 int tag, unsigned int nr_pages, struct page **pages); 363 int tag, unsigned int nr_pages, struct page **pages);
364unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
365 int tag, unsigned int nr_entries,
366 struct page **entries, pgoff_t *indices);
364 367
365struct page *grab_cache_page_write_begin(struct address_space *mapping, 368struct page *grab_cache_page_write_begin(struct address_space *mapping,
366 pgoff_t index, unsigned flags); 369 pgoff_t index, unsigned flags);
diff --git a/include/linux/pmem.h b/include/linux/pmem.h
index acfea8ce4a07..7c3d11a6b4ad 100644
--- a/include/linux/pmem.h
+++ b/include/linux/pmem.h
@@ -53,12 +53,18 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size)
53{ 53{
54 BUG(); 54 BUG();
55} 55}
56
57static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
58{
59 BUG();
60}
56#endif 61#endif
57 62
58/* 63/*
59 * Architectures that define ARCH_HAS_PMEM_API must provide 64 * Architectures that define ARCH_HAS_PMEM_API must provide
60 * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(), 65 * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(),
61 * arch_copy_from_iter_pmem(), arch_clear_pmem() and arch_has_wmb_pmem(). 66 * arch_copy_from_iter_pmem(), arch_clear_pmem(), arch_wb_cache_pmem()
67 * and arch_has_wmb_pmem().
62 */ 68 */
63static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size) 69static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size)
64{ 70{
@@ -178,4 +184,18 @@ static inline void clear_pmem(void __pmem *addr, size_t size)
178 else 184 else
179 default_clear_pmem(addr, size); 185 default_clear_pmem(addr, size);
180} 186}
187
188/**
189 * wb_cache_pmem - write back processor cache for PMEM memory range
190 * @addr: virtual start address
191 * @size: number of bytes to write back
192 *
193 * Write back the processor cache range starting at 'addr' for 'size' bytes.
194 * This function requires explicit ordering with a wmb_pmem() call.
195 */
196static inline void wb_cache_pmem(void __pmem *addr, size_t size)
197{
198 if (arch_has_pmem_api())
199 arch_wb_cache_pmem(addr, size);
200}
181#endif /* __PMEM_H__ */ 201#endif /* __PMEM_H__ */
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 57e7d87d2d4c..7c88ad156a29 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -51,6 +51,15 @@
51#define RADIX_TREE_EXCEPTIONAL_ENTRY 2 51#define RADIX_TREE_EXCEPTIONAL_ENTRY 2
52#define RADIX_TREE_EXCEPTIONAL_SHIFT 2 52#define RADIX_TREE_EXCEPTIONAL_SHIFT 2
53 53
54#define RADIX_DAX_MASK 0xf
55#define RADIX_DAX_SHIFT 4
56#define RADIX_DAX_PTE (0x4 | RADIX_TREE_EXCEPTIONAL_ENTRY)
57#define RADIX_DAX_PMD (0x8 | RADIX_TREE_EXCEPTIONAL_ENTRY)
58#define RADIX_DAX_TYPE(entry) ((unsigned long)entry & RADIX_DAX_MASK)
59#define RADIX_DAX_SECTOR(entry) (((unsigned long)entry >> RADIX_DAX_SHIFT))
60#define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \
61 RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE)))
62
54static inline int radix_tree_is_indirect_ptr(void *ptr) 63static inline int radix_tree_is_indirect_ptr(void *ptr)
55{ 64{
56 return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR); 65 return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR);
diff --git a/ipc/sem.c b/ipc/sem.c
index b471e5a3863d..cddd5b5fde51 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1493,7 +1493,7 @@ out_rcu_wakeup:
1493 wake_up_sem_queue_do(&tasks); 1493 wake_up_sem_queue_do(&tasks);
1494out_free: 1494out_free:
1495 if (sem_io != fast_sem_io) 1495 if (sem_io != fast_sem_io)
1496 ipc_free(sem_io, sizeof(ushort)*nsems); 1496 ipc_free(sem_io);
1497 return err; 1497 return err;
1498} 1498}
1499 1499
diff --git a/ipc/util.c b/ipc/util.c
index 0f401d94b7c6..798cad18dd87 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -414,17 +414,12 @@ void *ipc_alloc(int size)
414/** 414/**
415 * ipc_free - free ipc space 415 * ipc_free - free ipc space
416 * @ptr: pointer returned by ipc_alloc 416 * @ptr: pointer returned by ipc_alloc
417 * @size: size of block
418 * 417 *
419 * Free a block created with ipc_alloc(). The caller must know the size 418 * Free a block created with ipc_alloc().
420 * used in the allocation call.
421 */ 419 */
422void ipc_free(void *ptr, int size) 420void ipc_free(void *ptr)
423{ 421{
424 if (size > PAGE_SIZE) 422 kvfree(ptr);
425 vfree(ptr);
426 else
427 kfree(ptr);
428} 423}
429 424
430/** 425/**
diff --git a/ipc/util.h b/ipc/util.h
index 3a8a5a0eca62..51f7ca58ac67 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -118,7 +118,7 @@ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
118 * both function can sleep 118 * both function can sleep
119 */ 119 */
120void *ipc_alloc(int size); 120void *ipc_alloc(int size);
121void ipc_free(void *ptr, int size); 121void ipc_free(void *ptr);
122 122
123/* 123/*
124 * For allocation that need to be freed by RCU. 124 * For allocation that need to be freed by RCU.
diff --git a/mm/filemap.c b/mm/filemap.c
index 847ee43c2806..2e7c8d980d5e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -11,6 +11,7 @@
11 */ 11 */
12#include <linux/export.h> 12#include <linux/export.h>
13#include <linux/compiler.h> 13#include <linux/compiler.h>
14#include <linux/dax.h>
14#include <linux/fs.h> 15#include <linux/fs.h>
15#include <linux/uaccess.h> 16#include <linux/uaccess.h>
16#include <linux/capability.h> 17#include <linux/capability.h>
@@ -123,9 +124,9 @@ static void page_cache_tree_delete(struct address_space *mapping,
123 __radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot); 124 __radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);
124 125
125 if (shadow) { 126 if (shadow) {
126 mapping->nrshadows++; 127 mapping->nrexceptional++;
127 /* 128 /*
128 * Make sure the nrshadows update is committed before 129 * Make sure the nrexceptional update is committed before
129 * the nrpages update so that final truncate racing 130 * the nrpages update so that final truncate racing
130 * with reclaim does not see both counters 0 at the 131 * with reclaim does not see both counters 0 at the
131 * same time and miss a shadow entry. 132 * same time and miss a shadow entry.
@@ -481,6 +482,12 @@ int filemap_write_and_wait_range(struct address_space *mapping,
481{ 482{
482 int err = 0; 483 int err = 0;
483 484
485 if (dax_mapping(mapping) && mapping->nrexceptional) {
486 err = dax_writeback_mapping_range(mapping, lstart, lend);
487 if (err)
488 return err;
489 }
490
484 if (mapping->nrpages) { 491 if (mapping->nrpages) {
485 err = __filemap_fdatawrite_range(mapping, lstart, lend, 492 err = __filemap_fdatawrite_range(mapping, lstart, lend,
486 WB_SYNC_ALL); 493 WB_SYNC_ALL);
@@ -579,9 +586,13 @@ static int page_cache_tree_insert(struct address_space *mapping,
579 p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); 586 p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
580 if (!radix_tree_exceptional_entry(p)) 587 if (!radix_tree_exceptional_entry(p))
581 return -EEXIST; 588 return -EEXIST;
589
590 if (WARN_ON(dax_mapping(mapping)))
591 return -EINVAL;
592
582 if (shadowp) 593 if (shadowp)
583 *shadowp = p; 594 *shadowp = p;
584 mapping->nrshadows--; 595 mapping->nrexceptional--;
585 if (node) 596 if (node)
586 workingset_node_shadows_dec(node); 597 workingset_node_shadows_dec(node);
587 } 598 }
@@ -1245,9 +1256,9 @@ repeat:
1245 if (radix_tree_deref_retry(page)) 1256 if (radix_tree_deref_retry(page))
1246 goto restart; 1257 goto restart;
1247 /* 1258 /*
1248 * A shadow entry of a recently evicted page, 1259 * A shadow entry of a recently evicted page, a swap
1249 * or a swap entry from shmem/tmpfs. Return 1260 * entry from shmem/tmpfs or a DAX entry. Return it
1250 * it without attempting to raise page count. 1261 * without attempting to raise page count.
1251 */ 1262 */
1252 goto export; 1263 goto export;
1253 } 1264 }
@@ -1494,6 +1505,74 @@ repeat:
1494} 1505}
1495EXPORT_SYMBOL(find_get_pages_tag); 1506EXPORT_SYMBOL(find_get_pages_tag);
1496 1507
1508/**
1509 * find_get_entries_tag - find and return entries that match @tag
1510 * @mapping: the address_space to search
1511 * @start: the starting page cache index
1512 * @tag: the tag index
1513 * @nr_entries: the maximum number of entries
1514 * @entries: where the resulting entries are placed
1515 * @indices: the cache indices corresponding to the entries in @entries
1516 *
1517 * Like find_get_entries, except we only return entries which are tagged with
1518 * @tag.
1519 */
1520unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
1521 int tag, unsigned int nr_entries,
1522 struct page **entries, pgoff_t *indices)
1523{
1524 void **slot;
1525 unsigned int ret = 0;
1526 struct radix_tree_iter iter;
1527
1528 if (!nr_entries)
1529 return 0;
1530
1531 rcu_read_lock();
1532restart:
1533 radix_tree_for_each_tagged(slot, &mapping->page_tree,
1534 &iter, start, tag) {
1535 struct page *page;
1536repeat:
1537 page = radix_tree_deref_slot(slot);
1538 if (unlikely(!page))
1539 continue;
1540 if (radix_tree_exception(page)) {
1541 if (radix_tree_deref_retry(page)) {
1542 /*
1543 * Transient condition which can only trigger
1544 * when entry at index 0 moves out of or back
1545 * to root: none yet gotten, safe to restart.
1546 */
1547 goto restart;
1548 }
1549
1550 /*
1551 * A shadow entry of a recently evicted page, a swap
1552 * entry from shmem/tmpfs or a DAX entry. Return it
1553 * without attempting to raise page count.
1554 */
1555 goto export;
1556 }
1557 if (!page_cache_get_speculative(page))
1558 goto repeat;
1559
1560 /* Has the page moved? */
1561 if (unlikely(page != *slot)) {
1562 page_cache_release(page);
1563 goto repeat;
1564 }
1565export:
1566 indices[ret] = iter.index;
1567 entries[ret] = page;
1568 if (++ret == nr_entries)
1569 break;
1570 }
1571 rcu_read_unlock();
1572 return ret;
1573}
1574EXPORT_SYMBOL(find_get_entries_tag);
1575
1497/* 1576/*
1498 * CD/DVDs are error prone. When a medium error occurs, the driver may fail 1577 * CD/DVDs are error prone. When a medium error occurs, the driver may fail
1499 * a _large_ part of the i/o request. Imagine the worst scenario: 1578 * a _large_ part of the i/o request. Imagine the worst scenario:
diff --git a/mm/percpu.c b/mm/percpu.c
index 8a943b97a053..998607adf6eb 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -305,16 +305,12 @@ static void *pcpu_mem_zalloc(size_t size)
305/** 305/**
306 * pcpu_mem_free - free memory 306 * pcpu_mem_free - free memory
307 * @ptr: memory to free 307 * @ptr: memory to free
308 * @size: size of the area
309 * 308 *
310 * Free @ptr. @ptr should have been allocated using pcpu_mem_zalloc(). 309 * Free @ptr. @ptr should have been allocated using pcpu_mem_zalloc().
311 */ 310 */
312static void pcpu_mem_free(void *ptr, size_t size) 311static void pcpu_mem_free(void *ptr)
313{ 312{
314 if (size <= PAGE_SIZE) 313 kvfree(ptr);
315 kfree(ptr);
316 else
317 vfree(ptr);
318} 314}
319 315
320/** 316/**
@@ -463,8 +459,8 @@ out_unlock:
463 * pcpu_mem_free() might end up calling vfree() which uses 459 * pcpu_mem_free() might end up calling vfree() which uses
464 * IRQ-unsafe lock and thus can't be called under pcpu_lock. 460 * IRQ-unsafe lock and thus can't be called under pcpu_lock.
465 */ 461 */
466 pcpu_mem_free(old, old_size); 462 pcpu_mem_free(old);
467 pcpu_mem_free(new, new_size); 463 pcpu_mem_free(new);
468 464
469 return 0; 465 return 0;
470} 466}
@@ -732,7 +728,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
732 chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC * 728 chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
733 sizeof(chunk->map[0])); 729 sizeof(chunk->map[0]));
734 if (!chunk->map) { 730 if (!chunk->map) {
735 pcpu_mem_free(chunk, pcpu_chunk_struct_size); 731 pcpu_mem_free(chunk);
736 return NULL; 732 return NULL;
737 } 733 }
738 734
@@ -753,8 +749,8 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk)
753{ 749{
754 if (!chunk) 750 if (!chunk)
755 return; 751 return;
756 pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0])); 752 pcpu_mem_free(chunk->map);
757 pcpu_mem_free(chunk, pcpu_chunk_struct_size); 753 pcpu_mem_free(chunk);
758} 754}
759 755
760/** 756/**
diff --git a/mm/truncate.c b/mm/truncate.c
index 76e35ad97102..e3ee0e27cd17 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -9,6 +9,7 @@
9 9
10#include <linux/kernel.h> 10#include <linux/kernel.h>
11#include <linux/backing-dev.h> 11#include <linux/backing-dev.h>
12#include <linux/dax.h>
12#include <linux/gfp.h> 13#include <linux/gfp.h>
13#include <linux/mm.h> 14#include <linux/mm.h>
14#include <linux/swap.h> 15#include <linux/swap.h>
@@ -34,31 +35,39 @@ static void clear_exceptional_entry(struct address_space *mapping,
34 return; 35 return;
35 36
36 spin_lock_irq(&mapping->tree_lock); 37 spin_lock_irq(&mapping->tree_lock);
37 /* 38
38 * Regular page slots are stabilized by the page lock even 39 if (dax_mapping(mapping)) {
39 * without the tree itself locked. These unlocked entries 40 if (radix_tree_delete_item(&mapping->page_tree, index, entry))
40 * need verification under the tree lock. 41 mapping->nrexceptional--;
41 */ 42 } else {
42 if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot)) 43 /*
43 goto unlock; 44 * Regular page slots are stabilized by the page lock even
44 if (*slot != entry) 45 * without the tree itself locked. These unlocked entries
45 goto unlock; 46 * need verification under the tree lock.
46 radix_tree_replace_slot(slot, NULL); 47 */
47 mapping->nrshadows--; 48 if (!__radix_tree_lookup(&mapping->page_tree, index, &node,
48 if (!node) 49 &slot))
49 goto unlock; 50 goto unlock;
50 workingset_node_shadows_dec(node); 51 if (*slot != entry)
51 /* 52 goto unlock;
52 * Don't track node without shadow entries. 53 radix_tree_replace_slot(slot, NULL);
53 * 54 mapping->nrexceptional--;
54 * Avoid acquiring the list_lru lock if already untracked. 55 if (!node)
55 * The list_empty() test is safe as node->private_list is 56 goto unlock;
56 * protected by mapping->tree_lock. 57 workingset_node_shadows_dec(node);
57 */ 58 /*
58 if (!workingset_node_shadows(node) && 59 * Don't track node without shadow entries.
59 !list_empty(&node->private_list)) 60 *
60 list_lru_del(&workingset_shadow_nodes, &node->private_list); 61 * Avoid acquiring the list_lru lock if already untracked.
61 __radix_tree_delete_node(&mapping->page_tree, node); 62 * The list_empty() test is safe as node->private_list is
63 * protected by mapping->tree_lock.
64 */
65 if (!workingset_node_shadows(node) &&
66 !list_empty(&node->private_list))
67 list_lru_del(&workingset_shadow_nodes,
68 &node->private_list);
69 __radix_tree_delete_node(&mapping->page_tree, node);
70 }
62unlock: 71unlock:
63 spin_unlock_irq(&mapping->tree_lock); 72 spin_unlock_irq(&mapping->tree_lock);
64} 73}
@@ -228,7 +237,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
228 int i; 237 int i;
229 238
230 cleancache_invalidate_inode(mapping); 239 cleancache_invalidate_inode(mapping);
231 if (mapping->nrpages == 0 && mapping->nrshadows == 0) 240 if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
232 return; 241 return;
233 242
234 /* Offsets within partial pages */ 243 /* Offsets within partial pages */
@@ -402,7 +411,7 @@ EXPORT_SYMBOL(truncate_inode_pages);
402 */ 411 */
403void truncate_inode_pages_final(struct address_space *mapping) 412void truncate_inode_pages_final(struct address_space *mapping)
404{ 413{
405 unsigned long nrshadows; 414 unsigned long nrexceptional;
406 unsigned long nrpages; 415 unsigned long nrpages;
407 416
408 /* 417 /*
@@ -416,14 +425,14 @@ void truncate_inode_pages_final(struct address_space *mapping)
416 425
417 /* 426 /*
418 * When reclaim installs eviction entries, it increases 427 * When reclaim installs eviction entries, it increases
419 * nrshadows first, then decreases nrpages. Make sure we see 428 * nrexceptional first, then decreases nrpages. Make sure we see
420 * this in the right order or we might miss an entry. 429 * this in the right order or we might miss an entry.
421 */ 430 */
422 nrpages = mapping->nrpages; 431 nrpages = mapping->nrpages;
423 smp_rmb(); 432 smp_rmb();
424 nrshadows = mapping->nrshadows; 433 nrexceptional = mapping->nrexceptional;
425 434
426 if (nrpages || nrshadows) { 435 if (nrpages || nrexceptional) {
427 /* 436 /*
428 * As truncation uses a lockless tree lookup, cycle 437 * As truncation uses a lockless tree lookup, cycle
429 * the tree lock to make sure any ongoing tree 438 * the tree lock to make sure any ongoing tree
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bd620b65db52..eb3dd37ccd7c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -46,6 +46,7 @@
46#include <linux/oom.h> 46#include <linux/oom.h>
47#include <linux/prefetch.h> 47#include <linux/prefetch.h>
48#include <linux/printk.h> 48#include <linux/printk.h>
49#include <linux/dax.h>
49 50
50#include <asm/tlbflush.h> 51#include <asm/tlbflush.h>
51#include <asm/div64.h> 52#include <asm/div64.h>
@@ -671,9 +672,15 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
671 * inode reclaim needs to empty out the radix tree or 672 * inode reclaim needs to empty out the radix tree or
672 * the nodes are lost. Don't plant shadows behind its 673 * the nodes are lost. Don't plant shadows behind its
673 * back. 674 * back.
675 *
676 * We also don't store shadows for DAX mappings because the
677 * only page cache pages found in these are zero pages
678 * covering holes, and because we don't want to mix DAX
679 * exceptional entries and shadow exceptional entries in the
680 * same page_tree.
674 */ 681 */
675 if (reclaimed && page_is_file_cache(page) && 682 if (reclaimed && page_is_file_cache(page) &&
676 !mapping_exiting(mapping)) 683 !mapping_exiting(mapping) && !dax_mapping(mapping))
677 shadow = workingset_eviction(mapping, page); 684 shadow = workingset_eviction(mapping, page);
678 __delete_from_page_cache(page, shadow, memcg); 685 __delete_from_page_cache(page, shadow, memcg);
679 spin_unlock_irqrestore(&mapping->tree_lock, flags); 686 spin_unlock_irqrestore(&mapping->tree_lock, flags);
diff --git a/mm/workingset.c b/mm/workingset.c
index aa017133744b..61ead9e5549d 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -351,8 +351,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
351 node->slots[i] = NULL; 351 node->slots[i] = NULL;
352 BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT)); 352 BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT));
353 node->count -= 1U << RADIX_TREE_COUNT_SHIFT; 353 node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
354 BUG_ON(!mapping->nrshadows); 354 BUG_ON(!mapping->nrexceptional);
355 mapping->nrshadows--; 355 mapping->nrexceptional--;
356 } 356 }
357 } 357 }
358 BUG_ON(node->count); 358 BUG_ON(node->count);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 744e5936c10d..7aea0ccb6be6 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -289,10 +289,8 @@ static void __node_free_rcu(struct rcu_head *head)
289 289
290 if (!n->tn_bits) 290 if (!n->tn_bits)
291 kmem_cache_free(trie_leaf_kmem, n); 291 kmem_cache_free(trie_leaf_kmem, n);
292 else if (n->tn_bits <= TNODE_KMALLOC_MAX)
293 kfree(n);
294 else 292 else
295 vfree(n); 293 kvfree(n);
296} 294}
297 295
298#define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu) 296#define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)