summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Nick Piggin <npiggin@suse.de> 2007-07-19 04:46:59 -0400
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-07-19 13:04:41 -0400
commit: 54cb8821de07f2ffcd28c380ce9b93d5784b40d7 (patch)
tree: 1de676534963d96af42863b20191bc9f80060dea
parent: d00806b183152af6d24f46f0c33f14162ca1262a (diff)
mm: merge populate and nopage into fault (fixes nonlinear)
Nonlinear mappings are (AFAIKS) simply a virtual memory concept that encodes the virtual address -> file offset differently from linear mappings. ->populate is a layering violation because the filesystem/pagecache code should not need to know anything about the virtual memory mapping. The hitch here is that the ->nopage handler didn't pass down enough information (ie. pgoff). But it is more logical to pass pgoff rather than have the ->nopage function calculate it itself anyway (because that's a similar layering violation). Having the populate handler install the pte itself is likewise a nasty thing to be doing. This patch introduces a new fault handler that replaces ->nopage and ->populate and (later) ->nopfn. Most of the old mechanism is still in place so there is a lot of duplication and nice cleanups that can be removed if everyone switches over. The rationale for doing this in the first place is that nonlinear mappings are subject to the pagefault vs invalidate/truncate race too, and it seemed stupid to duplicate the synchronisation logic rather than just consolidate the two. After this patch, MAP_NONBLOCK no longer sets up ptes for pages present in pagecache. Seems like a fringe functionality anyway. NOPAGE_REFAULT is removed. This should be implemented with ->fault, and no users have hit mainline yet. [akpm@linux-foundation.org: cleanup] [randy.dunlap@oracle.com: doc. fixes for readahead] [akpm@linux-foundation.org: build fix] Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com> Cc: Mark Fasheh <mark.fasheh@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- Documentation/feature-removal-schedule.txt 27
-rw-r--r-- Documentation/filesystems/Locking 2
-rw-r--r-- fs/gfs2/ops_address.c 2
-rw-r--r-- fs/gfs2/ops_file.c 2
-rw-r--r-- fs/gfs2/ops_vm.c 36
-rw-r--r-- fs/ncpfs/mmap.c 23
-rw-r--r-- fs/ocfs2/aops.c 2
-rw-r--r-- fs/ocfs2/mmap.c 17
-rw-r--r-- fs/xfs/linux-2.6/xfs_file.c 23
-rw-r--r-- include/linux/mm.h 41
-rw-r--r-- ipc/shm.c 9
-rw-r--r-- mm/filemap.c 94
-rw-r--r-- mm/filemap_xip.c 54
-rw-r--r-- mm/fremap.c 103
-rw-r--r-- mm/memory.c 132
-rw-r--r-- mm/mmap.c 8
-rw-r--r-- mm/nommu.c 3
-rw-r--r-- mm/rmap.c 4
-rw-r--r-- mm/shmem.c 82
-rw-r--r-- mm/truncate.c 2
20 files changed, 394 insertions, 272 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 66c8b4b165c1..716568afdff8 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -135,6 +135,33 @@ Who: Greg Kroah-Hartman <gregkh@suse.de>
135 135
136--------------------------- 136---------------------------
137 137
138What: filemap_nopage, filemap_populate
139When: April 2007
140Why: These legacy interfaces no longer have any callers in the kernel and
141 any functionality provided can be provided with filemap_fault. The
142 removal schedule is short because they are a big maintenance burden
143 and have some bugs.
144Who: Nick Piggin <npiggin@suse.de>
145
146---------------------------
147
148What: vm_ops.populate, install_page
149When: April 2007
150Why: These legacy interfaces no longer have any callers in the kernel and
151 any functionality provided can be provided with vm_ops.fault.
152Who: Nick Piggin <npiggin@suse.de>
153
154---------------------------
155
156What: vm_ops.nopage
157When: February 2008, provided in-kernel callers have been converted
158Why: This interface is replaced by vm_ops.fault, but it has been around
159 forever, is used by a lot of drivers, and doesn't cost much to
160 maintain.
161Who: Nick Piggin <npiggin@suse.de>
162
163---------------------------
164
138What: Interrupt only SA_* flags 165What: Interrupt only SA_* flags
139When: September 2007 166When: September 2007
140Why: The interrupt related SA_* flags are replaced by IRQF_* to move them 167Why: The interrupt related SA_* flags are replaced by IRQF_* to move them
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index d866551be037..970c8ec1a05b 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -510,12 +510,14 @@ More details about quota locking can be found in fs/dquot.c.
510prototypes: 510prototypes:
511 void (*open)(struct vm_area_struct*); 511 void (*open)(struct vm_area_struct*);
512 void (*close)(struct vm_area_struct*); 512 void (*close)(struct vm_area_struct*);
513 struct page *(*fault)(struct vm_area_struct*, struct fault_data *);
513 struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *); 514 struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *);
514 515
515locking rules: 516locking rules:
516 BKL mmap_sem 517 BKL mmap_sem
517open: no yes 518open: no yes
518close: no yes 519close: no yes
520fault: no yes
519nopage: no yes 521nopage: no yes
520 522
521================================================================================ 523================================================================================
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 26c888890c24..ce90032c010e 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -251,7 +251,7 @@ static int gfs2_readpage(struct file *file, struct page *page)
251 if (file) { 251 if (file) {
252 gf = file->private_data; 252 gf = file->private_data;
253 if (test_bit(GFF_EXLOCK, &gf->f_flags)) 253 if (test_bit(GFF_EXLOCK, &gf->f_flags))
254 /* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */ 254 /* gfs2_sharewrite_fault has grabbed the ip->i_gl already */
255 goto skip_lock; 255 goto skip_lock;
256 } 256 }
257 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); 257 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index bad0b24cb773..581ac11b2656 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -364,7 +364,7 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
364 else 364 else
365 vma->vm_ops = &gfs2_vm_ops_private; 365 vma->vm_ops = &gfs2_vm_ops_private;
366 366
367 vma->vm_flags |= VM_CAN_INVALIDATE; 367 vma->vm_flags |= VM_CAN_INVALIDATE|VM_CAN_NONLINEAR;
368 368
369 gfs2_glock_dq_uninit(&i_gh); 369 gfs2_glock_dq_uninit(&i_gh);
370 370
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
index d5a98cbfebdc..e9fe6eb74e75 100644
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@@ -27,13 +27,13 @@
27#include "trans.h" 27#include "trans.h"
28#include "util.h" 28#include "util.h"
29 29
30static struct page *gfs2_private_nopage(struct vm_area_struct *area, 30static struct page *gfs2_private_fault(struct vm_area_struct *vma,
31 unsigned long address, int *type) 31 struct fault_data *fdata)
32{ 32{
33 struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host); 33 struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host);
34 34
35 set_bit(GIF_PAGED, &ip->i_flags); 35 set_bit(GIF_PAGED, &ip->i_flags);
36 return filemap_nopage(area, address, type); 36 return filemap_fault(vma, fdata);
37} 37}
38 38
39static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) 39static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
@@ -104,16 +104,14 @@ out:
104 return error; 104 return error;
105} 105}
106 106
107static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area, 107static struct page *gfs2_sharewrite_fault(struct vm_area_struct *vma,
108 unsigned long address, int *type) 108 struct fault_data *fdata)
109{ 109{
110 struct file *file = area->vm_file; 110 struct file *file = vma->vm_file;
111 struct gfs2_file *gf = file->private_data; 111 struct gfs2_file *gf = file->private_data;
112 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 112 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
113 struct gfs2_holder i_gh; 113 struct gfs2_holder i_gh;
114 struct page *result = NULL; 114 struct page *result = NULL;
115 unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) +
116 area->vm_pgoff;
117 int alloc_required; 115 int alloc_required;
118 int error; 116 int error;
119 117
@@ -124,23 +122,27 @@ static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
124 set_bit(GIF_PAGED, &ip->i_flags); 122 set_bit(GIF_PAGED, &ip->i_flags);
125 set_bit(GIF_SW_PAGED, &ip->i_flags); 123 set_bit(GIF_SW_PAGED, &ip->i_flags);
126 124
127 error = gfs2_write_alloc_required(ip, (u64)index << PAGE_CACHE_SHIFT, 125 error = gfs2_write_alloc_required(ip,
128 PAGE_CACHE_SIZE, &alloc_required); 126 (u64)fdata->pgoff << PAGE_CACHE_SHIFT,
129 if (error) 127 PAGE_CACHE_SIZE, &alloc_required);
128 if (error) {
129 fdata->type = VM_FAULT_OOM; /* XXX: are these right? */
130 goto out; 130 goto out;
131 }
131 132
132 set_bit(GFF_EXLOCK, &gf->f_flags); 133 set_bit(GFF_EXLOCK, &gf->f_flags);
133 result = filemap_nopage(area, address, type); 134 result = filemap_fault(vma, fdata);
134 clear_bit(GFF_EXLOCK, &gf->f_flags); 135 clear_bit(GFF_EXLOCK, &gf->f_flags);
135 if (!result || result == NOPAGE_OOM) 136 if (!result)
136 goto out; 137 goto out;
137 138
138 if (alloc_required) { 139 if (alloc_required) {
139 error = alloc_page_backing(ip, result); 140 error = alloc_page_backing(ip, result);
140 if (error) { 141 if (error) {
141 if (area->vm_flags & VM_CAN_INVALIDATE) 142 if (vma->vm_flags & VM_CAN_INVALIDATE)
142 unlock_page(result); 143 unlock_page(result);
143 page_cache_release(result); 144 page_cache_release(result);
145 fdata->type = VM_FAULT_OOM;
144 result = NULL; 146 result = NULL;
145 goto out; 147 goto out;
146 } 148 }
@@ -154,10 +156,10 @@ out:
154} 156}
155 157
156struct vm_operations_struct gfs2_vm_ops_private = { 158struct vm_operations_struct gfs2_vm_ops_private = {
157 .nopage = gfs2_private_nopage, 159 .fault = gfs2_private_fault,
158}; 160};
159 161
160struct vm_operations_struct gfs2_vm_ops_sharewrite = { 162struct vm_operations_struct gfs2_vm_ops_sharewrite = {
161 .nopage = gfs2_sharewrite_nopage, 163 .fault = gfs2_sharewrite_fault,
162}; 164};
163 165
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 5416673418b8..af48b792ca04 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -25,8 +25,8 @@
25/* 25/*
26 * Fill in the supplied page for mmap 26 * Fill in the supplied page for mmap
27 */ 27 */
28static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area, 28static struct page* ncp_file_mmap_fault(struct vm_area_struct *area,
29 unsigned long address, int *type) 29 struct fault_data *fdata)
30{ 30{
31 struct file *file = area->vm_file; 31 struct file *file = area->vm_file;
32 struct dentry *dentry = file->f_path.dentry; 32 struct dentry *dentry = file->f_path.dentry;
@@ -40,15 +40,17 @@ static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area,
40 40
41 page = alloc_page(GFP_HIGHUSER); /* ncpfs has nothing against high pages 41 page = alloc_page(GFP_HIGHUSER); /* ncpfs has nothing against high pages
42 as long as recvmsg and memset works on it */ 42 as long as recvmsg and memset works on it */
43 if (!page) 43 if (!page) {
44 return page; 44 fdata->type = VM_FAULT_OOM;
45 return NULL;
46 }
45 pg_addr = kmap(page); 47 pg_addr = kmap(page);
46 address &= PAGE_MASK; 48 pos = fdata->pgoff << PAGE_SHIFT;
47 pos = address - area->vm_start + (area->vm_pgoff << PAGE_SHIFT);
48 49
49 count = PAGE_SIZE; 50 count = PAGE_SIZE;
50 if (address + PAGE_SIZE > area->vm_end) { 51 if (fdata->address + PAGE_SIZE > area->vm_end) {
51 count = area->vm_end - address; 52 WARN_ON(1); /* shouldn't happen? */
53 count = area->vm_end - fdata->address;
52 } 54 }
53 /* what we can read in one go */ 55 /* what we can read in one go */
54 bufsize = NCP_SERVER(inode)->buffer_size; 56 bufsize = NCP_SERVER(inode)->buffer_size;
@@ -91,15 +93,14 @@ static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area,
91 * fetches from the network, here the analogue of disk. 93 * fetches from the network, here the analogue of disk.
92 * -- wli 94 * -- wli
93 */ 95 */
94 if (type) 96 fdata->type = VM_FAULT_MAJOR;
95 *type = VM_FAULT_MAJOR;
96 count_vm_event(PGMAJFAULT); 97 count_vm_event(PGMAJFAULT);
97 return page; 98 return page;
98} 99}
99 100
100static struct vm_operations_struct ncp_file_mmap = 101static struct vm_operations_struct ncp_file_mmap =
101{ 102{
102 .nopage = ncp_file_mmap_nopage, 103 .fault = ncp_file_mmap_fault,
103}; 104};
104 105
105 106
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 84bf6e79de23..460d440310f2 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -232,7 +232,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
232 * might now be discovering a truncate that hit on another node. 232 * might now be discovering a truncate that hit on another node.
233 * block_read_full_page->get_block freaks out if it is asked to read 233 * block_read_full_page->get_block freaks out if it is asked to read
234 * beyond the end of a file, so we check here. Callers 234 * beyond the end of a file, so we check here. Callers
235 * (generic_file_read, fault->nopage) are clever enough to check i_size 235 * (generic_file_read, vm_ops->fault) are clever enough to check i_size
236 * and notice that the page they just read isn't needed. 236 * and notice that the page they just read isn't needed.
237 * 237 *
238 * XXX sys_readahead() seems to get that wrong? 238 * XXX sys_readahead() seems to get that wrong?
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 904f39ff5340..cd75508b1c8a 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -60,24 +60,23 @@ static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset)
60 return sigprocmask(SIG_SETMASK, oldset, NULL); 60 return sigprocmask(SIG_SETMASK, oldset, NULL);
61} 61}
62 62
63static struct page *ocfs2_nopage(struct vm_area_struct * area, 63static struct page *ocfs2_fault(struct vm_area_struct *area,
64 unsigned long address, 64 struct fault_data *fdata)
65 int *type)
66{ 65{
67 struct page *page = NOPAGE_SIGBUS; 66 struct page *page = NULL;
68 sigset_t blocked, oldset; 67 sigset_t blocked, oldset;
69 int ret; 68 int ret;
70 69
71 mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address, 70 mlog_entry("(area=%p, page offset=%lu)\n", area, fdata->pgoff);
72 type);
73 71
74 ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); 72 ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
75 if (ret < 0) { 73 if (ret < 0) {
74 fdata->type = VM_FAULT_SIGBUS;
76 mlog_errno(ret); 75 mlog_errno(ret);
77 goto out; 76 goto out;
78 } 77 }
79 78
80 page = filemap_nopage(area, address, type); 79 page = filemap_fault(area, fdata);
81 80
82 ret = ocfs2_vm_op_unblock_sigs(&oldset); 81 ret = ocfs2_vm_op_unblock_sigs(&oldset);
83 if (ret < 0) 82 if (ret < 0)
@@ -209,7 +208,7 @@ out:
209} 208}
210 209
211static struct vm_operations_struct ocfs2_file_vm_ops = { 210static struct vm_operations_struct ocfs2_file_vm_ops = {
212 .nopage = ocfs2_nopage, 211 .fault = ocfs2_fault,
213 .page_mkwrite = ocfs2_page_mkwrite, 212 .page_mkwrite = ocfs2_page_mkwrite,
214}; 213};
215 214
@@ -226,7 +225,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
226 ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level); 225 ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level);
227out: 226out:
228 vma->vm_ops = &ocfs2_file_vm_ops; 227 vma->vm_ops = &ocfs2_file_vm_ops;
229 vma->vm_flags |= VM_CAN_INVALIDATE; 228 vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
230 return 0; 229 return 0;
231} 230}
232 231
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 92b2f225712f..f12e80a69c68 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -213,18 +213,19 @@ xfs_file_fsync(
213 213
214#ifdef CONFIG_XFS_DMAPI 214#ifdef CONFIG_XFS_DMAPI
215STATIC struct page * 215STATIC struct page *
216xfs_vm_nopage( 216xfs_vm_fault(
217 struct vm_area_struct *area, 217 struct vm_area_struct *vma,
218 unsigned long address, 218 struct fault_data *fdata)
219 int *type)
220{ 219{
221 struct inode *inode = area->vm_file->f_path.dentry->d_inode; 220 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
222 bhv_vnode_t *vp = vn_from_inode(inode); 221 bhv_vnode_t *vp = vn_from_inode(inode);
223 222
224 ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI); 223 ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
225 if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), area, 0)) 224 if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0)) {
225 fdata->type = VM_FAULT_SIGBUS;
226 return NULL; 226 return NULL;
227 return filemap_nopage(area, address, type); 227 }
228 return filemap_fault(vma, fdata);
228} 229}
229#endif /* CONFIG_XFS_DMAPI */ 230#endif /* CONFIG_XFS_DMAPI */
230 231
@@ -310,7 +311,7 @@ xfs_file_mmap(
310 struct vm_area_struct *vma) 311 struct vm_area_struct *vma)
311{ 312{
312 vma->vm_ops = &xfs_file_vm_ops; 313 vma->vm_ops = &xfs_file_vm_ops;
313 vma->vm_flags |= VM_CAN_INVALIDATE; 314 vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
314 315
315#ifdef CONFIG_XFS_DMAPI 316#ifdef CONFIG_XFS_DMAPI
316 if (vn_from_inode(filp->f_path.dentry->d_inode)->v_vfsp->vfs_flag & VFS_DMI) 317 if (vn_from_inode(filp->f_path.dentry->d_inode)->v_vfsp->vfs_flag & VFS_DMI)
@@ -465,14 +466,12 @@ const struct file_operations xfs_dir_file_operations = {
465}; 466};
466 467
467static struct vm_operations_struct xfs_file_vm_ops = { 468static struct vm_operations_struct xfs_file_vm_ops = {
468 .nopage = filemap_nopage, 469 .fault = filemap_fault,
469 .populate = filemap_populate,
470}; 470};
471 471
472#ifdef CONFIG_XFS_DMAPI 472#ifdef CONFIG_XFS_DMAPI
473static struct vm_operations_struct xfs_dmapi_file_vm_ops = { 473static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
474 .nopage = xfs_vm_nopage, 474 .fault = xfs_vm_fault,
475 .populate = filemap_populate,
476#ifdef HAVE_VMOP_MPROTECT 475#ifdef HAVE_VMOP_MPROTECT
477 .mprotect = xfs_vm_mprotect, 476 .mprotect = xfs_vm_mprotect,
478#endif 477#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ca9536a348c8..f28a1b3e63a9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -173,6 +173,7 @@ extern unsigned int kobjsize(const void *objp);
173 * In this case, do_no_page must 173 * In this case, do_no_page must
174 * return with the page locked. 174 * return with the page locked.
175 */ 175 */
176#define VM_CAN_NONLINEAR 0x10000000 /* Has ->fault & does nonlinear pages */
176 177
177#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ 178#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
178#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS 179#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
@@ -196,6 +197,25 @@ extern unsigned int kobjsize(const void *objp);
196 */ 197 */
197extern pgprot_t protection_map[16]; 198extern pgprot_t protection_map[16];
198 199
200#define FAULT_FLAG_WRITE 0x01
201#define FAULT_FLAG_NONLINEAR 0x02
202
203/*
204 * fault_data is filled in by the pagefault handler and passed to the
205 * vma's ->fault function. That function is responsible for filling in
206 * 'type', which is the type of fault if a page is returned, or the type
207 * of error if NULL is returned.
208 *
209 * pgoff should be used in favour of address, if possible. If pgoff is
210 * used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get
211 * nonlinear mapping support.
212 */
213struct fault_data {
214 unsigned long address;
215 pgoff_t pgoff;
216 unsigned int flags;
217 int type;
218};
199 219
200/* 220/*
201 * These are the virtual MM functions - opening of an area, closing and 221 * These are the virtual MM functions - opening of an area, closing and
@@ -205,9 +225,15 @@ extern pgprot_t protection_map[16];
205struct vm_operations_struct { 225struct vm_operations_struct {
206 void (*open)(struct vm_area_struct * area); 226 void (*open)(struct vm_area_struct * area);
207 void (*close)(struct vm_area_struct * area); 227 void (*close)(struct vm_area_struct * area);
208 struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type); 228 struct page *(*fault)(struct vm_area_struct *vma,
209 unsigned long (*nopfn)(struct vm_area_struct * area, unsigned long address); 229 struct fault_data *fdata);
210 int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock); 230 struct page *(*nopage)(struct vm_area_struct *area,
231 unsigned long address, int *type);
232 unsigned long (*nopfn)(struct vm_area_struct *area,
233 unsigned long address);
234 int (*populate)(struct vm_area_struct *area, unsigned long address,
235 unsigned long len, pgprot_t prot, unsigned long pgoff,
236 int nonblock);
211 237
212 /* notification that a previously read-only page is about to become 238 /* notification that a previously read-only page is about to become
213 * writable, if an error is returned it will cause a SIGBUS */ 239 * writable, if an error is returned it will cause a SIGBUS */
@@ -661,7 +687,6 @@ static inline int page_mapped(struct page *page)
661 */ 687 */
662#define NOPAGE_SIGBUS (NULL) 688#define NOPAGE_SIGBUS (NULL)
663#define NOPAGE_OOM ((struct page *) (-1)) 689#define NOPAGE_OOM ((struct page *) (-1))
664#define NOPAGE_REFAULT ((struct page *) (-2)) /* Return to userspace, rerun */
665 690
666/* 691/*
667 * Error return values for the *_nopfn functions 692 * Error return values for the *_nopfn functions
@@ -1110,9 +1135,11 @@ extern void truncate_inode_pages_range(struct address_space *,
1110 loff_t lstart, loff_t lend); 1135 loff_t lstart, loff_t lend);
1111 1136
1112/* generic vm_area_ops exported for stackable file systems */ 1137/* generic vm_area_ops exported for stackable file systems */
1113extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *); 1138extern struct page *filemap_fault(struct vm_area_struct *, struct fault_data *);
1114extern int filemap_populate(struct vm_area_struct *, unsigned long, 1139extern struct page * __deprecated_for_modules
1115 unsigned long, pgprot_t, unsigned long, int); 1140filemap_nopage(struct vm_area_struct *, unsigned long, int *);
1141extern int __deprecated_for_modules filemap_populate(struct vm_area_struct *,
1142 unsigned long, unsigned long, pgprot_t, unsigned long, int);
1116 1143
1117/* mm/page-writeback.c */ 1144/* mm/page-writeback.c */
1118int write_one_page(struct page *page, int wait); 1145int write_one_page(struct page *page, int wait);
diff --git a/ipc/shm.c b/ipc/shm.c
index 242c3f66493a..e2d090348b1e 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -224,13 +224,13 @@ static void shm_close(struct vm_area_struct *vma)
224 mutex_unlock(&shm_ids(ns).mutex); 224 mutex_unlock(&shm_ids(ns).mutex);
225} 225}
226 226
227static struct page *shm_nopage(struct vm_area_struct *vma, 227static struct page *shm_fault(struct vm_area_struct *vma,
228 unsigned long address, int *type) 228 struct fault_data *fdata)
229{ 229{
230 struct file *file = vma->vm_file; 230 struct file *file = vma->vm_file;
231 struct shm_file_data *sfd = shm_file_data(file); 231 struct shm_file_data *sfd = shm_file_data(file);
232 232
233 return sfd->vm_ops->nopage(vma, address, type); 233 return sfd->vm_ops->fault(vma, fdata);
234} 234}
235 235
236#ifdef CONFIG_NUMA 236#ifdef CONFIG_NUMA
@@ -269,6 +269,7 @@ static int shm_mmap(struct file * file, struct vm_area_struct * vma)
269 if (ret != 0) 269 if (ret != 0)
270 return ret; 270 return ret;
271 sfd->vm_ops = vma->vm_ops; 271 sfd->vm_ops = vma->vm_ops;
272 BUG_ON(!sfd->vm_ops->fault);
272 vma->vm_ops = &shm_vm_ops; 273 vma->vm_ops = &shm_vm_ops;
273 shm_open(vma); 274 shm_open(vma);
274 275
@@ -327,7 +328,7 @@ static const struct file_operations shm_file_operations = {
327static struct vm_operations_struct shm_vm_ops = { 328static struct vm_operations_struct shm_vm_ops = {
328 .open = shm_open, /* callback for a new vm-area open */ 329 .open = shm_open, /* callback for a new vm-area open */
329 .close = shm_close, /* callback for when the vm-area is released */ 330 .close = shm_close, /* callback for when the vm-area is released */
330 .nopage = shm_nopage, 331 .fault = shm_fault,
331#if defined(CONFIG_NUMA) 332#if defined(CONFIG_NUMA)
332 .set_policy = shm_set_policy, 333 .set_policy = shm_set_policy,
333 .get_policy = shm_get_policy, 334 .get_policy = shm_get_policy,
diff --git a/mm/filemap.c b/mm/filemap.c
index 462cda58a18e..26b992d169e5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1301,40 +1301,38 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
1301#define MMAP_LOTSAMISS (100) 1301#define MMAP_LOTSAMISS (100)
1302 1302
1303/** 1303/**
1304 * filemap_nopage - read in file data for page fault handling 1304 * filemap_fault - read in file data for page fault handling
1305 * @area: the applicable vm_area 1305 * @vma: user vma (not used)
1306 * @address: target address to read in 1306 * @fdata: the applicable fault_data
1307 * @type: returned with VM_FAULT_{MINOR,MAJOR} if not %NULL
1308 * 1307 *
1309 * filemap_nopage() is invoked via the vma operations vector for a 1308 * filemap_fault() is invoked via the vma operations vector for a
1310 * mapped memory region to read in file data during a page fault. 1309 * mapped memory region to read in file data during a page fault.
1311 * 1310 *
1312 * The goto's are kind of ugly, but this streamlines the normal case of having 1311 * The goto's are kind of ugly, but this streamlines the normal case of having
1313 * it in the page cache, and handles the special cases reasonably without 1312 * it in the page cache, and handles the special cases reasonably without
1314 * having a lot of duplicated code. 1313 * having a lot of duplicated code.
1315 */ 1314 */
1316struct page *filemap_nopage(struct vm_area_struct *area, 1315struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
1317 unsigned long address, int *type)
1318{ 1316{
1319 int error; 1317 int error;
1320 struct file *file = area->vm_file; 1318 struct file *file = vma->vm_file;
1321 struct address_space *mapping = file->f_mapping; 1319 struct address_space *mapping = file->f_mapping;
1322 struct file_ra_state *ra = &file->f_ra; 1320 struct file_ra_state *ra = &file->f_ra;
1323 struct inode *inode = mapping->host; 1321 struct inode *inode = mapping->host;
1324 struct page *page; 1322 struct page *page;
1325 unsigned long size, pgoff; 1323 unsigned long size;
1326 int did_readaround = 0, majmin = VM_FAULT_MINOR; 1324 int did_readaround = 0;
1327 1325
1328 BUG_ON(!(area->vm_flags & VM_CAN_INVALIDATE)); 1326 fdata->type = VM_FAULT_MINOR;
1329 1327
1330 pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff; 1328 BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
1331 1329
1332 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1330 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1333 if (pgoff >= size) 1331 if (fdata->pgoff >= size)
1334 goto outside_data_content; 1332 goto outside_data_content;
1335 1333
1336 /* If we don't want any read-ahead, don't bother */ 1334 /* If we don't want any read-ahead, don't bother */
1337 if (VM_RandomReadHint(area)) 1335 if (VM_RandomReadHint(vma))
1338 goto no_cached_page; 1336 goto no_cached_page;
1339 1337
1340 /* 1338 /*
@@ -1343,19 +1341,19 @@ struct page *filemap_nopage(struct vm_area_struct *area,
1343 * 1341 *
1344 * For sequential accesses, we use the generic readahead logic. 1342 * For sequential accesses, we use the generic readahead logic.
1345 */ 1343 */
1346 if (VM_SequentialReadHint(area)) 1344 if (VM_SequentialReadHint(vma))
1347 page_cache_readahead(mapping, ra, file, pgoff, 1); 1345 page_cache_readahead(mapping, ra, file, fdata->pgoff, 1);
1348 1346
1349 /* 1347 /*
1350 * Do we have something in the page cache already? 1348 * Do we have something in the page cache already?
1351 */ 1349 */
1352retry_find: 1350retry_find:
1353 page = find_lock_page(mapping, pgoff); 1351 page = find_lock_page(mapping, fdata->pgoff);
1354 if (!page) { 1352 if (!page) {
1355 unsigned long ra_pages; 1353 unsigned long ra_pages;
1356 1354
1357 if (VM_SequentialReadHint(area)) { 1355 if (VM_SequentialReadHint(vma)) {
1358 handle_ra_miss(mapping, ra, pgoff); 1356 handle_ra_miss(mapping, ra, fdata->pgoff);
1359 goto no_cached_page; 1357 goto no_cached_page;
1360 } 1358 }
1361 ra->mmap_miss++; 1359 ra->mmap_miss++;
@@ -1372,7 +1370,7 @@ retry_find:
1372 * check did_readaround, as this is an inner loop. 1370 * check did_readaround, as this is an inner loop.
1373 */ 1371 */
1374 if (!did_readaround) { 1372 if (!did_readaround) {
1375 majmin = VM_FAULT_MAJOR; 1373 fdata->type = VM_FAULT_MAJOR;
1376 count_vm_event(PGMAJFAULT); 1374 count_vm_event(PGMAJFAULT);
1377 } 1375 }
1378 did_readaround = 1; 1376 did_readaround = 1;
@@ -1380,11 +1378,11 @@ retry_find:
1380 if (ra_pages) { 1378 if (ra_pages) {
1381 pgoff_t start = 0; 1379 pgoff_t start = 0;
1382 1380
1383 if (pgoff > ra_pages / 2) 1381 if (fdata->pgoff > ra_pages / 2)
1384 start = pgoff - ra_pages / 2; 1382 start = fdata->pgoff - ra_pages / 2;
1385 do_page_cache_readahead(mapping, file, start, ra_pages); 1383 do_page_cache_readahead(mapping, file, start, ra_pages);
1386 } 1384 }
1387 page = find_lock_page(mapping, pgoff); 1385 page = find_lock_page(mapping, fdata->pgoff);
1388 if (!page) 1386 if (!page)
1389 goto no_cached_page; 1387 goto no_cached_page;
1390 } 1388 }
@@ -1401,7 +1399,7 @@ retry_find:
1401 1399
1402 /* Must recheck i_size under page lock */ 1400 /* Must recheck i_size under page lock */
1403 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1401 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1404 if (unlikely(pgoff >= size)) { 1402 if (unlikely(fdata->pgoff >= size)) {
1405 unlock_page(page); 1403 unlock_page(page);
1406 goto outside_data_content; 1404 goto outside_data_content;
1407 } 1405 }
@@ -1410,8 +1408,6 @@ retry_find:
1410 * Found the page and have a reference on it. 1408 * Found the page and have a reference on it.
1411 */ 1409 */
1412 mark_page_accessed(page); 1410 mark_page_accessed(page);
1413 if (type)
1414 *type = majmin;
1415 return page; 1411 return page;
1416 1412
1417outside_data_content: 1413outside_data_content:
@@ -1419,15 +1415,17 @@ outside_data_content:
1419 * An external ptracer can access pages that normally aren't 1415 * An external ptracer can access pages that normally aren't
1420 * accessible.. 1416 * accessible..
1421 */ 1417 */
1422 if (area->vm_mm == current->mm) 1418 if (vma->vm_mm == current->mm) {
1423 return NOPAGE_SIGBUS; 1419 fdata->type = VM_FAULT_SIGBUS;
1420 return NULL;
1421 }
1424 /* Fall through to the non-read-ahead case */ 1422 /* Fall through to the non-read-ahead case */
1425no_cached_page: 1423no_cached_page:
1426 /* 1424 /*
1427 * We're only likely to ever get here if MADV_RANDOM is in 1425 * We're only likely to ever get here if MADV_RANDOM is in
1428 * effect. 1426 * effect.
1429 */ 1427 */
1430 error = page_cache_read(file, pgoff); 1428 error = page_cache_read(file, fdata->pgoff);
1431 1429
1432 /* 1430 /*
1433 * The page we want has now been added to the page cache. 1431 * The page we want has now been added to the page cache.
@@ -1443,13 +1441,15 @@ no_cached_page:
1443 * to schedule I/O. 1441 * to schedule I/O.
1444 */ 1442 */
1445 if (error == -ENOMEM) 1443 if (error == -ENOMEM)
1446 return NOPAGE_OOM; 1444 fdata->type = VM_FAULT_OOM;
1447 return NOPAGE_SIGBUS; 1445 else
1446 fdata->type = VM_FAULT_SIGBUS;
1447 return NULL;
1448 1448
1449page_not_uptodate: 1449page_not_uptodate:
1450 /* IO error path */ 1450 /* IO error path */
1451 if (!did_readaround) { 1451 if (!did_readaround) {
1452 majmin = VM_FAULT_MAJOR; 1452 fdata->type = VM_FAULT_MAJOR;
1453 count_vm_event(PGMAJFAULT); 1453 count_vm_event(PGMAJFAULT);
1454 } 1454 }
1455 1455
@@ -1468,7 +1468,30 @@ page_not_uptodate:
1468 1468
1469 /* Things didn't work out. Return zero to tell the mm layer so. */ 1469 /* Things didn't work out. Return zero to tell the mm layer so. */
1470 shrink_readahead_size_eio(file, ra); 1470 shrink_readahead_size_eio(file, ra);
1471 return NOPAGE_SIGBUS; 1471 fdata->type = VM_FAULT_SIGBUS;
1472 return NULL;
1473}
1474EXPORT_SYMBOL(filemap_fault);
1475
1476/*
1477 * filemap_nopage and filemap_populate are legacy exports that are not used
1478 * in tree. Scheduled for removal.
1479 */
1480struct page *filemap_nopage(struct vm_area_struct *area,
1481 unsigned long address, int *type)
1482{
1483 struct page *page;
1484 struct fault_data fdata;
1485 fdata.address = address;
1486 fdata.pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
1487 + area->vm_pgoff;
1488 fdata.flags = 0;
1489
1490 page = filemap_fault(area, &fdata);
1491 if (type)
1492 *type = fdata.type;
1493
1494 return page;
1472} 1495}
1473EXPORT_SYMBOL(filemap_nopage); 1496EXPORT_SYMBOL(filemap_nopage);
1474 1497
@@ -1646,8 +1669,7 @@ repeat:
1646EXPORT_SYMBOL(filemap_populate); 1669EXPORT_SYMBOL(filemap_populate);
1647 1670
1648struct vm_operations_struct generic_file_vm_ops = { 1671struct vm_operations_struct generic_file_vm_ops = {
1649 .nopage = filemap_nopage, 1672 .fault = filemap_fault,
1650 .populate = filemap_populate,
1651}; 1673};
1652 1674
1653/* This is used for a general mmap of a disk file */ 1675/* This is used for a general mmap of a disk file */
@@ -1660,7 +1682,7 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
1660 return -ENOEXEC; 1682 return -ENOEXEC;
1661 file_accessed(file); 1683 file_accessed(file);
1662 vma->vm_ops = &generic_file_vm_ops; 1684 vma->vm_ops = &generic_file_vm_ops;
1663 vma->vm_flags |= VM_CAN_INVALIDATE; 1685 vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
1664 return 0; 1686 return 0;
1665} 1687}
1666 1688
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 65ffc321f0c0..82f4b8e9834e 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -205,62 +205,67 @@ __xip_unmap (struct address_space * mapping,
205} 205}
206 206
207/* 207/*
208 * xip_nopage() is invoked via the vma operations vector for a 208 * xip_file_fault() is invoked via the vma operations vector for a
209 * mapped memory region to read in file data during a page fault. 209 * mapped memory region to read in file data during a page fault.
210 * 210 *
211 * This function is derived from filemap_nopage, but used for execute in place 211 * This function is derived from filemap_fault, but used for execute in place
212 */ 212 */
213static struct page * 213static struct page *xip_file_fault(struct vm_area_struct *area,
214xip_file_nopage(struct vm_area_struct * area, 214 struct fault_data *fdata)
215 unsigned long address,
216 int *type)
217{ 215{
218 struct file *file = area->vm_file; 216 struct file *file = area->vm_file;
219 struct address_space *mapping = file->f_mapping; 217 struct address_space *mapping = file->f_mapping;
220 struct inode *inode = mapping->host; 218 struct inode *inode = mapping->host;
221 struct page *page; 219 struct page *page;
222 unsigned long size, pgoff, endoff; 220 pgoff_t size;
223 221
224 pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) 222 /* XXX: are VM_FAULT_ codes OK? */
225 + area->vm_pgoff;
226 endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT)
227 + area->vm_pgoff;
228 223
229 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 224 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
230 if (pgoff >= size) 225 if (fdata->pgoff >= size) {
231 return NOPAGE_SIGBUS; 226 fdata->type = VM_FAULT_SIGBUS;
227 return NULL;
228 }
232 229
233 page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0); 230 page = mapping->a_ops->get_xip_page(mapping,
231 fdata->pgoff*(PAGE_SIZE/512), 0);
234 if (!IS_ERR(page)) 232 if (!IS_ERR(page))
235 goto out; 233 goto out;
236 if (PTR_ERR(page) != -ENODATA) 234 if (PTR_ERR(page) != -ENODATA) {
237 return NOPAGE_SIGBUS; 235 fdata->type = VM_FAULT_OOM;
236 return NULL;
237 }
238 238
239 /* sparse block */ 239 /* sparse block */
240 if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) && 240 if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
241 (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) && 241 (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) &&
242 (!(mapping->host->i_sb->s_flags & MS_RDONLY))) { 242 (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
243 /* maybe shared writable, allocate new block */ 243 /* maybe shared writable, allocate new block */
244 page = mapping->a_ops->get_xip_page (mapping, 244 page = mapping->a_ops->get_xip_page(mapping,
245 pgoff*(PAGE_SIZE/512), 1); 245 fdata->pgoff*(PAGE_SIZE/512), 1);
246 if (IS_ERR(page)) 246 if (IS_ERR(page)) {
247 return NOPAGE_SIGBUS; 247 fdata->type = VM_FAULT_SIGBUS;
248 return NULL;
249 }
248 /* unmap page at pgoff from all other vmas */ 250 /* unmap page at pgoff from all other vmas */
249 __xip_unmap(mapping, pgoff); 251 __xip_unmap(mapping, fdata->pgoff);
250 } else { 252 } else {
251 /* not shared and writable, use xip_sparse_page() */ 253 /* not shared and writable, use xip_sparse_page() */
252 page = xip_sparse_page(); 254 page = xip_sparse_page();
253 if (!page) 255 if (!page) {
254 return NOPAGE_OOM; 256 fdata->type = VM_FAULT_OOM;
257 return NULL;
258 }
255 } 259 }
256 260
257out: 261out:
262 fdata->type = VM_FAULT_MINOR;
258 page_cache_get(page); 263 page_cache_get(page);
259 return page; 264 return page;
260} 265}
261 266
262static struct vm_operations_struct xip_file_vm_ops = { 267static struct vm_operations_struct xip_file_vm_ops = {
263 .nopage = xip_file_nopage, 268 .fault = xip_file_fault,
264}; 269};
265 270
266int xip_file_mmap(struct file * file, struct vm_area_struct * vma) 271int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
@@ -269,6 +274,7 @@ int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
269 274
270 file_accessed(file); 275 file_accessed(file);
271 vma->vm_ops = &xip_file_vm_ops; 276 vma->vm_ops = &xip_file_vm_ops;
277 vma->vm_flags |= VM_CAN_NONLINEAR;
272 return 0; 278 return 0;
273} 279}
274EXPORT_SYMBOL_GPL(xip_file_mmap); 280EXPORT_SYMBOL_GPL(xip_file_mmap);
diff --git a/mm/fremap.c b/mm/fremap.c
index 4e3f53dd5fd4..01e51f01b84e 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -126,6 +126,25 @@ out:
126 return err; 126 return err;
127} 127}
128 128
129static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma,
130 unsigned long addr, unsigned long size, pgoff_t pgoff)
131{
132 int err;
133
134 do {
135 err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot);
136 if (err)
137 return err;
138
139 size -= PAGE_SIZE;
140 addr += PAGE_SIZE;
141 pgoff++;
142 } while (size);
143
144 return 0;
145
146}
147
129/*** 148/***
130 * sys_remap_file_pages - remap arbitrary pages of a shared backing store 149 * sys_remap_file_pages - remap arbitrary pages of a shared backing store
131 * file within an existing vma. 150 * file within an existing vma.
@@ -183,41 +202,63 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
183 * the single existing vma. vm_private_data is used as a 202 * the single existing vma. vm_private_data is used as a
184 * swapout cursor in a VM_NONLINEAR vma. 203 * swapout cursor in a VM_NONLINEAR vma.
185 */ 204 */
186 if (vma && (vma->vm_flags & VM_SHARED) && 205 if (!vma || !(vma->vm_flags & VM_SHARED))
187 (!vma->vm_private_data || (vma->vm_flags & VM_NONLINEAR)) && 206 goto out;
188 vma->vm_ops && vma->vm_ops->populate && 207
189 end > start && start >= vma->vm_start && 208 if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR))
190 end <= vma->vm_end) { 209 goto out;
191 210
192 /* Must set VM_NONLINEAR before any pages are populated. */ 211 if ((!vma->vm_ops || !vma->vm_ops->populate) &&
193 if (pgoff != linear_page_index(vma, start) && 212 !(vma->vm_flags & VM_CAN_NONLINEAR))
194 !(vma->vm_flags & VM_NONLINEAR)) { 213 goto out;
195 if (!has_write_lock) { 214
196 up_read(&mm->mmap_sem); 215 if (end <= start || start < vma->vm_start || end > vma->vm_end)
197 down_write(&mm->mmap_sem); 216 goto out;
198 has_write_lock = 1; 217
199 goto retry; 218 /* Must set VM_NONLINEAR before any pages are populated. */
219 if (!(vma->vm_flags & VM_NONLINEAR)) {
220 /* Don't need a nonlinear mapping, exit success */
221 if (pgoff == linear_page_index(vma, start)) {
222 err = 0;
223 goto out;
224 }
225
226 if (!has_write_lock) {
227 up_read(&mm->mmap_sem);
228 down_write(&mm->mmap_sem);
229 has_write_lock = 1;
230 goto retry;
231 }
232 mapping = vma->vm_file->f_mapping;
233 spin_lock(&mapping->i_mmap_lock);
234 flush_dcache_mmap_lock(mapping);
235 vma->vm_flags |= VM_NONLINEAR;
236 vma_prio_tree_remove(vma, &mapping->i_mmap);
237 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
238 flush_dcache_mmap_unlock(mapping);
239 spin_unlock(&mapping->i_mmap_lock);
240 }
241
242 if (vma->vm_flags & VM_CAN_NONLINEAR) {
243 err = populate_range(mm, vma, start, size, pgoff);
244 if (!err && !(flags & MAP_NONBLOCK)) {
245 if (unlikely(has_write_lock)) {
246 downgrade_write(&mm->mmap_sem);
247 has_write_lock = 0;
200 } 248 }
201 mapping = vma->vm_file->f_mapping; 249 make_pages_present(start, start+size);
202 spin_lock(&mapping->i_mmap_lock);
203 flush_dcache_mmap_lock(mapping);
204 vma->vm_flags |= VM_NONLINEAR;
205 vma_prio_tree_remove(vma, &mapping->i_mmap);
206 vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
207 flush_dcache_mmap_unlock(mapping);
208 spin_unlock(&mapping->i_mmap_lock);
209 } 250 }
251 } else
252 err = vma->vm_ops->populate(vma, start, size, vma->vm_page_prot,
253 pgoff, flags & MAP_NONBLOCK);
210 254
211 err = vma->vm_ops->populate(vma, start, size, 255 /*
212 vma->vm_page_prot, 256 * We can't clear VM_NONLINEAR because we'd have to do
213 pgoff, flags & MAP_NONBLOCK); 257 * it after ->populate completes, and that would prevent
258 * downgrading the lock. (Locks can't be upgraded).
259 */
214 260
215 /* 261out:
216 * We can't clear VM_NONLINEAR because we'd have to do
217 * it after ->populate completes, and that would prevent
218 * downgrading the lock. (Locks can't be upgraded).
219 */
220 }
221 if (likely(!has_write_lock)) 262 if (likely(!has_write_lock))
222 up_read(&mm->mmap_sem); 263 up_read(&mm->mmap_sem);
223 else 264 else
diff --git a/mm/memory.c b/mm/memory.c
index e6c99f6b5649..eee7fec3ab54 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1047,7 +1047,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1047 if (pages) 1047 if (pages)
1048 foll_flags |= FOLL_GET; 1048 foll_flags |= FOLL_GET;
1049 if (!write && !(vma->vm_flags & VM_LOCKED) && 1049 if (!write && !(vma->vm_flags & VM_LOCKED) &&
1050 (!vma->vm_ops || !vma->vm_ops->nopage)) 1050 (!vma->vm_ops || (!vma->vm_ops->nopage &&
1051 !vma->vm_ops->fault)))
1051 foll_flags |= FOLL_ANON; 1052 foll_flags |= FOLL_ANON;
1052 1053
1053 do { 1054 do {
@@ -2288,10 +2289,10 @@ oom:
2288} 2289}
2289 2290
2290/* 2291/*
2291 * do_no_page() tries to create a new page mapping. It aggressively 2292 * __do_fault() tries to create a new page mapping. It aggressively
2292 * tries to share with existing pages, but makes a separate copy if 2293 * tries to share with existing pages, but makes a separate copy if
2293 * the "write_access" parameter is true in order to avoid the next 2294 * the FAULT_FLAG_WRITE is set in the flags parameter in order to avoid
2294 * page fault. 2295 * the next page fault.
2295 * 2296 *
2296 * As this is called only for pages that do not currently exist, we 2297 * As this is called only for pages that do not currently exist, we
2297 * do not need to flush old virtual caches or the TLB. 2298 * do not need to flush old virtual caches or the TLB.
@@ -2300,64 +2301,82 @@ oom:
2300 * but allow concurrent faults), and pte mapped but not yet locked. 2301 * but allow concurrent faults), and pte mapped but not yet locked.
2301 * We return with mmap_sem still held, but pte unmapped and unlocked. 2302 * We return with mmap_sem still held, but pte unmapped and unlocked.
2302 */ 2303 */
2303static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, 2304static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2304 unsigned long address, pte_t *page_table, pmd_t *pmd, 2305 unsigned long address, pte_t *page_table, pmd_t *pmd,
2305 int write_access) 2306 pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
2306{ 2307{
2307 spinlock_t *ptl; 2308 spinlock_t *ptl;
2308 struct page *page, *nopage_page; 2309 struct page *page, *faulted_page;
2309 pte_t entry; 2310 pte_t entry;
2310 int ret = VM_FAULT_MINOR;
2311 int anon = 0; 2311 int anon = 0;
2312 struct page *dirty_page = NULL; 2312 struct page *dirty_page = NULL;
2313 struct fault_data fdata;
2314
2315 fdata.address = address & PAGE_MASK;
2316 fdata.pgoff = pgoff;
2317 fdata.flags = flags;
2313 2318
2314 pte_unmap(page_table); 2319 pte_unmap(page_table);
2315 BUG_ON(vma->vm_flags & VM_PFNMAP); 2320 BUG_ON(vma->vm_flags & VM_PFNMAP);
2316 2321
2317 nopage_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret); 2322 if (likely(vma->vm_ops->fault)) {
2318 /* no page was available -- either SIGBUS, OOM or REFAULT */ 2323 fdata.type = -1;
2319 if (unlikely(nopage_page == NOPAGE_SIGBUS)) 2324 faulted_page = vma->vm_ops->fault(vma, &fdata);
2320 return VM_FAULT_SIGBUS; 2325 WARN_ON(fdata.type == -1);
2321 else if (unlikely(nopage_page == NOPAGE_OOM)) 2326 if (unlikely(!faulted_page))
2322 return VM_FAULT_OOM; 2327 return fdata.type;
2323 else if (unlikely(nopage_page == NOPAGE_REFAULT)) 2328 } else {
2324 return VM_FAULT_MINOR; 2329 /* Legacy ->nopage path */
2330 fdata.type = VM_FAULT_MINOR;
2331 faulted_page = vma->vm_ops->nopage(vma, address & PAGE_MASK,
2332 &fdata.type);
2333 /* no page was available -- either SIGBUS or OOM */
2334 if (unlikely(faulted_page == NOPAGE_SIGBUS))
2335 return VM_FAULT_SIGBUS;
2336 else if (unlikely(faulted_page == NOPAGE_OOM))
2337 return VM_FAULT_OOM;
2338 }
2325 2339
2326 BUG_ON(vma->vm_flags & VM_CAN_INVALIDATE && !PageLocked(nopage_page));
2327 /* 2340 /*
2328 * For consistency in subsequent calls, make the nopage_page always 2341 * For consistency in subsequent calls, make the faulted_page always
2329 * locked. 2342 * locked.
2330 */ 2343 */
2331 if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE))) 2344 if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE)))
2332 lock_page(nopage_page); 2345 lock_page(faulted_page);
2346 else
2347 BUG_ON(!PageLocked(faulted_page));
2333 2348
2334 /* 2349 /*
2335 * Should we do an early C-O-W break? 2350 * Should we do an early C-O-W break?
2336 */ 2351 */
2337 page = nopage_page; 2352 page = faulted_page;
2338 if (write_access) { 2353 if (flags & FAULT_FLAG_WRITE) {
2339 if (!(vma->vm_flags & VM_SHARED)) { 2354 if (!(vma->vm_flags & VM_SHARED)) {
2355 anon = 1;
2340 if (unlikely(anon_vma_prepare(vma))) { 2356 if (unlikely(anon_vma_prepare(vma))) {
2341 ret = VM_FAULT_OOM; 2357 fdata.type = VM_FAULT_OOM;
2342 goto out_error; 2358 goto out;
2343 } 2359 }
2344 page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); 2360 page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
2345 if (!page) { 2361 if (!page) {
2346 ret = VM_FAULT_OOM; 2362 fdata.type = VM_FAULT_OOM;
2347 goto out_error; 2363 goto out;
2348 } 2364 }
2349 copy_user_highpage(page, nopage_page, address, vma); 2365 copy_user_highpage(page, faulted_page, address, vma);
2350 anon = 1;
2351 } else { 2366 } else {
2352 /* if the page will be shareable, see if the backing 2367 /*
2368 * If the page will be shareable, see if the backing
2353 * address space wants to know that the page is about 2369 * address space wants to know that the page is about
2354 * to become writable */ 2370 * to become writable
2371 */
2355 if (vma->vm_ops->page_mkwrite && 2372 if (vma->vm_ops->page_mkwrite &&
2356 vma->vm_ops->page_mkwrite(vma, page) < 0) { 2373 vma->vm_ops->page_mkwrite(vma, page) < 0) {
2357 ret = VM_FAULT_SIGBUS; 2374 fdata.type = VM_FAULT_SIGBUS;
2358 goto out_error; 2375 anon = 1; /* no anon but release faulted_page */
2376 goto out;
2359 } 2377 }
2360 } 2378 }
2379
2361 } 2380 }
2362 2381
2363 page_table = pte_offset_map_lock(mm, pmd, address, &ptl); 2382 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
@@ -2373,10 +2392,10 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
2373 * handle that later. 2392 * handle that later.
2374 */ 2393 */
2375 /* Only go through if we didn't race with anybody else... */ 2394 /* Only go through if we didn't race with anybody else... */
2376 if (likely(pte_none(*page_table))) { 2395 if (likely(pte_same(*page_table, orig_pte))) {
2377 flush_icache_page(vma, page); 2396 flush_icache_page(vma, page);
2378 entry = mk_pte(page, vma->vm_page_prot); 2397 entry = mk_pte(page, vma->vm_page_prot);
2379 if (write_access) 2398 if (flags & FAULT_FLAG_WRITE)
2380 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 2399 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2381 set_pte_at(mm, address, page_table, entry); 2400 set_pte_at(mm, address, page_table, entry);
2382 if (anon) { 2401 if (anon) {
@@ -2386,7 +2405,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
2386 } else { 2405 } else {
2387 inc_mm_counter(mm, file_rss); 2406 inc_mm_counter(mm, file_rss);
2388 page_add_file_rmap(page); 2407 page_add_file_rmap(page);
2389 if (write_access) { 2408 if (flags & FAULT_FLAG_WRITE) {
2390 dirty_page = page; 2409 dirty_page = page;
2391 get_page(dirty_page); 2410 get_page(dirty_page);
2392 } 2411 }
@@ -2399,25 +2418,42 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
2399 if (anon) 2418 if (anon)
2400 page_cache_release(page); 2419 page_cache_release(page);
2401 else 2420 else
2402 anon = 1; /* not anon, but release nopage_page */ 2421 anon = 1; /* not anon, but release faulted_page */
2403 } 2422 }
2404 2423
2405 pte_unmap_unlock(page_table, ptl); 2424 pte_unmap_unlock(page_table, ptl);
2406 2425
2407out: 2426out:
2408 unlock_page(nopage_page); 2427 unlock_page(faulted_page);
2409 if (anon) 2428 if (anon)
2410 page_cache_release(nopage_page); 2429 page_cache_release(faulted_page);
2411 else if (dirty_page) { 2430 else if (dirty_page) {
2412 set_page_dirty_balance(dirty_page); 2431 set_page_dirty_balance(dirty_page);
2413 put_page(dirty_page); 2432 put_page(dirty_page);
2414 } 2433 }
2415 2434
2416 return ret; 2435 return fdata.type;
2436}
2417 2437
2418out_error: 2438static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2419 anon = 1; /* relase nopage_page */ 2439 unsigned long address, pte_t *page_table, pmd_t *pmd,
2420 goto out; 2440 int write_access, pte_t orig_pte)
2441{
2442 pgoff_t pgoff = (((address & PAGE_MASK)
2443 - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
2444 unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0);
2445
2446 return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte);
2447}
2448
2449static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2450 unsigned long address, pte_t *page_table, pmd_t *pmd,
2451 int write_access, pgoff_t pgoff, pte_t orig_pte)
2452{
2453 unsigned int flags = FAULT_FLAG_NONLINEAR |
2454 (write_access ? FAULT_FLAG_WRITE : 0);
2455
2456 return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte);
2421} 2457}
2422 2458
2423/* 2459/*
@@ -2496,9 +2532,14 @@ static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
2496 print_bad_pte(vma, orig_pte, address); 2532 print_bad_pte(vma, orig_pte, address);
2497 return VM_FAULT_OOM; 2533 return VM_FAULT_OOM;
2498 } 2534 }
2499 /* We can then assume vm->vm_ops && vma->vm_ops->populate */
2500 2535
2501 pgoff = pte_to_pgoff(orig_pte); 2536 pgoff = pte_to_pgoff(orig_pte);
2537
2538 if (vma->vm_ops && vma->vm_ops->fault)
2539 return do_nonlinear_fault(mm, vma, address, page_table, pmd,
2540 write_access, pgoff, orig_pte);
2541
2542 /* We can then assume vma->vm_ops && vma->vm_ops->populate */
2502 err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, 2543 err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE,
2503 vma->vm_page_prot, pgoff, 0); 2544 vma->vm_page_prot, pgoff, 0);
2504 if (err == -ENOMEM) 2545 if (err == -ENOMEM)
@@ -2532,10 +2573,9 @@ static inline int handle_pte_fault(struct mm_struct *mm,
2532 if (!pte_present(entry)) { 2573 if (!pte_present(entry)) {
2533 if (pte_none(entry)) { 2574 if (pte_none(entry)) {
2534 if (vma->vm_ops) { 2575 if (vma->vm_ops) {
2535 if (vma->vm_ops->nopage) 2576 if (vma->vm_ops->fault || vma->vm_ops->nopage)
2536 return do_no_page(mm, vma, address, 2577 return do_linear_fault(mm, vma, address,
2537 pte, pmd, 2578 pte, pmd, write_access, entry);
2538 write_access);
2539 if (unlikely(vma->vm_ops->nopfn)) 2579 if (unlikely(vma->vm_ops->nopfn))
2540 return do_no_pfn(mm, vma, address, pte, 2580 return do_no_pfn(mm, vma, address, pte,
2541 pmd, write_access); 2581 pmd, write_access);
diff --git a/mm/mmap.c b/mm/mmap.c
index 144b4a290f2c..724f342bcf89 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1165,12 +1165,8 @@ out:
1165 mm->locked_vm += len >> PAGE_SHIFT; 1165 mm->locked_vm += len >> PAGE_SHIFT;
1166 make_pages_present(addr, addr + len); 1166 make_pages_present(addr, addr + len);
1167 } 1167 }
1168 if (flags & MAP_POPULATE) { 1168 if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
1169 up_write(&mm->mmap_sem); 1169 make_pages_present(addr, addr + len);
1170 sys_remap_file_pages(addr, len, 0,
1171 pgoff, flags & MAP_NONBLOCK);
1172 down_write(&mm->mmap_sem);
1173 }
1174 return addr; 1170 return addr;
1175 1171
1176unmap_and_free_vma: 1172unmap_and_free_vma:
diff --git a/mm/nommu.c b/mm/nommu.c
index 8bbbf147a794..aee0e1b0ebe7 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1341,8 +1341,7 @@ int in_gate_area_no_task(unsigned long addr)
1341 return 0; 1341 return 0;
1342} 1342}
1343 1343
1344struct page *filemap_nopage(struct vm_area_struct *area, 1344struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
1345 unsigned long address, int *type)
1346{ 1345{
1347 BUG(); 1346 BUG();
1348 return NULL; 1347 return NULL;
diff --git a/mm/rmap.c b/mm/rmap.c
index 61e492597a0b..fede5c7910be 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -621,8 +621,10 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
621 printk (KERN_EMERG " page->count = %x\n", page_count(page)); 621 printk (KERN_EMERG " page->count = %x\n", page_count(page));
622 printk (KERN_EMERG " page->mapping = %p\n", page->mapping); 622 printk (KERN_EMERG " page->mapping = %p\n", page->mapping);
623 print_symbol (KERN_EMERG " vma->vm_ops = %s\n", (unsigned long)vma->vm_ops); 623 print_symbol (KERN_EMERG " vma->vm_ops = %s\n", (unsigned long)vma->vm_ops);
624 if (vma->vm_ops) 624 if (vma->vm_ops) {
625 print_symbol (KERN_EMERG " vma->vm_ops->nopage = %s\n", (unsigned long)vma->vm_ops->nopage); 625 print_symbol (KERN_EMERG " vma->vm_ops->nopage = %s\n", (unsigned long)vma->vm_ops->nopage);
626 print_symbol (KERN_EMERG " vma->vm_ops->fault = %s\n", (unsigned long)vma->vm_ops->fault);
627 }
626 if (vma->vm_file && vma->vm_file->f_op) 628 if (vma->vm_file && vma->vm_file->f_op)
627 print_symbol (KERN_EMERG " vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap); 629 print_symbol (KERN_EMERG " vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap);
628 BUG(); 630 BUG();
diff --git a/mm/shmem.c b/mm/shmem.c
index 5808fadd3944..6b44440f1b24 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -83,7 +83,7 @@ enum sgp_type {
83 SGP_READ, /* don't exceed i_size, don't allocate page */ 83 SGP_READ, /* don't exceed i_size, don't allocate page */
84 SGP_CACHE, /* don't exceed i_size, may allocate page */ 84 SGP_CACHE, /* don't exceed i_size, may allocate page */
85 SGP_WRITE, /* may exceed i_size, may allocate page */ 85 SGP_WRITE, /* may exceed i_size, may allocate page */
86 SGP_NOPAGE, /* same as SGP_CACHE, return with page locked */ 86 SGP_FAULT, /* same as SGP_CACHE, return with page locked */
87}; 87};
88 88
89static int shmem_getpage(struct inode *inode, unsigned long idx, 89static int shmem_getpage(struct inode *inode, unsigned long idx,
@@ -1101,6 +1101,10 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
1101 1101
1102 if (idx >= SHMEM_MAX_INDEX) 1102 if (idx >= SHMEM_MAX_INDEX)
1103 return -EFBIG; 1103 return -EFBIG;
1104
1105 if (type)
1106 *type = VM_FAULT_MINOR;
1107
1104 /* 1108 /*
1105 * Normally, filepage is NULL on entry, and either found 1109 * Normally, filepage is NULL on entry, and either found
1106 * uptodate immediately, or allocated and zeroed, or read 1110 * uptodate immediately, or allocated and zeroed, or read
@@ -1291,7 +1295,7 @@ repeat:
1291done: 1295done:
1292 if (*pagep != filepage) { 1296 if (*pagep != filepage) {
1293 *pagep = filepage; 1297 *pagep = filepage;
1294 if (sgp != SGP_NOPAGE) 1298 if (sgp != SGP_FAULT)
1295 unlock_page(filepage); 1299 unlock_page(filepage);
1296 1300
1297 } 1301 }
@@ -1305,76 +1309,31 @@ failed:
1305 return error; 1309 return error;
1306} 1310}
1307 1311
1308static struct page *shmem_nopage(struct vm_area_struct *vma, 1312static struct page *shmem_fault(struct vm_area_struct *vma,
1309 unsigned long address, int *type) 1313 struct fault_data *fdata)
1310{ 1314{
1311 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 1315 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1312 struct page *page = NULL; 1316 struct page *page = NULL;
1313 unsigned long idx;
1314 int error; 1317 int error;
1315 1318
1316 BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE)); 1319 BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
1317 1320
1318 idx = (address - vma->vm_start) >> PAGE_SHIFT; 1321 if (((loff_t)fdata->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
1319 idx += vma->vm_pgoff; 1322 fdata->type = VM_FAULT_SIGBUS;
1320 idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; 1323 return NULL;
1321 if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode)) 1324 }
1322 return NOPAGE_SIGBUS;
1323 1325
1324 error = shmem_getpage(inode, idx, &page, SGP_NOPAGE, type); 1326 error = shmem_getpage(inode, fdata->pgoff, &page,
1325 if (error) 1327 SGP_FAULT, &fdata->type);
1326 return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS; 1328 if (error) {
1329 fdata->type = ((error == -ENOMEM)?VM_FAULT_OOM:VM_FAULT_SIGBUS);
1330 return NULL;
1331 }
1327 1332
1328 mark_page_accessed(page); 1333 mark_page_accessed(page);
1329 return page; 1334 return page;
1330} 1335}
1331 1336
1332static int shmem_populate(struct vm_area_struct *vma,
1333 unsigned long addr, unsigned long len,
1334 pgprot_t prot, unsigned long pgoff, int nonblock)
1335{
1336 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1337 struct mm_struct *mm = vma->vm_mm;
1338 enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE;
1339 unsigned long size;
1340
1341 size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
1342 if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size)
1343 return -EINVAL;
1344
1345 while ((long) len > 0) {
1346 struct page *page = NULL;
1347 int err;
1348 /*
1349 * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE
1350 */
1351 err = shmem_getpage(inode, pgoff, &page, sgp, NULL);
1352 if (err)
1353 return err;
1354 /* Page may still be null, but only if nonblock was set. */
1355 if (page) {
1356 mark_page_accessed(page);
1357 err = install_page(mm, vma, addr, page, prot);
1358 if (err) {
1359 page_cache_release(page);
1360 return err;
1361 }
1362 } else if (vma->vm_flags & VM_NONLINEAR) {
1363 /* No page was found just because we can't read it in
1364 * now (being here implies nonblock != 0), but the page
1365 * may exist, so set the PTE to fault it in later. */
1366 err = install_file_pte(mm, vma, addr, pgoff, prot);
1367 if (err)
1368 return err;
1369 }
1370
1371 len -= PAGE_SIZE;
1372 addr += PAGE_SIZE;
1373 pgoff++;
1374 }
1375 return 0;
1376}
1377
1378#ifdef CONFIG_NUMA 1337#ifdef CONFIG_NUMA
1379int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new) 1338int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
1380{ 1339{
@@ -1419,7 +1378,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
1419{ 1378{
1420 file_accessed(file); 1379 file_accessed(file);
1421 vma->vm_ops = &shmem_vm_ops; 1380 vma->vm_ops = &shmem_vm_ops;
1422 vma->vm_flags |= VM_CAN_INVALIDATE; 1381 vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
1423 return 0; 1382 return 0;
1424} 1383}
1425 1384
@@ -2465,8 +2424,7 @@ static const struct super_operations shmem_ops = {
2465}; 2424};
2466 2425
2467static struct vm_operations_struct shmem_vm_ops = { 2426static struct vm_operations_struct shmem_vm_ops = {
2468 .nopage = shmem_nopage, 2427 .fault = shmem_fault,
2469 .populate = shmem_populate,
2470#ifdef CONFIG_NUMA 2428#ifdef CONFIG_NUMA
2471 .set_policy = shmem_set_policy, 2429 .set_policy = shmem_set_policy,
2472 .get_policy = shmem_get_policy, 2430 .get_policy = shmem_get_policy,
diff --git a/mm/truncate.c b/mm/truncate.c
index aed85f0b707f..5cdfbc1a59fd 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -82,7 +82,7 @@ EXPORT_SYMBOL(cancel_dirty_page);
82/* 82/*
83 * If truncate cannot remove the fs-private metadata from the page, the page 83 * If truncate cannot remove the fs-private metadata from the page, the page
84 * becomes anonymous. It will be left on the LRU and may even be mapped into 84 * becomes anonymous. It will be left on the LRU and may even be mapped into
85 * user pagetables if we're racing with filemap_nopage(). 85 * user pagetables if we're racing with filemap_fault().
86 * 86 *
87 * We need to bale out if page->mapping is no longer equal to the original 87 * We need to bale out if page->mapping is no longer equal to the original
88 * mapping. This happens a) when the VM reclaimed the page while we waited on 88 * mapping. This happens a) when the VM reclaimed the page while we waited on