author	Nick Piggin <npiggin@suse.de>	2007-07-19 04:46:59 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-07-19 13:04:41 -0400
commit	54cb8821de07f2ffcd28c380ce9b93d5784b40d7
tree	1de676534963d96af42863b20191bc9f80060dea
parent	d00806b183152af6d24f46f0c33f14162ca1262a
mm: merge populate and nopage into fault (fixes nonlinear)
Nonlinear mappings are (AFAIKS) simply a virtual memory concept that encodes
the virtual address -> file offset differently from linear mappings.
->populate is a layering violation because the filesystem/pagecache code
should not need to know anything about the virtual memory mapping. The hitch here
is that the ->nopage handler didn't pass down enough information (ie. pgoff).
But it is more logical to pass pgoff rather than have the ->nopage function
calculate it itself anyway (because that's a similar layering violation).
Having the populate handler install the pte itself is likewise a nasty thing
to be doing.
This patch introduces a new fault handler that replaces ->nopage and
->populate and (later) ->nopfn. Most of the old mechanism is still in place,
so there is a lot of duplication; the nice cleanups can follow once
everyone switches over.
The rationale for doing this in the first place is that nonlinear mappings are
subject to the pagefault vs invalidate/truncate race too, and it seemed stupid
to duplicate the synchronisation logic rather than just consolidate the two.
After this patch, MAP_NONBLOCK no longer sets up ptes for pages present in
pagecache. That seems like fringe functionality anyway.
NOPAGE_REFAULT is removed. This should be implemented with ->fault, and no
users have hit mainline yet.
[akpm@linux-foundation.org: cleanup]
[randy.dunlap@oracle.com: doc. fixes for readahead]
[akpm@linux-foundation.org: build fix]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
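
To make the new interface concrete: a minimal, hypothetical ->fault handler for a
pagecache-backed mapping would look roughly like the sketch below. It is written
against the fault_data and vm_operations_struct definitions this patch adds to
include/linux/mm.h; the example_* names are illustrative only, not part of the patch.

static struct page *example_fault(struct vm_area_struct *vma,
					struct fault_data *fdata)
{
	/* The core VM computed fdata->pgoff for us; a ->nopage handler
	 * would have had to derive it from the faulting address. */
	struct page *page = find_get_page(vma->vm_file->f_mapping,
						fdata->pgoff);

	if (!page) {
		/* On a NULL return, ->fault reports the error class here. */
		fdata->type = VM_FAULT_SIGBUS;
		return NULL;
	}
	fdata->type = VM_FAULT_MINOR;	/* page was already resident */
	return page;
}

static struct vm_operations_struct example_vm_ops = {
	.fault	= example_fault,
};

A real handler would also start I/O for absent pages and report VM_FAULT_MAJOR in
that case, as filemap_fault does below.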
 Documentation/feature-removal-schedule.txt |  27
 Documentation/filesystems/Locking          |   2
 fs/gfs2/ops_address.c                      |   2
 fs/gfs2/ops_file.c                         |   2
 fs/gfs2/ops_vm.c                           |  36
 fs/ncpfs/mmap.c                            |  23
 fs/ocfs2/aops.c                            |   2
 fs/ocfs2/mmap.c                            |  17
 fs/xfs/linux-2.6/xfs_file.c                |  23
 include/linux/mm.h                         |  41
 ipc/shm.c                                  |   9
 mm/filemap.c                               |  94
 mm/filemap_xip.c                           |  54
 mm/fremap.c                                | 103
 mm/memory.c                                | 132
 mm/mmap.c                                  |   8
 mm/nommu.c                                 |   3
 mm/rmap.c                                  |   4
 mm/shmem.c                                 |  82
 mm/truncate.c                              |   2
 20 files changed, 394 insertions, 272 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 66c8b4b165c1..716568afdff8 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -135,6 +135,33 @@ Who: Greg Kroah-Hartman <gregkh@suse.de>
 
 ---------------------------
 
+What:	filemap_nopage, filemap_populate
+When:	April 2007
+Why:	These legacy interfaces no longer have any callers in the kernel and
+	any functionality provided can be provided with filemap_fault. The
+	removal schedule is short because they are a big maintenance burden
+	and have some bugs.
+Who:	Nick Piggin <npiggin@suse.de>
+
+---------------------------
+
+What:	vm_ops.populate, install_page
+When:	April 2007
+Why:	These legacy interfaces no longer have any callers in the kernel and
+	any functionality provided can be provided with vm_ops.fault.
+Who:	Nick Piggin <npiggin@suse.de>
+
+---------------------------
+
+What:	vm_ops.nopage
+When:	February 2008, provided in-kernel callers have been converted
+Why:	This interface is replaced by vm_ops.fault, but it has been around
+	forever, is used by a lot of drivers, and doesn't cost much to
+	maintain.
+Who:	Nick Piggin <npiggin@suse.de>
+
+---------------------------
+
 What:	Interrupt only SA_* flags
 When:	September 2007
 Why:	The interrupt related SA_* flags are replaced by IRQF_* to move them
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index d866551be037..970c8ec1a05b 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -510,12 +510,14 @@ More details about quota locking can be found in fs/dquot.c.
 prototypes:
 	void (*open)(struct vm_area_struct*);
 	void (*close)(struct vm_area_struct*);
+	struct page *(*fault)(struct vm_area_struct*, struct fault_data *);
 	struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *);
 
 locking rules:
 		BKL	mmap_sem
 open:		no	yes
 close:		no	yes
+fault:		no	yes
 nopage:		no	yes
 
 ================================================================================
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 26c888890c24..ce90032c010e 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -251,7 +251,7 @@ static int gfs2_readpage(struct file *file, struct page *page)
 	if (file) {
 		gf = file->private_data;
 		if (test_bit(GFF_EXLOCK, &gf->f_flags))
-			/* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */
+			/* gfs2_sharewrite_fault has grabbed the ip->i_gl already */
 			goto skip_lock;
 	}
 	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index bad0b24cb773..581ac11b2656 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -364,7 +364,7 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
 	else
 		vma->vm_ops = &gfs2_vm_ops_private;
 
-	vma->vm_flags |= VM_CAN_INVALIDATE;
+	vma->vm_flags |= VM_CAN_INVALIDATE|VM_CAN_NONLINEAR;
 
 	gfs2_glock_dq_uninit(&i_gh);
 
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
index d5a98cbfebdc..e9fe6eb74e75 100644
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@@ -27,13 +27,13 @@
 #include "trans.h"
 #include "util.h"
 
-static struct page *gfs2_private_nopage(struct vm_area_struct *area,
-					unsigned long address, int *type)
+static struct page *gfs2_private_fault(struct vm_area_struct *vma,
+					struct fault_data *fdata)
 {
-	struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host);
+	struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host);
 
 	set_bit(GIF_PAGED, &ip->i_flags);
-	return filemap_nopage(area, address, type);
+	return filemap_fault(vma, fdata);
 }
 
 static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
@@ -104,16 +104,14 @@ out:
 	return error;
 }
 
-static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
-					   unsigned long address, int *type)
+static struct page *gfs2_sharewrite_fault(struct vm_area_struct *vma,
+					struct fault_data *fdata)
 {
-	struct file *file = area->vm_file;
+	struct file *file = vma->vm_file;
 	struct gfs2_file *gf = file->private_data;
 	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
 	struct gfs2_holder i_gh;
 	struct page *result = NULL;
-	unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) +
-			      area->vm_pgoff;
 	int alloc_required;
 	int error;
 
@@ -124,23 +122,27 @@ static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
 	set_bit(GIF_PAGED, &ip->i_flags);
 	set_bit(GIF_SW_PAGED, &ip->i_flags);
 
-	error = gfs2_write_alloc_required(ip, (u64)index << PAGE_CACHE_SHIFT,
-					  PAGE_CACHE_SIZE, &alloc_required);
-	if (error)
+	error = gfs2_write_alloc_required(ip,
+				(u64)fdata->pgoff << PAGE_CACHE_SHIFT,
+				PAGE_CACHE_SIZE, &alloc_required);
+	if (error) {
+		fdata->type = VM_FAULT_OOM; /* XXX: are these right? */
 		goto out;
+	}
 
 	set_bit(GFF_EXLOCK, &gf->f_flags);
-	result = filemap_nopage(area, address, type);
+	result = filemap_fault(vma, fdata);
 	clear_bit(GFF_EXLOCK, &gf->f_flags);
-	if (!result || result == NOPAGE_OOM)
+	if (!result)
 		goto out;
 
 	if (alloc_required) {
 		error = alloc_page_backing(ip, result);
 		if (error) {
-			if (area->vm_flags & VM_CAN_INVALIDATE)
+			if (vma->vm_flags & VM_CAN_INVALIDATE)
 				unlock_page(result);
 			page_cache_release(result);
+			fdata->type = VM_FAULT_OOM;
 			result = NULL;
 			goto out;
 		}
@@ -154,10 +156,10 @@ out:
 }
 
 struct vm_operations_struct gfs2_vm_ops_private = {
-	.nopage = gfs2_private_nopage,
+	.fault = gfs2_private_fault,
};
 
 struct vm_operations_struct gfs2_vm_ops_sharewrite = {
-	.nopage = gfs2_sharewrite_nopage,
+	.fault = gfs2_sharewrite_fault,
};
 
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index 5416673418b8..af48b792ca04 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -25,8 +25,8 @@
 /*
 * Fill in the supplied page for mmap
 */
-static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area,
-				     unsigned long address, int *type)
+static struct page* ncp_file_mmap_fault(struct vm_area_struct *area,
+				     struct fault_data *fdata)
 {
 	struct file *file = area->vm_file;
 	struct dentry *dentry = file->f_path.dentry;
@@ -40,15 +40,17 @@ static struct page* ncp_file_mmap_fault(struct vm_area_struct *area,
 
 	page = alloc_page(GFP_HIGHUSER); /* ncpfs has nothing against high pages
 	           as long as recvmsg and memset works on it */
-	if (!page)
-		return page;
+	if (!page) {
+		fdata->type = VM_FAULT_OOM;
+		return NULL;
+	}
 	pg_addr = kmap(page);
-	address &= PAGE_MASK;
-	pos = address - area->vm_start + (area->vm_pgoff << PAGE_SHIFT);
+	pos = fdata->pgoff << PAGE_SHIFT;
 
 	count = PAGE_SIZE;
-	if (address + PAGE_SIZE > area->vm_end) {
-		count = area->vm_end - address;
+	if (fdata->address + PAGE_SIZE > area->vm_end) {
+		WARN_ON(1); /* shouldn't happen? */
+		count = area->vm_end - fdata->address;
 	}
 	/* what we can read in one go */
 	bufsize = NCP_SERVER(inode)->buffer_size;
@@ -91,15 +93,14 @@ static struct page* ncp_file_mmap_fault(struct vm_area_struct *area,
 	 * fetches from the network, here the analogue of disk.
 	 * -- wli
 	 */
-	if (type)
-		*type = VM_FAULT_MAJOR;
+	fdata->type = VM_FAULT_MAJOR;
 	count_vm_event(PGMAJFAULT);
 	return page;
 }
 
 static struct vm_operations_struct ncp_file_mmap =
 {
-	.nopage	= ncp_file_mmap_nopage,
+	.fault = ncp_file_mmap_fault,
 };
 
 
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 84bf6e79de23..460d440310f2 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -232,7 +232,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
 	 * might now be discovering a truncate that hit on another node.
 	 * block_read_full_page->get_block freaks out if it is asked to read
 	 * beyond the end of a file, so we check here.  Callers
-	 * (generic_file_read, fault->nopage) are clever enough to check i_size
+	 * (generic_file_read, vm_ops->fault) are clever enough to check i_size
 	 * and notice that the page they just read isn't needed.
 	 *
 	 * XXX sys_readahead() seems to get that wrong?
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 904f39ff5340..cd75508b1c8a 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -60,24 +60,23 @@ static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset)
 	return sigprocmask(SIG_SETMASK, oldset, NULL);
 }
 
-static struct page *ocfs2_nopage(struct vm_area_struct * area,
-				 unsigned long address,
-				 int *type)
+static struct page *ocfs2_fault(struct vm_area_struct *area,
+						struct fault_data *fdata)
 {
-	struct page *page = NOPAGE_SIGBUS;
+	struct page *page = NULL;
 	sigset_t blocked, oldset;
 	int ret;
 
-	mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address,
-		   type);
+	mlog_entry("(area=%p, page offset=%lu)\n", area, fdata->pgoff);
 
 	ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
 	if (ret < 0) {
+		fdata->type = VM_FAULT_SIGBUS;
 		mlog_errno(ret);
 		goto out;
 	}
 
-	page = filemap_nopage(area, address, type);
+	page = filemap_fault(area, fdata);
 
 	ret = ocfs2_vm_op_unblock_sigs(&oldset);
 	if (ret < 0)
@@ -209,7 +208,7 @@ out:
 }
 
 static struct vm_operations_struct ocfs2_file_vm_ops = {
-	.nopage = ocfs2_nopage,
+	.fault		= ocfs2_fault,
 	.page_mkwrite = ocfs2_page_mkwrite,
 };
 
@@ -226,7 +225,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
 	ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level);
 out:
 	vma->vm_ops = &ocfs2_file_vm_ops;
-	vma->vm_flags |= VM_CAN_INVALIDATE;
+	vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
 	return 0;
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 92b2f225712f..f12e80a69c68 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -213,18 +213,19 @@ xfs_file_fsync(
 
 #ifdef CONFIG_XFS_DMAPI
 STATIC struct page *
-xfs_vm_nopage(
-	struct vm_area_struct	*area,
-	unsigned long		address,
-	int			*type)
+xfs_vm_fault(
+	struct vm_area_struct	*vma,
+	struct fault_data	*fdata)
 {
-	struct inode	*inode = area->vm_file->f_path.dentry->d_inode;
+	struct inode	*inode = vma->vm_file->f_path.dentry->d_inode;
 	bhv_vnode_t	*vp = vn_from_inode(inode);
 
 	ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
-	if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), area, 0))
+	if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0)) {
+		fdata->type = VM_FAULT_SIGBUS;
 		return NULL;
-	return filemap_nopage(area, address, type);
+	}
+	return filemap_fault(vma, fdata);
 }
 #endif /* CONFIG_XFS_DMAPI */
 
@@ -310,7 +311,7 @@ xfs_file_mmap(
 	struct vm_area_struct *vma)
 {
 	vma->vm_ops = &xfs_file_vm_ops;
-	vma->vm_flags |= VM_CAN_INVALIDATE;
+	vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
 
 #ifdef CONFIG_XFS_DMAPI
 	if (vn_from_inode(filp->f_path.dentry->d_inode)->v_vfsp->vfs_flag & VFS_DMI)
@@ -465,14 +466,12 @@ const struct file_operations xfs_dir_file_operations = {
 };
 
 static struct vm_operations_struct xfs_file_vm_ops = {
-	.nopage		= filemap_nopage,
-	.populate	= filemap_populate,
+	.fault		= filemap_fault,
 };
 
 #ifdef CONFIG_XFS_DMAPI
 static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
-	.nopage		= xfs_vm_nopage,
-	.populate	= filemap_populate,
+	.fault		= xfs_vm_fault,
 #ifdef HAVE_VMOP_MPROTECT
 	.mprotect	= xfs_vm_mprotect,
 #endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ca9536a348c8..f28a1b3e63a9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -173,6 +173,7 @@ extern unsigned int kobjsize(const void *objp);
 					 * In this case, do_no_page must
 					 * return with the page locked.
 					 */
+#define VM_CAN_NONLINEAR 0x10000000	/* Has ->fault & does nonlinear pages */
 
 #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
@@ -196,6 +197,25 @@ extern unsigned int kobjsize(const void *objp);
 */
 extern pgprot_t protection_map[16];
 
+#define FAULT_FLAG_WRITE	0x01
+#define FAULT_FLAG_NONLINEAR	0x02
+
+/*
+ * fault_data is filled in by the pagefault handler and passed to the
+ * vma's ->fault function. That function is responsible for filling in
+ * 'type', which is the type of fault if a page is returned, or the type
+ * of error if NULL is returned.
+ *
+ * pgoff should be used in favour of address, if possible. If pgoff is
+ * used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get
+ * nonlinear mapping support.
+ */
+struct fault_data {
+	unsigned long address;
+	pgoff_t pgoff;
+	unsigned int flags;
+	int type;
+};
 
 /*
 * These are the virtual MM functions - opening of an area, closing and
@@ -205,9 +225,15 @@ extern pgprot_t protection_map[16];
 struct vm_operations_struct {
 	void (*open)(struct vm_area_struct * area);
 	void (*close)(struct vm_area_struct * area);
-	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
-	unsigned long (*nopfn)(struct vm_area_struct * area, unsigned long address);
-	int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
+	struct page *(*fault)(struct vm_area_struct *vma,
+			struct fault_data *fdata);
+	struct page *(*nopage)(struct vm_area_struct *area,
+			unsigned long address, int *type);
+	unsigned long (*nopfn)(struct vm_area_struct *area,
+			unsigned long address);
+	int (*populate)(struct vm_area_struct *area, unsigned long address,
+			unsigned long len, pgprot_t prot, unsigned long pgoff,
+			int nonblock);
 
 	/* notification that a previously read-only page is about to become
 	 * writable, if an error is returned it will cause a SIGBUS */
@@ -661,7 +687,6 @@ static inline int page_mapped(struct page *page)
 */
 #define NOPAGE_SIGBUS	(NULL)
 #define NOPAGE_OOM	((struct page *) (-1))
-#define NOPAGE_REFAULT	((struct page *) (-2))	/* Return to userspace, rerun */
 
 /*
 * Error return values for the *_nopfn functions
@@ -1110,9 +1135,11 @@ extern void truncate_inode_pages_range(struct address_space *,
 		loff_t lstart, loff_t lend);
 
 /* generic vm_area_ops exported for stackable file systems */
-extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *);
-extern int filemap_populate(struct vm_area_struct *, unsigned long,
-		unsigned long, pgprot_t, unsigned long, int);
+extern struct page *filemap_fault(struct vm_area_struct *, struct fault_data *);
+extern struct page * __deprecated_for_modules
+filemap_nopage(struct vm_area_struct *, unsigned long, int *);
+extern int __deprecated_for_modules filemap_populate(struct vm_area_struct *,
+		unsigned long, unsigned long, pgprot_t, unsigned long, int);
 
 /* mm/page-writeback.c */
 int write_one_page(struct page *page, int wait);
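
Before moving on to the callers, here is a sketch of the caller's side of the
contract the fault_data comment above describes. It paraphrases what __do_fault()
in mm/memory.c (further down in this patch) does; it is illustrative, not an
additional API:

	struct fault_data fdata;
	struct page *page;

	fdata.address = address & PAGE_MASK;	/* faulting virtual address */
	fdata.pgoff = pgoff;			/* offset into the file, in pages */
	fdata.flags = write_access ? FAULT_FLAG_WRITE : 0;

	page = vma->vm_ops->fault(vma, &fdata);
	if (!page)
		return fdata.type;	/* VM_FAULT_OOM or VM_FAULT_SIGBUS */
	/* page comes back with a reference held, and locked if the vma
	 * has VM_CAN_INVALIDATE; fdata.type is VM_FAULT_{MINOR,MAJOR} */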
diff --git a/ipc/shm.c b/ipc/shm.c
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -224,13 +224,13 @@ static void shm_close(struct vm_area_struct *vma)
 	mutex_unlock(&shm_ids(ns).mutex);
 }
 
-static struct page *shm_nopage(struct vm_area_struct *vma,
-			       unsigned long address, int *type)
+static struct page *shm_fault(struct vm_area_struct *vma,
+					struct fault_data *fdata)
 {
 	struct file *file = vma->vm_file;
 	struct shm_file_data *sfd = shm_file_data(file);
 
-	return sfd->vm_ops->nopage(vma, address, type);
+	return sfd->vm_ops->fault(vma, fdata);
 }
 
 #ifdef CONFIG_NUMA
@@ -269,6 +269,7 @@ static int shm_mmap(struct file * file, struct vm_area_struct * vma)
 	if (ret != 0)
 		return ret;
 	sfd->vm_ops = vma->vm_ops;
+	BUG_ON(!sfd->vm_ops->fault);
 	vma->vm_ops = &shm_vm_ops;
 	shm_open(vma);
 
@@ -327,7 +328,7 @@ static const struct file_operations shm_file_operations = {
 static struct vm_operations_struct shm_vm_ops = {
 	.open	= shm_open,	/* callback for a new vm-area open */
 	.close	= shm_close,	/* callback for when the vm-area is released */
-	.nopage	= shm_nopage,
+	.fault	= shm_fault,
 #if defined(CONFIG_NUMA)
 	.set_policy = shm_set_policy,
 	.get_policy = shm_get_policy,
diff --git a/mm/filemap.c b/mm/filemap.c
index 462cda58a18e..26b992d169e5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1301,40 +1301,38 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
 #define MMAP_LOTSAMISS  (100)
 
 /**
- * filemap_nopage - read in file data for page fault handling
- * @area:	the applicable vm_area
- * @address:	target address to read in
- * @type:	returned with VM_FAULT_{MINOR,MAJOR} if not %NULL
+ * filemap_fault - read in file data for page fault handling
+ * @vma:	user vma (not used)
+ * @fdata:	the applicable fault_data
 *
- * filemap_nopage() is invoked via the vma operations vector for a
+ * filemap_fault() is invoked via the vma operations vector for a
 * mapped memory region to read in file data during a page fault.
 *
 * The goto's are kind of ugly, but this streamlines the normal case of having
 * it in the page cache, and handles the special cases reasonably without
 * having a lot of duplicated code.
 */
-struct page *filemap_nopage(struct vm_area_struct *area,
-				unsigned long address, int *type)
+struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
 {
 	int error;
-	struct file *file = area->vm_file;
+	struct file *file = vma->vm_file;
 	struct address_space *mapping = file->f_mapping;
 	struct file_ra_state *ra = &file->f_ra;
 	struct inode *inode = mapping->host;
 	struct page *page;
-	unsigned long size, pgoff;
-	int did_readaround = 0, majmin = VM_FAULT_MINOR;
+	unsigned long size;
+	int did_readaround = 0;
 
-	BUG_ON(!(area->vm_flags & VM_CAN_INVALIDATE));
+	fdata->type = VM_FAULT_MINOR;
 
-	pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
+	BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
 
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (pgoff >= size)
+	if (fdata->pgoff >= size)
 		goto outside_data_content;
 
 	/* If we don't want any read-ahead, don't bother */
-	if (VM_RandomReadHint(area))
+	if (VM_RandomReadHint(vma))
 		goto no_cached_page;
 
 	/*
@@ -1343,19 +1341,19 @@ struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
 	 *
 	 * For sequential accesses, we use the generic readahead logic.
 	 */
-	if (VM_SequentialReadHint(area))
-		page_cache_readahead(mapping, ra, file, pgoff, 1);
+	if (VM_SequentialReadHint(vma))
+		page_cache_readahead(mapping, ra, file, fdata->pgoff, 1);
 
 	/*
 	 * Do we have something in the page cache already?
 	 */
 retry_find:
-	page = find_lock_page(mapping, pgoff);
+	page = find_lock_page(mapping, fdata->pgoff);
 	if (!page) {
 		unsigned long ra_pages;
 
-		if (VM_SequentialReadHint(area)) {
-			handle_ra_miss(mapping, ra, pgoff);
+		if (VM_SequentialReadHint(vma)) {
+			handle_ra_miss(mapping, ra, fdata->pgoff);
 			goto no_cached_page;
 		}
 		ra->mmap_miss++;
@@ -1372,7 +1370,7 @@ retry_find:
 		 * check did_readaround, as this is an inner loop.
 		 */
 		if (!did_readaround) {
-			majmin = VM_FAULT_MAJOR;
+			fdata->type = VM_FAULT_MAJOR;
 			count_vm_event(PGMAJFAULT);
 		}
 		did_readaround = 1;
@@ -1380,11 +1378,11 @@ retry_find:
 		if (ra_pages) {
 			pgoff_t start = 0;
 
-			if (pgoff > ra_pages / 2)
-				start = pgoff - ra_pages / 2;
+			if (fdata->pgoff > ra_pages / 2)
+				start = fdata->pgoff - ra_pages / 2;
 			do_page_cache_readahead(mapping, file, start, ra_pages);
 		}
-		page = find_lock_page(mapping, pgoff);
+		page = find_lock_page(mapping, fdata->pgoff);
 		if (!page)
 			goto no_cached_page;
 	}
@@ -1401,7 +1399,7 @@ retry_find:
 
 	/* Must recheck i_size under page lock */
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (unlikely(pgoff >= size)) {
+	if (unlikely(fdata->pgoff >= size)) {
 		unlock_page(page);
 		goto outside_data_content;
 	}
@@ -1410,8 +1408,6 @@ retry_find:
 	 * Found the page and have a reference on it.
 	 */
 	mark_page_accessed(page);
-	if (type)
-		*type = majmin;
 	return page;
 
 outside_data_content:
@@ -1419,15 +1415,17 @@ outside_data_content:
 	 * An external ptracer can access pages that normally aren't
 	 * accessible..
 	 */
-	if (area->vm_mm == current->mm)
-		return NOPAGE_SIGBUS;
+	if (vma->vm_mm == current->mm) {
+		fdata->type = VM_FAULT_SIGBUS;
+		return NULL;
+	}
 	/* Fall through to the non-read-ahead case */
no_cached_page:
 	/*
 	 * We're only likely to ever get here if MADV_RANDOM is in
 	 * effect.
 	 */
-	error = page_cache_read(file, pgoff);
+	error = page_cache_read(file, fdata->pgoff);
 
 	/*
 	 * The page we want has now been added to the page cache.
@@ -1443,13 +1441,15 @@ no_cached_page:
 	 * to schedule I/O.
 	 */
 	if (error == -ENOMEM)
-		return NOPAGE_OOM;
-	return NOPAGE_SIGBUS;
+		fdata->type = VM_FAULT_OOM;
+	else
+		fdata->type = VM_FAULT_SIGBUS;
+	return NULL;
 
page_not_uptodate:
 	/* IO error path */
 	if (!did_readaround) {
-		majmin = VM_FAULT_MAJOR;
+		fdata->type = VM_FAULT_MAJOR;
 		count_vm_event(PGMAJFAULT);
 	}
 
@@ -1468,7 +1468,30 @@ page_not_uptodate:
 
 	/* Things didn't work out. Return zero to tell the mm layer so. */
 	shrink_readahead_size_eio(file, ra);
-	return NOPAGE_SIGBUS;
+	fdata->type = VM_FAULT_SIGBUS;
+	return NULL;
+}
+EXPORT_SYMBOL(filemap_fault);
+
+/*
+ * filemap_nopage and filemap_populate are legacy exports that are not used
+ * in tree. Scheduled for removal.
+ */
+struct page *filemap_nopage(struct vm_area_struct *area,
+				unsigned long address, int *type)
+{
+	struct page *page;
+	struct fault_data fdata;
+	fdata.address = address;
+	fdata.pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
+			+ area->vm_pgoff;
+	fdata.flags = 0;
+
+	page = filemap_fault(area, &fdata);
+	if (type)
+		*type = fdata.type;
+
+	return page;
 }
 EXPORT_SYMBOL(filemap_nopage);
 
@@ -1646,8 +1669,7 @@ repeat:
 EXPORT_SYMBOL(filemap_populate);
 
 struct vm_operations_struct generic_file_vm_ops = {
-	.nopage		= filemap_nopage,
-	.populate	= filemap_populate,
+	.fault		= filemap_fault,
 };
 
 /* This is used for a general mmap of a disk file */
@@ -1660,7 +1682,7 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
 		return -ENOEXEC;
 	file_accessed(file);
 	vma->vm_ops = &generic_file_vm_ops;
-	vma->vm_flags |= VM_CAN_INVALIDATE;
+	vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
 	return 0;
 }
 
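
With generic_file_vm_ops converted, a filesystem that relies on the generic
pagecache path needs only the following pattern in its mmap method. This sketch is
modeled on the generic_file_mmap() hunk above; the example_* names are hypothetical:

static struct vm_operations_struct example_file_vm_ops = {
	.fault	= filemap_fault,	/* generic pagecache ->fault */
};

static int example_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	if (!file->f_mapping->a_ops->readpage)
		return -ENOEXEC;
	file_accessed(file);
	vma->vm_ops = &example_file_vm_ops;
	/* filemap_fault assumes both flags: VM_CAN_INVALIDATE for the
	 * fault-vs-truncate protocol (it BUG()s without it), and
	 * VM_CAN_NONLINEAR because it faults purely by pgoff. */
	vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
	return 0;
}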
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 65ffc321f0c0..82f4b8e9834e 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -205,62 +205,67 @@ __xip_unmap (struct address_space * mapping,
 }
 
 /*
- * xip_nopage() is invoked via the vma operations vector for a
+ * xip_fault() is invoked via the vma operations vector for a
 * mapped memory region to read in file data during a page fault.
 *
- * This function is derived from filemap_nopage, but used for execute in place
+ * This function is derived from filemap_fault, but used for execute in place
 */
-static struct page *
-xip_file_nopage(struct vm_area_struct * area,
-		unsigned long address,
-		int *type)
+static struct page *xip_file_fault(struct vm_area_struct *area,
+					struct fault_data *fdata)
 {
 	struct file *file = area->vm_file;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
 	struct page *page;
-	unsigned long size, pgoff, endoff;
+	pgoff_t size;
 
-	pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
-		+ area->vm_pgoff;
-	endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT)
-		+ area->vm_pgoff;
+	/* XXX: are VM_FAULT_ codes OK? */
 
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (pgoff >= size)
-		return NOPAGE_SIGBUS;
+	if (fdata->pgoff >= size) {
+		fdata->type = VM_FAULT_SIGBUS;
+		return NULL;
+	}
 
-	page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
+	page = mapping->a_ops->get_xip_page(mapping,
+					fdata->pgoff*(PAGE_SIZE/512), 0);
 	if (!IS_ERR(page))
 		goto out;
-	if (PTR_ERR(page) != -ENODATA)
-		return NOPAGE_SIGBUS;
+	if (PTR_ERR(page) != -ENODATA) {
+		fdata->type = VM_FAULT_OOM;
+		return NULL;
+	}
 
 	/* sparse block */
 	if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
 	    (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) &&
 	    (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
 		/* maybe shared writable, allocate new block */
-		page = mapping->a_ops->get_xip_page (mapping,
-			pgoff*(PAGE_SIZE/512), 1);
-		if (IS_ERR(page))
-			return NOPAGE_SIGBUS;
+		page = mapping->a_ops->get_xip_page(mapping,
+					fdata->pgoff*(PAGE_SIZE/512), 1);
+		if (IS_ERR(page)) {
+			fdata->type = VM_FAULT_SIGBUS;
+			return NULL;
+		}
 		/* unmap page at pgoff from all other vmas */
-		__xip_unmap(mapping, pgoff);
+		__xip_unmap(mapping, fdata->pgoff);
 	} else {
 		/* not shared and writable, use xip_sparse_page() */
 		page = xip_sparse_page();
-		if (!page)
-			return NOPAGE_OOM;
+		if (!page) {
+			fdata->type = VM_FAULT_OOM;
+			return NULL;
+		}
 	}
 
out:
+	fdata->type = VM_FAULT_MINOR;
 	page_cache_get(page);
 	return page;
 }
 
 static struct vm_operations_struct xip_file_vm_ops = {
-	.nopage = xip_file_nopage,
+	.fault	= xip_file_fault,
 };
 
 int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
@@ -269,6 +274,7 @@ int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
 
 	file_accessed(file);
 	vma->vm_ops = &xip_file_vm_ops;
+	vma->vm_flags |= VM_CAN_NONLINEAR;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xip_file_mmap);
diff --git a/mm/fremap.c b/mm/fremap.c
index 4e3f53dd5fd4..01e51f01b84e 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -126,6 +126,25 @@ out:
 	return err;
 }
 
+static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma,
+			unsigned long addr, unsigned long size, pgoff_t pgoff)
+{
+	int err;
+
+	do {
+		err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot);
+		if (err)
+			return err;
+
+		size -= PAGE_SIZE;
+		addr += PAGE_SIZE;
+		pgoff++;
+	} while (size);
+
+	return 0;
+
+}
+
 /***
 * sys_remap_file_pages - remap arbitrary pages of a shared backing store
 *			  file within an existing vma.
@@ -183,41 +202,63 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 	 * the single existing vma.  vm_private_data is used as a
 	 * swapout cursor in a VM_NONLINEAR vma.
 	 */
-	if (vma && (vma->vm_flags & VM_SHARED) &&
-		(!vma->vm_private_data || (vma->vm_flags & VM_NONLINEAR)) &&
-		vma->vm_ops && vma->vm_ops->populate &&
-			end > start && start >= vma->vm_start &&
-				end <= vma->vm_end) {
+	if (!vma || !(vma->vm_flags & VM_SHARED))
+		goto out;
+
+	if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR))
+		goto out;
+
+	if ((!vma->vm_ops || !vma->vm_ops->populate) &&
+					!(vma->vm_flags & VM_CAN_NONLINEAR))
+		goto out;
+
+	if (end <= start || start < vma->vm_start || end > vma->vm_end)
+		goto out;
+
+	/* Must set VM_NONLINEAR before any pages are populated. */
+	if (!(vma->vm_flags & VM_NONLINEAR)) {
+		/* Don't need a nonlinear mapping, exit success */
+		if (pgoff == linear_page_index(vma, start)) {
+			err = 0;
+			goto out;
+		}
 
-		/* Must set VM_NONLINEAR before any pages are populated. */
-		if (pgoff != linear_page_index(vma, start) &&
-				!(vma->vm_flags & VM_NONLINEAR)) {
-			if (!has_write_lock) {
-				up_read(&mm->mmap_sem);
-				down_write(&mm->mmap_sem);
-				has_write_lock = 1;
-				goto retry;
+		if (!has_write_lock) {
+			up_read(&mm->mmap_sem);
+			down_write(&mm->mmap_sem);
+			has_write_lock = 1;
+			goto retry;
+		}
+		mapping = vma->vm_file->f_mapping;
+		spin_lock(&mapping->i_mmap_lock);
+		flush_dcache_mmap_lock(mapping);
+		vma->vm_flags |= VM_NONLINEAR;
+		vma_prio_tree_remove(vma, &mapping->i_mmap);
+		vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
+		flush_dcache_mmap_unlock(mapping);
+		spin_unlock(&mapping->i_mmap_lock);
+	}
+
+	if (vma->vm_flags & VM_CAN_NONLINEAR) {
+		err = populate_range(mm, vma, start, size, pgoff);
+		if (!err && !(flags & MAP_NONBLOCK)) {
+			if (unlikely(has_write_lock)) {
+				downgrade_write(&mm->mmap_sem);
+				has_write_lock = 0;
 			}
-			mapping = vma->vm_file->f_mapping;
-			spin_lock(&mapping->i_mmap_lock);
-			flush_dcache_mmap_lock(mapping);
-			vma->vm_flags |= VM_NONLINEAR;
-			vma_prio_tree_remove(vma, &mapping->i_mmap);
-			vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
-			flush_dcache_mmap_unlock(mapping);
-			spin_unlock(&mapping->i_mmap_lock);
+			make_pages_present(start, start+size);
 		}
+	} else
+		err = vma->vm_ops->populate(vma, start, size, vma->vm_page_prot,
+					    pgoff, flags & MAP_NONBLOCK);
 
-		err = vma->vm_ops->populate(vma, start, size,
-					    vma->vm_page_prot,
-					    pgoff, flags & MAP_NONBLOCK);
+	/*
+	 * We can't clear VM_NONLINEAR because we'd have to do
+	 * it after ->populate completes, and that would prevent
+	 * downgrading the lock.  (Locks can't be upgraded).
+	 */
 
-		/*
-		 * We can't clear VM_NONLINEAR because we'd have to do
-		 * it after ->populate completes, and that would prevent
-		 * downgrading the lock.  (Locks can't be upgraded).
-		 */
-	}
+out:
 	if (likely(!has_write_lock))
 		up_read(&mm->mmap_sem);
 	else
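
For reference, this is what a nonlinear mapping looks like from userspace; after
this patch, faults on the remapped range go through ->fault with
FAULT_FLAG_NONLINEAR and the rewired pgoff. A sketch, assuming a hypothetical file
"datafile" of at least two pages:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long pg = sysconf(_SC_PAGESIZE);
	int fd = open("datafile", O_RDWR);	/* hypothetical test file */
	char *p;

	if (fd < 0)
		return 1;
	/* Linear MAP_SHARED mapping of the file's first two pages. */
	p = mmap(NULL, 2 * pg, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	/* Rewire virtual page 0 to file page 1: the vma becomes
	 * VM_NONLINEAR, and faults on p[0..pg) carry pgoff == 1. */
	if (remap_file_pages(p, pg, 0, 1, 0) != 0)
		perror("remap_file_pages");
	p[0] = 1;	/* writes to file page 1 through the remapped pte */
	return 0;
}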
diff --git a/mm/memory.c b/mm/memory.c index e6c99f6b5649..eee7fec3ab54 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1047,7 +1047,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | |||
1047 | if (pages) | 1047 | if (pages) |
1048 | foll_flags |= FOLL_GET; | 1048 | foll_flags |= FOLL_GET; |
1049 | if (!write && !(vma->vm_flags & VM_LOCKED) && | 1049 | if (!write && !(vma->vm_flags & VM_LOCKED) && |
1050 | (!vma->vm_ops || !vma->vm_ops->nopage)) | 1050 | (!vma->vm_ops || (!vma->vm_ops->nopage && |
1051 | !vma->vm_ops->fault))) | ||
1051 | foll_flags |= FOLL_ANON; | 1052 | foll_flags |= FOLL_ANON; |
1052 | 1053 | ||
1053 | do { | 1054 | do { |
@@ -2288,10 +2289,10 @@ oom: | |||
2288 | } | 2289 | } |
2289 | 2290 | ||
2290 | /* | 2291 | /* |
2291 | * do_no_page() tries to create a new page mapping. It aggressively | 2292 | * __do_fault() tries to create a new page mapping. It aggressively |
2292 | * tries to share with existing pages, but makes a separate copy if | 2293 | * tries to share with existing pages, but makes a separate copy if |
2293 | * the "write_access" parameter is true in order to avoid the next | 2294 | * the FAULT_FLAG_WRITE is set in the flags parameter in order to avoid |
2294 | * page fault. | 2295 | * the next page fault. |
2295 | * | 2296 | * |
2296 | * As this is called only for pages that do not currently exist, we | 2297 | * As this is called only for pages that do not currently exist, we |
2297 | * do not need to flush old virtual caches or the TLB. | 2298 | * do not need to flush old virtual caches or the TLB. |
@@ -2300,64 +2301,82 @@ oom: | |||
2300 | * but allow concurrent faults), and pte mapped but not yet locked. | 2301 | * but allow concurrent faults), and pte mapped but not yet locked. |
2301 | * We return with mmap_sem still held, but pte unmapped and unlocked. | 2302 | * We return with mmap_sem still held, but pte unmapped and unlocked. |
2302 | */ | 2303 | */ |
2303 | static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, | 2304 | static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, |
2304 | unsigned long address, pte_t *page_table, pmd_t *pmd, | 2305 | unsigned long address, pte_t *page_table, pmd_t *pmd, |
2305 | int write_access) | 2306 | pgoff_t pgoff, unsigned int flags, pte_t orig_pte) |
2306 | { | 2307 | { |
2307 | spinlock_t *ptl; | 2308 | spinlock_t *ptl; |
2308 | struct page *page, *nopage_page; | 2309 | struct page *page, *faulted_page; |
2309 | pte_t entry; | 2310 | pte_t entry; |
2310 | int ret = VM_FAULT_MINOR; | ||
2311 | int anon = 0; | 2311 | int anon = 0; |
2312 | struct page *dirty_page = NULL; | 2312 | struct page *dirty_page = NULL; |
2313 | struct fault_data fdata; | ||
2314 | |||
2315 | fdata.address = address & PAGE_MASK; | ||
2316 | fdata.pgoff = pgoff; | ||
2317 | fdata.flags = flags; | ||
2313 | 2318 | ||
2314 | pte_unmap(page_table); | 2319 | pte_unmap(page_table); |
2315 | BUG_ON(vma->vm_flags & VM_PFNMAP); | 2320 | BUG_ON(vma->vm_flags & VM_PFNMAP); |
2316 | 2321 | ||
2317 | nopage_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret); | 2322 | if (likely(vma->vm_ops->fault)) { |
2318 | /* no page was available -- either SIGBUS, OOM or REFAULT */ | 2323 | fdata.type = -1; |
2319 | if (unlikely(nopage_page == NOPAGE_SIGBUS)) | 2324 | faulted_page = vma->vm_ops->fault(vma, &fdata); |
2320 | return VM_FAULT_SIGBUS; | 2325 | WARN_ON(fdata.type == -1); |
2321 | else if (unlikely(nopage_page == NOPAGE_OOM)) | 2326 | if (unlikely(!faulted_page)) |
2322 | return VM_FAULT_OOM; | 2327 | return fdata.type; |
2323 | else if (unlikely(nopage_page == NOPAGE_REFAULT)) | 2328 | } else { |
2324 | return VM_FAULT_MINOR; | 2329 | /* Legacy ->nopage path */ |
2330 | fdata.type = VM_FAULT_MINOR; | ||
2331 | faulted_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, | ||
2332 | &fdata.type); | ||
2333 | /* no page was available -- either SIGBUS or OOM */ | ||
2334 | if (unlikely(faulted_page == NOPAGE_SIGBUS)) | ||
2335 | return VM_FAULT_SIGBUS; | ||
2336 | else if (unlikely(faulted_page == NOPAGE_OOM)) | ||
2337 | return VM_FAULT_OOM; | ||
2338 | } | ||
2325 | 2339 | ||
2326 | BUG_ON(vma->vm_flags & VM_CAN_INVALIDATE && !PageLocked(nopage_page)); | ||
2327 | /* | 2340 | /* |
2328 | * For consistency in subsequent calls, make the nopage_page always | 2341 | * For consistency in subsequent calls, make the faulted_page always |
2329 | * locked. | 2342 | * locked. |
2330 | */ | 2343 | */ |
2331 | if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE))) | 2344 | if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE))) |
2332 | lock_page(nopage_page); | 2345 | lock_page(faulted_page); |
2346 | else | ||
2347 | BUG_ON(!PageLocked(faulted_page)); | ||
2333 | 2348 | ||
2334 | /* | 2349 | /* |
2335 | * Should we do an early C-O-W break? | 2350 | * Should we do an early C-O-W break? |
2336 | */ | 2351 | */ |
2337 | page = nopage_page; | 2352 | page = faulted_page; |
2338 | if (write_access) { | 2353 | if (flags & FAULT_FLAG_WRITE) { |
2339 | if (!(vma->vm_flags & VM_SHARED)) { | 2354 | if (!(vma->vm_flags & VM_SHARED)) { |
2355 | anon = 1; | ||
2340 | if (unlikely(anon_vma_prepare(vma))) { | 2356 | if (unlikely(anon_vma_prepare(vma))) { |
2341 | ret = VM_FAULT_OOM; | 2357 | fdata.type = VM_FAULT_OOM; |
2342 | goto out_error; | 2358 | goto out; |
2343 | } | 2359 | } |
2344 | page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); | 2360 | page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); |
2345 | if (!page) { | 2361 | if (!page) { |
2346 | ret = VM_FAULT_OOM; | 2362 | fdata.type = VM_FAULT_OOM; |
2347 | goto out_error; | 2363 | goto out; |
2348 | } | 2364 | } |
2349 | copy_user_highpage(page, nopage_page, address, vma); | 2365 | copy_user_highpage(page, faulted_page, address, vma); |
2350 | anon = 1; | ||
2351 | } else { | 2366 | } else { |
2352 | /* if the page will be shareable, see if the backing | 2367 | /* |
2368 | * If the page will be shareable, see if the backing | ||
2353 | * address space wants to know that the page is about | 2369 | * address space wants to know that the page is about |
2354 | * to become writable */ | 2370 | * to become writable |
2371 | */ | ||
2355 | if (vma->vm_ops->page_mkwrite && | 2372 | if (vma->vm_ops->page_mkwrite && |
2356 | vma->vm_ops->page_mkwrite(vma, page) < 0) { | 2373 | vma->vm_ops->page_mkwrite(vma, page) < 0) { |
2357 | ret = VM_FAULT_SIGBUS; | 2374 | fdata.type = VM_FAULT_SIGBUS; |
2358 | goto out_error; | 2375 | anon = 1; /* no anon but release faulted_page */ |
2376 | goto out; | ||
2359 | } | 2377 | } |
2360 | } | 2378 | } |
2379 | |||
2361 | } | 2380 | } |
2362 | 2381 | ||
2363 | page_table = pte_offset_map_lock(mm, pmd, address, &ptl); | 2382 | page_table = pte_offset_map_lock(mm, pmd, address, &ptl); |
@@ -2373,10 +2392,10 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2373 | * handle that later. | 2392 | * handle that later. |
2374 | */ | 2393 | */ |
2375 | /* Only go through if we didn't race with anybody else... */ | 2394 | /* Only go through if we didn't race with anybody else... */ |
2376 | if (likely(pte_none(*page_table))) { | 2395 | if (likely(pte_same(*page_table, orig_pte))) { |
2377 | flush_icache_page(vma, page); | 2396 | flush_icache_page(vma, page); |
2378 | entry = mk_pte(page, vma->vm_page_prot); | 2397 | entry = mk_pte(page, vma->vm_page_prot); |
2379 | if (write_access) | 2398 | if (flags & FAULT_FLAG_WRITE) |
2380 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); | 2399 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); |
2381 | set_pte_at(mm, address, page_table, entry); | 2400 | set_pte_at(mm, address, page_table, entry); |
2382 | if (anon) { | 2401 | if (anon) { |
@@ -2386,7 +2405,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2386 | } else { | 2405 | } else { |
2387 | inc_mm_counter(mm, file_rss); | 2406 | inc_mm_counter(mm, file_rss); |
2388 | page_add_file_rmap(page); | 2407 | page_add_file_rmap(page); |
2389 | if (write_access) { | 2408 | if (flags & FAULT_FLAG_WRITE) { |
2390 | dirty_page = page; | 2409 | dirty_page = page; |
2391 | get_page(dirty_page); | 2410 | get_page(dirty_page); |
2392 | } | 2411 | } |
@@ -2399,25 +2418,42 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2399 | if (anon) | 2418 | if (anon) |
2400 | page_cache_release(page); | 2419 | page_cache_release(page); |
2401 | else | 2420 | else |
2402 | anon = 1; /* not anon, but release nopage_page */ | 2421 | anon = 1; /* not anon, but release faulted_page */ |
2403 | } | 2422 | } |
2404 | 2423 | ||
2405 | pte_unmap_unlock(page_table, ptl); | 2424 | pte_unmap_unlock(page_table, ptl); |
2406 | 2425 | ||
2407 | out: | 2426 | out: |
2408 | unlock_page(nopage_page); | 2427 | unlock_page(faulted_page); |
2409 | if (anon) | 2428 | if (anon) |
2410 | page_cache_release(nopage_page); | 2429 | page_cache_release(faulted_page); |
2411 | else if (dirty_page) { | 2430 | else if (dirty_page) { |
2412 | set_page_dirty_balance(dirty_page); | 2431 | set_page_dirty_balance(dirty_page); |
2413 | put_page(dirty_page); | 2432 | put_page(dirty_page); |
2414 | } | 2433 | } |
2415 | 2434 | ||
2416 | return ret; | 2435 | return fdata.type; |
2436 | } | ||
2417 | 2437 | ||
2418 | out_error: | 2438 | static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, |
2419 | anon = 1; /* release nopage_page */ | 2439 | unsigned long address, pte_t *page_table, pmd_t *pmd, |
2420 | goto out; | 2440 | int write_access, pte_t orig_pte) |
2441 | { | ||
2442 | pgoff_t pgoff = (((address & PAGE_MASK) | ||
2443 | - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff; | ||
2444 | unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0); | ||
2445 | |||
2446 | return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte); | ||
2447 | } | ||
2448 | |||
2449 | static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, | ||
2450 | unsigned long address, pte_t *page_table, pmd_t *pmd, | ||
2451 | int write_access, pgoff_t pgoff, pte_t orig_pte) | ||
2452 | { | ||
2453 | unsigned int flags = FAULT_FLAG_NONLINEAR | | ||
2454 | (write_access ? FAULT_FLAG_WRITE : 0); | ||
2455 | |||
2456 | return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte); | ||
2421 | } | 2457 | } |
2422 | 2458 | ||
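The pgoff computed by do_linear_fault() is the standard linear rule: pages into the mapping plus the mapping's starting file page. A worked example, assuming 4 KiB pages (PAGE_CACHE_SHIFT == 12) and made-up mapping values:

	/* vma->vm_start == 0x40000000, vma->vm_pgoff == 16 (file offset 64 KiB) */
	unsigned long address = 0x40003abc;	/* somewhere in the 4th page */
	pgoff_t pgoff = (((address & PAGE_MASK) - 0x40000000) >> 12) + 16;
	/* (0x40003000 - 0x40000000) >> 12 == 3, so pgoff == 19:
	   the fault is served from file page 19 */

do_nonlinear_fault() skips this arithmetic entirely and takes pgoff straight from the file pte, which is exactly the layering point of the patch: the filesystem only ever sees a file offset, never a virtual address.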
2423 | /* | 2459 | /* |
@@ -2496,9 +2532,14 @@ static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2496 | print_bad_pte(vma, orig_pte, address); | 2532 | print_bad_pte(vma, orig_pte, address); |
2497 | return VM_FAULT_OOM; | 2533 | return VM_FAULT_OOM; |
2498 | } | 2534 | } |
2499 | /* We can then assume vma->vm_ops && vma->vm_ops->populate */ ||
2500 | 2535 | ||
2501 | pgoff = pte_to_pgoff(orig_pte); | 2536 | pgoff = pte_to_pgoff(orig_pte); |
2537 | |||
2538 | if (vma->vm_ops && vma->vm_ops->fault) | ||
2539 | return do_nonlinear_fault(mm, vma, address, page_table, pmd, | ||
2540 | write_access, pgoff, orig_pte); | ||
2541 | |||
2542 | /* We can then assume vma->vm_ops && vma->vm_ops->populate */ ||
2502 | err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, | 2543 | err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, |
2503 | vma->vm_page_prot, pgoff, 0); | 2544 | vma->vm_page_prot, pgoff, 0); |
2504 | if (err == -ENOMEM) | 2545 | if (err == -ENOMEM) |
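For the nonlinear case the file offset cannot be derived from the virtual address at all; it was stashed in the not-present pte by remap_file_pages() and is recovered here with pte_to_pgoff(). The round trip of such a "file pte", in sketch form:

	/* install_file_pte() encodes the file offset in a not-present pte;
	 * the fault path decodes it again (sketch of the invariant) */
	pte_t file_pte = pgoff_to_pte(pgoff);	/* not present, carries pgoff */
	BUG_ON(!pte_file(file_pte));
	BUG_ON(pte_to_pgoff(file_pte) != pgoff);

Vmas whose ops grew a ->fault handler take the new consolidated path above; unconverted ones still fall through to the legacy single-page ->populate call.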
@@ -2532,10 +2573,9 @@ static inline int handle_pte_fault(struct mm_struct *mm, | |||
2532 | if (!pte_present(entry)) { | 2573 | if (!pte_present(entry)) { |
2533 | if (pte_none(entry)) { | 2574 | if (pte_none(entry)) { |
2534 | if (vma->vm_ops) { | 2575 | if (vma->vm_ops) { |
2535 | if (vma->vm_ops->nopage) | 2576 | if (vma->vm_ops->fault || vma->vm_ops->nopage) |
2536 | return do_no_page(mm, vma, address, | 2577 | return do_linear_fault(mm, vma, address, |
2537 | pte, pmd, | 2578 | pte, pmd, write_access, entry); |
2538 | write_access); | ||
2539 | if (unlikely(vma->vm_ops->nopfn)) | 2579 | if (unlikely(vma->vm_ops->nopfn)) |
2540 | return do_no_pfn(mm, vma, address, pte, | 2580 | return do_no_pfn(mm, vma, address, pte, |
2541 | pmd, write_access); | 2581 | pmd, write_access); |
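The none-pte dispatch in handle_pte_fault() after this patch, in summary (a sketch of the branch order shown above):

	/*
	 *  ->fault or ->nopage present  ->  do_linear_fault(): one path for
	 *                                   new and legacy handlers, since
	 *                                   __do_fault() calls ->fault when
	 *                                   present and ->nopage otherwise
	 *  ->nopfn present              ->  do_no_pfn(): still separate,
	 *                                   slated to fold into ->fault later
	 *  no vm_ops at all             ->  do_anonymous_page()
	 */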
@@ -1165,12 +1165,8 @@ out: | |||
1165 | mm->locked_vm += len >> PAGE_SHIFT; | 1165 | mm->locked_vm += len >> PAGE_SHIFT; |
1166 | make_pages_present(addr, addr + len); | 1166 | make_pages_present(addr, addr + len); |
1167 | } | 1167 | } |
1168 | if (flags & MAP_POPULATE) { | 1168 | if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK)) |
1169 | up_write(&mm->mmap_sem); | 1169 | make_pages_present(addr, addr + len); |
1170 | sys_remap_file_pages(addr, len, 0, | ||
1171 | pgoff, flags & MAP_NONBLOCK); | ||
1172 | down_write(&mm->mmap_sem); | ||
1173 | } | ||
1174 | return addr; | 1170 | return addr; |
1175 | 1171 | ||
1176 | unmap_and_free_vma: | 1172 | unmap_and_free_vma: |
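The user-visible effect of the mm/mmap.c hunk: MAP_POPULATE no longer detours through sys_remap_file_pages(), it simply prefaults the range, and MAP_POPULATE|MAP_NONBLOCK becomes a no-op (matching the commit message's note that ptes are no longer set up for already-cached pages). Illustrative userspace, assuming a valid fd and _GNU_SOURCE for MAP_NONBLOCK:

	#include <sys/mman.h>

	/* prefaults the whole range up front via make_pages_present() */
	void *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE | MAP_POPULATE, fd, 0);

	/* after this patch: no prefaulting at all */
	void *q = mmap(NULL, len, PROT_READ,
		       MAP_PRIVATE | MAP_POPULATE | MAP_NONBLOCK, fd, 0);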
diff --git a/mm/nommu.c b/mm/nommu.c index 8bbbf147a794..aee0e1b0ebe7 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -1341,8 +1341,7 @@ int in_gate_area_no_task(unsigned long addr) | |||
1341 | return 0; | 1341 | return 0; |
1342 | } | 1342 | } |
1343 | 1343 | ||
1344 | struct page *filemap_nopage(struct vm_area_struct *area, | 1344 | struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata) |
1345 | unsigned long address, int *type) | ||
1346 | { | 1345 | { |
1347 | BUG(); | 1346 | BUG(); |
1348 | return NULL; | 1347 | return NULL; |
@@ -621,8 +621,10 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma) | |||
621 | printk (KERN_EMERG " page->count = %x\n", page_count(page)); | 621 | printk (KERN_EMERG " page->count = %x\n", page_count(page)); |
622 | printk (KERN_EMERG " page->mapping = %p\n", page->mapping); | 622 | printk (KERN_EMERG " page->mapping = %p\n", page->mapping); |
623 | print_symbol (KERN_EMERG " vma->vm_ops = %s\n", (unsigned long)vma->vm_ops); | 623 | print_symbol (KERN_EMERG " vma->vm_ops = %s\n", (unsigned long)vma->vm_ops); |
624 | if (vma->vm_ops) | 624 | if (vma->vm_ops) { |
625 | print_symbol (KERN_EMERG " vma->vm_ops->nopage = %s\n", (unsigned long)vma->vm_ops->nopage); | 625 | print_symbol (KERN_EMERG " vma->vm_ops->nopage = %s\n", (unsigned long)vma->vm_ops->nopage); |
626 | print_symbol (KERN_EMERG " vma->vm_ops->fault = %s\n", (unsigned long)vma->vm_ops->fault); | ||
627 | } | ||
626 | if (vma->vm_file && vma->vm_file->f_op) | 628 | if (vma->vm_file && vma->vm_file->f_op) |
627 | print_symbol (KERN_EMERG " vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap); | 629 | print_symbol (KERN_EMERG " vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap); |
628 | BUG(); | 630 | BUG(); |
diff --git a/mm/shmem.c b/mm/shmem.c index 5808fadd3944..6b44440f1b24 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -83,7 +83,7 @@ enum sgp_type { | |||
83 | SGP_READ, /* don't exceed i_size, don't allocate page */ | 83 | SGP_READ, /* don't exceed i_size, don't allocate page */ |
84 | SGP_CACHE, /* don't exceed i_size, may allocate page */ | 84 | SGP_CACHE, /* don't exceed i_size, may allocate page */ |
85 | SGP_WRITE, /* may exceed i_size, may allocate page */ | 85 | SGP_WRITE, /* may exceed i_size, may allocate page */ |
86 | SGP_NOPAGE, /* same as SGP_CACHE, return with page locked */ | 86 | SGP_FAULT, /* same as SGP_CACHE, return with page locked */ |
87 | }; | 87 | }; |
88 | 88 | ||
89 | static int shmem_getpage(struct inode *inode, unsigned long idx, | 89 | static int shmem_getpage(struct inode *inode, unsigned long idx, |
@@ -1101,6 +1101,10 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, | |||
1101 | 1101 | ||
1102 | if (idx >= SHMEM_MAX_INDEX) | 1102 | if (idx >= SHMEM_MAX_INDEX) |
1103 | return -EFBIG; | 1103 | return -EFBIG; |
1104 | |||
1105 | if (type) | ||
1106 | *type = VM_FAULT_MINOR; | ||
1107 | |||
1104 | /* | 1108 | /* |
1105 | * Normally, filepage is NULL on entry, and either found | 1109 | * Normally, filepage is NULL on entry, and either found |
1106 | * uptodate immediately, or allocated and zeroed, or read | 1110 | * uptodate immediately, or allocated and zeroed, or read |
@@ -1291,7 +1295,7 @@ repeat: | |||
1291 | done: | 1295 | done: |
1292 | if (*pagep != filepage) { | 1296 | if (*pagep != filepage) { |
1293 | *pagep = filepage; | 1297 | *pagep = filepage; |
1294 | if (sgp != SGP_NOPAGE) | 1298 | if (sgp != SGP_FAULT) |
1295 | unlock_page(filepage); | 1299 | unlock_page(filepage); |
1296 | 1300 | ||
1297 | } | 1301 | } |
@@ -1305,76 +1309,31 @@ failed: | |||
1305 | return error; | 1309 | return error; |
1306 | } | 1310 | } |
1307 | 1311 | ||
1308 | static struct page *shmem_nopage(struct vm_area_struct *vma, | 1312 | static struct page *shmem_fault(struct vm_area_struct *vma, |
1309 | unsigned long address, int *type) | 1313 | struct fault_data *fdata) |
1310 | { | 1314 | { |
1311 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | 1315 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
1312 | struct page *page = NULL; | 1316 | struct page *page = NULL; |
1313 | unsigned long idx; | ||
1314 | int error; | 1317 | int error; |
1315 | 1318 | ||
1316 | BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE)); | 1319 | BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE)); |
1317 | 1320 | ||
1318 | idx = (address - vma->vm_start) >> PAGE_SHIFT; | 1321 | if (((loff_t)fdata->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { |
1319 | idx += vma->vm_pgoff; | 1322 | fdata->type = VM_FAULT_SIGBUS; |
1320 | idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; | 1323 | return NULL; |
1321 | if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode)) | 1324 | } |
1322 | return NOPAGE_SIGBUS; | ||
1323 | 1325 | ||
1324 | error = shmem_getpage(inode, idx, &page, SGP_NOPAGE, type); | 1326 | error = shmem_getpage(inode, fdata->pgoff, &page, |
1325 | if (error) | 1327 | SGP_FAULT, &fdata->type); |
1326 | return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS; | 1328 | if (error) { |
1329 | fdata->type = ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); ||
1330 | return NULL; | ||
1331 | } | ||
1327 | 1332 | ||
1328 | mark_page_accessed(page); | 1333 | mark_page_accessed(page); |
1329 | return page; | 1334 | return page; |
1330 | } | 1335 | } |
1331 | 1336 | ||
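shmem_fault() shows the new ->fault contract: the core passes the file offset and flags in struct fault_data, and the handler returns the page while reporting status through fdata->type. For a vma with VM_CAN_INVALIDATE the page must come back locked, which is why shmem_getpage() grew the SGP_FAULT mode. A minimal handler for a hypothetical filesystem (lookup_my_page() is an invented stand-in that returns the page locked):

	static struct page *example_fault(struct vm_area_struct *vma,
					  struct fault_data *fdata)
	{
		struct page *page = lookup_my_page(vma->vm_file, fdata->pgoff);

		if (!page) {
			fdata->type = VM_FAULT_SIGBUS;
			return NULL;
		}
		fdata->type = VM_FAULT_MINOR;	/* VM_FAULT_MAJOR if I/O was needed */
		return page;
	}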
1332 | static int shmem_populate(struct vm_area_struct *vma, | ||
1333 | unsigned long addr, unsigned long len, | ||
1334 | pgprot_t prot, unsigned long pgoff, int nonblock) | ||
1335 | { | ||
1336 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | ||
1337 | struct mm_struct *mm = vma->vm_mm; | ||
1338 | enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE; | ||
1339 | unsigned long size; | ||
1340 | |||
1341 | size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
1342 | if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size) | ||
1343 | return -EINVAL; | ||
1344 | |||
1345 | while ((long) len > 0) { | ||
1346 | struct page *page = NULL; | ||
1347 | int err; | ||
1348 | /* | ||
1349 | * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE | ||
1350 | */ | ||
1351 | err = shmem_getpage(inode, pgoff, &page, sgp, NULL); | ||
1352 | if (err) | ||
1353 | return err; | ||
1354 | /* Page may still be null, but only if nonblock was set. */ | ||
1355 | if (page) { | ||
1356 | mark_page_accessed(page); | ||
1357 | err = install_page(mm, vma, addr, page, prot); | ||
1358 | if (err) { | ||
1359 | page_cache_release(page); | ||
1360 | return err; | ||
1361 | } | ||
1362 | } else if (vma->vm_flags & VM_NONLINEAR) { | ||
1363 | /* No page was found just because we can't read it in | ||
1364 | * now (being here implies nonblock != 0), but the page | ||
1365 | * may exist, so set the PTE to fault it in later. */ | ||
1366 | err = install_file_pte(mm, vma, addr, pgoff, prot); | ||
1367 | if (err) | ||
1368 | return err; | ||
1369 | } | ||
1370 | |||
1371 | len -= PAGE_SIZE; | ||
1372 | addr += PAGE_SIZE; | ||
1373 | pgoff++; | ||
1374 | } | ||
1375 | return 0; | ||
1376 | } | ||
1377 | |||
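shmem_populate() can go because a nonlinear shmem mapping no longer needs the filesystem's help to install ptes: remap_file_pages() just writes file ptes, and the pages arrive lazily through do_nonlinear_fault() and shmem_fault(). Illustrative userspace, assuming page_size and a shmem-backed shm_fd:

	#include <sys/mman.h>

	void *win = mmap(NULL, 4 * page_size, PROT_READ | PROT_WRITE,
			 MAP_SHARED, shm_fd, 0);
	/* rebind the first virtual page of the window to file page 3;
	 * this installs a file pte, resolved lazily at fault time */
	remap_file_pages(win, page_size, 0, 3, 0);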
1378 | #ifdef CONFIG_NUMA | 1337 | #ifdef CONFIG_NUMA |
1379 | int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new) | 1338 | int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new) |
1380 | { | 1339 | { |
@@ -1419,7 +1378,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) | |||
1419 | { | 1378 | { |
1420 | file_accessed(file); | 1379 | file_accessed(file); |
1421 | vma->vm_ops = &shmem_vm_ops; | 1380 | vma->vm_ops = &shmem_vm_ops; |
1422 | vma->vm_flags |= VM_CAN_INVALIDATE; | 1381 | vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; |
1423 | return 0; | 1382 | return 0; |
1424 | } | 1383 | } |
1425 | 1384 | ||
@@ -2465,8 +2424,7 @@ static const struct super_operations shmem_ops = { | |||
2465 | }; | 2424 | }; |
2466 | 2425 | ||
2467 | static struct vm_operations_struct shmem_vm_ops = { | 2426 | static struct vm_operations_struct shmem_vm_ops = { |
2468 | .nopage = shmem_nopage, | 2427 | .fault = shmem_fault, |
2469 | .populate = shmem_populate, | ||
2470 | #ifdef CONFIG_NUMA | 2428 | #ifdef CONFIG_NUMA |
2471 | .set_policy = shmem_set_policy, | 2429 | .set_policy = shmem_set_policy, |
2472 | .get_policy = shmem_get_policy, | 2430 | .get_policy = shmem_get_policy, |
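The shmem_vm_ops change is the conversion pattern for every other user in this series: drop .nopage/.populate, install .fault, and advertise the capabilities at mmap time. In outline, reusing the hypothetical example_fault() sketched earlier:

	static struct vm_operations_struct example_vm_ops = {
		.fault = example_fault,
	};

	static int example_mmap(struct file *file, struct vm_area_struct *vma)
	{
		file_accessed(file);
		vma->vm_ops = &example_vm_ops;
		vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
		return 0;
	}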
diff --git a/mm/truncate.c b/mm/truncate.c index aed85f0b707f..5cdfbc1a59fd 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -82,7 +82,7 @@ EXPORT_SYMBOL(cancel_dirty_page); | |||
82 | /* | 82 | /* |
83 | * If truncate cannot remove the fs-private metadata from the page, the page | 83 | * If truncate cannot remove the fs-private metadata from the page, the page |
84 | * becomes anonymous. It will be left on the LRU and may even be mapped into | 84 | * becomes anonymous. It will be left on the LRU and may even be mapped into |
85 | * user pagetables if we're racing with filemap_nopage(). | 85 | * user pagetables if we're racing with filemap_fault(). |
86 | * | 86 | * |
87 | * We need to bale out if page->mapping is no longer equal to the original | 87 | * We need to bale out if page->mapping is no longer equal to the original |
88 | * mapping. This happens a) when the VM reclaimed the page while we waited on | 88 | * mapping. This happens a) when the VM reclaimed the page while we waited on |