 Documentation/feature-removal-schedule.txt |  20
 Documentation/filesystems/Locking          |   2
 fs/gfs2/ops_file.c                         |   2
 fs/gfs2/ops_vm.c                           |  47
 fs/ncpfs/mmap.c                            |  38
 fs/ocfs2/mmap.c                            |  30
 fs/xfs/linux-2.6/xfs_file.c                |  14
 include/linux/mm.h                         |  84
 ipc/shm.c                                  |   5
 mm/filemap.c                               | 249
 mm/filemap_xip.c                           |  37
 mm/fremap.c                                |  85
 mm/hugetlb.c                               |   7
 mm/memory.c                                | 109
 mm/nommu.c                                 |   4
 mm/shmem.c                                 |  29
 16 files changed, 238 insertions(+), 524 deletions(-)
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 716568afdff8..cff63befeb9a 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -135,26 +135,8 @@ Who:	Greg Kroah-Hartman <gregkh@suse.de>
 
 ---------------------------
 
-What:	filemap_nopage, filemap_populate
-When:	April 2007
-Why:	These legacy interfaces no longer have any callers in the kernel and
-	any functionality provided can be provided with filemap_fault. The
-	removal schedule is short because they are a big maintainence burden
-	and have some bugs.
-Who:	Nick Piggin <npiggin@suse.de>
-
----------------------------
-
-What:	vm_ops.populate, install_page
-When:	April 2007
-Why:	These legacy interfaces no longer have any callers in the kernel and
-	any functionality provided can be provided with vm_ops.fault.
-Who:	Nick Piggin <npiggin@suse.de>
-
----------------------------
-
 What:	vm_ops.nopage
-When:	February 2008, provided in-kernel callers have been converted
+When:	Soon, provided in-kernel callers have been converted
 Why:	This interface is replaced by vm_ops.fault, but it has been around
 	forever, is used by a lot of drivers, and doesn't cost much to
 	maintain.
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 91ec4b40ebfe..f0f825808ca4 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -510,7 +510,7 @@ More details about quota locking can be found in fs/dquot.c.
 prototypes:
 	void (*open)(struct vm_area_struct*);
 	void (*close)(struct vm_area_struct*);
-	struct page *(*fault)(struct vm_area_struct*, struct fault_data *);
+	int (*fault)(struct vm_area_struct*, struct vm_fault *);
 	struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *);
 	int (*page_mkwrite)(struct vm_area_struct *, struct page *);
 
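To illustrate the converted prototype: under the new API a handler reports the fault type through its return value and hands any page back through the vm_fault, instead of returning the page and writing the type through an int pointer. A minimal sketch of a handler against this API (the example_* names are illustrative, not part of the patch):

	/* Sketch of a ->fault handler under the new API (illustrative names). */
	static int example_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
	{
		struct page *page;

		page = alloc_page(GFP_HIGHUSER);
		if (!page)
			return VM_FAULT_OOM;	/* error codes now carry VM_FAULT_ERROR */

		/* ... fill the page from the backing store, using vmf->pgoff ... */

		vmf->page = page;		/* page handed back via the vm_fault */
		return VM_FAULT_MINOR;		/* page is unlocked: no FAULT_RET_LOCKED */
	}

	static struct vm_operations_struct example_vm_ops = {
		.fault	= example_fault,
	};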
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 581ac11b2656..1a5e8e893d75 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -364,8 +364,6 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
 	else
 		vma->vm_ops = &gfs2_vm_ops_private;
 
-	vma->vm_flags |= VM_CAN_INVALIDATE|VM_CAN_NONLINEAR;
-
 	gfs2_glock_dq_uninit(&i_gh);
 
 	return error;
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
index e9fe6eb74e75..dc287d2e3a66 100644
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@@ -27,13 +27,12 @@
 #include "trans.h"
 #include "util.h"
 
-static struct page *gfs2_private_fault(struct vm_area_struct *vma,
-						struct fault_data *fdata)
+static int gfs2_private_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host);
 
 	set_bit(GIF_PAGED, &ip->i_flags);
-	return filemap_fault(vma, fdata);
+	return filemap_fault(vma, vmf);
 }
 
 static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
@@ -104,55 +103,55 @@ out:
 	return error;
 }
 
-static struct page *gfs2_sharewrite_fault(struct vm_area_struct *vma,
-						struct fault_data *fdata)
+static int gfs2_sharewrite_fault(struct vm_area_struct *vma,
+						struct vm_fault *vmf)
 {
 	struct file *file = vma->vm_file;
 	struct gfs2_file *gf = file->private_data;
 	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
 	struct gfs2_holder i_gh;
-	struct page *result = NULL;
 	int alloc_required;
 	int error;
+	int ret = VM_FAULT_MINOR;
 
 	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
 	if (error)
-		return NULL;
+		goto out;
 
 	set_bit(GIF_PAGED, &ip->i_flags);
 	set_bit(GIF_SW_PAGED, &ip->i_flags);
 
 	error = gfs2_write_alloc_required(ip,
-					(u64)fdata->pgoff << PAGE_CACHE_SHIFT,
+					(u64)vmf->pgoff << PAGE_CACHE_SHIFT,
 					PAGE_CACHE_SIZE, &alloc_required);
 	if (error) {
-		fdata->type = VM_FAULT_OOM; /* XXX: are these right? */
-		goto out;
+		ret = VM_FAULT_OOM; /* XXX: are these right? */
+		goto out_unlock;
 	}
 
 	set_bit(GFF_EXLOCK, &gf->f_flags);
-	result = filemap_fault(vma, fdata);
+	ret = filemap_fault(vma, vmf);
 	clear_bit(GFF_EXLOCK, &gf->f_flags);
-	if (!result)
-		goto out;
+	if (ret & (VM_FAULT_ERROR | FAULT_RET_NOPAGE))
+		goto out_unlock;
 
 	if (alloc_required) {
-		error = alloc_page_backing(ip, result);
+		/* XXX: do we need to drop page lock around alloc_page_backing?*/
+		error = alloc_page_backing(ip, vmf->page);
 		if (error) {
-			if (vma->vm_flags & VM_CAN_INVALIDATE)
-				unlock_page(result);
-			page_cache_release(result);
-			fdata->type = VM_FAULT_OOM;
-			result = NULL;
-			goto out;
+			if (ret & FAULT_RET_LOCKED)
+				unlock_page(vmf->page);
+			page_cache_release(vmf->page);
+			ret = VM_FAULT_OOM;
+			goto out_unlock;
 		}
-		set_page_dirty(result);
+		set_page_dirty(vmf->page);
 	}
 
-out:
+out_unlock:
 	gfs2_glock_dq_uninit(&i_gh);
-
-	return result;
+out:
+	return ret;
 }
 
 struct vm_operations_struct gfs2_vm_ops_private = {
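The gfs2 conversion above shows the general pattern for handlers that wrap filemap_fault(): propagate its return code, bail out early when it carries VM_FAULT_ERROR or FAULT_RET_NOPAGE (there is then no vmf->page to touch), and test FAULT_RET_LOCKED before unlocking on an error path. A reduced sketch of that skeleton, with the glock details elided and a hypothetical post_process() helper standing in for alloc_page_backing():

	/* Sketch only; wrapped_fault/post_process are illustrative names. */
	static int wrapped_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
	{
		int ret;

		/* ... take filesystem locks ... */
		ret = filemap_fault(vma, vmf);
		if (ret & (VM_FAULT_ERROR | FAULT_RET_NOPAGE))
			goto out;			/* no page came back */

		if (post_process(vmf->page) < 0) {	/* hypothetical helper */
			if (ret & FAULT_RET_LOCKED)
				unlock_page(vmf->page);
			page_cache_release(vmf->page);
			ret = VM_FAULT_OOM;
		}
	out:
		/* ... drop filesystem locks ... */
		return ret;
	}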
diff --git a/fs/ncpfs/mmap.c b/fs/ncpfs/mmap.c
index af48b792ca04..a94473d3072c 100644
--- a/fs/ncpfs/mmap.c
+++ b/fs/ncpfs/mmap.c
@@ -24,33 +24,35 @@
 
 /*
  * Fill in the supplied page for mmap
+ * XXX: how are we excluding truncate/invalidate here? Maybe need to lock
+ * page?
  */
-static struct page* ncp_file_mmap_fault(struct vm_area_struct *area,
-					struct fault_data *fdata)
+static int ncp_file_mmap_fault(struct vm_area_struct *area,
+					struct vm_fault *vmf)
 {
 	struct file *file = area->vm_file;
 	struct dentry *dentry = file->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
-	struct page* page;
 	char *pg_addr;
 	unsigned int already_read;
 	unsigned int count;
 	int bufsize;
-	int pos;
+	int pos; /* XXX: loff_t ? */
 
-	page = alloc_page(GFP_HIGHUSER); /* ncpfs has nothing against high pages
-			as long as recvmsg and memset works on it */
-	if (!page) {
-		fdata->type = VM_FAULT_OOM;
-		return NULL;
-	}
-	pg_addr = kmap(page);
-	pos = fdata->pgoff << PAGE_SHIFT;
+	/*
+	 * ncpfs has nothing against high pages as long
+	 * as recvmsg and memset works on it
+	 */
+	vmf->page = alloc_page(GFP_HIGHUSER);
+	if (!vmf->page)
+		return VM_FAULT_OOM;
+	pg_addr = kmap(vmf->page);
+	pos = vmf->pgoff << PAGE_SHIFT;
 
 	count = PAGE_SIZE;
-	if (fdata->address + PAGE_SIZE > area->vm_end) {
+	if ((unsigned long)vmf->virtual_address + PAGE_SIZE > area->vm_end) {
 		WARN_ON(1); /* shouldn't happen? */
-		count = area->vm_end - fdata->address;
+		count = area->vm_end - (unsigned long)vmf->virtual_address;
 	}
 	/* what we can read in one go */
 	bufsize = NCP_SERVER(inode)->buffer_size;
@@ -85,17 +87,16 @@ static struct page* ncp_file_mmap_fault(struct vm_area_struct *area,
 
 	if (already_read < PAGE_SIZE)
 		memset(pg_addr + already_read, 0, PAGE_SIZE - already_read);
-	flush_dcache_page(page);
-	kunmap(page);
+	flush_dcache_page(vmf->page);
+	kunmap(vmf->page);
 
 	/*
 	 * If I understand ncp_read_kernel() properly, the above always
 	 * fetches from the network, here the analogue of disk.
 	 * -- wli
 	 */
-	fdata->type = VM_FAULT_MAJOR;
 	count_vm_event(PGMAJFAULT);
-	return page;
+	return VM_FAULT_MAJOR;
 }
 
 static struct vm_operations_struct ncp_file_mmap =
@@ -124,7 +125,6 @@ int ncp_mmap(struct file *file, struct vm_area_struct *vma)
 		return -EFBIG;
 
 	vma->vm_ops = &ncp_file_mmap;
-	vma->vm_flags |= VM_CAN_INVALIDATE;
 	file_accessed(file);
 	return 0;
 }
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index cd75508b1c8a..ee64749e2eeb 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -60,30 +60,28 @@ static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset)
 	return sigprocmask(SIG_SETMASK, oldset, NULL);
 }
 
-static struct page *ocfs2_fault(struct vm_area_struct *area,
-						struct fault_data *fdata)
+static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 {
-	struct page *page = NULL;
 	sigset_t blocked, oldset;
-	int ret;
+	int error, ret;
 
-	mlog_entry("(area=%p, page offset=%lu)\n", area, fdata->pgoff);
+	mlog_entry("(area=%p, page offset=%lu)\n", area, vmf->pgoff);
 
-	ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
-	if (ret < 0) {
-		fdata->type = VM_FAULT_SIGBUS;
-		mlog_errno(ret);
+	error = ocfs2_vm_op_block_sigs(&blocked, &oldset);
+	if (error < 0) {
+		mlog_errno(error);
+		ret = VM_FAULT_SIGBUS;
 		goto out;
 	}
 
-	page = filemap_fault(area, fdata);
+	ret = filemap_fault(area, vmf);
 
-	ret = ocfs2_vm_op_unblock_sigs(&oldset);
-	if (ret < 0)
-		mlog_errno(ret);
+	error = ocfs2_vm_op_unblock_sigs(&oldset);
+	if (error < 0)
+		mlog_errno(error);
 out:
-	mlog_exit_ptr(page);
-	return page;
+	mlog_exit_ptr(vmf->page);
+	return ret;
 }
 
 static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
@@ -225,7 +223,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
 	ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level);
 out:
 	vma->vm_ops = &ocfs2_file_vm_ops;
-	vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
+	vma->vm_flags |= VM_CAN_NONLINEAR;
 	return 0;
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index f12e80a69c68..2d4be2f247b2 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -212,20 +212,18 @@ xfs_file_fsync(
 }
 
 #ifdef CONFIG_XFS_DMAPI
-STATIC struct page *
+STATIC int
 xfs_vm_fault(
 	struct vm_area_struct	*vma,
-	struct fault_data	*fdata)
+	struct vm_fault		*vmf)
 {
 	struct inode	*inode = vma->vm_file->f_path.dentry->d_inode;
 	bhv_vnode_t	*vp = vn_from_inode(inode);
 
 	ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
-	if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0)) {
-		fdata->type = VM_FAULT_SIGBUS;
-		return NULL;
-	}
-	return filemap_fault(vma, fdata);
+	if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0))
+		return VM_FAULT_SIGBUS;
+	return filemap_fault(vma, vmf);
 }
 #endif /* CONFIG_XFS_DMAPI */
 
@@ -311,7 +309,7 @@ xfs_file_mmap(
 	struct vm_area_struct *vma)
 {
 	vma->vm_ops = &xfs_file_vm_ops;
-	vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
+	vma->vm_flags |= VM_CAN_NONLINEAR;
 
 #ifdef CONFIG_XFS_DMAPI
 	if (vn_from_inode(filp->f_path.dentry->d_inode)->v_vfsp->vfs_flag & VFS_DMI)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f28a1b3e63a9..ff0b8844bd5a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -168,12 +168,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_INSERTPAGE	0x02000000	/* The vma has had "vm_insert_page()" done on it */
 #define VM_ALWAYSDUMP	0x04000000	/* Always include in core dumps */
 
-#define VM_CAN_INVALIDATE 0x08000000	/* The mapping may be invalidated,
-					 * eg. truncate or invalidate_inode_*.
-					 * In this case, do_no_page must
-					 * return with the page locked.
-					 */
-#define VM_CAN_NONLINEAR 0x10000000	/* Has ->fault & does nonlinear pages */
+#define VM_CAN_NONLINEAR 0x08000000	/* Has ->fault & does nonlinear pages */
 
 #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
@@ -197,24 +192,44 @@ extern unsigned int kobjsize(const void *objp);
  */
 extern pgprot_t protection_map[16];
 
-#define FAULT_FLAG_WRITE	0x01
-#define FAULT_FLAG_NONLINEAR	0x02
+#define FAULT_FLAG_WRITE	0x01	/* Fault was a write access */
+#define FAULT_FLAG_NONLINEAR	0x02	/* Fault was via a nonlinear mapping */
+
+
+#define FAULT_RET_NOPAGE	0x0100	/* ->fault did not return a page. This
+					 * can be used if the handler installs
+					 * their own pte.
+					 */
+#define FAULT_RET_LOCKED	0x0200	/* ->fault locked the page, caller must
+					 * unlock after installing the mapping.
+					 * This is used by pagecache in
+					 * particular, where the page lock is
+					 * used to synchronise against truncate
+					 * and invalidate. Mutually exclusive
+					 * with FAULT_RET_NOPAGE.
+					 */
 
 /*
- * fault_data is filled in the the pagefault handler and passed to the
- * vma's ->fault function. That function is responsible for filling in
- * 'type', which is the type of fault if a page is returned, or the type
- * of error if NULL is returned.
+ * vm_fault is filled by the pagefault handler and passed to the vma's
+ * ->fault function. The vma's ->fault is responsible for returning the
+ * VM_FAULT_xxx type which occupies the lowest byte of the return code, ORed
+ * with FAULT_RET_ flags that occupy the next byte and give details about
+ * how the fault was handled.
  *
- * pgoff should be used in favour of address, if possible. If pgoff is
- * used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get
- * nonlinear mapping support.
+ * pgoff should be used in favour of virtual_address, if possible. If pgoff
+ * is used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get nonlinear
+ * mapping support.
  */
-struct fault_data {
-	unsigned long address;
-	pgoff_t pgoff;
-	unsigned int flags;
-	int type;
+struct vm_fault {
+	unsigned int flags;		/* FAULT_FLAG_xxx flags */
+	pgoff_t pgoff;			/* Logical page offset based on vma */
+	void __user *virtual_address;	/* Faulting virtual address */
+
+	struct page *page;		/* ->fault handlers should return a
+					 * page here, unless FAULT_RET_NOPAGE
+					 * is set (which is also implied by
+					 * VM_FAULT_OOM or SIGBUS).
+					 */
 };
 
 /*
@@ -225,15 +240,11 @@ struct fault_data {
 struct vm_operations_struct {
 	void (*open)(struct vm_area_struct * area);
 	void (*close)(struct vm_area_struct * area);
-	struct page *(*fault)(struct vm_area_struct *vma,
-			struct fault_data *fdata);
+	int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
 	struct page *(*nopage)(struct vm_area_struct *area,
 			unsigned long address, int *type);
 	unsigned long (*nopfn)(struct vm_area_struct *area,
 			unsigned long address);
-	int (*populate)(struct vm_area_struct *area, unsigned long address,
-			unsigned long len, pgprot_t prot, unsigned long pgoff,
-			int nonblock);
 
 	/* notification that a previously read-only page is about to become
 	 * writable, if an error is returned it will cause a SIGBUS */
@@ -700,8 +711,14 @@ static inline int page_mapped(struct page *page)
  * Used to decide whether a process gets delivered SIGBUS or
  * just gets major/minor fault counters bumped up.
  */
-#define VM_FAULT_OOM	0x00
-#define VM_FAULT_SIGBUS	0x01
+
+/*
+ * VM_FAULT_ERROR is set for the error cases, to make some tests simpler.
+ */
+#define VM_FAULT_ERROR	0x20
+
+#define VM_FAULT_OOM	(0x00 | VM_FAULT_ERROR)
+#define VM_FAULT_SIGBUS	(0x01 | VM_FAULT_ERROR)
 #define VM_FAULT_MINOR	0x02
 #define VM_FAULT_MAJOR	0x03
 
@@ -711,6 +728,11 @@ static inline int page_mapped(struct page *page)
  */
 #define VM_FAULT_WRITE	0x10
 
+/*
+ * Mask of VM_FAULT_ flags
+ */
+#define VM_FAULT_MASK	0xff
+
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
 
 extern void show_free_areas(void);
@@ -793,8 +815,6 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 
 extern int vmtruncate(struct inode * inode, loff_t offset);
 extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
-extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
-extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
 
 #ifdef CONFIG_MMU
 extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma,
@@ -1135,11 +1155,7 @@ extern void truncate_inode_pages_range(struct address_space *,
 		loff_t lstart, loff_t lend);
 
 /* generic vm_area_ops exported for stackable file systems */
-extern struct page *filemap_fault(struct vm_area_struct *, struct fault_data *);
-extern struct page * __deprecated_for_modules
-		filemap_nopage(struct vm_area_struct *, unsigned long, int *);
-extern int __deprecated_for_modules filemap_populate(struct vm_area_struct *,
-		unsigned long, unsigned long, pgprot_t, unsigned long, int);
+extern int filemap_fault(struct vm_area_struct *, struct vm_fault *);
 
 /* mm/page-writeback.c */
 int write_one_page(struct page *page, int wait);
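Putting the new definitions together: the low byte of a ->fault return value is the VM_FAULT_xxx type, the next byte holds FAULT_RET_ detail bits, and VM_FAULT_MASK strips the detail bits before the value travels back up the fault path. A sketch of how a caller decodes this, mirroring __do_fault() later in this patch (handle_fault is an illustrative name):

	/* Sketch: decoding a ->fault return value. */
	static int handle_fault(struct vm_area_struct *vma, unsigned long address,
				pgoff_t pgoff, unsigned int flags)
	{
		struct vm_fault vmf;
		int ret;

		vmf.virtual_address = (void __user *)(address & PAGE_MASK);
		vmf.pgoff = pgoff;
		vmf.flags = flags;
		vmf.page = NULL;

		ret = vma->vm_ops->fault(vma, &vmf);
		if (ret & (VM_FAULT_ERROR | FAULT_RET_NOPAGE))
			return ret & VM_FAULT_MASK;	/* strip FAULT_RET_ detail bits */

		if (!(ret & FAULT_RET_LOCKED))		/* normalise: hold the page lock */
			lock_page(vmf.page);

		/* ... install a pte for vmf.page, then unlock_page(vmf.page) ... */
		return ret & VM_FAULT_MASK;
	}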
diff --git a/ipc/shm.c b/ipc/shm.c
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -224,13 +224,12 @@ static void shm_close(struct vm_area_struct *vma)
 	mutex_unlock(&shm_ids(ns).mutex);
 }
 
-static struct page *shm_fault(struct vm_area_struct *vma,
-					struct fault_data *fdata)
+static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct file *file = vma->vm_file;
 	struct shm_file_data *sfd = shm_file_data(file);
 
-	return sfd->vm_ops->fault(vma, fdata);
+	return sfd->vm_ops->fault(vma, vmf);
 }
 
 #ifdef CONFIG_NUMA
diff --git a/mm/filemap.c b/mm/filemap.c
index 26b992d169e5..0876cc57255f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1302,8 +1302,8 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
 
 /**
  * filemap_fault - read in file data for page fault handling
- * @vma:	user vma (not used)
- * @fdata:	the applicable fault_data
+ * @vma:	vma in which the fault was taken
+ * @vmf:	struct vm_fault containing details of the fault
  *
  * filemap_fault() is invoked via the vma operations vector for a
  * mapped memory region to read in file data during a page fault.
@@ -1312,7 +1312,7 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
  * it in the page cache, and handles the special cases reasonably without
  * having a lot of duplicated code.
  */
-struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
+int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	int error;
 	struct file *file = vma->vm_file;
@@ -1322,13 +1322,12 @@ struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
 	struct page *page;
 	unsigned long size;
 	int did_readaround = 0;
+	int ret;
 
-	fdata->type = VM_FAULT_MINOR;
-
-	BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
+	ret = VM_FAULT_MINOR;
 
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (fdata->pgoff >= size)
+	if (vmf->pgoff >= size)
 		goto outside_data_content;
 
 	/* If we don't want any read-ahead, don't bother */
@@ -1342,18 +1341,18 @@ struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
 	 * For sequential accesses, we use the generic readahead logic.
 	 */
 	if (VM_SequentialReadHint(vma))
-		page_cache_readahead(mapping, ra, file, fdata->pgoff, 1);
+		page_cache_readahead(mapping, ra, file, vmf->pgoff, 1);
 
 	/*
 	 * Do we have something in the page cache already?
 	 */
 retry_find:
-	page = find_lock_page(mapping, fdata->pgoff);
+	page = find_lock_page(mapping, vmf->pgoff);
 	if (!page) {
 		unsigned long ra_pages;
 
 		if (VM_SequentialReadHint(vma)) {
-			handle_ra_miss(mapping, ra, fdata->pgoff);
+			handle_ra_miss(mapping, ra, vmf->pgoff);
 			goto no_cached_page;
 		}
 		ra->mmap_miss++;
@@ -1370,7 +1369,7 @@ retry_find:
 		 * check did_readaround, as this is an inner loop.
 		 */
 		if (!did_readaround) {
-			fdata->type = VM_FAULT_MAJOR;
+			ret = VM_FAULT_MAJOR;
 			count_vm_event(PGMAJFAULT);
 		}
 		did_readaround = 1;
@@ -1378,11 +1377,11 @@ retry_find:
 		if (ra_pages) {
 			pgoff_t start = 0;
 
-			if (fdata->pgoff > ra_pages / 2)
-				start = fdata->pgoff - ra_pages / 2;
+			if (vmf->pgoff > ra_pages / 2)
+				start = vmf->pgoff - ra_pages / 2;
 			do_page_cache_readahead(mapping, file, start, ra_pages);
 		}
-		page = find_lock_page(mapping, fdata->pgoff);
+		page = find_lock_page(mapping, vmf->pgoff);
 		if (!page)
 			goto no_cached_page;
 	}
@@ -1399,7 +1398,7 @@ retry_find:
 
 	/* Must recheck i_size under page lock */
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (unlikely(fdata->pgoff >= size)) {
+	if (unlikely(vmf->pgoff >= size)) {
 		unlock_page(page);
 		goto outside_data_content;
 	}
@@ -1408,24 +1407,24 @@ retry_find:
 	 * Found the page and have a reference on it.
 	 */
 	mark_page_accessed(page);
-	return page;
+	vmf->page = page;
+	return ret | FAULT_RET_LOCKED;
 
 outside_data_content:
 	/*
 	 * An external ptracer can access pages that normally aren't
 	 * accessible..
 	 */
-	if (vma->vm_mm == current->mm) {
-		fdata->type = VM_FAULT_SIGBUS;
-		return NULL;
-	}
+	if (vma->vm_mm == current->mm)
+		return VM_FAULT_SIGBUS;
+
 	/* Fall through to the non-read-ahead case */
 no_cached_page:
 	/*
 	 * We're only likely to ever get here if MADV_RANDOM is in
 	 * effect.
 	 */
-	error = page_cache_read(file, fdata->pgoff);
+	error = page_cache_read(file, vmf->pgoff);
 
 	/*
 	 * The page we want has now been added to the page cache.
@@ -1441,15 +1440,13 @@ no_cached_page:
 	 * to schedule I/O.
 	 */
 	if (error == -ENOMEM)
-		fdata->type = VM_FAULT_OOM;
-	else
-		fdata->type = VM_FAULT_SIGBUS;
-	return NULL;
+		return VM_FAULT_OOM;
+	return VM_FAULT_SIGBUS;
 
 page_not_uptodate:
 	/* IO error path */
 	if (!did_readaround) {
-		fdata->type = VM_FAULT_MAJOR;
+		ret = VM_FAULT_MAJOR;
 		count_vm_event(PGMAJFAULT);
 	}
 
@@ -1468,206 +1465,10 @@ page_not_uptodate:
 
 	/* Things didn't work out. Return zero to tell the mm layer so. */
 	shrink_readahead_size_eio(file, ra);
-	fdata->type = VM_FAULT_SIGBUS;
-	return NULL;
+	return VM_FAULT_SIGBUS;
 }
 EXPORT_SYMBOL(filemap_fault);
 
-/*
- * filemap_nopage and filemap_populate are legacy exports that are not used
- * in tree. Scheduled for removal.
- */
-struct page *filemap_nopage(struct vm_area_struct *area,
-				unsigned long address, int *type)
-{
-	struct page *page;
-	struct fault_data fdata;
-	fdata.address = address;
-	fdata.pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
-			+ area->vm_pgoff;
-	fdata.flags = 0;
-
-	page = filemap_fault(area, &fdata);
-	if (type)
-		*type = fdata.type;
-
-	return page;
-}
-EXPORT_SYMBOL(filemap_nopage);
-
-static struct page * filemap_getpage(struct file *file, unsigned long pgoff,
-					int nonblock)
-{
-	struct address_space *mapping = file->f_mapping;
-	struct page *page;
-	int error;
-
-	/*
-	 * Do we have something in the page cache already?
-	 */
-retry_find:
-	page = find_get_page(mapping, pgoff);
-	if (!page) {
-		if (nonblock)
-			return NULL;
-		goto no_cached_page;
-	}
-
-	/*
-	 * Ok, found a page in the page cache, now we need to check
-	 * that it's up-to-date.
-	 */
-	if (!PageUptodate(page)) {
-		if (nonblock) {
-			page_cache_release(page);
-			return NULL;
-		}
-		goto page_not_uptodate;
-	}
-
-success:
-	/*
-	 * Found the page and have a reference on it.
-	 */
-	mark_page_accessed(page);
-	return page;
-
-no_cached_page:
-	error = page_cache_read(file, pgoff);
-
-	/*
-	 * The page we want has now been added to the page cache.
-	 * In the unlikely event that someone removed it in the
-	 * meantime, we'll just come back here and read it again.
-	 */
-	if (error >= 0)
-		goto retry_find;
-
-	/*
-	 * An error return from page_cache_read can result if the
-	 * system is low on memory, or a problem occurs while trying
-	 * to schedule I/O.
-	 */
-	return NULL;
-
-page_not_uptodate:
-	lock_page(page);
-
-	/* Did it get truncated while we waited for it? */
-	if (!page->mapping) {
-		unlock_page(page);
-		goto err;
-	}
-
-	/* Did somebody else get it up-to-date? */
-	if (PageUptodate(page)) {
-		unlock_page(page);
-		goto success;
-	}
-
-	error = mapping->a_ops->readpage(file, page);
-	if (!error) {
-		wait_on_page_locked(page);
-		if (PageUptodate(page))
-			goto success;
-	} else if (error == AOP_TRUNCATED_PAGE) {
-		page_cache_release(page);
-		goto retry_find;
-	}
-
-	/*
-	 * Umm, take care of errors if the page isn't up-to-date.
-	 * Try to re-read it _once_. We do this synchronously,
-	 * because there really aren't any performance issues here
-	 * and we need to check for errors.
-	 */
-	lock_page(page);
-
-	/* Somebody truncated the page on us? */
-	if (!page->mapping) {
-		unlock_page(page);
-		goto err;
-	}
-	/* Somebody else successfully read it in? */
-	if (PageUptodate(page)) {
-		unlock_page(page);
-		goto success;
-	}
-
-	ClearPageError(page);
-	error = mapping->a_ops->readpage(file, page);
-	if (!error) {
-		wait_on_page_locked(page);
-		if (PageUptodate(page))
-			goto success;
-	} else if (error == AOP_TRUNCATED_PAGE) {
-		page_cache_release(page);
-		goto retry_find;
-	}
-
-	/*
-	 * Things didn't work out. Return zero to tell the
-	 * mm layer so, possibly freeing the page cache page first.
-	 */
-err:
-	page_cache_release(page);
-
-	return NULL;
-}
-
-int filemap_populate(struct vm_area_struct *vma, unsigned long addr,
-			unsigned long len, pgprot_t prot, unsigned long pgoff,
-			int nonblock)
-{
-	struct file *file = vma->vm_file;
-	struct address_space *mapping = file->f_mapping;
-	struct inode *inode = mapping->host;
-	unsigned long size;
-	struct mm_struct *mm = vma->vm_mm;
-	struct page *page;
-	int err;
-
-	if (!nonblock)
-		force_page_cache_readahead(mapping, vma->vm_file,
-					pgoff, len >> PAGE_CACHE_SHIFT);
-
-repeat:
-	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (pgoff + (len >> PAGE_CACHE_SHIFT) > size)
-		return -EINVAL;
-
-	page = filemap_getpage(file, pgoff, nonblock);
-
-	/* XXX: This is wrong, a filesystem I/O error may have happened. Fix that as
-	 * done in shmem_populate calling shmem_getpage */
-	if (!page && !nonblock)
-		return -ENOMEM;
-
-	if (page) {
-		err = install_page(mm, vma, addr, page, prot);
-		if (err) {
-			page_cache_release(page);
-			return err;
-		}
-	} else if (vma->vm_flags & VM_NONLINEAR) {
-		/* No page was found just because we can't read it in now (being
-		 * here implies nonblock != 0), but the page may exist, so set
-		 * the PTE to fault it in later. */
-		err = install_file_pte(mm, vma, addr, pgoff, prot);
-		if (err)
-			return err;
-	}
-
-	len -= PAGE_SIZE;
-	addr += PAGE_SIZE;
-	pgoff++;
-	if (len)
-		goto repeat;
-
-	return 0;
-}
-EXPORT_SYMBOL(filemap_populate);
-
 struct vm_operations_struct generic_file_vm_ops = {
 	.fault		= filemap_fault,
 };
@@ -1682,7 +1483,7 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
 		return -ENOEXEC;
 	file_accessed(file);
 	vma->vm_ops = &generic_file_vm_ops;
-	vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
+	vma->vm_flags |= VM_CAN_NONLINEAR;
 	return 0;
 }
 
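With filemap_fault() now the only surviving generic helper, a filesystem that wants ordinary pagecache-backed faults simply points its vm_ops at it, as generic_file_mmap() does above. A sketch for a filesystem supplying its own mmap operation (the myfs_* names are illustrative):

	static struct vm_operations_struct myfs_file_vm_ops = {
		.fault	= filemap_fault,
	};

	static int myfs_file_mmap(struct file *file, struct vm_area_struct *vma)
	{
		file_accessed(file);
		vma->vm_ops = &myfs_file_vm_ops;
		vma->vm_flags |= VM_CAN_NONLINEAR;	/* ->fault copes with nonlinear pgoffs */
		return 0;
	}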
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 82f4b8e9834e..847d5d78163e 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -210,8 +210,7 @@ __xip_unmap (struct address_space * mapping,
  *
  * This function is derived from filemap_fault, but used for execute in place
  */
-static struct page *xip_file_fault(struct vm_area_struct *area,
-					struct fault_data *fdata)
+static int xip_file_fault(struct vm_area_struct *area, struct vm_fault *vmf)
 {
 	struct file *file = area->vm_file;
 	struct address_space *mapping = file->f_mapping;
@@ -222,19 +221,15 @@ static struct page *xip_file_fault(struct vm_area_struct *area,
 	/* XXX: are VM_FAULT_ codes OK? */
 
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (fdata->pgoff >= size) {
-		fdata->type = VM_FAULT_SIGBUS;
-		return NULL;
-	}
+	if (vmf->pgoff >= size)
+		return VM_FAULT_SIGBUS;
 
 	page = mapping->a_ops->get_xip_page(mapping,
-					fdata->pgoff*(PAGE_SIZE/512), 0);
+					vmf->pgoff*(PAGE_SIZE/512), 0);
 	if (!IS_ERR(page))
 		goto out;
-	if (PTR_ERR(page) != -ENODATA) {
-		fdata->type = VM_FAULT_OOM;
-		return NULL;
-	}
+	if (PTR_ERR(page) != -ENODATA)
+		return VM_FAULT_OOM;
 
 	/* sparse block */
 	if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
@@ -242,26 +237,22 @@ static struct page *xip_file_fault(struct vm_area_struct *area,
 	    (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
 		/* maybe shared writable, allocate new block */
 		page = mapping->a_ops->get_xip_page(mapping,
-					fdata->pgoff*(PAGE_SIZE/512), 1);
-		if (IS_ERR(page)) {
-			fdata->type = VM_FAULT_SIGBUS;
-			return NULL;
-		}
+					vmf->pgoff*(PAGE_SIZE/512), 1);
+		if (IS_ERR(page))
+			return VM_FAULT_SIGBUS;
 		/* unmap page at pgoff from all other vmas */
-		__xip_unmap(mapping, fdata->pgoff);
+		__xip_unmap(mapping, vmf->pgoff);
 	} else {
 		/* not shared and writable, use xip_sparse_page() */
 		page = xip_sparse_page();
-		if (!page) {
-			fdata->type = VM_FAULT_OOM;
-			return NULL;
-		}
+		if (!page)
+			return VM_FAULT_OOM;
 	}
 
 out:
-	fdata->type = VM_FAULT_MINOR;
 	page_cache_get(page);
-	return page;
+	vmf->page = page;
+	return VM_FAULT_MINOR;
 }
 
 static struct vm_operations_struct xip_file_vm_ops = {
diff --git a/mm/fremap.c b/mm/fremap.c
index 01e51f01b84e..5f50d736a037 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -20,13 +20,14 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
-static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
+static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long addr, pte_t *ptep)
 {
 	pte_t pte = *ptep;
-	struct page *page = NULL;
 
 	if (pte_present(pte)) {
+		struct page *page;
+
 		flush_cache_page(vma, addr, pte_pfn(pte));
 		pte = ptep_clear_flush(vma, addr, ptep);
 		page = vm_normal_page(vma, addr, pte);
@@ -35,68 +36,21 @@ static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 				set_page_dirty(page);
 			page_remove_rmap(page, vma);
 			page_cache_release(page);
+			update_hiwater_rss(mm);
+			dec_mm_counter(mm, file_rss);
 		}
 	} else {
 		if (!pte_file(pte))
 			free_swap_and_cache(pte_to_swp_entry(pte));
 		pte_clear_not_present_full(mm, addr, ptep, 0);
 	}
-	return !!page;
 }
 
 /*
- * Install a file page to a given virtual memory address, release any
- * previously existing mapping.
- */
-int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long addr, struct page *page, pgprot_t prot)
-{
-	struct inode *inode;
-	pgoff_t size;
-	int err = -ENOMEM;
-	pte_t *pte;
-	pte_t pte_val;
-	spinlock_t *ptl;
-
-	pte = get_locked_pte(mm, addr, &ptl);
-	if (!pte)
-		goto out;
-
-	/*
-	 * This page may have been truncated. Tell the
-	 * caller about it.
-	 */
-	err = -EINVAL;
-	inode = vma->vm_file->f_mapping->host;
-	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (!page->mapping || page->index >= size)
-		goto unlock;
-	err = -ENOMEM;
-	if (page_mapcount(page) > INT_MAX/2)
-		goto unlock;
-
-	if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte))
-		inc_mm_counter(mm, file_rss);
-
-	flush_icache_page(vma, page);
-	pte_val = mk_pte(page, prot);
-	set_pte_at(mm, addr, pte, pte_val);
-	page_add_file_rmap(page);
-	update_mmu_cache(vma, addr, pte_val);
-	lazy_mmu_prot_update(pte_val);
-	err = 0;
-unlock:
-	pte_unmap_unlock(pte, ptl);
-out:
-	return err;
-}
-EXPORT_SYMBOL(install_page);
-
-/*
  * Install a file pte to a given virtual memory address, release any
  * previously existing mapping.
  */
-int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
+static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long addr, unsigned long pgoff, pgprot_t prot)
 {
 	int err = -ENOMEM;
@@ -107,10 +61,8 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!pte)
 		goto out;
 
-	if (!pte_none(*pte) && zap_pte(mm, vma, addr, pte)) {
-		update_hiwater_rss(mm);
-		dec_mm_counter(mm, file_rss);
-	}
+	if (!pte_none(*pte))
+		zap_pte(mm, vma, addr, pte);
 
 	set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
 	/*
@@ -208,8 +160,7 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 	if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR))
 		goto out;
 
-	if ((!vma->vm_ops || !vma->vm_ops->populate) &&
-					!(vma->vm_flags & VM_CAN_NONLINEAR))
+	if (!vma->vm_flags & VM_CAN_NONLINEAR)
 		goto out;
 
 	if (end <= start || start < vma->vm_start || end > vma->vm_end)
@@ -239,18 +190,14 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 		spin_unlock(&mapping->i_mmap_lock);
 	}
 
-	if (vma->vm_flags & VM_CAN_NONLINEAR) {
-		err = populate_range(mm, vma, start, size, pgoff);
-		if (!err && !(flags & MAP_NONBLOCK)) {
-			if (unlikely(has_write_lock)) {
-				downgrade_write(&mm->mmap_sem);
-				has_write_lock = 0;
-			}
-			make_pages_present(start, start+size);
-		}
-	} else
-		err = vma->vm_ops->populate(vma, start, size, vma->vm_page_prot,
-					    pgoff, flags & MAP_NONBLOCK);
+	err = populate_range(mm, vma, start, size, pgoff);
+	if (!err && !(flags & MAP_NONBLOCK)) {
+		if (unlikely(has_write_lock)) {
+			downgrade_write(&mm->mmap_sem);
+			has_write_lock = 0;
+		}
+		make_pages_present(start, start+size);
+	}
 
 	/*
 	 * We can't clear VM_NONLINEAR because we'd have to do
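For reference, the path patched above is the one exercised by the remap_file_pages() syscall; after this change it requires VM_CAN_NONLINEAR (i.e. a ->fault handler) rather than a ->populate method. A hedged userspace sketch of driving it (the file name "datafile" and the offsets are illustrative):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		long pgsz = sysconf(_SC_PAGESIZE);
		int fd = open("datafile", O_RDONLY);	/* assumed >= 8 pages long */
		char *win;

		if (fd < 0)
			return 1;
		win = mmap(NULL, 4 * pgsz, PROT_READ, MAP_SHARED, fd, 0);
		if (win == MAP_FAILED)
			return 1;

		/* Rewire the window's second page to file page 7: the kernel keeps
		 * the offset in a pte_file pte and ->fault resolves it on access. */
		return remap_file_pages(win + pgsz, pgsz, 0, 7, 0) ? 1 : 0;
	}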
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6912bbf33faa..aaa7c1a682d9 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -316,15 +316,14 @@ unsigned long hugetlb_total_pages(void)
  * hugegpage VMA. do_page_fault() is supposed to trap this, so BUG is we get
  * this far.
  */
-static struct page *hugetlb_nopage(struct vm_area_struct *vma,
-				unsigned long address, int *unused)
+static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	BUG();
-	return NULL;
+	return 0;
 }
 
 struct vm_operations_struct hugetlb_vm_ops = {
-	.nopage = hugetlb_nopage,
+	.fault = hugetlb_vm_op_fault,
 };
 
 static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
diff --git a/mm/memory.c b/mm/memory.c index 7abd3899848b..23c870479b3e 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1834,10 +1834,10 @@ static int unmap_mapping_range_vma(struct vm_area_struct *vma, | |||
1834 | 1834 | ||
1835 | /* | 1835 | /* |
1836 | * files that support invalidating or truncating portions of the | 1836 | * files that support invalidating or truncating portions of the |
1837 | * file from under mmaped areas must set the VM_CAN_INVALIDATE flag, and | 1837 | * file from under mmaped areas must have their ->fault function |
1838 | * have their .nopage function return the page locked. | 1838 | * return a locked page (and FAULT_RET_LOCKED code). This provides |
1839 | * synchronisation against concurrent unmapping here. | ||
1839 | */ | 1840 | */ |
1840 | BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE)); | ||
1841 | 1841 | ||
1842 | again: | 1842 | again: |
1843 | restart_addr = vma->vm_truncate_count; | 1843 | restart_addr = vma->vm_truncate_count; |
@@ -2306,63 +2306,62 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2306 | pgoff_t pgoff, unsigned int flags, pte_t orig_pte) | 2306 | pgoff_t pgoff, unsigned int flags, pte_t orig_pte) |
2307 | { | 2307 | { |
2308 | spinlock_t *ptl; | 2308 | spinlock_t *ptl; |
2309 | struct page *page, *faulted_page; | 2309 | struct page *page; |
2310 | pte_t entry; | 2310 | pte_t entry; |
2311 | int anon = 0; | 2311 | int anon = 0; |
2312 | struct page *dirty_page = NULL; | 2312 | struct page *dirty_page = NULL; |
2313 | struct fault_data fdata; | 2313 | struct vm_fault vmf; |
2314 | int ret; | ||
2314 | 2315 | ||
2315 | fdata.address = address & PAGE_MASK; | 2316 | vmf.virtual_address = (void __user *)(address & PAGE_MASK); |
2316 | fdata.pgoff = pgoff; | 2317 | vmf.pgoff = pgoff; |
2317 | fdata.flags = flags; | 2318 | vmf.flags = flags; |
2319 | vmf.page = NULL; | ||
2318 | 2320 | ||
2319 | pte_unmap(page_table); | 2321 | pte_unmap(page_table); |
2320 | BUG_ON(vma->vm_flags & VM_PFNMAP); | 2322 | BUG_ON(vma->vm_flags & VM_PFNMAP); |
2321 | 2323 | ||
2322 | if (likely(vma->vm_ops->fault)) { | 2324 | if (likely(vma->vm_ops->fault)) { |
2323 | fdata.type = -1; | 2325 | ret = vma->vm_ops->fault(vma, &vmf); |
2324 | faulted_page = vma->vm_ops->fault(vma, &fdata); | 2326 | if (unlikely(ret & (VM_FAULT_ERROR | FAULT_RET_NOPAGE))) |
2325 | WARN_ON(fdata.type == -1); | 2327 | return (ret & VM_FAULT_MASK); |
2326 | if (unlikely(!faulted_page)) | ||
2327 | return fdata.type; | ||
2328 | } else { | 2328 | } else { |
2329 | /* Legacy ->nopage path */ | 2329 | /* Legacy ->nopage path */ |
2330 | fdata.type = VM_FAULT_MINOR; | 2330 | ret = VM_FAULT_MINOR; |
2331 | faulted_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, | 2331 | vmf.page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret); |
2332 | &fdata.type); | ||
2333 | /* no page was available -- either SIGBUS or OOM */ | 2332 | /* no page was available -- either SIGBUS or OOM */ |
2334 | if (unlikely(faulted_page == NOPAGE_SIGBUS)) | 2333 | if (unlikely(vmf.page == NOPAGE_SIGBUS)) |
2335 | return VM_FAULT_SIGBUS; | 2334 | return VM_FAULT_SIGBUS; |
2336 | else if (unlikely(faulted_page == NOPAGE_OOM)) | 2335 | else if (unlikely(vmf.page == NOPAGE_OOM)) |
2337 | return VM_FAULT_OOM; | 2336 | return VM_FAULT_OOM; |
2338 | } | 2337 | } |
2339 | 2338 | ||
2340 | /* | 2339 | /* |
2341 | * For consistency in subsequent calls, make the faulted_page always | 2340 | * For consistency in subsequent calls, make the faulted page always |
2342 | * locked. | 2341 | * locked. |
2343 | */ | 2342 | */ |
2344 | if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE))) | 2343 | if (unlikely(!(ret & FAULT_RET_LOCKED))) |
2345 | lock_page(faulted_page); | 2344 | lock_page(vmf.page); |
2346 | else | 2345 | else |
2347 | BUG_ON(!PageLocked(faulted_page)); | 2346 | VM_BUG_ON(!PageLocked(vmf.page)); |
2348 | 2347 | ||
2349 | /* | 2348 | /* |
2350 | * Should we do an early C-O-W break? | 2349 | * Should we do an early C-O-W break? |
2351 | */ | 2350 | */ |
2352 | page = faulted_page; | 2351 | page = vmf.page; |
2353 | if (flags & FAULT_FLAG_WRITE) { | 2352 | if (flags & FAULT_FLAG_WRITE) { |
2354 | if (!(vma->vm_flags & VM_SHARED)) { | 2353 | if (!(vma->vm_flags & VM_SHARED)) { |
2355 | anon = 1; | 2354 | anon = 1; |
2356 | if (unlikely(anon_vma_prepare(vma))) { | 2355 | if (unlikely(anon_vma_prepare(vma))) { |
2357 | fdata.type = VM_FAULT_OOM; | 2356 | ret = VM_FAULT_OOM; |
2358 | goto out; | 2357 | goto out; |
2359 | } | 2358 | } |
2360 | page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); | 2359 | page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); |
2361 | if (!page) { | 2360 | if (!page) { |
2362 | fdata.type = VM_FAULT_OOM; | 2361 | ret = VM_FAULT_OOM; |
2363 | goto out; | 2362 | goto out; |
2364 | } | 2363 | } |
2365 | copy_user_highpage(page, faulted_page, address, vma); | 2364 | copy_user_highpage(page, vmf.page, address, vma); |
2366 | } else { | 2365 | } else { |
2367 | /* | 2366 | /* |
2368 | * If the page will be shareable, see if the backing | 2367 | * If the page will be shareable, see if the backing |
@@ -2372,11 +2371,23 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2372 | if (vma->vm_ops->page_mkwrite) { | 2371 | if (vma->vm_ops->page_mkwrite) { |
2373 | unlock_page(page); | 2372 | unlock_page(page); |
2374 | if (vma->vm_ops->page_mkwrite(vma, page) < 0) { | 2373 | if (vma->vm_ops->page_mkwrite(vma, page) < 0) { |
2375 | fdata.type = VM_FAULT_SIGBUS; | 2374 | ret = VM_FAULT_SIGBUS; |
2376 | anon = 1; /* no anon but release faulted_page */ | 2375 | anon = 1; /* no anon but release vmf.page */ |
2377 | goto out_unlocked; | 2376 | goto out_unlocked; |
2378 | } | 2377 | } |
2379 | lock_page(page); | 2378 | lock_page(page); |
2379 | /* | ||
2380 | * XXX: unlocking and relocking the page like | ||
2381 | * this is not quite right (racy vs invalidate); | ||
2382 | * a proper fix requires reworking the | ||
2383 | * page_mkwrite locking API, which is better | ||
2384 | * done later. | ||
2385 | */ | ||
2386 | if (!page->mapping) { | ||
2387 | ret = VM_FAULT_MINOR; | ||
2388 | anon = 1; /* no anon but release vmf.page */ | ||
2389 | goto out; | ||
2390 | } | ||
2380 | } | 2391 | } |
2381 | } | 2392 | } |
2382 | 2393 | ||
@@ -2427,16 +2438,16 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2427 | pte_unmap_unlock(page_table, ptl); | 2438 | pte_unmap_unlock(page_table, ptl); |
2428 | 2439 | ||
2429 | out: | 2440 | out: |
2430 | unlock_page(faulted_page); | 2441 | unlock_page(vmf.page); |
2431 | out_unlocked: | 2442 | out_unlocked: |
2432 | if (anon) | 2443 | if (anon) |
2433 | page_cache_release(faulted_page); | 2444 | page_cache_release(vmf.page); |
2434 | else if (dirty_page) { | 2445 | else if (dirty_page) { |
2435 | set_page_dirty_balance(dirty_page); | 2446 | set_page_dirty_balance(dirty_page); |
2436 | put_page(dirty_page); | 2447 | put_page(dirty_page); |
2437 | } | 2448 | } |
2438 | 2449 | ||
2439 | return fdata.type; | 2450 | return (ret & VM_FAULT_MASK); |
2440 | } | 2451 | } |
2441 | 2452 | ||
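With __do_fault() converted, a ->fault implementation receives a struct vm_fault and reports its outcome in the return value instead of through fdata.type. A minimal sketch of a handler under the new convention; my_dev and its pages[] array are hypothetical:

	static int my_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
	{
		struct my_dev *dev = vma->vm_private_data;

		/* out of range: report an error code, leave vmf->page unset */
		if (vmf->pgoff >= dev->nr_pages)
			return VM_FAULT_SIGBUS;

		vmf->page = dev->pages[vmf->pgoff];
		get_page(vmf->page);	/* caller expects a referenced page */
		return VM_FAULT_MINOR;
	}

Because the handler does not OR FAULT_RET_LOCKED into the return value, __do_fault() takes the page lock itself before going on.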
2442 | static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, | 2453 | static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, |
@@ -2447,18 +2458,10 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2447 | - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff; | 2458 | - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff; |
2448 | unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0); | 2459 | unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0); |
2449 | 2460 | ||
2450 | return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte); | 2461 | return __do_fault(mm, vma, address, page_table, pmd, pgoff, |
2462 | flags, orig_pte); | ||
2451 | } | 2463 | } |
2452 | 2464 | ||
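do_linear_fault() now only derives the file pgoff from the vma and calls __do_fault(). To make the computation concrete (illustrative values, 4K pages): a vma starting at 0x10000 with vm_pgoff == 5 turns a fault at address 0x13204 into file page ((0x13000 - 0x10000) >> 12) + 5 == 8:

	/* the same computation as in do_linear_fault() above */
	pgoff_t pgoff = (((address & PAGE_MASK) - vma->vm_start)
				>> PAGE_CACHE_SHIFT) + vma->vm_pgoff;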
2453 | static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, | ||
2454 | unsigned long address, pte_t *page_table, pmd_t *pmd, | ||
2455 | int write_access, pgoff_t pgoff, pte_t orig_pte) | ||
2456 | { | ||
2457 | unsigned int flags = FAULT_FLAG_NONLINEAR | | ||
2458 | (write_access ? FAULT_FLAG_WRITE : 0); | ||
2459 | |||
2460 | return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte); | ||
2461 | } | ||
2462 | 2465 | ||
2463 | /* | 2466 | /* |
2464 | * do_no_pfn() tries to create a new page mapping for a page without | 2467 | * do_no_pfn() tries to create a new page mapping for a page without |
@@ -2519,17 +2522,19 @@ static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2519 | * but allow concurrent faults), and pte mapped but not yet locked. | 2522 | * but allow concurrent faults), and pte mapped but not yet locked. |
2520 | * We return with mmap_sem still held, but pte unmapped and unlocked. | 2523 | * We return with mmap_sem still held, but pte unmapped and unlocked. |
2521 | */ | 2524 | */ |
2522 | static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma, | 2525 | static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, |
2523 | unsigned long address, pte_t *page_table, pmd_t *pmd, | 2526 | unsigned long address, pte_t *page_table, pmd_t *pmd, |
2524 | int write_access, pte_t orig_pte) | 2527 | int write_access, pte_t orig_pte) |
2525 | { | 2528 | { |
2529 | unsigned int flags = FAULT_FLAG_NONLINEAR | | ||
2530 | (write_access ? FAULT_FLAG_WRITE : 0); | ||
2526 | pgoff_t pgoff; | 2531 | pgoff_t pgoff; |
2527 | int err; | ||
2528 | 2532 | ||
2529 | if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) | 2533 | if (!pte_unmap_same(mm, pmd, page_table, orig_pte)) |
2530 | return VM_FAULT_MINOR; | 2534 | return VM_FAULT_MINOR; |
2531 | 2535 | ||
2532 | if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) { | 2536 | if (unlikely(!(vma->vm_flags & VM_NONLINEAR) || |
2537 | !(vma->vm_flags & VM_CAN_NONLINEAR))) { | ||
2533 | /* | 2538 | /* |
2534 | * Page table corrupted: show pte and kill process. | 2539 | * Page table corrupted: show pte and kill process. |
2535 | */ | 2540 | */ |
@@ -2539,18 +2544,8 @@ static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2539 | 2544 | ||
2540 | pgoff = pte_to_pgoff(orig_pte); | 2545 | pgoff = pte_to_pgoff(orig_pte); |
2541 | 2546 | ||
2542 | if (vma->vm_ops && vma->vm_ops->fault) | 2547 | return __do_fault(mm, vma, address, page_table, pmd, pgoff, |
2543 | return do_nonlinear_fault(mm, vma, address, page_table, pmd, | 2548 | flags, orig_pte); |
2544 | write_access, pgoff, orig_pte); | ||
2545 | |||
2546 | /* We can then assume vm->vm_ops && vma->vm_ops->populate */ | ||
2547 | err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, | ||
2548 | vma->vm_page_prot, pgoff, 0); | ||
2549 | if (err == -ENOMEM) | ||
2550 | return VM_FAULT_OOM; | ||
2551 | if (err) | ||
2552 | return VM_FAULT_SIGBUS; | ||
2553 | return VM_FAULT_MAJOR; | ||
2554 | } | 2549 | } |
2555 | 2550 | ||
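do_file_page() is folded into do_nonlinear_fault() below: a nonlinear pte stores the page's file offset, pte_to_pgoff() recovers it at fault time, and the new VM_CAN_NONLINEAR test rejects filesystems whose ->fault cannot cope. For context, nonlinear vmas are what userspace gets from remap_file_pages(); a sketch assuming an open, mmap-able file descriptor fd:

	#define _GNU_SOURCE
	#include <sys/mman.h>
	#include <unistd.h>

	static void nonlinear_example(int fd)
	{
		long psz = sysconf(_SC_PAGESIZE);
		char *win = mmap(NULL, 4 * psz, PROT_READ | PROT_WRITE,
				 MAP_SHARED, fd, 0);

		/* rebind the first window page to file page 3; from here
		 * on the pte, not the vma, records that file offset */
		remap_file_pages(win, psz, 0, 3, 0);
	}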
2556 | /* | 2551 | /* |
@@ -2588,7 +2583,7 @@ static inline int handle_pte_fault(struct mm_struct *mm, | |||
2588 | pte, pmd, write_access); | 2583 | pte, pmd, write_access); |
2589 | } | 2584 | } |
2590 | if (pte_file(entry)) | 2585 | if (pte_file(entry)) |
2591 | return do_file_page(mm, vma, address, | 2586 | return do_nonlinear_fault(mm, vma, address, |
2592 | pte, pmd, write_access, entry); | 2587 | pte, pmd, write_access, entry); |
2593 | return do_swap_page(mm, vma, address, | 2588 | return do_swap_page(mm, vma, address, |
2594 | pte, pmd, write_access, entry); | 2589 | pte, pmd, write_access, entry); |
diff --git a/mm/nommu.c b/mm/nommu.c index aee0e1b0ebe7..1b105d28949f 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -1341,10 +1341,10 @@ int in_gate_area_no_task(unsigned long addr) | |||
1341 | return 0; | 1341 | return 0; |
1342 | } | 1342 | } |
1343 | 1343 | ||
1344 | struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata) | 1344 | int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
1345 | { | 1345 | { |
1346 | BUG(); | 1346 | BUG(); |
1347 | return NULL; | 1347 | return 0; |
1348 | } | 1348 | } |
1349 | 1349 | ||
1350 | /* | 1350 | /* |
diff --git a/mm/shmem.c b/mm/shmem.c index 6b44440f1b24..0a555af8733d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -1309,29 +1309,21 @@ failed: | |||
1309 | return error; | 1309 | return error; |
1310 | } | 1310 | } |
1311 | 1311 | ||
1312 | static struct page *shmem_fault(struct vm_area_struct *vma, | 1312 | static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) |
1313 | struct fault_data *fdata) | ||
1314 | { | 1313 | { |
1315 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | 1314 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
1316 | struct page *page = NULL; | ||
1317 | int error; | 1315 | int error; |
1316 | int ret; | ||
1318 | 1317 | ||
1319 | BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE)); | 1318 | if (((loff_t)vmf->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) |
1319 | return VM_FAULT_SIGBUS; | ||
1320 | 1320 | ||
1321 | if (((loff_t)fdata->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { | 1321 | error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_FAULT, &ret); |
1322 | fdata->type = VM_FAULT_SIGBUS; | 1322 | if (error) |
1323 | return NULL; | 1323 | return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); |
1324 | } | ||
1325 | |||
1326 | error = shmem_getpage(inode, fdata->pgoff, &page, | ||
1327 | SGP_FAULT, &fdata->type); | ||
1328 | if (error) { | ||
1329 | fdata->type = ((error == -ENOMEM)?VM_FAULT_OOM:VM_FAULT_SIGBUS); | ||
1330 | return NULL; | ||
1331 | } | ||
1332 | 1324 | ||
1333 | mark_page_accessed(page); | 1325 | mark_page_accessed(vmf->page); |
1334 | return page; | 1326 | return ret | FAULT_RET_LOCKED; |
1335 | } | 1327 | } |
1336 | 1328 | ||
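shmem_fault() above shows the locked-page side of the new convention: shmem_getpage() hands back a locked page, so the handler ORs FAULT_RET_LOCKED into its return value and __do_fault() skips its own lock_page(). The same pattern for any filesystem whose lookup returns a locked, referenced page; myfs_get_locked_page() is hypothetical:

	static int myfs_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
	{
		vmf->page = myfs_get_locked_page(vma->vm_file, vmf->pgoff);
		if (!vmf->page)
			return VM_FAULT_SIGBUS;

		/* page comes back locked and referenced: say so */
		return VM_FAULT_MINOR | FAULT_RET_LOCKED;
	}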
1337 | #ifdef CONFIG_NUMA | 1329 | #ifdef CONFIG_NUMA |
@@ -1378,7 +1370,7 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) | |||
1378 | { | 1370 | { |
1379 | file_accessed(file); | 1371 | file_accessed(file); |
1380 | vma->vm_ops = &shmem_vm_ops; | 1372 | vma->vm_ops = &shmem_vm_ops; |
1381 | vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; | 1373 | vma->vm_flags |= VM_CAN_NONLINEAR; |
1382 | return 0; | 1374 | return 0; |
1383 | } | 1375 | } |
1384 | 1376 | ||
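With ->fault in place, shmem_mmap() only has to advertise nonlinear support; VM_CAN_INVALIDATE disappears because providing ->fault now implies truncate-safety. The resulting ->mmap pattern for a filesystem, sketched with hypothetical myfs names:

	static int myfs_mmap(struct file *file, struct vm_area_struct *vma)
	{
		file_accessed(file);
		vma->vm_ops = &myfs_vm_ops;	/* provides .fault */
		vma->vm_flags |= VM_CAN_NONLINEAR;
		return 0;
	}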
@@ -2560,6 +2552,5 @@ int shmem_zero_setup(struct vm_area_struct *vma) | |||
2560 | fput(vma->vm_file); | 2552 | fput(vma->vm_file); |
2561 | vma->vm_file = file; | 2553 | vma->vm_file = file; |
2562 | vma->vm_ops = &shmem_vm_ops; | 2554 | vma->vm_ops = &shmem_vm_ops; |
2563 | vma->vm_flags |= VM_CAN_INVALIDATE; | ||
2564 | return 0; | 2555 | return 0; |
2565 | } | 2556 | } |