Diffstat (limited to 'mm')
 -rw-r--r--  mm/backing-dev.c     |  16
 -rw-r--r--  mm/bounce.c          |   2
 -rw-r--r--  mm/filemap.c         |  46
 -rw-r--r--  mm/filemap_xip.c     |  48
 -rw-r--r--  mm/madvise.c         |  22
 -rw-r--r--  mm/migrate.c         |  15
 -rw-r--r--  mm/nommu.c           |  29
 -rw-r--r--  mm/oom_kill.c        |   6
 -rw-r--r--  mm/page-writeback.c  |   4
 -rw-r--r--  mm/rmap.c            |  10
 -rw-r--r--  mm/shmem.c           | 132
 -rw-r--r--  mm/slab.c            |   4
 12 files changed, 264 insertions, 70 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f50a2811f9dc..e5de3781d3fe 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -55,6 +55,22 @@ long congestion_wait(int rw, long timeout)
 }
 EXPORT_SYMBOL(congestion_wait);
 
+long congestion_wait_interruptible(int rw, long timeout)
+{
+	long ret;
+	DEFINE_WAIT(wait);
+	wait_queue_head_t *wqh = &congestion_wqh[rw];
+
+	prepare_to_wait(wqh, &wait, TASK_INTERRUPTIBLE);
+	if (signal_pending(current))
+		ret = -ERESTARTSYS;
+	else
+		ret = io_schedule_timeout(timeout);
+	finish_wait(wqh, &wait);
+	return ret;
+}
+EXPORT_SYMBOL(congestion_wait_interruptible);
+
 /**
  * congestion_end - wake up sleepers on a congested backing_dev_info
  * @rw: READ or WRITE
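The new helper mirrors congestion_wait() but lets a pending signal interrupt the sleep. A minimal caller sketch, assuming an ordinary process context; the function name and timeout below are illustrative, not part of this patch:

#include <linux/backing-dev.h>
#include <linux/fs.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

/* Hypothetical throttling path: back off while WRITE is congested,
 * but return early if the task has a signal pending. */
static long example_backoff(void)
{
	long ret = congestion_wait_interruptible(WRITE, HZ / 10);

	if (ret == -ERESTARTSYS)	/* signal arrived before/while sleeping */
		return ret;		/* let the syscall layer restart us */
	return 0;			/* slept (or timed out) normally */
}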
diff --git a/mm/bounce.c b/mm/bounce.c
index 643efbe82402..ad401fc57440 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -204,7 +204,7 @@ static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
 		/*
 		 * is destination page below bounce pfn?
 		 */
-		if (page_to_pfn(page) < q->bounce_pfn)
+		if (page_to_pfn(page) <= q->bounce_pfn)
 			continue;
 
 		/*
diff --git a/mm/filemap.c b/mm/filemap.c
index d1060b8d3cd6..5dfc093ceb3d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2379,7 +2379,8 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	ssize_t retval;
-	size_t write_len = 0;
+	size_t write_len;
+	pgoff_t end = 0; /* silence gcc */
 
 	/*
 	 * If it's a write, unmap all mmappings of the file up-front. This
@@ -2388,23 +2389,46 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	 */
 	if (rw == WRITE) {
 		write_len = iov_length(iov, nr_segs);
+		end = (offset + write_len - 1) >> PAGE_CACHE_SHIFT;
 		if (mapping_mapped(mapping))
 			unmap_mapping_range(mapping, offset, write_len, 0);
 	}
 
 	retval = filemap_write_and_wait(mapping);
-	if (retval == 0) {
-		retval = mapping->a_ops->direct_IO(rw, iocb, iov,
-						offset, nr_segs);
-		if (rw == WRITE && mapping->nrpages) {
-			pgoff_t end = (offset + write_len - 1)
-						>> PAGE_CACHE_SHIFT;
-			int err = invalidate_inode_pages2_range(mapping,
+	if (retval)
+		goto out;
+
+	/*
+	 * After a write we want buffered reads to be sure to go to disk to get
+	 * the new data. We invalidate clean cached page from the region we're
+	 * about to write. We do this *before* the write so that we can return
+	 * -EIO without clobbering -EIOCBQUEUED from ->direct_IO().
+	 */
+	if (rw == WRITE && mapping->nrpages) {
+		retval = invalidate_inode_pages2_range(mapping,
 					offset >> PAGE_CACHE_SHIFT, end);
-			if (err)
-				retval = err;
-		}
-	}
+		if (retval)
+			goto out;
+	}
+
+	retval = mapping->a_ops->direct_IO(rw, iocb, iov, offset, nr_segs);
+	if (retval)
+		goto out;
+
+	/*
+	 * Finally, try again to invalidate clean pages which might have been
+	 * faulted in by get_user_pages() if the source of the write was an
+	 * mmap()ed region of the file we're writing. That's a pretty crazy
+	 * thing to do, so we don't support it 100%. If this invalidation
+	 * fails and we have -EIOCBQUEUED we ignore the failure.
+	 */
+	if (rw == WRITE && mapping->nrpages) {
+		int err = invalidate_inode_pages2_range(mapping,
+					offset >> PAGE_CACHE_SHIFT, end);
+		if (err && retval >= 0)
+			retval = err;
+	}
+out:
 	return retval;
 }
 
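The two comments above encode an error-precedence rule: a best-effort invalidation failure may only replace the primary status when that status is a byte count, never -EIOCBQUEUED or an earlier error. A standalone sketch of just that rule, with illustrative names:

#include <linux/errno.h>
#include <linux/types.h>

/* dio_status: return of ->direct_IO() (bytes written, -EIOCBQUEUED, or error).
 * cleanup_err: result of the best-effort post-write invalidation. */
static ssize_t example_merge_status(ssize_t dio_status, int cleanup_err)
{
	if (cleanup_err && dio_status >= 0)
		return cleanup_err;	/* surface e.g. -EIO from invalidation */
	return dio_status;		/* keep -EIOCBQUEUED or the earlier error */
}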
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 9dd9fbb75139..cbb335813ec0 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -17,6 +17,29 @@
 #include "filemap.h"
 
 /*
+ * We do use our own empty page to avoid interference with other users
+ * of ZERO_PAGE(), such as /dev/zero
+ */
+static struct page *__xip_sparse_page;
+
+static struct page *xip_sparse_page(void)
+{
+	if (!__xip_sparse_page) {
+		unsigned long zeroes = get_zeroed_page(GFP_HIGHUSER);
+		if (zeroes) {
+			static DEFINE_SPINLOCK(xip_alloc_lock);
+			spin_lock(&xip_alloc_lock);
+			if (!__xip_sparse_page)
+				__xip_sparse_page = virt_to_page(zeroes);
+			else
+				free_page(zeroes);
+			spin_unlock(&xip_alloc_lock);
+		}
+	}
+	return __xip_sparse_page;
+}
+
+/*
  * This is a file read routine for execute in place files, and uses
  * the mapping->a_ops->get_xip_page() function for the actual low-level
  * stuff.
@@ -162,7 +185,7 @@ EXPORT_SYMBOL_GPL(xip_file_sendfile);
  * xip_write
  *
  * This function walks all vmas of the address_space and unmaps the
- * ZERO_PAGE when found at pgoff. Should it go in rmap.c?
+ * __xip_sparse_page when found at pgoff.
  */
 static void
 __xip_unmap (struct address_space * mapping,
@@ -177,13 +200,16 @@ __xip_unmap (struct address_space * mapping,
 	spinlock_t *ptl;
 	struct page *page;
 
+	page = __xip_sparse_page;
+	if (!page)
+		return;
+
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		mm = vma->vm_mm;
 		address = vma->vm_start +
 			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
 		BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-		page = ZERO_PAGE(0);
 		pte = page_check_address(page, mm, address, &ptl);
 		if (pte) {
 			/* Nuke the page table entry. */
@@ -222,16 +248,14 @@ xip_file_nopage(struct vm_area_struct * area,
 					+ area->vm_pgoff;
 
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (pgoff >= size) {
-		return NULL;
-	}
+	if (pgoff >= size)
+		return NOPAGE_SIGBUS;
 
 	page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
-	if (!IS_ERR(page)) {
+	if (!IS_ERR(page))
 		goto out;
-	}
 	if (PTR_ERR(page) != -ENODATA)
-		return NULL;
+		return NOPAGE_SIGBUS;
 
 	/* sparse block */
 	if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
@@ -241,12 +265,14 @@ xip_file_nopage(struct vm_area_struct * area,
 		page = mapping->a_ops->get_xip_page (mapping,
 						pgoff*(PAGE_SIZE/512), 1);
 		if (IS_ERR(page))
-			return NULL;
+			return NOPAGE_SIGBUS;
 		/* unmap page at pgoff from all other vmas */
 		__xip_unmap(mapping, pgoff);
 	} else {
-		/* not shared and writable, use ZERO_PAGE() */
-		page = ZERO_PAGE(0);
+		/* not shared and writable, use xip_sparse_page() */
+		page = xip_sparse_page();
+		if (!page)
+			return NOPAGE_OOM;
 	}
 
 out:
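For readers unfamiliar with the pre-fault-handler API that this patch targets: a ->nopage handler returns a page pointer, NOPAGE_SIGBUS when nothing can be mapped at the address, or NOPAGE_OOM when an allocation fails. A minimal sketch of that convention, assuming a 2.6.2x-era kernel; example_lookup_page() is a placeholder, not a real helper:

#include <linux/mm.h>

struct page *example_lookup_page(struct vm_area_struct *vma, unsigned long addr);

static struct page *example_nopage(struct vm_area_struct *vma,
				   unsigned long address, int *type)
{
	struct page *page = example_lookup_page(vma, address);

	if (!page)
		return NOPAGE_SIGBUS;	/* nothing mappable here: SIGBUS the task */
	get_page(page);
	if (type)
		*type = VM_FAULT_MINOR;	/* no I/O was needed to satisfy the fault */
	return page;
}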
diff --git a/mm/madvise.c b/mm/madvise.c
index 4e196155a0c3..603c5257ed6e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -155,10 +155,14 @@ static long madvise_dontneed(struct vm_area_struct * vma,
  * Other filesystems return -ENOSYS.
  */
 static long madvise_remove(struct vm_area_struct *vma,
+				struct vm_area_struct **prev,
 				unsigned long start, unsigned long end)
 {
 	struct address_space *mapping;
 	loff_t offset, endoff;
+	int error;
+
+	*prev = NULL;	/* tell sys_madvise we drop mmap_sem */
 
 	if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
 		return -EINVAL;
@@ -177,7 +181,12 @@ static long madvise_remove(struct vm_area_struct *vma,
 			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
 	endoff = (loff_t)(end - vma->vm_start - 1)
 			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
-	return vmtruncate_range(mapping->host, offset, endoff);
+
+	/* vmtruncate_range needs to take i_mutex and i_alloc_sem */
+	up_write(&current->mm->mmap_sem);
+	error = vmtruncate_range(mapping->host, offset, endoff);
+	down_write(&current->mm->mmap_sem);
+	return error;
 }
 
 static long
@@ -199,7 +208,7 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
 		error = madvise_behavior(vma, prev, start, end, behavior);
 		break;
 	case MADV_REMOVE:
-		error = madvise_remove(vma, start, end);
+		error = madvise_remove(vma, prev, start, end);
 		break;
 
 	case MADV_WILLNEED:
@@ -312,12 +321,15 @@ asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior)
 		if (error)
 			goto out;
 		start = tmp;
-		if (start < prev->vm_end)
+		if (prev && start < prev->vm_end)
 			start = prev->vm_end;
 		error = unmapped_error;
 		if (start >= end)
 			goto out;
-		vma = prev->vm_next;
+		if (prev)
+			vma = prev->vm_next;
+		else	/* madvise_remove dropped mmap_sem */
+			vma = find_vma(current->mm, start);
 	}
 out:
 	up_write(&current->mm->mmap_sem);
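From userspace, the path changed above is reached via madvise(MADV_REMOVE), which punches a hole in the backing store of a (typically tmpfs-backed) shared mapping. A hedged usage sketch; the fd/mapping setup is assumed and MADV_REMOVE may require reasonably recent kernel headers:

#define _GNU_SOURCE
#include <sys/mman.h>
#include <stdio.h>

/* Drop `len` bytes of backing store starting at the page-aligned `addr`
 * inside an existing shared mapping of a tmpfs file. */
static int punch_hole(void *addr, size_t len)
{
	if (madvise(addr, len, MADV_REMOVE) != 0) {
		perror("madvise(MADV_REMOVE)");	/* EINVAL/ENOSYS on unsupported setups */
		return -1;
	}
	return 0;
}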
diff --git a/mm/migrate.c b/mm/migrate.c
index 7a66ca25dc8a..a91ca00abebe 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -297,7 +297,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	void **pslot;
 
 	if (!mapping) {
-		/* Anonymous page */
+		/* Anonymous page without mapping */
 		if (page_count(page) != 1)
 			return -EAGAIN;
 		return 0;
@@ -333,6 +333,19 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	 */
 	__put_page(page);
 
+	/*
+	 * If moved to a different zone then also account
+	 * the page for that zone. Other VM counters will be
+	 * taken care of when we establish references to the
+	 * new page and drop references to the old page.
+	 *
+	 * Note that anonymous pages are accounted for
+	 * via NR_FILE_PAGES and NR_ANON_PAGES if they
+	 * are mapped to swap space.
+	 */
+	__dec_zone_page_state(page, NR_FILE_PAGES);
+	__inc_zone_page_state(newpage, NR_FILE_PAGES);
+
 	write_unlock_irq(&mapping->tree_lock);
 
 	return 0;
diff --git a/mm/nommu.c b/mm/nommu.c
index 23fb033e596d..1f60194d9b9b 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -45,6 +45,7 @@ int heap_stack_gap = 0;
 
 EXPORT_SYMBOL(mem_map);
 EXPORT_SYMBOL(__vm_enough_memory);
+EXPORT_SYMBOL(num_physpages);
 
 /* list of shareable VMAs */
 struct rb_root nommu_vma_tree = RB_ROOT;
@@ -826,6 +827,11 @@ unsigned long do_mmap_pgoff(struct file *file,
 		unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 		unsigned long vmpglen;
 
+		/* suppress VMA sharing for shared regions */
+		if (vm_flags & VM_SHARED &&
+		    capabilities & BDI_CAP_MAP_DIRECT)
+			goto dont_share_VMAs;
+
 		for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
 			vma = rb_entry(rb, struct vm_area_struct, vm_rb);
 
@@ -859,6 +865,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 			goto shared;
 		}
 
+	dont_share_VMAs:
 		vma = NULL;
 
 		/* obtain the address at which to make a shared mapping
@@ -1193,6 +1200,28 @@ void unmap_mapping_range(struct address_space *mapping,
 EXPORT_SYMBOL(unmap_mapping_range);
 
 /*
+ * ask for an unmapped area at which to create a mapping on a file
+ */
+unsigned long get_unmapped_area(struct file *file, unsigned long addr,
+				unsigned long len, unsigned long pgoff,
+				unsigned long flags)
+{
+	unsigned long (*get_area)(struct file *, unsigned long, unsigned long,
+				  unsigned long, unsigned long);
+
+	get_area = current->mm->get_unmapped_area;
+	if (file && file->f_op && file->f_op->get_unmapped_area)
+		get_area = file->f_op->get_unmapped_area;
+
+	if (!get_area)
+		return -ENOSYS;
+
+	return get_area(file, addr, len, pgoff, flags);
+}
+
+EXPORT_SYMBOL(get_unmapped_area);
+
+/*
 * Check that a process has enough memory to allocate a new virtual
 * mapping. 0 means there is enough memory for the allocation to
 * succeed and -ENOMEM implies there is not.
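A sketch of how a caller might use the helper added above; the wrapper name is illustrative, the addr/pgoff/flags values are simply passed through, and errors come back errno-encoded in the unsigned long return value:

#include <linux/err.h>
#include <linux/fs.h>
#include <linux/mm.h>

static unsigned long example_pick_addr(struct file *file, unsigned long len)
{
	unsigned long addr = get_unmapped_area(file, 0, len, 0, 0);

	/* e.g. -ENOSYS when neither the file nor the mm provides a hook */
	if (IS_ERR_VALUE(addr))
		return 0;
	return addr;
}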
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b278b8d60eee..3791edfffeeb 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -176,6 +176,8 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
 	struct zone **z;
 	nodemask_t nodes;
 	int node;
+
+	nodes_clear(nodes);
 	/* node has memory ? */
 	for_each_online_node(node)
 		if (NODE_DATA(node)->node_present_pages)
@@ -320,7 +322,7 @@ static int oom_kill_task(struct task_struct *p)
 	 * Don't kill the process if any threads are set to OOM_DISABLE
 	 */
 	do_each_thread(g, q) {
-		if (q->mm == mm && p->oomkilladj == OOM_DISABLE)
+		if (q->mm == mm && q->oomkilladj == OOM_DISABLE)
 			return 1;
 	} while_each_thread(g, q);
 
@@ -333,7 +335,7 @@ static int oom_kill_task(struct task_struct *p)
 	 */
 	do_each_thread(g, q) {
 		if (q->mm == mm && q->tgid != p->tgid)
-			force_sig(SIGKILL, p);
+			force_sig(SIGKILL, q);
 	} while_each_thread(g, q);
 
 	return 0;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index f469e3cd08e8..a794945fd194 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -67,12 +67,12 @@ static inline long sync_writeback_pages(void)
 /*
  * Start background writeback (via pdflush) at this percentage
  */
-int dirty_background_ratio = 10;
+int dirty_background_ratio = 5;
 
 /*
 * The generator of dirty data starts writeback at this percentage
 */
-int vm_dirty_ratio = 40;
+int vm_dirty_ratio = 10;
 
 /*
 * The interval between `kupdate'-style writebacks, in jiffies
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -498,9 +498,11 @@ int page_mkclean(struct page *page)
 		struct address_space *mapping = page_mapping(page);
 		if (mapping)
 			ret = page_mkclean_file(mapping, page);
+		if (page_test_dirty(page)) {
+			page_clear_dirty(page);
+			ret = 1;
+		}
 	}
-	if (page_test_and_clear_dirty(page))
-		ret = 1;
 
 	return ret;
 }
@@ -605,8 +607,10 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
 	 * Leaving it set also helps swapoff to reinstate ptes
 	 * faster for those pages still in swapcache.
 	 */
-	if (page_test_and_clear_dirty(page))
+	if (page_test_dirty(page)) {
+		page_clear_dirty(page);
 		set_page_dirty(page);
+	}
 	__dec_zone_page_state(page,
 			PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
 }
diff --git a/mm/shmem.c b/mm/shmem.c
index b8c429a2d271..b2a35ebf071a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -402,26 +402,38 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
 /*
  * shmem_free_swp - free some swap entries in a directory
  *
  * @dir:        pointer to the directory
  * @edir:       pointer after last entry of the directory
+ * @punch_lock: pointer to spinlock when needed for the holepunch case
  */
-static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
+static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
+						spinlock_t *punch_lock)
 {
+	spinlock_t *punch_unlock = NULL;
 	swp_entry_t *ptr;
 	int freed = 0;
 
 	for (ptr = dir; ptr < edir; ptr++) {
 		if (ptr->val) {
+			if (unlikely(punch_lock)) {
+				punch_unlock = punch_lock;
+				punch_lock = NULL;
+				spin_lock(punch_unlock);
+				if (!ptr->val)
+					continue;
+			}
 			free_swap_and_cache(*ptr);
 			*ptr = (swp_entry_t){0};
 			freed++;
 		}
 	}
+	if (punch_unlock)
+		spin_unlock(punch_unlock);
 	return freed;
 }
 
-static int shmem_map_and_free_swp(struct page *subdir,
-		int offset, int limit, struct page ***dir)
+static int shmem_map_and_free_swp(struct page *subdir, int offset,
+		int limit, struct page ***dir, spinlock_t *punch_lock)
 {
 	swp_entry_t *ptr;
 	int freed = 0;
@@ -431,7 +443,8 @@ static int shmem_map_and_free_swp(struct page *subdir,
 		int size = limit - offset;
 		if (size > LATENCY_LIMIT)
 			size = LATENCY_LIMIT;
-		freed += shmem_free_swp(ptr+offset, ptr+offset+size);
+		freed += shmem_free_swp(ptr+offset, ptr+offset+size,
+							punch_lock);
 		if (need_resched()) {
 			shmem_swp_unmap(ptr);
 			if (*dir) {
@@ -481,7 +494,10 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 	long nr_swaps_freed = 0;
 	int offset;
 	int freed;
-	int punch_hole = 0;
+	int punch_hole;
+	spinlock_t *needs_lock;
+	spinlock_t *punch_lock;
+	unsigned long upper_limit;
 
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 	idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -492,11 +508,20 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 	info->flags |= SHMEM_TRUNCATE;
 	if (likely(end == (loff_t) -1)) {
 		limit = info->next_index;
+		upper_limit = SHMEM_MAX_INDEX;
 		info->next_index = idx;
+		needs_lock = NULL;
+		punch_hole = 0;
 	} else {
-		limit = (end + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-		if (limit > info->next_index)
-			limit = info->next_index;
+		if (end + 1 >= inode->i_size) { /* we may free a little more */
+			limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
+							PAGE_CACHE_SHIFT;
+			upper_limit = SHMEM_MAX_INDEX;
+		} else {
+			limit = (end + 1) >> PAGE_CACHE_SHIFT;
+			upper_limit = limit;
+		}
+		needs_lock = &info->lock;
 		punch_hole = 1;
 	}
 
@@ -513,17 +538,30 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 			size = limit;
 		if (size > SHMEM_NR_DIRECT)
 			size = SHMEM_NR_DIRECT;
-		nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size);
+		nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
 	}
 
 	/*
 	 * If there are no indirect blocks or we are punching a hole
 	 * below indirect blocks, nothing to be done.
 	 */
-	if (!topdir || (punch_hole && (limit <= SHMEM_NR_DIRECT)))
+	if (!topdir || limit <= SHMEM_NR_DIRECT)
 		goto done2;
 
-	BUG_ON(limit <= SHMEM_NR_DIRECT);
+	/*
+	 * The truncation case has already dropped info->lock, and we're safe
+	 * because i_size and next_index have already been lowered, preventing
+	 * access beyond. But in the punch_hole case, we still need to take
+	 * the lock when updating the swap directory, because there might be
+	 * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
+	 * shmem_writepage. However, whenever we find we can remove a whole
+	 * directory page (not at the misaligned start or end of the range),
+	 * we first NULLify its pointer in the level above, and then have no
+	 * need to take the lock when updating its contents: needs_lock and
+	 * punch_lock (either pointing to info->lock or NULL) manage this.
+	 */
+
+	upper_limit -= SHMEM_NR_DIRECT;
 	limit -= SHMEM_NR_DIRECT;
 	idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
 	offset = idx % ENTRIES_PER_PAGE;
@@ -543,8 +581,14 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 		if (*dir) {
 			diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
 				ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
-			if (!diroff && !offset) {
-				*dir = NULL;
+			if (!diroff && !offset && upper_limit >= stage) {
+				if (needs_lock) {
+					spin_lock(needs_lock);
+					*dir = NULL;
+					spin_unlock(needs_lock);
+					needs_lock = NULL;
+				} else
+					*dir = NULL;
 				nr_pages_to_free++;
 				list_add(&middir->lru, &pages_to_free);
 			}
@@ -570,39 +614,55 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 			}
 			stage = idx + ENTRIES_PER_PAGEPAGE;
 			middir = *dir;
-			*dir = NULL;
-			nr_pages_to_free++;
-			list_add(&middir->lru, &pages_to_free);
+			if (punch_hole)
+				needs_lock = &info->lock;
+			if (upper_limit >= stage) {
+				if (needs_lock) {
+					spin_lock(needs_lock);
+					*dir = NULL;
+					spin_unlock(needs_lock);
+					needs_lock = NULL;
+				} else
+					*dir = NULL;
+				nr_pages_to_free++;
+				list_add(&middir->lru, &pages_to_free);
+			}
 			shmem_dir_unmap(dir);
 			cond_resched();
 			dir = shmem_dir_map(middir);
 			diroff = 0;
 		}
+		punch_lock = needs_lock;
 		subdir = dir[diroff];
-		if (subdir && page_private(subdir)) {
+		if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
+			if (needs_lock) {
+				spin_lock(needs_lock);
+				dir[diroff] = NULL;
+				spin_unlock(needs_lock);
+				punch_lock = NULL;
+			} else
+				dir[diroff] = NULL;
+			nr_pages_to_free++;
+			list_add(&subdir->lru, &pages_to_free);
+		}
+		if (subdir && page_private(subdir) /* has swap entries */) {
 			size = limit - idx;
 			if (size > ENTRIES_PER_PAGE)
 				size = ENTRIES_PER_PAGE;
 			freed = shmem_map_and_free_swp(subdir,
-						offset, size, &dir);
+					offset, size, &dir, punch_lock);
 			if (!dir)
 				dir = shmem_dir_map(middir);
 			nr_swaps_freed += freed;
-			if (offset)
+			if (offset || punch_lock) {
 				spin_lock(&info->lock);
-			set_page_private(subdir, page_private(subdir) - freed);
-			if (offset)
+				set_page_private(subdir,
+					page_private(subdir) - freed);
 				spin_unlock(&info->lock);
-			if (!punch_hole)
-				BUG_ON(page_private(subdir) > offset);
-		}
-		if (offset)
-			offset = 0;
-		else if (subdir && !page_private(subdir)) {
-			dir[diroff] = NULL;
-			nr_pages_to_free++;
-			list_add(&subdir->lru, &pages_to_free);
+			} else
+				BUG_ON(page_private(subdir) != freed);
 		}
+		offset = 0;
 	}
 done1:
 	shmem_dir_unmap(dir);
@@ -614,8 +674,16 @@ done2:
 		 * generic_delete_inode did it, before we lowered next_index.
 		 * Also, though shmem_getpage checks i_size before adding to
 		 * cache, no recheck after: so fix the narrow window there too.
+		 *
+		 * Recalling truncate_inode_pages_range and unmap_mapping_range
+		 * every time for punch_hole (which never got a chance to clear
+		 * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive,
+		 * yet hardly ever necessary: try to optimize them out later.
 		 */
 		truncate_inode_pages_range(inode->i_mapping, start, end);
+		if (punch_hole)
+			unmap_mapping_range(inode->i_mapping, start,
+							end - start, 1);
 	}
 
 	spin_lock(&info->lock);
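The long comment introduced above boils down to a detach-then-free idiom: clear the parent's pointer under the lock so racing lookups can no longer reach the directory page, then free its contents without the lock. A standalone sketch of that idiom, with illustrative types and names:

#include <linux/spinlock.h>

struct example_node;
void example_free_contents(struct example_node *node);	/* illustrative helper */

struct example_dir {
	spinlock_t lock;
	struct example_node *slots[64];
};

static void example_prune(struct example_dir *dir, int i)
{
	struct example_node *victim;

	spin_lock(&dir->lock);
	victim = dir->slots[i];		/* detach under the lock ... */
	dir->slots[i] = NULL;
	spin_unlock(&dir->lock);

	if (victim)			/* ... then tear down without it */
		example_free_contents(victim);
}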
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1802,8 +1802,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
 		/* Print header */
 		if (lines == 0) {
 			printk(KERN_ERR
-				"Slab corruption: start=%p, len=%d\n",
-				realobj, size);
+				"Slab corruption: %s start=%p, len=%d\n",
+				cachep->name, realobj, size);
 			print_objinfo(cachep, objp, 0);
 		}
 		/* Hexdump the affected line */