Diffstat (limited to 'mm')
-rw-r--r--  mm/backing-dev.c     16
-rw-r--r--  mm/bounce.c           2
-rw-r--r--  mm/filemap.c         46
-rw-r--r--  mm/filemap_xip.c     48
-rw-r--r--  mm/madvise.c         22
-rw-r--r--  mm/migrate.c         15
-rw-r--r--  mm/nommu.c           29
-rw-r--r--  mm/oom_kill.c         6
-rw-r--r--  mm/page-writeback.c   4
-rw-r--r--  mm/rmap.c            10
-rw-r--r--  mm/shmem.c          132
-rw-r--r--  mm/slab.c             4
12 files changed, 264 insertions, 70 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f50a2811f9dc..e5de3781d3fe 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -55,6 +55,22 @@ long congestion_wait(int rw, long timeout)
 }
 EXPORT_SYMBOL(congestion_wait);
 
+long congestion_wait_interruptible(int rw, long timeout)
+{
+	long ret;
+	DEFINE_WAIT(wait);
+	wait_queue_head_t *wqh = &congestion_wqh[rw];
+
+	prepare_to_wait(wqh, &wait, TASK_INTERRUPTIBLE);
+	if (signal_pending(current))
+		ret = -ERESTARTSYS;
+	else
+		ret = io_schedule_timeout(timeout);
+	finish_wait(wqh, &wait);
+	return ret;
+}
+EXPORT_SYMBOL(congestion_wait_interruptible);
+
 /**
  * congestion_end - wake up sleepers on a congested backing_dev_info
  * @rw: READ or WRITE
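
A minimal sketch of how a driver might use the new interruptible variant (hypothetical caller, not part of this patch): retry while the queue is congested, but return early if a signal arrives so the syscall can be restarted.

	while (bdi_write_congested(bdi)) {
		long ret = congestion_wait_interruptible(WRITE, HZ / 10);
		if (ret == -ERESTARTSYS)
			return ret;	/* signal pending: unwind to the caller */
	}
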
diff --git a/mm/bounce.c b/mm/bounce.c
index 643efbe82402..ad401fc57440 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -204,7 +204,7 @@ static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
 	/*
 	 * is destination page below bounce pfn?
 	 */
-	if (page_to_pfn(page) < q->bounce_pfn)
+	if (page_to_pfn(page) <= q->bounce_pfn)
 		continue;
 
 	/*
diff --git a/mm/filemap.c b/mm/filemap.c
index d1060b8d3cd6..5dfc093ceb3d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2379,7 +2379,8 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	ssize_t retval;
-	size_t write_len = 0;
+	size_t write_len;
+	pgoff_t end = 0; /* silence gcc */
 
 	/*
 	 * If it's a write, unmap all mmappings of the file up-front. This
@@ -2388,23 +2389,46 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	 */
 	if (rw == WRITE) {
 		write_len = iov_length(iov, nr_segs);
+		end = (offset + write_len - 1) >> PAGE_CACHE_SHIFT;
 		if (mapping_mapped(mapping))
 			unmap_mapping_range(mapping, offset, write_len, 0);
 	}
 
 	retval = filemap_write_and_wait(mapping);
-	if (retval == 0) {
-		retval = mapping->a_ops->direct_IO(rw, iocb, iov,
-						offset, nr_segs);
-		if (rw == WRITE && mapping->nrpages) {
-			pgoff_t end = (offset + write_len - 1)
-					>> PAGE_CACHE_SHIFT;
-			int err = invalidate_inode_pages2_range(mapping,
-					offset >> PAGE_CACHE_SHIFT, end);
-			if (err)
-				retval = err;
-		}
-	}
+	if (retval)
+		goto out;
+
+	/*
+	 * After a write we want buffered reads to be sure to go to disk to get
+	 * the new data. We invalidate clean cached page from the region we're
+	 * about to write. We do this *before* the write so that we can return
+	 * -EIO without clobbering -EIOCBQUEUED from ->direct_IO().
+	 */
+	if (rw == WRITE && mapping->nrpages) {
+		retval = invalidate_inode_pages2_range(mapping,
+					offset >> PAGE_CACHE_SHIFT, end);
+		if (retval)
+			goto out;
+	}
+
+	retval = mapping->a_ops->direct_IO(rw, iocb, iov, offset, nr_segs);
+	if (retval)
+		goto out;
+
+	/*
+	 * Finally, try again to invalidate clean pages which might have been
+	 * faulted in by get_user_pages() if the source of the write was an
+	 * mmap()ed region of the file we're writing. That's a pretty crazy
+	 * thing to do, so we don't support it 100%. If this invalidation
+	 * fails and we have -EIOCBQUEUED we ignore the failure.
+	 */
+	if (rw == WRITE && mapping->nrpages) {
+		int err = invalidate_inode_pages2_range(mapping,
+					offset >> PAGE_CACHE_SHIFT, end);
+		if (err && retval >= 0)
+			retval = err;
+	}
+out:
 	return retval;
 }
 
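
The "pretty crazy thing" in the final comment is a write whose source buffer is an mmap of the very file being written. A hypothetical userspace sequence that triggers it (sketch only; error handling and O_DIRECT alignment constraints elided):

	/* O_DIRECT write sourced from an mmap of the same file:
	 * get_user_pages() may fault pages of the destination range
	 * back into the page cache mid-write */
	int fd = open("data", O_RDWR | O_DIRECT);
	char *map = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
	write(fd, map, len);	/* source and destination overlap the cache */
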
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 9dd9fbb75139..cbb335813ec0 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -17,6 +17,29 @@
 #include "filemap.h"
 
 /*
+ * We do use our own empty page to avoid interference with other users
+ * of ZERO_PAGE(), such as /dev/zero
+ */
+static struct page *__xip_sparse_page;
+
+static struct page *xip_sparse_page(void)
+{
+	if (!__xip_sparse_page) {
+		unsigned long zeroes = get_zeroed_page(GFP_HIGHUSER);
+		if (zeroes) {
+			static DEFINE_SPINLOCK(xip_alloc_lock);
+			spin_lock(&xip_alloc_lock);
+			if (!__xip_sparse_page)
+				__xip_sparse_page = virt_to_page(zeroes);
+			else
+				free_page(zeroes);
+			spin_unlock(&xip_alloc_lock);
+		}
+	}
+	return __xip_sparse_page;
+}
+
+/*
  * This is a file read routine for execute in place files, and uses
  * the mapping->a_ops->get_xip_page() function for the actual low-level
  * stuff.
@@ -162,7 +185,7 @@ EXPORT_SYMBOL_GPL(xip_file_sendfile);
  * xip_write
  *
  * This function walks all vmas of the address_space and unmaps the
- * ZERO_PAGE when found at pgoff. Should it go in rmap.c?
+ * __xip_sparse_page when found at pgoff.
  */
 static void
 __xip_unmap (struct address_space * mapping,
@@ -177,13 +200,16 @@ __xip_unmap (struct address_space * mapping,
 	spinlock_t *ptl;
 	struct page *page;
 
+	page = __xip_sparse_page;
+	if (!page)
+		return;
+
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		mm = vma->vm_mm;
 		address = vma->vm_start +
 			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
 		BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-		page = ZERO_PAGE(0);
 		pte = page_check_address(page, mm, address, &ptl);
 		if (pte) {
 			/* Nuke the page table entry. */
@@ -222,16 +248,14 @@ xip_file_nopage(struct vm_area_struct * area,
 				+ area->vm_pgoff;
 
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (pgoff >= size) {
-		return NULL;
-	}
+	if (pgoff >= size)
+		return NOPAGE_SIGBUS;
 
 	page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
-	if (!IS_ERR(page)) {
+	if (!IS_ERR(page))
 		goto out;
-	}
 	if (PTR_ERR(page) != -ENODATA)
-		return NULL;
+		return NOPAGE_SIGBUS;
 
 	/* sparse block */
 	if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
@@ -241,12 +265,14 @@ xip_file_nopage(struct vm_area_struct * area,
 		page = mapping->a_ops->get_xip_page (mapping,
 			pgoff*(PAGE_SIZE/512), 1);
 		if (IS_ERR(page))
-			return NULL;
+			return NOPAGE_SIGBUS;
 		/* unmap page at pgoff from all other vmas */
 		__xip_unmap(mapping, pgoff);
 	} else {
-		/* not shared and writable, use ZERO_PAGE() */
-		page = ZERO_PAGE(0);
+		/* not shared and writable, use xip_sparse_page() */
+		page = xip_sparse_page();
+		if (!page)
+			return NOPAGE_OOM;
 	}
 
 out:
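
The xip_sparse_page() allocator above follows the usual lock-free-fastpath idiom: allocate outside the lock (allocation may sleep), then take a spinlock only to install the result, freeing the loser's copy on a lost race. The same pattern in generic form (hypothetical foo names, sketch only):

	static struct foo *shared_foo;

	static struct foo *get_shared_foo(void)
	{
		if (!shared_foo) {			/* unlocked fast path */
			struct foo *new = foo_alloc();	/* may sleep, so outside lock */
			if (new) {
				static DEFINE_SPINLOCK(foo_lock);
				spin_lock(&foo_lock);
				if (!shared_foo)	/* recheck under the lock */
					shared_foo = new;
				else
					foo_free(new);	/* lost the race */
				spin_unlock(&foo_lock);
			}
		}
		return shared_foo;
	}
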
diff --git a/mm/madvise.c b/mm/madvise.c
index 4e196155a0c3..603c5257ed6e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -155,10 +155,14 @@ static long madvise_dontneed(struct vm_area_struct * vma,
  * Other filesystems return -ENOSYS.
  */
 static long madvise_remove(struct vm_area_struct *vma,
+				struct vm_area_struct **prev,
 				unsigned long start, unsigned long end)
 {
 	struct address_space *mapping;
 	loff_t offset, endoff;
+	int error;
+
+	*prev = NULL;	/* tell sys_madvise we drop mmap_sem */
 
 	if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
 		return -EINVAL;
@@ -177,7 +181,12 @@ static long madvise_remove(struct vm_area_struct *vma,
 			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
 	endoff = (loff_t)(end - vma->vm_start - 1)
 			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
-	return vmtruncate_range(mapping->host, offset, endoff);
+
+	/* vmtruncate_range needs to take i_mutex and i_alloc_sem */
+	up_write(&current->mm->mmap_sem);
+	error = vmtruncate_range(mapping->host, offset, endoff);
+	down_write(&current->mm->mmap_sem);
+	return error;
 }
 
 static long
@@ -199,7 +208,7 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
 		error = madvise_behavior(vma, prev, start, end, behavior);
 		break;
 	case MADV_REMOVE:
-		error = madvise_remove(vma, start, end);
+		error = madvise_remove(vma, prev, start, end);
 		break;
 
 	case MADV_WILLNEED:
@@ -312,12 +321,15 @@ asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior)
 		if (error)
 			goto out;
 		start = tmp;
-		if (start < prev->vm_end)
+		if (prev && start < prev->vm_end)
 			start = prev->vm_end;
 		error = unmapped_error;
 		if (start >= end)
 			goto out;
-		vma = prev->vm_next;
+		if (prev)
+			vma = prev->vm_next;
+		else	/* madvise_remove dropped mmap_sem */
+			vma = find_vma(current->mm, start);
 	}
 out:
 	up_write(&current->mm->mmap_sem);
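
The caller-side half of the protocol is visible in the sys_madvise() hunk above: once a vma operation may drop mmap_sem, prev == NULL is the signal that the vma list must be looked up afresh rather than walked. In sketch form (simplified from the hunk):

	error = madvise_vma(vma, &prev, start, tmp, behavior);
	if (prev)
		vma = prev->vm_next;	/* mmap_sem held throughout: list still valid */
	else
		vma = find_vma(current->mm, start);	/* it was dropped: re-lookup */
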
diff --git a/mm/migrate.c b/mm/migrate.c
index 7a66ca25dc8a..a91ca00abebe 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -297,7 +297,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	void **pslot;
 
 	if (!mapping) {
-		/* Anonymous page */
+		/* Anonymous page without mapping */
 		if (page_count(page) != 1)
 			return -EAGAIN;
 		return 0;
@@ -333,6 +333,19 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	 */
 	__put_page(page);
 
+	/*
+	 * If moved to a different zone then also account
+	 * the page for that zone. Other VM counters will be
+	 * taken care of when we establish references to the
+	 * new page and drop references to the old page.
+	 *
+	 * Note that anonymous pages are accounted for
+	 * via NR_FILE_PAGES and NR_ANON_PAGES if they
+	 * are mapped to swap space.
+	 */
+	__dec_zone_page_state(page, NR_FILE_PAGES);
+	__inc_zone_page_state(newpage, NR_FILE_PAGES);
+
 	write_unlock_irq(&mapping->tree_lock);
 
 	return 0;
diff --git a/mm/nommu.c b/mm/nommu.c
index 23fb033e596d..1f60194d9b9b 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -45,6 +45,7 @@ int heap_stack_gap = 0;
 
 EXPORT_SYMBOL(mem_map);
 EXPORT_SYMBOL(__vm_enough_memory);
+EXPORT_SYMBOL(num_physpages);
 
 /* list of shareable VMAs */
 struct rb_root nommu_vma_tree = RB_ROOT;
@@ -826,6 +827,11 @@ unsigned long do_mmap_pgoff(struct file *file,
 		unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 		unsigned long vmpglen;
 
+		/* suppress VMA sharing for shared regions */
+		if (vm_flags & VM_SHARED &&
+		    capabilities & BDI_CAP_MAP_DIRECT)
+			goto dont_share_VMAs;
+
 		for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
 			vma = rb_entry(rb, struct vm_area_struct, vm_rb);
 
@@ -859,6 +865,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 				goto shared;
 		}
 
+	dont_share_VMAs:
 		vma = NULL;
 
 		/* obtain the address at which to make a shared mapping
@@ -1193,6 +1200,28 @@ void unmap_mapping_range(struct address_space *mapping,
 EXPORT_SYMBOL(unmap_mapping_range);
 
 /*
+ * ask for an unmapped area at which to create a mapping on a file
+ */
+unsigned long get_unmapped_area(struct file *file, unsigned long addr,
+				unsigned long len, unsigned long pgoff,
+				unsigned long flags)
+{
+	unsigned long (*get_area)(struct file *, unsigned long, unsigned long,
+				  unsigned long, unsigned long);
+
+	get_area = current->mm->get_unmapped_area;
+	if (file && file->f_op && file->f_op->get_unmapped_area)
+		get_area = file->f_op->get_unmapped_area;
+
+	if (!get_area)
+		return -ENOSYS;
+
+	return get_area(file, addr, len, pgoff, flags);
+}
+
+EXPORT_SYMBOL(get_unmapped_area);
+
+/*
  * Check that a process has enough memory to allocate a new virtual
  * mapping. 0 means there is enough memory for the allocation to
  * succeed and -ENOMEM implies there is not.
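
The new nommu get_unmapped_area() mirrors the MMU version's dispatch order: a file's own f_op hook wins, otherwise the mm's default, otherwise -ENOSYS. A hypothetical caller (sketch only):

	unsigned long addr;

	addr = get_unmapped_area(file, 0, len, pgoff, flags);
	if (IS_ERR_VALUE(addr))
		return addr;	/* typically -ENOSYS on nommu without a hook */
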
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b278b8d60eee..3791edfffeeb 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -176,6 +176,8 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
 	struct zone **z;
 	nodemask_t nodes;
 	int node;
+
+	nodes_clear(nodes);
 	/* node has memory ? */
 	for_each_online_node(node)
 		if (NODE_DATA(node)->node_present_pages)
@@ -320,7 +322,7 @@ static int oom_kill_task(struct task_struct *p)
 	 * Don't kill the process if any threads are set to OOM_DISABLE
 	 */
 	do_each_thread(g, q) {
-		if (q->mm == mm && p->oomkilladj == OOM_DISABLE)
+		if (q->mm == mm && q->oomkilladj == OOM_DISABLE)
 			return 1;
 	} while_each_thread(g, q);
 
@@ -333,7 +335,7 @@ static int oom_kill_task(struct task_struct *p)
 	 */
 	do_each_thread(g, q) {
 		if (q->mm == mm && q->tgid != p->tgid)
-			force_sig(SIGKILL, p);
+			force_sig(SIGKILL, q);
 	} while_each_thread(g, q);
 
 	return 0;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index f469e3cd08e8..a794945fd194 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -67,12 +67,12 @@ static inline long sync_writeback_pages(void)
 /*
  * Start background writeback (via pdflush) at this percentage
  */
-int dirty_background_ratio = 10;
+int dirty_background_ratio = 5;
 
 /*
  * The generator of dirty data starts writeback at this percentage
  */
-int vm_dirty_ratio = 40;
+int vm_dirty_ratio = 10;
 
 /*
  * The interval between `kupdate'-style writebacks, in jiffies
diff --git a/mm/rmap.c b/mm/rmap.c
index 22ed3f71a674..59da5b734c80 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -498,9 +498,11 @@ int page_mkclean(struct page *page)
 		struct address_space *mapping = page_mapping(page);
 		if (mapping)
 			ret = page_mkclean_file(mapping, page);
+		if (page_test_dirty(page)) {
+			page_clear_dirty(page);
+			ret = 1;
+		}
 	}
-	if (page_test_and_clear_dirty(page))
-		ret = 1;
 
 	return ret;
 }
@@ -605,8 +607,10 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
 		 * Leaving it set also helps swapoff to reinstate ptes
 		 * faster for those pages still in swapcache.
 		 */
-		if (page_test_and_clear_dirty(page))
+		if (page_test_dirty(page)) {
+			page_clear_dirty(page);
 			set_page_dirty(page);
+		}
 		__dec_zone_page_state(page,
 			PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
 	}
diff --git a/mm/shmem.c b/mm/shmem.c
index b8c429a2d271..b2a35ebf071a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -402,26 +402,38 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
 /*
  * shmem_free_swp - free some swap entries in a directory
  *
  * @dir:	pointer to the directory
  * @edir:	pointer after last entry of the directory
+ * @punch_lock: pointer to spinlock when needed for the holepunch case
  */
-static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
+static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
+						spinlock_t *punch_lock)
 {
+	spinlock_t *punch_unlock = NULL;
 	swp_entry_t *ptr;
 	int freed = 0;
 
 	for (ptr = dir; ptr < edir; ptr++) {
 		if (ptr->val) {
+			if (unlikely(punch_lock)) {
+				punch_unlock = punch_lock;
+				punch_lock = NULL;
+				spin_lock(punch_unlock);
+				if (!ptr->val)
+					continue;
+			}
 			free_swap_and_cache(*ptr);
 			*ptr = (swp_entry_t){0};
 			freed++;
 		}
 	}
+	if (punch_unlock)
+		spin_unlock(punch_unlock);
 	return freed;
 }
 
-static int shmem_map_and_free_swp(struct page *subdir,
-		int offset, int limit, struct page ***dir)
+static int shmem_map_and_free_swp(struct page *subdir, int offset,
+		int limit, struct page ***dir, spinlock_t *punch_lock)
 {
 	swp_entry_t *ptr;
 	int freed = 0;
@@ -431,7 +443,8 @@ static int shmem_map_and_free_swp(struct page *subdir,
 		int size = limit - offset;
 		if (size > LATENCY_LIMIT)
 			size = LATENCY_LIMIT;
-		freed += shmem_free_swp(ptr+offset, ptr+offset+size);
+		freed += shmem_free_swp(ptr+offset, ptr+offset+size,
+							punch_lock);
 		if (need_resched()) {
 			shmem_swp_unmap(ptr);
 			if (*dir) {
@@ -481,7 +494,10 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 	long nr_swaps_freed = 0;
 	int offset;
 	int freed;
-	int punch_hole = 0;
+	int punch_hole;
+	spinlock_t *needs_lock;
+	spinlock_t *punch_lock;
+	unsigned long upper_limit;
 
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 	idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -492,11 +508,20 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 	info->flags |= SHMEM_TRUNCATE;
 	if (likely(end == (loff_t) -1)) {
 		limit = info->next_index;
+		upper_limit = SHMEM_MAX_INDEX;
 		info->next_index = idx;
+		needs_lock = NULL;
+		punch_hole = 0;
 	} else {
-		limit = (end + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-		if (limit > info->next_index)
-			limit = info->next_index;
+		if (end + 1 >= inode->i_size) {	/* we may free a little more */
+			limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
+							PAGE_CACHE_SHIFT;
+			upper_limit = SHMEM_MAX_INDEX;
+		} else {
+			limit = (end + 1) >> PAGE_CACHE_SHIFT;
+			upper_limit = limit;
+		}
+		needs_lock = &info->lock;
 		punch_hole = 1;
 	}
 
@@ -513,17 +538,30 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 		size = limit;
 		if (size > SHMEM_NR_DIRECT)
 			size = SHMEM_NR_DIRECT;
-		nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size);
+		nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
 	}
 
 	/*
 	 * If there are no indirect blocks or we are punching a hole
 	 * below indirect blocks, nothing to be done.
 	 */
-	if (!topdir || (punch_hole && (limit <= SHMEM_NR_DIRECT)))
+	if (!topdir || limit <= SHMEM_NR_DIRECT)
 		goto done2;
 
-	BUG_ON(limit <= SHMEM_NR_DIRECT);
+	/*
+	 * The truncation case has already dropped info->lock, and we're safe
+	 * because i_size and next_index have already been lowered, preventing
+	 * access beyond. But in the punch_hole case, we still need to take
+	 * the lock when updating the swap directory, because there might be
+	 * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
+	 * shmem_writepage. However, whenever we find we can remove a whole
+	 * directory page (not at the misaligned start or end of the range),
+	 * we first NULLify its pointer in the level above, and then have no
+	 * need to take the lock when updating its contents: needs_lock and
+	 * punch_lock (either pointing to info->lock or NULL) manage this.
+	 */
+
+	upper_limit -= SHMEM_NR_DIRECT;
 	limit -= SHMEM_NR_DIRECT;
 	idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
 	offset = idx % ENTRIES_PER_PAGE;
@@ -543,8 +581,14 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 		if (*dir) {
 			diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
 				ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
-			if (!diroff && !offset) {
-				*dir = NULL;
+			if (!diroff && !offset && upper_limit >= stage) {
+				if (needs_lock) {
+					spin_lock(needs_lock);
+					*dir = NULL;
+					spin_unlock(needs_lock);
+					needs_lock = NULL;
+				} else
+					*dir = NULL;
 				nr_pages_to_free++;
 				list_add(&middir->lru, &pages_to_free);
 			}
@@ -570,39 +614,55 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 			}
 			stage = idx + ENTRIES_PER_PAGEPAGE;
 			middir = *dir;
-			*dir = NULL;
-			nr_pages_to_free++;
-			list_add(&middir->lru, &pages_to_free);
+			if (punch_hole)
+				needs_lock = &info->lock;
+			if (upper_limit >= stage) {
+				if (needs_lock) {
+					spin_lock(needs_lock);
+					*dir = NULL;
+					spin_unlock(needs_lock);
+					needs_lock = NULL;
+				} else
+					*dir = NULL;
+				nr_pages_to_free++;
+				list_add(&middir->lru, &pages_to_free);
+			}
 			shmem_dir_unmap(dir);
 			cond_resched();
 			dir = shmem_dir_map(middir);
 			diroff = 0;
 		}
+		punch_lock = needs_lock;
 		subdir = dir[diroff];
-		if (subdir && page_private(subdir)) {
+		if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
+			if (needs_lock) {
+				spin_lock(needs_lock);
+				dir[diroff] = NULL;
+				spin_unlock(needs_lock);
+				punch_lock = NULL;
+			} else
+				dir[diroff] = NULL;
+			nr_pages_to_free++;
+			list_add(&subdir->lru, &pages_to_free);
+		}
+		if (subdir && page_private(subdir) /* has swap entries */) {
 			size = limit - idx;
 			if (size > ENTRIES_PER_PAGE)
 				size = ENTRIES_PER_PAGE;
 			freed = shmem_map_and_free_swp(subdir,
-						offset, size, &dir);
+					offset, size, &dir, punch_lock);
 			if (!dir)
 				dir = shmem_dir_map(middir);
 			nr_swaps_freed += freed;
-			if (offset)
+			if (offset || punch_lock) {
 				spin_lock(&info->lock);
-			set_page_private(subdir, page_private(subdir) - freed);
-			if (offset)
+				set_page_private(subdir,
+					page_private(subdir) - freed);
 				spin_unlock(&info->lock);
-			if (!punch_hole)
-				BUG_ON(page_private(subdir) > offset);
+			} else
+				BUG_ON(page_private(subdir) != freed);
 		}
-		if (offset)
-			offset = 0;
-		else if (subdir && !page_private(subdir)) {
-			dir[diroff] = NULL;
-			nr_pages_to_free++;
-			list_add(&subdir->lru, &pages_to_free);
-		}
+		offset = 0;
 	}
 done1:
 	shmem_dir_unmap(dir);
@@ -614,8 +674,16 @@ done2:
 		 * generic_delete_inode did it, before we lowered next_index.
 		 * Also, though shmem_getpage checks i_size before adding to
 		 * cache, no recheck after: so fix the narrow window there too.
+		 *
+		 * Recalling truncate_inode_pages_range and unmap_mapping_range
+		 * every time for punch_hole (which never got a chance to clear
+		 * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive,
+		 * yet hardly ever necessary: try to optimize them out later.
 		 */
 		truncate_inode_pages_range(inode->i_mapping, start, end);
+		if (punch_hole)
+			unmap_mapping_range(inode->i_mapping, start,
+							end - start, 1);
 	}
 
 	spin_lock(&info->lock);
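
The needs_lock/punch_lock dance in the shmem hunks above reduces to one idiom: detach a whole directory page from its parent while holding info->lock, after which no racing shmem_getpage(), shmem_unuse_inode() or shmem_writepage() can reach it and its contents may be freed lock-free. In generic sketch form (hypothetical slot/release names):

	spin_lock(&info->lock);
	subdir = *slot;		/* pointer held in the level above */
	*slot = NULL;		/* now unreachable by racing lookups */
	spin_unlock(&info->lock);
	if (subdir)
		release_directory_page(subdir);	/* private now: no lock needed */
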
diff --git a/mm/slab.c b/mm/slab.c
index 57f7aa420064..4cbac24ae2f1 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1802,8 +1802,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
 		/* Print header */
 		if (lines == 0) {
 			printk(KERN_ERR
-					"Slab corruption: start=%p, len=%d\n",
-					realobj, size);
+					"Slab corruption: %s start=%p, len=%d\n",
+					cachep->name, realobj, size);
 			print_objinfo(cachep, objp, 0);
 		}
 		/* Hexdump the affected line */