author		Hugh Dickins <hughd@google.com>	2011-07-25 20:12:25 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-25 23:57:10 -0400
commit		b85e0effd3dcbf9118b896232f59526ab1a39a74 (patch)
tree		32200b2e4052d50d4eb1771e555eaf66d7c4cfc8 /mm/truncate.c
parent		8a549bea51138be2126a2cc6aabe8f17ef66b79b (diff)
mm: consistent truncate and invalidate loops
Make the pagevec_lookup loops in truncate_inode_pages_range(),
invalidate_mapping_pages() and invalidate_inode_pages2_range() more
consistent with each other.
They were relying upon page->index of an unlocked page, but apologizing
for it: accept it, embrace it, add comments and WARN_ONs, and simplify the
index handling.
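
Distilled, the shape all three loops now share looks like this (a sketch
assembled from the hunks below, not a verbatim excerpt: memcg accounting
is omitted, and the second truncate pass and invalidate_inode_pages2_range()
use lock_page() where pages must not be skipped):

	pgoff_t index = start;

	while (index <= end && pagevec_lookup(&pvec, mapping, index,
			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			/* We rely upon deletion not changing page->index */
			index = page->index;
			if (index > end)
				break;

			if (!trylock_page(page))
				continue;
			/* Locked now: the unlocked read above must still hold */
			WARN_ON(page->index != index);
			/* per-function work: truncate or invalidate the page */
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
		index++;	/* resume after the last index examined */
	}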
invalidate_inode_pages2_range() had special handling for a wrapped
page->index + 1 = 0 case; but MAX_LFS_FILESIZE doesn't let us anywhere
near there, and a corrupt page->index in the radix_tree could cause more
trouble than that would catch. Remove that wrapped handling.
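
Rough bounds, assuming this era's definitions in include/linux/fs.h and 4K
pages (the exact constants are this sketch's assumption, not part of the
patch):

	/*
	 * 32-bit: MAX_LFS_FILESIZE = ((u64)PAGE_CACHE_SIZE << 31) - 1 = 2^43 - 1,
	 *	   so the highest legal page->index is (2^43 - 1) >> 12 = 2^31 - 1,
	 *	   while 32-bit pgoff_t wraps at 2^32.
	 * 64-bit: MAX_LFS_FILESIZE = 0x7fffffffffffffff, so the highest legal
	 *	   page->index is about 2^51, while pgoff_t wraps at 2^64.
	 *
	 * A valid index therefore sits at least a factor of two below the wrap
	 * point: index + 1 == 0 implies a corrupt page->index, and a corrupt
	 * index need not be ULONG_MAX, so the old test caught almost nothing.
	 */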
invalidate_inode_pages2_range() uses min() to limit the pagevec_lookup
when near the end of the range: copy that into the other two, although
it's less useful than you might think (it limits the use of the buffer,
rather than the indices looked up).
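
Concretely, assuming PAGEVEC_SIZE is 14 (its value at the time):
pagevec_lookup() treats the count as a cap on how many pages to return from
index onward, not as an upper index, so in a sparse range the pages returned
can still lie beyond end; hence each loop keeps its own "if (index > end)"
check:

	/*
	 * nr = min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1;
	 *
	 * index = 100, end = 512: min(412, 13) + 1 = 14	(full pagevec)
	 * index = 510, end = 512: min(  2, 13) + 1 =  3	(3 slots used)
	 * index = 512, end = 512: min(  0, 13) + 1 =  1	(last page only)
	 */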
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/truncate.c')
-rw-r--r--	mm/truncate.c	110
1 file changed, 47 insertions(+), 63 deletions(-)
diff --git a/mm/truncate.c b/mm/truncate.c
index c924764e2ce5..dc459014f777 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -199,9 +199,6 @@ int invalidate_inode_page(struct page *page)
  * The first pass will remove most pages, so the search cost of the second pass
  * is low.
  *
- * When looking at page->index outside the page lock we need to be careful to
- * copy it into a local to avoid races (it could change at any time).
- *
  * We pass down the cache-hot hint to the page freeing code. Even if the
  * mapping is large, it is probably the case that the final pages are the most
  * recently touched, and freeing happens in ascending file offset order.
@@ -210,10 +207,10 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		loff_t lstart, loff_t lend)
 {
 	const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
-	pgoff_t end;
 	const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
 	struct pagevec pvec;
-	pgoff_t next;
+	pgoff_t index;
+	pgoff_t end;
 	int i;
 
 	cleancache_flush_inode(mapping);
@@ -224,24 +221,21 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	end = (lend >> PAGE_CACHE_SHIFT);
 
 	pagevec_init(&pvec, 0);
-	next = start;
-	while (next <= end &&
-	       pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+	index = start;
+	while (index <= end && pagevec_lookup(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
-			pgoff_t page_index = page->index;
 
-			if (page_index > end) {
-				next = page_index;
+			/* We rely upon deletion not changing page->index */
+			index = page->index;
+			if (index > end)
 				break;
-			}
 
-			if (page_index > next)
-				next = page_index;
-			next++;
 			if (!trylock_page(page))
 				continue;
+			WARN_ON(page->index != index);
 			if (PageWriteback(page)) {
 				unlock_page(page);
 				continue;
@@ -252,6 +246,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
+		index++;
 	}
 
 	if (partial) {
@@ -264,13 +259,14 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		}
 	}
 
-	next = start;
+	index = start;
 	for ( ; ; ) {
 		cond_resched();
-		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
-			if (next == start)
+		if (!pagevec_lookup(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+			if (index == start)
 				break;
-			next = start;
+			index = start;
 			continue;
 		}
 		if (pvec.pages[0]->index > end) {
@@ -281,18 +277,20 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
-			if (page->index > end)
+			/* We rely upon deletion not changing page->index */
+			index = page->index;
+			if (index > end)
 				break;
+
 			lock_page(page);
+			WARN_ON(page->index != index);
 			wait_on_page_writeback(page);
 			truncate_inode_page(mapping, page);
-			if (page->index > next)
-				next = page->index;
-			next++;
 			unlock_page(page);
 		}
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
+		index++;
 	}
 	cleancache_flush_inode(mapping);
 }
@@ -333,35 +331,26 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 		pgoff_t start, pgoff_t end)
 {
 	struct pagevec pvec;
-	pgoff_t next = start;
+	pgoff_t index = start;
 	unsigned long ret;
 	unsigned long count = 0;
 	int i;
 
 	pagevec_init(&pvec, 0);
-	while (next <= end &&
-	       pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+	while (index <= end && pagevec_lookup(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
-			pgoff_t index;
-			int lock_failed;
-
-			lock_failed = !trylock_page(page);
 
-			/*
-			 * We really shouldn't be looking at the ->index of an
-			 * unlocked page.  But we're not allowed to lock these
-			 * pages.  So we rely upon nobody altering the ->index
-			 * of this (pinned-by-us) page.
-			 */
+			/* We rely upon deletion not changing page->index */
 			index = page->index;
-			if (index > next)
-				next = index;
-			next++;
-			if (lock_failed)
-				continue;
+			if (index > end)
+				break;
 
+			if (!trylock_page(page))
+				continue;
+			WARN_ON(page->index != index);
 			ret = invalidate_inode_page(page);
 			unlock_page(page);
 			/*
@@ -371,12 +360,11 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 			if (!ret)
 				deactivate_page(page);
 			count += ret;
-			if (next > end)
-				break;
 		}
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
+		index++;
 	}
 	return count;
 }
@@ -442,37 +430,32 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 		pgoff_t start, pgoff_t end)
 {
 	struct pagevec pvec;
-	pgoff_t next;
+	pgoff_t index;
 	int i;
 	int ret = 0;
 	int ret2 = 0;
 	int did_range_unmap = 0;
-	int wrapped = 0;
 
 	cleancache_flush_inode(mapping);
 	pagevec_init(&pvec, 0);
-	next = start;
-	while (next <= end && !wrapped &&
-	       pagevec_lookup(&pvec, mapping, next,
-			min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+	index = start;
+	while (index <= end && pagevec_lookup(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
-			pgoff_t page_index;
+
+			/* We rely upon deletion not changing page->index */
+			index = page->index;
+			if (index > end)
+				break;
 
 			lock_page(page);
+			WARN_ON(page->index != index);
 			if (page->mapping != mapping) {
 				unlock_page(page);
 				continue;
 			}
-			page_index = page->index;
-			next = page_index + 1;
-			if (next == 0)
-				wrapped = 1;
-			if (page_index > end) {
-				unlock_page(page);
-				break;
-			}
 			wait_on_page_writeback(page);
 			if (page_mapped(page)) {
 				if (!did_range_unmap) {
@@ -480,9 +463,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 					 * Zap the rest of the file in one hit.
 					 */
 					unmap_mapping_range(mapping,
-					   (loff_t)page_index<<PAGE_CACHE_SHIFT,
-					   (loff_t)(end - page_index + 1)
-							<< PAGE_CACHE_SHIFT,
+					   (loff_t)index << PAGE_CACHE_SHIFT,
+					   (loff_t)(1 + end - index)
+							 << PAGE_CACHE_SHIFT,
 					    0);
 					did_range_unmap = 1;
 				} else {
@@ -490,8 +473,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 					 * Just zap this page
 					 */
 					unmap_mapping_range(mapping,
-					  (loff_t)page_index<<PAGE_CACHE_SHIFT,
-					  PAGE_CACHE_SIZE, 0);
+					   (loff_t)index << PAGE_CACHE_SHIFT,
+					   PAGE_CACHE_SIZE, 0);
 				}
 			}
 			BUG_ON(page_mapped(page));
@@ -507,6 +490,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
+		index++;
 	}
 	cleancache_flush_inode(mapping);
 	return ret;