author    Lukas Czerner <lczerner@redhat.com>    2013-05-27 23:32:35 -0400
committer Theodore Ts'o <tytso@mit.edu>          2013-05-27 23:32:35 -0400
commit    5a7203947a1d9b6f3a00a39fda08c2466489555f (patch)
tree      f735cb5f4dbf4b0aa750572dbf1efac69ce2d4af /mm/truncate.c
parent    bad54831968a8429b08e34faf33178c21ac6bba0 (diff)
mm: teach truncate_inode_pages_range() to handle non page aligned ranges
This commit changes truncate_inode_pages_range() so it can handle non page aligned regions of the truncate. Currently we can hit BUG_ON when the end of the range is not page aligned, but we can handle an unaligned start of the range.

Being able to handle non page aligned regions of the range can help file system punch_hole implementations and save some work, because once we're holding the page we might as well deal with it right away.

In previous commits we've changed the ->invalidatepage() prototype to accept a 'length' argument so that a range to invalidate can be specified. Now we can use that new ability in truncate_inode_pages_range().

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
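In other words, [lstart, lend] is now split into a partial first page, a run of whole pages, and a partial last page, each handled on its own. Below is a small userspace sketch of that arithmetic, not kernel code: it assumes a 4096-byte page and plain long long offsets, whereas the kernel uses PAGE_CACHE_SIZE, PAGE_CACHE_SHIFT and pgoff_t, and additionally treats lend == -1 as end-of-file.

/* Illustrative only: mirrors the patch's range split with an assumed
 * 4096-byte page; none of this is kernel API. */
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1LL << PAGE_SHIFT)

static void classify(long long lstart, long long lend)
{
        /* byte offsets inside the partial first and last pages */
        long long partial_start = lstart & (PAGE_SIZE - 1);
        long long partial_end   = (lend + 1) & (PAGE_SIZE - 1);

        /* fully truncated pages form the half-open index range [start, end) */
        long long start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
        long long end   = (lend + 1) >> PAGE_SHIFT;

        printf("bytes [%lld, %lld]: partial_start=%lld partial_end=%lld whole pages [%lld, %lld)%s\n",
               lstart, lend, partial_start, partial_end, start, end,
               start > end ? " (range lies inside a single page)" : "");
}

int main(void)
{
        classify(100, 8291);   /* unaligned start and end */
        classify(4096, 12287); /* both ends aligned: no partial pages */
        classify(100, 200);    /* hole entirely inside page 0 */
        return 0;
}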
Diffstat (limited to 'mm/truncate.c')
-rw-r--r--  mm/truncate.c  104
1 file changed, 73 insertions, 31 deletions
diff --git a/mm/truncate.c b/mm/truncate.c
index fdba083f0d71..e2e8a8a7eb9d 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -52,14 +52,6 @@ void do_invalidatepage(struct page *page, unsigned int offset,
                 (*invalidatepage)(page, offset, length);
 }
 
-static inline void truncate_partial_page(struct page *page, unsigned partial)
-{
-        zero_user_segment(page, partial, PAGE_CACHE_SIZE);
-        cleancache_invalidate_page(page->mapping, page);
-        if (page_has_private(page))
-                do_invalidatepage(page, partial, PAGE_CACHE_SIZE - partial);
-}
-
 /*
  * This cancels just the dirty bit on the kernel page itself, it
  * does NOT actually remove dirty bits on any mmap's that may be
@@ -188,11 +180,11 @@ int invalidate_inode_page(struct page *page)
  * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
  * @mapping: mapping to truncate
  * @lstart: offset from which to truncate
- * @lend: offset to which to truncate
+ * @lend: offset to which to truncate (inclusive)
  *
  * Truncate the page cache, removing the pages that are between
- * specified offsets (and zeroing out partial page
- * (if lstart is not page aligned)).
+ * specified offsets (and zeroing out partial pages
+ * if lstart or lend + 1 is not page aligned).
  *
  * Truncate takes two passes - the first pass is nonblocking. It will not
  * block on page locks and it will not block on writeback. The second pass
@@ -203,35 +195,58 @@ int invalidate_inode_page(struct page *page)
  * We pass down the cache-hot hint to the page freeing code. Even if the
  * mapping is large, it is probably the case that the final pages are the most
  * recently touched, and freeing happens in ascending file offset order.
+ *
+ * Note that since ->invalidatepage() accepts range to invalidate
+ * truncate_inode_pages_range is able to handle cases where lend + 1 is not
+ * page aligned properly.
  */
 void truncate_inode_pages_range(struct address_space *mapping,
                                 loff_t lstart, loff_t lend)
 {
-        const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
-        const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
-        struct pagevec pvec;
-        pgoff_t index;
-        pgoff_t end;
-        int i;
+        pgoff_t         start;          /* inclusive */
+        pgoff_t         end;            /* exclusive */
+        unsigned int    partial_start;  /* inclusive */
+        unsigned int    partial_end;    /* exclusive */
+        struct pagevec  pvec;
+        pgoff_t         index;
+        int             i;
 
         cleancache_invalidate_inode(mapping);
         if (mapping->nrpages == 0)
                 return;
 
-        BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
-        end = (lend >> PAGE_CACHE_SHIFT);
+        /* Offsets within partial pages */
+        partial_start = lstart & (PAGE_CACHE_SIZE - 1);
+        partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
+
+        /*
+         * 'start' and 'end' always covers the range of pages to be fully
+         * truncated. Partial pages are covered with 'partial_start' at the
+         * start of the range and 'partial_end' at the end of the range.
+         * Note that 'end' is exclusive while 'lend' is inclusive.
+         */
+        start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+        if (lend == -1)
+                /*
+                 * lend == -1 indicates end-of-file so we have to set 'end'
+                 * to the highest possible pgoff_t and since the type is
+                 * unsigned we're using -1.
+                 */
+                end = -1;
+        else
+                end = (lend + 1) >> PAGE_CACHE_SHIFT;
 
         pagevec_init(&pvec, 0);
         index = start;
-        while (index <= end && pagevec_lookup(&pvec, mapping, index,
-                        min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+        while (index < end && pagevec_lookup(&pvec, mapping, index,
+                        min(end - index, (pgoff_t)PAGEVEC_SIZE))) {
                 mem_cgroup_uncharge_start();
                 for (i = 0; i < pagevec_count(&pvec); i++) {
                         struct page *page = pvec.pages[i];
 
                         /* We rely upon deletion not changing page->index */
                         index = page->index;
-                        if (index > end)
+                        if (index >= end)
                                 break;
 
                         if (!trylock_page(page))
@@ -250,27 +265,56 @@ void truncate_inode_pages_range(struct address_space *mapping,
                 index++;
         }
 
-        if (partial) {
+        if (partial_start) {
                 struct page *page = find_lock_page(mapping, start - 1);
                 if (page) {
+                        unsigned int top = PAGE_CACHE_SIZE;
+                        if (start > end) {
+                                /* Truncation within a single page */
+                                top = partial_end;
+                                partial_end = 0;
+                        }
                         wait_on_page_writeback(page);
-                        truncate_partial_page(page, partial);
+                        zero_user_segment(page, partial_start, top);
+                        cleancache_invalidate_page(mapping, page);
+                        if (page_has_private(page))
+                                do_invalidatepage(page, partial_start,
+                                                  top - partial_start);
                         unlock_page(page);
                         page_cache_release(page);
                 }
         }
+        if (partial_end) {
+                struct page *page = find_lock_page(mapping, end);
+                if (page) {
+                        wait_on_page_writeback(page);
+                        zero_user_segment(page, 0, partial_end);
+                        cleancache_invalidate_page(mapping, page);
+                        if (page_has_private(page))
+                                do_invalidatepage(page, 0,
+                                                  partial_end);
+                        unlock_page(page);
+                        page_cache_release(page);
+                }
+        }
+        /*
+         * If the truncation happened within a single page no pages
+         * will be released, just zeroed, so we can bail out now.
+         */
+        if (start >= end)
+                return;
 
         index = start;
         for ( ; ; ) {
                 cond_resched();
                 if (!pagevec_lookup(&pvec, mapping, index,
-                        min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+                        min(end - index, (pgoff_t)PAGEVEC_SIZE))) {
                         if (index == start)
                                 break;
                         index = start;
                         continue;
                 }
-                if (index == start && pvec.pages[0]->index > end) {
+                if (index == start && pvec.pages[0]->index >= end) {
                         pagevec_release(&pvec);
                         break;
                 }
@@ -280,7 +324,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 
                         /* We rely upon deletion not changing page->index */
                         index = page->index;
-                        if (index > end)
+                        if (index >= end)
                                 break;
 
                         lock_page(page);
@@ -601,10 +645,8 @@ void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
          * This rounding is currently just for example: unmap_mapping_range
          * expands its hole outwards, whereas we want it to contract the hole
          * inwards. However, existing callers of truncate_pagecache_range are
-         * doing their own page rounding first; and truncate_inode_pages_range
-         * currently BUGs if lend is not pagealigned-1 (it handles partial
-         * page at start of hole, but not partial page at end of hole). Note
-         * unmap_mapping_range allows holelen 0 for all, and we allow lend -1.
+         * doing their own page rounding first. Note that unmap_mapping_range
+         * allows holelen 0 for all, and we allow lend -1 for end of file.
          */
 
         /*
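One detail of the hunks above worth spelling out: the first partial page is zeroed from partial_start up to 'top', where top is normally the page size but collapses to partial_end when the whole range sits inside a single page (start > end after rounding); partial_end is then cleared and the function bails out after zeroing, since no whole page is released. A userspace sketch of just that decision, again assuming 4096-byte pages (the names mirror the patch, but this is an illustration, not kernel code):

#include <stdio.h>

#define PAGE_SIZE 4096LL

/* Report which byte ranges the patched truncate would zero in the first
 * and last pages of the hole; whole pages in between are simply dropped. */
static void zeroed_ranges(long long lstart, long long lend)
{
        long long partial_start = lstart & (PAGE_SIZE - 1);
        long long partial_end   = (lend + 1) & (PAGE_SIZE - 1);
        long long start = (lstart + PAGE_SIZE - 1) / PAGE_SIZE;
        long long end   = (lend + 1) / PAGE_SIZE;

        if (partial_start) {
                long long top = PAGE_SIZE;
                if (start > end) {
                        /* truncation within a single page */
                        top = partial_end;
                        partial_end = 0;
                }
                printf("page %lld: zero bytes [%lld, %lld)\n",
                       start - 1, partial_start, top);
        }
        if (partial_end)
                printf("page %lld: zero bytes [0, %lld)\n", end, partial_end);
}

int main(void)
{
        zeroed_ranges(100, 8291); /* spans three pages */
        zeroed_ranges(100, 200);  /* start > end: single page, one zeroing */
        return 0;
}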