author    Linus Torvalds <torvalds@linux-foundation.org>  2013-07-02 12:39:34 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2013-07-02 12:39:34 -0400
commit    9e239bb93914e1c832d54161c7f8f398d0c914ab (patch)
tree      0fe11e8e717152660ad77d77e66bf0f1695d7ed1 /mm
parent    63580e51bb3e7ec459501165884e5f815a7a9322 (diff)
parent    6ae06ff51eab5dcbbf959b05ce0f11003a305ba5 (diff)
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 update from Ted Ts'o:
 "Lots of bug fixes, cleanups and optimizations.

  In the bug fixes category, of note is a fix for on-line resizing of file
  systems where the block size is smaller than the page size (i.e., file
  systems with 1k blocks on x86, or more interestingly file systems with
  4k blocks on Power or ia64 systems).

  In the cleanup category, ext4's punch hole implementation was
  significantly improved by Lukas Czerner, and now supports bigalloc file
  systems.  In addition, Jan Kara significantly cleaned up the write
  submission code path.  We also improved error checking and added a few
  sanity checks.

  In the optimizations category, two major optimizations deserve mention.
  The first is that ext4_writepages() is now used for nodelalloc and ext3
  compatibility mode.  This allows writes to be submitted much more
  efficiently as a single bio request, instead of being sent as individual
  4k writes into the block layer (which then relied on the elevator code
  to coalesce the requests in the block queue).  Secondly, the extent
  cache shrink mechanism, which was introduced in 3.9, no longer has a
  scalability bottleneck caused by the i_es_lru spinlock.  Other
  optimizations include some changes to reduce CPU usage and to avoid
  issuing empty commits unnecessarily."

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (86 commits)
  ext4: optimize starting extent in ext4_ext_rm_leaf()
  jbd2: invalidate handle if jbd2_journal_restart() fails
  ext4: translate flag bits to strings in tracepoints
  ext4: fix up error handling for mpage_map_and_submit_extent()
  jbd2: fix theoretical race in jbd2__journal_restart
  ext4: only zero partial blocks in ext4_zero_partial_blocks()
  ext4: check error return from ext4_write_inline_data_end()
  ext4: delete unnecessary C statements
  ext3,ext4: don't mess with dir_file->f_pos in htree_dirblock_to_tree()
  jbd2: move superblock checksum calculation to jbd2_write_superblock()
  ext4: pass inode pointer instead of file pointer to punch hole
  ext4: improve free space calculation for inline_data
  ext4: reduce object size when !CONFIG_PRINTK
  ext4: improve extent cache shrink mechanism to avoid to burn CPU time
  ext4: implement error handling of ext4_mb_new_preallocation()
  ext4: fix corruption when online resizing a fs with 1K block size
  ext4: delete unused variables
  ext4: return FIEMAP_EXTENT_UNKNOWN for delalloc extents
  jbd2: remove debug dependency on debug_fs and update Kconfig help text
  jbd2: use a single printk for jbd_debug()
  ...
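As a rough illustration of the "single bio request" point above: the sketch below is not the ext4 writeback path itself, only a hypothetical comparison (using the 3.10-era block-layer calls submit_bh(), bio_alloc(), bio_add_page() and submit_bio(); the function names and array parameters are invented) of pushing each 4k buffer into the block layer separately versus packing contiguous pages into one bio so a single request reaches the queue.

/* Hypothetical sketch, not the actual ext4 code path. */
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>

/* Old style: one submission per 4k buffer; the elevator has to merge them. */
static void write_blocks_one_by_one(struct buffer_head *bhs[], int nr)
{
	int i;

	for (i = 0; i < nr; i++)
		submit_bh(WRITE, bhs[i]);
}

/* Batched style: contiguous pages packed into a single bio, one submission. */
static void write_blocks_as_one_bio(struct block_device *bdev, sector_t sector,
				    struct page *pages[], int nr)
{
	struct bio *bio = bio_alloc(GFP_NOFS, nr);
	int i;

	bio->bi_bdev = bdev;
	bio->bi_sector = sector;	/* bi_sector in the 3.10-era struct bio */
	for (i = 0; i < nr; i++)
		if (!bio_add_page(bio, pages[i], PAGE_SIZE, 0))
			break;		/* bio full; a real caller would start another */
	/* A real caller would also set bio->bi_end_io before submitting. */
	submit_bio(WRITE, bio);
}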
Diffstat (limited to 'mm')
-rw-r--r--  mm/readahead.c |   2
-rw-r--r--  mm/truncate.c  | 117
2 files changed, 82 insertions, 37 deletions
diff --git a/mm/readahead.c b/mm/readahead.c
index daed28dd5830..829a77c62834 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -48,7 +48,7 @@ static void read_cache_pages_invalidate_page(struct address_space *mapping,
 		if (!trylock_page(page))
 			BUG();
 		page->mapping = mapping;
-		do_invalidatepage(page, 0);
+		do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
 		page->mapping = NULL;
 		unlock_page(page);
 	}
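The hunk above, like the mm/truncate.c changes that follow, exists because the ->invalidatepage address_space operation now takes a byte offset plus a length rather than a single truncation point. A minimal, hypothetical callback sketch (example_invalidatepage and its two stub helpers are invented names, not taken from any real filesystem) shows what the new convention lets an implementation distinguish:

#include <linux/mm.h>
#include <linux/pagemap.h>

/* Stubs standing in for filesystem-private bookkeeping (hypothetical). */
static void example_release_private(struct page *page) { }
static void example_trim_private(struct page *page,
				 unsigned int offset, unsigned int length) { }

/*
 * Hypothetical ->invalidatepage callback illustrating the new
 * (offset, length) convention introduced by this merge.
 */
static void example_invalidatepage(struct page *page, unsigned int offset,
				   unsigned int length)
{
	if (offset == 0 && length == PAGE_CACHE_SIZE)
		/* The whole page is being invalidated: drop everything. */
		example_release_private(page);
	else
		/* Only [offset, offset + length) goes away, e.g. a hole
		 * punched inside the page: keep state that still covers
		 * valid data. */
		example_trim_private(page, offset, length);
}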
diff --git a/mm/truncate.c b/mm/truncate.c
index c75b736e54b7..e2e8a8a7eb9d 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -26,7 +26,8 @@
 /**
  * do_invalidatepage - invalidate part or all of a page
  * @page: the page which is affected
- * @offset: the index of the truncation point
+ * @offset: start of the range to invalidate
+ * @length: length of the range to invalidate
  *
  * do_invalidatepage() is called when all or part of the page has become
  * invalidated by a truncate operation.
@@ -37,24 +38,18 @@
  * point. Because the caller is about to free (and possibly reuse) those
  * blocks on-disk.
  */
-void do_invalidatepage(struct page *page, unsigned long offset)
+void do_invalidatepage(struct page *page, unsigned int offset,
+		       unsigned int length)
 {
-	void (*invalidatepage)(struct page *, unsigned long);
+	void (*invalidatepage)(struct page *, unsigned int, unsigned int);
+
 	invalidatepage = page->mapping->a_ops->invalidatepage;
 #ifdef CONFIG_BLOCK
 	if (!invalidatepage)
 		invalidatepage = block_invalidatepage;
 #endif
 	if (invalidatepage)
-		(*invalidatepage)(page, offset);
-}
-
-static inline void truncate_partial_page(struct page *page, unsigned partial)
-{
-	zero_user_segment(page, partial, PAGE_CACHE_SIZE);
-	cleancache_invalidate_page(page->mapping, page);
-	if (page_has_private(page))
-		do_invalidatepage(page, partial);
+		(*invalidatepage)(page, offset, length);
 }
 
 /*
@@ -103,7 +98,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
 		return -EIO;
 
 	if (page_has_private(page))
-		do_invalidatepage(page, 0);
+		do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
 
 	cancel_dirty_page(page, PAGE_CACHE_SIZE);
 
@@ -185,11 +180,11 @@ int invalidate_inode_page(struct page *page)
  * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
  * @mapping: mapping to truncate
  * @lstart: offset from which to truncate
- * @lend: offset to which to truncate
+ * @lend: offset to which to truncate (inclusive)
  *
  * Truncate the page cache, removing the pages that are between
- * specified offsets (and zeroing out partial page
- * (if lstart is not page aligned)).
+ * specified offsets (and zeroing out partial pages
+ * if lstart or lend + 1 is not page aligned).
  *
  * Truncate takes two passes - the first pass is nonblocking. It will not
  * block on page locks and it will not block on writeback. The second pass
@@ -200,35 +195,58 @@ int invalidate_inode_page(struct page *page)
  * We pass down the cache-hot hint to the page freeing code. Even if the
  * mapping is large, it is probably the case that the final pages are the most
  * recently touched, and freeing happens in ascending file offset order.
+ *
+ * Note that since ->invalidatepage() accepts range to invalidate
+ * truncate_inode_pages_range is able to handle cases where lend + 1 is not
+ * page aligned properly.
  */
 void truncate_inode_pages_range(struct address_space *mapping,
 				loff_t lstart, loff_t lend)
 {
-	const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
-	const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
-	struct pagevec pvec;
-	pgoff_t index;
-	pgoff_t end;
-	int i;
+	pgoff_t		start;		/* inclusive */
+	pgoff_t		end;		/* exclusive */
+	unsigned int	partial_start;	/* inclusive */
+	unsigned int	partial_end;	/* exclusive */
+	struct pagevec	pvec;
+	pgoff_t		index;
+	int		i;
 
 	cleancache_invalidate_inode(mapping);
 	if (mapping->nrpages == 0)
 		return;
 
-	BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
-	end = (lend >> PAGE_CACHE_SHIFT);
+	/* Offsets within partial pages */
+	partial_start = lstart & (PAGE_CACHE_SIZE - 1);
+	partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);
+
+	/*
+	 * 'start' and 'end' always covers the range of pages to be fully
+	 * truncated. Partial pages are covered with 'partial_start' at the
+	 * start of the range and 'partial_end' at the end of the range.
+	 * Note that 'end' is exclusive while 'lend' is inclusive.
+	 */
+	start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	if (lend == -1)
+		/*
+		 * lend == -1 indicates end-of-file so we have to set 'end'
+		 * to the highest possible pgoff_t and since the type is
+		 * unsigned we're using -1.
+		 */
+		end = -1;
+	else
+		end = (lend + 1) >> PAGE_CACHE_SHIFT;
 
 	pagevec_init(&pvec, 0);
 	index = start;
-	while (index <= end && pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+	while (index < end && pagevec_lookup(&pvec, mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE))) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
 			/* We rely upon deletion not changing page->index */
 			index = page->index;
-			if (index > end)
+			if (index >= end)
 				break;
 
 			if (!trylock_page(page))
@@ -247,27 +265,56 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			index++;
 		}
 
-	if (partial) {
+	if (partial_start) {
 		struct page *page = find_lock_page(mapping, start - 1);
 		if (page) {
+			unsigned int top = PAGE_CACHE_SIZE;
+			if (start > end) {
+				/* Truncation within a single page */
+				top = partial_end;
+				partial_end = 0;
+			}
 			wait_on_page_writeback(page);
-			truncate_partial_page(page, partial);
+			zero_user_segment(page, partial_start, top);
+			cleancache_invalidate_page(mapping, page);
+			if (page_has_private(page))
+				do_invalidatepage(page, partial_start,
+						  top - partial_start);
 			unlock_page(page);
 			page_cache_release(page);
 		}
 	}
+	if (partial_end) {
+		struct page *page = find_lock_page(mapping, end);
+		if (page) {
+			wait_on_page_writeback(page);
+			zero_user_segment(page, 0, partial_end);
+			cleancache_invalidate_page(mapping, page);
+			if (page_has_private(page))
+				do_invalidatepage(page, 0,
+						  partial_end);
+			unlock_page(page);
+			page_cache_release(page);
+		}
+	}
+	/*
+	 * If the truncation happened within a single page no pages
+	 * will be released, just zeroed, so we can bail out now.
+	 */
+	if (start >= end)
+		return;
 
 	index = start;
 	for ( ; ; ) {
 		cond_resched();
 		if (!pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+			min(end - index, (pgoff_t)PAGEVEC_SIZE))) {
 			if (index == start)
 				break;
 			index = start;
 			continue;
 		}
-		if (index == start && pvec.pages[0]->index > end) {
+		if (index == start && pvec.pages[0]->index >= end) {
 			pagevec_release(&pvec);
 			break;
 		}
@@ -277,7 +324,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 
 			/* We rely upon deletion not changing page->index */
 			index = page->index;
-			if (index > end)
+			if (index >= end)
 				break;
 
 			lock_page(page);
@@ -598,10 +645,8 @@ void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
 	 * This rounding is currently just for example: unmap_mapping_range
 	 * expands its hole outwards, whereas we want it to contract the hole
 	 * inwards. However, existing callers of truncate_pagecache_range are
-	 * doing their own page rounding first; and truncate_inode_pages_range
-	 * currently BUGs if lend is not pagealigned-1 (it handles partial
-	 * page at start of hole, but not partial page at end of hole). Note
-	 * unmap_mapping_range allows holelen 0 for all, and we allow lend -1.
+	 * doing their own page rounding first. Note that unmap_mapping_range
+	 * allows holelen 0 for all, and we allow lend -1 for end of file.
 	 */
 
 	/*
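To see what the new partial_start/partial_end bookkeeping in truncate_inode_pages_range() works out to, the standalone sketch below redoes the same arithmetic in userspace (PAGE_CACHE_SIZE is assumed to be 4096 bytes and the lstart/lend values are arbitrary examples, not from the patch):

#include <stdio.h>
#include <stdint.h>

#define PAGE_CACHE_SIZE  4096UL		/* assumed 4 KiB page size */
#define PAGE_CACHE_SHIFT 12

int main(void)
{
	/* Example: truncate the byte range 5000..10000 (lend is inclusive). */
	uint64_t lstart = 5000, lend = 10000;

	/* Offsets within the partial pages, exactly as in the patched code. */
	unsigned int partial_start = lstart & (PAGE_CACHE_SIZE - 1);
	unsigned int partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1);

	/* [start, end) is the set of page indexes removed outright;
	 * 'start' is inclusive and 'end' is exclusive. */
	uint64_t start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	uint64_t end = (lend + 1) >> PAGE_CACHE_SHIFT;

	printf("fully truncated page indexes: [%llu, %llu)\n",
	       (unsigned long long)start, (unsigned long long)end);
	if (partial_start)
		printf("page %llu: zero bytes [%u, %lu)\n",
		       (unsigned long long)(start - 1), partial_start,
		       PAGE_CACHE_SIZE);
	if (partial_end)
		printf("page %llu: zero bytes [0, %u)\n",
		       (unsigned long long)end, partial_end);
	/* (When start > end both partial ranges land in one page and the
	 * kernel folds them into a single zero_user_segment() call.) */
	return 0;
}

With these inputs start == end == 2, so no whole page is removed: only the partial pages at indexes 1 (bytes 904..4095) and 2 (bytes 0..1808) are zeroed, and the function returns early via the new "if (start >= end)" check.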