diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-02 12:39:34 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-02 12:39:34 -0400 |
commit | 9e239bb93914e1c832d54161c7f8f398d0c914ab (patch) | |
tree | 0fe11e8e717152660ad77d77e66bf0f1695d7ed1 /mm | |
parent | 63580e51bb3e7ec459501165884e5f815a7a9322 (diff) | |
parent | 6ae06ff51eab5dcbbf959b05ce0f11003a305ba5 (diff) |
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 update from Ted Ts'o:
"Lots of bug fixes, cleanups and optimizations. In the bug fixes
category, of note is a fix for on-line resizing file systems where the
block size is smaller than the page size (i.e., file systems with 1k blocks
on x86, or more interestingly file systems with 4k blocks on Power or
ia64 systems.)
In the cleanup category, the ext4's punch hole implementation was
significantly improved by Lukas Czerner, and now supports bigalloc
file systems. In addition, Jan Kara significantly cleaned up the
write submission code path. We also improved error checking and added
a few sanity checks.
In the optimizations category, two major optimizations deserve
mention. The first is that ext4_writepages() is now used for
nodelalloc and ext3 compatibility mode. This allows writes to be
submitted much more efficiently as a single bio request, instead of
being sent as individual 4k writes into the block layer (which then
relied on the elevator code to coalesce the requests in the block
queue). Secondly, the extent cache shrink mechanism, which was
introduced in 3.9, no longer has a scalability bottleneck caused by the
i_es_lru spinlock. Other optimizations include some changes to reduce
CPU usage and to avoid issuing empty commits unnecessarily."
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (86 commits)
ext4: optimize starting extent in ext4_ext_rm_leaf()
jbd2: invalidate handle if jbd2_journal_restart() fails
ext4: translate flag bits to strings in tracepoints
ext4: fix up error handling for mpage_map_and_submit_extent()
jbd2: fix theoretical race in jbd2__journal_restart
ext4: only zero partial blocks in ext4_zero_partial_blocks()
ext4: check error return from ext4_write_inline_data_end()
ext4: delete unnecessary C statements
ext3,ext4: don't mess with dir_file->f_pos in htree_dirblock_to_tree()
jbd2: move superblock checksum calculation to jbd2_write_superblock()
ext4: pass inode pointer instead of file pointer to punch hole
ext4: improve free space calculation for inline_data
ext4: reduce object size when !CONFIG_PRINTK
ext4: improve extent cache shrink mechanism to avoid to burn CPU time
ext4: implement error handling of ext4_mb_new_preallocation()
ext4: fix corruption when online resizing a fs with 1K block size
ext4: delete unused variables
ext4: return FIEMAP_EXTENT_UNKNOWN for delalloc extents
jbd2: remove debug dependency on debug_fs and update Kconfig help text
jbd2: use a single printk for jbd_debug()
...
Diffstat (limited to 'mm')
-rw-r--r-- | mm/readahead.c | 2 | ||||
-rw-r--r-- | mm/truncate.c | 117 |
2 files changed, 82 insertions, 37 deletions
diff --git a/mm/readahead.c b/mm/readahead.c index daed28dd5830..829a77c62834 100644 --- a/mm/readahead.c +++ b/mm/readahead.c | |||
@@ -48,7 +48,7 @@ static void read_cache_pages_invalidate_page(struct address_space *mapping, | |||
48 | if (!trylock_page(page)) | 48 | if (!trylock_page(page)) |
49 | BUG(); | 49 | BUG(); |
50 | page->mapping = mapping; | 50 | page->mapping = mapping; |
51 | do_invalidatepage(page, 0); | 51 | do_invalidatepage(page, 0, PAGE_CACHE_SIZE); |
52 | page->mapping = NULL; | 52 | page->mapping = NULL; |
53 | unlock_page(page); | 53 | unlock_page(page); |
54 | } | 54 | } |
diff --git a/mm/truncate.c b/mm/truncate.c index c75b736e54b7..e2e8a8a7eb9d 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -26,7 +26,8 @@ | |||
26 | /** | 26 | /** |
27 | * do_invalidatepage - invalidate part or all of a page | 27 | * do_invalidatepage - invalidate part or all of a page |
28 | * @page: the page which is affected | 28 | * @page: the page which is affected |
29 | * @offset: the index of the truncation point | 29 | * @offset: start of the range to invalidate |
30 | * @length: length of the range to invalidate | ||
30 | * | 31 | * |
31 | * do_invalidatepage() is called when all or part of the page has become | 32 | * do_invalidatepage() is called when all or part of the page has become |
32 | * invalidated by a truncate operation. | 33 | * invalidated by a truncate operation. |
@@ -37,24 +38,18 @@ | |||
37 | * point. Because the caller is about to free (and possibly reuse) those | 38 | * point. Because the caller is about to free (and possibly reuse) those |
38 | * blocks on-disk. | 39 | * blocks on-disk. |
39 | */ | 40 | */ |
40 | void do_invalidatepage(struct page *page, unsigned long offset) | 41 | void do_invalidatepage(struct page *page, unsigned int offset, |
42 | unsigned int length) | ||
41 | { | 43 | { |
42 | void (*invalidatepage)(struct page *, unsigned long); | 44 | void (*invalidatepage)(struct page *, unsigned int, unsigned int); |
45 | |||
43 | invalidatepage = page->mapping->a_ops->invalidatepage; | 46 | invalidatepage = page->mapping->a_ops->invalidatepage; |
44 | #ifdef CONFIG_BLOCK | 47 | #ifdef CONFIG_BLOCK |
45 | if (!invalidatepage) | 48 | if (!invalidatepage) |
46 | invalidatepage = block_invalidatepage; | 49 | invalidatepage = block_invalidatepage; |
47 | #endif | 50 | #endif |
48 | if (invalidatepage) | 51 | if (invalidatepage) |
49 | (*invalidatepage)(page, offset); | 52 | (*invalidatepage)(page, offset, length); |
50 | } | ||
51 | |||
52 | static inline void truncate_partial_page(struct page *page, unsigned partial) | ||
53 | { | ||
54 | zero_user_segment(page, partial, PAGE_CACHE_SIZE); | ||
55 | cleancache_invalidate_page(page->mapping, page); | ||
56 | if (page_has_private(page)) | ||
57 | do_invalidatepage(page, partial); | ||
58 | } | 53 | } |
59 | 54 | ||
60 | /* | 55 | /* |
@@ -103,7 +98,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page) | |||
103 | return -EIO; | 98 | return -EIO; |
104 | 99 | ||
105 | if (page_has_private(page)) | 100 | if (page_has_private(page)) |
106 | do_invalidatepage(page, 0); | 101 | do_invalidatepage(page, 0, PAGE_CACHE_SIZE); |
107 | 102 | ||
108 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | 103 | cancel_dirty_page(page, PAGE_CACHE_SIZE); |
109 | 104 | ||
@@ -185,11 +180,11 @@ int invalidate_inode_page(struct page *page) | |||
185 | * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets | 180 | * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets |
186 | * @mapping: mapping to truncate | 181 | * @mapping: mapping to truncate |
187 | * @lstart: offset from which to truncate | 182 | * @lstart: offset from which to truncate |
188 | * @lend: offset to which to truncate | 183 | * @lend: offset to which to truncate (inclusive) |
189 | * | 184 | * |
190 | * Truncate the page cache, removing the pages that are between | 185 | * Truncate the page cache, removing the pages that are between |
191 | * specified offsets (and zeroing out partial page | 186 | * specified offsets (and zeroing out partial pages |
192 | * (if lstart is not page aligned)). | 187 | * if lstart or lend + 1 is not page aligned). |
193 | * | 188 | * |
194 | * Truncate takes two passes - the first pass is nonblocking. It will not | 189 | * Truncate takes two passes - the first pass is nonblocking. It will not |
195 | * block on page locks and it will not block on writeback. The second pass | 190 | * block on page locks and it will not block on writeback. The second pass |
@@ -200,35 +195,58 @@ int invalidate_inode_page(struct page *page) | |||
200 | * We pass down the cache-hot hint to the page freeing code. Even if the | 195 | * We pass down the cache-hot hint to the page freeing code. Even if the |
201 | * mapping is large, it is probably the case that the final pages are the most | 196 | * mapping is large, it is probably the case that the final pages are the most |
202 | * recently touched, and freeing happens in ascending file offset order. | 197 | * recently touched, and freeing happens in ascending file offset order. |
198 | * | ||
199 | * Note that since ->invalidatepage() accepts range to invalidate | ||
200 | * truncate_inode_pages_range is able to handle cases where lend + 1 is not | ||
201 | * page aligned properly. | ||
203 | */ | 202 | */ |
204 | void truncate_inode_pages_range(struct address_space *mapping, | 203 | void truncate_inode_pages_range(struct address_space *mapping, |
205 | loff_t lstart, loff_t lend) | 204 | loff_t lstart, loff_t lend) |
206 | { | 205 | { |
207 | const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; | 206 | pgoff_t start; /* inclusive */ |
208 | const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); | 207 | pgoff_t end; /* exclusive */ |
209 | struct pagevec pvec; | 208 | unsigned int partial_start; /* inclusive */ |
210 | pgoff_t index; | 209 | unsigned int partial_end; /* exclusive */ |
211 | pgoff_t end; | 210 | struct pagevec pvec; |
212 | int i; | 211 | pgoff_t index; |
212 | int i; | ||
213 | 213 | ||
214 | cleancache_invalidate_inode(mapping); | 214 | cleancache_invalidate_inode(mapping); |
215 | if (mapping->nrpages == 0) | 215 | if (mapping->nrpages == 0) |
216 | return; | 216 | return; |
217 | 217 | ||
218 | BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1)); | 218 | /* Offsets within partial pages */ |
219 | end = (lend >> PAGE_CACHE_SHIFT); | 219 | partial_start = lstart & (PAGE_CACHE_SIZE - 1); |
220 | partial_end = (lend + 1) & (PAGE_CACHE_SIZE - 1); | ||
221 | |||
222 | /* | ||
223 | * 'start' and 'end' always covers the range of pages to be fully | ||
224 | * truncated. Partial pages are covered with 'partial_start' at the | ||
225 | * start of the range and 'partial_end' at the end of the range. | ||
226 | * Note that 'end' is exclusive while 'lend' is inclusive. | ||
227 | */ | ||
228 | start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | ||
229 | if (lend == -1) | ||
230 | /* | ||
231 | * lend == -1 indicates end-of-file so we have to set 'end' | ||
232 | * to the highest possible pgoff_t and since the type is | ||
233 | * unsigned we're using -1. | ||
234 | */ | ||
235 | end = -1; | ||
236 | else | ||
237 | end = (lend + 1) >> PAGE_CACHE_SHIFT; | ||
220 | 238 | ||
221 | pagevec_init(&pvec, 0); | 239 | pagevec_init(&pvec, 0); |
222 | index = start; | 240 | index = start; |
223 | while (index <= end && pagevec_lookup(&pvec, mapping, index, | 241 | while (index < end && pagevec_lookup(&pvec, mapping, index, |
224 | min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { | 242 | min(end - index, (pgoff_t)PAGEVEC_SIZE))) { |
225 | mem_cgroup_uncharge_start(); | 243 | mem_cgroup_uncharge_start(); |
226 | for (i = 0; i < pagevec_count(&pvec); i++) { | 244 | for (i = 0; i < pagevec_count(&pvec); i++) { |
227 | struct page *page = pvec.pages[i]; | 245 | struct page *page = pvec.pages[i]; |
228 | 246 | ||
229 | /* We rely upon deletion not changing page->index */ | 247 | /* We rely upon deletion not changing page->index */ |
230 | index = page->index; | 248 | index = page->index; |
231 | if (index > end) | 249 | if (index >= end) |
232 | break; | 250 | break; |
233 | 251 | ||
234 | if (!trylock_page(page)) | 252 | if (!trylock_page(page)) |
@@ -247,27 +265,56 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
247 | index++; | 265 | index++; |
248 | } | 266 | } |
249 | 267 | ||
250 | if (partial) { | 268 | if (partial_start) { |
251 | struct page *page = find_lock_page(mapping, start - 1); | 269 | struct page *page = find_lock_page(mapping, start - 1); |
252 | if (page) { | 270 | if (page) { |
271 | unsigned int top = PAGE_CACHE_SIZE; | ||
272 | if (start > end) { | ||
273 | /* Truncation within a single page */ | ||
274 | top = partial_end; | ||
275 | partial_end = 0; | ||
276 | } | ||
253 | wait_on_page_writeback(page); | 277 | wait_on_page_writeback(page); |
254 | truncate_partial_page(page, partial); | 278 | zero_user_segment(page, partial_start, top); |
279 | cleancache_invalidate_page(mapping, page); | ||
280 | if (page_has_private(page)) | ||
281 | do_invalidatepage(page, partial_start, | ||
282 | top - partial_start); | ||
255 | unlock_page(page); | 283 | unlock_page(page); |
256 | page_cache_release(page); | 284 | page_cache_release(page); |
257 | } | 285 | } |
258 | } | 286 | } |
287 | if (partial_end) { | ||
288 | struct page *page = find_lock_page(mapping, end); | ||
289 | if (page) { | ||
290 | wait_on_page_writeback(page); | ||
291 | zero_user_segment(page, 0, partial_end); | ||
292 | cleancache_invalidate_page(mapping, page); | ||
293 | if (page_has_private(page)) | ||
294 | do_invalidatepage(page, 0, | ||
295 | partial_end); | ||
296 | unlock_page(page); | ||
297 | page_cache_release(page); | ||
298 | } | ||
299 | } | ||
300 | /* | ||
301 | * If the truncation happened within a single page no pages | ||
302 | * will be released, just zeroed, so we can bail out now. | ||
303 | */ | ||
304 | if (start >= end) | ||
305 | return; | ||
259 | 306 | ||
260 | index = start; | 307 | index = start; |
261 | for ( ; ; ) { | 308 | for ( ; ; ) { |
262 | cond_resched(); | 309 | cond_resched(); |
263 | if (!pagevec_lookup(&pvec, mapping, index, | 310 | if (!pagevec_lookup(&pvec, mapping, index, |
264 | min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { | 311 | min(end - index, (pgoff_t)PAGEVEC_SIZE))) { |
265 | if (index == start) | 312 | if (index == start) |
266 | break; | 313 | break; |
267 | index = start; | 314 | index = start; |
268 | continue; | 315 | continue; |
269 | } | 316 | } |
270 | if (index == start && pvec.pages[0]->index > end) { | 317 | if (index == start && pvec.pages[0]->index >= end) { |
271 | pagevec_release(&pvec); | 318 | pagevec_release(&pvec); |
272 | break; | 319 | break; |
273 | } | 320 | } |
@@ -277,7 +324,7 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
277 | 324 | ||
278 | /* We rely upon deletion not changing page->index */ | 325 | /* We rely upon deletion not changing page->index */ |
279 | index = page->index; | 326 | index = page->index; |
280 | if (index > end) | 327 | if (index >= end) |
281 | break; | 328 | break; |
282 | 329 | ||
283 | lock_page(page); | 330 | lock_page(page); |
@@ -598,10 +645,8 @@ void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend) | |||
598 | * This rounding is currently just for example: unmap_mapping_range | 645 | * This rounding is currently just for example: unmap_mapping_range |
599 | * expands its hole outwards, whereas we want it to contract the hole | 646 | * expands its hole outwards, whereas we want it to contract the hole |
600 | * inwards. However, existing callers of truncate_pagecache_range are | 647 | * inwards. However, existing callers of truncate_pagecache_range are |
601 | * doing their own page rounding first; and truncate_inode_pages_range | 648 | * doing their own page rounding first. Note that unmap_mapping_range |
602 | * currently BUGs if lend is not pagealigned-1 (it handles partial | 649 | * allows holelen 0 for all, and we allow lend -1 for end of file. |
603 | * page at start of hole, but not partial page at end of hole). Note | ||
604 | * unmap_mapping_range allows holelen 0 for all, and we allow lend -1. | ||
605 | */ | 650 | */ |
606 | 651 | ||
607 | /* | 652 | /* |