path: root/include/linux/pagemap.h
author		Mel Gorman <mgorman@suse.de>	2014-06-04 19:10:31 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-06-04 19:54:10 -0400
commit		2457aec63745e235bcafb7ef312b182d8682f0fc (patch)
tree		c658266ed5a8c1acd4f2028c8bf69ab2a7c8ba42 /include/linux/pagemap.h
parent		e7470ee89f003634a88e7b5e5a7b65b3025987de (diff)
mm: non-atomically mark page accessed during page cache allocation where possible
aops->write_begin may allocate a new page and make it visible only to have
mark_page_accessed called almost immediately after.  Once the page is visible
the atomic operations are necessary, which is noticeable overhead when writing
to an in-memory filesystem like tmpfs but should also be noticeable with fast
storage.  The objective of the patch is to initialise the accessed information
with non-atomic operations before the page is visible.

The bulk of filesystems directly or indirectly use grab_cache_page_write_begin
or find_or_create_page for the initial allocation of a page cache page.  This
patch adds an init_page_accessed() helper which behaves like the first call to
mark_page_accessed() but may be called before the page is visible and can be
done non-atomically.

The primary APIs of concern in this case are the following and are used by
most filesystems.

	find_get_page
	find_lock_page
	find_or_create_page
	grab_cache_page_nowait
	grab_cache_page_write_begin

All of them are very similar in detail, so the patch creates a core helper
pagecache_get_page() which takes a flags parameter that affects its behaviour,
such as whether the page should be marked accessed or not.  The old API is
preserved but is basically a thin wrapper around this core function.

Each of the filesystems is then updated to avoid calling mark_page_accessed
when it is known that the VM interfaces have already done the job.  There is
a slight snag in that the timing of the mark_page_accessed() has now changed,
so in rare cases it's possible a page gets to the end of the LRU as
PageReferenced whereas previously it might have been repromoted.  This is
expected to be rare but it's worth the filesystem people thinking about it in
case they see a problem with the timing change.  It is also the case that some
filesystems may be marking pages accessed that previously did not, but it
makes sense that filesystems have consistent behaviour in this regard.

The test case used to evaluate this is a simple dd of a large file done
multiple times with the file deleted on each iteration.  The size of the file
is 1/10th physical memory to avoid dirty page balancing.  In the async case
it is possible that the workload completes without even hitting the disk and
will have variable results, but it highlights the impact of
mark_page_accessed for async IO.  The sync results are expected to be more
stable.  The exception is tmpfs where the normal case is for the "IO" to not
hit the disk.

The test machine was single socket and UMA to avoid any scheduling or NUMA
artifacts.  Throughput and wall times are presented for sync IO; only wall
times are shown for async, as the granularity reported by dd and the
variability make it unsuitable for comparison.  As async results were variable
due to writeback timings, I'm only reporting the maximum figures.  The sync
results were stable enough to make the mean and stddev uninteresting.

The performance results are reported based on a run with no profiling.
Profile data is based on a separate run with oprofile running.

async dd
                                 3.15.0-rc3          3.15.0-rc3
                                    vanilla         accessed-v2
ext3   Max  elapsed     13.9900 (  0.00%)    11.5900 ( 17.16%)
tmpfs  Max  elapsed      0.5100 (  0.00%)     0.4900 (  3.92%)
btrfs  Max  elapsed     12.8100 (  0.00%)    12.7800 (  0.23%)
ext4   Max  elapsed     18.6000 (  0.00%)    13.3400 ( 28.28%)
xfs    Max  elapsed     12.5600 (  0.00%)     2.0900 ( 83.36%)

The XFS figure is a bit strange as it managed to avoid a worst case by sheer
luck, but the average figures looked reasonable.
        samples  percentage
ext3     86107     0.9783  vmlinux-3.15.0-rc4-vanilla         mark_page_accessed
ext3     23833     0.2710  vmlinux-3.15.0-rc4-accessed-v3r25  mark_page_accessed
ext3      5036     0.0573  vmlinux-3.15.0-rc4-accessed-v3r25  init_page_accessed
ext4     64566     0.8961  vmlinux-3.15.0-rc4-vanilla         mark_page_accessed
ext4      5322     0.0713  vmlinux-3.15.0-rc4-accessed-v3r25  mark_page_accessed
ext4      2869     0.0384  vmlinux-3.15.0-rc4-accessed-v3r25  init_page_accessed
xfs      62126     1.7675  vmlinux-3.15.0-rc4-vanilla         mark_page_accessed
xfs       1904     0.0554  vmlinux-3.15.0-rc4-accessed-v3r25  init_page_accessed
xfs        103     0.0030  vmlinux-3.15.0-rc4-accessed-v3r25  mark_page_accessed
btrfs    10655     0.1338  vmlinux-3.15.0-rc4-vanilla         mark_page_accessed
btrfs     2020     0.0273  vmlinux-3.15.0-rc4-accessed-v3r25  init_page_accessed
btrfs      587     0.0079  vmlinux-3.15.0-rc4-accessed-v3r25  mark_page_accessed
tmpfs    59562     3.2628  vmlinux-3.15.0-rc4-vanilla         mark_page_accessed
tmpfs     1210     0.0696  vmlinux-3.15.0-rc4-accessed-v3r25  init_page_accessed
tmpfs       94     0.0054  vmlinux-3.15.0-rc4-accessed-v3r25  mark_page_accessed

[akpm@linux-foundation.org: don't run init_page_accessed() against an uninitialised pointer]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jan Kara <jack@suse.cz>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Tested-by: Prabhakar Lad <prabhakar.csengg@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
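As a minimal illustration of the caller-side convention this patch establishes
(a sketch only; the helper name example_write_grab_page is hypothetical, and
real filesystems normally go through grab_cache_page_write_begin(), which this
patch turns into a wrapper around pagecache_get_page()):

	#include <linux/pagemap.h>

	/*
	 * Illustrative sketch.  find_or_create_page() now resolves to
	 * pagecache_get_page(..., FGP_LOCK|FGP_ACCESSED|FGP_CREAT, ...), so
	 * the accessed state is initialised by the VM: non-atomically via
	 * init_page_accessed() when the page is freshly allocated and not
	 * yet visible, or via mark_page_accessed() when it already existed
	 * in the page cache.
	 */
	static struct page *example_write_grab_page(struct address_space *mapping,
						    pgoff_t index)
	{
		struct page *page;

		page = find_or_create_page(mapping, index,
					   mapping_gfp_mask(mapping));
		if (!page)
			return NULL;

		/*
		 * Previously a caller would do mark_page_accessed(page) here;
		 * after this patch that is redundant and would cost an atomic
		 * operation on a now-visible page, so it is dropped.
		 */
		return page;
	}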
Diffstat (limited to 'include/linux/pagemap.h')
-rw-r--r--	include/linux/pagemap.h	107
1 file changed, 101 insertions, 6 deletions
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index c16fb6d06e36..0a97b583ee8d 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -259,12 +259,109 @@ pgoff_t page_cache_next_hole(struct address_space *mapping,
 pgoff_t page_cache_prev_hole(struct address_space *mapping,
 			     pgoff_t index, unsigned long max_scan);
 
+#define FGP_ACCESSED		0x00000001
+#define FGP_LOCK		0x00000002
+#define FGP_CREAT		0x00000004
+#define FGP_WRITE		0x00000008
+#define FGP_NOFS		0x00000010
+#define FGP_NOWAIT		0x00000020
+
+struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
+		int fgp_flags, gfp_t cache_gfp_mask, gfp_t radix_gfp_mask);
+
+/**
+ * find_get_page - find and get a page reference
+ * @mapping: the address_space to search
+ * @offset: the page index
+ *
+ * Looks up the page cache slot at @mapping & @offset.  If there is a
+ * page cache page, it is returned with an increased refcount.
+ *
+ * Otherwise, %NULL is returned.
+ */
+static inline struct page *find_get_page(struct address_space *mapping,
+					pgoff_t offset)
+{
+	return pagecache_get_page(mapping, offset, 0, 0, 0);
+}
+
+static inline struct page *find_get_page_flags(struct address_space *mapping,
+					pgoff_t offset, int fgp_flags)
+{
+	return pagecache_get_page(mapping, offset, fgp_flags, 0, 0);
+}
+
+/**
+ * find_lock_page - locate, pin and lock a pagecache page
+ * pagecache_get_page - find and get a page reference
+ * @mapping: the address_space to search
+ * @offset: the page index
+ *
+ * Looks up the page cache slot at @mapping & @offset.  If there is a
+ * page cache page, it is returned locked and with an increased
+ * refcount.
+ *
+ * Otherwise, %NULL is returned.
+ *
+ * find_lock_page() may sleep.
+ */
+static inline struct page *find_lock_page(struct address_space *mapping,
+					pgoff_t offset)
+{
+	return pagecache_get_page(mapping, offset, FGP_LOCK, 0, 0);
+}
+
+/**
+ * find_or_create_page - locate or add a pagecache page
+ * @mapping: the page's address_space
+ * @index: the page's index into the mapping
+ * @gfp_mask: page allocation mode
+ *
+ * Looks up the page cache slot at @mapping & @offset.  If there is a
+ * page cache page, it is returned locked and with an increased
+ * refcount.
+ *
+ * If the page is not present, a new page is allocated using @gfp_mask
+ * and added to the page cache and the VM's LRU list.  The page is
+ * returned locked and with an increased refcount.
+ *
+ * On memory exhaustion, %NULL is returned.
+ *
+ * find_or_create_page() may sleep, even if @gfp_flags specifies an
+ * atomic allocation!
+ */
+static inline struct page *find_or_create_page(struct address_space *mapping,
+					pgoff_t offset, gfp_t gfp_mask)
+{
+	return pagecache_get_page(mapping, offset,
+					FGP_LOCK|FGP_ACCESSED|FGP_CREAT,
+					gfp_mask, gfp_mask & GFP_RECLAIM_MASK);
+}
+
+/**
+ * grab_cache_page_nowait - returns locked page at given index in given cache
+ * @mapping: target address_space
+ * @index: the page index
+ *
+ * Same as grab_cache_page(), but do not wait if the page is unavailable.
+ * This is intended for speculative data generators, where the data can
+ * be regenerated if the page couldn't be grabbed.  This routine should
+ * be safe to call while holding the lock for another page.
+ *
+ * Clear __GFP_FS when allocating the page to avoid recursion into the fs
+ * and deadlock against the caller's locked page.
+ */
+static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
+				pgoff_t index)
+{
+	return pagecache_get_page(mapping, index,
+			FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT,
+			mapping_gfp_mask(mapping),
+			GFP_NOFS);
+}
+
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
-struct page *find_get_page(struct address_space *mapping, pgoff_t offset);
 struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
-struct page *find_lock_page(struct address_space *mapping, pgoff_t offset);
-struct page *find_or_create_page(struct address_space *mapping, pgoff_t index,
-				 gfp_t gfp_mask);
 unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
 			  unsigned int nr_entries, struct page **entries,
 			  pgoff_t *indices);
@@ -287,8 +384,6 @@ static inline struct page *grab_cache_page(struct address_space *mapping,
 	return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
 }
 
-extern struct page * grab_cache_page_nowait(struct address_space *mapping,
-			pgoff_t index);
 extern struct page * read_cache_page(struct address_space *mapping,
 				pgoff_t index, filler_t *filler, void *data);
 extern struct page * read_cache_page_gfp(struct address_space *mapping,
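
A corresponding read-side sketch, assuming a caller that previously paired
find_get_page() with an explicit mark_page_accessed() call (the helper name
example_read_get_page is hypothetical; find_get_page_flags() and FGP_ACCESSED
are the new interfaces declared above):

	#include <linux/pagemap.h>

	/*
	 * Illustrative sketch.  FGP_ACCESSED asks the lookup itself to mark
	 * the page accessed, so the separate mark_page_accessed() call in
	 * the caller can be removed.
	 */
	static struct page *example_read_get_page(struct address_space *mapping,
						  pgoff_t index)
	{
		/*
		 * Old pattern:
		 *	page = find_get_page(mapping, index);
		 *	if (page)
		 *		mark_page_accessed(page);
		 */
		return find_get_page_flags(mapping, index, FGP_ACCESSED);
	}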