author    Vlastimil Babka <vbabka@suse.cz>               2016-01-14 18:19:20 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-01-14 19:00:49 -0500
commit    6a15a37097c7e02390bb08d83dac433c9f10144f (patch)
tree      6571e0fd8b882842d89f36897706977936c75c1d
parent    c261e7d94f0dd33a34b6cf98686e8b9699b62340 (diff)
mm, proc: reduce cost of /proc/pid/smaps for shmem mappings
The previous patch has improved swap accounting for shmem mappings, which however made /proc/pid/smaps more expensive for shmem mappings, as we consult the radix tree for each pte_none entry, so the overall complexity is O(n*log(n)).

We can reduce this significantly for mappings that cannot contain COWed pages, because then we can either use the statistics that the shmem object itself tracks (if the mapping contains the whole object, or the swap usage of the whole object is zero), or use the radix tree iterator, which is much more effective than repeated find_get_entry() calls.

This patch therefore introduces a function shmem_swap_usage(vma) and makes /proc/pid/smaps use it when possible. Only for writable private mappings of shmem objects (i.e. tmpfs files) with the shmem object itself (partially) swapped out, we have to resort to the find_get_entry() approach. Hopefully such mappings are relatively uncommon.

To demonstrate the difference, I have measured this on a process that creates a 2GB mapping and dirties single pages with a stride of 2MB, and timed how long it takes to cat /proc/pid/smaps of this process 100 times.

Private writable mapping of a /dev/shm/file (the most complex case):

real    0m3.831s
user    0m0.180s
sys     0m3.212s

Shared mapping of an almost full mapping of a partially swapped /dev/shm/file (which needs to employ the radix tree iterator):

real    0m1.351s
user    0m0.096s
sys     0m0.768s

Same, but with /dev/shm/file not swapped (so no radix tree walk needed):

real    0m0.935s
user    0m0.128s
sys     0m0.344s

Private anonymous mapping:

real    0m0.949s
user    0m0.116s
sys     0m0.348s

The cost is now much closer to the private anonymous mapping case, unless the shmem mapping is private and writable.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
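For reference, a minimal sketch of the kind of test program described above. The tmpfs file path, the 2GB/2MB figures taken from the text, and the external timing loop are assumptions about the setup, not the author's exact tool; it assumes a 64-bit build.

/*
 * Map 2GB of a tmpfs file privately and writably, COW-dirty one page every
 * 2MB, then block so /proc/<pid>/smaps of this process can be timed from
 * another shell, e.g.:
 *   time sh -c 'for i in $(seq 100); do cat /proc/<pid>/smaps > /dev/null; done'
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define MAP_SIZE (2UL << 30)    /* 2GB mapping */
#define STRIDE   (2UL << 20)    /* dirty one page every 2MB */

int main(void)
{
        int fd = open("/dev/shm/file", O_RDWR | O_CREAT, 0600);
        unsigned long off;
        char *p;

        if (fd < 0 || ftruncate(fd, MAP_SIZE)) {
                perror("open/ftruncate");
                return 1;
        }
        p = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                return 1;
        }
        for (off = 0; off < MAP_SIZE; off += STRIDE)
                p[off] = 1;     /* write fault -> private COW page */

        printf("pid %d ready\n", getpid());
        pause();                /* keep the mapping alive for measurement */
        return 0;
}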
-rw-r--r--  fs/proc/task_mmu.c        22
-rw-r--r--  include/linux/shmem_fs.h   2
-rw-r--r--  mm/shmem.c                70
3 files changed, 92 insertions, 2 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 85ef60fdf2c0..5830b2e129ed 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -14,6 +14,7 @@
 #include <linux/swapops.h>
 #include <linux/mmu_notifier.h>
 #include <linux/page_idle.h>
+#include <linux/shmem_fs.h>
 
 #include <asm/elf.h>
 #include <asm/uaccess.h>
@@ -717,8 +718,25 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
 
 #ifdef CONFIG_SHMEM
         if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
-                mss.check_shmem_swap = true;
-                smaps_walk.pte_hole = smaps_pte_hole;
+                /*
+                 * For shared or readonly shmem mappings we know that all
+                 * swapped out pages belong to the shmem object, and we can
+                 * obtain the swap value much more efficiently. For private
+                 * writable mappings, we might have COW pages that are
+                 * not affected by the parent swapped out pages of the shmem
+                 * object, so we have to distinguish them during the page walk.
+                 * Unless we know that the shmem object (or the part mapped by
+                 * our VMA) has no swapped out pages at all.
+                 */
+                unsigned long shmem_swapped = shmem_swap_usage(vma);
+
+                if (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
+                                        !(vma->vm_flags & VM_WRITE)) {
+                        mss.swap = shmem_swapped;
+                } else {
+                        mss.check_shmem_swap = true;
+                        smaps_walk.pte_hole = smaps_pte_hole;
+                }
         }
 #endif
 
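The condition added to show_smap() boils down to a single predicate: the whole-object figure from shmem_swap_usage() is exact whenever no COW page can shadow a swapped-out shmem page. A standalone restatement of that check, with the flag values written out here purely for illustration (in the kernel they come from include/linux/mm.h):

#include <stdbool.h>

#define VM_WRITE  0x00000002UL
#define VM_SHARED 0x00000008UL

/*
 * True when mss.swap can simply be set to shmem_swap_usage(vma): the
 * object has nothing in swap, or the mapping is shared, or it is not
 * writable, so no private COW page can differ from the shmem object.
 * Otherwise the per-pte walk with smaps_pte_hole is still needed.
 */
static bool shmem_swap_is_exact(unsigned long vm_flags,
                                unsigned long shmem_swapped)
{
        return !shmem_swapped ||
               (vm_flags & VM_SHARED) ||
               !(vm_flags & VM_WRITE);
}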
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 50777b5b1e4c..bd58be5e7a2a 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -60,6 +60,8 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
 extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
 extern int shmem_unuse(swp_entry_t entry, struct page *page);
 
+extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
+
 static inline struct page *shmem_read_mapping_page(
         struct address_space *mapping, pgoff_t index)
 {
diff --git a/mm/shmem.c b/mm/shmem.c
index 9e60093aca3f..e978621de1ef 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -360,6 +360,76 @@ static int shmem_free_swap(struct address_space *mapping,
 }
 
 /*
+ * Determine (in bytes) how many of the shmem object's pages mapped by the
+ * given vma is swapped out.
+ *
+ * This is safe to call without i_mutex or mapping->tree_lock thanks to RCU,
+ * as long as the inode doesn't go away and racy results are not a problem.
+ */
+unsigned long shmem_swap_usage(struct vm_area_struct *vma)
+{
+        struct inode *inode = file_inode(vma->vm_file);
+        struct shmem_inode_info *info = SHMEM_I(inode);
+        struct address_space *mapping = inode->i_mapping;
+        unsigned long swapped;
+        pgoff_t start, end;
+        struct radix_tree_iter iter;
+        void **slot;
+        struct page *page;
+
+        /* Be careful as we don't hold info->lock */
+        swapped = READ_ONCE(info->swapped);
+
+        /*
+         * The easier cases are when the shmem object has nothing in swap, or
+         * the vma maps it whole. Then we can simply use the stats that we
+         * already track.
+         */
+        if (!swapped)
+                return 0;
+
+        if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size)
+                return swapped << PAGE_SHIFT;
+
+        swapped = 0;
+
+        /* Here comes the more involved part */
+        start = linear_page_index(vma, vma->vm_start);
+        end = linear_page_index(vma, vma->vm_end);
+
+        rcu_read_lock();
+
+restart:
+        radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+                if (iter.index >= end)
+                        break;
+
+                page = radix_tree_deref_slot(slot);
+
+                /*
+                 * This should only be possible to happen at index 0, so we
+                 * don't need to reset the counter, nor do we risk infinite
+                 * restarts.
+                 */
+                if (radix_tree_deref_retry(page))
+                        goto restart;
+
+                if (radix_tree_exceptional_entry(page))
+                        swapped++;
+
+                if (need_resched()) {
+                        cond_resched_rcu();
+                        start = iter.index + 1;
+                        goto restart;
+                }
+        }
+
+        rcu_read_unlock();
+
+        return swapped << PAGE_SHIFT;
+}
+
+/*
  * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists.
  */
 void shmem_unlock_mapping(struct address_space *mapping)
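The value shmem_swap_usage() returns is in bytes; show_smap() reports it through the existing Swap: field of /proc/pid/smaps in kB. A small userspace sketch (not part of the patch) that sums that field across all VMAs of the current process, i.e. the number this patch makes cheaper to produce for shmem mappings:

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/self/smaps", "r");
        char line[256];
        unsigned long kb, total_kb = 0;

        if (!f) {
                perror("fopen");
                return 1;
        }
        while (fgets(line, sizeof(line), f)) {
                /* Each VMA contributes one "Swap:   <n> kB" line */
                if (sscanf(line, "Swap: %lu kB", &kb) == 1)
                        total_kb += kb;
        }
        fclose(f);
        printf("total swap across mappings: %lu kB\n", total_kb);
        return 0;
}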