path: root/mm
author		Mel Gorman <mgorman@techsingularity.net>	2017-11-15 20:37:44 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-11-15 21:21:06 -0500
commit		f2187599189d94aeeee2fa5d9806186c7732ed37 (patch)
tree		53b413e06354788da54aa4e3a8b0c1ccf30fa189 /mm
parent		c7df8ad2910e965a6241b6d8f52fd122e26b0315 (diff)
mm, truncate: remove all exceptional entries from pagevec under one lock
During truncate, each entry in a pagevec is checked to see if it is an
exceptional entry and, if so, the shadow entry is cleaned up.  This is
potentially expensive, as multiple entries for a mapping lock/unlock the
tree lock each time.  This patch batches the operation so that the
exceptional entries removed from a pagevec acquire the mapping tree lock
only once.  The corner case where this is more expensive is when there
is only a single exceptional entry, but that is unlikely due to temporal
locality and how it affects LRU ordering.  Note that for truncations of
small files created recently, this patch should show no gain because it
only batches the handling of exceptional entries.

sparsetruncate (large)
                              4.14.0-rc4             4.14.0-rc4
                         pickhelper-v1r1       batchshadow-v1r1
Min          Time      38.00 (  0.00%)       27.00 ( 28.95%)
1st-qrtle    Time      40.00 (  0.00%)       28.00 ( 30.00%)
2nd-qrtle    Time      44.00 (  0.00%)       41.00 (  6.82%)
3rd-qrtle    Time     146.00 (  0.00%)      147.00 ( -0.68%)
Max-90%      Time     153.00 (  0.00%)      153.00 (  0.00%)
Max-95%      Time     155.00 (  0.00%)      156.00 ( -0.65%)
Max-99%      Time     181.00 (  0.00%)      171.00 (  5.52%)
Amean        Time      93.04 (  0.00%)       88.43 (  4.96%)
Best99%Amean Time      92.08 (  0.00%)       86.13 (  6.46%)
Best95%Amean Time      89.19 (  0.00%)       83.13 (  6.80%)
Best90%Amean Time      85.60 (  0.00%)       79.15 (  7.53%)
Best75%Amean Time      72.95 (  0.00%)       65.09 ( 10.78%)
Best50%Amean Time      39.86 (  0.00%)       28.20 ( 29.25%)
Best25%Amean Time      39.44 (  0.00%)       27.70 ( 29.77%)

bonnie
                                  4.14.0-rc4             4.14.0-rc4
                             pickhelper-v1r1       batchshadow-v1r1
Hmean SeqCreate ops            71.92 (  0.00%)       76.78 (  6.76%)
Hmean SeqCreate read           42.42 (  0.00%)       45.01 (  6.10%)
Hmean SeqCreate del         26519.88 (  0.00%)    27191.87 (  2.53%)
Hmean RandCreate ops           71.92 (  0.00%)       76.95 (  7.00%)
Hmean RandCreate read          44.44 (  0.00%)       49.23 ( 10.78%)
Hmean RandCreate del        24948.62 (  0.00%)    24764.97 ( -0.74%)

Truncation of a large number of files shows a substantial gain, with 99%
of files being truncated 6.46% faster.  bonnie shows a modest gain of
2.53%.

[jack@suse.cz: fix truncate_exceptional_pvec_entries()]
  Link: http://lkml.kernel.org/r/20171108164226.26788-1-jack@suse.cz
Link: http://lkml.kernel.org/r/20171018075952.10627-4-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Jan Kara <jack@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
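The core of the change is a classic lock-batching pattern: scan the batch
first, and take the lock at most once for all the entries rather than once
per entry.  The self-contained userspace sketch below illustrates just the
pattern; all names here (entry_batch, is_exceptional, clear_one) are
hypothetical stand-ins for the kernel's pagevec and radix-tree machinery,
with a pthread mutex standing in for mapping->tree_lock.

/*
 * Sketch only: contrasts per-entry locking with the batched form.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;

struct entry_batch {
	void		*entries[15];	/* PAGEVEC_SIZE-like capacity */
	unsigned int	nr;
};

/* The kernel tags exceptional entries in the low pointer bits. */
static bool is_exceptional(void *entry)
{
	return ((uintptr_t)entry & 1) != 0;
}

static void clear_one(void **slot)
{
	*slot = NULL;		/* caller must hold tree_lock */
}

/* Before: one lock/unlock round trip per exceptional entry. */
static void clear_unbatched(struct entry_batch *b)
{
	for (unsigned int i = 0; i < b->nr; i++) {
		if (!is_exceptional(b->entries[i]))
			continue;
		pthread_mutex_lock(&tree_lock);
		clear_one(&b->entries[i]);
		pthread_mutex_unlock(&tree_lock);
	}
}

/* After: scan first, then take the lock at most once for the batch. */
static void clear_batched(struct entry_batch *b)
{
	unsigned int i;

	for (i = 0; i < b->nr; i++)
		if (is_exceptional(b->entries[i]))
			break;
	if (i == b->nr)
		return;		/* no exceptional entries: lock untouched */

	pthread_mutex_lock(&tree_lock);
	for (; i < b->nr; i++)
		if (is_exceptional(b->entries[i]))
			clear_one(&b->entries[i]);
	pthread_mutex_unlock(&tree_lock);
}

The up-front scan also preserves the common fast path: a pagevec that
contains no exceptional entries never touches the lock at all, which is
why small, recently created files see no change.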
Diffstat (limited to 'mm')
-rw-r--r--	mm/truncate.c	91
1 file changed, 63 insertions(+), 28 deletions(-)
diff --git a/mm/truncate.c b/mm/truncate.c
index 02a0c0466c78..c30e8fa3d063 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -25,44 +25,85 @@
 #include <linux/rmap.h>
 #include "internal.h"
 
-static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
-			       void *entry)
+/*
+ * Regular page slots are stabilized by the page lock even without the tree
+ * itself locked.  These unlocked entries need verification under the tree
+ * lock.
+ */
+static inline void __clear_shadow_entry(struct address_space *mapping,
+				pgoff_t index, void *entry)
 {
 	struct radix_tree_node *node;
 	void **slot;
 
-	spin_lock_irq(&mapping->tree_lock);
-	/*
-	 * Regular page slots are stabilized by the page lock even
-	 * without the tree itself locked.  These unlocked entries
-	 * need verification under the tree lock.
-	 */
 	if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot))
-		goto unlock;
+		return;
 	if (*slot != entry)
-		goto unlock;
+		return;
 	__radix_tree_replace(&mapping->page_tree, node, slot, NULL,
 			     workingset_update_node);
 	mapping->nrexceptional--;
-unlock:
+}
+
+static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
+			       void *entry)
+{
+	spin_lock_irq(&mapping->tree_lock);
+	__clear_shadow_entry(mapping, index, entry);
 	spin_unlock_irq(&mapping->tree_lock);
 }
 
 /*
- * Unconditionally remove exceptional entry. Usually called from truncate path.
+ * Unconditionally remove exceptional entries. Usually called from truncate
+ * path. Note that the pagevec may be altered by this function by removing
+ * exceptional entries similar to what pagevec_remove_exceptionals does.
  */
-static void truncate_exceptional_entry(struct address_space *mapping,
-				       pgoff_t index, void *entry)
+static void truncate_exceptional_pvec_entries(struct address_space *mapping,
+				struct pagevec *pvec, pgoff_t *indices,
+				pgoff_t end)
 {
+	int i, j;
+	bool dax, lock;
+
 	/* Handled by shmem itself */
 	if (shmem_mapping(mapping))
 		return;
 
-	if (dax_mapping(mapping)) {
-		dax_delete_mapping_entry(mapping, index);
+	for (j = 0; j < pagevec_count(pvec); j++)
+		if (radix_tree_exceptional_entry(pvec->pages[j]))
+			break;
+
+	if (j == pagevec_count(pvec))
 		return;
+
+	dax = dax_mapping(mapping);
+	lock = !dax && indices[j] < end;
+	if (lock)
+		spin_lock_irq(&mapping->tree_lock);
+
+	for (i = j; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		pgoff_t index = indices[i];
+
+		if (!radix_tree_exceptional_entry(page)) {
+			pvec->pages[j++] = page;
+			continue;
+		}
+
+		if (index >= end)
+			continue;
+
+		if (unlikely(dax)) {
+			dax_delete_mapping_entry(mapping, index);
+			continue;
+		}
+
+		__clear_shadow_entry(mapping, index, page);
 	}
-	clear_shadow_entry(mapping, index, entry);
+
+	if (lock)
+		spin_unlock_irq(&mapping->tree_lock);
+	pvec->nr = j;
 }
 
 /*
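Note how truncate_exceptional_pvec_entries() also takes over the job of
pagevec_remove_exceptionals(): while walking the batch it copies regular
pages down over the exceptional slots (pvec->pages[j++] = page), so on
return pvec->nr counts only the surviving pages.  A minimal userspace
sketch of that stable in-place filter, with hypothetical names standing
in for the pagevec helpers:

#include <stddef.h>
#include <stdint.h>

/* Hypothetical stand-in for radix_tree_exceptional_entry(). */
static int exceptional(void *p)
{
	return ((uintptr_t)p & 1) != 0;
}

/*
 * Drop exceptional entries in place, preserving the relative order of
 * the remaining pointers, and return the new count -- the same
 * compaction the patched function performs on the pagevec.
 */
static size_t compact(void **pages, size_t nr)
{
	size_t i, j = 0;

	for (i = 0; i < nr; i++)
		if (!exceptional(pages[i]))
			pages[j++] = pages[i];
	return j;
}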
@@ -310,11 +351,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			if (index >= end)
 				break;
 
-			if (radix_tree_exceptional_entry(page)) {
-				truncate_exceptional_entry(mapping, index,
-							   page);
+			if (radix_tree_exceptional_entry(page))
 				continue;
-			}
 
 			if (!trylock_page(page))
 				continue;
@@ -334,12 +372,11 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		delete_from_page_cache_batch(mapping, &locked_pvec);
 		for (i = 0; i < pagevec_count(&locked_pvec); i++)
 			unlock_page(locked_pvec.pages[i]);
-		pagevec_remove_exceptionals(&pvec);
+		truncate_exceptional_pvec_entries(mapping, &pvec, indices, end);
 		pagevec_release(&pvec);
 		cond_resched();
 		index++;
 	}
-
 	if (partial_start) {
 		struct page *page = find_lock_page(mapping, start - 1);
 		if (page) {
@@ -397,6 +434,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			pagevec_release(&pvec);
 			break;
 		}
+
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
@@ -408,11 +446,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
 				break;
 			}
 
-			if (radix_tree_exceptional_entry(page)) {
-				truncate_exceptional_entry(mapping, index,
-							   page);
+			if (radix_tree_exceptional_entry(page))
 				continue;
-			}
 
 			lock_page(page);
 			WARN_ON(page_to_index(page) != index);
@@ -420,7 +455,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			truncate_inode_page(mapping, page);
 			unlock_page(page);
 		}
-		pagevec_remove_exceptionals(&pvec);
+		truncate_exceptional_pvec_entries(mapping, &pvec, indices, end);
 		pagevec_release(&pvec);
 		index++;
 	}