summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNicholas Piggin <npiggin@gmail.com>2016-12-24 22:00:30 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2016-12-25 14:54:48 -0500
commit62906027091f1d02de44041524f0769f60bb9cf3 (patch)
tree6444171af03e463bb0123a392d7b91a0ae6a1f40
parent6326fec1122cde256bd2a8c63f2606e08e44ce1d (diff)
mm: add PageWaiters indicating tasks are waiting for a page bit
Add a new page flag, PageWaiters, to indicate the page waitqueue has tasks waiting. This can be tested rather than testing waitqueue_active which requires another cacheline load. This bit is always set when the page has tasks on page_waitqueue(page), and is set and cleared under the waitqueue lock. It may be set when there are no tasks on the waitqueue, which will cause a harmless extra wakeup check that will clear the bit. The generic bit-waitqueue infrastructure is no longer used for pages. Instead, waitqueues are used directly with a custom key type. The generic code was not flexible enough to have PageWaiters manipulation under the waitqueue lock (which simplifies concurrency). This improves the performance of page lock intensive microbenchmarks by 2-3%. Putting two bits in the same word opens the opportunity to remove the memory barrier between clearing the lock bit and testing the waiters bit, after some work on the arch primitives (e.g., ensuring memory operand widths match and cover both bits). Signed-off-by: Nicholas Piggin <npiggin@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Bob Peterson <rpeterso@redhat.com> Cc: Steven Whitehouse <swhiteho@redhat.com> Cc: Andrew Lutomirski <luto@kernel.org> Cc: Andreas Gruenbacher <agruenba@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Mel Gorman <mgorman@techsingularity.net> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/mm.h2
-rw-r--r--include/linux/page-flags.h9
-rw-r--r--include/linux/pagemap.h23
-rw-r--r--include/linux/writeback.h1
-rw-r--r--include/trace/events/mmflags.h1
-rw-r--r--init/main.c3
-rw-r--r--mm/filemap.c181
-rw-r--r--mm/internal.h2
-rw-r--r--mm/swap.c2
9 files changed, 174 insertions, 50 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4424784ac374..fe6b4036664a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1758,6 +1758,8 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
1758 return ptl; 1758 return ptl;
1759} 1759}
1760 1760
1761extern void __init pagecache_init(void);
1762
1761extern void free_area_init(unsigned long * zones_size); 1763extern void free_area_init(unsigned long * zones_size);
1762extern void free_area_init_node(int nid, unsigned long * zones_size, 1764extern void free_area_init_node(int nid, unsigned long * zones_size,
1763 unsigned long zone_start_pfn, unsigned long *zholes_size); 1765 unsigned long zone_start_pfn, unsigned long *zholes_size);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index a57c909a15e4..c56b39890a41 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -73,6 +73,7 @@
73 */ 73 */
74enum pageflags { 74enum pageflags {
75 PG_locked, /* Page is locked. Don't touch. */ 75 PG_locked, /* Page is locked. Don't touch. */
76 PG_waiters, /* Page has waiters, check its waitqueue */
76 PG_error, 77 PG_error,
77 PG_referenced, 78 PG_referenced,
78 PG_uptodate, 79 PG_uptodate,
@@ -169,6 +170,9 @@ static __always_inline int PageCompound(struct page *page)
169 * for compound page all operations related to the page flag applied to 170 * for compound page all operations related to the page flag applied to
170 * head page. 171 * head page.
171 * 172 *
173 * PF_ONLY_HEAD:
174 * for compound page, callers only ever operate on the head page.
175 *
172 * PF_NO_TAIL: 176 * PF_NO_TAIL:
173 * modifications of the page flag must be done on small or head pages, 177 * modifications of the page flag must be done on small or head pages,
174 * checks can be done on tail pages too. 178 * checks can be done on tail pages too.
@@ -178,6 +182,9 @@ static __always_inline int PageCompound(struct page *page)
178 */ 182 */
179#define PF_ANY(page, enforce) page 183#define PF_ANY(page, enforce) page
180#define PF_HEAD(page, enforce) compound_head(page) 184#define PF_HEAD(page, enforce) compound_head(page)
185#define PF_ONLY_HEAD(page, enforce) ({ \
186 VM_BUG_ON_PGFLAGS(PageTail(page), page); \
187 page;})
181#define PF_NO_TAIL(page, enforce) ({ \ 188#define PF_NO_TAIL(page, enforce) ({ \
182 VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \ 189 VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page); \
183 compound_head(page);}) 190 compound_head(page);})
@@ -255,6 +262,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; }
255 TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname) 262 TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
256 263
257__PAGEFLAG(Locked, locked, PF_NO_TAIL) 264__PAGEFLAG(Locked, locked, PF_NO_TAIL)
265PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
258PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND) 266PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND)
259PAGEFLAG(Referenced, referenced, PF_HEAD) 267PAGEFLAG(Referenced, referenced, PF_HEAD)
260 TESTCLEARFLAG(Referenced, referenced, PF_HEAD) 268 TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
@@ -743,6 +751,7 @@ static inline int page_has_private(struct page *page)
743 751
744#undef PF_ANY 752#undef PF_ANY
745#undef PF_HEAD 753#undef PF_HEAD
754#undef PF_ONLY_HEAD
746#undef PF_NO_TAIL 755#undef PF_NO_TAIL
747#undef PF_NO_COMPOUND 756#undef PF_NO_COMPOUND
748#endif /* !__GENERATING_BOUNDS_H */ 757#endif /* !__GENERATING_BOUNDS_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index f29f80f81dbf..324c8dbad1e1 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -486,22 +486,14 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
486 * and for filesystems which need to wait on PG_private. 486 * and for filesystems which need to wait on PG_private.
487 */ 487 */
488extern void wait_on_page_bit(struct page *page, int bit_nr); 488extern void wait_on_page_bit(struct page *page, int bit_nr);
489
490extern int wait_on_page_bit_killable(struct page *page, int bit_nr); 489extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
491extern int wait_on_page_bit_killable_timeout(struct page *page, 490extern void wake_up_page_bit(struct page *page, int bit_nr);
492 int bit_nr, unsigned long timeout);
493
494static inline int wait_on_page_locked_killable(struct page *page)
495{
496 if (!PageLocked(page))
497 return 0;
498 return wait_on_page_bit_killable(compound_head(page), PG_locked);
499}
500 491
501extern wait_queue_head_t *page_waitqueue(struct page *page);
502static inline void wake_up_page(struct page *page, int bit) 492static inline void wake_up_page(struct page *page, int bit)
503{ 493{
504 __wake_up_bit(page_waitqueue(page), &page->flags, bit); 494 if (!PageWaiters(page))
495 return;
496 wake_up_page_bit(page, bit);
505} 497}
506 498
507/* 499/*
@@ -517,6 +509,13 @@ static inline void wait_on_page_locked(struct page *page)
517 wait_on_page_bit(compound_head(page), PG_locked); 509 wait_on_page_bit(compound_head(page), PG_locked);
518} 510}
519 511
512static inline int wait_on_page_locked_killable(struct page *page)
513{
514 if (!PageLocked(page))
515 return 0;
516 return wait_on_page_bit_killable(compound_head(page), PG_locked);
517}
518
520/* 519/*
521 * Wait for a page to complete writeback 520 * Wait for a page to complete writeback
522 */ 521 */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index c78f9f0920b5..5527d910ba3d 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -375,7 +375,6 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
375unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); 375unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
376 376
377void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time); 377void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time);
378void page_writeback_init(void);
379void balance_dirty_pages_ratelimited(struct address_space *mapping); 378void balance_dirty_pages_ratelimited(struct address_space *mapping);
380bool wb_over_bg_thresh(struct bdi_writeback *wb); 379bool wb_over_bg_thresh(struct bdi_writeback *wb);
381 380
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 30c2adbdebe8..9e687ca9a307 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -81,6 +81,7 @@
81 81
82#define __def_pageflag_names \ 82#define __def_pageflag_names \
83 {1UL << PG_locked, "locked" }, \ 83 {1UL << PG_locked, "locked" }, \
84 {1UL << PG_waiters, "waiters" }, \
84 {1UL << PG_error, "error" }, \ 85 {1UL << PG_error, "error" }, \
85 {1UL << PG_referenced, "referenced" }, \ 86 {1UL << PG_referenced, "referenced" }, \
86 {1UL << PG_uptodate, "uptodate" }, \ 87 {1UL << PG_uptodate, "uptodate" }, \
diff --git a/init/main.c b/init/main.c
index c81c9fa21bc7..b0c9d6facef9 100644
--- a/init/main.c
+++ b/init/main.c
@@ -647,9 +647,8 @@ asmlinkage __visible void __init start_kernel(void)
647 security_init(); 647 security_init();
648 dbg_late_init(); 648 dbg_late_init();
649 vfs_caches_init(); 649 vfs_caches_init();
650 pagecache_init();
650 signals_init(); 651 signals_init();
651 /* rootfs populating might need page-writeback */
652 page_writeback_init();
653 proc_root_init(); 652 proc_root_init();
654 nsfs_init(); 653 nsfs_init();
655 cpuset_init(); 654 cpuset_init();
diff --git a/mm/filemap.c b/mm/filemap.c
index 32be3c8f3a11..82f26cde830c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -739,45 +739,159 @@ EXPORT_SYMBOL(__page_cache_alloc);
739 * at a cost of "thundering herd" phenomena during rare hash 739 * at a cost of "thundering herd" phenomena during rare hash
740 * collisions. 740 * collisions.
741 */ 741 */
742wait_queue_head_t *page_waitqueue(struct page *page) 742#define PAGE_WAIT_TABLE_BITS 8
743#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
744static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
745
746static wait_queue_head_t *page_waitqueue(struct page *page)
743{ 747{
744 return bit_waitqueue(page, 0); 748 return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
745} 749}
746EXPORT_SYMBOL(page_waitqueue);
747 750
748void wait_on_page_bit(struct page *page, int bit_nr) 751void __init pagecache_init(void)
749{ 752{
750 DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); 753 int i;
751 754
752 if (test_bit(bit_nr, &page->flags)) 755 for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
753 __wait_on_bit(page_waitqueue(page), &wait, bit_wait_io, 756 init_waitqueue_head(&page_wait_table[i]);
754 TASK_UNINTERRUPTIBLE); 757
758 page_writeback_init();
755} 759}
756EXPORT_SYMBOL(wait_on_page_bit);
757 760
758int wait_on_page_bit_killable(struct page *page, int bit_nr) 761struct wait_page_key {
762 struct page *page;
763 int bit_nr;
764 int page_match;
765};
766
767struct wait_page_queue {
768 struct page *page;
769 int bit_nr;
770 wait_queue_t wait;
771};
772
773static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
759{ 774{
760 DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); 775 struct wait_page_key *key = arg;
776 struct wait_page_queue *wait_page
777 = container_of(wait, struct wait_page_queue, wait);
778
779 if (wait_page->page != key->page)
780 return 0;
781 key->page_match = 1;
761 782
762 if (!test_bit(bit_nr, &page->flags)) 783 if (wait_page->bit_nr != key->bit_nr)
784 return 0;
785 if (test_bit(key->bit_nr, &key->page->flags))
763 return 0; 786 return 0;
764 787
765 return __wait_on_bit(page_waitqueue(page), &wait, 788 return autoremove_wake_function(wait, mode, sync, key);
766 bit_wait_io, TASK_KILLABLE);
767} 789}
768 790
769int wait_on_page_bit_killable_timeout(struct page *page, 791void wake_up_page_bit(struct page *page, int bit_nr)
770 int bit_nr, unsigned long timeout)
771{ 792{
772 DEFINE_WAIT_BIT(wait, &page->flags, bit_nr); 793 wait_queue_head_t *q = page_waitqueue(page);
794 struct wait_page_key key;
795 unsigned long flags;
773 796
774 wait.key.timeout = jiffies + timeout; 797 key.page = page;
775 if (!test_bit(bit_nr, &page->flags)) 798 key.bit_nr = bit_nr;
776 return 0; 799 key.page_match = 0;
777 return __wait_on_bit(page_waitqueue(page), &wait, 800
778 bit_wait_io_timeout, TASK_KILLABLE); 801 spin_lock_irqsave(&q->lock, flags);
802 __wake_up_locked_key(q, TASK_NORMAL, &key);
803 /*
804 * It is possible for other pages to have collided on the waitqueue
805 * hash, so in that case check for a page match. That prevents a long-
806 * term waiter
807 *
808 * It is still possible to miss a case here, when we woke page waiters
809 * and removed them from the waitqueue, but there are still other
810 * page waiters.
811 */
812 if (!waitqueue_active(q) || !key.page_match) {
813 ClearPageWaiters(page);
814 /*
815 * It's possible to miss clearing Waiters here, when we woke
816 * our page waiters, but the hashed waitqueue has waiters for
817 * other pages on it.
818 *
819 * That's okay, it's a rare case. The next waker will clear it.
820 */
821 }
822 spin_unlock_irqrestore(&q->lock, flags);
823}
824EXPORT_SYMBOL(wake_up_page_bit);
825
826static inline int wait_on_page_bit_common(wait_queue_head_t *q,
827 struct page *page, int bit_nr, int state, bool lock)
828{
829 struct wait_page_queue wait_page;
830 wait_queue_t *wait = &wait_page.wait;
831 int ret = 0;
832
833 init_wait(wait);
834 wait->func = wake_page_function;
835 wait_page.page = page;
836 wait_page.bit_nr = bit_nr;
837
838 for (;;) {
839 spin_lock_irq(&q->lock);
840
841 if (likely(list_empty(&wait->task_list))) {
842 if (lock)
843 __add_wait_queue_tail_exclusive(q, wait);
844 else
845 __add_wait_queue(q, wait);
846 SetPageWaiters(page);
847 }
848
849 set_current_state(state);
850
851 spin_unlock_irq(&q->lock);
852
853 if (likely(test_bit(bit_nr, &page->flags))) {
854 io_schedule();
855 if (unlikely(signal_pending_state(state, current))) {
856 ret = -EINTR;
857 break;
858 }
859 }
860
861 if (lock) {
862 if (!test_and_set_bit_lock(bit_nr, &page->flags))
863 break;
864 } else {
865 if (!test_bit(bit_nr, &page->flags))
866 break;
867 }
868 }
869
870 finish_wait(q, wait);
871
872 /*
873 * A signal could leave PageWaiters set. Clearing it here if
874 * !waitqueue_active would be possible (by open-coding finish_wait),
875 * but still fail to catch it in the case of wait hash collision. We
876 * already can fail to clear wait hash collision cases, so don't
877 * bother with signals either.
878 */
879
880 return ret;
881}
882
883void wait_on_page_bit(struct page *page, int bit_nr)
884{
885 wait_queue_head_t *q = page_waitqueue(page);
886 wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, false);
887}
888EXPORT_SYMBOL(wait_on_page_bit);
889
890int wait_on_page_bit_killable(struct page *page, int bit_nr)
891{
892 wait_queue_head_t *q = page_waitqueue(page);
893 return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false);
779} 894}
780EXPORT_SYMBOL_GPL(wait_on_page_bit_killable_timeout);
781 895
782/** 896/**
783 * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue 897 * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
@@ -793,6 +907,7 @@ void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
793 907
794 spin_lock_irqsave(&q->lock, flags); 908 spin_lock_irqsave(&q->lock, flags);
795 __add_wait_queue(q, waiter); 909 __add_wait_queue(q, waiter);
910 SetPageWaiters(page);
796 spin_unlock_irqrestore(&q->lock, flags); 911 spin_unlock_irqrestore(&q->lock, flags);
797} 912}
798EXPORT_SYMBOL_GPL(add_page_wait_queue); 913EXPORT_SYMBOL_GPL(add_page_wait_queue);
@@ -874,23 +989,19 @@ EXPORT_SYMBOL_GPL(page_endio);
874 * __lock_page - get a lock on the page, assuming we need to sleep to get it 989 * __lock_page - get a lock on the page, assuming we need to sleep to get it
875 * @page: the page to lock 990 * @page: the page to lock
876 */ 991 */
877void __lock_page(struct page *page) 992void __lock_page(struct page *__page)
878{ 993{
879 struct page *page_head = compound_head(page); 994 struct page *page = compound_head(__page);
880 DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked); 995 wait_queue_head_t *q = page_waitqueue(page);
881 996 wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, true);
882 __wait_on_bit_lock(page_waitqueue(page_head), &wait, bit_wait_io,
883 TASK_UNINTERRUPTIBLE);
884} 997}
885EXPORT_SYMBOL(__lock_page); 998EXPORT_SYMBOL(__lock_page);
886 999
887int __lock_page_killable(struct page *page) 1000int __lock_page_killable(struct page *__page)
888{ 1001{
889 struct page *page_head = compound_head(page); 1002 struct page *page = compound_head(__page);
890 DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked); 1003 wait_queue_head_t *q = page_waitqueue(page);
891 1004 return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, true);
892 return __wait_on_bit_lock(page_waitqueue(page_head), &wait,
893 bit_wait_io, TASK_KILLABLE);
894} 1005}
895EXPORT_SYMBOL_GPL(__lock_page_killable); 1006EXPORT_SYMBOL_GPL(__lock_page_killable);
896 1007
diff --git a/mm/internal.h b/mm/internal.h
index 44d68895a9b9..7aa2ea0a8623 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -36,6 +36,8 @@
36/* Do not use these with a slab allocator */ 36/* Do not use these with a slab allocator */
37#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) 37#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
38 38
39void page_writeback_init(void);
40
39int do_swap_page(struct vm_fault *vmf); 41int do_swap_page(struct vm_fault *vmf);
40 42
41void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma, 43void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
diff --git a/mm/swap.c b/mm/swap.c
index 4dcf852e1e6d..844baedd2429 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -69,6 +69,7 @@ static void __page_cache_release(struct page *page)
69 del_page_from_lru_list(page, lruvec, page_off_lru(page)); 69 del_page_from_lru_list(page, lruvec, page_off_lru(page));
70 spin_unlock_irqrestore(zone_lru_lock(zone), flags); 70 spin_unlock_irqrestore(zone_lru_lock(zone), flags);
71 } 71 }
72 __ClearPageWaiters(page);
72 mem_cgroup_uncharge(page); 73 mem_cgroup_uncharge(page);
73} 74}
74 75
@@ -784,6 +785,7 @@ void release_pages(struct page **pages, int nr, bool cold)
784 785
785 /* Clear Active bit in case of parallel mark_page_accessed */ 786 /* Clear Active bit in case of parallel mark_page_accessed */
786 __ClearPageActive(page); 787 __ClearPageActive(page);
788 __ClearPageWaiters(page);
787 789
788 list_add(&page->lru, &pages_to_free); 790 list_add(&page->lru, &pages_to_free);
789 } 791 }