-rw-r--r--   include/linux/mm.h                 2
-rw-r--r--   include/linux/page-flags.h         9
-rw-r--r--   include/linux/pagemap.h           23
-rw-r--r--   include/linux/writeback.h          1
-rw-r--r--   include/trace/events/mmflags.h     1
-rw-r--r--   init/main.c                        3
-rw-r--r--   mm/filemap.c                     181
-rw-r--r--   mm/internal.h                      2
-rw-r--r--   mm/swap.c                          2
9 files changed, 174 insertions, 50 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4424784ac374..fe6b4036664a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1758,6 +1758,8 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
 	return ptl;
 }
 
+extern void __init pagecache_init(void);
+
 extern void free_area_init(unsigned long * zones_size);
 extern void free_area_init_node(int nid, unsigned long * zones_size,
 		unsigned long zone_start_pfn, unsigned long *zholes_size);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index a57c909a15e4..c56b39890a41 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -73,6 +73,7 @@
  */
 enum pageflags {
 	PG_locked,		/* Page is locked. Don't touch. */
+	PG_waiters,		/* Page has waiters, check its waitqueue */
 	PG_error,
 	PG_referenced,
 	PG_uptodate,
@@ -169,6 +170,9 @@ static __always_inline int PageCompound(struct page *page)
  * for compound page all operations related to the page flag applied to
  * head page.
  *
+ * PF_ONLY_HEAD:
+ *     for compound page, callers only ever operate on the head page.
+ *
  * PF_NO_TAIL:
  *     modifications of the page flag must be done on small or head pages,
  *     checks can be done on tail pages too.
@@ -178,6 +182,9 @@ static __always_inline int PageCompound(struct page *page)
  */
 #define PF_ANY(page, enforce)	page
 #define PF_HEAD(page, enforce)	compound_head(page)
+#define PF_ONLY_HEAD(page, enforce) ({					\
+		VM_BUG_ON_PGFLAGS(PageTail(page), page);		\
+		page;})
 #define PF_NO_TAIL(page, enforce) ({					\
 		VM_BUG_ON_PGFLAGS(enforce && PageTail(page), page);	\
 		compound_head(page);})
@@ -255,6 +262,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; }
 	TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
 
 __PAGEFLAG(Locked, locked, PF_NO_TAIL)
+PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
 PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND)
 PAGEFLAG(Referenced, referenced, PF_HEAD)
 	TESTCLEARFLAG(Referenced, referenced, PF_HEAD)
@@ -743,6 +751,7 @@ static inline int page_has_private(struct page *page)
 
 #undef PF_ANY
 #undef PF_HEAD
+#undef PF_ONLY_HEAD
 #undef PF_NO_TAIL
 #undef PF_NO_COMPOUND
 #endif	/* !__GENERATING_BOUNDS_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index f29f80f81dbf..324c8dbad1e1 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -486,22 +486,14 @@ static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
  * and for filesystems which need to wait on PG_private.
  */
 extern void wait_on_page_bit(struct page *page, int bit_nr);
-
 extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
-extern int wait_on_page_bit_killable_timeout(struct page *page,
-				int bit_nr, unsigned long timeout);
-
-static inline int wait_on_page_locked_killable(struct page *page)
-{
-	if (!PageLocked(page))
-		return 0;
-	return wait_on_page_bit_killable(compound_head(page), PG_locked);
-}
+extern void wake_up_page_bit(struct page *page, int bit_nr);
 
-extern wait_queue_head_t *page_waitqueue(struct page *page);
 static inline void wake_up_page(struct page *page, int bit)
 {
-	__wake_up_bit(page_waitqueue(page), &page->flags, bit);
+	if (!PageWaiters(page))
+		return;
+	wake_up_page_bit(page, bit);
 }
 
 /*
@@ -517,6 +509,13 @@ static inline void wait_on_page_locked(struct page *page)
 	wait_on_page_bit(compound_head(page), PG_locked);
 }
 
+static inline int wait_on_page_locked_killable(struct page *page)
+{
+	if (!PageLocked(page))
+		return 0;
+	return wait_on_page_bit_killable(compound_head(page), PG_locked);
+}
+
 /*
  * Wait for a page to complete writeback
  */
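
For illustration only (not part of the patch): a minimal userspace C sketch of the fast-path idea behind the reworked wake_up_page() above. The unlock side skips the wake-up machinery entirely unless a waiters bit says somebody may be sleeping, which is what the new PageWaiters() check buys. All names here (word_lock, word_unlock, WAITERS_BIT) are invented for the sketch, and a pthread condition variable stands in for the kernel's hashed wait queues.

/*
 * Userspace sketch of the "waiters flag" fast path (not kernel code).
 * word_unlock() only takes the queue lock and wakes sleepers when
 * WAITERS_BIT was set, just as wake_up_page() returns early unless
 * PageWaiters() is set.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define LOCKED_BIT	(1u << 0)
#define WAITERS_BIT	(1u << 1)

static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t q_cond = PTHREAD_COND_INITIALIZER;

static void word_lock_slow(atomic_uint *word)
{
	pthread_mutex_lock(&q_lock);
	/* Advertise a waiter before sleeping, so unlockers know to wake us. */
	atomic_fetch_or(word, WAITERS_BIT);
	while (atomic_load(word) & LOCKED_BIT)
		pthread_cond_wait(&q_cond, &q_lock);
	pthread_mutex_unlock(&q_lock);
}

static void word_lock(atomic_uint *word)
{
	while (atomic_fetch_or(word, LOCKED_BIT) & LOCKED_BIT)
		word_lock_slow(word);
}

static void word_unlock(atomic_uint *word)
{
	unsigned int old = atomic_fetch_and(word, ~LOCKED_BIT);

	/* Fast path: nobody ever slept on this word, skip the wake-up. */
	if (!(old & WAITERS_BIT))
		return;

	pthread_mutex_lock(&q_lock);
	atomic_fetch_and(word, ~WAITERS_BIT);
	pthread_cond_broadcast(&q_cond);
	pthread_mutex_unlock(&q_lock);
}

int main(void)
{
	atomic_uint flags = 0;

	word_lock(&flags);
	printf("locked, flags=0x%x\n", (unsigned int)atomic_load(&flags));
	word_unlock(&flags);
	printf("unlocked, flags=0x%x\n", (unsigned int)atomic_load(&flags));
	return 0;
}

Build with cc -pthread; the point is only that the common uncontended unlock never touches the wait-queue lock.
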
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index c78f9f0920b5..5527d910ba3d 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -375,7 +375,6 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
 unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
 
 void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time);
-void page_writeback_init(void);
 void balance_dirty_pages_ratelimited(struct address_space *mapping);
 bool wb_over_bg_thresh(struct bdi_writeback *wb);
 
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 30c2adbdebe8..9e687ca9a307 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -81,6 +81,7 @@
 
 #define __def_pageflag_names						\
 	{1UL << PG_locked,		"locked"	},		\
+	{1UL << PG_waiters,		"waiters"	},		\
 	{1UL << PG_error,		"error"		},		\
 	{1UL << PG_referenced,		"referenced"	},		\
 	{1UL << PG_uptodate,		"uptodate"	},		\
diff --git a/init/main.c b/init/main.c
index c81c9fa21bc7..b0c9d6facef9 100644
--- a/init/main.c
+++ b/init/main.c
@@ -647,9 +647,8 @@ asmlinkage __visible void __init start_kernel(void)
 	security_init();
 	dbg_late_init();
 	vfs_caches_init();
+	pagecache_init();
 	signals_init();
-	/* rootfs populating might need page-writeback */
-	page_writeback_init();
 	proc_root_init();
 	nsfs_init();
 	cpuset_init();
diff --git a/mm/filemap.c b/mm/filemap.c
index 32be3c8f3a11..82f26cde830c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -739,45 +739,159 @@ EXPORT_SYMBOL(__page_cache_alloc);
  * at a cost of "thundering herd" phenomena during rare hash
  * collisions.
  */
-wait_queue_head_t *page_waitqueue(struct page *page)
+#define PAGE_WAIT_TABLE_BITS 8
+#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
+static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
+
+static wait_queue_head_t *page_waitqueue(struct page *page)
 {
-	return bit_waitqueue(page, 0);
+	return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
 }
-EXPORT_SYMBOL(page_waitqueue);
 
-void wait_on_page_bit(struct page *page, int bit_nr)
+void __init pagecache_init(void)
 {
-	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+	int i;
 
-	if (test_bit(bit_nr, &page->flags))
-		__wait_on_bit(page_waitqueue(page), &wait, bit_wait_io,
-							TASK_UNINTERRUPTIBLE);
+	for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
+		init_waitqueue_head(&page_wait_table[i]);
+
+	page_writeback_init();
 }
-EXPORT_SYMBOL(wait_on_page_bit);
 
-int wait_on_page_bit_killable(struct page *page, int bit_nr)
+struct wait_page_key {
+	struct page *page;
+	int bit_nr;
+	int page_match;
+};
+
+struct wait_page_queue {
+	struct page *page;
+	int bit_nr;
+	wait_queue_t wait;
+};
+
+static int wake_page_function(wait_queue_t *wait, unsigned mode, int sync, void *arg)
 {
-	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+	struct wait_page_key *key = arg;
+	struct wait_page_queue *wait_page
+		= container_of(wait, struct wait_page_queue, wait);
+
+	if (wait_page->page != key->page)
+		return 0;
+	key->page_match = 1;
 
-	if (!test_bit(bit_nr, &page->flags))
+	if (wait_page->bit_nr != key->bit_nr)
+		return 0;
+	if (test_bit(key->bit_nr, &key->page->flags))
 		return 0;
 
-	return __wait_on_bit(page_waitqueue(page), &wait,
-			     bit_wait_io, TASK_KILLABLE);
+	return autoremove_wake_function(wait, mode, sync, key);
 }
 
-int wait_on_page_bit_killable_timeout(struct page *page,
-			     int bit_nr, unsigned long timeout)
+void wake_up_page_bit(struct page *page, int bit_nr)
 {
-	DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+	wait_queue_head_t *q = page_waitqueue(page);
+	struct wait_page_key key;
+	unsigned long flags;
 
-	wait.key.timeout = jiffies + timeout;
-	if (!test_bit(bit_nr, &page->flags))
-		return 0;
-	return __wait_on_bit(page_waitqueue(page), &wait,
-			bit_wait_io_timeout, TASK_KILLABLE);
+	key.page = page;
+	key.bit_nr = bit_nr;
+	key.page_match = 0;
+
+	spin_lock_irqsave(&q->lock, flags);
+	__wake_up_locked_key(q, TASK_NORMAL, &key);
+	/*
+	 * It is possible for other pages to have collided on the waitqueue
+	 * hash, so in that case check for a page match. That prevents a long-
+	 * term waiter
+	 *
+	 * It is still possible to miss a case here, when we woke page waiters
+	 * and removed them from the waitqueue, but there are still other
+	 * page waiters.
+	 */
+	if (!waitqueue_active(q) || !key.page_match) {
+		ClearPageWaiters(page);
+		/*
+		 * It's possible to miss clearing Waiters here, when we woke
+		 * our page waiters, but the hashed waitqueue has waiters for
+		 * other pages on it.
+		 *
+		 * That's okay, it's a rare case. The next waker will clear it.
+		 */
+	}
+	spin_unlock_irqrestore(&q->lock, flags);
+}
+EXPORT_SYMBOL(wake_up_page_bit);
+
+static inline int wait_on_page_bit_common(wait_queue_head_t *q,
+		struct page *page, int bit_nr, int state, bool lock)
+{
+	struct wait_page_queue wait_page;
+	wait_queue_t *wait = &wait_page.wait;
+	int ret = 0;
+
+	init_wait(wait);
+	wait->func = wake_page_function;
+	wait_page.page = page;
+	wait_page.bit_nr = bit_nr;
+
+	for (;;) {
+		spin_lock_irq(&q->lock);
+
+		if (likely(list_empty(&wait->task_list))) {
+			if (lock)
+				__add_wait_queue_tail_exclusive(q, wait);
+			else
+				__add_wait_queue(q, wait);
+			SetPageWaiters(page);
+		}
+
+		set_current_state(state);
+
+		spin_unlock_irq(&q->lock);
+
+		if (likely(test_bit(bit_nr, &page->flags))) {
+			io_schedule();
+			if (unlikely(signal_pending_state(state, current))) {
+				ret = -EINTR;
+				break;
+			}
+		}
+
+		if (lock) {
+			if (!test_and_set_bit_lock(bit_nr, &page->flags))
+				break;
+		} else {
+			if (!test_bit(bit_nr, &page->flags))
+				break;
+		}
+	}
+
+	finish_wait(q, wait);
+
+	/*
+	 * A signal could leave PageWaiters set. Clearing it here if
+	 * !waitqueue_active would be possible (by open-coding finish_wait),
+	 * but still fail to catch it in the case of wait hash collision. We
+	 * already can fail to clear wait hash collision cases, so don't
+	 * bother with signals either.
+	 */
+
+	return ret;
+}
+
+void wait_on_page_bit(struct page *page, int bit_nr)
+{
+	wait_queue_head_t *q = page_waitqueue(page);
+	wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, false);
+}
+EXPORT_SYMBOL(wait_on_page_bit);
+
+int wait_on_page_bit_killable(struct page *page, int bit_nr)
+{
+	wait_queue_head_t *q = page_waitqueue(page);
+	return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, false);
 }
-EXPORT_SYMBOL_GPL(wait_on_page_bit_killable_timeout);
 
 /**
  * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
@@ -793,6 +907,7 @@ void add_page_wait_queue(struct page *page, wait_queue_t *waiter)
 
 	spin_lock_irqsave(&q->lock, flags);
 	__add_wait_queue(q, waiter);
+	SetPageWaiters(page);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL_GPL(add_page_wait_queue);
@@ -874,23 +989,19 @@ EXPORT_SYMBOL_GPL(page_endio);
  * __lock_page - get a lock on the page, assuming we need to sleep to get it
  * @page: the page to lock
  */
-void __lock_page(struct page *page)
+void __lock_page(struct page *__page)
 {
-	struct page *page_head = compound_head(page);
-	DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked);
-
-	__wait_on_bit_lock(page_waitqueue(page_head), &wait, bit_wait_io,
-							TASK_UNINTERRUPTIBLE);
+	struct page *page = compound_head(__page);
+	wait_queue_head_t *q = page_waitqueue(page);
+	wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, true);
 }
 EXPORT_SYMBOL(__lock_page);
 
-int __lock_page_killable(struct page *page)
+int __lock_page_killable(struct page *__page)
 {
-	struct page *page_head = compound_head(page);
-	DEFINE_WAIT_BIT(wait, &page_head->flags, PG_locked);
-
-	return __wait_on_bit_lock(page_waitqueue(page_head), &wait,
-					bit_wait_io, TASK_KILLABLE);
+	struct page *page = compound_head(__page);
+	wait_queue_head_t *q = page_waitqueue(page);
+	return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE, true);
 }
 EXPORT_SYMBOL_GPL(__lock_page_killable);
 
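
For illustration only (not part of the patch): a minimal userspace C sketch of the hashed wait-table lookup that page_waitqueue() now performs. Pages map onto a small fixed array of queues by hashing the page pointer, so struct page itself carries no queue. hash_ptr() below is a simplified stand-in for the kernel helper of the same name, using the same golden-ratio multiply-and-shift idea as the kernel's hash_64(); struct waitqueue is a dummy placeholder.

/*
 * Userspace sketch of the hashed page wait table (not kernel code).
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_WAIT_TABLE_BITS 8
#define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)

struct waitqueue { int nr_waiters; };	/* dummy stand-in for wait_queue_head_t */
static struct waitqueue page_wait_table[PAGE_WAIT_TABLE_SIZE];

/* Multiplicative pointer hash keeping the top PAGE_WAIT_TABLE_BITS bits. */
static unsigned int hash_ptr(const void *ptr, unsigned int bits)
{
	uint64_t val = (uint64_t)(uintptr_t)ptr * 0x61C8864680B583EBull;

	return (unsigned int)(val >> (64 - bits));
}

static struct waitqueue *page_waitqueue(const void *page)
{
	return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
}

int main(void)
{
	int page_a, page_b;	/* stand-ins for two struct page objects */

	printf("page_a -> queue %ld\n",
	       (long)(page_waitqueue(&page_a) - page_wait_table));
	printf("page_b -> queue %ld\n",
	       (long)(page_waitqueue(&page_b) - page_wait_table));
	return 0;
}

A collision simply means two pages share a queue, which is why wake_page_function() above filters wake-ups by comparing the page pointer and bit number.
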
diff --git a/mm/internal.h b/mm/internal.h
index 44d68895a9b9..7aa2ea0a8623 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -36,6 +36,8 @@
 /* Do not use these with a slab allocator */
 #define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
 
+void page_writeback_init(void);
+
 int do_swap_page(struct vm_fault *vmf);
 
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
diff --git a/mm/swap.c b/mm/swap.c
index 4dcf852e1e6d..844baedd2429 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -69,6 +69,7 @@ static void __page_cache_release(struct page *page)
 		del_page_from_lru_list(page, lruvec, page_off_lru(page));
 		spin_unlock_irqrestore(zone_lru_lock(zone), flags);
 	}
+	__ClearPageWaiters(page);
 	mem_cgroup_uncharge(page);
 }
 
@@ -784,6 +785,7 @@ void release_pages(struct page **pages, int nr, bool cold)
 
 		/* Clear Active bit in case of parallel mark_page_accessed */
 		__ClearPageActive(page);
+		__ClearPageWaiters(page);
 
 		list_add(&page->lru, &pages_to_free);
 	}