Diffstat (limited to 'include/linux/pagemap.h')

 -rw-r--r--   include/linux/pagemap.h   114
 1 file changed, 112 insertions, 2 deletions
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index d2fca802f809..a39b38ccdc97 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -12,6 +12,7 @@
 #include <asm/uaccess.h>
 #include <linux/gfp.h>
 #include <linux/bitops.h>
+#include <linux/hardirq.h> /* for in_interrupt() */
 
 /*
  * Bits in mapping->flags. The lower __GFP_BITS_SHIFT bits are the page
@@ -19,10 +20,11 @@
  */
 #define AS_EIO          (__GFP_BITS_SHIFT + 0)  /* IO error on async write */
 #define AS_ENOSPC       (__GFP_BITS_SHIFT + 1)  /* ENOSPC on async write */
+#define AS_MM_ALL_LOCKS (__GFP_BITS_SHIFT + 2)  /* under mm_take_all_locks() */
 
 static inline void mapping_set_error(struct address_space *mapping, int error)
 {
-        if (error) {
+        if (unlikely(error)) {
                 if (error == -ENOSPC)
                         set_bit(AS_ENOSPC, &mapping->flags);
                 else
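Aside (not part of the commit): a minimal sketch of how the AS_EIO/AS_ENOSPC bits set by mapping_set_error() are typically consumed. The example_* helpers below are hypothetical; the record-then-reap pattern with test_and_clear_bit() mirrors what the filemap wait paths do. Assumes <linux/pagemap.h> and <linux/errno.h>.

/*
 * Illustrative only: writeback completion records an async write error
 * on the mapping; mapping_set_error() folds -ENOSPC into AS_ENOSPC and
 * anything else into AS_EIO.
 */
static void example_end_writeback(struct address_space *mapping, int err)
{
        mapping_set_error(mapping, err);
}

/*
 * A later fsync-style wait path reaps the sticky bits and reports the
 * error once.
 */
static int example_check_write_errors(struct address_space *mapping)
{
        int ret = 0;

        if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
                ret = -ENOSPC;
        if (test_and_clear_bit(AS_EIO, &mapping->flags))
                ret = -EIO;
        return ret;
}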
@@ -62,6 +64,98 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
 #define page_cache_release(page)       put_page(page)
 void release_pages(struct page **pages, int nr, int cold);
 
+/*
+ * speculatively take a reference to a page.
+ * If the page is free (_count == 0), then _count is untouched, and 0
+ * is returned. Otherwise, _count is incremented by 1 and 1 is returned.
+ *
+ * This function must be called inside the same rcu_read_lock() section as has
+ * been used to lookup the page in the pagecache radix-tree (or page table):
+ * this allows allocators to use a synchronize_rcu() to stabilize _count.
+ *
+ * Unless an RCU grace period has passed, the count of all pages coming out
+ * of the allocator must be considered unstable. page_count may return higher
+ * than expected, and put_page must be able to do the right thing when the
+ * page has been finished with, no matter what it is subsequently allocated
+ * for (because put_page is what is used here to drop an invalid speculative
+ * reference).
+ *
+ * This is the interesting part of the lockless pagecache (and lockless
+ * get_user_pages) locking protocol, where the lookup-side (eg. find_get_page)
+ * has the following pattern:
+ * 1. find page in radix tree
+ * 2. conditionally increment refcount
+ * 3. check the page is still in pagecache (if no, goto 1)
+ *
+ * Remove-side that cares about stability of _count (eg. reclaim) has the
+ * following (with tree_lock held for write):
+ * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
+ * B. remove page from pagecache
+ * C. free the page
+ *
+ * There are 2 critical interleavings that matter:
+ * - 2 runs before A: in this case, A sees elevated refcount and bails out
+ * - A runs before 2: in this case, 2 sees zero refcount and retries;
+ *   subsequently, B will complete and 1 will find no page, causing the
+ *   lookup to return NULL.
+ *
+ * It is possible that between 1 and 2, the page is removed then the exact same
+ * page is inserted into the same position in pagecache. That's OK: the
+ * old find_get_page using tree_lock could equally have run before or after
+ * such a re-insertion, depending on order that locks are granted.
+ *
+ * Lookups racing against pagecache insertion isn't a big problem: either 1
+ * will find the page or it will not. Likewise, the old find_get_page could run
+ * either before the insertion or afterwards, depending on timing.
+ */
+static inline int page_cache_get_speculative(struct page *page)
+{
+        VM_BUG_ON(in_interrupt());
+
+#if !defined(CONFIG_SMP) && defined(CONFIG_CLASSIC_RCU)
+# ifdef CONFIG_PREEMPT
+        VM_BUG_ON(!in_atomic());
+# endif
+        /*
+         * Preempt must be disabled here - we rely on rcu_read_lock doing
+         * this for us.
+         *
+         * Pagecache won't be truncated from interrupt context, so if we have
+         * found a page in the radix tree here, we have pinned its refcount by
+         * disabling preempt, and hence no need for the "speculative get" that
+         * SMP requires.
+         */
+        VM_BUG_ON(page_count(page) == 0);
+        atomic_inc(&page->_count);
+
+#else
+        if (unlikely(!get_page_unless_zero(page))) {
+                /*
+                 * Either the page has been freed, or will be freed.
+                 * In either case, retry here and the caller should
+                 * do the right thing (see comments above).
+                 */
+                return 0;
+        }
+#endif
+        VM_BUG_ON(PageTail(page));
+
+        return 1;
+}
+
+static inline int page_freeze_refs(struct page *page, int count)
+{
+        return likely(atomic_cmpxchg(&page->_count, count, 0) == count);
+}
+
+static inline void page_unfreeze_refs(struct page *page, int count)
+{
+        VM_BUG_ON(page_count(page) != 0);
+        VM_BUG_ON(count == 0);
+
+        atomic_set(&page->_count, count);
+}
+
 #ifdef CONFIG_NUMA
 extern struct page *__page_cache_alloc(gfp_t gfp);
 #else
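Aside (not part of the commit): the comment above describes both halves of the lockless protocol. The sketch below shows, under stated assumptions, how a lookup side and a remove side might be written against page_cache_get_speculative() and page_freeze_refs(). The example_* names are hypothetical, the radix-tree handling is simplified relative to the real find_get_page() and reclaim code, and it assumes <linux/pagemap.h>, <linux/radix-tree.h>, <linux/rcupdate.h> and <linux/fs.h>.

/* Lookup side: steps 1-3 from the comment above, done under RCU only. */
static struct page *example_lockless_lookup(struct address_space *mapping,
                                            pgoff_t offset)
{
        struct page *page;

        rcu_read_lock();
repeat:
        /* 1. find page in radix tree (no tree_lock taken) */
        page = radix_tree_lookup(&mapping->page_tree, offset);
        if (page) {
                /* 2. conditionally increment refcount */
                if (!page_cache_get_speculative(page))
                        goto repeat;

                /*
                 * 3. check the page is still in pagecache; if it was
                 * removed (and perhaps reused) meanwhile, drop the
                 * speculative reference and start again.
                 */
                if (unlikely(page != radix_tree_lookup(&mapping->page_tree,
                                                       offset))) {
                        page_cache_release(page);
                        goto repeat;
                }
        }
        rcu_read_unlock();

        return page;    /* NULL, or a page with an elevated refcount */
}

/*
 * Remove side (e.g. reclaim), called with mapping->tree_lock held for
 * write and @expected_count the refcount the caller believes it owns.
 */
static int example_remove_side(struct address_space *mapping,
                               struct page *page, int expected_count)
{
        /* A. atomically check the refcount and freeze it at zero */
        if (!page_freeze_refs(page, expected_count))
                return 0;       /* concurrent speculative get: bail out */

        /* B. remove page from pagecache */
        radix_tree_delete(&mapping->page_tree, page->index);

        /*
         * C. the page can now be freed; if removal had to be aborted
         * instead, page_unfreeze_refs(page, expected_count) would
         * restore the refcount.
         */
        return 1;
}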
@@ -133,7 +227,7 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
         return read_cache_page(mapping, index, filler, data);
 }
 
-int add_to_page_cache(struct page *page, struct address_space *mapping,
+int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
                 pgoff_t index, gfp_t gfp_mask);
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
                 pgoff_t index, gfp_t gfp_mask);
@@ -141,6 +235,22 @@ extern void remove_from_page_cache(struct page *page);
 extern void __remove_from_page_cache(struct page *page);
 
 /*
+ * Like add_to_page_cache_locked, but used to add newly allocated pages:
+ * the page is new, so we can just run SetPageLocked() against it.
+ */
+static inline int add_to_page_cache(struct page *page,
+                struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
+{
+        int error;
+
+        SetPageLocked(page);
+        error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
+        if (unlikely(error))
+                ClearPageLocked(page);
+        return error;
+}
+
+/*
  * Return byte-offset into filesystem object for page.
  */
 static inline loff_t page_offset(struct page *page)
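Aside (not part of the commit): a minimal sketch of using the new inline add_to_page_cache() with a freshly allocated page. example_grab_new_page() is a hypothetical helper; it assumes <linux/pagemap.h>.

static struct page *example_grab_new_page(struct address_space *mapping,
                                          pgoff_t offset)
{
        struct page *page = page_cache_alloc(mapping);

        if (!page)
                return NULL;

        /*
         * add_to_page_cache() sets PG_locked before inserting, so on
         * success the page comes back locked and referenced; on failure
         * (slot already occupied, or radix-tree allocation failure) the
         * lock bit has already been cleared again and we just drop our
         * allocation reference.
         */
        if (add_to_page_cache(page, mapping, offset,
                              mapping_gfp_mask(mapping))) {
                page_cache_release(page);
                return NULL;
        }

        return page;    /* locked and in the pagecache, ready for I/O */
}

Most real callers use add_to_page_cache_lru(), which performs the same insertion and also places the page on the LRU.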
