diff options
Diffstat (limited to 'include/linux/writeback.h')
| -rw-r--r-- | include/linux/writeback.h | 221 |
1 files changed, 207 insertions, 14 deletions
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index b2dd371ec0ca..b333c945e571 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
| @@ -7,6 +7,8 @@ | |||
| 7 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
| 8 | #include <linux/workqueue.h> | 8 | #include <linux/workqueue.h> |
| 9 | #include <linux/fs.h> | 9 | #include <linux/fs.h> |
| 10 | #include <linux/flex_proportions.h> | ||
| 11 | #include <linux/backing-dev-defs.h> | ||
| 10 | 12 | ||
| 11 | DECLARE_PER_CPU(int, dirty_throttle_leaks); | 13 | DECLARE_PER_CPU(int, dirty_throttle_leaks); |
| 12 | 14 | ||
| @@ -84,18 +86,95 @@ struct writeback_control { | |||
| 84 | unsigned for_reclaim:1; /* Invoked from the page allocator */ | 86 | unsigned for_reclaim:1; /* Invoked from the page allocator */ |
| 85 | unsigned range_cyclic:1; /* range_start is cyclic */ | 87 | unsigned range_cyclic:1; /* range_start is cyclic */ |
| 86 | unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ | 88 | unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ |
| 89 | #ifdef CONFIG_CGROUP_WRITEBACK | ||
| 90 | struct bdi_writeback *wb; /* wb this writeback is issued under */ | ||
| 91 | struct inode *inode; /* inode being written out */ | ||
| 92 | |||
| 93 | /* foreign inode detection, see wbc_detach_inode() */ | ||
| 94 | int wb_id; /* current wb id */ | ||
| 95 | int wb_lcand_id; /* last foreign candidate wb id */ | ||
| 96 | int wb_tcand_id; /* this foreign candidate wb id */ | ||
| 97 | size_t wb_bytes; /* bytes written by current wb */ | ||
| 98 | size_t wb_lcand_bytes; /* bytes written by last candidate */ | ||
| 99 | size_t wb_tcand_bytes; /* bytes written by this candidate */ | ||
| 100 | #endif | ||
| 87 | }; | 101 | }; |
| 88 | 102 | ||
| 89 | /* | 103 | /* |
| 104 | * A wb_domain represents a domain that wb's (bdi_writeback's) belong to | ||
| 105 | * and are measured against each other in. There always is one global | ||
| 106 | * domain, global_wb_domain, that every wb in the system is a member of. | ||
| 107 | * This allows measuring the relative bandwidth of each wb to distribute | ||
| 108 | * dirtyable memory accordingly. | ||
| 109 | */ | ||
| 110 | struct wb_domain { | ||
| 111 | spinlock_t lock; | ||
| 112 | |||
| 113 | /* | ||
| 114 | * Scale the writeback cache size proportional to the relative | ||
| 115 | * writeout speed. | ||
| 116 | * | ||
| 117 | * We do this by keeping a floating proportion between BDIs, based | ||
| 118 | * on page writeback completions [end_page_writeback()]. Those | ||
| 119 | * devices that write out pages fastest will get the larger share, | ||
| 120 | * while the slower will get a smaller share. | ||
| 121 | * | ||
| 122 | * We use page writeout completions because we are interested in | ||
| 123 | * getting rid of dirty pages. Having them written out is the | ||
| 124 | * primary goal. | ||
| 125 | * | ||
| 126 | * We introduce a concept of time, a period over which we measure | ||
| 127 | * these events, because demand can/will vary over time. The length | ||
| 128 | * of this period itself is measured in page writeback completions. | ||
| 129 | */ | ||
| 130 | struct fprop_global completions; | ||
| 131 | struct timer_list period_timer; /* timer for aging of completions */ | ||
| 132 | unsigned long period_time; | ||
| 133 | |||
| 134 | /* | ||
| 135 | * The dirtyable memory and dirty threshold could be suddenly | ||
| 136 | * knocked down by a large amount (eg. on the startup of KVM in a | ||
| 137 | * swapless system). This may throw the system into deep dirty | ||
| 138 | * exceeded state and throttle heavy/light dirtiers alike. To | ||
| 139 | * retain good responsiveness, maintain ->dirty_limit for | ||
| 140 | * tracking slowly down to the knocked down dirty threshold. | ||
| 141 | * | ||
| 142 | * Both fields are protected by ->lock. | ||
| 143 | */ | ||
| 144 | unsigned long dirty_limit_tstamp; | ||
| 145 | unsigned long dirty_limit; | ||
| 146 | }; | ||
| 147 | |||
| 148 | /** | ||
| 149 | * wb_domain_size_changed - memory available to a wb_domain has changed | ||
| 150 | * @dom: wb_domain of interest | ||
| 151 | * | ||
| 152 | * This function should be called when the amount of memory available to | ||
| 153 | * @dom has changed. It resets @dom's dirty limit parameters to prevent | ||
| 154 | * the past values which don't match the current configuration from skewing | ||
| 155 | * dirty throttling. Without this, when memory size of a wb_domain is | ||
| 156 | * greatly reduced, the dirty throttling logic may allow too many pages to | ||
| 157 | * be dirtied leading to consecutive unnecessary OOMs and may get stuck in | ||
| 158 | * that situation. | ||
| 159 | */ | ||
| 160 | static inline void wb_domain_size_changed(struct wb_domain *dom) | ||
| 161 | { | ||
| 162 | spin_lock(&dom->lock); | ||
| 163 | dom->dirty_limit_tstamp = jiffies; | ||
| 164 | dom->dirty_limit = 0; | ||
| 165 | spin_unlock(&dom->lock); | ||
| 166 | } | ||
| 167 | |||
| 168 | /* | ||
| 90 | * fs/fs-writeback.c | 169 | * fs/fs-writeback.c |
| 91 | */ | 170 | */ |
| 92 | struct bdi_writeback; | 171 | struct bdi_writeback; |
| 93 | void writeback_inodes_sb(struct super_block *, enum wb_reason reason); | 172 | void writeback_inodes_sb(struct super_block *, enum wb_reason reason); |
| 94 | void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, | 173 | void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, |
| 95 | enum wb_reason reason); | 174 | enum wb_reason reason); |
| 96 | int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); | 175 | bool try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); |
| 97 | int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, | 176 | bool try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, |
| 98 | enum wb_reason reason); | 177 | enum wb_reason reason); |
| 99 | void sync_inodes_sb(struct super_block *); | 178 | void sync_inodes_sb(struct super_block *); |
| 100 | void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); | 179 | void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); |
| 101 | void inode_wait_for_writeback(struct inode *inode); | 180 | void inode_wait_for_writeback(struct inode *inode); |
| @@ -107,6 +186,123 @@ static inline void wait_on_inode(struct inode *inode) | |||
| 107 | wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); | 186 | wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); |
| 108 | } | 187 | } |
| 109 | 188 | ||
| 189 | #ifdef CONFIG_CGROUP_WRITEBACK | ||
| 190 | |||
| 191 | #include <linux/cgroup.h> | ||
| 192 | #include <linux/bio.h> | ||
| 193 | |||
| 194 | void __inode_attach_wb(struct inode *inode, struct page *page); | ||
| 195 | void wbc_attach_and_unlock_inode(struct writeback_control *wbc, | ||
| 196 | struct inode *inode) | ||
| 197 | __releases(&inode->i_lock); | ||
| 198 | void wbc_detach_inode(struct writeback_control *wbc); | ||
| 199 | void wbc_account_io(struct writeback_control *wbc, struct page *page, | ||
| 200 | size_t bytes); | ||
| 201 | |||
| 202 | /** | ||
| 203 | * inode_attach_wb - associate an inode with its wb | ||
| 204 | * @inode: inode of interest | ||
| 205 | * @page: page being dirtied (may be NULL) | ||
| 206 | * | ||
| 207 | * If @inode doesn't have its wb, associate it with the wb matching the | ||
| 208 | * memcg of @page or, if @page is NULL, %current. May be called with or | ||
| 209 | * without @inode->i_lock held. | ||
| 210 | */ | ||
| 211 | static inline void inode_attach_wb(struct inode *inode, struct page *page) | ||
| 212 | { | ||
| 213 | if (!inode->i_wb) | ||
| 214 | __inode_attach_wb(inode, page); | ||
| 215 | } | ||
| 216 | |||
| 217 | /** | ||
| 218 | * inode_detach_wb - disassociate an inode from its wb | ||
| 219 | * @inode: inode of interest | ||
| 220 | * | ||
| 221 | * @inode is being freed. Detach from its wb. | ||
| 222 | */ | ||
| 223 | static inline void inode_detach_wb(struct inode *inode) | ||
| 224 | { | ||
| 225 | if (inode->i_wb) { | ||
| 226 | wb_put(inode->i_wb); | ||
| 227 | inode->i_wb = NULL; | ||
| 228 | } | ||
| 229 | } | ||
| 230 | |||
| 231 | /** | ||
| 232 | * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite | ||
| 233 | * @wbc: writeback_control of interest | ||
| 234 | * @inode: target inode | ||
| 235 | * | ||
| 236 | * This function is to be used by __filemap_fdatawrite_range(), which is an | ||
| 237 | * alternative entry point into writeback code, and first ensures @inode is | ||
| 238 | * associated with a bdi_writeback and attaches it to @wbc. | ||
| 239 | */ | ||
| 240 | static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, | ||
| 241 | struct inode *inode) | ||
| 242 | { | ||
| 243 | spin_lock(&inode->i_lock); | ||
| 244 | inode_attach_wb(inode, NULL); | ||
| 245 | wbc_attach_and_unlock_inode(wbc, inode); | ||
| 246 | } | ||
| 247 | |||
| 248 | /** | ||
| 249 | * wbc_init_bio - writeback specific initialization of bio | ||
| 250 | * @wbc: writeback_control for the writeback in progress | ||
| 251 | * @bio: bio to be initialized | ||
| 252 | * | ||
| 253 | * @bio is a part of the writeback in progress controlled by @wbc. Perform | ||
| 254 | * writeback specific initialization. This is used to apply the cgroup | ||
| 255 | * writeback context. | ||
| 256 | */ | ||
| 257 | static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) | ||
| 258 | { | ||
| 259 | /* | ||
| 260 | * pageout() path doesn't attach @wbc to the inode being written | ||
| 261 | * out. This is intentional as we don't want the function to block | ||
| 262 | * behind a slow cgroup. Ultimately, we want pageout() to kick off | ||
| 263 | * regular writeback instead of writing things out itself. | ||
| 264 | */ | ||
| 265 | if (wbc->wb) | ||
| 266 | bio_associate_blkcg(bio, wbc->wb->blkcg_css); | ||
| 267 | } | ||
| 268 | |||
| 269 | #else /* CONFIG_CGROUP_WRITEBACK */ | ||
| 270 | |||
| 271 | static inline void inode_attach_wb(struct inode *inode, struct page *page) | ||
| 272 | { | ||
| 273 | } | ||
| 274 | |||
| 275 | static inline void inode_detach_wb(struct inode *inode) | ||
| 276 | { | ||
| 277 | } | ||
| 278 | |||
| 279 | static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc, | ||
| 280 | struct inode *inode) | ||
| 281 | __releases(&inode->i_lock) | ||
| 282 | { | ||
| 283 | spin_unlock(&inode->i_lock); | ||
| 284 | } | ||
| 285 | |||
| 286 | static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, | ||
| 287 | struct inode *inode) | ||
| 288 | { | ||
| 289 | } | ||
| 290 | |||
| 291 | static inline void wbc_detach_inode(struct writeback_control *wbc) | ||
| 292 | { | ||
| 293 | } | ||
| 294 | |||
| 295 | static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) | ||
| 296 | { | ||
| 297 | } | ||
| 298 | |||
| 299 | static inline void wbc_account_io(struct writeback_control *wbc, | ||
| 300 | struct page *page, size_t bytes) | ||
| 301 | { | ||
| 302 | } | ||
| 303 | |||
| 304 | #endif /* CONFIG_CGROUP_WRITEBACK */ | ||
| 305 | |||
| 110 | /* | 306 | /* |
| 111 | * mm/page-writeback.c | 307 | * mm/page-writeback.c |
| 112 | */ | 308 | */ |
| @@ -120,8 +316,12 @@ static inline void laptop_sync_completion(void) { } | |||
| 120 | #endif | 316 | #endif |
| 121 | void throttle_vm_writeout(gfp_t gfp_mask); | 317 | void throttle_vm_writeout(gfp_t gfp_mask); |
| 122 | bool zone_dirty_ok(struct zone *zone); | 318 | bool zone_dirty_ok(struct zone *zone); |
| 319 | int wb_domain_init(struct wb_domain *dom, gfp_t gfp); | ||
| 320 | #ifdef CONFIG_CGROUP_WRITEBACK | ||
| 321 | void wb_domain_exit(struct wb_domain *dom); | ||
| 322 | #endif | ||
| 123 | 323 | ||
| 124 | extern unsigned long global_dirty_limit; | 324 | extern struct wb_domain global_wb_domain; |
| 125 | 325 | ||
| 126 | /* These are exported to sysctl. */ | 326 | /* These are exported to sysctl. */ |
| 127 | extern int dirty_background_ratio; | 327 | extern int dirty_background_ratio; |
| @@ -155,19 +355,12 @@ int dirty_writeback_centisecs_handler(struct ctl_table *, int, | |||
| 155 | void __user *, size_t *, loff_t *); | 355 | void __user *, size_t *, loff_t *); |
| 156 | 356 | ||
| 157 | void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); | 357 | void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); |
| 158 | unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, | 358 | unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); |
| 159 | unsigned long dirty); | ||
| 160 | |||
| 161 | void __bdi_update_bandwidth(struct backing_dev_info *bdi, | ||
| 162 | unsigned long thresh, | ||
| 163 | unsigned long bg_thresh, | ||
| 164 | unsigned long dirty, | ||
| 165 | unsigned long bdi_thresh, | ||
| 166 | unsigned long bdi_dirty, | ||
| 167 | unsigned long start_time); | ||
| 168 | 359 | ||
| 360 | void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time); | ||
| 169 | void page_writeback_init(void); | 361 | void page_writeback_init(void); |
| 170 | void balance_dirty_pages_ratelimited(struct address_space *mapping); | 362 | void balance_dirty_pages_ratelimited(struct address_space *mapping); |
| 363 | bool wb_over_bg_thresh(struct bdi_writeback *wb); | ||
| 171 | 364 | ||
| 172 | typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, | 365 | typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, |
| 173 | void *data); | 366 | void *data); |
