diff options
Diffstat (limited to 'include')
| -rw-r--r-- | include/linux/backing-dev.h | 14 | ||||
| -rw-r--r-- | include/linux/sched.h | 7 | ||||
| -rw-r--r-- | include/linux/writeback.h | 33 | ||||
| -rw-r--r-- | include/trace/events/writeback.h | 161 |
4 files changed, 178 insertions, 37 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 3b2f9cb82986..b1038bd686ac 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h | |||
| @@ -40,6 +40,7 @@ typedef int (congested_fn)(void *, int); | |||
| 40 | enum bdi_stat_item { | 40 | enum bdi_stat_item { |
| 41 | BDI_RECLAIMABLE, | 41 | BDI_RECLAIMABLE, |
| 42 | BDI_WRITEBACK, | 42 | BDI_WRITEBACK, |
| 43 | BDI_DIRTIED, | ||
| 43 | BDI_WRITTEN, | 44 | BDI_WRITTEN, |
| 44 | NR_BDI_STAT_ITEMS | 45 | NR_BDI_STAT_ITEMS |
| 45 | }; | 46 | }; |
| @@ -74,10 +75,20 @@ struct backing_dev_info { | |||
| 74 | struct percpu_counter bdi_stat[NR_BDI_STAT_ITEMS]; | 75 | struct percpu_counter bdi_stat[NR_BDI_STAT_ITEMS]; |
| 75 | 76 | ||
| 76 | unsigned long bw_time_stamp; /* last time write bw is updated */ | 77 | unsigned long bw_time_stamp; /* last time write bw is updated */ |
| 78 | unsigned long dirtied_stamp; | ||
| 77 | unsigned long written_stamp; /* pages written at bw_time_stamp */ | 79 | unsigned long written_stamp; /* pages written at bw_time_stamp */ |
| 78 | unsigned long write_bandwidth; /* the estimated write bandwidth */ | 80 | unsigned long write_bandwidth; /* the estimated write bandwidth */ |
| 79 | unsigned long avg_write_bandwidth; /* further smoothed write bw */ | 81 | unsigned long avg_write_bandwidth; /* further smoothed write bw */ |
| 80 | 82 | ||
| 83 | /* | ||
| 84 | * The base dirty throttle rate, re-calculated on every 200ms. | ||
| 85 | * All the bdi tasks' dirty rate will be curbed under it. | ||
| 86 | * @dirty_ratelimit tracks the estimated @balanced_dirty_ratelimit | ||
| 87 | * in small steps and is much more smooth/stable than the latter. | ||
| 88 | */ | ||
| 89 | unsigned long dirty_ratelimit; | ||
| 90 | unsigned long balanced_dirty_ratelimit; | ||
| 91 | |||
| 81 | struct prop_local_percpu completions; | 92 | struct prop_local_percpu completions; |
| 82 | int dirty_exceeded; | 93 | int dirty_exceeded; |
| 83 | 94 | ||
| @@ -107,7 +118,8 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, | |||
| 107 | int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); | 118 | int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); |
| 108 | void bdi_unregister(struct backing_dev_info *bdi); | 119 | void bdi_unregister(struct backing_dev_info *bdi); |
| 109 | int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); | 120 | int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int); |
| 110 | void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages); | 121 | void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, |
| 122 | enum wb_reason reason); | ||
| 111 | void bdi_start_background_writeback(struct backing_dev_info *bdi); | 123 | void bdi_start_background_writeback(struct backing_dev_info *bdi); |
| 112 | int bdi_writeback_thread(void *data); | 124 | int bdi_writeback_thread(void *data); |
| 113 | int bdi_has_dirty_io(struct backing_dev_info *bdi); | 125 | int bdi_has_dirty_io(struct backing_dev_info *bdi); |
diff --git a/include/linux/sched.h b/include/linux/sched.h index e8acce717d2a..68daf4f27e2c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
| @@ -1522,6 +1522,13 @@ struct task_struct { | |||
| 1522 | int make_it_fail; | 1522 | int make_it_fail; |
| 1523 | #endif | 1523 | #endif |
| 1524 | struct prop_local_single dirties; | 1524 | struct prop_local_single dirties; |
| 1525 | /* | ||
| 1526 | * when (nr_dirtied >= nr_dirtied_pause), it's time to call | ||
| 1527 | * balance_dirty_pages() for some dirty throttling pause | ||
| 1528 | */ | ||
| 1529 | int nr_dirtied; | ||
| 1530 | int nr_dirtied_pause; | ||
| 1531 | |||
| 1525 | #ifdef CONFIG_LATENCYTOP | 1532 | #ifdef CONFIG_LATENCYTOP |
| 1526 | int latency_record_count; | 1533 | int latency_record_count; |
| 1527 | struct latency_record latency_record[LT_SAVECOUNT]; | 1534 | struct latency_record latency_record[LT_SAVECOUNT]; |
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 2b8963ff0f35..a378c295851f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
| @@ -39,6 +39,23 @@ enum writeback_sync_modes { | |||
| 39 | }; | 39 | }; |
| 40 | 40 | ||
| 41 | /* | 41 | /* |
| 42 | * why some writeback work was initiated | ||
| 43 | */ | ||
| 44 | enum wb_reason { | ||
| 45 | WB_REASON_BACKGROUND, | ||
| 46 | WB_REASON_TRY_TO_FREE_PAGES, | ||
| 47 | WB_REASON_SYNC, | ||
| 48 | WB_REASON_PERIODIC, | ||
| 49 | WB_REASON_LAPTOP_TIMER, | ||
| 50 | WB_REASON_FREE_MORE_MEM, | ||
| 51 | WB_REASON_FS_FREE_SPACE, | ||
| 52 | WB_REASON_FORKER_THREAD, | ||
| 53 | |||
| 54 | WB_REASON_MAX, | ||
| 55 | }; | ||
| 56 | extern const char *wb_reason_name[]; | ||
| 57 | |||
| 58 | /* | ||
| 42 | * A control structure which tells the writeback code what to do. These are | 59 | * A control structure which tells the writeback code what to do. These are |
| 43 | * always on the stack, and hence need no locking. They are always initialised | 60 | * always on the stack, and hence need no locking. They are always initialised |
| 44 | * in a manner such that unspecified fields are set to zero. | 61 | * in a manner such that unspecified fields are set to zero. |
| @@ -69,14 +86,17 @@ struct writeback_control { | |||
| 69 | */ | 86 | */ |
| 70 | struct bdi_writeback; | 87 | struct bdi_writeback; |
| 71 | int inode_wait(void *); | 88 | int inode_wait(void *); |
| 72 | void writeback_inodes_sb(struct super_block *); | 89 | void writeback_inodes_sb(struct super_block *, enum wb_reason reason); |
| 73 | void writeback_inodes_sb_nr(struct super_block *, unsigned long nr); | 90 | void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, |
| 74 | int writeback_inodes_sb_if_idle(struct super_block *); | 91 | enum wb_reason reason); |
| 75 | int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr); | 92 | int writeback_inodes_sb_if_idle(struct super_block *, enum wb_reason reason); |
| 93 | int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr, | ||
| 94 | enum wb_reason reason); | ||
| 76 | void sync_inodes_sb(struct super_block *); | 95 | void sync_inodes_sb(struct super_block *); |
| 77 | long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages); | 96 | long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, |
| 97 | enum wb_reason reason); | ||
| 78 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait); | 98 | long wb_do_writeback(struct bdi_writeback *wb, int force_wait); |
| 79 | void wakeup_flusher_threads(long nr_pages); | 99 | void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); |
| 80 | 100 | ||
| 81 | /* writeback.h requires fs.h; it, too, is not included from here. */ | 101 | /* writeback.h requires fs.h; it, too, is not included from here. */ |
| 82 | static inline void wait_on_inode(struct inode *inode) | 102 | static inline void wait_on_inode(struct inode *inode) |
| @@ -143,6 +163,7 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, | |||
| 143 | 163 | ||
| 144 | void __bdi_update_bandwidth(struct backing_dev_info *bdi, | 164 | void __bdi_update_bandwidth(struct backing_dev_info *bdi, |
| 145 | unsigned long thresh, | 165 | unsigned long thresh, |
| 166 | unsigned long bg_thresh, | ||
| 146 | unsigned long dirty, | 167 | unsigned long dirty, |
| 147 | unsigned long bdi_thresh, | 168 | unsigned long bdi_thresh, |
| 148 | unsigned long bdi_dirty, | 169 | unsigned long bdi_dirty, |
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 5f172703eb4f..b99caa8b780c 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h | |||
| @@ -34,6 +34,7 @@ DECLARE_EVENT_CLASS(writeback_work_class, | |||
| 34 | __field(int, for_kupdate) | 34 | __field(int, for_kupdate) |
| 35 | __field(int, range_cyclic) | 35 | __field(int, range_cyclic) |
| 36 | __field(int, for_background) | 36 | __field(int, for_background) |
| 37 | __field(int, reason) | ||
| 37 | ), | 38 | ), |
| 38 | TP_fast_assign( | 39 | TP_fast_assign( |
| 39 | strncpy(__entry->name, dev_name(bdi->dev), 32); | 40 | strncpy(__entry->name, dev_name(bdi->dev), 32); |
| @@ -43,16 +44,18 @@ DECLARE_EVENT_CLASS(writeback_work_class, | |||
| 43 | __entry->for_kupdate = work->for_kupdate; | 44 | __entry->for_kupdate = work->for_kupdate; |
| 44 | __entry->range_cyclic = work->range_cyclic; | 45 | __entry->range_cyclic = work->range_cyclic; |
| 45 | __entry->for_background = work->for_background; | 46 | __entry->for_background = work->for_background; |
| 47 | __entry->reason = work->reason; | ||
| 46 | ), | 48 | ), |
| 47 | TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d " | 49 | TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d " |
| 48 | "kupdate=%d range_cyclic=%d background=%d", | 50 | "kupdate=%d range_cyclic=%d background=%d reason=%s", |
| 49 | __entry->name, | 51 | __entry->name, |
| 50 | MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev), | 52 | MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev), |
| 51 | __entry->nr_pages, | 53 | __entry->nr_pages, |
| 52 | __entry->sync_mode, | 54 | __entry->sync_mode, |
| 53 | __entry->for_kupdate, | 55 | __entry->for_kupdate, |
| 54 | __entry->range_cyclic, | 56 | __entry->range_cyclic, |
| 55 | __entry->for_background | 57 | __entry->for_background, |
| 58 | wb_reason_name[__entry->reason] | ||
| 56 | ) | 59 | ) |
| 57 | ); | 60 | ); |
| 58 | #define DEFINE_WRITEBACK_WORK_EVENT(name) \ | 61 | #define DEFINE_WRITEBACK_WORK_EVENT(name) \ |
| @@ -104,30 +107,6 @@ DEFINE_WRITEBACK_EVENT(writeback_bdi_register); | |||
| 104 | DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister); | 107 | DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister); |
| 105 | DEFINE_WRITEBACK_EVENT(writeback_thread_start); | 108 | DEFINE_WRITEBACK_EVENT(writeback_thread_start); |
| 106 | DEFINE_WRITEBACK_EVENT(writeback_thread_stop); | 109 | DEFINE_WRITEBACK_EVENT(writeback_thread_stop); |
| 107 | DEFINE_WRITEBACK_EVENT(balance_dirty_start); | ||
| 108 | DEFINE_WRITEBACK_EVENT(balance_dirty_wait); | ||
| 109 | |||
| 110 | TRACE_EVENT(balance_dirty_written, | ||
| 111 | |||
| 112 | TP_PROTO(struct backing_dev_info *bdi, int written), | ||
| 113 | |||
| 114 | TP_ARGS(bdi, written), | ||
| 115 | |||
| 116 | TP_STRUCT__entry( | ||
| 117 | __array(char, name, 32) | ||
| 118 | __field(int, written) | ||
| 119 | ), | ||
| 120 | |||
| 121 | TP_fast_assign( | ||
| 122 | strncpy(__entry->name, dev_name(bdi->dev), 32); | ||
| 123 | __entry->written = written; | ||
| 124 | ), | ||
| 125 | |||
| 126 | TP_printk("bdi %s written %d", | ||
| 127 | __entry->name, | ||
| 128 | __entry->written | ||
| 129 | ) | ||
| 130 | ); | ||
| 131 | 110 | ||
| 132 | DECLARE_EVENT_CLASS(wbc_class, | 111 | DECLARE_EVENT_CLASS(wbc_class, |
| 133 | TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), | 112 | TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), |
| @@ -181,27 +160,31 @@ DEFINE_WBC_EVENT(wbc_writepage); | |||
| 181 | 160 | ||
| 182 | TRACE_EVENT(writeback_queue_io, | 161 | TRACE_EVENT(writeback_queue_io, |
| 183 | TP_PROTO(struct bdi_writeback *wb, | 162 | TP_PROTO(struct bdi_writeback *wb, |
| 184 | unsigned long *older_than_this, | 163 | struct wb_writeback_work *work, |
| 185 | int moved), | 164 | int moved), |
| 186 | TP_ARGS(wb, older_than_this, moved), | 165 | TP_ARGS(wb, work, moved), |
| 187 | TP_STRUCT__entry( | 166 | TP_STRUCT__entry( |
| 188 | __array(char, name, 32) | 167 | __array(char, name, 32) |
| 189 | __field(unsigned long, older) | 168 | __field(unsigned long, older) |
| 190 | __field(long, age) | 169 | __field(long, age) |
| 191 | __field(int, moved) | 170 | __field(int, moved) |
| 171 | __field(int, reason) | ||
| 192 | ), | 172 | ), |
| 193 | TP_fast_assign( | 173 | TP_fast_assign( |
| 174 | unsigned long *older_than_this = work->older_than_this; | ||
| 194 | strncpy(__entry->name, dev_name(wb->bdi->dev), 32); | 175 | strncpy(__entry->name, dev_name(wb->bdi->dev), 32); |
| 195 | __entry->older = older_than_this ? *older_than_this : 0; | 176 | __entry->older = older_than_this ? *older_than_this : 0; |
| 196 | __entry->age = older_than_this ? | 177 | __entry->age = older_than_this ? |
| 197 | (jiffies - *older_than_this) * 1000 / HZ : -1; | 178 | (jiffies - *older_than_this) * 1000 / HZ : -1; |
| 198 | __entry->moved = moved; | 179 | __entry->moved = moved; |
| 180 | __entry->reason = work->reason; | ||
| 199 | ), | 181 | ), |
| 200 | TP_printk("bdi %s: older=%lu age=%ld enqueue=%d", | 182 | TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s", |
| 201 | __entry->name, | 183 | __entry->name, |
| 202 | __entry->older, /* older_than_this in jiffies */ | 184 | __entry->older, /* older_than_this in jiffies */ |
| 203 | __entry->age, /* older_than_this in relative milliseconds */ | 185 | __entry->age, /* older_than_this in relative milliseconds */ |
| 204 | __entry->moved) | 186 | __entry->moved, |
| 187 | wb_reason_name[__entry->reason]) | ||
| 205 | ); | 188 | ); |
| 206 | 189 | ||
| 207 | TRACE_EVENT(global_dirty_state, | 190 | TRACE_EVENT(global_dirty_state, |
| @@ -250,6 +233,124 @@ TRACE_EVENT(global_dirty_state, | |||
| 250 | ) | 233 | ) |
| 251 | ); | 234 | ); |
| 252 | 235 | ||
| 236 | #define KBps(x) ((x) << (PAGE_SHIFT - 10)) | ||
| 237 | |||
| 238 | TRACE_EVENT(bdi_dirty_ratelimit, | ||
| 239 | |||
| 240 | TP_PROTO(struct backing_dev_info *bdi, | ||
| 241 | unsigned long dirty_rate, | ||
| 242 | unsigned long task_ratelimit), | ||
| 243 | |||
| 244 | TP_ARGS(bdi, dirty_rate, task_ratelimit), | ||
| 245 | |||
| 246 | TP_STRUCT__entry( | ||
| 247 | __array(char, bdi, 32) | ||
| 248 | __field(unsigned long, write_bw) | ||
| 249 | __field(unsigned long, avg_write_bw) | ||
| 250 | __field(unsigned long, dirty_rate) | ||
| 251 | __field(unsigned long, dirty_ratelimit) | ||
| 252 | __field(unsigned long, task_ratelimit) | ||
| 253 | __field(unsigned long, balanced_dirty_ratelimit) | ||
| 254 | ), | ||
| 255 | |||
| 256 | TP_fast_assign( | ||
| 257 | strlcpy(__entry->bdi, dev_name(bdi->dev), 32); | ||
| 258 | __entry->write_bw = KBps(bdi->write_bandwidth); | ||
| 259 | __entry->avg_write_bw = KBps(bdi->avg_write_bandwidth); | ||
| 260 | __entry->dirty_rate = KBps(dirty_rate); | ||
| 261 | __entry->dirty_ratelimit = KBps(bdi->dirty_ratelimit); | ||
| 262 | __entry->task_ratelimit = KBps(task_ratelimit); | ||
| 263 | __entry->balanced_dirty_ratelimit = | ||
| 264 | KBps(bdi->balanced_dirty_ratelimit); | ||
| 265 | ), | ||
| 266 | |||
| 267 | TP_printk("bdi %s: " | ||
| 268 | "write_bw=%lu awrite_bw=%lu dirty_rate=%lu " | ||
| 269 | "dirty_ratelimit=%lu task_ratelimit=%lu " | ||
| 270 | "balanced_dirty_ratelimit=%lu", | ||
| 271 | __entry->bdi, | ||
| 272 | __entry->write_bw, /* write bandwidth */ | ||
| 273 | __entry->avg_write_bw, /* avg write bandwidth */ | ||
| 274 | __entry->dirty_rate, /* bdi dirty rate */ | ||
| 275 | __entry->dirty_ratelimit, /* base ratelimit */ | ||
| 276 | __entry->task_ratelimit, /* ratelimit with position control */ | ||
| 277 | __entry->balanced_dirty_ratelimit /* the balanced ratelimit */ | ||
| 278 | ) | ||
| 279 | ); | ||
| 280 | |||
| 281 | TRACE_EVENT(balance_dirty_pages, | ||
| 282 | |||
| 283 | TP_PROTO(struct backing_dev_info *bdi, | ||
| 284 | unsigned long thresh, | ||
| 285 | unsigned long bg_thresh, | ||
| 286 | unsigned long dirty, | ||
| 287 | unsigned long bdi_thresh, | ||
| 288 | unsigned long bdi_dirty, | ||
| 289 | unsigned long dirty_ratelimit, | ||
| 290 | unsigned long task_ratelimit, | ||
| 291 | unsigned long dirtied, | ||
| 292 | long pause, | ||
| 293 | unsigned long start_time), | ||
| 294 | |||
| 295 | TP_ARGS(bdi, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty, | ||
| 296 | dirty_ratelimit, task_ratelimit, | ||
| 297 | dirtied, pause, start_time), | ||
| 298 | |||
| 299 | TP_STRUCT__entry( | ||
| 300 | __array( char, bdi, 32) | ||
| 301 | __field(unsigned long, limit) | ||
| 302 | __field(unsigned long, setpoint) | ||
| 303 | __field(unsigned long, dirty) | ||
| 304 | __field(unsigned long, bdi_setpoint) | ||
| 305 | __field(unsigned long, bdi_dirty) | ||
| 306 | __field(unsigned long, dirty_ratelimit) | ||
| 307 | __field(unsigned long, task_ratelimit) | ||
| 308 | __field(unsigned int, dirtied) | ||
| 309 | __field(unsigned int, dirtied_pause) | ||
| 310 | __field(unsigned long, paused) | ||
| 311 | __field( long, pause) | ||
| 312 | ), | ||
| 313 | |||
| 314 | TP_fast_assign( | ||
| 315 | unsigned long freerun = (thresh + bg_thresh) / 2; | ||
| 316 | strlcpy(__entry->bdi, dev_name(bdi->dev), 32); | ||
| 317 | |||
| 318 | __entry->limit = global_dirty_limit; | ||
| 319 | __entry->setpoint = (global_dirty_limit + freerun) / 2; | ||
| 320 | __entry->dirty = dirty; | ||
| 321 | __entry->bdi_setpoint = __entry->setpoint * | ||
| 322 | bdi_thresh / (thresh + 1); | ||
| 323 | __entry->bdi_dirty = bdi_dirty; | ||
| 324 | __entry->dirty_ratelimit = KBps(dirty_ratelimit); | ||
| 325 | __entry->task_ratelimit = KBps(task_ratelimit); | ||
| 326 | __entry->dirtied = dirtied; | ||
| 327 | __entry->dirtied_pause = current->nr_dirtied_pause; | ||
| 328 | __entry->pause = pause * 1000 / HZ; | ||
| 329 | __entry->paused = (jiffies - start_time) * 1000 / HZ; | ||
| 330 | ), | ||
| 331 | |||
| 332 | |||
| 333 | TP_printk("bdi %s: " | ||
| 334 | "limit=%lu setpoint=%lu dirty=%lu " | ||
| 335 | "bdi_setpoint=%lu bdi_dirty=%lu " | ||
| 336 | "dirty_ratelimit=%lu task_ratelimit=%lu " | ||
| 337 | "dirtied=%u dirtied_pause=%u " | ||
| 338 | "paused=%lu pause=%ld", | ||
| 339 | __entry->bdi, | ||
| 340 | __entry->limit, | ||
| 341 | __entry->setpoint, | ||
| 342 | __entry->dirty, | ||
| 343 | __entry->bdi_setpoint, | ||
| 344 | __entry->bdi_dirty, | ||
| 345 | __entry->dirty_ratelimit, | ||
| 346 | __entry->task_ratelimit, | ||
| 347 | __entry->dirtied, | ||
| 348 | __entry->dirtied_pause, | ||
| 349 | __entry->paused, /* ms */ | ||
| 350 | __entry->pause /* ms */ | ||
| 351 | ) | ||
| 352 | ); | ||
| 353 | |||
| 253 | DECLARE_EVENT_CLASS(writeback_congest_waited_template, | 354 | DECLARE_EVENT_CLASS(writeback_congest_waited_template, |
| 254 | 355 | ||
| 255 | TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed), | 356 | TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed), |
