-rw-r--r--   mm/page-writeback.c   95
1 file changed, 33 insertions(+), 62 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index ea0b7cb4a8c7..2cf69a5e46e6 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -253,32 +253,6 @@ static void bdi_writeout_fraction(struct backing_dev_info *bdi,
 	}
 }
 
-/*
- * Clip the earned share of dirty pages to that which is actually available.
- * This avoids exceeding the total dirty_limit when the floating averages
- * fluctuate too quickly.
- */
-static void clip_bdi_dirty_limit(struct backing_dev_info *bdi,
-		unsigned long dirty, unsigned long *pbdi_dirty)
-{
-	unsigned long avail_dirty;
-
-	avail_dirty = global_page_state(NR_FILE_DIRTY) +
-		global_page_state(NR_WRITEBACK) +
-		global_page_state(NR_UNSTABLE_NFS) +
-		global_page_state(NR_WRITEBACK_TEMP);
-
-	if (avail_dirty < dirty)
-		avail_dirty = dirty - avail_dirty;
-	else
-		avail_dirty = 0;
-
-	avail_dirty += bdi_stat(bdi, BDI_RECLAIMABLE) +
-		bdi_stat(bdi, BDI_WRITEBACK);
-
-	*pbdi_dirty = min(*pbdi_dirty, avail_dirty);
-}
-
 static inline void task_dirties_fraction(struct task_struct *tsk,
 		long *numerator, long *denominator)
 {
@@ -469,7 +443,6 @@ get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
 			bdi_dirty = dirty * bdi->max_ratio / 100;
 
 		*pbdi_dirty = bdi_dirty;
-		clip_bdi_dirty_limit(bdi, dirty, pbdi_dirty);
 		task_dirty_limit(current, pbdi_dirty);
 	}
 }
@@ -491,7 +464,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 	unsigned long bdi_thresh;
 	unsigned long pages_written = 0;
 	unsigned long pause = 1;
-
+	bool dirty_exceeded = false;
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
 
 	for (;;) {
@@ -509,10 +482,35 @@ static void balance_dirty_pages(struct address_space *mapping,
 					global_page_state(NR_UNSTABLE_NFS);
 		nr_writeback = global_page_state(NR_WRITEBACK);
 
-		bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
-		bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
+		/*
+		 * In order to avoid the stacked BDI deadlock we need
+		 * to ensure we accurately count the 'dirty' pages when
+		 * the threshold is low.
+		 *
+		 * Otherwise it would be possible to get thresh+n pages
+		 * reported dirty, even though there are thresh-m pages
+		 * actually dirty; with m+n sitting in the percpu
+		 * deltas.
+		 */
+		if (bdi_thresh < 2*bdi_stat_error(bdi)) {
+			bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE);
+			bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK);
+		} else {
+			bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
+			bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
+		}
 
-		if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
+		/*
+		 * The bdi thresh is somehow "soft" limit derived from the
+		 * global "hard" limit. The former helps to prevent heavy IO
+		 * bdi or process from holding back light ones; The latter is
+		 * the last resort safeguard.
+		 */
+		dirty_exceeded =
+			(bdi_nr_reclaimable + bdi_nr_writeback >= bdi_thresh)
+			|| (nr_reclaimable + nr_writeback >= dirty_thresh);
+
+		if (!dirty_exceeded)
 			break;
 
 		/*
@@ -540,34 +538,10 @@ static void balance_dirty_pages(struct address_space *mapping,
 		if (bdi_nr_reclaimable > bdi_thresh) {
 			writeback_inodes_wb(&bdi->wb, &wbc);
 			pages_written += write_chunk - wbc.nr_to_write;
-			get_dirty_limits(&background_thresh, &dirty_thresh,
-				       &bdi_thresh, bdi);
 			trace_wbc_balance_dirty_written(&wbc, bdi);
+			if (pages_written >= write_chunk)
+				break;		/* We've done our duty */
 		}
-
-		/*
-		 * In order to avoid the stacked BDI deadlock we need
-		 * to ensure we accurately count the 'dirty' pages when
-		 * the threshold is low.
-		 *
-		 * Otherwise it would be possible to get thresh+n pages
-		 * reported dirty, even though there are thresh-m pages
-		 * actually dirty; with m+n sitting in the percpu
-		 * deltas.
-		 */
-		if (bdi_thresh < 2*bdi_stat_error(bdi)) {
-			bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE);
-			bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK);
-		} else if (bdi_nr_reclaimable) {
-			bdi_nr_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
-			bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
-		}
-
-		if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
-			break;
-		if (pages_written >= write_chunk)
-			break;		/* We've done our duty */
-
 		trace_wbc_balance_dirty_wait(&wbc, bdi);
 		__set_current_state(TASK_INTERRUPTIBLE);
 		io_schedule_timeout(pause);
@@ -581,8 +555,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 		pause = HZ / 10;
 	}
 
-	if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh &&
-			bdi->dirty_exceeded)
+	if (!dirty_exceeded && bdi->dirty_exceeded)
 		bdi->dirty_exceeded = 0;
 
 	if (writeback_in_progress(bdi))
@@ -597,9 +570,7 @@ static void balance_dirty_pages(struct address_space *mapping,
 	 * background_thresh, to keep the amount of dirty memory low.
 	 */
 	if ((laptop_mode && pages_written) ||
-	    (!laptop_mode && ((global_page_state(NR_FILE_DIRTY)
-			       + global_page_state(NR_UNSTABLE_NFS))
-					  > background_thresh)))
+	    (!laptop_mode && (nr_reclaimable > background_thresh)))
 		bdi_start_background_writeback(bdi);
 }
 
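
Note: as a minimal user-space sketch of the exit test this patch introduces (not kernel code; the helper name and every page count below are made up for illustration), throttling now continues while either the per-BDI "soft" threshold or the global "hard" threshold is exceeded, and the loop is left only once both are respected:

#include <stdbool.h>
#include <stdio.h>

/*
 * Stand-alone model of the dirty_exceeded test in balance_dirty_pages():
 * true while either the per-BDI ("soft") limit or the global ("hard")
 * limit is exceeded.  Purely illustrative, not the kernel implementation.
 */
static bool dirty_limits_exceeded(unsigned long bdi_dirty, unsigned long bdi_thresh,
				  unsigned long global_dirty, unsigned long dirty_thresh)
{
	return (bdi_dirty >= bdi_thresh) || (global_dirty >= dirty_thresh);
}

int main(void)
{
	/* Made-up page counts, for illustration only. */
	unsigned long bdi_thresh = 2000, dirty_thresh = 10000;
	unsigned long bdi_nr_reclaimable = 1100, bdi_nr_writeback = 600;
	unsigned long nr_reclaimable = 4200, nr_writeback = 3100;

	bool dirty_exceeded =
		dirty_limits_exceeded(bdi_nr_reclaimable + bdi_nr_writeback,
				      bdi_thresh,
				      nr_reclaimable + nr_writeback,
				      dirty_thresh);

	if (!dirty_exceeded)
		printf("under both limits -> leave the throttle loop\n");
	else
		printf("over a limit -> keep writing back and sleeping\n");
	return 0;
}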
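
The "stacked BDI deadlock" comment is about the slack in approximate per-CPU counters. Below is a rough back-of-the-envelope sketch, assuming a hypothetical per-CPU batch of 32 pages and 4 CPUs (neither figure is taken from the kernel sources), of why the exact but costlier sum is used once the per-BDI threshold falls below twice the worst-case counter error:

#include <stdio.h>

int main(void)
{
	/* Hypothetical figures; the real batch size and CPU count vary. */
	unsigned long percpu_batch = 32;	/* pages buffered per CPU */
	unsigned long nr_cpus = 4;

	/* Worst case: every CPU holds almost a full batch that the cheap,
	 * approximate counter read has not folded in yet. */
	unsigned long stat_error = percpu_batch * nr_cpus;	/* 128 pages */

	unsigned long bdi_thresh = 200;		/* a small per-BDI limit */

	if (bdi_thresh < 2 * stat_error)	/* 200 < 256 */
		printf("threshold is within the noise: take the exact sum\n");
	else
		printf("threshold is large enough: the fast read will do\n");
	return 0;
}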