Diffstat (limited to 'mm/page-writeback.c')
 -rw-r--r--  mm/page-writeback.c  245
 1 file changed, 184 insertions(+), 61 deletions(-)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2970e35fd03f..b493db7841dc 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -69,6 +69,12 @@ static inline long sync_writeback_pages(void)
 int dirty_background_ratio = 5;
 
 /*
+ * dirty_background_bytes starts at 0 (disabled) so that it is a function of
+ * dirty_background_ratio * the amount of dirtyable memory
+ */
+unsigned long dirty_background_bytes;
+
+/*
  * free highmem will not be subtracted from the total free memory
  * for calculating free ratios if vm_highmem_is_dirtyable is true
  */
@@ -80,6 +86,12 @@ int vm_highmem_is_dirtyable;
 int vm_dirty_ratio = 10;
 
 /*
+ * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
+ * vm_dirty_ratio * the amount of dirtyable memory
+ */
+unsigned long vm_dirty_bytes;
+
+/*
  * The interval between `kupdate'-style writebacks, in jiffies
  */
 int dirty_writeback_interval = 5 * HZ;
@@ -135,23 +147,75 @@ static int calc_period_shift(void)
 {
 	unsigned long dirty_total;
 
-	dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / 100;
+	if (vm_dirty_bytes)
+		dirty_total = vm_dirty_bytes / PAGE_SIZE;
+	else
+		dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) /
+				100;
 	return 2 + ilog2(dirty_total - 1);
 }
 
 /*
- * update the period when the dirty ratio changes.
+ * update the period when the dirty threshold changes.
  */
+static void update_completion_period(void)
+{
+	int shift = calc_period_shift();
+	prop_change_shift(&vm_completions, shift);
+	prop_change_shift(&vm_dirties, shift);
+}
+
+int dirty_background_ratio_handler(struct ctl_table *table, int write,
+		struct file *filp, void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+	if (ret == 0 && write)
+		dirty_background_bytes = 0;
+	return ret;
+}
+
+int dirty_background_bytes_handler(struct ctl_table *table, int write,
+		struct file *filp, void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+	if (ret == 0 && write)
+		dirty_background_ratio = 0;
+	return ret;
+}
+
 int dirty_ratio_handler(struct ctl_table *table, int write,
 		struct file *filp, void __user *buffer, size_t *lenp,
 		loff_t *ppos)
 {
 	int old_ratio = vm_dirty_ratio;
-	int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
 	if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
-		int shift = calc_period_shift();
-		prop_change_shift(&vm_completions, shift);
-		prop_change_shift(&vm_dirties, shift);
+		update_completion_period();
+		vm_dirty_bytes = 0;
+	}
+	return ret;
+}
+
+
+int dirty_bytes_handler(struct ctl_table *table, int write,
+		struct file *filp, void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int old_bytes = vm_dirty_bytes;
+	int ret;
+
+	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+	if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
+		update_completion_period();
+		vm_dirty_ratio = 0;
 	}
 	return ret;
 }
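
Taken together, the handlers in this hunk keep each ratio/bytes pair mutually exclusive: a successful write to one knob zeroes its counterpart, so at most one member of a pair is non-zero at any time. The read-only userspace sketch below only illustrates that invariant; it assumes the usual /proc/sys/vm/dirty_bytes, dirty_ratio, dirty_background_bytes and dirty_background_ratio entries are wired to these handlers (the sysctl-table change itself is outside mm/page-writeback.c).

	/* Illustrative only: report which member of each pair is active. */
	#include <stdio.h>

	static unsigned long read_vm_knob(const char *name)
	{
		char path[128];
		unsigned long val = 0;
		FILE *f;

		snprintf(path, sizeof(path), "/proc/sys/vm/%s", name);
		f = fopen(path, "r");
		if (!f)
			return 0;
		if (fscanf(f, "%lu", &val) != 1)
			val = 0;
		fclose(f);
		return val;
	}

	int main(void)
	{
		unsigned long db = read_vm_knob("dirty_bytes");
		unsigned long dr = read_vm_knob("dirty_ratio");
		unsigned long bb = read_vm_knob("dirty_background_bytes");
		unsigned long br = read_vm_knob("dirty_background_ratio");

		printf("dirty limit:      %s\n", db ? "bytes-based" : "ratio-based");
		printf("background limit: %s\n", bb ? "bytes-based" : "ratio-based");
		printf("dirty_bytes=%lu dirty_ratio=%lu\n", db, dr);
		printf("dirty_background_bytes=%lu dirty_background_ratio=%lu\n", bb, br);
		return 0;
	}

Running it as any user shows one non-zero value per pair, which control is currently in effect, and a zero for its counterpart.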
@@ -362,26 +426,32 @@ unsigned long determine_dirtyable_memory(void)
 }
 
 void
-get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
-		 struct backing_dev_info *bdi)
+get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
+		 unsigned long *pbdi_dirty, struct backing_dev_info *bdi)
 {
-	int background_ratio;		/* Percentages */
-	int dirty_ratio;
-	long background;
-	long dirty;
+	unsigned long background;
+	unsigned long dirty;
 	unsigned long available_memory = determine_dirtyable_memory();
 	struct task_struct *tsk;
 
-	dirty_ratio = vm_dirty_ratio;
-	if (dirty_ratio < 5)
-		dirty_ratio = 5;
+	if (vm_dirty_bytes)
+		dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
+	else {
+		int dirty_ratio;
 
-	background_ratio = dirty_background_ratio;
-	if (background_ratio >= dirty_ratio)
-		background_ratio = dirty_ratio / 2;
+		dirty_ratio = vm_dirty_ratio;
+		if (dirty_ratio < 5)
+			dirty_ratio = 5;
+		dirty = (dirty_ratio * available_memory) / 100;
+	}
+
+	if (dirty_background_bytes)
+		background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE);
+	else
+		background = (dirty_background_ratio * available_memory) / 100;
 
-	background = (background_ratio * available_memory) / 100;
-	dirty = (dirty_ratio * available_memory) / 100;
+	if (background >= dirty)
+		background = dirty / 2;
 	tsk = current;
 	if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
 		background += background / 4;
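
The reworked get_dirty_limits() above reduces to: take the dirty threshold from vm_dirty_bytes rounded up to whole pages when it is set, otherwise from vm_dirty_ratio (floored at 5%) of the dirtyable memory; derive the background threshold from its own bytes/ratio pair, then clamp it to half the dirty threshold when it would not otherwise be lower. A minimal standalone sketch of that arithmetic follows; the PAGE_SIZE, memory size and byte values are invented for illustration, not taken from the patch.

	#include <stdio.h>

	#define PAGE_SIZE		4096UL			/* assumed */
	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

	int main(void)
	{
		unsigned long available_memory = 1000000;	/* dirtyable pages, assumed */
		unsigned long vm_dirty_bytes = 200UL * 1024 * 1024;	/* 200 MB, assumed */
		unsigned long dirty_background_bytes = 0;	/* disabled: fall back to ratio */
		int vm_dirty_ratio = 10, dirty_background_ratio = 5;
		unsigned long dirty, background;

		if (vm_dirty_bytes)
			dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
		else {
			int dirty_ratio = vm_dirty_ratio;

			if (dirty_ratio < 5)
				dirty_ratio = 5;
			dirty = (dirty_ratio * available_memory) / 100;
		}

		if (dirty_background_bytes)
			background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE);
		else
			background = (dirty_background_ratio * available_memory) / 100;

		if (background >= dirty)
			background = dirty / 2;	/* background stays below the hard limit */

		printf("dirty threshold:      %lu pages\n", dirty);
		printf("background threshold: %lu pages\n", background);
		return 0;
	}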
@@ -423,9 +493,9 @@ static void balance_dirty_pages(struct address_space *mapping)
 {
 	long nr_reclaimable, bdi_nr_reclaimable;
 	long nr_writeback, bdi_nr_writeback;
-	long background_thresh;
-	long dirty_thresh;
-	long bdi_thresh;
+	unsigned long background_thresh;
+	unsigned long dirty_thresh;
+	unsigned long bdi_thresh;
 	unsigned long pages_written = 0;
 	unsigned long write_chunk = sync_writeback_pages();
 
@@ -580,8 +650,8 @@ EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
 
 void throttle_vm_writeout(gfp_t gfp_mask)
 {
-	long background_thresh;
-	long dirty_thresh;
+	unsigned long background_thresh;
+	unsigned long dirty_thresh;
 
 	for ( ; ; ) {
 		get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
@@ -624,8 +694,8 @@ static void background_writeout(unsigned long _min_pages)
 	};
 
 	for ( ; ; ) {
-		long background_thresh;
-		long dirty_thresh;
+		unsigned long background_thresh;
+		unsigned long dirty_thresh;
 
 		get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
 		if (global_page_state(NR_FILE_DIRTY) +
@@ -868,9 +938,11 @@ int write_cache_pages(struct address_space *mapping,
 	int done = 0;
 	struct pagevec pvec;
 	int nr_pages;
+	pgoff_t uninitialized_var(writeback_index);
 	pgoff_t index;
 	pgoff_t end;		/* Inclusive */
-	int scanned = 0;
+	pgoff_t done_index;
+	int cycled;
 	int range_whole = 0;
 	long nr_to_write = wbc->nr_to_write;
 
@@ -881,83 +953,134 @@ int write_cache_pages(struct address_space *mapping,
 
 	pagevec_init(&pvec, 0);
 	if (wbc->range_cyclic) {
-		index = mapping->writeback_index; /* Start from prev offset */
+		writeback_index = mapping->writeback_index; /* prev offset */
+		index = writeback_index;
+		if (index == 0)
+			cycled = 1;
+		else
+			cycled = 0;
 		end = -1;
 	} else {
 		index = wbc->range_start >> PAGE_CACHE_SHIFT;
 		end = wbc->range_end >> PAGE_CACHE_SHIFT;
 		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
 			range_whole = 1;
-		scanned = 1;
+		cycled = 1; /* ignore range_cyclic tests */
 	}
 retry:
-	while (!done && (index <= end) &&
-	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-			PAGECACHE_TAG_DIRTY,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
-		unsigned i;
+	done_index = index;
+	while (!done && (index <= end)) {
+		int i;
+
+		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+			      PAGECACHE_TAG_DIRTY,
+			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+		if (nr_pages == 0)
+			break;
 
-		scanned = 1;
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
 
 			/*
-			 * At this point we hold neither mapping->tree_lock nor
-			 * lock on the page itself: the page may be truncated or
-			 * invalidated (changing page->mapping to NULL), or even
-			 * swizzled back from swapper_space to tmpfs file
-			 * mapping
+			 * At this point, the page may be truncated or
+			 * invalidated (changing page->mapping to NULL), or
+			 * even swizzled back from swapper_space to tmpfs file
+			 * mapping. However, page->index will not change
+			 * because we have a reference on the page.
 			 */
+			if (page->index > end) {
+				/*
+				 * can't be range_cyclic (1st pass) because
+				 * end == -1 in that case.
+				 */
+				done = 1;
+				break;
+			}
+
+			done_index = page->index + 1;
+
 			lock_page(page);
 
+			/*
+			 * Page truncated or invalidated. We can freely skip it
+			 * then, even for data integrity operations: the page
+			 * has disappeared concurrently, so there could be no
+			 * real expectation of this data integrity operation
+			 * even if there is now a new, dirty page at the same
+			 * pagecache address.
+			 */
 			if (unlikely(page->mapping != mapping)) {
+continue_unlock:
 				unlock_page(page);
 				continue;
 			}
 
-			if (!wbc->range_cyclic && page->index > end) {
-				done = 1;
-				unlock_page(page);
-				continue;
+			if (!PageDirty(page)) {
+				/* someone wrote it for us */
+				goto continue_unlock;
 			}
 
-			if (wbc->sync_mode != WB_SYNC_NONE)
-				wait_on_page_writeback(page);
-
-			if (PageWriteback(page) ||
-			    !clear_page_dirty_for_io(page)) {
-				unlock_page(page);
-				continue;
+			if (PageWriteback(page)) {
+				if (wbc->sync_mode != WB_SYNC_NONE)
+					wait_on_page_writeback(page);
+				else
+					goto continue_unlock;
 			}
 
-			ret = (*writepage)(page, wbc, data);
+			BUG_ON(PageWriteback(page));
+			if (!clear_page_dirty_for_io(page))
+				goto continue_unlock;
 
-			if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
-				unlock_page(page);
-				ret = 0;
+			ret = (*writepage)(page, wbc, data);
+			if (unlikely(ret)) {
+				if (ret == AOP_WRITEPAGE_ACTIVATE) {
+					unlock_page(page);
+					ret = 0;
+				} else {
+					/*
+					 * done_index is set past this page,
+					 * so media errors will not choke
+					 * background writeout for the entire
+					 * file. This has consequences for
+					 * range_cyclic semantics (ie. it may
+					 * not be suitable for data integrity
+					 * writeout).
+					 */
+					done = 1;
+					break;
+				}
+			}
+
+			if (wbc->sync_mode == WB_SYNC_NONE) {
+				wbc->nr_to_write--;
+				if (wbc->nr_to_write <= 0) {
+					done = 1;
+					break;
+				}
 			}
-			if (ret || (--nr_to_write <= 0))
-				done = 1;
 			if (wbc->nonblocking && bdi_write_congested(bdi)) {
 				wbc->encountered_congestion = 1;
 				done = 1;
+				break;
 			}
 		}
 		pagevec_release(&pvec);
 		cond_resched();
 	}
-	if (!scanned && !done) {
+	if (!cycled) {
 		/*
+		 * range_cyclic:
 		 * We hit the last page and there is more work to be done: wrap
 		 * back to the start of the file
 		 */
-		scanned = 1;
+		cycled = 1;
 		index = 0;
+		end = writeback_index - 1;
 		goto retry;
 	}
 	if (!wbc->no_nrwrite_index_update) {
 		if (wbc->range_cyclic || (range_whole && nr_to_write > 0))
-			mapping->writeback_index = index;
+			mapping->writeback_index = done_index;
 		wbc->nr_to_write = nr_to_write;
 	}
 
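
The write_cache_pages() rework above drops the old scanned flag in favour of writeback_index, done_index and cycled: a range_cyclic pass starts at the remembered offset, runs to the end of the file, wraps around at most once, stops just short of where it began, and hands the next page to resume from back through done_index. The toy userspace model below only mimics that bookkeeping over an invented array of dirty flags; it is not kernel code.

	#include <stdio.h>

	#define NR_PAGES 8

	int main(void)
	{
		int dirty[NR_PAGES] = { 1, 0, 1, 1, 0, 1, 0, 1 };
		unsigned long writeback_index = 5;	/* assumed saved offset */
		unsigned long index = writeback_index;
		unsigned long end = NR_PAGES - 1;
		unsigned long done_index = index;
		int cycled = (index == 0);
		int done = 0;

	retry:
		while (!done && index <= end) {
			if (dirty[index]) {
				printf("writing page %lu\n", index);
				dirty[index] = 0;
			}
			done_index = index + 1;
			index++;
		}
		if (!cycled && !done) {
			/* second pass: wrap to the start, stop before writeback_index */
			cycled = 1;
			index = 0;
			end = writeback_index - 1;
			goto retry;
		}
		printf("next scan resumes at page %lu\n", done_index);
		return 0;
	}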