Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r--  mm/page-writeback.c  245
1 file changed, 184 insertions, 61 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2970e35fd03f..b493db7841dc 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -69,6 +69,12 @@ static inline long sync_writeback_pages(void)
 int dirty_background_ratio = 5;
 
 /*
+ * dirty_background_bytes starts at 0 (disabled) so that it is a function of
+ * dirty_background_ratio * the amount of dirtyable memory
+ */
+unsigned long dirty_background_bytes;
+
+/*
  * free highmem will not be subtracted from the total free memory
  * for calculating free ratios if vm_highmem_is_dirtyable is true
  */
@@ -80,6 +86,12 @@ int vm_highmem_is_dirtyable;
 int vm_dirty_ratio = 10;
 
 /*
+ * vm_dirty_bytes starts at 0 (disabled) so that it is a function of
+ * vm_dirty_ratio * the amount of dirtyable memory
+ */
+unsigned long vm_dirty_bytes;
+
+/*
  * The interval between `kupdate'-style writebacks, in jiffies
  */
 int dirty_writeback_interval = 5 * HZ;
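
Both new variables are byte-granular counterparts to the existing percentage knobs, and only one member of each pair is meant to be in effect at a time: a value of 0 means "fall back to the ratio". The byte interface matters on large-memory machines, where 1% of dirtyable memory is already a coarse step. For example, with 4 KiB pages, vm_dirty_bytes = 67108864 (64 MiB) caps dirty memory at 16384 pages regardless of machine size, while vm_dirty_ratio = 10 on a machine with 64 GiB of dirtyable memory permits roughly 1.6 million dirty pages.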
@@ -135,23 +147,75 @@ static int calc_period_shift(void)
 {
        unsigned long dirty_total;
 
-       dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) / 100;
+       if (vm_dirty_bytes)
+               dirty_total = vm_dirty_bytes / PAGE_SIZE;
+       else
+               dirty_total = (vm_dirty_ratio * determine_dirtyable_memory()) /
+                               100;
        return 2 + ilog2(dirty_total - 1);
 }
 
 /*
- * update the period when the dirty ratio changes.
+ * update the period when the dirty threshold changes.
  */
+static void update_completion_period(void)
+{
+       int shift = calc_period_shift();
+       prop_change_shift(&vm_completions, shift);
+       prop_change_shift(&vm_dirties, shift);
+}
+
+int dirty_background_ratio_handler(struct ctl_table *table, int write,
+               struct file *filp, void __user *buffer, size_t *lenp,
+               loff_t *ppos)
+{
+       int ret;
+
+       ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+       if (ret == 0 && write)
+               dirty_background_bytes = 0;
+       return ret;
+}
+
+int dirty_background_bytes_handler(struct ctl_table *table, int write,
+               struct file *filp, void __user *buffer, size_t *lenp,
+               loff_t *ppos)
+{
+       int ret;
+
+       ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+       if (ret == 0 && write)
+               dirty_background_ratio = 0;
+       return ret;
+}
+
 int dirty_ratio_handler(struct ctl_table *table, int write,
                struct file *filp, void __user *buffer, size_t *lenp,
                loff_t *ppos)
 {
        int old_ratio = vm_dirty_ratio;
-       int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
+       int ret;
+
+       ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
        if (ret == 0 && write && vm_dirty_ratio != old_ratio) {
-               int shift = calc_period_shift();
-               prop_change_shift(&vm_completions, shift);
-               prop_change_shift(&vm_dirties, shift);
+               update_completion_period();
+               vm_dirty_bytes = 0;
+       }
+       return ret;
+}
+
+
+int dirty_bytes_handler(struct ctl_table *table, int write,
+               struct file *filp, void __user *buffer, size_t *lenp,
+               loff_t *ppos)
+{
+       unsigned long old_bytes = vm_dirty_bytes;
+       int ret;
+
+       ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+       if (ret == 0 && write && vm_dirty_bytes != old_bytes) {
+               update_completion_period();
+               vm_dirty_ratio = 0;
        }
        return ret;
 }
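
Taken together, the handlers make the pairing explicit: a successful write to a ratio zeroes its bytes counterpart and vice versa, and the two dirty-threshold handlers additionally rescale the completion period through update_completion_period() because calc_period_shift() now depends on whichever tunable is active. Assuming the companion sysctl-table change wires these handlers up under /proc/sys/vm (as the handler names suggest), the behaviour can be observed from userspace; a minimal sketch, requiring root:

/* demo.c - observe the pairwise zeroing from userspace (requires root) */
#include <stdio.h>
#include <stdlib.h>

static long vm_read(const char *name)
{
        char path[64];
        long val;
        FILE *f;

        snprintf(path, sizeof(path), "/proc/sys/vm/%s", name);
        f = fopen(path, "r");
        if (!f || fscanf(f, "%ld", &val) != 1) {
                perror(path);
                exit(EXIT_FAILURE);
        }
        fclose(f);
        return val;
}

static void vm_write(const char *name, long val)
{
        char path[64];
        FILE *f;

        snprintf(path, sizeof(path), "/proc/sys/vm/%s", name);
        f = fopen(path, "w");
        if (!f || fprintf(f, "%ld\n", val) < 0) {
                perror(path);
                exit(EXIT_FAILURE);
        }
        fclose(f);      /* flush the value to the sysctl */
}

int main(void)
{
        vm_write("dirty_bytes", 64L << 20);     /* 64 MiB hard cap */
        printf("dirty_ratio: %ld\n", vm_read("dirty_ratio"));  /* now 0 */

        vm_write("dirty_ratio", 10);            /* back to a percentage */
        printf("dirty_bytes: %ld\n", vm_read("dirty_bytes"));  /* now 0 */
        return 0;
}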
@@ -362,26 +426,32 @@ unsigned long determine_dirtyable_memory(void)
 }
 
 void
-get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
-                struct backing_dev_info *bdi)
+get_dirty_limits(unsigned long *pbackground, unsigned long *pdirty,
+                unsigned long *pbdi_dirty, struct backing_dev_info *bdi)
 {
-       int background_ratio;           /* Percentages */
-       int dirty_ratio;
-       long background;
-       long dirty;
+       unsigned long background;
+       unsigned long dirty;
        unsigned long available_memory = determine_dirtyable_memory();
        struct task_struct *tsk;
 
-       dirty_ratio = vm_dirty_ratio;
-       if (dirty_ratio < 5)
-               dirty_ratio = 5;
+       if (vm_dirty_bytes)
+               dirty = DIV_ROUND_UP(vm_dirty_bytes, PAGE_SIZE);
+       else {
+               int dirty_ratio;
 
-       background_ratio = dirty_background_ratio;
-       if (background_ratio >= dirty_ratio)
-               background_ratio = dirty_ratio / 2;
+               dirty_ratio = vm_dirty_ratio;
+               if (dirty_ratio < 5)
+                       dirty_ratio = 5;
+               dirty = (dirty_ratio * available_memory) / 100;
+       }
+
+       if (dirty_background_bytes)
+               background = DIV_ROUND_UP(dirty_background_bytes, PAGE_SIZE);
+       else
+               background = (dirty_background_ratio * available_memory) / 100;
 
-       background = (background_ratio * available_memory) / 100;
-       dirty = (dirty_ratio * available_memory) / 100;
+       if (background >= dirty)
+               background = dirty / 2;
        tsk = current;
        if (tsk->flags & PF_LESS_THROTTLE || rt_task(tsk)) {
                background += background / 4;
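
Each threshold now prefers its bytes tunable when nonzero, converting to pages with DIV_ROUND_UP() so that any nonzero byte setting yields at least one page, and the old background-below-dirty clamp is applied to the computed page counts rather than to the raw percentages. A standalone model of just this selection logic (a userspace sketch; compute_thresholds() and its flattened parameter list are illustrative stand-ins, only the rules mirror the patched function):

/* thresholds.c - standalone model of the selection rules above */
#include <stdio.h>

#define PAGE_SIZE       4096UL          /* assume 4 KiB pages */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static void compute_thresholds(unsigned long available_memory, /* pages */
                               unsigned long dirty_bytes, int dirty_ratio,
                               unsigned long bg_bytes, int bg_ratio,
                               unsigned long *pdirty,
                               unsigned long *pbackground)
{
        unsigned long dirty, background;

        if (dirty_bytes)        /* bytes tunable wins; round up to pages */
                dirty = DIV_ROUND_UP(dirty_bytes, PAGE_SIZE);
        else {
                int ratio = dirty_ratio < 5 ? 5 : dirty_ratio;
                dirty = (ratio * available_memory) / 100;
        }

        if (bg_bytes)
                background = DIV_ROUND_UP(bg_bytes, PAGE_SIZE);
        else
                background = (bg_ratio * available_memory) / 100;

        /* background must stay strictly below the hard dirty limit */
        if (background >= dirty)
                background = dirty / 2;

        *pdirty = dirty;
        *pbackground = background;
}

int main(void)
{
        unsigned long dirty, background;

        /* 1 GiB dirtyable, dirty_bytes = 8 MiB, background ratio = 5% */
        compute_thresholds(262144, 8UL << 20, 0, 0, 5, &dirty, &background);

        /*
         * 5% of 1 GiB (13107 pages) exceeds the 2048-page dirty cap, so
         * the clamp halves the cap: prints "dirty=2048 background=1024".
         */
        printf("dirty=%lu background=%lu\n", dirty, background);
        return 0;
}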
@@ -423,9 +493,9 @@ static void balance_dirty_pages(struct address_space *mapping)
 {
        long nr_reclaimable, bdi_nr_reclaimable;
        long nr_writeback, bdi_nr_writeback;
-       long background_thresh;
-       long dirty_thresh;
-       long bdi_thresh;
+       unsigned long background_thresh;
+       unsigned long dirty_thresh;
+       unsigned long bdi_thresh;
        unsigned long pages_written = 0;
        unsigned long write_chunk = sync_writeback_pages();
 
@@ -580,8 +650,8 @@ EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
 
 void throttle_vm_writeout(gfp_t gfp_mask)
 {
-       long background_thresh;
-       long dirty_thresh;
+       unsigned long background_thresh;
+       unsigned long dirty_thresh;
 
        for ( ; ; ) {
                get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
@@ -624,8 +694,8 @@ static void background_writeout(unsigned long _min_pages)
        };
 
        for ( ; ; ) {
-               long background_thresh;
-               long dirty_thresh;
+               unsigned long background_thresh;
+               unsigned long dirty_thresh;
 
                get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
                if (global_page_state(NR_FILE_DIRTY) +
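
The three hunks above are mechanical fallout of the new prototype: every caller's threshold locals switch from long to unsigned long, both because thresholds are page counts that are never negative and because passing long * where get_dirty_limits() now expects unsigned long * would draw a pointer-type warning.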
@@ -868,9 +938,11 @@ int write_cache_pages(struct address_space *mapping,
        int done = 0;
        struct pagevec pvec;
        int nr_pages;
+       pgoff_t uninitialized_var(writeback_index);
        pgoff_t index;
        pgoff_t end;            /* Inclusive */
-       int scanned = 0;
+       pgoff_t done_index;
+       int cycled;
        int range_whole = 0;
        long nr_to_write = wbc->nr_to_write;
 
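
Three new locals drive the rewritten loop that follows: writeback_index snapshots the cyclic starting point so a wrap-around pass knows where to stop, done_index tracks one past the last page processed so mapping->writeback_index can resume correctly even after an error, and cycled replaces scanned with inverted sense (has the scan already wrapped?); see the sketch after the final hunk.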
@@ -881,83 +953,134 @@ int write_cache_pages(struct address_space *mapping,
 
        pagevec_init(&pvec, 0);
        if (wbc->range_cyclic) {
-               index = mapping->writeback_index; /* Start from prev offset */
+               writeback_index = mapping->writeback_index; /* prev offset */
+               index = writeback_index;
+               if (index == 0)
+                       cycled = 1;
+               else
+                       cycled = 0;
                end = -1;
        } else {
                index = wbc->range_start >> PAGE_CACHE_SHIFT;
                end = wbc->range_end >> PAGE_CACHE_SHIFT;
                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
                        range_whole = 1;
-               scanned = 1;
+               cycled = 1; /* ignore range_cyclic tests */
        }
 retry:
-       while (!done && (index <= end) &&
-              (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-              PAGECACHE_TAG_DIRTY,
-              min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
-               unsigned i;
+       done_index = index;
+       while (!done && (index <= end)) {
+               int i;
+
+               nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+                             PAGECACHE_TAG_DIRTY,
+                             min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+               if (nr_pages == 0)
+                       break;
 
-               scanned = 1;
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
 
                        /*
-                        * At this point we hold neither mapping->tree_lock nor
-                        * lock on the page itself: the page may be truncated or
-                        * invalidated (changing page->mapping to NULL), or even
-                        * swizzled back from swapper_space to tmpfs file
-                        * mapping
+                        * At this point, the page may be truncated or
+                        * invalidated (changing page->mapping to NULL), or
+                        * even swizzled back from swapper_space to tmpfs file
+                        * mapping. However, page->index will not change
+                        * because we have a reference on the page.
                         */
+                       if (page->index > end) {
+                               /*
+                                * can't be range_cyclic (1st pass) because
+                                * end == -1 in that case.
+                                */
+                               done = 1;
+                               break;
+                       }
+
+                       done_index = page->index + 1;
+
                        lock_page(page);
 
+                       /*
+                        * Page truncated or invalidated. We can freely skip it
+                        * then, even for data integrity operations: the page
+                        * has disappeared concurrently, so there could be no
+                        * real expectation of this data integrity operation
+                        * even if there is now a new, dirty page at the same
+                        * pagecache address.
+                        */
                        if (unlikely(page->mapping != mapping)) {
+continue_unlock:
                                unlock_page(page);
                                continue;
                        }
 
-                       if (!wbc->range_cyclic && page->index > end) {
-                               done = 1;
-                               unlock_page(page);
-                               continue;
+                       if (!PageDirty(page)) {
+                               /* someone wrote it for us */
+                               goto continue_unlock;
                        }
 
-                       if (wbc->sync_mode != WB_SYNC_NONE)
-                               wait_on_page_writeback(page);
-
-                       if (PageWriteback(page) ||
-                           !clear_page_dirty_for_io(page)) {
-                               unlock_page(page);
-                               continue;
+                       if (PageWriteback(page)) {
+                               if (wbc->sync_mode != WB_SYNC_NONE)
+                                       wait_on_page_writeback(page);
+                               else
+                                       goto continue_unlock;
                        }
 
-                       ret = (*writepage)(page, wbc, data);
+                       BUG_ON(PageWriteback(page));
+                       if (!clear_page_dirty_for_io(page))
+                               goto continue_unlock;
 
-                       if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
-                               unlock_page(page);
-                               ret = 0;
+                       ret = (*writepage)(page, wbc, data);
+                       if (unlikely(ret)) {
+                               if (ret == AOP_WRITEPAGE_ACTIVATE) {
+                                       unlock_page(page);
+                                       ret = 0;
+                               } else {
+                                       /*
+                                        * done_index is set past this page,
+                                        * so media errors will not choke
+                                        * background writeout for the entire
+                                        * file. This has consequences for
+                                        * range_cyclic semantics (ie. it may
+                                        * not be suitable for data integrity
+                                        * writeout).
+                                        */
+                                       done = 1;
+                                       break;
+                               }
+                       }
+
+                       if (wbc->sync_mode == WB_SYNC_NONE) {
+                               wbc->nr_to_write--;
+                               if (wbc->nr_to_write <= 0) {
+                                       done = 1;
+                                       break;
+                               }
                        }
-                       if (ret || (--nr_to_write <= 0))
-                               done = 1;
                        if (wbc->nonblocking && bdi_write_congested(bdi)) {
                                wbc->encountered_congestion = 1;
                                done = 1;
+                               break;
                        }
                }
                pagevec_release(&pvec);
                cond_resched();
        }
-       if (!scanned && !done) {
+       if (!cycled) {
                /*
+                * range_cyclic:
                 * We hit the last page and there is more work to be done: wrap
                 * back to the start of the file
                 */
-               scanned = 1;
+               cycled = 1;
                index = 0;
+               end = writeback_index - 1;
                goto retry;
        }
        if (!wbc->no_nrwrite_index_update) {
                if (wbc->range_cyclic || (range_whole && nr_to_write > 0))
-                       mapping->writeback_index = index;
+                       mapping->writeback_index = done_index;
                wbc->nr_to_write = nr_to_write;
        }
 
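
The net effect for range_cyclic callers is a bounded two-pass scan: the first pass runs from the saved writeback_index to the end of the file, and only if the scan started mid-file does a second pass cover the earlier pages, with end = writeback_index - 1 ensuring each page is visited at most once per call (the old code reset index to 0 and could revisit pages it had already written). A toy model of just that control flow (a userspace sketch; scan_range() and NPAGES are hypothetical stand-ins for the tagged pagevec lookup):

/* cyclic.c - toy model of the new two-pass range_cyclic scan */
#include <stdio.h>

#define NPAGES 8UL      /* pretend the file has 8 pages */

static void scan_range(unsigned long start, unsigned long end)
{
        unsigned long i;

        for (i = start; i <= end && i < NPAGES; i++)
                printf("write page %lu\n", i);
}

int main(void)
{
        unsigned long writeback_index = 5;      /* resume point, mid-file */
        unsigned long index = writeback_index;
        unsigned long end = NPAGES - 1;         /* stands in for end = -1 */
        int cycled = (index == 0);              /* nothing to wrap back over */

retry:
        scan_range(index, end);         /* first call: 5..7; after wrap: 0..4 */
        if (!cycled) {
                /* second pass covers only what the first one skipped */
                cycled = 1;
                index = 0;
                end = writeback_index - 1;
                goto retry;
        }
        return 0;
}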