Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--  mm/vmscan.c  |  750
1 file changed, 214 insertions(+), 536 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 33dc256033b5..66e431060c05 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -53,24 +53,6 @@ | |||
53 | #define CREATE_TRACE_POINTS | 53 | #define CREATE_TRACE_POINTS |
54 | #include <trace/events/vmscan.h> | 54 | #include <trace/events/vmscan.h> |
55 | 55 | ||
56 | /* | ||
57 | * reclaim_mode determines how the inactive list is shrunk | ||
58 | * RECLAIM_MODE_SINGLE: Reclaim only order-0 pages | ||
59 | * RECLAIM_MODE_ASYNC: Do not block | ||
60 | * RECLAIM_MODE_SYNC: Allow blocking e.g. call wait_on_page_writeback | ||
61 | * RECLAIM_MODE_LUMPYRECLAIM: For high-order allocations, take a reference | ||
62 | * page from the LRU and reclaim all pages within a | ||
63 | * naturally aligned range | ||
64 | * RECLAIM_MODE_COMPACTION: For high-order allocations, reclaim a number of | ||
65 | * order-0 pages and then compact the zone | ||
66 | */ | ||
67 | typedef unsigned __bitwise__ reclaim_mode_t; | ||
68 | #define RECLAIM_MODE_SINGLE ((__force reclaim_mode_t)0x01u) | ||
69 | #define RECLAIM_MODE_ASYNC ((__force reclaim_mode_t)0x02u) | ||
70 | #define RECLAIM_MODE_SYNC ((__force reclaim_mode_t)0x04u) | ||
71 | #define RECLAIM_MODE_LUMPYRECLAIM ((__force reclaim_mode_t)0x08u) | ||
72 | #define RECLAIM_MODE_COMPACTION ((__force reclaim_mode_t)0x10u) | ||
73 | |||
74 | struct scan_control { | 56 | struct scan_control { |
75 | /* Incremented by the number of inactive pages that were scanned */ | 57 | /* Incremented by the number of inactive pages that were scanned */ |
76 | unsigned long nr_scanned; | 58 | unsigned long nr_scanned; |
@@ -96,11 +78,8 @@ struct scan_control { | |||
96 | 78 | ||
97 | int order; | 79 | int order; |
98 | 80 | ||
99 | /* | 81 | /* Scan (total_size >> priority) pages at once */ |
100 | * Intend to reclaim enough continuous memory rather than reclaim | 82 | int priority; |
101 | * enough amount of memory. i.e, mode for high order allocation. | ||
102 | */ | ||
103 | reclaim_mode_t reclaim_mode; | ||
104 | 83 | ||
105 | /* | 84 | /* |
106 | * The memory cgroup that hit its limit and as a result is the | 85 | * The memory cgroup that hit its limit and as a result is the |
@@ -115,11 +94,6 @@ struct scan_control { | |||
115 | nodemask_t *nodemask; | 94 | nodemask_t *nodemask; |
116 | }; | 95 | }; |
117 | 96 | ||
118 | struct mem_cgroup_zone { | ||
119 | struct mem_cgroup *mem_cgroup; | ||
120 | struct zone *zone; | ||
121 | }; | ||
122 | |||
123 | #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) | 97 | #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) |
124 | 98 | ||
125 | #ifdef ARCH_HAS_PREFETCH | 99 | #ifdef ARCH_HAS_PREFETCH |
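The two hunks above replace the per-call reclaim_mode plumbing with a priority field carried in scan_control, and drop the mem_cgroup_zone wrapper in favour of passing lruvecs directly. As the new comment says, each pass scans roughly (total_size >> priority) pages. A minimal userspace sketch of that arithmetic, assuming DEF_PRIORITY is 12 as in this kernel and an invented list size:

/* Userspace model only: DEF_PRIORITY mirrors the kernel value, the list
 * size is invented. */
#include <stdio.h>

#define DEF_PRIORITY 12

int main(void)
{
	unsigned long lru_size = 1UL << 20;	/* pretend 1M pages (~4GB of 4K pages) */
	int priority;

	for (priority = DEF_PRIORITY; priority >= 0; priority--)
		printf("priority %2d -> scan %lu pages per pass\n",
		       priority, lru_size >> priority);
	return 0;
}

Priority 12 touches 1/4096th of the list; only a full-pressure pass at priority 0 walks all of it.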
@@ -164,44 +138,21 @@ static bool global_reclaim(struct scan_control *sc) | |||
164 | { | 138 | { |
165 | return !sc->target_mem_cgroup; | 139 | return !sc->target_mem_cgroup; |
166 | } | 140 | } |
167 | |||
168 | static bool scanning_global_lru(struct mem_cgroup_zone *mz) | ||
169 | { | ||
170 | return !mz->mem_cgroup; | ||
171 | } | ||
172 | #else | 141 | #else |
173 | static bool global_reclaim(struct scan_control *sc) | 142 | static bool global_reclaim(struct scan_control *sc) |
174 | { | 143 | { |
175 | return true; | 144 | return true; |
176 | } | 145 | } |
177 | |||
178 | static bool scanning_global_lru(struct mem_cgroup_zone *mz) | ||
179 | { | ||
180 | return true; | ||
181 | } | ||
182 | #endif | 146 | #endif |
183 | 147 | ||
184 | static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz) | 148 | static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru) |
185 | { | 149 | { |
186 | if (!scanning_global_lru(mz)) | 150 | if (!mem_cgroup_disabled()) |
187 | return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone); | 151 | return mem_cgroup_get_lru_size(lruvec, lru); |
188 | 152 | ||
189 | return &mz->zone->reclaim_stat; | 153 | return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru); |
190 | } | 154 | } |
191 | 155 | ||
192 | static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz, | ||
193 | enum lru_list lru) | ||
194 | { | ||
195 | if (!scanning_global_lru(mz)) | ||
196 | return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup, | ||
197 | zone_to_nid(mz->zone), | ||
198 | zone_idx(mz->zone), | ||
199 | BIT(lru)); | ||
200 | |||
201 | return zone_page_state(mz->zone, NR_LRU_BASE + lru); | ||
202 | } | ||
203 | |||
204 | |||
205 | /* | 156 | /* |
206 | * Add a shrinker callback to be called from the vm | 157 | * Add a shrinker callback to be called from the vm |
207 | */ | 158 | */ |
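The new get_lru_size() in the hunk above is the only size lookup left once mem_cgroup_zone is gone: callers hand in a lruvec and the helper returns either the memcg-accounted count or the zone vmstat counter. A rough userspace model of that dispatch (the types, field names and numbers here are stand-ins, not the kernel's):

#include <stdbool.h>
#include <stdio.h>

enum lru_list { LRU_INACTIVE_ANON, LRU_ACTIVE_ANON,
		LRU_INACTIVE_FILE, LRU_ACTIVE_FILE, NR_LRU_LISTS };

struct lruvec_model {
	unsigned long memcg_count[NR_LRU_LISTS];	/* per-memcg accounting */
	unsigned long zone_count[NR_LRU_LISTS];		/* zone vmstat counters */
};

static bool memcg_disabled;	/* stand-in for mem_cgroup_disabled() */

static unsigned long get_lru_size_model(struct lruvec_model *lruvec,
					enum lru_list lru)
{
	if (!memcg_disabled)
		return lruvec->memcg_count[lru];
	return lruvec->zone_count[lru];
}

int main(void)
{
	struct lruvec_model v = {
		.memcg_count = { 10, 20, 30, 40 },
		.zone_count  = { 1000, 2000, 3000, 4000 },
	};

	printf("inactive file: %lu\n", get_lru_size_model(&v, LRU_INACTIVE_FILE));
	memcg_disabled = true;
	printf("inactive file: %lu\n", get_lru_size_model(&v, LRU_INACTIVE_FILE));
	return 0;
}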
@@ -364,39 +315,6 @@ out: | |||
364 | return ret; | 315 | return ret; |
365 | } | 316 | } |
366 | 317 | ||
367 | static void set_reclaim_mode(int priority, struct scan_control *sc, | ||
368 | bool sync) | ||
369 | { | ||
370 | reclaim_mode_t syncmode = sync ? RECLAIM_MODE_SYNC : RECLAIM_MODE_ASYNC; | ||
371 | |||
372 | /* | ||
373 | * Initially assume we are entering either lumpy reclaim or | ||
374 | * reclaim/compaction.Depending on the order, we will either set the | ||
375 | * sync mode or just reclaim order-0 pages later. | ||
376 | */ | ||
377 | if (COMPACTION_BUILD) | ||
378 | sc->reclaim_mode = RECLAIM_MODE_COMPACTION; | ||
379 | else | ||
380 | sc->reclaim_mode = RECLAIM_MODE_LUMPYRECLAIM; | ||
381 | |||
382 | /* | ||
383 | * Avoid using lumpy reclaim or reclaim/compaction if possible by | ||
384 | * restricting when its set to either costly allocations or when | ||
385 | * under memory pressure | ||
386 | */ | ||
387 | if (sc->order > PAGE_ALLOC_COSTLY_ORDER) | ||
388 | sc->reclaim_mode |= syncmode; | ||
389 | else if (sc->order && priority < DEF_PRIORITY - 2) | ||
390 | sc->reclaim_mode |= syncmode; | ||
391 | else | ||
392 | sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC; | ||
393 | } | ||
394 | |||
395 | static void reset_reclaim_mode(struct scan_control *sc) | ||
396 | { | ||
397 | sc->reclaim_mode = RECLAIM_MODE_SINGLE | RECLAIM_MODE_ASYNC; | ||
398 | } | ||
399 | |||
400 | static inline int is_page_cache_freeable(struct page *page) | 318 | static inline int is_page_cache_freeable(struct page *page) |
401 | { | 319 | { |
402 | /* | 320 | /* |
@@ -416,10 +334,6 @@ static int may_write_to_queue(struct backing_dev_info *bdi, | |||
416 | return 1; | 334 | return 1; |
417 | if (bdi == current->backing_dev_info) | 335 | if (bdi == current->backing_dev_info) |
418 | return 1; | 336 | return 1; |
419 | |||
420 | /* lumpy reclaim for hugepage often need a lot of write */ | ||
421 | if (sc->order > PAGE_ALLOC_COSTLY_ORDER) | ||
422 | return 1; | ||
423 | return 0; | 337 | return 0; |
424 | } | 338 | } |
425 | 339 | ||
@@ -523,8 +437,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping, | |||
523 | /* synchronous write or broken a_ops? */ | 437 | /* synchronous write or broken a_ops? */ |
524 | ClearPageReclaim(page); | 438 | ClearPageReclaim(page); |
525 | } | 439 | } |
526 | trace_mm_vmscan_writepage(page, | 440 | trace_mm_vmscan_writepage(page, trace_reclaim_flags(page)); |
527 | trace_reclaim_flags(page, sc->reclaim_mode)); | ||
528 | inc_zone_page_state(page, NR_VMSCAN_WRITE); | 441 | inc_zone_page_state(page, NR_VMSCAN_WRITE); |
529 | return PAGE_SUCCESS; | 442 | return PAGE_SUCCESS; |
530 | } | 443 | } |
@@ -701,19 +614,15 @@ enum page_references { | |||
701 | }; | 614 | }; |
702 | 615 | ||
703 | static enum page_references page_check_references(struct page *page, | 616 | static enum page_references page_check_references(struct page *page, |
704 | struct mem_cgroup_zone *mz, | ||
705 | struct scan_control *sc) | 617 | struct scan_control *sc) |
706 | { | 618 | { |
707 | int referenced_ptes, referenced_page; | 619 | int referenced_ptes, referenced_page; |
708 | unsigned long vm_flags; | 620 | unsigned long vm_flags; |
709 | 621 | ||
710 | referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags); | 622 | referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup, |
623 | &vm_flags); | ||
711 | referenced_page = TestClearPageReferenced(page); | 624 | referenced_page = TestClearPageReferenced(page); |
712 | 625 | ||
713 | /* Lumpy reclaim - ignore references */ | ||
714 | if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM) | ||
715 | return PAGEREF_RECLAIM; | ||
716 | |||
717 | /* | 626 | /* |
718 | * Mlock lost the isolation race with us. Let try_to_unmap() | 627 | * Mlock lost the isolation race with us. Let try_to_unmap() |
719 | * move the page to the unevictable list. | 628 | * move the page to the unevictable list. |
@@ -722,7 +631,7 @@ static enum page_references page_check_references(struct page *page, | |||
722 | return PAGEREF_RECLAIM; | 631 | return PAGEREF_RECLAIM; |
723 | 632 | ||
724 | if (referenced_ptes) { | 633 | if (referenced_ptes) { |
725 | if (PageAnon(page)) | 634 | if (PageSwapBacked(page)) |
726 | return PAGEREF_ACTIVATE; | 635 | return PAGEREF_ACTIVATE; |
727 | /* | 636 | /* |
728 | * All mapped pages start out with page table | 637 | * All mapped pages start out with page table |
@@ -763,9 +672,8 @@ static enum page_references page_check_references(struct page *page, | |||
763 | * shrink_page_list() returns the number of reclaimed pages | 672 | * shrink_page_list() returns the number of reclaimed pages |
764 | */ | 673 | */ |
765 | static unsigned long shrink_page_list(struct list_head *page_list, | 674 | static unsigned long shrink_page_list(struct list_head *page_list, |
766 | struct mem_cgroup_zone *mz, | 675 | struct zone *zone, |
767 | struct scan_control *sc, | 676 | struct scan_control *sc, |
768 | int priority, | ||
769 | unsigned long *ret_nr_dirty, | 677 | unsigned long *ret_nr_dirty, |
770 | unsigned long *ret_nr_writeback) | 678 | unsigned long *ret_nr_writeback) |
771 | { | 679 | { |
@@ -794,7 +702,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
794 | goto keep; | 702 | goto keep; |
795 | 703 | ||
796 | VM_BUG_ON(PageActive(page)); | 704 | VM_BUG_ON(PageActive(page)); |
797 | VM_BUG_ON(page_zone(page) != mz->zone); | 705 | VM_BUG_ON(page_zone(page) != zone); |
798 | 706 | ||
799 | sc->nr_scanned++; | 707 | sc->nr_scanned++; |
800 | 708 | ||
@@ -813,22 +721,11 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
813 | 721 | ||
814 | if (PageWriteback(page)) { | 722 | if (PageWriteback(page)) { |
815 | nr_writeback++; | 723 | nr_writeback++; |
816 | /* | 724 | unlock_page(page); |
817 | * Synchronous reclaim cannot queue pages for | 725 | goto keep; |
818 | * writeback due to the possibility of stack overflow | ||
819 | * but if it encounters a page under writeback, wait | ||
820 | * for the IO to complete. | ||
821 | */ | ||
822 | if ((sc->reclaim_mode & RECLAIM_MODE_SYNC) && | ||
823 | may_enter_fs) | ||
824 | wait_on_page_writeback(page); | ||
825 | else { | ||
826 | unlock_page(page); | ||
827 | goto keep_lumpy; | ||
828 | } | ||
829 | } | 726 | } |
830 | 727 | ||
831 | references = page_check_references(page, mz, sc); | 728 | references = page_check_references(page, sc); |
832 | switch (references) { | 729 | switch (references) { |
833 | case PAGEREF_ACTIVATE: | 730 | case PAGEREF_ACTIVATE: |
834 | goto activate_locked; | 731 | goto activate_locked; |
@@ -879,7 +776,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
879 | * unless under significant pressure. | 776 | * unless under significant pressure. |
880 | */ | 777 | */ |
881 | if (page_is_file_cache(page) && | 778 | if (page_is_file_cache(page) && |
882 | (!current_is_kswapd() || priority >= DEF_PRIORITY - 2)) { | 779 | (!current_is_kswapd() || |
780 | sc->priority >= DEF_PRIORITY - 2)) { | ||
883 | /* | 781 | /* |
884 | * Immediately reclaim when written back. | 782 | * Immediately reclaim when written back. |
885 | * Similar in principal to deactivate_page() | 783 | * Similar in principal to deactivate_page() |
@@ -908,7 +806,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
908 | goto activate_locked; | 806 | goto activate_locked; |
909 | case PAGE_SUCCESS: | 807 | case PAGE_SUCCESS: |
910 | if (PageWriteback(page)) | 808 | if (PageWriteback(page)) |
911 | goto keep_lumpy; | 809 | goto keep; |
912 | if (PageDirty(page)) | 810 | if (PageDirty(page)) |
913 | goto keep; | 811 | goto keep; |
914 | 812 | ||
@@ -994,7 +892,6 @@ cull_mlocked: | |||
994 | try_to_free_swap(page); | 892 | try_to_free_swap(page); |
995 | unlock_page(page); | 893 | unlock_page(page); |
996 | putback_lru_page(page); | 894 | putback_lru_page(page); |
997 | reset_reclaim_mode(sc); | ||
998 | continue; | 895 | continue; |
999 | 896 | ||
1000 | activate_locked: | 897 | activate_locked: |
@@ -1007,8 +904,6 @@ activate_locked: | |||
1007 | keep_locked: | 904 | keep_locked: |
1008 | unlock_page(page); | 905 | unlock_page(page); |
1009 | keep: | 906 | keep: |
1010 | reset_reclaim_mode(sc); | ||
1011 | keep_lumpy: | ||
1012 | list_add(&page->lru, &ret_pages); | 907 | list_add(&page->lru, &ret_pages); |
1013 | VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); | 908 | VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); |
1014 | } | 909 | } |
@@ -1020,7 +915,7 @@ keep_lumpy: | |||
1020 | * will encounter the same problem | 915 | * will encounter the same problem |
1021 | */ | 916 | */ |
1022 | if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc)) | 917 | if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc)) |
1023 | zone_set_flag(mz->zone, ZONE_CONGESTED); | 918 | zone_set_flag(zone, ZONE_CONGESTED); |
1024 | 919 | ||
1025 | free_hot_cold_page_list(&free_pages, 1); | 920 | free_hot_cold_page_list(&free_pages, 1); |
1026 | 921 | ||
@@ -1041,34 +936,15 @@ keep_lumpy: | |||
1041 | * | 936 | * |
1042 | * returns 0 on success, -ve errno on failure. | 937 | * returns 0 on success, -ve errno on failure. |
1043 | */ | 938 | */ |
1044 | int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) | 939 | int __isolate_lru_page(struct page *page, isolate_mode_t mode) |
1045 | { | 940 | { |
1046 | bool all_lru_mode; | ||
1047 | int ret = -EINVAL; | 941 | int ret = -EINVAL; |
1048 | 942 | ||
1049 | /* Only take pages on the LRU. */ | 943 | /* Only take pages on the LRU. */ |
1050 | if (!PageLRU(page)) | 944 | if (!PageLRU(page)) |
1051 | return ret; | 945 | return ret; |
1052 | 946 | ||
1053 | all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) == | 947 | /* Do not give back unevictable pages for compaction */ |
1054 | (ISOLATE_ACTIVE|ISOLATE_INACTIVE); | ||
1055 | |||
1056 | /* | ||
1057 | * When checking the active state, we need to be sure we are | ||
1058 | * dealing with comparible boolean values. Take the logical not | ||
1059 | * of each. | ||
1060 | */ | ||
1061 | if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE)) | ||
1062 | return ret; | ||
1063 | |||
1064 | if (!all_lru_mode && !!page_is_file_cache(page) != file) | ||
1065 | return ret; | ||
1066 | |||
1067 | /* | ||
1068 | * When this function is being called for lumpy reclaim, we | ||
1069 | * initially look into all LRU pages, active, inactive and | ||
1070 | * unevictable; only give shrink_page_list evictable pages. | ||
1071 | */ | ||
1072 | if (PageUnevictable(page)) | 948 | if (PageUnevictable(page)) |
1073 | return ret; | 949 | return ret; |
1074 | 950 | ||
@@ -1135,54 +1011,39 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file) | |||
1135 | * Appropriate locks must be held before calling this function. | 1011 | * Appropriate locks must be held before calling this function. |
1136 | * | 1012 | * |
1137 | * @nr_to_scan: The number of pages to look through on the list. | 1013 | * @nr_to_scan: The number of pages to look through on the list. |
1138 | * @mz: The mem_cgroup_zone to pull pages from. | 1014 | * @lruvec: The LRU vector to pull pages from. |
1139 | * @dst: The temp list to put pages on to. | 1015 | * @dst: The temp list to put pages on to. |
1140 | * @nr_scanned: The number of pages that were scanned. | 1016 | * @nr_scanned: The number of pages that were scanned. |
1141 | * @sc: The scan_control struct for this reclaim session | 1017 | * @sc: The scan_control struct for this reclaim session |
1142 | * @mode: One of the LRU isolation modes | 1018 | * @mode: One of the LRU isolation modes |
1143 | * @active: True [1] if isolating active pages | 1019 | * @lru: LRU list id for isolating |
1144 | * @file: True [1] if isolating file [!anon] pages | ||
1145 | * | 1020 | * |
1146 | * returns how many pages were moved onto *@dst. | 1021 | * returns how many pages were moved onto *@dst. |
1147 | */ | 1022 | */ |
1148 | static unsigned long isolate_lru_pages(unsigned long nr_to_scan, | 1023 | static unsigned long isolate_lru_pages(unsigned long nr_to_scan, |
1149 | struct mem_cgroup_zone *mz, struct list_head *dst, | 1024 | struct lruvec *lruvec, struct list_head *dst, |
1150 | unsigned long *nr_scanned, struct scan_control *sc, | 1025 | unsigned long *nr_scanned, struct scan_control *sc, |
1151 | isolate_mode_t mode, int active, int file) | 1026 | isolate_mode_t mode, enum lru_list lru) |
1152 | { | 1027 | { |
1153 | struct lruvec *lruvec; | 1028 | struct list_head *src = &lruvec->lists[lru]; |
1154 | struct list_head *src; | ||
1155 | unsigned long nr_taken = 0; | 1029 | unsigned long nr_taken = 0; |
1156 | unsigned long nr_lumpy_taken = 0; | ||
1157 | unsigned long nr_lumpy_dirty = 0; | ||
1158 | unsigned long nr_lumpy_failed = 0; | ||
1159 | unsigned long scan; | 1030 | unsigned long scan; |
1160 | int lru = LRU_BASE; | ||
1161 | |||
1162 | lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup); | ||
1163 | if (active) | ||
1164 | lru += LRU_ACTIVE; | ||
1165 | if (file) | ||
1166 | lru += LRU_FILE; | ||
1167 | src = &lruvec->lists[lru]; | ||
1168 | 1031 | ||
1169 | for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) { | 1032 | for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) { |
1170 | struct page *page; | 1033 | struct page *page; |
1171 | unsigned long pfn; | 1034 | int nr_pages; |
1172 | unsigned long end_pfn; | ||
1173 | unsigned long page_pfn; | ||
1174 | int zone_id; | ||
1175 | 1035 | ||
1176 | page = lru_to_page(src); | 1036 | page = lru_to_page(src); |
1177 | prefetchw_prev_lru_page(page, src, flags); | 1037 | prefetchw_prev_lru_page(page, src, flags); |
1178 | 1038 | ||
1179 | VM_BUG_ON(!PageLRU(page)); | 1039 | VM_BUG_ON(!PageLRU(page)); |
1180 | 1040 | ||
1181 | switch (__isolate_lru_page(page, mode, file)) { | 1041 | switch (__isolate_lru_page(page, mode)) { |
1182 | case 0: | 1042 | case 0: |
1183 | mem_cgroup_lru_del(page); | 1043 | nr_pages = hpage_nr_pages(page); |
1044 | mem_cgroup_update_lru_size(lruvec, lru, -nr_pages); | ||
1184 | list_move(&page->lru, dst); | 1045 | list_move(&page->lru, dst); |
1185 | nr_taken += hpage_nr_pages(page); | 1046 | nr_taken += nr_pages; |
1186 | break; | 1047 | break; |
1187 | 1048 | ||
1188 | case -EBUSY: | 1049 | case -EBUSY: |
@@ -1193,93 +1054,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, | |||
1193 | default: | 1054 | default: |
1194 | BUG(); | 1055 | BUG(); |
1195 | } | 1056 | } |
1196 | |||
1197 | if (!sc->order || !(sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM)) | ||
1198 | continue; | ||
1199 | |||
1200 | /* | ||
1201 | * Attempt to take all pages in the order aligned region | ||
1202 | * surrounding the tag page. Only take those pages of | ||
1203 | * the same active state as that tag page. We may safely | ||
1204 | * round the target page pfn down to the requested order | ||
1205 | * as the mem_map is guaranteed valid out to MAX_ORDER, | ||
1206 | * where that page is in a different zone we will detect | ||
1207 | * it from its zone id and abort this block scan. | ||
1208 | */ | ||
1209 | zone_id = page_zone_id(page); | ||
1210 | page_pfn = page_to_pfn(page); | ||
1211 | pfn = page_pfn & ~((1 << sc->order) - 1); | ||
1212 | end_pfn = pfn + (1 << sc->order); | ||
1213 | for (; pfn < end_pfn; pfn++) { | ||
1214 | struct page *cursor_page; | ||
1215 | |||
1216 | /* The target page is in the block, ignore it. */ | ||
1217 | if (unlikely(pfn == page_pfn)) | ||
1218 | continue; | ||
1219 | |||
1220 | /* Avoid holes within the zone. */ | ||
1221 | if (unlikely(!pfn_valid_within(pfn))) | ||
1222 | break; | ||
1223 | |||
1224 | cursor_page = pfn_to_page(pfn); | ||
1225 | |||
1226 | /* Check that we have not crossed a zone boundary. */ | ||
1227 | if (unlikely(page_zone_id(cursor_page) != zone_id)) | ||
1228 | break; | ||
1229 | |||
1230 | /* | ||
1231 | * If we don't have enough swap space, reclaiming of | ||
1232 | * anon page which don't already have a swap slot is | ||
1233 | * pointless. | ||
1234 | */ | ||
1235 | if (nr_swap_pages <= 0 && PageSwapBacked(cursor_page) && | ||
1236 | !PageSwapCache(cursor_page)) | ||
1237 | break; | ||
1238 | |||
1239 | if (__isolate_lru_page(cursor_page, mode, file) == 0) { | ||
1240 | unsigned int isolated_pages; | ||
1241 | |||
1242 | mem_cgroup_lru_del(cursor_page); | ||
1243 | list_move(&cursor_page->lru, dst); | ||
1244 | isolated_pages = hpage_nr_pages(cursor_page); | ||
1245 | nr_taken += isolated_pages; | ||
1246 | nr_lumpy_taken += isolated_pages; | ||
1247 | if (PageDirty(cursor_page)) | ||
1248 | nr_lumpy_dirty += isolated_pages; | ||
1249 | scan++; | ||
1250 | pfn += isolated_pages - 1; | ||
1251 | } else { | ||
1252 | /* | ||
1253 | * Check if the page is freed already. | ||
1254 | * | ||
1255 | * We can't use page_count() as that | ||
1256 | * requires compound_head and we don't | ||
1257 | * have a pin on the page here. If a | ||
1258 | * page is tail, we may or may not | ||
1259 | * have isolated the head, so assume | ||
1260 | * it's not free, it'd be tricky to | ||
1261 | * track the head status without a | ||
1262 | * page pin. | ||
1263 | */ | ||
1264 | if (!PageTail(cursor_page) && | ||
1265 | !atomic_read(&cursor_page->_count)) | ||
1266 | continue; | ||
1267 | break; | ||
1268 | } | ||
1269 | } | ||
1270 | |||
1271 | /* If we break out of the loop above, lumpy reclaim failed */ | ||
1272 | if (pfn < end_pfn) | ||
1273 | nr_lumpy_failed++; | ||
1274 | } | 1057 | } |
1275 | 1058 | ||
1276 | *nr_scanned = scan; | 1059 | *nr_scanned = scan; |
1277 | 1060 | trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan, | |
1278 | trace_mm_vmscan_lru_isolate(sc->order, | 1061 | nr_taken, mode, is_file_lru(lru)); |
1279 | nr_to_scan, scan, | ||
1280 | nr_taken, | ||
1281 | nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed, | ||
1282 | mode, file); | ||
1283 | return nr_taken; | 1062 | return nr_taken; |
1284 | } | 1063 | } |
1285 | 1064 | ||
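With the lumpy block scan gone, the isolation loop above is a straight walk of the list; the remaining subtlety is that a compound page is charged as hpage_nr_pages() base pages against both nr_taken and the lruvec size. A toy model of that accounting (512 assumes 2MB transparent huge pages over 4K base pages; the page struct is invented):

#include <stdio.h>

#define HPAGE_PMD_NR 512	/* assumed: 2MB huge page / 4K base page */

struct page_model { int is_huge; };

static int hpage_nr_pages(const struct page_model *page)
{
	return page->is_huge ? HPAGE_PMD_NR : 1;
}

int main(void)
{
	struct page_model batch[] = { {0}, {1}, {0} };	/* two small, one huge */
	unsigned long lru_size = 10000, nr_taken = 0;
	unsigned long i;

	for (i = 0; i < sizeof(batch) / sizeof(batch[0]); i++) {
		int nr_pages = hpage_nr_pages(&batch[i]);

		lru_size -= nr_pages;	/* mem_cgroup_update_lru_size(..., -nr_pages) */
		nr_taken += nr_pages;
	}
	printf("isolated %lu pages, %lu still accounted on the list\n",
	       nr_taken, lru_size);
	return 0;
}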
@@ -1316,15 +1095,16 @@ int isolate_lru_page(struct page *page) | |||
1316 | 1095 | ||
1317 | if (PageLRU(page)) { | 1096 | if (PageLRU(page)) { |
1318 | struct zone *zone = page_zone(page); | 1097 | struct zone *zone = page_zone(page); |
1098 | struct lruvec *lruvec; | ||
1319 | 1099 | ||
1320 | spin_lock_irq(&zone->lru_lock); | 1100 | spin_lock_irq(&zone->lru_lock); |
1101 | lruvec = mem_cgroup_page_lruvec(page, zone); | ||
1321 | if (PageLRU(page)) { | 1102 | if (PageLRU(page)) { |
1322 | int lru = page_lru(page); | 1103 | int lru = page_lru(page); |
1323 | ret = 0; | ||
1324 | get_page(page); | 1104 | get_page(page); |
1325 | ClearPageLRU(page); | 1105 | ClearPageLRU(page); |
1326 | 1106 | del_page_from_lru_list(page, lruvec, lru); | |
1327 | del_page_from_lru_list(zone, page, lru); | 1107 | ret = 0; |
1328 | } | 1108 | } |
1329 | spin_unlock_irq(&zone->lru_lock); | 1109 | spin_unlock_irq(&zone->lru_lock); |
1330 | } | 1110 | } |
@@ -1357,11 +1137,10 @@ static int too_many_isolated(struct zone *zone, int file, | |||
1357 | } | 1137 | } |
1358 | 1138 | ||
1359 | static noinline_for_stack void | 1139 | static noinline_for_stack void |
1360 | putback_inactive_pages(struct mem_cgroup_zone *mz, | 1140 | putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) |
1361 | struct list_head *page_list) | ||
1362 | { | 1141 | { |
1363 | struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); | 1142 | struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; |
1364 | struct zone *zone = mz->zone; | 1143 | struct zone *zone = lruvec_zone(lruvec); |
1365 | LIST_HEAD(pages_to_free); | 1144 | LIST_HEAD(pages_to_free); |
1366 | 1145 | ||
1367 | /* | 1146 | /* |
@@ -1379,9 +1158,13 @@ putback_inactive_pages(struct mem_cgroup_zone *mz, | |||
1379 | spin_lock_irq(&zone->lru_lock); | 1158 | spin_lock_irq(&zone->lru_lock); |
1380 | continue; | 1159 | continue; |
1381 | } | 1160 | } |
1161 | |||
1162 | lruvec = mem_cgroup_page_lruvec(page, zone); | ||
1163 | |||
1382 | SetPageLRU(page); | 1164 | SetPageLRU(page); |
1383 | lru = page_lru(page); | 1165 | lru = page_lru(page); |
1384 | add_page_to_lru_list(zone, page, lru); | 1166 | add_page_to_lru_list(page, lruvec, lru); |
1167 | |||
1385 | if (is_active_lru(lru)) { | 1168 | if (is_active_lru(lru)) { |
1386 | int file = is_file_lru(lru); | 1169 | int file = is_file_lru(lru); |
1387 | int numpages = hpage_nr_pages(page); | 1170 | int numpages = hpage_nr_pages(page); |
@@ -1390,7 +1173,7 @@ putback_inactive_pages(struct mem_cgroup_zone *mz, | |||
1390 | if (put_page_testzero(page)) { | 1173 | if (put_page_testzero(page)) { |
1391 | __ClearPageLRU(page); | 1174 | __ClearPageLRU(page); |
1392 | __ClearPageActive(page); | 1175 | __ClearPageActive(page); |
1393 | del_page_from_lru_list(zone, page, lru); | 1176 | del_page_from_lru_list(page, lruvec, lru); |
1394 | 1177 | ||
1395 | if (unlikely(PageCompound(page))) { | 1178 | if (unlikely(PageCompound(page))) { |
1396 | spin_unlock_irq(&zone->lru_lock); | 1179 | spin_unlock_irq(&zone->lru_lock); |
@@ -1407,112 +1190,24 @@ putback_inactive_pages(struct mem_cgroup_zone *mz, | |||
1407 | list_splice(&pages_to_free, page_list); | 1190 | list_splice(&pages_to_free, page_list); |
1408 | } | 1191 | } |
1409 | 1192 | ||
1410 | static noinline_for_stack void | ||
1411 | update_isolated_counts(struct mem_cgroup_zone *mz, | ||
1412 | struct list_head *page_list, | ||
1413 | unsigned long *nr_anon, | ||
1414 | unsigned long *nr_file) | ||
1415 | { | ||
1416 | struct zone *zone = mz->zone; | ||
1417 | unsigned int count[NR_LRU_LISTS] = { 0, }; | ||
1418 | unsigned long nr_active = 0; | ||
1419 | struct page *page; | ||
1420 | int lru; | ||
1421 | |||
1422 | /* | ||
1423 | * Count pages and clear active flags | ||
1424 | */ | ||
1425 | list_for_each_entry(page, page_list, lru) { | ||
1426 | int numpages = hpage_nr_pages(page); | ||
1427 | lru = page_lru_base_type(page); | ||
1428 | if (PageActive(page)) { | ||
1429 | lru += LRU_ACTIVE; | ||
1430 | ClearPageActive(page); | ||
1431 | nr_active += numpages; | ||
1432 | } | ||
1433 | count[lru] += numpages; | ||
1434 | } | ||
1435 | |||
1436 | preempt_disable(); | ||
1437 | __count_vm_events(PGDEACTIVATE, nr_active); | ||
1438 | |||
1439 | __mod_zone_page_state(zone, NR_ACTIVE_FILE, | ||
1440 | -count[LRU_ACTIVE_FILE]); | ||
1441 | __mod_zone_page_state(zone, NR_INACTIVE_FILE, | ||
1442 | -count[LRU_INACTIVE_FILE]); | ||
1443 | __mod_zone_page_state(zone, NR_ACTIVE_ANON, | ||
1444 | -count[LRU_ACTIVE_ANON]); | ||
1445 | __mod_zone_page_state(zone, NR_INACTIVE_ANON, | ||
1446 | -count[LRU_INACTIVE_ANON]); | ||
1447 | |||
1448 | *nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON]; | ||
1449 | *nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE]; | ||
1450 | |||
1451 | __mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon); | ||
1452 | __mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file); | ||
1453 | preempt_enable(); | ||
1454 | } | ||
1455 | |||
1456 | /* | ||
1457 | * Returns true if a direct reclaim should wait on pages under writeback. | ||
1458 | * | ||
1459 | * If we are direct reclaiming for contiguous pages and we do not reclaim | ||
1460 | * everything in the list, try again and wait for writeback IO to complete. | ||
1461 | * This will stall high-order allocations noticeably. Only do that when really | ||
1462 | * need to free the pages under high memory pressure. | ||
1463 | */ | ||
1464 | static inline bool should_reclaim_stall(unsigned long nr_taken, | ||
1465 | unsigned long nr_freed, | ||
1466 | int priority, | ||
1467 | struct scan_control *sc) | ||
1468 | { | ||
1469 | int lumpy_stall_priority; | ||
1470 | |||
1471 | /* kswapd should not stall on sync IO */ | ||
1472 | if (current_is_kswapd()) | ||
1473 | return false; | ||
1474 | |||
1475 | /* Only stall on lumpy reclaim */ | ||
1476 | if (sc->reclaim_mode & RECLAIM_MODE_SINGLE) | ||
1477 | return false; | ||
1478 | |||
1479 | /* If we have reclaimed everything on the isolated list, no stall */ | ||
1480 | if (nr_freed == nr_taken) | ||
1481 | return false; | ||
1482 | |||
1483 | /* | ||
1484 | * For high-order allocations, there are two stall thresholds. | ||
1485 | * High-cost allocations stall immediately where as lower | ||
1486 | * order allocations such as stacks require the scanning | ||
1487 | * priority to be much higher before stalling. | ||
1488 | */ | ||
1489 | if (sc->order > PAGE_ALLOC_COSTLY_ORDER) | ||
1490 | lumpy_stall_priority = DEF_PRIORITY; | ||
1491 | else | ||
1492 | lumpy_stall_priority = DEF_PRIORITY / 3; | ||
1493 | |||
1494 | return priority <= lumpy_stall_priority; | ||
1495 | } | ||
1496 | |||
1497 | /* | 1193 | /* |
1498 | * shrink_inactive_list() is a helper for shrink_zone(). It returns the number | 1194 | * shrink_inactive_list() is a helper for shrink_zone(). It returns the number |
1499 | * of reclaimed pages | 1195 | * of reclaimed pages |
1500 | */ | 1196 | */ |
1501 | static noinline_for_stack unsigned long | 1197 | static noinline_for_stack unsigned long |
1502 | shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, | 1198 | shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, |
1503 | struct scan_control *sc, int priority, int file) | 1199 | struct scan_control *sc, enum lru_list lru) |
1504 | { | 1200 | { |
1505 | LIST_HEAD(page_list); | 1201 | LIST_HEAD(page_list); |
1506 | unsigned long nr_scanned; | 1202 | unsigned long nr_scanned; |
1507 | unsigned long nr_reclaimed = 0; | 1203 | unsigned long nr_reclaimed = 0; |
1508 | unsigned long nr_taken; | 1204 | unsigned long nr_taken; |
1509 | unsigned long nr_anon; | ||
1510 | unsigned long nr_file; | ||
1511 | unsigned long nr_dirty = 0; | 1205 | unsigned long nr_dirty = 0; |
1512 | unsigned long nr_writeback = 0; | 1206 | unsigned long nr_writeback = 0; |
1513 | isolate_mode_t isolate_mode = ISOLATE_INACTIVE; | 1207 | isolate_mode_t isolate_mode = 0; |
1514 | struct zone *zone = mz->zone; | 1208 | int file = is_file_lru(lru); |
1515 | struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); | 1209 | struct zone *zone = lruvec_zone(lruvec); |
1210 | struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; | ||
1516 | 1211 | ||
1517 | while (unlikely(too_many_isolated(zone, file, sc))) { | 1212 | while (unlikely(too_many_isolated(zone, file, sc))) { |
1518 | congestion_wait(BLK_RW_ASYNC, HZ/10); | 1213 | congestion_wait(BLK_RW_ASYNC, HZ/10); |
@@ -1522,10 +1217,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, | |||
1522 | return SWAP_CLUSTER_MAX; | 1217 | return SWAP_CLUSTER_MAX; |
1523 | } | 1218 | } |
1524 | 1219 | ||
1525 | set_reclaim_mode(priority, sc, false); | ||
1526 | if (sc->reclaim_mode & RECLAIM_MODE_LUMPYRECLAIM) | ||
1527 | isolate_mode |= ISOLATE_ACTIVE; | ||
1528 | |||
1529 | lru_add_drain(); | 1220 | lru_add_drain(); |
1530 | 1221 | ||
1531 | if (!sc->may_unmap) | 1222 | if (!sc->may_unmap) |
@@ -1535,38 +1226,30 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, | |||
1535 | 1226 | ||
1536 | spin_lock_irq(&zone->lru_lock); | 1227 | spin_lock_irq(&zone->lru_lock); |
1537 | 1228 | ||
1538 | nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list, &nr_scanned, | 1229 | nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list, |
1539 | sc, isolate_mode, 0, file); | 1230 | &nr_scanned, sc, isolate_mode, lru); |
1231 | |||
1232 | __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken); | ||
1233 | __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); | ||
1234 | |||
1540 | if (global_reclaim(sc)) { | 1235 | if (global_reclaim(sc)) { |
1541 | zone->pages_scanned += nr_scanned; | 1236 | zone->pages_scanned += nr_scanned; |
1542 | if (current_is_kswapd()) | 1237 | if (current_is_kswapd()) |
1543 | __count_zone_vm_events(PGSCAN_KSWAPD, zone, | 1238 | __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned); |
1544 | nr_scanned); | ||
1545 | else | 1239 | else |
1546 | __count_zone_vm_events(PGSCAN_DIRECT, zone, | 1240 | __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scanned); |
1547 | nr_scanned); | ||
1548 | } | 1241 | } |
1549 | spin_unlock_irq(&zone->lru_lock); | 1242 | spin_unlock_irq(&zone->lru_lock); |
1550 | 1243 | ||
1551 | if (nr_taken == 0) | 1244 | if (nr_taken == 0) |
1552 | return 0; | 1245 | return 0; |
1553 | 1246 | ||
1554 | update_isolated_counts(mz, &page_list, &nr_anon, &nr_file); | 1247 | nr_reclaimed = shrink_page_list(&page_list, zone, sc, |
1555 | |||
1556 | nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority, | ||
1557 | &nr_dirty, &nr_writeback); | 1248 | &nr_dirty, &nr_writeback); |
1558 | 1249 | ||
1559 | /* Check if we should syncronously wait for writeback */ | ||
1560 | if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) { | ||
1561 | set_reclaim_mode(priority, sc, true); | ||
1562 | nr_reclaimed += shrink_page_list(&page_list, mz, sc, | ||
1563 | priority, &nr_dirty, &nr_writeback); | ||
1564 | } | ||
1565 | |||
1566 | spin_lock_irq(&zone->lru_lock); | 1250 | spin_lock_irq(&zone->lru_lock); |
1567 | 1251 | ||
1568 | reclaim_stat->recent_scanned[0] += nr_anon; | 1252 | reclaim_stat->recent_scanned[file] += nr_taken; |
1569 | reclaim_stat->recent_scanned[1] += nr_file; | ||
1570 | 1253 | ||
1571 | if (global_reclaim(sc)) { | 1254 | if (global_reclaim(sc)) { |
1572 | if (current_is_kswapd()) | 1255 | if (current_is_kswapd()) |
@@ -1577,10 +1260,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, | |||
1577 | nr_reclaimed); | 1260 | nr_reclaimed); |
1578 | } | 1261 | } |
1579 | 1262 | ||
1580 | putback_inactive_pages(mz, &page_list); | 1263 | putback_inactive_pages(lruvec, &page_list); |
1581 | 1264 | ||
1582 | __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon); | 1265 | __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); |
1583 | __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file); | ||
1584 | 1266 | ||
1585 | spin_unlock_irq(&zone->lru_lock); | 1267 | spin_unlock_irq(&zone->lru_lock); |
1586 | 1268 | ||
@@ -1609,14 +1291,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, | |||
1609 | * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any | 1291 | * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any |
1610 | * isolated page is PageWriteback | 1292 | * isolated page is PageWriteback |
1611 | */ | 1293 | */ |
1612 | if (nr_writeback && nr_writeback >= (nr_taken >> (DEF_PRIORITY-priority))) | 1294 | if (nr_writeback && nr_writeback >= |
1295 | (nr_taken >> (DEF_PRIORITY - sc->priority))) | ||
1613 | wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); | 1296 | wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); |
1614 | 1297 | ||
1615 | trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id, | 1298 | trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id, |
1616 | zone_idx(zone), | 1299 | zone_idx(zone), |
1617 | nr_scanned, nr_reclaimed, | 1300 | nr_scanned, nr_reclaimed, |
1618 | priority, | 1301 | sc->priority, |
1619 | trace_shrink_flags(file, sc->reclaim_mode)); | 1302 | trace_shrink_flags(file)); |
1620 | return nr_reclaimed; | 1303 | return nr_reclaimed; |
1621 | } | 1304 | } |
1622 | 1305 | ||
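The comment and check above encode the only stall left in shrink_inactive_list(): wait for congestion once the pages still under writeback reach nr_taken >> (DEF_PRIORITY - sc->priority). A small model of that threshold, assuming DEF_PRIORITY is 12 and one SWAP_CLUSTER_MAX batch of 32 pages was taken:

#include <stdbool.h>
#include <stdio.h>

#define DEF_PRIORITY 12

static bool should_throttle(unsigned long nr_writeback,
			    unsigned long nr_taken, int priority)
{
	return nr_writeback &&
	       nr_writeback >= (nr_taken >> (DEF_PRIORITY - priority));
}

int main(void)
{
	unsigned long nr_taken = 32;	/* one SWAP_CLUSTER_MAX batch, assumed */
	int priority;

	/* The threshold halves with each priority step, so a single
	 * writeback page in the batch starts stalling at DEF_PRIORITY - 5. */
	for (priority = DEF_PRIORITY; priority >= 0; priority--)
		printf("priority %2d: throttle = %d\n",
		       priority, should_throttle(1, nr_taken, priority));
	return 0;
}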
@@ -1638,30 +1321,32 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz, | |||
1638 | * But we had to alter page->flags anyway. | 1321 | * But we had to alter page->flags anyway. |
1639 | */ | 1322 | */ |
1640 | 1323 | ||
1641 | static void move_active_pages_to_lru(struct zone *zone, | 1324 | static void move_active_pages_to_lru(struct lruvec *lruvec, |
1642 | struct list_head *list, | 1325 | struct list_head *list, |
1643 | struct list_head *pages_to_free, | 1326 | struct list_head *pages_to_free, |
1644 | enum lru_list lru) | 1327 | enum lru_list lru) |
1645 | { | 1328 | { |
1329 | struct zone *zone = lruvec_zone(lruvec); | ||
1646 | unsigned long pgmoved = 0; | 1330 | unsigned long pgmoved = 0; |
1647 | struct page *page; | 1331 | struct page *page; |
1332 | int nr_pages; | ||
1648 | 1333 | ||
1649 | while (!list_empty(list)) { | 1334 | while (!list_empty(list)) { |
1650 | struct lruvec *lruvec; | ||
1651 | |||
1652 | page = lru_to_page(list); | 1335 | page = lru_to_page(list); |
1336 | lruvec = mem_cgroup_page_lruvec(page, zone); | ||
1653 | 1337 | ||
1654 | VM_BUG_ON(PageLRU(page)); | 1338 | VM_BUG_ON(PageLRU(page)); |
1655 | SetPageLRU(page); | 1339 | SetPageLRU(page); |
1656 | 1340 | ||
1657 | lruvec = mem_cgroup_lru_add_list(zone, page, lru); | 1341 | nr_pages = hpage_nr_pages(page); |
1342 | mem_cgroup_update_lru_size(lruvec, lru, nr_pages); | ||
1658 | list_move(&page->lru, &lruvec->lists[lru]); | 1343 | list_move(&page->lru, &lruvec->lists[lru]); |
1659 | pgmoved += hpage_nr_pages(page); | 1344 | pgmoved += nr_pages; |
1660 | 1345 | ||
1661 | if (put_page_testzero(page)) { | 1346 | if (put_page_testzero(page)) { |
1662 | __ClearPageLRU(page); | 1347 | __ClearPageLRU(page); |
1663 | __ClearPageActive(page); | 1348 | __ClearPageActive(page); |
1664 | del_page_from_lru_list(zone, page, lru); | 1349 | del_page_from_lru_list(page, lruvec, lru); |
1665 | 1350 | ||
1666 | if (unlikely(PageCompound(page))) { | 1351 | if (unlikely(PageCompound(page))) { |
1667 | spin_unlock_irq(&zone->lru_lock); | 1352 | spin_unlock_irq(&zone->lru_lock); |
@@ -1677,9 +1362,9 @@ static void move_active_pages_to_lru(struct zone *zone, | |||
1677 | } | 1362 | } |
1678 | 1363 | ||
1679 | static void shrink_active_list(unsigned long nr_to_scan, | 1364 | static void shrink_active_list(unsigned long nr_to_scan, |
1680 | struct mem_cgroup_zone *mz, | 1365 | struct lruvec *lruvec, |
1681 | struct scan_control *sc, | 1366 | struct scan_control *sc, |
1682 | int priority, int file) | 1367 | enum lru_list lru) |
1683 | { | 1368 | { |
1684 | unsigned long nr_taken; | 1369 | unsigned long nr_taken; |
1685 | unsigned long nr_scanned; | 1370 | unsigned long nr_scanned; |
@@ -1688,15 +1373,14 @@ static void shrink_active_list(unsigned long nr_to_scan, | |||
1688 | LIST_HEAD(l_active); | 1373 | LIST_HEAD(l_active); |
1689 | LIST_HEAD(l_inactive); | 1374 | LIST_HEAD(l_inactive); |
1690 | struct page *page; | 1375 | struct page *page; |
1691 | struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); | 1376 | struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; |
1692 | unsigned long nr_rotated = 0; | 1377 | unsigned long nr_rotated = 0; |
1693 | isolate_mode_t isolate_mode = ISOLATE_ACTIVE; | 1378 | isolate_mode_t isolate_mode = 0; |
1694 | struct zone *zone = mz->zone; | 1379 | int file = is_file_lru(lru); |
1380 | struct zone *zone = lruvec_zone(lruvec); | ||
1695 | 1381 | ||
1696 | lru_add_drain(); | 1382 | lru_add_drain(); |
1697 | 1383 | ||
1698 | reset_reclaim_mode(sc); | ||
1699 | |||
1700 | if (!sc->may_unmap) | 1384 | if (!sc->may_unmap) |
1701 | isolate_mode |= ISOLATE_UNMAPPED; | 1385 | isolate_mode |= ISOLATE_UNMAPPED; |
1702 | if (!sc->may_writepage) | 1386 | if (!sc->may_writepage) |
@@ -1704,18 +1388,15 @@ static void shrink_active_list(unsigned long nr_to_scan, | |||
1704 | 1388 | ||
1705 | spin_lock_irq(&zone->lru_lock); | 1389 | spin_lock_irq(&zone->lru_lock); |
1706 | 1390 | ||
1707 | nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold, &nr_scanned, sc, | 1391 | nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold, |
1708 | isolate_mode, 1, file); | 1392 | &nr_scanned, sc, isolate_mode, lru); |
1709 | if (global_reclaim(sc)) | 1393 | if (global_reclaim(sc)) |
1710 | zone->pages_scanned += nr_scanned; | 1394 | zone->pages_scanned += nr_scanned; |
1711 | 1395 | ||
1712 | reclaim_stat->recent_scanned[file] += nr_taken; | 1396 | reclaim_stat->recent_scanned[file] += nr_taken; |
1713 | 1397 | ||
1714 | __count_zone_vm_events(PGREFILL, zone, nr_scanned); | 1398 | __count_zone_vm_events(PGREFILL, zone, nr_scanned); |
1715 | if (file) | 1399 | __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken); |
1716 | __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken); | ||
1717 | else | ||
1718 | __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken); | ||
1719 | __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); | 1400 | __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken); |
1720 | spin_unlock_irq(&zone->lru_lock); | 1401 | spin_unlock_irq(&zone->lru_lock); |
1721 | 1402 | ||
@@ -1737,7 +1418,8 @@ static void shrink_active_list(unsigned long nr_to_scan, | |||
1737 | } | 1418 | } |
1738 | } | 1419 | } |
1739 | 1420 | ||
1740 | if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) { | 1421 | if (page_referenced(page, 0, sc->target_mem_cgroup, |
1422 | &vm_flags)) { | ||
1741 | nr_rotated += hpage_nr_pages(page); | 1423 | nr_rotated += hpage_nr_pages(page); |
1742 | /* | 1424 | /* |
1743 | * Identify referenced, file-backed active pages and | 1425 | * Identify referenced, file-backed active pages and |
@@ -1770,10 +1452,8 @@ static void shrink_active_list(unsigned long nr_to_scan, | |||
1770 | */ | 1452 | */ |
1771 | reclaim_stat->recent_rotated[file] += nr_rotated; | 1453 | reclaim_stat->recent_rotated[file] += nr_rotated; |
1772 | 1454 | ||
1773 | move_active_pages_to_lru(zone, &l_active, &l_hold, | 1455 | move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru); |
1774 | LRU_ACTIVE + file * LRU_FILE); | 1456 | move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE); |
1775 | move_active_pages_to_lru(zone, &l_inactive, &l_hold, | ||
1776 | LRU_BASE + file * LRU_FILE); | ||
1777 | __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); | 1457 | __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); |
1778 | spin_unlock_irq(&zone->lru_lock); | 1458 | spin_unlock_irq(&zone->lru_lock); |
1779 | 1459 | ||
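The two move_active_pages_to_lru() calls above lean on the lru_list numbering: subtracting LRU_ACTIVE from an active list id lands on its inactive partner, so the old LRU_BASE + file * LRU_FILE arithmetic is no longer needed. A standalone sketch of that index math (the enum mirrors the kernel layout; the printing is ours):

#include <stdio.h>

#define LRU_BASE	0
#define LRU_ACTIVE	1
#define LRU_FILE	2

enum lru_list {
	LRU_INACTIVE_ANON = LRU_BASE,
	LRU_ACTIVE_ANON   = LRU_BASE + LRU_ACTIVE,
	LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
	LRU_ACTIVE_FILE   = LRU_BASE + LRU_FILE + LRU_ACTIVE,
	NR_EVICTABLE_LISTS
};

static const char * const name[NR_EVICTABLE_LISTS] = {
	"inactive_anon", "active_anon", "inactive_file", "active_file",
};

int main(void)
{
	int lru;

	for (lru = LRU_ACTIVE_ANON; lru < NR_EVICTABLE_LISTS; lru += LRU_FILE)
		printf("%s - LRU_ACTIVE -> %s\n",
		       name[lru], name[lru - LRU_ACTIVE]);
	return 0;
}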
@@ -1796,13 +1476,12 @@ static int inactive_anon_is_low_global(struct zone *zone) | |||
1796 | 1476 | ||
1797 | /** | 1477 | /** |
1798 | * inactive_anon_is_low - check if anonymous pages need to be deactivated | 1478 | * inactive_anon_is_low - check if anonymous pages need to be deactivated |
1799 | * @zone: zone to check | 1479 | * @lruvec: LRU vector to check |
1800 | * @sc: scan control of this context | ||
1801 | * | 1480 | * |
1802 | * Returns true if the zone does not have enough inactive anon pages, | 1481 | * Returns true if the zone does not have enough inactive anon pages, |
1803 | * meaning some active anon pages need to be deactivated. | 1482 | * meaning some active anon pages need to be deactivated. |
1804 | */ | 1483 | */ |
1805 | static int inactive_anon_is_low(struct mem_cgroup_zone *mz) | 1484 | static int inactive_anon_is_low(struct lruvec *lruvec) |
1806 | { | 1485 | { |
1807 | /* | 1486 | /* |
1808 | * If we don't have swap space, anonymous page deactivation | 1487 | * If we don't have swap space, anonymous page deactivation |
@@ -1811,14 +1490,13 @@ static int inactive_anon_is_low(struct mem_cgroup_zone *mz) | |||
1811 | if (!total_swap_pages) | 1490 | if (!total_swap_pages) |
1812 | return 0; | 1491 | return 0; |
1813 | 1492 | ||
1814 | if (!scanning_global_lru(mz)) | 1493 | if (!mem_cgroup_disabled()) |
1815 | return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup, | 1494 | return mem_cgroup_inactive_anon_is_low(lruvec); |
1816 | mz->zone); | ||
1817 | 1495 | ||
1818 | return inactive_anon_is_low_global(mz->zone); | 1496 | return inactive_anon_is_low_global(lruvec_zone(lruvec)); |
1819 | } | 1497 | } |
1820 | #else | 1498 | #else |
1821 | static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz) | 1499 | static inline int inactive_anon_is_low(struct lruvec *lruvec) |
1822 | { | 1500 | { |
1823 | return 0; | 1501 | return 0; |
1824 | } | 1502 | } |
@@ -1836,7 +1514,7 @@ static int inactive_file_is_low_global(struct zone *zone) | |||
1836 | 1514 | ||
1837 | /** | 1515 | /** |
1838 | * inactive_file_is_low - check if file pages need to be deactivated | 1516 | * inactive_file_is_low - check if file pages need to be deactivated |
1839 | * @mz: memory cgroup and zone to check | 1517 | * @lruvec: LRU vector to check |
1840 | * | 1518 | * |
1841 | * When the system is doing streaming IO, memory pressure here | 1519 | * When the system is doing streaming IO, memory pressure here |
1842 | * ensures that active file pages get deactivated, until more | 1520 | * ensures that active file pages get deactivated, until more |
@@ -1848,44 +1526,39 @@ static int inactive_file_is_low_global(struct zone *zone) | |||
1848 | * This uses a different ratio than the anonymous pages, because | 1526 | * This uses a different ratio than the anonymous pages, because |
1849 | * the page cache uses a use-once replacement algorithm. | 1527 | * the page cache uses a use-once replacement algorithm. |
1850 | */ | 1528 | */ |
1851 | static int inactive_file_is_low(struct mem_cgroup_zone *mz) | 1529 | static int inactive_file_is_low(struct lruvec *lruvec) |
1852 | { | 1530 | { |
1853 | if (!scanning_global_lru(mz)) | 1531 | if (!mem_cgroup_disabled()) |
1854 | return mem_cgroup_inactive_file_is_low(mz->mem_cgroup, | 1532 | return mem_cgroup_inactive_file_is_low(lruvec); |
1855 | mz->zone); | ||
1856 | 1533 | ||
1857 | return inactive_file_is_low_global(mz->zone); | 1534 | return inactive_file_is_low_global(lruvec_zone(lruvec)); |
1858 | } | 1535 | } |
1859 | 1536 | ||
1860 | static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file) | 1537 | static int inactive_list_is_low(struct lruvec *lruvec, enum lru_list lru) |
1861 | { | 1538 | { |
1862 | if (file) | 1539 | if (is_file_lru(lru)) |
1863 | return inactive_file_is_low(mz); | 1540 | return inactive_file_is_low(lruvec); |
1864 | else | 1541 | else |
1865 | return inactive_anon_is_low(mz); | 1542 | return inactive_anon_is_low(lruvec); |
1866 | } | 1543 | } |
1867 | 1544 | ||
1868 | static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, | 1545 | static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, |
1869 | struct mem_cgroup_zone *mz, | 1546 | struct lruvec *lruvec, struct scan_control *sc) |
1870 | struct scan_control *sc, int priority) | ||
1871 | { | 1547 | { |
1872 | int file = is_file_lru(lru); | ||
1873 | |||
1874 | if (is_active_lru(lru)) { | 1548 | if (is_active_lru(lru)) { |
1875 | if (inactive_list_is_low(mz, file)) | 1549 | if (inactive_list_is_low(lruvec, lru)) |
1876 | shrink_active_list(nr_to_scan, mz, sc, priority, file); | 1550 | shrink_active_list(nr_to_scan, lruvec, sc, lru); |
1877 | return 0; | 1551 | return 0; |
1878 | } | 1552 | } |
1879 | 1553 | ||
1880 | return shrink_inactive_list(nr_to_scan, mz, sc, priority, file); | 1554 | return shrink_inactive_list(nr_to_scan, lruvec, sc, lru); |
1881 | } | 1555 | } |
1882 | 1556 | ||
1883 | static int vmscan_swappiness(struct mem_cgroup_zone *mz, | 1557 | static int vmscan_swappiness(struct scan_control *sc) |
1884 | struct scan_control *sc) | ||
1885 | { | 1558 | { |
1886 | if (global_reclaim(sc)) | 1559 | if (global_reclaim(sc)) |
1887 | return vm_swappiness; | 1560 | return vm_swappiness; |
1888 | return mem_cgroup_swappiness(mz->mem_cgroup); | 1561 | return mem_cgroup_swappiness(sc->target_mem_cgroup); |
1889 | } | 1562 | } |
1890 | 1563 | ||
1891 | /* | 1564 | /* |
@@ -1896,17 +1569,18 @@ static int vmscan_swappiness(struct mem_cgroup_zone *mz, | |||
1896 | * | 1569 | * |
1897 | * nr[0] = anon pages to scan; nr[1] = file pages to scan | 1570 | * nr[0] = anon pages to scan; nr[1] = file pages to scan |
1898 | */ | 1571 | */ |
1899 | static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, | 1572 | static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, |
1900 | unsigned long *nr, int priority) | 1573 | unsigned long *nr) |
1901 | { | 1574 | { |
1902 | unsigned long anon, file, free; | 1575 | unsigned long anon, file, free; |
1903 | unsigned long anon_prio, file_prio; | 1576 | unsigned long anon_prio, file_prio; |
1904 | unsigned long ap, fp; | 1577 | unsigned long ap, fp; |
1905 | struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz); | 1578 | struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; |
1906 | u64 fraction[2], denominator; | 1579 | u64 fraction[2], denominator; |
1907 | enum lru_list lru; | 1580 | enum lru_list lru; |
1908 | int noswap = 0; | 1581 | int noswap = 0; |
1909 | bool force_scan = false; | 1582 | bool force_scan = false; |
1583 | struct zone *zone = lruvec_zone(lruvec); | ||
1910 | 1584 | ||
1911 | /* | 1585 | /* |
1912 | * If the zone or memcg is small, nr[l] can be 0. This | 1586 | * If the zone or memcg is small, nr[l] can be 0. This |
@@ -1918,7 +1592,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, | |||
1918 | * latencies, so it's better to scan a minimum amount there as | 1592 | * latencies, so it's better to scan a minimum amount there as |
1919 | * well. | 1593 | * well. |
1920 | */ | 1594 | */ |
1921 | if (current_is_kswapd() && mz->zone->all_unreclaimable) | 1595 | if (current_is_kswapd() && zone->all_unreclaimable) |
1922 | force_scan = true; | 1596 | force_scan = true; |
1923 | if (!global_reclaim(sc)) | 1597 | if (!global_reclaim(sc)) |
1924 | force_scan = true; | 1598 | force_scan = true; |
@@ -1932,16 +1606,16 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, | |||
1932 | goto out; | 1606 | goto out; |
1933 | } | 1607 | } |
1934 | 1608 | ||
1935 | anon = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) + | 1609 | anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) + |
1936 | zone_nr_lru_pages(mz, LRU_INACTIVE_ANON); | 1610 | get_lru_size(lruvec, LRU_INACTIVE_ANON); |
1937 | file = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) + | 1611 | file = get_lru_size(lruvec, LRU_ACTIVE_FILE) + |
1938 | zone_nr_lru_pages(mz, LRU_INACTIVE_FILE); | 1612 | get_lru_size(lruvec, LRU_INACTIVE_FILE); |
1939 | 1613 | ||
1940 | if (global_reclaim(sc)) { | 1614 | if (global_reclaim(sc)) { |
1941 | free = zone_page_state(mz->zone, NR_FREE_PAGES); | 1615 | free = zone_page_state(zone, NR_FREE_PAGES); |
1942 | /* If we have very few page cache pages, | 1616 | /* If we have very few page cache pages, |
1943 | force-scan anon pages. */ | 1617 | force-scan anon pages. */ |
1944 | if (unlikely(file + free <= high_wmark_pages(mz->zone))) { | 1618 | if (unlikely(file + free <= high_wmark_pages(zone))) { |
1945 | fraction[0] = 1; | 1619 | fraction[0] = 1; |
1946 | fraction[1] = 0; | 1620 | fraction[1] = 0; |
1947 | denominator = 1; | 1621 | denominator = 1; |
@@ -1953,8 +1627,8 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, | |||
1953 | * With swappiness at 100, anonymous and file have the same priority. | 1627 | * With swappiness at 100, anonymous and file have the same priority. |
1954 | * This scanning priority is essentially the inverse of IO cost. | 1628 | * This scanning priority is essentially the inverse of IO cost. |
1955 | */ | 1629 | */ |
1956 | anon_prio = vmscan_swappiness(mz, sc); | 1630 | anon_prio = vmscan_swappiness(sc); |
1957 | file_prio = 200 - vmscan_swappiness(mz, sc); | 1631 | file_prio = 200 - anon_prio; |
1958 | 1632 | ||
1959 | /* | 1633 | /* |
1960 | * OK, so we have swap space and a fair amount of page cache | 1634 | * OK, so we have swap space and a fair amount of page cache |
@@ -1967,7 +1641,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, | |||
1967 | * | 1641 | * |
1968 | * anon in [0], file in [1] | 1642 | * anon in [0], file in [1] |
1969 | */ | 1643 | */ |
1970 | spin_lock_irq(&mz->zone->lru_lock); | 1644 | spin_lock_irq(&zone->lru_lock); |
1971 | if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { | 1645 | if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { |
1972 | reclaim_stat->recent_scanned[0] /= 2; | 1646 | reclaim_stat->recent_scanned[0] /= 2; |
1973 | reclaim_stat->recent_rotated[0] /= 2; | 1647 | reclaim_stat->recent_rotated[0] /= 2; |
@@ -1983,12 +1657,12 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc, | |||
1983 | * proportional to the fraction of recently scanned pages on | 1657 | * proportional to the fraction of recently scanned pages on |
1984 | * each list that were recently referenced and in active use. | 1658 | * each list that were recently referenced and in active use. |
1985 | */ | 1659 | */ |
1986 | ap = (anon_prio + 1) * (reclaim_stat->recent_scanned[0] + 1); | 1660 | ap = anon_prio * (reclaim_stat->recent_scanned[0] + 1); |
1987 | ap /= reclaim_stat->recent_rotated[0] + 1; | 1661 | ap /= reclaim_stat->recent_rotated[0] + 1; |
1988 | 1662 | ||
1989 | fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1); | 1663 | fp = file_prio * (reclaim_stat->recent_scanned[1] + 1); |
1990 | fp /= reclaim_stat->recent_rotated[1] + 1; | 1664 | fp /= reclaim_stat->recent_rotated[1] + 1; |
1991 | spin_unlock_irq(&mz->zone->lru_lock); | 1665 | spin_unlock_irq(&zone->lru_lock); |
1992 | 1666 | ||
1993 | fraction[0] = ap; | 1667 | fraction[0] = ap; |
1994 | fraction[1] = fp; | 1668 | fraction[1] = fp; |
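Dropping the "+ 1" from the priorities above means a swappiness of 0 now gives anon a zero weight rather than a small one. A userspace walk-through of the split, with invented recent_scanned/recent_rotated history; the denominator (ap + fp + 1 in this kernel) is not shown in the hunk, and where the kernel scales each list by its own size, one illustrative size is used here for brevity:

#include <stdio.h>

int main(void)
{
	unsigned long swappiness = 60;			/* vm_swappiness default */
	unsigned long anon_prio = swappiness;
	unsigned long file_prio = 200 - swappiness;

	/* Invented per-lruvec history: [0] = anon, [1] = file. */
	unsigned long recent_scanned[2] = { 1000, 4000 };
	unsigned long recent_rotated[2] = {  200,  100 };

	unsigned long long ap = anon_prio * (recent_scanned[0] + 1);
	unsigned long long fp = file_prio * (recent_scanned[1] + 1);
	unsigned long long denominator;
	unsigned long scan = (1UL << 20) >> 12;	/* one list's size >> priority */

	ap /= recent_rotated[0] + 1;
	fp /= recent_rotated[1] + 1;
	denominator = ap + fp + 1;		/* assumed, not shown in this hunk */

	printf("scan %llu anon and %llu file of every %lu pages looked at\n",
	       scan * ap / denominator, scan * fp / denominator, scan);
	return 0;
}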
@@ -1998,9 +1672,9 @@ out: | |||
1998 | int file = is_file_lru(lru); | 1672 | int file = is_file_lru(lru); |
1999 | unsigned long scan; | 1673 | unsigned long scan; |
2000 | 1674 | ||
2001 | scan = zone_nr_lru_pages(mz, lru); | 1675 | scan = get_lru_size(lruvec, lru); |
2002 | if (priority || noswap) { | 1676 | if (sc->priority || noswap || !vmscan_swappiness(sc)) { |
2003 | scan >>= priority; | 1677 | scan >>= sc->priority; |
2004 | if (!scan && force_scan) | 1678 | if (!scan && force_scan) |
2005 | scan = SWAP_CLUSTER_MAX; | 1679 | scan = SWAP_CLUSTER_MAX; |
2006 | scan = div64_u64(scan * fraction[file], denominator); | 1680 | scan = div64_u64(scan * fraction[file], denominator); |
@@ -2009,14 +1683,25 @@ out: | |||
2009 | } | 1683 | } |
2010 | } | 1684 | } |
2011 | 1685 | ||
1686 | /* Use reclaim/compaction for costly allocs or under memory pressure */ | ||
1687 | static bool in_reclaim_compaction(struct scan_control *sc) | ||
1688 | { | ||
1689 | if (COMPACTION_BUILD && sc->order && | ||
1690 | (sc->order > PAGE_ALLOC_COSTLY_ORDER || | ||
1691 | sc->priority < DEF_PRIORITY - 2)) | ||
1692 | return true; | ||
1693 | |||
1694 | return false; | ||
1695 | } | ||
1696 | |||
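The new in_reclaim_compaction() above is the direct replacement for the RECLAIM_MODE_COMPACTION bookkeeping that set_reclaim_mode() used to do: reclaim/compaction is entered only for order > 0, either right away for costly orders or once priority has fallen below DEF_PRIORITY - 2. A standalone model, with the kernel's usual constants assumed (PAGE_ALLOC_COSTLY_ORDER = 3, DEF_PRIORITY = 12, compaction built in):

#include <stdbool.h>
#include <stdio.h>

#define COMPACTION_BUILD	1	/* assume CONFIG_COMPACTION=y */
#define PAGE_ALLOC_COSTLY_ORDER	3
#define DEF_PRIORITY		12

static bool in_reclaim_compaction(int order, int priority)
{
	if (COMPACTION_BUILD && order &&
	    (order > PAGE_ALLOC_COSTLY_ORDER ||
	     priority < DEF_PRIORITY - 2))
		return true;

	return false;
}

int main(void)
{
	printf("order 0, priority 12: %d\n", in_reclaim_compaction(0, 12));
	printf("order 2, priority 12: %d\n", in_reclaim_compaction(2, 12));
	printf("order 2, priority  9: %d\n", in_reclaim_compaction(2, 9));
	printf("order 9, priority 12: %d\n", in_reclaim_compaction(9, 12));
	return 0;
}

Order-0 reclaim never enters this mode, small orders only do so under sustained pressure, and costly orders do so immediately: the same shape as the old decision, just without the lumpy fallback.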
2012 | /* | 1697 | /* |
2013 | * Reclaim/compaction depends on a number of pages being freed. To avoid | 1698 | * Reclaim/compaction is used for high-order allocation requests. It reclaims |
2014 | * disruption to the system, a small number of order-0 pages continue to be | 1699 | * order-0 pages before compacting the zone. should_continue_reclaim() returns |
2015 | * rotated and reclaimed in the normal fashion. However, by the time we get | 1700 | * true if more pages should be reclaimed such that when the page allocator |
2016 | * back to the allocator and call try_to_compact_zone(), we ensure that | 1701 | * calls try_to_compact_zone() that it will have enough free pages to succeed. |
2017 | * there are enough free pages for it to be likely successful | 1702 | * It will give up earlier than that if there is difficulty reclaiming pages. |
2018 | */ | 1703 | */ |
2019 | static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, | 1704 | static inline bool should_continue_reclaim(struct lruvec *lruvec, |
2020 | unsigned long nr_reclaimed, | 1705 | unsigned long nr_reclaimed, |
2021 | unsigned long nr_scanned, | 1706 | unsigned long nr_scanned, |
2022 | struct scan_control *sc) | 1707 | struct scan_control *sc) |
@@ -2025,7 +1710,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, | |||
2025 | unsigned long inactive_lru_pages; | 1710 | unsigned long inactive_lru_pages; |
2026 | 1711 | ||
2027 | /* If not in reclaim/compaction mode, stop */ | 1712 | /* If not in reclaim/compaction mode, stop */ |
2028 | if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION)) | 1713 | if (!in_reclaim_compaction(sc)) |
2029 | return false; | 1714 | return false; |
2030 | 1715 | ||
2031 | /* Consider stopping depending on scan and reclaim activity */ | 1716 | /* Consider stopping depending on scan and reclaim activity */ |
@@ -2056,15 +1741,15 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, | |||
2056 | * inactive lists are large enough, continue reclaiming | 1741 | * inactive lists are large enough, continue reclaiming |
2057 | */ | 1742 | */ |
2058 | pages_for_compaction = (2UL << sc->order); | 1743 | pages_for_compaction = (2UL << sc->order); |
2059 | inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE); | 1744 | inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE); |
2060 | if (nr_swap_pages > 0) | 1745 | if (nr_swap_pages > 0) |
2061 | inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON); | 1746 | inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON); |
2062 | if (sc->nr_reclaimed < pages_for_compaction && | 1747 | if (sc->nr_reclaimed < pages_for_compaction && |
2063 | inactive_lru_pages > pages_for_compaction) | 1748 | inactive_lru_pages > pages_for_compaction) |
2064 | return true; | 1749 | return true; |
2065 | 1750 | ||
2066 | /* If compaction would go ahead or the allocation would succeed, stop */ | 1751 | /* If compaction would go ahead or the allocation would succeed, stop */ |
2067 | switch (compaction_suitable(mz->zone, sc->order)) { | 1752 | switch (compaction_suitable(lruvec_zone(lruvec), sc->order)) { |
2068 | case COMPACT_PARTIAL: | 1753 | case COMPACT_PARTIAL: |
2069 | case COMPACT_CONTINUE: | 1754 | case COMPACT_CONTINUE: |
2070 | return false; | 1755 | return false; |
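
The size check in should_continue_reclaim() asks for roughly twice the allocation (2UL << order) to be freed before handing over to compaction, and only keeps going while the inactive lists could still supply that many pages; compaction_suitable() then gets the final say. A sketch of just the size comparison, with continue_for_compaction() as an invented name and the watermark-based compaction_suitable() step left out:

#include <stdbool.h>
#include <stdio.h>

/* Keep reclaiming for compaction while the freed total is still below
 * 2 << order pages and the inactive LRUs could still supply that many. */
static bool continue_for_compaction(unsigned int order,
                                    unsigned long nr_reclaimed,
                                    unsigned long inactive_lru_pages)
{
        unsigned long pages_for_compaction = 2UL << order;

        return nr_reclaimed < pages_for_compaction &&
               inactive_lru_pages > pages_for_compaction;
}

int main(void)
{
        /* order-9 (2MB on x86): target is 1024 pages */
        printf("%d\n", continue_for_compaction(9, 512, 4096));  /* 1: keep reclaiming */
        printf("%d\n", continue_for_compaction(9, 1500, 4096)); /* 0: enough freed, try compaction */
        return 0;
}
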
@@ -2076,8 +1761,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz, | |||
2076 | /* | 1761 | /* |
2077 | * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. | 1762 | * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. |
2078 | */ | 1763 | */ |
2079 | static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz, | 1764 | static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) |
2080 | struct scan_control *sc) | ||
2081 | { | 1765 | { |
2082 | unsigned long nr[NR_LRU_LISTS]; | 1766 | unsigned long nr[NR_LRU_LISTS]; |
2083 | unsigned long nr_to_scan; | 1767 | unsigned long nr_to_scan; |
@@ -2089,7 +1773,7 @@ static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz, | |||
2089 | restart: | 1773 | restart: |
2090 | nr_reclaimed = 0; | 1774 | nr_reclaimed = 0; |
2091 | nr_scanned = sc->nr_scanned; | 1775 | nr_scanned = sc->nr_scanned; |
2092 | get_scan_count(mz, sc, nr, priority); | 1776 | get_scan_count(lruvec, sc, nr); |
2093 | 1777 | ||
2094 | blk_start_plug(&plug); | 1778 | blk_start_plug(&plug); |
2095 | while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || | 1779 | while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || |
@@ -2101,7 +1785,7 @@ restart: | |||
2101 | nr[lru] -= nr_to_scan; | 1785 | nr[lru] -= nr_to_scan; |
2102 | 1786 | ||
2103 | nr_reclaimed += shrink_list(lru, nr_to_scan, | 1787 | nr_reclaimed += shrink_list(lru, nr_to_scan, |
2104 | mz, sc, priority); | 1788 | lruvec, sc); |
2105 | } | 1789 | } |
2106 | } | 1790 | } |
2107 | /* | 1791 | /* |
@@ -2112,7 +1796,8 @@ restart: | |||
2112 | * with multiple processes reclaiming pages, the total | 1796 | * with multiple processes reclaiming pages, the total |
2113 | * freeing target can get unreasonably large. | 1797 | * freeing target can get unreasonably large. |
2114 | */ | 1798 | */ |
2115 | if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY) | 1799 | if (nr_reclaimed >= nr_to_reclaim && |
1800 | sc->priority < DEF_PRIORITY) | ||
2116 | break; | 1801 | break; |
2117 | } | 1802 | } |
2118 | blk_finish_plug(&plug); | 1803 | blk_finish_plug(&plug); |
@@ -2122,35 +1807,33 @@ restart: | |||
2122 | * Even if we did not try to evict anon pages at all, we want to | 1807 | * Even if we did not try to evict anon pages at all, we want to |
2123 | * rebalance the anon lru active/inactive ratio. | 1808 | * rebalance the anon lru active/inactive ratio. |
2124 | */ | 1809 | */ |
2125 | if (inactive_anon_is_low(mz)) | 1810 | if (inactive_anon_is_low(lruvec)) |
2126 | shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0); | 1811 | shrink_active_list(SWAP_CLUSTER_MAX, lruvec, |
1812 | sc, LRU_ACTIVE_ANON); | ||
2127 | 1813 | ||
2128 | /* reclaim/compaction might need reclaim to continue */ | 1814 | /* reclaim/compaction might need reclaim to continue */ |
2129 | if (should_continue_reclaim(mz, nr_reclaimed, | 1815 | if (should_continue_reclaim(lruvec, nr_reclaimed, |
2130 | sc->nr_scanned - nr_scanned, sc)) | 1816 | sc->nr_scanned - nr_scanned, sc)) |
2131 | goto restart; | 1817 | goto restart; |
2132 | 1818 | ||
2133 | throttle_vm_writeout(sc->gfp_mask); | 1819 | throttle_vm_writeout(sc->gfp_mask); |
2134 | } | 1820 | } |
2135 | 1821 | ||
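
shrink_lruvec() above drains the per-LRU targets in SWAP_CLUSTER_MAX batches, round-robin over the evictable lists, and bails out once nr_to_reclaim is met at any priority below DEF_PRIORITY. The loop shape can be modelled in plain C; shrink_list_stub() below is an invented stand-in that pretends a quarter of each scanned batch is reclaimed, and the active-anon rebalancing and should_continue_reclaim() restart are omitted:

#include <stdio.h>

#define SWAP_CLUSTER_MAX 32UL
#define DEF_PRIORITY     12

enum lru_list { LRU_INACTIVE_ANON, LRU_ACTIVE_ANON,
                LRU_INACTIVE_FILE, LRU_ACTIVE_FILE, NR_LRU_LISTS };

/* Stub for shrink_list(): pretend a quarter of each scanned batch is freed. */
static unsigned long shrink_list_stub(unsigned long nr_to_scan)
{
        return nr_to_scan / 4;
}

static unsigned long shrink_lruvec_model(unsigned long nr[NR_LRU_LISTS],
                                         unsigned long nr_to_reclaim,
                                         int priority)
{
        unsigned long nr_reclaimed = 0;

        while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
               nr[LRU_INACTIVE_FILE]) {
                for (int lru = 0; lru < NR_LRU_LISTS; lru++) {
                        if (!nr[lru])
                                continue;
                        unsigned long nr_to_scan =
                                nr[lru] < SWAP_CLUSTER_MAX ? nr[lru] : SWAP_CLUSTER_MAX;
                        nr[lru] -= nr_to_scan;
                        nr_reclaimed += shrink_list_stub(nr_to_scan);
                }
                /* Bail out once the target is met, unless at lowest pressure. */
                if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY)
                        break;
        }
        return nr_reclaimed;
}

int main(void)
{
        unsigned long nr[NR_LRU_LISTS] = { 256, 0, 1024, 128 };

        printf("reclaimed %lu\n", shrink_lruvec_model(nr, 32, DEF_PRIORITY - 1));
        return 0;
}
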
2136 | static void shrink_zone(int priority, struct zone *zone, | 1822 | static void shrink_zone(struct zone *zone, struct scan_control *sc) |
2137 | struct scan_control *sc) | ||
2138 | { | 1823 | { |
2139 | struct mem_cgroup *root = sc->target_mem_cgroup; | 1824 | struct mem_cgroup *root = sc->target_mem_cgroup; |
2140 | struct mem_cgroup_reclaim_cookie reclaim = { | 1825 | struct mem_cgroup_reclaim_cookie reclaim = { |
2141 | .zone = zone, | 1826 | .zone = zone, |
2142 | .priority = priority, | 1827 | .priority = sc->priority, |
2143 | }; | 1828 | }; |
2144 | struct mem_cgroup *memcg; | 1829 | struct mem_cgroup *memcg; |
2145 | 1830 | ||
2146 | memcg = mem_cgroup_iter(root, NULL, &reclaim); | 1831 | memcg = mem_cgroup_iter(root, NULL, &reclaim); |
2147 | do { | 1832 | do { |
2148 | struct mem_cgroup_zone mz = { | 1833 | struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); |
2149 | .mem_cgroup = memcg, | 1834 | |
2150 | .zone = zone, | 1835 | shrink_lruvec(lruvec, sc); |
2151 | }; | ||
2152 | 1836 | ||
2153 | shrink_mem_cgroup_zone(priority, &mz, sc); | ||
2154 | /* | 1837 | /* |
2155 | * Limit reclaim has historically picked one memcg and | 1838 | * Limit reclaim has historically picked one memcg and |
2156 | * scanned it with decreasing priority levels until | 1839 | * scanned it with decreasing priority levels until |
@@ -2226,8 +1909,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) | |||
2226 | * the caller that it should consider retrying the allocation instead of | 1909 | * the caller that it should consider retrying the allocation instead of |
2227 | * further reclaim. | 1910 | * further reclaim. |
2228 | */ | 1911 | */ |
2229 | static bool shrink_zones(int priority, struct zonelist *zonelist, | 1912 | static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) |
2230 | struct scan_control *sc) | ||
2231 | { | 1913 | { |
2232 | struct zoneref *z; | 1914 | struct zoneref *z; |
2233 | struct zone *zone; | 1915 | struct zone *zone; |
@@ -2254,7 +1936,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, | |||
2254 | if (global_reclaim(sc)) { | 1936 | if (global_reclaim(sc)) { |
2255 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) | 1937 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) |
2256 | continue; | 1938 | continue; |
2257 | if (zone->all_unreclaimable && priority != DEF_PRIORITY) | 1939 | if (zone->all_unreclaimable && |
1940 | sc->priority != DEF_PRIORITY) | ||
2258 | continue; /* Let kswapd poll it */ | 1941 | continue; /* Let kswapd poll it */ |
2259 | if (COMPACTION_BUILD) { | 1942 | if (COMPACTION_BUILD) { |
2260 | /* | 1943 | /* |
@@ -2286,7 +1969,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, | |||
2286 | /* need some check to avoid more shrink_zone() calls */ | 1969 | /* need some check to avoid more shrink_zone() calls */ |
2287 | } | 1970 | } |
2288 | 1971 | ||
2289 | shrink_zone(priority, zone, sc); | 1972 | shrink_zone(zone, sc); |
2290 | } | 1973 | } |
2291 | 1974 | ||
2292 | return aborted_reclaim; | 1975 | return aborted_reclaim; |
@@ -2337,7 +2020,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
2337 | struct scan_control *sc, | 2020 | struct scan_control *sc, |
2338 | struct shrink_control *shrink) | 2021 | struct shrink_control *shrink) |
2339 | { | 2022 | { |
2340 | int priority; | ||
2341 | unsigned long total_scanned = 0; | 2023 | unsigned long total_scanned = 0; |
2342 | struct reclaim_state *reclaim_state = current->reclaim_state; | 2024 | struct reclaim_state *reclaim_state = current->reclaim_state; |
2343 | struct zoneref *z; | 2025 | struct zoneref *z; |
@@ -2350,11 +2032,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
2350 | if (global_reclaim(sc)) | 2032 | if (global_reclaim(sc)) |
2351 | count_vm_event(ALLOCSTALL); | 2033 | count_vm_event(ALLOCSTALL); |
2352 | 2034 | ||
2353 | for (priority = DEF_PRIORITY; priority >= 0; priority--) { | 2035 | do { |
2354 | sc->nr_scanned = 0; | 2036 | sc->nr_scanned = 0; |
2355 | if (!priority) | 2037 | aborted_reclaim = shrink_zones(zonelist, sc); |
2356 | disable_swap_token(sc->target_mem_cgroup); | ||
2357 | aborted_reclaim = shrink_zones(priority, zonelist, sc); | ||
2358 | 2038 | ||
2359 | /* | 2039 | /* |
2360 | * Don't shrink slabs when reclaiming memory from | 2040 | * Don't shrink slabs when reclaiming memory from |
@@ -2396,7 +2076,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
2396 | 2076 | ||
2397 | /* Take a nap, wait for some writeback to complete */ | 2077 | /* Take a nap, wait for some writeback to complete */ |
2398 | if (!sc->hibernation_mode && sc->nr_scanned && | 2078 | if (!sc->hibernation_mode && sc->nr_scanned && |
2399 | priority < DEF_PRIORITY - 2) { | 2079 | sc->priority < DEF_PRIORITY - 2) { |
2400 | struct zone *preferred_zone; | 2080 | struct zone *preferred_zone; |
2401 | 2081 | ||
2402 | first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask), | 2082 | first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask), |
@@ -2404,7 +2084,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
2404 | &preferred_zone); | 2084 | &preferred_zone); |
2405 | wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10); | 2085 | wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10); |
2406 | } | 2086 | } |
2407 | } | 2087 | } while (--sc->priority >= 0); |
2408 | 2088 | ||
2409 | out: | 2089 | out: |
2410 | delayacct_freepages_end(); | 2090 | delayacct_freepages_end(); |
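
With priority stored in scan_control, the old for (priority = DEF_PRIORITY; priority >= 0; priority--) loop becomes do { ... } while (--sc->priority >= 0), seeded by the .priority = DEF_PRIORITY initializers added to the callers below, and the disable_swap_token() call at priority 0 disappears. A cut-down model of that loop shape, including the DEF_PRIORITY - 2 threshold for the congestion wait; struct scan_control_model and shrink_pass() are invented stand-ins, and the slab-shrinking and writeback-throttling steps are omitted:

#include <stdio.h>

#define DEF_PRIORITY 12

struct scan_control_model {
        int priority;
        unsigned long nr_reclaimed;
        unsigned long nr_to_reclaim;
};

/* Stub for one pass of shrink_zones(): frees more as priority drops. */
static void shrink_pass(struct scan_control_model *sc)
{
        sc->nr_reclaimed += (unsigned long)(DEF_PRIORITY - sc->priority + 1);
}

static unsigned long try_to_free_model(struct scan_control_model *sc)
{
        do {
                shrink_pass(sc);
                if (sc->nr_reclaimed >= sc->nr_to_reclaim)
                        break;
                /* Only throttle on writeback once pressure has built up. */
                if (sc->priority < DEF_PRIORITY - 2)
                        printf("priority %d: would wait_iff_congested()\n",
                               sc->priority);
        } while (--sc->priority >= 0);

        return sc->nr_reclaimed;
}

int main(void)
{
        struct scan_control_model sc = {
                .priority = DEF_PRIORITY,
                .nr_to_reclaim = 40,
        };

        printf("freed %lu pages\n", try_to_free_model(&sc));
        return 0;
}
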
@@ -2442,6 +2122,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | |||
2442 | .may_unmap = 1, | 2122 | .may_unmap = 1, |
2443 | .may_swap = 1, | 2123 | .may_swap = 1, |
2444 | .order = order, | 2124 | .order = order, |
2125 | .priority = DEF_PRIORITY, | ||
2445 | .target_mem_cgroup = NULL, | 2126 | .target_mem_cgroup = NULL, |
2446 | .nodemask = nodemask, | 2127 | .nodemask = nodemask, |
2447 | }; | 2128 | }; |
@@ -2474,17 +2155,15 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, | |||
2474 | .may_unmap = 1, | 2155 | .may_unmap = 1, |
2475 | .may_swap = !noswap, | 2156 | .may_swap = !noswap, |
2476 | .order = 0, | 2157 | .order = 0, |
2158 | .priority = 0, | ||
2477 | .target_mem_cgroup = memcg, | 2159 | .target_mem_cgroup = memcg, |
2478 | }; | 2160 | }; |
2479 | struct mem_cgroup_zone mz = { | 2161 | struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); |
2480 | .mem_cgroup = memcg, | ||
2481 | .zone = zone, | ||
2482 | }; | ||
2483 | 2162 | ||
2484 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | | 2163 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | |
2485 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); | 2164 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK); |
2486 | 2165 | ||
2487 | trace_mm_vmscan_memcg_softlimit_reclaim_begin(0, | 2166 | trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order, |
2488 | sc.may_writepage, | 2167 | sc.may_writepage, |
2489 | sc.gfp_mask); | 2168 | sc.gfp_mask); |
2490 | 2169 | ||
@@ -2495,7 +2174,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, | |||
2495 | * will pick up pages from other mem cgroups as well. We hack | 2174 | * will pick up pages from other mem cgroups as well. We hack |
2496 | * the priority and make it zero. | 2175 | * the priority and make it zero. |
2497 | */ | 2176 | */ |
2498 | shrink_mem_cgroup_zone(0, &mz, &sc); | 2177 | shrink_lruvec(lruvec, &sc); |
2499 | 2178 | ||
2500 | trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); | 2179 | trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); |
2501 | 2180 | ||
@@ -2516,6 +2195,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, | |||
2516 | .may_swap = !noswap, | 2195 | .may_swap = !noswap, |
2517 | .nr_to_reclaim = SWAP_CLUSTER_MAX, | 2196 | .nr_to_reclaim = SWAP_CLUSTER_MAX, |
2518 | .order = 0, | 2197 | .order = 0, |
2198 | .priority = DEF_PRIORITY, | ||
2519 | .target_mem_cgroup = memcg, | 2199 | .target_mem_cgroup = memcg, |
2520 | .nodemask = NULL, /* we don't care about placement */ | 2200 | .nodemask = NULL, /* we don't care about placement */ |
2521 | .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | | 2201 | .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | |
@@ -2546,8 +2226,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, | |||
2546 | } | 2226 | } |
2547 | #endif | 2227 | #endif |
2548 | 2228 | ||
2549 | static void age_active_anon(struct zone *zone, struct scan_control *sc, | 2229 | static void age_active_anon(struct zone *zone, struct scan_control *sc) |
2550 | int priority) | ||
2551 | { | 2230 | { |
2552 | struct mem_cgroup *memcg; | 2231 | struct mem_cgroup *memcg; |
2553 | 2232 | ||
@@ -2556,14 +2235,11 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc, | |||
2556 | 2235 | ||
2557 | memcg = mem_cgroup_iter(NULL, NULL, NULL); | 2236 | memcg = mem_cgroup_iter(NULL, NULL, NULL); |
2558 | do { | 2237 | do { |
2559 | struct mem_cgroup_zone mz = { | 2238 | struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg); |
2560 | .mem_cgroup = memcg, | ||
2561 | .zone = zone, | ||
2562 | }; | ||
2563 | 2239 | ||
2564 | if (inactive_anon_is_low(&mz)) | 2240 | if (inactive_anon_is_low(lruvec)) |
2565 | shrink_active_list(SWAP_CLUSTER_MAX, &mz, | 2241 | shrink_active_list(SWAP_CLUSTER_MAX, lruvec, |
2566 | sc, priority, 0); | 2242 | sc, LRU_ACTIVE_ANON); |
2567 | 2243 | ||
2568 | memcg = mem_cgroup_iter(NULL, memcg, NULL); | 2244 | memcg = mem_cgroup_iter(NULL, memcg, NULL); |
2569 | } while (memcg); | 2245 | } while (memcg); |
@@ -2672,7 +2348,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, | |||
2672 | { | 2348 | { |
2673 | int all_zones_ok; | 2349 | int all_zones_ok; |
2674 | unsigned long balanced; | 2350 | unsigned long balanced; |
2675 | int priority; | ||
2676 | int i; | 2351 | int i; |
2677 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ | 2352 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ |
2678 | unsigned long total_scanned; | 2353 | unsigned long total_scanned; |
@@ -2696,18 +2371,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, | |||
2696 | }; | 2371 | }; |
2697 | loop_again: | 2372 | loop_again: |
2698 | total_scanned = 0; | 2373 | total_scanned = 0; |
2374 | sc.priority = DEF_PRIORITY; | ||
2699 | sc.nr_reclaimed = 0; | 2375 | sc.nr_reclaimed = 0; |
2700 | sc.may_writepage = !laptop_mode; | 2376 | sc.may_writepage = !laptop_mode; |
2701 | count_vm_event(PAGEOUTRUN); | 2377 | count_vm_event(PAGEOUTRUN); |
2702 | 2378 | ||
2703 | for (priority = DEF_PRIORITY; priority >= 0; priority--) { | 2379 | do { |
2704 | unsigned long lru_pages = 0; | 2380 | unsigned long lru_pages = 0; |
2705 | int has_under_min_watermark_zone = 0; | 2381 | int has_under_min_watermark_zone = 0; |
2706 | 2382 | ||
2707 | /* The swap token gets in the way of swapout... */ | ||
2708 | if (!priority) | ||
2709 | disable_swap_token(NULL); | ||
2710 | |||
2711 | all_zones_ok = 1; | 2383 | all_zones_ok = 1; |
2712 | balanced = 0; | 2384 | balanced = 0; |
2713 | 2385 | ||
@@ -2721,14 +2393,15 @@ loop_again: | |||
2721 | if (!populated_zone(zone)) | 2393 | if (!populated_zone(zone)) |
2722 | continue; | 2394 | continue; |
2723 | 2395 | ||
2724 | if (zone->all_unreclaimable && priority != DEF_PRIORITY) | 2396 | if (zone->all_unreclaimable && |
2397 | sc.priority != DEF_PRIORITY) | ||
2725 | continue; | 2398 | continue; |
2726 | 2399 | ||
2727 | /* | 2400 | /* |
2728 | * Do some background aging of the anon list, to give | 2401 | * Do some background aging of the anon list, to give |
2729 | * pages a chance to be referenced before reclaiming. | 2402 | * pages a chance to be referenced before reclaiming. |
2730 | */ | 2403 | */ |
2731 | age_active_anon(zone, &sc, priority); | 2404 | age_active_anon(zone, &sc); |
2732 | 2405 | ||
2733 | /* | 2406 | /* |
2734 | * If the number of buffer_heads in the machine | 2407 | * If the number of buffer_heads in the machine |
@@ -2776,7 +2449,8 @@ loop_again: | |||
2776 | if (!populated_zone(zone)) | 2449 | if (!populated_zone(zone)) |
2777 | continue; | 2450 | continue; |
2778 | 2451 | ||
2779 | if (zone->all_unreclaimable && priority != DEF_PRIORITY) | 2452 | if (zone->all_unreclaimable && |
2453 | sc.priority != DEF_PRIORITY) | ||
2780 | continue; | 2454 | continue; |
2781 | 2455 | ||
2782 | sc.nr_scanned = 0; | 2456 | sc.nr_scanned = 0; |
@@ -2820,7 +2494,7 @@ loop_again: | |||
2820 | !zone_watermark_ok_safe(zone, testorder, | 2494 | !zone_watermark_ok_safe(zone, testorder, |
2821 | high_wmark_pages(zone) + balance_gap, | 2495 | high_wmark_pages(zone) + balance_gap, |
2822 | end_zone, 0)) { | 2496 | end_zone, 0)) { |
2823 | shrink_zone(priority, zone, &sc); | 2497 | shrink_zone(zone, &sc); |
2824 | 2498 | ||
2825 | reclaim_state->reclaimed_slab = 0; | 2499 | reclaim_state->reclaimed_slab = 0; |
2826 | nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages); | 2500 | nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages); |
@@ -2877,7 +2551,7 @@ loop_again: | |||
2877 | * OK, kswapd is getting into trouble. Take a nap, then take | 2551 | * OK, kswapd is getting into trouble. Take a nap, then take |
2878 | * another pass across the zones. | 2552 | * another pass across the zones. |
2879 | */ | 2553 | */ |
2880 | if (total_scanned && (priority < DEF_PRIORITY - 2)) { | 2554 | if (total_scanned && (sc.priority < DEF_PRIORITY - 2)) { |
2881 | if (has_under_min_watermark_zone) | 2555 | if (has_under_min_watermark_zone) |
2882 | count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT); | 2556 | count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT); |
2883 | else | 2557 | else |
@@ -2892,7 +2566,7 @@ loop_again: | |||
2892 | */ | 2566 | */ |
2893 | if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) | 2567 | if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) |
2894 | break; | 2568 | break; |
2895 | } | 2569 | } while (--sc.priority >= 0); |
2896 | out: | 2570 | out: |
2897 | 2571 | ||
2898 | /* | 2572 | /* |
@@ -2942,7 +2616,8 @@ out: | |||
2942 | if (!populated_zone(zone)) | 2616 | if (!populated_zone(zone)) |
2943 | continue; | 2617 | continue; |
2944 | 2618 | ||
2945 | if (zone->all_unreclaimable && priority != DEF_PRIORITY) | 2619 | if (zone->all_unreclaimable && |
2620 | sc.priority != DEF_PRIORITY) | ||
2946 | continue; | 2621 | continue; |
2947 | 2622 | ||
2948 | /* Would compaction fail due to lack of free memory? */ | 2623 | /* Would compaction fail due to lack of free memory? */ |
@@ -3013,7 +2688,10 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx) | |||
3013 | * them before going back to sleep. | 2688 | * them before going back to sleep. |
3014 | */ | 2689 | */ |
3015 | set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold); | 2690 | set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold); |
3016 | schedule(); | 2691 | |
2692 | if (!kthread_should_stop()) | ||
2693 | schedule(); | ||
2694 | |||
3017 | set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold); | 2695 | set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold); |
3018 | } else { | 2696 | } else { |
3019 | if (remaining) | 2697 | if (remaining) |
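
The kthread_should_stop() check added before schedule() keeps kswapd from going back to sleep after kthread_stop() has already issued its final wake-up, which would otherwise leave the stopping path waiting on a task that never exits. The same check-the-flag-before-sleeping discipline, shown here as a userspace pthread analogy only (the mutex/condvar pair stands in for the kswapd waitqueue and kthread machinery, which the kernel implements differently):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wake = PTHREAD_COND_INITIALIZER;
static bool should_stop;

static void *worker(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&lock);
        /* Re-check the stop flag before every sleep; sleeping without
         * re-checking could miss a stop request that already arrived. */
        while (!should_stop)
                pthread_cond_wait(&wake, &lock);
        pthread_mutex_unlock(&lock);
        printf("worker: stop request seen, exiting\n");
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, worker, NULL);
        sleep(1);

        pthread_mutex_lock(&lock);
        should_stop = true;
        pthread_cond_signal(&wake);
        pthread_mutex_unlock(&lock);

        pthread_join(t, NULL);
        printf("main: worker joined\n");
        return 0;
}
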
@@ -3209,6 +2887,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) | |||
3209 | .nr_to_reclaim = nr_to_reclaim, | 2887 | .nr_to_reclaim = nr_to_reclaim, |
3210 | .hibernation_mode = 1, | 2888 | .hibernation_mode = 1, |
3211 | .order = 0, | 2889 | .order = 0, |
2890 | .priority = DEF_PRIORITY, | ||
3212 | }; | 2891 | }; |
3213 | struct shrink_control shrink = { | 2892 | struct shrink_control shrink = { |
3214 | .gfp_mask = sc.gfp_mask, | 2893 | .gfp_mask = sc.gfp_mask, |
@@ -3279,14 +2958,17 @@ int kswapd_run(int nid) | |||
3279 | } | 2958 | } |
3280 | 2959 | ||
3281 | /* | 2960 | /* |
3282 | * Called by memory hotplug when all memory in a node is offlined. | 2961 | * Called by memory hotplug when all memory in a node is offlined. Caller must |
2962 | * hold lock_memory_hotplug(). | ||
3283 | */ | 2963 | */ |
3284 | void kswapd_stop(int nid) | 2964 | void kswapd_stop(int nid) |
3285 | { | 2965 | { |
3286 | struct task_struct *kswapd = NODE_DATA(nid)->kswapd; | 2966 | struct task_struct *kswapd = NODE_DATA(nid)->kswapd; |
3287 | 2967 | ||
3288 | if (kswapd) | 2968 | if (kswapd) { |
3289 | kthread_stop(kswapd); | 2969 | kthread_stop(kswapd); |
2970 | NODE_DATA(nid)->kswapd = NULL; | ||
2971 | } | ||
3290 | } | 2972 | } |
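
kswapd_stop() now clears the per-node kswapd pointer after stopping the thread, so a later hotplug event, or a kswapd_run() when the node comes back online, never acts on a stale task pointer. A toy model of that stop-and-forget pattern; struct node_data, struct task and fake_kthread_stop() are all invented for the sketch:

#include <stdio.h>
#include <stdlib.h>

struct task { int id; };

struct node_data {
        struct task *kswapd;    /* stand-in for the per-node kswapd task */
};

static struct node_data nodes[4];

static void fake_kthread_stop(struct task *t)
{
        printf("stopping task %d\n", t->id);
        free(t);
}

/* Mirrors the fixed kswapd_stop(): stop the thread if it exists and
 * clear the pointer so a repeated offline is a harmless no-op. */
static void kswapd_stop_model(int nid)
{
        struct task *kswapd = nodes[nid].kswapd;

        if (kswapd) {
                fake_kthread_stop(kswapd);
                nodes[nid].kswapd = NULL;
        }
}

int main(void)
{
        nodes[1].kswapd = malloc(sizeof(struct task));
        nodes[1].kswapd->id = 42;

        kswapd_stop_model(1);
        kswapd_stop_model(1);   /* second call: pointer already NULL, nothing to do */
        return 0;
}
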
3291 | 2973 | ||
3292 | static int __init kswapd_init(void) | 2974 | static int __init kswapd_init(void) |
@@ -3386,7 +3068,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
3386 | const unsigned long nr_pages = 1 << order; | 3068 | const unsigned long nr_pages = 1 << order; |
3387 | struct task_struct *p = current; | 3069 | struct task_struct *p = current; |
3388 | struct reclaim_state reclaim_state; | 3070 | struct reclaim_state reclaim_state; |
3389 | int priority; | ||
3390 | struct scan_control sc = { | 3071 | struct scan_control sc = { |
3391 | .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), | 3072 | .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), |
3392 | .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP), | 3073 | .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP), |
@@ -3395,6 +3076,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
3395 | SWAP_CLUSTER_MAX), | 3076 | SWAP_CLUSTER_MAX), |
3396 | .gfp_mask = gfp_mask, | 3077 | .gfp_mask = gfp_mask, |
3397 | .order = order, | 3078 | .order = order, |
3079 | .priority = ZONE_RECLAIM_PRIORITY, | ||
3398 | }; | 3080 | }; |
3399 | struct shrink_control shrink = { | 3081 | struct shrink_control shrink = { |
3400 | .gfp_mask = sc.gfp_mask, | 3082 | .gfp_mask = sc.gfp_mask, |
@@ -3417,11 +3099,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
3417 | * Free memory by calling shrink zone with increasing | 3099 | * Free memory by calling shrink zone with increasing |
3418 | * priorities until we have enough memory freed. | 3100 | * priorities until we have enough memory freed. |
3419 | */ | 3101 | */ |
3420 | priority = ZONE_RECLAIM_PRIORITY; | ||
3421 | do { | 3102 | do { |
3422 | shrink_zone(priority, zone, &sc); | 3103 | shrink_zone(zone, &sc); |
3423 | priority--; | 3104 | } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0); |
3424 | } while (priority >= 0 && sc.nr_reclaimed < nr_pages); | ||
3425 | } | 3105 | } |
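
__zone_reclaim() seeds sc.priority with ZONE_RECLAIM_PRIORITY and reuses the shared do/while shape, stopping as soon as nr_pages have been reclaimed or priority reaches zero. A minimal model of that loop; shrink_zone_stub() is invented and ZONE_RECLAIM_PRIORITY is assumed to be its usual value of 4:

#include <stdio.h>

#define ZONE_RECLAIM_PRIORITY 4

/* Stub shrink pass: frees more pages as priority drops toward 0. */
static unsigned long shrink_zone_stub(int priority)
{
        return (unsigned long)(8 >> priority);
}

int main(void)
{
        const unsigned long nr_pages = 1UL << 2;        /* order-2 request */
        unsigned long nr_reclaimed = 0;
        int priority = ZONE_RECLAIM_PRIORITY;

        do {
                nr_reclaimed += shrink_zone_stub(priority);
        } while (nr_reclaimed < nr_pages && --priority >= 0);

        printf("reclaimed %lu of %lu\n", nr_reclaimed, nr_pages);
        return 0;
}
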
3426 | 3106 | ||
3427 | nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE); | 3107 | nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE); |
@@ -3536,7 +3216,7 @@ int page_evictable(struct page *page, struct vm_area_struct *vma) | |||
3536 | if (mapping_unevictable(page_mapping(page))) | 3216 | if (mapping_unevictable(page_mapping(page))) |
3537 | return 0; | 3217 | return 0; |
3538 | 3218 | ||
3539 | if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page))) | 3219 | if (PageMlocked(page) || (vma && mlocked_vma_newpage(vma, page))) |
3540 | return 0; | 3220 | return 0; |
3541 | 3221 | ||
3542 | return 1; | 3222 | return 1; |
@@ -3572,6 +3252,7 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) | |||
3572 | zone = pagezone; | 3252 | zone = pagezone; |
3573 | spin_lock_irq(&zone->lru_lock); | 3253 | spin_lock_irq(&zone->lru_lock); |
3574 | } | 3254 | } |
3255 | lruvec = mem_cgroup_page_lruvec(page, zone); | ||
3575 | 3256 | ||
3576 | if (!PageLRU(page) || !PageUnevictable(page)) | 3257 | if (!PageLRU(page) || !PageUnevictable(page)) |
3577 | continue; | 3258 | continue; |
@@ -3581,11 +3262,8 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) | |||
3581 | 3262 | ||
3582 | VM_BUG_ON(PageActive(page)); | 3263 | VM_BUG_ON(PageActive(page)); |
3583 | ClearPageUnevictable(page); | 3264 | ClearPageUnevictable(page); |
3584 | __dec_zone_state(zone, NR_UNEVICTABLE); | 3265 | del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE); |
3585 | lruvec = mem_cgroup_lru_move_lists(zone, page, | 3266 | add_page_to_lru_list(page, lruvec, lru); |
3586 | LRU_UNEVICTABLE, lru); | ||
3587 | list_move(&page->lru, &lruvec->lists[lru]); | ||
3588 | __inc_zone_state(zone, NR_INACTIVE_ANON + lru); | ||
3589 | pgrescued++; | 3267 | pgrescued++; |
3590 | } | 3268 | } |
3591 | } | 3269 | } |
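
The rescue path above now looks up the page's lruvec and moves it with del_page_from_lru_list()/add_page_to_lru_list(), which adjust the LRU statistics as part of the move, instead of open-coding the list_move() plus two zone counter updates. A self-contained toy of the same "counted list move" idea, with struct counted_list and its helpers invented for the sketch:

#include <stdio.h>

/* Toy doubly-linked node and per-list counter, standing in for a page
 * on an LRU list and the per-LRU zone statistics. */
struct node { struct node *prev, *next; };

struct counted_list {
        struct node head;
        unsigned long nr;
};

static void list_init(struct counted_list *l)
{
        l->head.prev = l->head.next = &l->head;
        l->nr = 0;
}

static void del_from_list(struct node *n, struct counted_list *l)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
        l->nr--;
}

static void add_to_list(struct node *n, struct counted_list *l)
{
        n->next = l->head.next;
        n->prev = &l->head;
        l->head.next->prev = n;
        l->head.next = n;
        l->nr++;
}

int main(void)
{
        struct counted_list unevictable, inactive_anon;
        struct node page;

        list_init(&unevictable);
        list_init(&inactive_anon);
        add_to_list(&page, &unevictable);

        /* One delete + one add keeps both counters in sync, which is
         * what the paired LRU helpers do for the real statistics. */
        del_from_list(&page, &unevictable);
        add_to_list(&page, &inactive_anon);

        printf("unevictable=%lu inactive_anon=%lu\n",
               unevictable.nr, inactive_anon.nr);
        return 0;
}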