Diffstat (limited to 'mm/vmscan.c')
 -rw-r--r--  mm/vmscan.c | 680
 1 file changed, 420 insertions(+), 260 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 26f4a8a4e0c7..2880396f7953 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -103,8 +103,11 @@ struct scan_control {
 	 */
 	reclaim_mode_t reclaim_mode;
 
-	/* Which cgroup do we reclaim from */
-	struct mem_cgroup *mem_cgroup;
+	/*
+	 * The memory cgroup that hit its limit and as a result is the
+	 * primary target of this reclaim invocation.
+	 */
+	struct mem_cgroup *target_mem_cgroup;
 
 	/*
 	 * Nodemask of nodes allowed by the caller. If NULL, all nodes
@@ -113,6 +116,11 @@ struct scan_control {
 	nodemask_t *nodemask;
 };
 
+struct mem_cgroup_zone {
+	struct mem_cgroup *mem_cgroup;
+	struct zone *zone;
+};
+
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
 #ifdef ARCH_HAS_PREFETCH
@@ -153,28 +161,45 @@ static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
-#define scanning_global_lru(sc)	(!(sc)->mem_cgroup)
+static bool global_reclaim(struct scan_control *sc)
+{
+	return !sc->target_mem_cgroup;
+}
+
+static bool scanning_global_lru(struct mem_cgroup_zone *mz)
+{
+	return !mz->mem_cgroup;
+}
 #else
-#define scanning_global_lru(sc)	(1)
+static bool global_reclaim(struct scan_control *sc)
+{
+	return true;
+}
+
+static bool scanning_global_lru(struct mem_cgroup_zone *mz)
+{
+	return true;
+}
 #endif
 
-static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
-						  struct scan_control *sc)
+static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz)
 {
-	if (!scanning_global_lru(sc))
-		return mem_cgroup_get_reclaim_stat(sc->mem_cgroup, zone);
+	if (!scanning_global_lru(mz))
+		return mem_cgroup_get_reclaim_stat(mz->mem_cgroup, mz->zone);
 
-	return &zone->reclaim_stat;
+	return &mz->zone->reclaim_stat;
 }
 
-static unsigned long zone_nr_lru_pages(struct zone *zone,
-				struct scan_control *sc, enum lru_list lru)
+static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz,
+				       enum lru_list lru)
 {
-	if (!scanning_global_lru(sc))
-		return mem_cgroup_zone_nr_lru_pages(sc->mem_cgroup,
-			zone_to_nid(zone), zone_idx(zone), BIT(lru));
+	if (!scanning_global_lru(mz))
+		return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
+						    zone_to_nid(mz->zone),
+						    zone_idx(mz->zone),
+						    BIT(lru));
 
-	return zone_page_state(zone, NR_LRU_BASE + lru);
+	return zone_page_state(mz->zone, NR_LRU_BASE + lru);
 }
 
 
@@ -677,12 +702,13 @@ enum page_references {
 };
 
 static enum page_references page_check_references(struct page *page,
+						  struct mem_cgroup_zone *mz,
 						  struct scan_control *sc)
 {
 	int referenced_ptes, referenced_page;
 	unsigned long vm_flags;
 
-	referenced_ptes = page_referenced(page, 1, sc->mem_cgroup, &vm_flags);
+	referenced_ptes = page_referenced(page, 1, mz->mem_cgroup, &vm_flags);
 	referenced_page = TestClearPageReferenced(page);
 
 	/* Lumpy reclaim - ignore references */
@@ -738,7 +764,7 @@ static enum page_references page_check_references(struct page *page,
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
-				      struct zone *zone,
+				      struct mem_cgroup_zone *mz,
 				      struct scan_control *sc,
 				      int priority,
 				      unsigned long *ret_nr_dirty,
@@ -769,7 +795,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			goto keep;
 
 		VM_BUG_ON(PageActive(page));
-		VM_BUG_ON(page_zone(page) != zone);
+		VM_BUG_ON(page_zone(page) != mz->zone);
 
 		sc->nr_scanned++;
 
@@ -803,7 +829,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			}
 		}
 
-		references = page_check_references(page, sc);
+		references = page_check_references(page, mz, sc);
 		switch (references) {
 		case PAGEREF_ACTIVATE:
 			goto activate_locked;
@@ -994,8 +1020,8 @@ keep_lumpy:
 	 * back off and wait for congestion to clear because further reclaim
 	 * will encounter the same problem
 	 */
-	if (nr_dirty && nr_dirty == nr_congested && scanning_global_lru(sc))
-		zone_set_flag(zone, ZONE_CONGESTED);
+	if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc))
+		zone_set_flag(mz->zone, ZONE_CONGESTED);
 
 	free_hot_cold_page_list(&free_pages, 1);
 
@@ -1049,8 +1075,39 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
 
 	ret = -EBUSY;
 
-	if ((mode & ISOLATE_CLEAN) && (PageDirty(page) || PageWriteback(page)))
-		return ret;
+	/*
+	 * To minimise LRU disruption, the caller can indicate that it only
+	 * wants to isolate pages it will be able to operate on without
+	 * blocking - clean pages for the most part.
+	 *
+	 * ISOLATE_CLEAN means that only clean pages should be isolated. This
+	 * is used by reclaim when it is cannot write to backing storage
+	 *
+	 * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages
+	 * that it is possible to migrate without blocking
+	 */
+	if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
+		/* All the caller can do on PageWriteback is block */
+		if (PageWriteback(page))
+			return ret;
+
+		if (PageDirty(page)) {
+			struct address_space *mapping;
+
+			/* ISOLATE_CLEAN means only clean pages */
+			if (mode & ISOLATE_CLEAN)
+				return ret;
+
+			/*
+			 * Only pages without mappings or that have a
+			 * ->migratepage callback are possible to migrate
+			 * without blocking
+			 */
+			mapping = page_mapping(page);
+			if (mapping && !mapping->a_ops->migratepage)
+				return ret;
+		}
+	}
 
 	if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
 		return ret;
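
The new filter can be read as one predicate; an illustrative restatement (the helper name below is hypothetical, not something the patch adds):

	/* illustrative: can this page be isolated without the caller blocking? */
	static bool can_isolate_without_blocking(struct page *page, isolate_mode_t mode)
	{
		struct address_space *mapping;

		if (!(mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)))
			return true;
		if (PageWriteback(page))
			return false;		/* waiting is the only option */
		if (!PageDirty(page))
			return true;
		if (mode & ISOLATE_CLEAN)
			return false;		/* caller wants clean pages only */
		mapping = page_mapping(page);
		return !mapping || mapping->a_ops->migratepage;
	}
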
@@ -1079,25 +1136,36 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
  * Appropriate locks must be held before calling this function.
  *
  * @nr_to_scan:	The number of pages to look through on the list.
- * @src:	The LRU list to pull pages off.
+ * @mz:		The mem_cgroup_zone to pull pages from.
  * @dst:	The temp list to put pages on to.
- * @scanned:	The number of pages that were scanned.
+ * @nr_scanned:	The number of pages that were scanned.
  * @order:	The caller's attempted allocation order
  * @mode:	One of the LRU isolation modes
+ * @active:	True [1] if isolating active pages
  * @file:	True [1] if isolating file [!anon] pages
  *
  * returns how many pages were moved onto *@dst.
  */
 static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
-		struct list_head *src, struct list_head *dst,
-		unsigned long *scanned, int order, isolate_mode_t mode,
-		int file)
+		struct mem_cgroup_zone *mz, struct list_head *dst,
+		unsigned long *nr_scanned, int order, isolate_mode_t mode,
+		int active, int file)
 {
+	struct lruvec *lruvec;
+	struct list_head *src;
 	unsigned long nr_taken = 0;
 	unsigned long nr_lumpy_taken = 0;
 	unsigned long nr_lumpy_dirty = 0;
 	unsigned long nr_lumpy_failed = 0;
 	unsigned long scan;
+	int lru = LRU_BASE;
+
+	lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup);
+	if (active)
+		lru += LRU_ACTIVE;
+	if (file)
+		lru += LRU_FILE;
+	src = &lruvec->lists[lru];
 
 	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
 		struct page *page;
@@ -1113,15 +1181,14 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
 		switch (__isolate_lru_page(page, mode, file)) {
 		case 0:
+			mem_cgroup_lru_del(page);
 			list_move(&page->lru, dst);
-			mem_cgroup_del_lru(page);
 			nr_taken += hpage_nr_pages(page);
 			break;
 
 		case -EBUSY:
 			/* else it is being freed elsewhere */
 			list_move(&page->lru, src);
-			mem_cgroup_rotate_lru_list(page, page_lru(page));
 			continue;
 
 		default:
@@ -1171,13 +1238,17 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 				break;
 
 			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
+				unsigned int isolated_pages;
+
+				mem_cgroup_lru_del(cursor_page);
 				list_move(&cursor_page->lru, dst);
-				mem_cgroup_del_lru(cursor_page);
-				nr_taken += hpage_nr_pages(cursor_page);
-				nr_lumpy_taken++;
+				isolated_pages = hpage_nr_pages(cursor_page);
+				nr_taken += isolated_pages;
+				nr_lumpy_taken += isolated_pages;
 				if (PageDirty(cursor_page))
-					nr_lumpy_dirty++;
+					nr_lumpy_dirty += isolated_pages;
 				scan++;
+				pfn += isolated_pages - 1;
 			} else {
 				/*
 				 * Check if the page is freed already.
@@ -1203,57 +1274,16 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 			nr_lumpy_failed++;
 		}
 	}
-	*scanned = scan;
+	*nr_scanned = scan;
 
 	trace_mm_vmscan_lru_isolate(order,
 			nr_to_scan, scan,
 			nr_taken,
 			nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed,
-			mode);
+			mode, file);
 	return nr_taken;
 }
 
-static unsigned long isolate_pages_global(unsigned long nr,
-					struct list_head *dst,
-					unsigned long *scanned, int order,
-					isolate_mode_t mode,
-					struct zone *z, int active, int file)
-{
-	int lru = LRU_BASE;
-	if (active)
-		lru += LRU_ACTIVE;
-	if (file)
-		lru += LRU_FILE;
-	return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order,
-							mode, file);
-}
-
-/*
- * clear_active_flags() is a helper for shrink_active_list(), clearing
- * any active bits from the pages in the list.
- */
-static unsigned long clear_active_flags(struct list_head *page_list,
-					unsigned int *count)
-{
-	int nr_active = 0;
-	int lru;
-	struct page *page;
-
-	list_for_each_entry(page, page_list, lru) {
-		int numpages = hpage_nr_pages(page);
-		lru = page_lru_base_type(page);
-		if (PageActive(page)) {
-			lru += LRU_ACTIVE;
-			ClearPageActive(page);
-			nr_active += numpages;
-		}
-		if (count)
-			count[lru] += numpages;
-	}
-
-	return nr_active;
-}
-
 /**
  * isolate_lru_page - tries to isolate a page from its LRU list
  * @page: page to isolate from its LRU list
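
With isolate_pages_global() removed, the source list is now derived inside isolate_lru_pages() itself, from the per-memcg lruvec rather than zone->lru. Illustrative only, restating the index arithmetic visible in the hunk above:

	int lru = LRU_BASE;			/* LRU_INACTIVE_ANON */
	if (active)
		lru += LRU_ACTIVE;
	if (file)
		lru += LRU_FILE;
	src = &lruvec->lists[lru];		/* was &zone->lru[lru].list */
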
@@ -1313,7 +1343,7 @@ static int too_many_isolated(struct zone *zone, int file,
 	if (current_is_kswapd())
 		return 0;
 
-	if (!scanning_global_lru(sc))
+	if (!global_reclaim(sc))
 		return 0;
 
 	if (file) {
@@ -1327,27 +1357,21 @@ static int too_many_isolated(struct zone *zone, int file,
 	return isolated > inactive;
 }
 
-/*
- * TODO: Try merging with migrations version of putback_lru_pages
- */
 static noinline_for_stack void
-putback_lru_pages(struct zone *zone, struct scan_control *sc,
-		  unsigned long nr_anon, unsigned long nr_file,
-		  struct list_head *page_list)
+putback_inactive_pages(struct mem_cgroup_zone *mz,
+		       struct list_head *page_list)
 {
-	struct page *page;
-	struct pagevec pvec;
-	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
-
-	pagevec_init(&pvec, 1);
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+	struct zone *zone = mz->zone;
+	LIST_HEAD(pages_to_free);
 
 	/*
 	 * Put back any unfreeable pages.
 	 */
-	spin_lock(&zone->lru_lock);
 	while (!list_empty(page_list)) {
+		struct page *page = lru_to_page(page_list);
 		int lru;
-		page = lru_to_page(page_list);
+
 		VM_BUG_ON(PageLRU(page));
 		list_del(&page->lru);
 		if (unlikely(!page_evictable(page, NULL))) {
@@ -1364,30 +1388,53 @@ putback_lru_pages(struct zone *zone, struct scan_control *sc,
 			int numpages = hpage_nr_pages(page);
 			reclaim_stat->recent_rotated[file] += numpages;
 		}
-		if (!pagevec_add(&pvec, page)) {
-			spin_unlock_irq(&zone->lru_lock);
-			__pagevec_release(&pvec);
-			spin_lock_irq(&zone->lru_lock);
+		if (put_page_testzero(page)) {
+			__ClearPageLRU(page);
+			__ClearPageActive(page);
+			del_page_from_lru_list(zone, page, lru);
+
+			if (unlikely(PageCompound(page))) {
+				spin_unlock_irq(&zone->lru_lock);
+				(*get_compound_page_dtor(page))(page);
+				spin_lock_irq(&zone->lru_lock);
+			} else
+				list_add(&page->lru, &pages_to_free);
 		}
 	}
-	__mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
-	__mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
 
-	spin_unlock_irq(&zone->lru_lock);
-	pagevec_release(&pvec);
+	/*
+	 * To save our caller's stack, now use input list for pages to free.
+	 */
+	list_splice(&pages_to_free, page_list);
 }
 
-static noinline_for_stack void update_isolated_counts(struct zone *zone,
-					struct scan_control *sc,
-					unsigned long *nr_anon,
-					unsigned long *nr_file,
-					struct list_head *isolated_list)
+static noinline_for_stack void
+update_isolated_counts(struct mem_cgroup_zone *mz,
+		       struct list_head *page_list,
+		       unsigned long *nr_anon,
+		       unsigned long *nr_file)
 {
-	unsigned long nr_active;
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+	struct zone *zone = mz->zone;
 	unsigned int count[NR_LRU_LISTS] = { 0, };
-	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	unsigned long nr_active = 0;
+	struct page *page;
+	int lru;
+
+	/*
+	 * Count pages and clear active flags
+	 */
+	list_for_each_entry(page, page_list, lru) {
+		int numpages = hpage_nr_pages(page);
+		lru = page_lru_base_type(page);
+		if (PageActive(page)) {
+			lru += LRU_ACTIVE;
+			ClearPageActive(page);
+			nr_active += numpages;
+		}
+		count[lru] += numpages;
+	}
 
-	nr_active = clear_active_flags(isolated_list, count);
 	__count_vm_events(PGDEACTIVATE, nr_active);
 
 	__mod_zone_page_state(zone, NR_ACTIVE_FILE,
@@ -1401,8 +1448,6 @@ static noinline_for_stack void update_isolated_counts(struct zone *zone,
 
 	*nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
 	*nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
-	__mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon);
-	__mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file);
 
 	reclaim_stat->recent_scanned[0] += *nr_anon;
 	reclaim_stat->recent_scanned[1] += *nr_file;
@@ -1454,8 +1499,8 @@ static inline bool should_reclaim_stall(unsigned long nr_taken,
  * of reclaimed pages
  */
 static noinline_for_stack unsigned long
-shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
-			struct scan_control *sc, int priority, int file)
+shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
+		     struct scan_control *sc, int priority, int file)
 {
 	LIST_HEAD(page_list);
 	unsigned long nr_scanned;
@@ -1466,6 +1511,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	unsigned long nr_dirty = 0;
 	unsigned long nr_writeback = 0;
 	isolate_mode_t reclaim_mode = ISOLATE_INACTIVE;
+	struct zone *zone = mz->zone;
 
 	while (unlikely(too_many_isolated(zone, file, sc))) {
 		congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -1488,9 +1534,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 
 	spin_lock_irq(&zone->lru_lock);
 
-	if (scanning_global_lru(sc)) {
-		nr_taken = isolate_pages_global(nr_to_scan, &page_list,
-			&nr_scanned, sc->order, reclaim_mode, zone, 0, file);
+	nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list,
+				     &nr_scanned, sc->order,
+				     reclaim_mode, 0, file);
+	if (global_reclaim(sc)) {
 		zone->pages_scanned += nr_scanned;
 		if (current_is_kswapd())
 			__count_zone_vm_events(PGSCAN_KSWAPD, zone,
@@ -1498,14 +1545,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 		else
 			__count_zone_vm_events(PGSCAN_DIRECT, zone,
 						nr_scanned);
-	} else {
-		nr_taken = mem_cgroup_isolate_pages(nr_to_scan, &page_list,
-			&nr_scanned, sc->order, reclaim_mode, zone,
-			sc->mem_cgroup, 0, file);
-		/*
-		 * mem_cgroup_isolate_pages() keeps track of
-		 * scanned pages on its own.
-		 */
 	}
 
 	if (nr_taken == 0) {
@@ -1513,26 +1552,37 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 		return 0;
 	}
 
-	update_isolated_counts(zone, sc, &nr_anon, &nr_file, &page_list);
+	update_isolated_counts(mz, &page_list, &nr_anon, &nr_file);
+
+	__mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
+	__mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);
 
 	spin_unlock_irq(&zone->lru_lock);
 
-	nr_reclaimed = shrink_page_list(&page_list, zone, sc, priority,
+	nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
 						&nr_dirty, &nr_writeback);
 
 	/* Check if we should syncronously wait for writeback */
 	if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
 		set_reclaim_mode(priority, sc, true);
-		nr_reclaimed += shrink_page_list(&page_list, zone, sc,
+		nr_reclaimed += shrink_page_list(&page_list, mz, sc,
 					priority, &nr_dirty, &nr_writeback);
 	}
 
-	local_irq_disable();
+	spin_lock_irq(&zone->lru_lock);
+
 	if (current_is_kswapd())
 		__count_vm_events(KSWAPD_STEAL, nr_reclaimed);
 	__count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
 
-	putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list);
+	putback_inactive_pages(mz, &page_list);
+
+	__mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
+	__mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
+
+	spin_unlock_irq(&zone->lru_lock);
+
+	free_hot_cold_page_list(&page_list, 1);
 
 	/*
 	 * If reclaim is isolating dirty pages under writeback, it implies
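
The NR_ISOLATED accounting that used to live in update_isolated_counts() and putback_lru_pages() now brackets the whole cycle in the caller, under zone->lru_lock. A condensed restatement of the control flow in the hunk above (arguments elided, illustrative only):

	spin_lock_irq(&zone->lru_lock);
	nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list, ...);
	update_isolated_counts(mz, &page_list, &nr_anon, &nr_file);
	__mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
	__mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);
	spin_unlock_irq(&zone->lru_lock);

	nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority, ...);

	spin_lock_irq(&zone->lru_lock);
	putback_inactive_pages(mz, &page_list);
	__mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
	__mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
	spin_unlock_irq(&zone->lru_lock);

	free_hot_cold_page_list(&page_list, 1);	/* pages spliced back by putback */
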
@@ -1588,30 +1638,47 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 
 static void move_active_pages_to_lru(struct zone *zone,
 				     struct list_head *list,
+				     struct list_head *pages_to_free,
 				     enum lru_list lru)
 {
 	unsigned long pgmoved = 0;
-	struct pagevec pvec;
 	struct page *page;
 
-	pagevec_init(&pvec, 1);
+	if (buffer_heads_over_limit) {
+		spin_unlock_irq(&zone->lru_lock);
+		list_for_each_entry(page, list, lru) {
+			if (page_has_private(page) && trylock_page(page)) {
+				if (page_has_private(page))
+					try_to_release_page(page, 0);
+				unlock_page(page);
+			}
+		}
+		spin_lock_irq(&zone->lru_lock);
+	}
 
 	while (!list_empty(list)) {
+		struct lruvec *lruvec;
+
 		page = lru_to_page(list);
 
 		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
 
-		list_move(&page->lru, &zone->lru[lru].list);
-		mem_cgroup_add_lru_list(page, lru);
+		lruvec = mem_cgroup_lru_add_list(zone, page, lru);
+		list_move(&page->lru, &lruvec->lists[lru]);
 		pgmoved += hpage_nr_pages(page);
 
-		if (!pagevec_add(&pvec, page) || list_empty(list)) {
-			spin_unlock_irq(&zone->lru_lock);
-			if (buffer_heads_over_limit)
-				pagevec_strip(&pvec);
-			__pagevec_release(&pvec);
-			spin_lock_irq(&zone->lru_lock);
+		if (put_page_testzero(page)) {
+			__ClearPageLRU(page);
+			__ClearPageActive(page);
+			del_page_from_lru_list(zone, page, lru);
+
+			if (unlikely(PageCompound(page))) {
+				spin_unlock_irq(&zone->lru_lock);
+				(*get_compound_page_dtor(page))(page);
+				spin_lock_irq(&zone->lru_lock);
+			} else
+				list_add(&page->lru, pages_to_free);
 		}
 	}
 	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
@@ -1619,19 +1686,22 @@ static void move_active_pages_to_lru(struct zone *zone,
 		__count_vm_events(PGDEACTIVATE, pgmoved);
 }
 
-static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
-			struct scan_control *sc, int priority, int file)
+static void shrink_active_list(unsigned long nr_to_scan,
+			       struct mem_cgroup_zone *mz,
+			       struct scan_control *sc,
+			       int priority, int file)
 {
 	unsigned long nr_taken;
-	unsigned long pgscanned;
+	unsigned long nr_scanned;
 	unsigned long vm_flags;
 	LIST_HEAD(l_hold);	/* The pages which were snipped off */
 	LIST_HEAD(l_active);
 	LIST_HEAD(l_inactive);
 	struct page *page;
-	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 	unsigned long nr_rotated = 0;
 	isolate_mode_t reclaim_mode = ISOLATE_ACTIVE;
+	struct zone *zone = mz->zone;
 
 	lru_add_drain();
 
@@ -1641,26 +1711,16 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		reclaim_mode |= ISOLATE_CLEAN;
 
 	spin_lock_irq(&zone->lru_lock);
-	if (scanning_global_lru(sc)) {
-		nr_taken = isolate_pages_global(nr_pages, &l_hold,
-						&pgscanned, sc->order,
-						reclaim_mode, zone,
-						1, file);
-		zone->pages_scanned += pgscanned;
-	} else {
-		nr_taken = mem_cgroup_isolate_pages(nr_pages, &l_hold,
-						&pgscanned, sc->order,
-						reclaim_mode, zone,
-						sc->mem_cgroup, 1, file);
-		/*
-		 * mem_cgroup_isolate_pages() keeps track of
-		 * scanned pages on its own.
-		 */
-	}
+
+	nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold,
+				     &nr_scanned, sc->order,
+				     reclaim_mode, 1, file);
+	if (global_reclaim(sc))
+		zone->pages_scanned += nr_scanned;
 
 	reclaim_stat->recent_scanned[file] += nr_taken;
 
-	__count_zone_vm_events(PGREFILL, zone, pgscanned);
+	__count_zone_vm_events(PGREFILL, zone, nr_scanned);
 	if (file)
 		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
 	else
@@ -1678,7 +1738,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			continue;
 		}
 
-		if (page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
+		if (page_referenced(page, 0, mz->mem_cgroup, &vm_flags)) {
 			nr_rotated += hpage_nr_pages(page);
 			/*
 			 * Identify referenced, file-backed active pages and
@@ -1711,12 +1771,14 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	 */
 	reclaim_stat->recent_rotated[file] += nr_rotated;
 
-	move_active_pages_to_lru(zone, &l_active,
+	move_active_pages_to_lru(zone, &l_active, &l_hold,
 						LRU_ACTIVE + file * LRU_FILE);
-	move_active_pages_to_lru(zone, &l_inactive,
+	move_active_pages_to_lru(zone, &l_inactive, &l_hold,
 						LRU_BASE + file * LRU_FILE);
 	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
 	spin_unlock_irq(&zone->lru_lock);
+
+	free_hot_cold_page_list(&l_hold, 1);
 }
 
 #ifdef CONFIG_SWAP
@@ -1741,10 +1803,8 @@ static int inactive_anon_is_low_global(struct zone *zone)
  * Returns true if the zone does not have enough inactive anon pages,
  * meaning some active anon pages need to be deactivated.
  */
-static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
+static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
 {
-	int low;
-
 	/*
 	 * If we don't have swap space, anonymous page deactivation
 	 * is pointless.
@@ -1752,15 +1812,14 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
 	if (!total_swap_pages)
 		return 0;
 
-	if (scanning_global_lru(sc))
-		low = inactive_anon_is_low_global(zone);
-	else
-		low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone);
-	return low;
+	if (!scanning_global_lru(mz))
+		return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup,
+						       mz->zone);
+
+	return inactive_anon_is_low_global(mz->zone);
 }
 #else
-static inline int inactive_anon_is_low(struct zone *zone,
-					struct scan_control *sc)
+static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
 {
 	return 0;
 }
@@ -1778,8 +1837,7 @@ static int inactive_file_is_low_global(struct zone *zone)
 
 /**
  * inactive_file_is_low - check if file pages need to be deactivated
- * @zone: zone to check
- * @sc:   scan control of this context
+ * @mz: memory cgroup and zone to check
  *
 * When the system is doing streaming IO, memory pressure here
 * ensures that active file pages get deactivated, until more
@@ -1791,45 +1849,44 @@ static int inactive_file_is_low_global(struct zone *zone)
 * This uses a different ratio than the anonymous pages, because
 * the page cache uses a use-once replacement algorithm.
 */
-static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
+static int inactive_file_is_low(struct mem_cgroup_zone *mz)
 {
-	int low;
+	if (!scanning_global_lru(mz))
+		return mem_cgroup_inactive_file_is_low(mz->mem_cgroup,
+						       mz->zone);
 
-	if (scanning_global_lru(sc))
-		low = inactive_file_is_low_global(zone);
-	else
-		low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup, zone);
-	return low;
+	return inactive_file_is_low_global(mz->zone);
 }
 
-static int inactive_list_is_low(struct zone *zone, struct scan_control *sc,
-				int file)
+static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file)
 {
 	if (file)
-		return inactive_file_is_low(zone, sc);
+		return inactive_file_is_low(mz);
 	else
-		return inactive_anon_is_low(zone, sc);
+		return inactive_anon_is_low(mz);
 }
 
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
-	struct zone *zone, struct scan_control *sc, int priority)
+				 struct mem_cgroup_zone *mz,
+				 struct scan_control *sc, int priority)
 {
 	int file = is_file_lru(lru);
 
 	if (is_active_lru(lru)) {
-		if (inactive_list_is_low(zone, sc, file))
-			shrink_active_list(nr_to_scan, zone, sc, priority, file);
+		if (inactive_list_is_low(mz, file))
+			shrink_active_list(nr_to_scan, mz, sc, priority, file);
 		return 0;
 	}
 
-	return shrink_inactive_list(nr_to_scan, zone, sc, priority, file);
+	return shrink_inactive_list(nr_to_scan, mz, sc, priority, file);
 }
 
-static int vmscan_swappiness(struct scan_control *sc)
+static int vmscan_swappiness(struct mem_cgroup_zone *mz,
+			     struct scan_control *sc)
 {
-	if (scanning_global_lru(sc))
+	if (global_reclaim(sc))
 		return vm_swappiness;
-	return mem_cgroup_swappiness(sc->mem_cgroup);
+	return mem_cgroup_swappiness(mz->mem_cgroup);
 }
 
 /*
@@ -1840,15 +1897,15 @@ static int vmscan_swappiness(struct scan_control *sc)
 *
 * nr[0] = anon pages to scan; nr[1] = file pages to scan
 */
-static void get_scan_count(struct zone *zone, struct scan_control *sc,
+static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
 			   unsigned long *nr, int priority)
 {
 	unsigned long anon, file, free;
 	unsigned long anon_prio, file_prio;
 	unsigned long ap, fp;
-	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
 	u64 fraction[2], denominator;
-	enum lru_list l;
+	enum lru_list lru;
 	int noswap = 0;
 	bool force_scan = false;
 
@@ -1862,9 +1919,9 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 	 * latencies, so it's better to scan a minimum amount there as
 	 * well.
 	 */
-	if (scanning_global_lru(sc) && current_is_kswapd())
+	if (current_is_kswapd() && mz->zone->all_unreclaimable)
 		force_scan = true;
-	if (!scanning_global_lru(sc))
+	if (!global_reclaim(sc))
 		force_scan = true;
 
 	/* If we have no swap space, do not bother scanning anon pages. */
@@ -1876,16 +1933,16 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 		goto out;
 	}
 
-	anon  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
-		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
-	file  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
-		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+	anon  = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) +
+		zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
+	file  = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) +
+		zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
 
-	if (scanning_global_lru(sc)) {
-		free  = zone_page_state(zone, NR_FREE_PAGES);
+	if (global_reclaim(sc)) {
+		free  = zone_page_state(mz->zone, NR_FREE_PAGES);
 		/* If we have very few page cache pages,
 		   force-scan anon pages. */
-		if (unlikely(file + free <= high_wmark_pages(zone))) {
+		if (unlikely(file + free <= high_wmark_pages(mz->zone))) {
 			fraction[0] = 1;
 			fraction[1] = 0;
 			denominator = 1;
@@ -1897,8 +1954,8 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 	 * With swappiness at 100, anonymous and file have the same priority.
 	 * This scanning priority is essentially the inverse of IO cost.
 	 */
-	anon_prio = vmscan_swappiness(sc);
-	file_prio = 200 - vmscan_swappiness(sc);
+	anon_prio = vmscan_swappiness(mz, sc);
+	file_prio = 200 - vmscan_swappiness(mz, sc);
 
 	/*
 	 * OK, so we have swap space and a fair amount of page cache
@@ -1911,7 +1968,7 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 	 *
 	 * anon in [0], file in [1]
 	 */
-	spin_lock_irq(&zone->lru_lock);
+	spin_lock_irq(&mz->zone->lru_lock);
 	if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
 		reclaim_stat->recent_scanned[0] /= 2;
 		reclaim_stat->recent_rotated[0] /= 2;
@@ -1932,24 +1989,24 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 
 	fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
 	fp /= reclaim_stat->recent_rotated[1] + 1;
-	spin_unlock_irq(&zone->lru_lock);
+	spin_unlock_irq(&mz->zone->lru_lock);
 
 	fraction[0] = ap;
 	fraction[1] = fp;
 	denominator = ap + fp + 1;
 out:
-	for_each_evictable_lru(l) {
-		int file = is_file_lru(l);
+	for_each_evictable_lru(lru) {
+		int file = is_file_lru(lru);
 		unsigned long scan;
 
-		scan = zone_nr_lru_pages(zone, sc, l);
+		scan = zone_nr_lru_pages(mz, lru);
 		if (priority || noswap) {
 			scan >>= priority;
 			if (!scan && force_scan)
 				scan = SWAP_CLUSTER_MAX;
 			scan = div64_u64(scan * fraction[file], denominator);
 		}
-		nr[l] = scan;
+		nr[lru] = scan;
 	}
 }
 
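
To make the resulting proportions concrete (illustrative numbers, not from the patch): with the default vm_swappiness of 60, anon_prio = 60 and file_prio = 140. If recent_scanned and recent_rotated are roughly equal for both LRU types, ap works out to about 61 and fp to about 141, so denominator = 203, and an LRU list of 1,048,576 pages at priority 12 gets:

	scan = 1048576 >> 12;			/* 256 pages this pass */
	/* file lists: 256 * 141 / 203 ~= 177 pages to scan */
	/* anon lists: 256 *  61 / 203 ~=  76 pages to scan */
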
@@ -1960,7 +2017,7 @@ out:
 * back to the allocator and call try_to_compact_zone(), we ensure that
 * there are enough free pages for it to be likely successful
 */
-static inline bool should_continue_reclaim(struct zone *zone,
+static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
 					unsigned long nr_reclaimed,
 					unsigned long nr_scanned,
 					struct scan_control *sc)
@@ -2000,15 +2057,15 @@ static inline bool should_continue_reclaim(struct zone *zone,
 	 * inactive lists are large enough, continue reclaiming
 	 */
 	pages_for_compaction = (2UL << sc->order);
-	inactive_lru_pages = zone_nr_lru_pages(zone, sc, LRU_INACTIVE_FILE);
+	inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
 	if (nr_swap_pages > 0)
-		inactive_lru_pages += zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
+		inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
 	if (sc->nr_reclaimed < pages_for_compaction &&
 	    inactive_lru_pages > pages_for_compaction)
 		return true;
 
 	/* If compaction would go ahead or the allocation would succeed, stop */
-	switch (compaction_suitable(zone, sc->order)) {
+	switch (compaction_suitable(mz->zone, sc->order)) {
 	case COMPACT_PARTIAL:
 	case COMPACT_CONTINUE:
 		return false;
@@ -2020,12 +2077,12 @@ static inline bool should_continue_reclaim(struct zone *zone,
 /*
 * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
 */
-static void shrink_zone(int priority, struct zone *zone,
-				struct scan_control *sc)
+static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
+				   struct scan_control *sc)
 {
 	unsigned long nr[NR_LRU_LISTS];
 	unsigned long nr_to_scan;
-	enum lru_list l;
+	enum lru_list lru;
 	unsigned long nr_reclaimed, nr_scanned;
 	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
 	struct blk_plug plug;
@@ -2033,19 +2090,19 @@ static void shrink_zone(int priority, struct zone *zone,
 restart:
 	nr_reclaimed = 0;
 	nr_scanned = sc->nr_scanned;
-	get_scan_count(zone, sc, nr, priority);
+	get_scan_count(mz, sc, nr, priority);
 
 	blk_start_plug(&plug);
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
 					nr[LRU_INACTIVE_FILE]) {
-		for_each_evictable_lru(l) {
-			if (nr[l]) {
+		for_each_evictable_lru(lru) {
+			if (nr[lru]) {
 				nr_to_scan = min_t(unsigned long,
-						   nr[l], SWAP_CLUSTER_MAX);
-				nr[l] -= nr_to_scan;
+						   nr[lru], SWAP_CLUSTER_MAX);
+				nr[lru] -= nr_to_scan;
 
-				nr_reclaimed += shrink_list(l, nr_to_scan,
-							    zone, sc, priority);
+				nr_reclaimed += shrink_list(lru, nr_to_scan,
+							    mz, sc, priority);
 			}
 		}
 		/*
@@ -2066,17 +2123,89 @@ restart:
 	 * Even if we did not try to evict anon pages at all, we want to
 	 * rebalance the anon lru active/inactive ratio.
 	 */
-	if (inactive_anon_is_low(zone, sc))
-		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
+	if (inactive_anon_is_low(mz))
+		shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0);
 
 	/* reclaim/compaction might need reclaim to continue */
-	if (should_continue_reclaim(zone, nr_reclaimed,
+	if (should_continue_reclaim(mz, nr_reclaimed,
 					sc->nr_scanned - nr_scanned, sc))
 		goto restart;
 
 	throttle_vm_writeout(sc->gfp_mask);
 }
 
+static void shrink_zone(int priority, struct zone *zone,
+			struct scan_control *sc)
+{
+	struct mem_cgroup *root = sc->target_mem_cgroup;
+	struct mem_cgroup_reclaim_cookie reclaim = {
+		.zone = zone,
+		.priority = priority,
+	};
+	struct mem_cgroup *memcg;
+
+	memcg = mem_cgroup_iter(root, NULL, &reclaim);
+	do {
+		struct mem_cgroup_zone mz = {
+			.mem_cgroup = memcg,
+			.zone = zone,
+		};
+
+		shrink_mem_cgroup_zone(priority, &mz, sc);
+		/*
+		 * Limit reclaim has historically picked one memcg and
+		 * scanned it with decreasing priority levels until
+		 * nr_to_reclaim had been reclaimed.  This priority
+		 * cycle is thus over after a single memcg.
+		 *
+		 * Direct reclaim and kswapd, on the other hand, have
+		 * to scan all memory cgroups to fulfill the overall
+		 * scan target for the zone.
+		 */
+		if (!global_reclaim(sc)) {
+			mem_cgroup_iter_break(root, memcg);
+			break;
+		}
+		memcg = mem_cgroup_iter(root, memcg, &reclaim);
+	} while (memcg);
+}
+
+/* Returns true if compaction should go ahead for a high-order request */
+static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
+{
+	unsigned long balance_gap, watermark;
+	bool watermark_ok;
+
+	/* Do not consider compaction for orders reclaim is meant to satisfy */
+	if (sc->order <= PAGE_ALLOC_COSTLY_ORDER)
+		return false;
+
+	/*
+	 * Compaction takes time to run and there are potentially other
+	 * callers using the pages just freed. Continue reclaiming until
+	 * there is a buffer of free pages available to give compaction
+	 * a reasonable chance of completing and allocating the page
+	 */
+	balance_gap = min(low_wmark_pages(zone),
+		(zone->present_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
+			KSWAPD_ZONE_BALANCE_GAP_RATIO);
+	watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
+	watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
+
+	/*
+	 * If compaction is deferred, reclaim up to a point where
+	 * compaction will have a chance of success when re-enabled
+	 */
+	if (compaction_deferred(zone))
+		return watermark_ok;
+
+	/* If compaction is not ready to start, keep reclaiming */
+	if (!compaction_suitable(zone, sc->order))
+		return false;
+
+	return watermark_ok;
+}
+
 /*
 * This is the direct reclaim path, for page-allocating processes. We only
 * try to reclaim pages from zones which will satisfy the caller's allocation
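
compaction_ready() expresses "enough free memory for compaction to be worth trying" as a watermark test. Rough numbers for illustration (not from the patch), assuming KSWAPD_ZONE_BALANCE_GAP_RATIO is 100, a 131072-page (512 MB) zone and an order-9 request:

	balance_gap = min(low_wmark_pages(zone), (131072 + 99) / 100);	/* <= 1311 pages */
	watermark = high_wmark_pages(zone) + balance_gap + (2UL << 9);	/* + 1024 pages */
	/* shrink_zones() skips further reclaim of this zone once the order is
	 * costly, compaction is suitable (or merely deferred) and the zone
	 * already passes this watermark */
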
@@ -2094,8 +2223,9 @@ restart: | |||
2094 | * scan then give up on it. | 2223 | * scan then give up on it. |
2095 | * | 2224 | * |
2096 | * This function returns true if a zone is being reclaimed for a costly | 2225 | * This function returns true if a zone is being reclaimed for a costly |
2097 | * high-order allocation and compaction is either ready to begin or deferred. | 2226 | * high-order allocation and compaction is ready to begin. This indicates to |
2098 | * This indicates to the caller that it should retry the allocation or fail. | 2227 | * the caller that it should consider retrying the allocation instead of |
2228 | * further reclaim. | ||
2099 | */ | 2229 | */ |
2100 | static bool shrink_zones(int priority, struct zonelist *zonelist, | 2230 | static bool shrink_zones(int priority, struct zonelist *zonelist, |
2101 | struct scan_control *sc) | 2231 | struct scan_control *sc) |
@@ -2104,7 +2234,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, | |||
2104 | struct zone *zone; | 2234 | struct zone *zone; |
2105 | unsigned long nr_soft_reclaimed; | 2235 | unsigned long nr_soft_reclaimed; |
2106 | unsigned long nr_soft_scanned; | 2236 | unsigned long nr_soft_scanned; |
2107 | bool should_abort_reclaim = false; | 2237 | bool aborted_reclaim = false; |
2108 | 2238 | ||
2109 | for_each_zone_zonelist_nodemask(zone, z, zonelist, | 2239 | for_each_zone_zonelist_nodemask(zone, z, zonelist, |
2110 | gfp_zone(sc->gfp_mask), sc->nodemask) { | 2240 | gfp_zone(sc->gfp_mask), sc->nodemask) { |
@@ -2114,7 +2244,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, | |||
2114 | * Take care memory controller reclaiming has small influence | 2244 | * Take care memory controller reclaiming has small influence |
2115 | * to global LRU. | 2245 | * to global LRU. |
2116 | */ | 2246 | */ |
2117 | if (scanning_global_lru(sc)) { | 2247 | if (global_reclaim(sc)) { |
2118 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) | 2248 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) |
2119 | continue; | 2249 | continue; |
2120 | if (zone->all_unreclaimable && priority != DEF_PRIORITY) | 2250 | if (zone->all_unreclaimable && priority != DEF_PRIORITY) |
@@ -2129,10 +2259,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, | |||
2129 | * noticable problem, like transparent huge page | 2259 | * noticable problem, like transparent huge page |
2130 | * allocations. | 2260 | * allocations. |
2131 | */ | 2261 | */ |
2132 | if (sc->order > PAGE_ALLOC_COSTLY_ORDER && | 2262 | if (compaction_ready(zone, sc)) { |
2133 | (compaction_suitable(zone, sc->order) || | 2263 | aborted_reclaim = true; |
2134 | compaction_deferred(zone))) { | ||
2135 | should_abort_reclaim = true; | ||
2136 | continue; | 2264 | continue; |
2137 | } | 2265 | } |
2138 | } | 2266 | } |
@@ -2154,7 +2282,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, | |||
2154 | shrink_zone(priority, zone, sc); | 2282 | shrink_zone(priority, zone, sc); |
2155 | } | 2283 | } |
2156 | 2284 | ||
2157 | return should_abort_reclaim; | 2285 | return aborted_reclaim; |
2158 | } | 2286 | } |
2159 | 2287 | ||
2160 | static bool zone_reclaimable(struct zone *zone) | 2288 | static bool zone_reclaimable(struct zone *zone) |
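Pulling the fragments of the shrink_zones() hunk together, the global-reclaim branch now makes a per-zone choice between skipping and shrinking, recording an abort whenever compaction looks ready. The sketch below is a condensed standalone model, not kernel code: the booleans stand in for cpuset_zone_allowed_hardwall(), zone->all_unreclaimable and compaction_ready(), and the costly-order test removed from this loop is presumably folded into compaction_ready(), whose head is outside this hunk.

	#include <stdbool.h>

	#define DEF_PRIORITY	12

	enum zone_action { SKIP_ZONE, SHRINK_ZONE };

	/*
	 * Condensed model of the per-zone decision in the global-reclaim
	 * branch of shrink_zones(). The booleans stand in for
	 * cpuset_zone_allowed_hardwall(), zone->all_unreclaimable and
	 * compaction_ready(); *aborted mirrors aborted_reclaim.
	 */
	static enum zone_action zone_decision(int priority, bool cpuset_allowed,
					      bool all_unreclaimable,
					      bool compaction_is_ready,
					      bool *aborted)
	{
		if (!cpuset_allowed)
			return SKIP_ZONE;
		if (all_unreclaimable && priority != DEF_PRIORITY)
			return SKIP_ZONE;	/* nothing reclaimable left */
		if (compaction_is_ready) {
			*aborted = true;	/* hand the zone to compaction */
			return SKIP_ZONE;
		}
		return SHRINK_ZONE;
	}

Here *aborted maps to the aborted_reclaim value that shrink_zones() now returns to do_try_to_free_pages() instead of having the caller break out of its priority loop.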
@@ -2208,25 +2336,25 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
2208 | struct zoneref *z; | 2336 | struct zoneref *z; |
2209 | struct zone *zone; | 2337 | struct zone *zone; |
2210 | unsigned long writeback_threshold; | 2338 | unsigned long writeback_threshold; |
2339 | bool aborted_reclaim; | ||
2211 | 2340 | ||
2212 | get_mems_allowed(); | 2341 | get_mems_allowed(); |
2213 | delayacct_freepages_start(); | 2342 | delayacct_freepages_start(); |
2214 | 2343 | ||
2215 | if (scanning_global_lru(sc)) | 2344 | if (global_reclaim(sc)) |
2216 | count_vm_event(ALLOCSTALL); | 2345 | count_vm_event(ALLOCSTALL); |
2217 | 2346 | ||
2218 | for (priority = DEF_PRIORITY; priority >= 0; priority--) { | 2347 | for (priority = DEF_PRIORITY; priority >= 0; priority--) { |
2219 | sc->nr_scanned = 0; | 2348 | sc->nr_scanned = 0; |
2220 | if (!priority) | 2349 | if (!priority) |
2221 | disable_swap_token(sc->mem_cgroup); | 2350 | disable_swap_token(sc->target_mem_cgroup); |
2222 | if (shrink_zones(priority, zonelist, sc)) | 2351 | aborted_reclaim = shrink_zones(priority, zonelist, sc); |
2223 | break; | ||
2224 | 2352 | ||
2225 | /* | 2353 | /* |
2226 | * Don't shrink slabs when reclaiming memory from | 2354 | * Don't shrink slabs when reclaiming memory from |
2227 | * over limit cgroups | 2355 | * over limit cgroups |
2228 | */ | 2356 | */ |
2229 | if (scanning_global_lru(sc)) { | 2357 | if (global_reclaim(sc)) { |
2230 | unsigned long lru_pages = 0; | 2358 | unsigned long lru_pages = 0; |
2231 | for_each_zone_zonelist(zone, z, zonelist, | 2359 | for_each_zone_zonelist(zone, z, zonelist, |
2232 | gfp_zone(sc->gfp_mask)) { | 2360 | gfp_zone(sc->gfp_mask)) { |
@@ -2287,8 +2415,12 @@ out: | |||
2287 | if (oom_killer_disabled) | 2415 | if (oom_killer_disabled) |
2288 | return 0; | 2416 | return 0; |
2289 | 2417 | ||
2418 | /* Aborted reclaim to try compaction? don't OOM, then */ | ||
2419 | if (aborted_reclaim) | ||
2420 | return 1; | ||
2421 | |||
2290 | /* top priority shrink_zones still had more to do? don't OOM, then */ | 2422 | /* top priority shrink_zones still had more to do? don't OOM, then */ |
2291 | if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc)) | 2423 | if (global_reclaim(sc) && !all_unreclaimable(zonelist, sc)) |
2292 | return 1; | 2424 | return 1; |
2293 | 2425 | ||
2294 | return 0; | 2426 | return 0; |
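The behavioural change of replacing the early break with the aborted_reclaim flag is easiest to see in a stripped-down model of the priority loop. The stub helpers below are placeholders for shrink_zones(), the nr_to_reclaim check and all_unreclaimable(); writeback throttling, congestion waits, the oom_killer_disabled case and the memcg-vs-global distinction are deliberately omitted.

	#include <stdbool.h>

	#define DEF_PRIORITY	12

	/* Placeholder stubs for shrink_zones(), the nr_to_reclaim check and */
	/* all_unreclaimable(); each returns a fixed value for illustration. */
	static bool shrink_zones_stub(int priority) { (void)priority; return false; }
	static bool reclaimed_enough_stub(void) { return false; }
	static bool all_unreclaimable_stub(void) { return true; }

	static int reclaim_outcome(void)
	{
		bool aborted_reclaim = false;
		int priority;

		for (priority = DEF_PRIORITY; priority >= 0; priority--) {
			aborted_reclaim = shrink_zones_stub(priority);
			if (reclaimed_enough_stub())
				return 1;
		}

		/* New: an abort in favour of compaction never becomes an OOM. */
		if (aborted_reclaim)
			return 1;

		/* Unchanged (global reclaim only): reclaimable zones remain. */
		if (!all_unreclaimable_stub())
			return 1;

		return 0;
	}

Reclaim therefore still runs every priority level, and a compaction-motivated abort only influences the final OOM decision rather than cutting the scan short.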
@@ -2305,7 +2437,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | |||
2305 | .may_unmap = 1, | 2437 | .may_unmap = 1, |
2306 | .may_swap = 1, | 2438 | .may_swap = 1, |
2307 | .order = order, | 2439 | .order = order, |
2308 | .mem_cgroup = NULL, | 2440 | .target_mem_cgroup = NULL, |
2309 | .nodemask = nodemask, | 2441 | .nodemask = nodemask, |
2310 | }; | 2442 | }; |
2311 | struct shrink_control shrink = { | 2443 | struct shrink_control shrink = { |
@@ -2325,7 +2457,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | |||
2325 | 2457 | ||
2326 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | 2458 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR |
2327 | 2459 | ||
2328 | unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | 2460 | unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, |
2329 | gfp_t gfp_mask, bool noswap, | 2461 | gfp_t gfp_mask, bool noswap, |
2330 | struct zone *zone, | 2462 | struct zone *zone, |
2331 | unsigned long *nr_scanned) | 2463 | unsigned long *nr_scanned) |
@@ -2337,7 +2469,11 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |||
2337 | .may_unmap = 1, | 2469 | .may_unmap = 1, |
2338 | .may_swap = !noswap, | 2470 | .may_swap = !noswap, |
2339 | .order = 0, | 2471 | .order = 0, |
2340 | .mem_cgroup = mem, | 2472 | .target_mem_cgroup = memcg, |
2473 | }; | ||
2474 | struct mem_cgroup_zone mz = { | ||
2475 | .mem_cgroup = memcg, | ||
2476 | .zone = zone, | ||
2341 | }; | 2477 | }; |
2342 | 2478 | ||
2343 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | | 2479 | sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | |
@@ -2354,7 +2490,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |||
2354 | * will pick up pages from other mem cgroups as well. We hack | 2490 |
2355 | * the priority and make it zero. | 2491 | * the priority and make it zero. |
2356 | */ | 2492 | */ |
2357 | shrink_zone(0, zone, &sc); | 2493 | shrink_mem_cgroup_zone(0, &mz, &sc); |
2358 | 2494 | ||
2359 | trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); | 2495 | trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed); |
2360 | 2496 | ||
@@ -2362,7 +2498,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | |||
2362 | return sc.nr_reclaimed; | 2498 | return sc.nr_reclaimed; |
2363 | } | 2499 | } |
2364 | 2500 | ||
2365 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | 2501 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, |
2366 | gfp_t gfp_mask, | 2502 | gfp_t gfp_mask, |
2367 | bool noswap) | 2503 | bool noswap) |
2368 | { | 2504 | { |
@@ -2375,7 +2511,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | |||
2375 | .may_swap = !noswap, | 2511 | .may_swap = !noswap, |
2376 | .nr_to_reclaim = SWAP_CLUSTER_MAX, | 2512 | .nr_to_reclaim = SWAP_CLUSTER_MAX, |
2377 | .order = 0, | 2513 | .order = 0, |
2378 | .mem_cgroup = mem_cont, | 2514 | .target_mem_cgroup = memcg, |
2379 | .nodemask = NULL, /* we don't care about placement */ | 2515 |
2380 | .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | | 2516 | .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) | |
2381 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), | 2517 | (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK), |
@@ -2389,7 +2525,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | |||
2389 | * take care of where we get pages from. So the node where we start the | 2525 |
2390 | * scan does not need to be the current node. | 2526 | * scan does not need to be the current node. |
2391 | */ | 2527 | */ |
2392 | nid = mem_cgroup_select_victim_node(mem_cont); | 2528 | nid = mem_cgroup_select_victim_node(memcg); |
2393 | 2529 | ||
2394 | zonelist = NODE_DATA(nid)->node_zonelists; | 2530 | zonelist = NODE_DATA(nid)->node_zonelists; |
2395 | 2531 | ||
@@ -2405,6 +2541,29 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, | |||
2405 | } | 2541 | } |
2406 | #endif | 2542 | #endif |
2407 | 2543 | ||
2544 | static void age_active_anon(struct zone *zone, struct scan_control *sc, | ||
2545 | int priority) | ||
2546 | { | ||
2547 | struct mem_cgroup *memcg; | ||
2548 | |||
2549 | if (!total_swap_pages) | ||
2550 | return; | ||
2551 | |||
2552 | memcg = mem_cgroup_iter(NULL, NULL, NULL); | ||
2553 | do { | ||
2554 | struct mem_cgroup_zone mz = { | ||
2555 | .mem_cgroup = memcg, | ||
2556 | .zone = zone, | ||
2557 | }; | ||
2558 | |||
2559 | if (inactive_anon_is_low(&mz)) | ||
2560 | shrink_active_list(SWAP_CLUSTER_MAX, &mz, | ||
2561 | sc, priority, 0); | ||
2562 | |||
2563 | memcg = mem_cgroup_iter(NULL, memcg, NULL); | ||
2564 | } while (memcg); | ||
2565 | } | ||
2566 | |||
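age_active_anon() uses the cursor-style mem_cgroup_iter(): a NULL position starts the walk, the previous return value is fed back in, and NULL ends it, with the do/while ensuring the body runs at least once even when no group is returned. Assuming that contract, the same loop shape over a hypothetical NULL-terminated list looks like this (illustrative only):

	#include <stddef.h>

	/* Hypothetical stand-in hierarchy: a plain NULL-terminated list. */
	struct group {
		struct group *next;
	};

	/* Cursor-style iterator modelled on mem_cgroup_iter(NULL, prev, NULL). */
	static struct group *group_iter(struct group *root, struct group *prev)
	{
		return prev ? prev->next : root;
	}

	/*
	 * Same loop shape as age_active_anon(): fetch the first position, do
	 * the per-group work, then advance until the iterator returns NULL.
	 * The body must tolerate a NULL group; the kernel side copes with
	 * that via scanning_global_lru().
	 */
	static void walk_groups(struct group *root,
				void (*age_one)(struct group *))
	{
		struct group *pos = group_iter(root, NULL);

		do {
			age_one(pos);
			pos = pos ? group_iter(root, pos) : NULL;
		} while (pos);
	}
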
2408 | /* | 2567 | /* |
2409 | * pgdat_balanced is used when checking if a node is balanced for high-order | 2568 | * pgdat_balanced is used when checking if a node is balanced for high-order |
2410 | * allocations. Only zones that meet watermarks and are in a zone allowed | 2569 | * allocations. Only zones that meet watermarks and are in a zone allowed |
@@ -2525,7 +2684,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, | |||
2525 | */ | 2684 | */ |
2526 | .nr_to_reclaim = ULONG_MAX, | 2685 | .nr_to_reclaim = ULONG_MAX, |
2527 | .order = order, | 2686 | .order = order, |
2528 | .mem_cgroup = NULL, | 2687 | .target_mem_cgroup = NULL, |
2529 | }; | 2688 | }; |
2530 | struct shrink_control shrink = { | 2689 | struct shrink_control shrink = { |
2531 | .gfp_mask = sc.gfp_mask, | 2690 | .gfp_mask = sc.gfp_mask, |
@@ -2564,9 +2723,7 @@ loop_again: | |||
2564 | * Do some background aging of the anon list, to give | 2723 | * Do some background aging of the anon list, to give |
2565 | * pages a chance to be referenced before reclaiming. | 2724 | * pages a chance to be referenced before reclaiming. |
2566 | */ | 2725 | */ |
2567 | if (inactive_anon_is_low(zone, &sc)) | 2726 | age_active_anon(zone, &sc, priority); |
2568 | shrink_active_list(SWAP_CLUSTER_MAX, zone, | ||
2569 | &sc, priority, 0); | ||
2570 | 2727 | ||
2571 | if (!zone_watermark_ok_safe(zone, order, | 2728 | if (!zone_watermark_ok_safe(zone, order, |
2572 | high_wmark_pages(zone), 0, 0)) { | 2729 | high_wmark_pages(zone), 0, 0)) { |
@@ -3355,16 +3512,18 @@ int page_evictable(struct page *page, struct vm_area_struct *vma) | |||
3355 | */ | 3512 | */ |
3356 | static void check_move_unevictable_page(struct page *page, struct zone *zone) | 3513 | static void check_move_unevictable_page(struct page *page, struct zone *zone) |
3357 | { | 3514 | { |
3358 | VM_BUG_ON(PageActive(page)); | 3515 | struct lruvec *lruvec; |
3359 | 3516 | ||
3517 | VM_BUG_ON(PageActive(page)); | ||
3360 | retry: | 3518 | retry: |
3361 | ClearPageUnevictable(page); | 3519 | ClearPageUnevictable(page); |
3362 | if (page_evictable(page, NULL)) { | 3520 | if (page_evictable(page, NULL)) { |
3363 | enum lru_list l = page_lru_base_type(page); | 3521 | enum lru_list l = page_lru_base_type(page); |
3364 | 3522 | ||
3365 | __dec_zone_state(zone, NR_UNEVICTABLE); | 3523 | __dec_zone_state(zone, NR_UNEVICTABLE); |
3366 | list_move(&page->lru, &zone->lru[l].list); | 3524 | lruvec = mem_cgroup_lru_move_lists(zone, page, |
3367 | mem_cgroup_move_lists(page, LRU_UNEVICTABLE, l); | 3525 | LRU_UNEVICTABLE, l); |
3526 | list_move(&page->lru, &lruvec->lists[l]); | ||
3368 | __inc_zone_state(zone, NR_INACTIVE_ANON + l); | 3527 | __inc_zone_state(zone, NR_INACTIVE_ANON + l); |
3369 | __count_vm_event(UNEVICTABLE_PGRESCUED); | 3528 | __count_vm_event(UNEVICTABLE_PGRESCUED); |
3370 | } else { | 3529 | } else { |
@@ -3372,8 +3531,9 @@ retry: | |||
3372 | * rotate unevictable list | 3531 | * rotate unevictable list |
3373 | */ | 3532 | */ |
3374 | SetPageUnevictable(page); | 3533 | SetPageUnevictable(page); |
3375 | list_move(&page->lru, &zone->lru[LRU_UNEVICTABLE].list); | 3534 | lruvec = mem_cgroup_lru_move_lists(zone, page, LRU_UNEVICTABLE, |
3376 | mem_cgroup_rotate_lru_list(page, LRU_UNEVICTABLE); | 3535 | LRU_UNEVICTABLE); |
3536 | list_move(&page->lru, &lruvec->lists[LRU_UNEVICTABLE]); | ||
3377 | if (page_evictable(page, NULL)) | 3537 | if (page_evictable(page, NULL)) |
3378 | goto retry; | 3538 | goto retry; |
3379 | } | 3539 | } |
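
The rescue path above follows a clear-test-publish-retest pattern: PG_unevictable is cleared optimistically, the page is re-tested, and if it has to be rotated back onto the unevictable list one final test catches a racing transition to evictable. A single-threaded toy model of that shape, with plain booleans for the page state and the lruvec moves and locking left out, might look like:

	#include <stdbool.h>

	/* Toy page with only the state the rescue path cares about. */
	struct toy_page {
		bool unevictable_flag;	/* models PG_unevictable */
		bool evictable_now;	/* models page_evictable(page, NULL) */
	};

	enum rescue_result { RESCUED, KEPT_UNEVICTABLE };

	/*
	 * Same shape as check_move_unevictable_page(): clear the flag, test,
	 * and if the page must stay unevictable, set the flag back and test
	 * once more. In the kernel the final test catches a concurrent
	 * transition to evictable; in this single-threaded toy the retry
	 * branch is never taken.
	 */
	static enum rescue_result rescue(struct toy_page *page)
	{
	retry:
		page->unevictable_flag = false;
		if (page->evictable_now)
			return RESCUED;		/* move to the proper LRU list */

		page->unevictable_flag = true;	/* rotate back, stays unevictable */
		if (page->evictable_now)
			goto retry;
		return KEPT_UNEVICTABLE;
	}
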