Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--   mm/page_alloc.c   247
1 file changed, 145 insertions(+), 102 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 40e29429e7b0..1b849500640c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1032,11 +1032,9 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
 static int fallbacks[MIGRATE_TYPES][4] = {
 	[MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_RESERVE },
 	[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_RESERVE },
+	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
 #ifdef CONFIG_CMA
-	[MIGRATE_MOVABLE]     = { MIGRATE_CMA,         MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
 	[MIGRATE_CMA]         = { MIGRATE_RESERVE }, /* Never used */
-#else
-	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE },
 #endif
 	[MIGRATE_RESERVE]     = { MIGRATE_RESERVE }, /* Never used */
 #ifdef CONFIG_MEMORY_ISOLATION
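
For orientation while reading the rest of the patch: the table is walked row by row by the fallback path, and after this change the MIGRATE_MOVABLE row no longer names MIGRATE_CMA; CMA pageblocks are instead tried through a dedicated helper added in the next hunk. A hedged illustration, not part of the patch:

/*
 * Illustration only: the order in which an unmovable request raids other
 * free lists once its own list is empty.
 *
 *   fallbacks[MIGRATE_UNMOVABLE][0] == MIGRATE_RECLAIMABLE
 *   fallbacks[MIGRATE_UNMOVABLE][1] == MIGRATE_MOVABLE
 *   fallbacks[MIGRATE_UNMOVABLE][2] == MIGRATE_RESERVE  (terminates the walk)
 */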
@@ -1044,6 +1042,17 @@ static int fallbacks[MIGRATE_TYPES][4] = {
 #endif
 };
 
+#ifdef CONFIG_CMA
+static struct page *__rmqueue_cma_fallback(struct zone *zone,
+					unsigned int order)
+{
+	return __rmqueue_smallest(zone, order, MIGRATE_CMA);
+}
+#else
+static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
+					unsigned int order) { return NULL; }
+#endif
+
 /*
  * Move the free pages in a range to the free lists of the requested type.
  * Note that start_page and end_pages are not aligned on a pageblock
@@ -1136,14 +1145,40 @@ static void change_pageblock_range(struct page *pageblock_page,
  * as fragmentation caused by those allocations polluting movable pageblocks
  * is worse than movable allocations stealing from unmovable and reclaimable
  * pageblocks.
- *
- * If we claim more than half of the pageblock, change pageblock's migratetype
- * as well.
  */
-static void try_to_steal_freepages(struct zone *zone, struct page *page,
-				  int start_type, int fallback_type)
+static bool can_steal_fallback(unsigned int order, int start_mt)
+{
+	/*
+	 * Leaving this order check is intended, although there is
+	 * relaxed order check in next check. The reason is that
+	 * we can actually steal whole pageblock if this condition met,
+	 * but, below check doesn't guarantee it and that is just heuristic
+	 * so could be changed anytime.
+	 */
+	if (order >= pageblock_order)
+		return true;
+
+	if (order >= pageblock_order / 2 ||
+		start_mt == MIGRATE_RECLAIMABLE ||
+		start_mt == MIGRATE_UNMOVABLE ||
+		page_group_by_mobility_disabled)
+		return true;
+
+	return false;
+}
+
+/*
+ * This function implements actual steal behaviour. If order is large enough,
+ * we can steal whole pageblock. If not, we first move freepages in this
+ * pageblock and check whether half of pages are moved or not. If half of
+ * pages are moved, we can change migratetype of pageblock and permanently
+ * use it's pages as requested migratetype in the future.
+ */
+static void steal_suitable_fallback(struct zone *zone, struct page *page,
+							  int start_type)
 {
 	int current_order = page_order(page);
+	int pages;
 
 	/* Take ownership for orders >= pageblock_order */
 	if (current_order >= pageblock_order) {
@@ -1151,19 +1186,49 @@ static void try_to_steal_freepages(struct zone *zone, struct page *page,
 		return;
 	}
 
-	if (current_order >= pageblock_order / 2 ||
-	    start_type == MIGRATE_RECLAIMABLE ||
-	    start_type == MIGRATE_UNMOVABLE ||
-	    page_group_by_mobility_disabled) {
-		int pages;
+	pages = move_freepages_block(zone, page, start_type);
+
+	/* Claim the whole block if over half of it is free */
+	if (pages >= (1 << (pageblock_order-1)) ||
+			page_group_by_mobility_disabled)
+		set_pageblock_migratetype(page, start_type);
+}
+
+/*
+ * Check whether there is a suitable fallback freepage with requested order.
+ * If only_stealable is true, this function returns fallback_mt only if
+ * we can steal other freepages all together. This would help to reduce
+ * fragmentation due to mixed migratetype pages in one pageblock.
+ */
+int find_suitable_fallback(struct free_area *area, unsigned int order,
+			int migratetype, bool only_stealable, bool *can_steal)
+{
+	int i;
+	int fallback_mt;
+
+	if (area->nr_free == 0)
+		return -1;
+
+	*can_steal = false;
+	for (i = 0;; i++) {
+		fallback_mt = fallbacks[migratetype][i];
+		if (fallback_mt == MIGRATE_RESERVE)
+			break;
+
+		if (list_empty(&area->free_list[fallback_mt]))
+			continue;
 
-		pages = move_freepages_block(zone, page, start_type);
+		if (can_steal_fallback(order, migratetype))
+			*can_steal = true;
 
-		/* Claim the whole block if over half of it is free */
-		if (pages >= (1 << (pageblock_order-1)) ||
-				page_group_by_mobility_disabled)
-			set_pageblock_migratetype(page, start_type);
+		if (!only_stealable)
+			return fallback_mt;
+
+		if (*can_steal)
+			return fallback_mt;
 	}
+
+	return -1;
 }
 
 /* Remove an element from the buddy allocator from the fallback list */
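
To make the stealing heuristic above concrete, here is a small userspace sketch (illustrative only, not kernel code) that mirrors can_steal_fallback(); it assumes pageblock_order == 9, i.e. 2MB pageblocks with 4KB pages, and ignores the page_group_by_mobility_disabled knob:

#include <stdbool.h>
#include <stdio.h>

enum { MOVABLE, RECLAIMABLE, UNMOVABLE };	/* stand-ins for the kernel enums */
static const unsigned int pageblock_order = 9;	/* assumption: 2MB pageblocks */

/* Mirrors the can_steal_fallback() logic from the hunk above. */
static bool can_steal(unsigned int order, int start_mt)
{
	if (order >= pageblock_order)
		return true;	/* whole pageblock can be claimed outright */

	return order >= pageblock_order / 2 ||
	       start_mt == RECLAIMABLE ||
	       start_mt == UNMOVABLE;
}

int main(void)
{
	printf("order 3 movable:     %d\n", can_steal(3, MOVABLE));	/* 0: too small */
	printf("order 5 movable:     %d\n", can_steal(5, MOVABLE));	/* 1: >= pageblock_order/2 */
	printf("order 3 unmovable:   %d\n", can_steal(3, UNMOVABLE));	/* 1: keep unmovable together */
	printf("order 3 reclaimable: %d\n", can_steal(3, RECLAIMABLE));	/* 1 */
	printf("order 9 movable:     %d\n", can_steal(9, MOVABLE));	/* 1: whole block */
	return 0;
}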
@@ -1173,64 +1238,45 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
 	struct free_area *area;
 	unsigned int current_order;
 	struct page *page;
+	int fallback_mt;
+	bool can_steal;
 
 	/* Find the largest possible block of pages in the other list */
 	for (current_order = MAX_ORDER-1;
 				current_order >= order && current_order <= MAX_ORDER-1;
 				--current_order) {
-		int i;
-		for (i = 0;; i++) {
-			int migratetype = fallbacks[start_migratetype][i];
-			int buddy_type = start_migratetype;
-
-			/* MIGRATE_RESERVE handled later if necessary */
-			if (migratetype == MIGRATE_RESERVE)
-				break;
-
-			area = &(zone->free_area[current_order]);
-			if (list_empty(&area->free_list[migratetype]))
-				continue;
-
-			page = list_entry(area->free_list[migratetype].next,
-					struct page, lru);
-			area->nr_free--;
-
-			if (!is_migrate_cma(migratetype)) {
-				try_to_steal_freepages(zone, page,
-							start_migratetype,
-							migratetype);
-			} else {
-				/*
-				 * When borrowing from MIGRATE_CMA, we need to
-				 * release the excess buddy pages to CMA
-				 * itself, and we do not try to steal extra
-				 * free pages.
-				 */
-				buddy_type = migratetype;
-			}
+		area = &(zone->free_area[current_order]);
+		fallback_mt = find_suitable_fallback(area, current_order,
+				start_migratetype, false, &can_steal);
+		if (fallback_mt == -1)
+			continue;
 
-			/* Remove the page from the freelists */
-			list_del(&page->lru);
-			rmv_page_order(page);
+		page = list_entry(area->free_list[fallback_mt].next,
+						struct page, lru);
+		if (can_steal)
+			steal_suitable_fallback(zone, page, start_migratetype);
 
-			expand(zone, page, order, current_order, area,
-					buddy_type);
+		/* Remove the page from the freelists */
+		area->nr_free--;
+		list_del(&page->lru);
+		rmv_page_order(page);
 
-			/*
-			 * The freepage_migratetype may differ from pageblock's
-			 * migratetype depending on the decisions in
-			 * try_to_steal_freepages(). This is OK as long as it
-			 * does not differ for MIGRATE_CMA pageblocks. For CMA
-			 * we need to make sure unallocated pages flushed from
-			 * pcp lists are returned to the correct freelist.
-			 */
-			set_freepage_migratetype(page, buddy_type);
+		expand(zone, page, order, current_order, area,
+					start_migratetype);
+		/*
+		 * The freepage_migratetype may differ from pageblock's
+		 * migratetype depending on the decisions in
+		 * try_to_steal_freepages(). This is OK as long as it
+		 * does not differ for MIGRATE_CMA pageblocks. For CMA
+		 * we need to make sure unallocated pages flushed from
+		 * pcp lists are returned to the correct freelist.
+		 */
+		set_freepage_migratetype(page, start_migratetype);
 
 		trace_mm_page_alloc_extfrag(page, order, current_order,
-				start_migratetype, migratetype);
+			start_migratetype, fallback_mt);
 
 		return page;
-		}
 	}
 
 	return NULL;
@@ -1249,7 +1295,11 @@ retry_reserve:
 	page = __rmqueue_smallest(zone, order, migratetype);
 
 	if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
-		page = __rmqueue_fallback(zone, order, migratetype);
+		if (migratetype == MIGRATE_MOVABLE)
+			page = __rmqueue_cma_fallback(zone, order);
+
+		if (!page)
+			page = __rmqueue_fallback(zone, order, migratetype);
 
 		/*
 		 * Use MIGRATE_RESERVE rather than fail an allocation. goto
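
Taken together with the helpers introduced earlier, the per-zone allocation order after this series is roughly the following. This is a condensed sketch of __rmqueue(), not the verbatim function: locking, statistics and tracing are omitted, and the MIGRATE_RESERVE retry shown here is unchanged by the patch.

/* Sketch only. */
static struct page *__rmqueue_sketch(struct zone *zone, unsigned int order,
						int migratetype)
{
	struct page *page;

retry_reserve:
	page = __rmqueue_smallest(zone, order, migratetype);	/* 1. own free list */

	if (unlikely(!page) && migratetype != MIGRATE_RESERVE) {
		if (migratetype == MIGRATE_MOVABLE)
			page = __rmqueue_cma_fallback(zone, order);	/* 2. CMA, movable only */

		if (!page)
			page = __rmqueue_fallback(zone, order, migratetype); /* 3. fallbacks[] walk */

		if (!page) {
			migratetype = MIGRATE_RESERVE;	/* 4. last resort */
			goto retry_reserve;
		}
	}
	return page;
}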
@@ -2362,13 +2412,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 			*did_some_progress = 1;
 			goto out;
 		}
-		/*
-		 * GFP_THISNODE contains __GFP_NORETRY and we never hit this.
-		 * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
-		 * The caller should handle page allocation failure by itself if
-		 * it specifies __GFP_THISNODE.
-		 * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
-		 */
+		/* The OOM killer may not free memory on a specific node */
 		if (gfp_mask & __GFP_THISNODE)
 			goto out;
 	}
@@ -2623,15 +2667,11 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	}
 
 	/*
-	 * GFP_THISNODE (meaning __GFP_THISNODE, __GFP_NORETRY and
-	 * __GFP_NOWARN set) should not cause reclaim since the subsystem
-	 * (f.e. slab) using GFP_THISNODE may choose to trigger reclaim
-	 * using a larger set of nodes after it has established that the
-	 * allowed per node queues are empty and that nodes are
-	 * over allocated.
+	 * If this allocation cannot block and it is for a specific node, then
+	 * fail early. There's no need to wakeup kswapd or retry for a
+	 * speculative node-specific allocation.
 	 */
-	if (IS_ENABLED(CONFIG_NUMA) &&
-	    (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
+	if (IS_ENABLED(CONFIG_NUMA) && (gfp_mask & __GFP_THISNODE) && !wait)
 		goto nopage;
 
 retry:
@@ -2824,7 +2864,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	/*
 	 * Check the zones suitable for the gfp_mask contain at least one
 	 * valid zone. It's possible to have an empty zonelist as a result
-	 * of GFP_THISNODE and a memoryless node
+	 * of __GFP_THISNODE and a memoryless node
 	 */
 	if (unlikely(!zonelist->_zonerefs->zone))
 		return NULL;
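
The practical effect of the __GFP_THISNODE change in the slowpath above: any node-specific allocation that cannot block (!wait) now fails fast instead of waking kswapd and retrying, whereas previously only the exact GFP_THISNODE combination (__GFP_THISNODE | __GFP_NORETRY | __GFP_NOWARN) took that early exit. A hedged example of an affected caller; the flag combination is illustrative and not taken from the patch:

/* Speculative, node-local, non-blocking allocation: with this patch it
 * returns NULL immediately when node 'nid' is out of pages, instead of
 * waking kswapd or looping in the slowpath. */
struct page *page = alloc_pages_node(nid,
		GFP_NOWAIT | __GFP_THISNODE | __GFP_NOWARN, 0);
if (!page)
	page = alloc_pages(GFP_KERNEL, 0);	/* optional: fall back to any node */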
@@ -3201,38 +3241,31 @@ static void show_migration_types(unsigned char type)
  * Show free area list (used inside shift_scroll-lock stuff)
  * We also calculate the percentage fragmentation. We do this by counting the
  * memory on each free list with the exception of the first item on the list.
- * Suppresses nodes that are not allowed by current's cpuset if
- * SHOW_MEM_FILTER_NODES is passed.
+ *
+ * Bits in @filter:
+ * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's
+ *   cpuset.
  */
 void show_free_areas(unsigned int filter)
 {
+	unsigned long free_pcp = 0;
 	int cpu;
 	struct zone *zone;
 
 	for_each_populated_zone(zone) {
 		if (skip_free_areas_node(filter, zone_to_nid(zone)))
 			continue;
-		show_node(zone);
-		printk("%s per-cpu:\n", zone->name);
 
-		for_each_online_cpu(cpu) {
-			struct per_cpu_pageset *pageset;
-
-			pageset = per_cpu_ptr(zone->pageset, cpu);
-
-			printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
-				cpu, pageset->pcp.high,
-				pageset->pcp.batch, pageset->pcp.count);
-		}
+		for_each_online_cpu(cpu)
+			free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count;
 	}
 
 	printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
 		" active_file:%lu inactive_file:%lu isolated_file:%lu\n"
-		" unevictable:%lu"
-		" dirty:%lu writeback:%lu unstable:%lu\n"
-		" free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
+		" unevictable:%lu dirty:%lu writeback:%lu unstable:%lu\n"
+		" slab_reclaimable:%lu slab_unreclaimable:%lu\n"
 		" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
-		" free_cma:%lu\n",
+		" free:%lu free_pcp:%lu free_cma:%lu\n",
 		global_page_state(NR_ACTIVE_ANON),
 		global_page_state(NR_INACTIVE_ANON),
 		global_page_state(NR_ISOLATED_ANON),
@@ -3243,13 +3276,14 @@ void show_free_areas(unsigned int filter)
 		global_page_state(NR_FILE_DIRTY),
 		global_page_state(NR_WRITEBACK),
 		global_page_state(NR_UNSTABLE_NFS),
-		global_page_state(NR_FREE_PAGES),
 		global_page_state(NR_SLAB_RECLAIMABLE),
 		global_page_state(NR_SLAB_UNRECLAIMABLE),
 		global_page_state(NR_FILE_MAPPED),
 		global_page_state(NR_SHMEM),
 		global_page_state(NR_PAGETABLE),
 		global_page_state(NR_BOUNCE),
+		global_page_state(NR_FREE_PAGES),
+		free_pcp,
 		global_page_state(NR_FREE_CMA_PAGES));
 
 	for_each_populated_zone(zone) {
@@ -3257,6 +3291,11 @@ void show_free_areas(unsigned int filter)
 
 		if (skip_free_areas_node(filter, zone_to_nid(zone)))
 			continue;
+
+		free_pcp = 0;
+		for_each_online_cpu(cpu)
+			free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count;
+
 		show_node(zone);
 		printk("%s"
 			" free:%lukB"
@@ -3283,6 +3322,8 @@ void show_free_areas(unsigned int filter)
 			" pagetables:%lukB"
 			" unstable:%lukB"
 			" bounce:%lukB"
+			" free_pcp:%lukB"
+			" local_pcp:%ukB"
 			" free_cma:%lukB"
 			" writeback_tmp:%lukB"
 			" pages_scanned:%lu"
@@ -3314,6 +3355,8 @@ void show_free_areas(unsigned int filter)
 			K(zone_page_state(zone, NR_PAGETABLE)),
 			K(zone_page_state(zone, NR_UNSTABLE_NFS)),
 			K(zone_page_state(zone, NR_BOUNCE)),
+			K(free_pcp),
+			K(this_cpu_read(zone->pageset->pcp.count)),
 			K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
 			K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
 			K(zone_page_state(zone, NR_PAGES_SCANNED)),
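
For readers of the show_free_areas() changes above: NR_FREE_PAGES only covers pages sitting in the buddy free lists, so the patch reports pages parked on the per-cpu lists separately (free_pcp summed over all CPUs, local_pcp for the printing CPU). A hedged sketch of that relationship; the helper name is made up for illustration and does not exist in the patch:

/* Hypothetical helper, mirroring the summation done inline in the patch:
 * pages cached on a zone's per-cpu lists, which NR_FREE_PAGES does not
 * include. */
static unsigned long zone_pcp_free_pages(struct zone *zone)
{
	unsigned long free_pcp = 0;
	int cpu;

	for_each_online_cpu(cpu)
		free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count;

	return free_pcp;	/* printed as free_pcp:%lukB, K() converts pages to kB */
}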
@@ -5717,7 +5760,7 @@ static void __setup_per_zone_wmarks(void)
 			 * value here.
 			 *
 			 * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN)
-			 * deltas controls asynch page reclaim, and so should
+			 * deltas control asynch page reclaim, and so should
 			 * not be capped for highmem.
 			 */
 			unsigned long min_pages;