diff options
-rw-r--r-- | include/linux/mmzone.h | 6 | ||||
-rw-r--r-- | mm/page_alloc.c | 2 | ||||
-rw-r--r-- | mm/vmscan.c | 55 | ||||
-rw-r--r-- | mm/vmstat.c | 2 |
4 files changed, 43 insertions, 22 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ed0762b283a9..e06683e2bea3 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -218,13 +218,9 @@ struct zone { | |||
218 | * under - it drives the swappiness decision: whether to unmap mapped | 218 | * under - it drives the swappiness decision: whether to unmap mapped |
219 | * pages. | 219 | * pages. |
220 | * | 220 | * |
221 | * temp_priority is used to remember the scanning priority at which | 221 | * Access to both this field is quite racy even on uniprocessor. But |
222 | * this zone was successfully refilled to free_pages == pages_high. | ||
223 | * | ||
224 | * Access to both these fields is quite racy even on uniprocessor. But | ||
225 | * it is expected to average out OK. | 222 | * it is expected to average out OK. |
226 | */ | 223 | */ |
227 | int temp_priority; | ||
228 | int prev_priority; | 224 | int prev_priority; |
229 | 225 | ||
230 | 226 | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f5fc45472d5c..ecf853b5e30e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -2407,7 +2407,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, | |||
2407 | zone->zone_pgdat = pgdat; | 2407 | zone->zone_pgdat = pgdat; |
2408 | zone->free_pages = 0; | 2408 | zone->free_pages = 0; |
2409 | 2409 | ||
2410 | zone->temp_priority = zone->prev_priority = DEF_PRIORITY; | 2410 | zone->prev_priority = DEF_PRIORITY; |
2411 | 2411 | ||
2412 | zone_pcp_init(zone); | 2412 | zone_pcp_init(zone); |
2413 | INIT_LIST_HEAD(&zone->active_list); | 2413 | INIT_LIST_HEAD(&zone->active_list); |
diff --git a/mm/vmscan.c b/mm/vmscan.c index f05527bf792b..b32560ead5c0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -723,6 +723,20 @@ done: | |||
723 | return nr_reclaimed; | 723 | return nr_reclaimed; |
724 | } | 724 | } |
725 | 725 | ||
726 | /* | ||
727 | * We are about to scan this zone at a certain priority level. If that priority | ||
728 | * level is smaller (ie: more urgent) than the previous priority, then note | ||
729 | * that priority level within the zone. This is done so that when the next | ||
730 | * process comes in to scan this zone, it will immediately start out at this | ||
731 | * priority level rather than having to build up its own scanning priority. | ||
732 | * Here, this priority affects only the reclaim-mapped threshold. | ||
733 | */ | ||
734 | static inline void note_zone_scanning_priority(struct zone *zone, int priority) | ||
735 | { | ||
736 | if (priority < zone->prev_priority) | ||
737 | zone->prev_priority = priority; | ||
738 | } | ||
739 | |||
726 | static inline int zone_is_near_oom(struct zone *zone) | 740 | static inline int zone_is_near_oom(struct zone *zone) |
727 | { | 741 | { |
728 | return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3; | 742 | return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3; |
@@ -972,9 +986,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones, | |||
972 | if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) | 986 | if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) |
973 | continue; | 987 | continue; |
974 | 988 | ||
975 | zone->temp_priority = priority; | 989 | note_zone_scanning_priority(zone, priority); |
976 | if (zone->prev_priority > priority) | ||
977 | zone->prev_priority = priority; | ||
978 | 990 | ||
979 | if (zone->all_unreclaimable && priority != DEF_PRIORITY) | 991 | if (zone->all_unreclaimable && priority != DEF_PRIORITY) |
980 | continue; /* Let kswapd poll it */ | 992 | continue; /* Let kswapd poll it */ |
@@ -1024,7 +1036,6 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) | |||
1024 | if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) | 1036 | if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) |
1025 | continue; | 1037 | continue; |
1026 | 1038 | ||
1027 | zone->temp_priority = DEF_PRIORITY; | ||
1028 | lru_pages += zone->nr_active + zone->nr_inactive; | 1039 | lru_pages += zone->nr_active + zone->nr_inactive; |
1029 | } | 1040 | } |
1030 | 1041 | ||
@@ -1065,13 +1076,22 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) | |||
1065 | if (!sc.all_unreclaimable) | 1076 | if (!sc.all_unreclaimable) |
1066 | ret = 1; | 1077 | ret = 1; |
1067 | out: | 1078 | out: |
1079 | /* | ||
1080 | * Now that we've scanned all the zones at this priority level, note | ||
1081 | * that level within the zone so that the next thread which performs | ||
1082 | * scanning of this zone will immediately start out at this priority | ||
1083 | * level. This affects only the decision whether or not to bring | ||
1084 | * mapped pages onto the inactive list. | ||
1085 | */ | ||
1086 | if (priority < 0) | ||
1087 | priority = 0; | ||
1068 | for (i = 0; zones[i] != 0; i++) { | 1088 | for (i = 0; zones[i] != 0; i++) { |
1069 | struct zone *zone = zones[i]; | 1089 | struct zone *zone = zones[i]; |
1070 | 1090 | ||
1071 | if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) | 1091 | if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) |
1072 | continue; | 1092 | continue; |
1073 | 1093 | ||
1074 | zone->prev_priority = zone->temp_priority; | 1094 | zone->prev_priority = priority; |
1075 | } | 1095 | } |
1076 | return ret; | 1096 | return ret; |
1077 | } | 1097 | } |
@@ -1111,6 +1131,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order) | |||
1111 | .swap_cluster_max = SWAP_CLUSTER_MAX, | 1131 | .swap_cluster_max = SWAP_CLUSTER_MAX, |
1112 | .swappiness = vm_swappiness, | 1132 | .swappiness = vm_swappiness, |
1113 | }; | 1133 | }; |
1134 | /* | ||
1135 | * temp_priority is used to remember the scanning priority at which | ||
1136 | * this zone was successfully refilled to free_pages == pages_high. | ||
1137 | */ | ||
1138 | int temp_priority[MAX_NR_ZONES]; | ||
1114 | 1139 | ||
1115 | loop_again: | 1140 | loop_again: |
1116 | total_scanned = 0; | 1141 | total_scanned = 0; |
@@ -1118,11 +1143,8 @@ loop_again: | |||
1118 | sc.may_writepage = !laptop_mode; | 1143 | sc.may_writepage = !laptop_mode; |
1119 | count_vm_event(PAGEOUTRUN); | 1144 | count_vm_event(PAGEOUTRUN); |
1120 | 1145 | ||
1121 | for (i = 0; i < pgdat->nr_zones; i++) { | 1146 | for (i = 0; i < pgdat->nr_zones; i++) |
1122 | struct zone *zone = pgdat->node_zones + i; | 1147 | temp_priority[i] = DEF_PRIORITY; |
1123 | |||
1124 | zone->temp_priority = DEF_PRIORITY; | ||
1125 | } | ||
1126 | 1148 | ||
1127 | for (priority = DEF_PRIORITY; priority >= 0; priority--) { | 1149 | for (priority = DEF_PRIORITY; priority >= 0; priority--) { |
1128 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ | 1150 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ |
@@ -1183,10 +1205,9 @@ scan: | |||
1183 | if (!zone_watermark_ok(zone, order, zone->pages_high, | 1205 | if (!zone_watermark_ok(zone, order, zone->pages_high, |
1184 | end_zone, 0)) | 1206 | end_zone, 0)) |
1185 | all_zones_ok = 0; | 1207 | all_zones_ok = 0; |
1186 | zone->temp_priority = priority; | 1208 | temp_priority[i] = priority; |
1187 | if (zone->prev_priority > priority) | ||
1188 | zone->prev_priority = priority; | ||
1189 | sc.nr_scanned = 0; | 1209 | sc.nr_scanned = 0; |
1210 | note_zone_scanning_priority(zone, priority); | ||
1190 | nr_reclaimed += shrink_zone(priority, zone, &sc); | 1211 | nr_reclaimed += shrink_zone(priority, zone, &sc); |
1191 | reclaim_state->reclaimed_slab = 0; | 1212 | reclaim_state->reclaimed_slab = 0; |
1192 | nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, | 1213 | nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, |
@@ -1226,10 +1247,15 @@ scan: | |||
1226 | break; | 1247 | break; |
1227 | } | 1248 | } |
1228 | out: | 1249 | out: |
1250 | /* | ||
1251 | * Note within each zone the priority level at which this zone was | ||
1252 | * brought into a happy state. So that the next thread which scans this | ||
1253 | * zone will start out at that priority level. | ||
1254 | */ | ||
1229 | for (i = 0; i < pgdat->nr_zones; i++) { | 1255 | for (i = 0; i < pgdat->nr_zones; i++) { |
1230 | struct zone *zone = pgdat->node_zones + i; | 1256 | struct zone *zone = pgdat->node_zones + i; |
1231 | 1257 | ||
1232 | zone->prev_priority = zone->temp_priority; | 1258 | zone->prev_priority = temp_priority[i]; |
1233 | } | 1259 | } |
1234 | if (!all_zones_ok) { | 1260 | if (!all_zones_ok) { |
1235 | cond_resched(); | 1261 | cond_resched(); |
@@ -1614,6 +1640,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
1614 | */ | 1640 | */ |
1615 | priority = ZONE_RECLAIM_PRIORITY; | 1641 | priority = ZONE_RECLAIM_PRIORITY; |
1616 | do { | 1642 | do { |
1643 | note_zone_scanning_priority(zone, priority); | ||
1617 | nr_reclaimed += shrink_zone(priority, zone, &sc); | 1644 | nr_reclaimed += shrink_zone(priority, zone, &sc); |
1618 | priority--; | 1645 | priority--; |
1619 | } while (priority >= 0 && nr_reclaimed < nr_pages); | 1646 | } while (priority >= 0 && nr_reclaimed < nr_pages); |
diff --git a/mm/vmstat.c b/mm/vmstat.c index 45b124e012f5..8614e8f6743b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
@@ -587,11 +587,9 @@ static int zoneinfo_show(struct seq_file *m, void *arg) | |||
587 | seq_printf(m, | 587 | seq_printf(m, |
588 | "\n all_unreclaimable: %u" | 588 | "\n all_unreclaimable: %u" |
589 | "\n prev_priority: %i" | 589 | "\n prev_priority: %i" |
590 | "\n temp_priority: %i" | ||
591 | "\n start_pfn: %lu", | 590 | "\n start_pfn: %lu", |
592 | zone->all_unreclaimable, | 591 | zone->all_unreclaimable, |
593 | zone->prev_priority, | 592 | zone->prev_priority, |
594 | zone->temp_priority, | ||
595 | zone->zone_start_pfn); | 593 | zone->zone_start_pfn); |
596 | spin_unlock_irqrestore(&zone->lock, flags); | 594 | spin_unlock_irqrestore(&zone->lock, flags); |
597 | seq_putc(m, '\n'); | 595 | seq_putc(m, '\n'); |