author		Tejun Heo <tj@kernel.org>	2014-09-02 14:46:05 -0400
committer	Tejun Heo <tj@kernel.org>	2014-09-02 14:46:05 -0400
commit		1a4d76076cda69b0abf15463a8cebc172406da25 (patch)
tree		eb2f8e317795c30942ae298585708e10652e8537 /mm/percpu.c
parent		fe6bd8c3d28357174587c4fe895d10b00321b692 (diff)
percpu: implement asynchronous chunk population
The percpu allocator now supports atomic allocations by only allocating from
already populated areas, but the mechanism to ensure that there's an adequate
amount of populated areas was missing.

This patch expands pcpu_balance_work so that in addition to freeing excess
free chunks it also populates chunks to maintain an adequate level of
populated areas.  pcpu_alloc() schedules pcpu_balance_work if the amount of
free populated areas is too low or after an atomic allocation failure.

* PCPU_DYNAMIC_RESERVE is increased by two pages to account for
  PCPU_EMPTY_POP_PAGES_LOW.

* pcpu_async_enabled is added to gate both async jobs -
  chunk->map_extend_work and pcpu_balance_work - so that we don't end up
  scheduling them while the needed subsystems aren't up yet.

Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'mm/percpu.c')
-rw-r--r--	mm/percpu.c	117
1 file changed, 113 insertions(+), 4 deletions(-)
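The populated-page pool maintained by this patch only matters to callers on the atomic allocation path described in the commit message above. As a rough illustration (not part of this patch), the sketch below shows what such a caller might look like; it assumes the alloc_percpu_gfp() interface added earlier in this series, and the my_stats / my_setup_in_atomic_context names are made up for this example.

```c
/* Illustrative sketch only -- not part of this patch. */
#include <linux/percpu.h>
#include <linux/gfp.h>
#include <linux/types.h>
#include <linux/errno.h>

struct my_stats {			/* hypothetical per-cpu payload */
	u64 events;
};

static struct my_stats __percpu *stats;

/* May run with IRQs disabled, so a sleeping GFP_KERNEL allocation is out. */
static int my_setup_in_atomic_context(void)
{
	/*
	 * GFP_NOWAIT keeps pcpu_alloc() on the atomic path, which can only
	 * carve space out of already populated areas.  If the pool that
	 * pcpu_balance_work keeps between PCPU_EMPTY_POP_PAGES_LOW and
	 * _HIGH runs dry, this fails and the balance work is scheduled so
	 * that a later retry is more likely to succeed.
	 */
	stats = alloc_percpu_gfp(struct my_stats, GFP_NOWAIT);
	if (!stats)
		return -ENOMEM;

	this_cpu_inc(stats->events);	/* normal per-cpu access afterwards */
	return 0;
}
```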
diff --git a/mm/percpu.c b/mm/percpu.c
index 28a830590b4c..867efd38d879 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -78,6 +78,8 @@
 #define PCPU_DFL_MAP_ALLOC          16    /* start a map with 16 ents */
 #define PCPU_ATOMIC_MAP_MARGIN_LOW  32
 #define PCPU_ATOMIC_MAP_MARGIN_HIGH 64
+#define PCPU_EMPTY_POP_PAGES_LOW    2
+#define PCPU_EMPTY_POP_PAGES_HIGH   4
 
 #ifdef CONFIG_SMP
 /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
@@ -168,9 +170,22 @@ static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
  */
 static int pcpu_nr_empty_pop_pages;
 
-/* balance work is used to populate or destroy chunks asynchronously */
+/*
+ * Balance work is used to populate or destroy chunks asynchronously.  We
+ * try to keep the number of populated free pages between
+ * PCPU_EMPTY_POP_PAGES_LOW and HIGH for atomic allocations and at most one
+ * empty chunk.
+ */
 static void pcpu_balance_workfn(struct work_struct *work);
 static DECLARE_WORK(pcpu_balance_work, pcpu_balance_workfn);
+static bool pcpu_async_enabled __read_mostly;
+static bool pcpu_atomic_alloc_failed;
+
+static void pcpu_schedule_balance_work(void)
+{
+        if (pcpu_async_enabled)
+                schedule_work(&pcpu_balance_work);
+}
 
 static bool pcpu_addr_in_first_chunk(void *addr)
 {
@@ -386,7 +401,8 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk, bool is_atomic)
                 margin = 3;
 
                 if (chunk->map_alloc <
-                    chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW)
+                    chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW &&
+                    pcpu_async_enabled)
                         schedule_work(&chunk->map_extend_work);
         } else {
                 margin = PCPU_ATOMIC_MAP_MARGIN_HIGH;
@@ -1005,6 +1021,9 @@ area_found:
         if (chunk != pcpu_reserved_chunk)
                 pcpu_nr_empty_pop_pages -= occ_pages;
 
+        if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW)
+                pcpu_schedule_balance_work();
+
         /* clear the areas and return address relative to base address */
         for_each_possible_cpu(cpu)
                 memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
@@ -1023,6 +1042,11 @@ fail:
                 if (!--warn_limit)
                         pr_info("PERCPU: limit reached, disable warning\n");
         }
+        if (is_atomic) {
+                /* see the flag handling in pcpu_balance_workfn() */
+                pcpu_atomic_alloc_failed = true;
+                pcpu_schedule_balance_work();
+        }
         return NULL;
 }
 
@@ -1080,7 +1104,7 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
 }
 
 /**
- * pcpu_balance_workfn - reclaim fully free chunks, workqueue function
+ * pcpu_balance_workfn - manage the amount of free chunks and populated pages
  * @work: unused
  *
  * Reclaim all fully free chunks except for the first one.
@@ -1090,7 +1114,12 @@ static void pcpu_balance_workfn(struct work_struct *work)
         LIST_HEAD(to_free);
         struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
         struct pcpu_chunk *chunk, *next;
+        int slot, nr_to_pop, ret;
 
+        /*
+         * There's no reason to keep around multiple unused chunks and VM
+         * areas can be scarce.  Destroy all free chunks except for one.
+         */
         mutex_lock(&pcpu_alloc_mutex);
         spin_lock_irq(&pcpu_lock);
 
@@ -1118,6 +1147,74 @@ static void pcpu_balance_workfn(struct work_struct *work)
                 pcpu_destroy_chunk(chunk);
         }
 
+        /*
+         * Ensure there are a certain number of free populated pages for
+         * atomic allocs.  Fill up from the most packed so that atomic
+         * allocs don't increase fragmentation.  If atomic allocation
+         * failed previously, always populate the maximum amount.  This
+         * should prevent atomic allocs larger than PAGE_SIZE from keeping
+         * failing indefinitely; however, large atomic allocs are not
+         * something we support properly and can be highly unreliable and
+         * inefficient.
+         */
+retry_pop:
+        if (pcpu_atomic_alloc_failed) {
+                nr_to_pop = PCPU_EMPTY_POP_PAGES_HIGH;
+                /* best effort anyway, don't worry about synchronization */
+                pcpu_atomic_alloc_failed = false;
+        } else {
+                nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH -
+                                  pcpu_nr_empty_pop_pages,
+                                  0, PCPU_EMPTY_POP_PAGES_HIGH);
+        }
+
+        for (slot = pcpu_size_to_slot(PAGE_SIZE); slot < pcpu_nr_slots; slot++) {
+                int nr_unpop = 0, rs, re;
+
+                if (!nr_to_pop)
+                        break;
+
+                spin_lock_irq(&pcpu_lock);
+                list_for_each_entry(chunk, &pcpu_slot[slot], list) {
+                        nr_unpop = pcpu_unit_pages - chunk->nr_populated;
+                        if (nr_unpop)
+                                break;
+                }
+                spin_unlock_irq(&pcpu_lock);
+
+                if (!nr_unpop)
+                        continue;
+
+                /* @chunk can't go away while pcpu_alloc_mutex is held */
+                pcpu_for_each_unpop_region(chunk, rs, re, 0, pcpu_unit_pages) {
+                        int nr = min(re - rs, nr_to_pop);
+
+                        ret = pcpu_populate_chunk(chunk, rs, rs + nr);
+                        if (!ret) {
+                                nr_to_pop -= nr;
+                                spin_lock_irq(&pcpu_lock);
+                                pcpu_chunk_populated(chunk, rs, rs + nr);
+                                spin_unlock_irq(&pcpu_lock);
+                        } else {
+                                nr_to_pop = 0;
+                        }
+
+                        if (!nr_to_pop)
+                                break;
+                }
+        }
+
+        if (nr_to_pop) {
+                /* ran out of chunks to populate, create a new one and retry */
+                chunk = pcpu_create_chunk();
+                if (chunk) {
+                        spin_lock_irq(&pcpu_lock);
+                        pcpu_chunk_relocate(chunk, -1);
+                        spin_unlock_irq(&pcpu_lock);
+                        goto retry_pop;
+                }
+        }
+
         mutex_unlock(&pcpu_alloc_mutex);
 }
 
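For readers who want to see the watermark arithmetic from the hunk above in isolation, here is a minimal userspace model (plain C, not kernel code); the clamp_int() helper simply stands in for the kernel's clamp() macro, and the numbers printed are just the target populate counts for a range of pool sizes.

```c
/* Simplified userspace model of the nr_to_pop computation above. */
#include <stdbool.h>
#include <stdio.h>

#define PCPU_EMPTY_POP_PAGES_LOW	2
#define PCPU_EMPTY_POP_PAGES_HIGH	4

/* stand-in for the kernel's clamp() macro */
static int clamp_int(int v, int lo, int hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

static int nr_to_pop(int empty_pop_pages, bool atomic_alloc_failed)
{
	/* after a failed atomic alloc, always aim for the full HIGH target */
	if (atomic_alloc_failed)
		return PCPU_EMPTY_POP_PAGES_HIGH;
	/* otherwise top the pool back up to HIGH, never a negative count */
	return clamp_int(PCPU_EMPTY_POP_PAGES_HIGH - empty_pop_pages,
			 0, PCPU_EMPTY_POP_PAGES_HIGH);
}

int main(void)
{
	int pages;

	for (pages = 0; pages <= 6; pages++)
		printf("empty_pop_pages=%d -> populate %d page(s)\n",
		       pages, nr_to_pop(pages, false));
	printf("after atomic failure -> populate %d page(s)\n",
	       nr_to_pop(6, true));
	return 0;
}
```

With a pool already at or above PCPU_EMPTY_POP_PAGES_HIGH this yields zero, so the balance work does nothing; a prior atomic failure forces the full HIGH target regardless of the current pool size.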
@@ -1160,7 +1257,7 @@ void free_percpu(void __percpu *ptr)
 
                 list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list)
                         if (pos != chunk) {
-                                schedule_work(&pcpu_balance_work);
+                                pcpu_schedule_balance_work();
                                 break;
                         }
         }
@@ -2187,3 +2284,15 @@ void __init percpu_init_late(void)
                 spin_unlock_irqrestore(&pcpu_lock, flags);
         }
 }
+
+/*
+ * Percpu allocator is initialized early during boot when neither slab nor
+ * workqueue is available.  Plug async management until everything is up
+ * and running.
+ */
+static int __init percpu_enable_async(void)
+{
+        pcpu_async_enabled = true;
+        return 0;
+}
+subsys_initcall(percpu_enable_async);
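The subsys_initcall() gating in the final hunk can be summarized with another small userspace model (again, not kernel code): balance requests made before the enable point are simply dropped, which mirrors how early-boot callers are kept away from the not-yet-available workqueue machinery until the first post-enable allocation triggers the check again.

```c
/* Simplified userspace model of the pcpu_async_enabled gate. */
#include <stdbool.h>
#include <stdio.h>

static bool async_enabled;	/* models pcpu_async_enabled */

static void schedule_balance_work(void)
{
	/* models pcpu_schedule_balance_work(): silently skip when gated */
	if (!async_enabled) {
		printf("too early in boot: request dropped\n");
		return;
	}
	printf("balance work scheduled\n");
}

/* stands in for the subsys_initcall percpu_enable_async() */
static void enable_async(void)
{
	async_enabled = true;
}

int main(void)
{
	schedule_balance_work();	/* early boot: no-op */
	enable_async();
	schedule_balance_work();	/* now actually scheduled */
	return 0;
}
```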