Diffstat (limited to 'mm')
 -rw-r--r--   mm/vmscan.c | 241
 1 file changed, 140 insertions(+), 101 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index fe0d5c458440..799ebceeb4f7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -155,14 +155,31 @@ static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 }
 
 /*
- * Add a shrinker callback to be called from the vm
+ * Add a shrinker callback to be called from the vm.
  */
-void register_shrinker(struct shrinker *shrinker)
+int register_shrinker(struct shrinker *shrinker)
 {
-        atomic_long_set(&shrinker->nr_in_batch, 0);
+        size_t size = sizeof(*shrinker->nr_deferred);
+
+        /*
+         * If we only have one possible node in the system anyway, save
+         * ourselves the trouble and disable NUMA aware behavior. This way we
+         * will save memory and some small loop time later.
+         */
+        if (nr_node_ids == 1)
+                shrinker->flags &= ~SHRINKER_NUMA_AWARE;
+
+        if (shrinker->flags & SHRINKER_NUMA_AWARE)
+                size *= nr_node_ids;
+
+        shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
+        if (!shrinker->nr_deferred)
+                return -ENOMEM;
+
         down_write(&shrinker_rwsem);
         list_add_tail(&shrinker->list, &shrinker_list);
         up_write(&shrinker_rwsem);
+        return 0;
 }
 EXPORT_SYMBOL(register_shrinker);
 
@@ -186,6 +203,118 @@ static inline int do_shrinker_shrink(struct shrinker *shrinker,
 }
 
 #define SHRINK_BATCH 128
+
+static unsigned long
+shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
+                 unsigned long nr_pages_scanned, unsigned long lru_pages)
+{
+        unsigned long freed = 0;
+        unsigned long long delta;
+        long total_scan;
+        long max_pass;
+        long nr;
+        long new_nr;
+        int nid = shrinkctl->nid;
+        long batch_size = shrinker->batch ? shrinker->batch
+                                          : SHRINK_BATCH;
+
+        if (shrinker->count_objects)
+                max_pass = shrinker->count_objects(shrinker, shrinkctl);
+        else
+                max_pass = do_shrinker_shrink(shrinker, shrinkctl, 0);
+        if (max_pass == 0)
+                return 0;
+
+        /*
+         * copy the current shrinker scan count into a local variable
+         * and zero it so that other concurrent shrinker invocations
+         * don't also do this scanning work.
+         */
+        nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
+
+        total_scan = nr;
+        delta = (4 * nr_pages_scanned) / shrinker->seeks;
+        delta *= max_pass;
+        do_div(delta, lru_pages + 1);
+        total_scan += delta;
+        if (total_scan < 0) {
+                printk(KERN_ERR
+                "shrink_slab: %pF negative objects to delete nr=%ld\n",
+                       shrinker->shrink, total_scan);
+                total_scan = max_pass;
+        }
+
+        /*
+         * We need to avoid excessive windup on filesystem shrinkers
+         * due to large numbers of GFP_NOFS allocations causing the
+         * shrinkers to return -1 all the time. This results in a large
+         * nr being built up so when a shrink that can do some work
+         * comes along it empties the entire cache due to nr >>>
+         * max_pass. This is bad for sustaining a working set in
+         * memory.
+         *
+         * Hence only allow the shrinker to scan the entire cache when
+         * a large delta change is calculated directly.
+         */
+        if (delta < max_pass / 4)
+                total_scan = min(total_scan, max_pass / 2);
+
+        /*
+         * Avoid risking looping forever due to too large nr value:
+         * never try to free more than twice the estimate number of
+         * freeable entries.
+         */
+        if (total_scan > max_pass * 2)
+                total_scan = max_pass * 2;
+
+        trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
+                                nr_pages_scanned, lru_pages,
+                                max_pass, delta, total_scan);
+
+        while (total_scan >= batch_size) {
+
+                if (shrinker->scan_objects) {
+                        unsigned long ret;
+                        shrinkctl->nr_to_scan = batch_size;
+                        ret = shrinker->scan_objects(shrinker, shrinkctl);
+
+                        if (ret == SHRINK_STOP)
+                                break;
+                        freed += ret;
+                } else {
+                        int nr_before;
+                        long ret;
+
+                        nr_before = do_shrinker_shrink(shrinker, shrinkctl, 0);
+                        ret = do_shrinker_shrink(shrinker, shrinkctl,
+                                                        batch_size);
+                        if (ret == -1)
+                                break;
+                        if (ret < nr_before)
+                                freed += nr_before - ret;
+                }
+
+                count_vm_events(SLABS_SCANNED, batch_size);
+                total_scan -= batch_size;
+
+                cond_resched();
+        }
+
+        /*
+         * move the unused scan count back into the shrinker in a
+         * manner that handles concurrent updates. If we exhausted the
+         * scan, there is no need to do an update.
+         */
+        if (total_scan > 0)
+                new_nr = atomic_long_add_return(total_scan,
+                                                &shrinker->nr_deferred[nid]);
+        else
+                new_nr = atomic_long_read(&shrinker->nr_deferred[nid]);
+
+        trace_mm_shrink_slab_end(shrinker, freed, nr, new_nr);
+        return freed;
+}
+
 /*
  * Call the shrink functions to age shrinkable caches
  *
@@ -227,108 +356,18 @@ unsigned long shrink_slab(struct shrink_control *shrinkctl,
         }
 
         list_for_each_entry(shrinker, &shrinker_list, list) {
-                unsigned long long delta;
-                long total_scan;
-                long max_pass;
-                long nr;
-                long new_nr;
-                long batch_size = shrinker->batch ? shrinker->batch
-                                                  : SHRINK_BATCH;
-
-                if (shrinker->count_objects)
-                        max_pass = shrinker->count_objects(shrinker, shrinkctl);
-                else
-                        max_pass = do_shrinker_shrink(shrinker, shrinkctl, 0);
-                if (max_pass == 0)
-                        continue;
-
-                /*
-                 * copy the current shrinker scan count into a local variable
-                 * and zero it so that other concurrent shrinker invocations
-                 * don't also do this scanning work.
-                 */
-                nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
-
-                total_scan = nr;
-                delta = (4 * nr_pages_scanned) / shrinker->seeks;
-                delta *= max_pass;
-                do_div(delta, lru_pages + 1);
-                total_scan += delta;
-                if (total_scan < 0) {
-                        printk(KERN_ERR
-                        "shrink_slab: %pF negative objects to delete nr=%ld\n",
-                               shrinker->shrink, total_scan);
-                        total_scan = max_pass;
-                }
-
-                /*
-                 * We need to avoid excessive windup on filesystem shrinkers
-                 * due to large numbers of GFP_NOFS allocations causing the
-                 * shrinkers to return -1 all the time. This results in a large
-                 * nr being built up so when a shrink that can do some work
-                 * comes along it empties the entire cache due to nr >>>
-                 * max_pass. This is bad for sustaining a working set in
-                 * memory.
-                 *
-                 * Hence only allow the shrinker to scan the entire cache when
-                 * a large delta change is calculated directly.
-                 */
-                if (delta < max_pass / 4)
-                        total_scan = min(total_scan, max_pass / 2);
-
-                /*
-                 * Avoid risking looping forever due to too large nr value:
-                 * never try to free more than twice the estimate number of
-                 * freeable entries.
-                 */
-                if (total_scan > max_pass * 2)
-                        total_scan = max_pass * 2;
-
-                trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
-                                        nr_pages_scanned, lru_pages,
-                                        max_pass, delta, total_scan);
-
-                while (total_scan >= batch_size) {
-
-                        if (shrinker->scan_objects) {
-                                unsigned long ret;
-                                shrinkctl->nr_to_scan = batch_size;
-                                ret = shrinker->scan_objects(shrinker, shrinkctl);
+                for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
+                        if (!node_online(shrinkctl->nid))
+                                continue;
 
-                                if (ret == SHRINK_STOP)
-                                        break;
-                                freed += ret;
-                        } else {
-                                int nr_before;
-                                long ret;
-
-                                nr_before = do_shrinker_shrink(shrinker, shrinkctl, 0);
-                                ret = do_shrinker_shrink(shrinker, shrinkctl,
-                                                                batch_size);
-                                if (ret == -1)
-                                        break;
-                                if (ret < nr_before)
-                                        freed += nr_before - ret;
-                        }
+                        if (!(shrinker->flags & SHRINKER_NUMA_AWARE) &&
+                            (shrinkctl->nid != 0))
+                                break;
 
-                        count_vm_events(SLABS_SCANNED, batch_size);
-                        total_scan -= batch_size;
+                        freed += shrink_slab_node(shrinkctl, shrinker,
+                                 nr_pages_scanned, lru_pages);
 
-                        cond_resched();
                 }
-
-                /*
-                 * move the unused scan count back into the shrinker in a
-                 * manner that handles concurrent updates. If we exhausted the
-                 * scan, there is no need to do an update.
-                 */
-                if (total_scan > 0)
-                        new_nr = atomic_long_add_return(total_scan,
-                                        &shrinker->nr_in_batch);
-                else
-                        new_nr = atomic_long_read(&shrinker->nr_in_batch);
-
-                trace_mm_shrink_slab_end(shrinker, freed, nr, new_nr);
         }
         up_read(&shrinker_rwsem);
 out:
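
For context, a minimal sketch of a client of the reworked API follows. It assumes only what the diff above shows: the count_objects/scan_objects callbacks, SHRINK_STOP, the SHRINKER_NUMA_AWARE flag, and the now fallible register_shrinker(). The my_cache_* names and the per-node counter array are hypothetical, for illustration only, not part of this patch.

/*
 * Hypothetical example: a NUMA-aware shrinker written against the API
 * used above. my_cache_count(), my_cache_scan() and my_nr_cached[] are
 * made-up names.
 */
#include <linux/shrinker.h>
#include <linux/nodemask.h>
#include <linux/atomic.h>
#include <linux/gfp.h>
#include <linux/module.h>

static atomic_long_t my_nr_cached[MAX_NUMNODES];        /* objects per node */

/* Tell shrink_slab_node() how many objects are freeable on sc->nid. */
static unsigned long my_cache_count(struct shrinker *s,
                                    struct shrink_control *sc)
{
        return atomic_long_read(&my_nr_cached[sc->nid]);
}

/* Try to free up to sc->nr_to_scan objects from node sc->nid. */
static unsigned long my_cache_scan(struct shrinker *s,
                                   struct shrink_control *sc)
{
        unsigned long freed = 0;

        if (!(sc->gfp_mask & __GFP_FS))
                return SHRINK_STOP;     /* ends the batch loop for this node */

        /*
         * ... walk the per-node list for sc->nid, freeing entries,
         * decrementing my_nr_cached[sc->nid] and counting into freed ...
         */

        return freed;
}

static struct shrinker my_shrinker = {
        .count_objects  = my_cache_count,
        .scan_objects   = my_cache_scan,
        .seeks          = DEFAULT_SEEKS,
        .flags          = SHRINKER_NUMA_AWARE, /* cleared internally if nr_node_ids == 1 */
};

static int __init my_cache_init(void)
{
        /* register_shrinker() now allocates nr_deferred[] and can fail. */
        return register_shrinker(&my_shrinker);
}
module_init(my_cache_init);

The behavioural points the sketch leans on are all visible in the diff: a SHRINK_STOP return ends the per-node batch loop, unused scan work is parked in shrinker->nr_deferred[nid] for a later invocation, and a shrinker without SHRINKER_NUMA_AWARE is only invoked for node 0.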