-rw-r--r--   include/linux/shrinker.h    14
-rw-r--r--   mm/vmscan.c                241
2 files changed, 152 insertions, 103 deletions
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 76f520c4c394..8f80f243fed9 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -19,6 +19,8 @@ struct shrink_control {
 
         /* shrink from these nodes */
         nodemask_t nodes_to_scan;
+        /* current node being shrunk (for NUMA aware shrinkers) */
+        int nid;
 };
 
 #define SHRINK_STOP (~0UL)
@@ -44,6 +46,8 @@ struct shrink_control {
  * due to potential deadlocks. If SHRINK_STOP is returned, then no further
  * attempts to call the @scan_objects will be made from the current reclaim
  * context.
+ *
+ * @flags determine the shrinker abilities, like numa awareness
  */
 struct shrinker {
         int (*shrink)(struct shrinker *, struct shrink_control *sc);
@@ -54,12 +58,18 @@ struct shrinker {
 
         int seeks;      /* seeks to recreate an obj */
         long batch;     /* reclaim batch size, 0 = default */
+        unsigned long flags;
 
         /* These are for internal use */
         struct list_head list;
-        atomic_long_t nr_in_batch; /* objs pending delete */
+        /* objs pending delete, per node */
+        atomic_long_t *nr_deferred;
 };
 #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
-extern void register_shrinker(struct shrinker *);
+
+/* Flags */
+#define SHRINKER_NUMA_AWARE (1 << 0)
+
+extern int register_shrinker(struct shrinker *);
 extern void unregister_shrinker(struct shrinker *);
 #endif
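For context, and not part of the patch itself: a minimal sketch of how a NUMA-aware shrinker could plug into this interface, assuming the count_objects()/scan_objects() callbacks introduced earlier in this series; the my_cache_* helpers and structure are hypothetical.

#include <linux/shrinker.h>

/* hypothetical per-node object cache; only the shrinker wiring is real API */
extern unsigned long my_cache_nr_objects(int nid);
extern unsigned long my_cache_free_objects(int nid, unsigned long nr);

static unsigned long my_cache_count(struct shrinker *s,
                                    struct shrink_control *sc)
{
        /* report only the objects that live on the node being shrunk */
        return my_cache_nr_objects(sc->nid);
}

static unsigned long my_cache_scan(struct shrinker *s,
                                   struct shrink_control *sc)
{
        /* free up to nr_to_scan objects from sc->nid, return how many went */
        return my_cache_free_objects(sc->nid, sc->nr_to_scan);
}

static struct shrinker my_shrinker = {
        .count_objects  = my_cache_count,
        .scan_objects   = my_cache_scan,
        .seeks          = DEFAULT_SEEKS,
        .flags          = SHRINKER_NUMA_AWARE,
};

static int __init my_cache_init(void)
{
        /* register_shrinker() can now fail with -ENOMEM, so check the result */
        return register_shrinker(&my_shrinker);
}

With SHRINKER_NUMA_AWARE set, shrink_slab() invokes the callbacks once per node in shrinkctl->nodes_to_scan and keeps a separate nr_deferred counter for each node; without the flag, only node 0 is visited and a single counter is used.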
diff --git a/mm/vmscan.c b/mm/vmscan.c
index fe0d5c458440..799ebceeb4f7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -155,14 +155,31 @@ static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 }
 
 /*
- * Add a shrinker callback to be called from the vm
+ * Add a shrinker callback to be called from the vm.
  */
-void register_shrinker(struct shrinker *shrinker)
+int register_shrinker(struct shrinker *shrinker)
 {
-        atomic_long_set(&shrinker->nr_in_batch, 0);
+        size_t size = sizeof(*shrinker->nr_deferred);
+
+        /*
+         * If we only have one possible node in the system anyway, save
+         * ourselves the trouble and disable NUMA aware behavior. This way we
+         * will save memory and some small loop time later.
+         */
+        if (nr_node_ids == 1)
+                shrinker->flags &= ~SHRINKER_NUMA_AWARE;
+
+        if (shrinker->flags & SHRINKER_NUMA_AWARE)
+                size *= nr_node_ids;
+
+        shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
+        if (!shrinker->nr_deferred)
+                return -ENOMEM;
+
         down_write(&shrinker_rwsem);
         list_add_tail(&shrinker->list, &shrinker_list);
         up_write(&shrinker_rwsem);
+        return 0;
 }
 EXPORT_SYMBOL(register_shrinker);
 
@@ -186,6 +203,118 @@ static inline int do_shrinker_shrink(struct shrinker *shrinker,
 }
 
 #define SHRINK_BATCH 128
+
+static unsigned long
+shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
+                 unsigned long nr_pages_scanned, unsigned long lru_pages)
+{
+        unsigned long freed = 0;
+        unsigned long long delta;
+        long total_scan;
+        long max_pass;
+        long nr;
+        long new_nr;
+        int nid = shrinkctl->nid;
+        long batch_size = shrinker->batch ? shrinker->batch
+                                          : SHRINK_BATCH;
+
+        if (shrinker->count_objects)
+                max_pass = shrinker->count_objects(shrinker, shrinkctl);
+        else
+                max_pass = do_shrinker_shrink(shrinker, shrinkctl, 0);
+        if (max_pass == 0)
+                return 0;
+
+        /*
+         * copy the current shrinker scan count into a local variable
+         * and zero it so that other concurrent shrinker invocations
+         * don't also do this scanning work.
+         */
+        nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
+
+        total_scan = nr;
+        delta = (4 * nr_pages_scanned) / shrinker->seeks;
+        delta *= max_pass;
+        do_div(delta, lru_pages + 1);
+        total_scan += delta;
+        if (total_scan < 0) {
+                printk(KERN_ERR
+                "shrink_slab: %pF negative objects to delete nr=%ld\n",
+                       shrinker->shrink, total_scan);
+                total_scan = max_pass;
+        }
+
+        /*
+         * We need to avoid excessive windup on filesystem shrinkers
+         * due to large numbers of GFP_NOFS allocations causing the
+         * shrinkers to return -1 all the time. This results in a large
+         * nr being built up so when a shrink that can do some work
+         * comes along it empties the entire cache due to nr >>>
+         * max_pass.  This is bad for sustaining a working set in
+         * memory.
+         *
+         * Hence only allow the shrinker to scan the entire cache when
+         * a large delta change is calculated directly.
+         */
+        if (delta < max_pass / 4)
+                total_scan = min(total_scan, max_pass / 2);
+
+        /*
+         * Avoid risking looping forever due to too large nr value:
+         * never try to free more than twice the estimate number of
+         * freeable entries.
+         */
+        if (total_scan > max_pass * 2)
+                total_scan = max_pass * 2;
+
+        trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
+                                nr_pages_scanned, lru_pages,
+                                max_pass, delta, total_scan);
+
+        while (total_scan >= batch_size) {
+
+                if (shrinker->scan_objects) {
+                        unsigned long ret;
+                        shrinkctl->nr_to_scan = batch_size;
+                        ret = shrinker->scan_objects(shrinker, shrinkctl);
+
+                        if (ret == SHRINK_STOP)
+                                break;
+                        freed += ret;
+                } else {
+                        int nr_before;
+                        long ret;
+
+                        nr_before = do_shrinker_shrink(shrinker, shrinkctl, 0);
+                        ret = do_shrinker_shrink(shrinker, shrinkctl,
+                                                        batch_size);
+                        if (ret == -1)
+                                break;
+                        if (ret < nr_before)
+                                freed += nr_before - ret;
+                }
+
+                count_vm_events(SLABS_SCANNED, batch_size);
+                total_scan -= batch_size;
+
+                cond_resched();
+        }
+
+        /*
+         * move the unused scan count back into the shrinker in a
+         * manner that handles concurrent updates. If we exhausted the
+         * scan, there is no need to do an update.
+         */
+        if (total_scan > 0)
+                new_nr = atomic_long_add_return(total_scan,
+                                &shrinker->nr_deferred[nid]);
+        else
+                new_nr = atomic_long_read(&shrinker->nr_deferred[nid]);
+
+        trace_mm_shrink_slab_end(shrinker, freed, nr, new_nr);
+        return freed;
+}
+
 /*
  * Call the shrink functions to age shrinkable caches
  *
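To make the scan-pressure arithmetic in shrink_slab_node() concrete, with made-up numbers: if reclaim scanned nr_pages_scanned = 1000 pages against lru_pages = 100000, and a shrinker with seeks = DEFAULT_SEEKS (2) reports max_pass = 50000 freeable objects, then delta = (4 * 1000 / 2) * 50000 / 100001, roughly 1000 objects, so the cache is asked to scan about twice the proportion that the page LRU was scanned at (the 4/seeks factor). Whatever part of total_scan is not processed in this pass is parked in nr_deferred[nid] and added back in on the next call for that node.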
@@ -227,108 +356,18 @@ unsigned long shrink_slab(struct shrink_control *shrinkctl,
         }
 
         list_for_each_entry(shrinker, &shrinker_list, list) {
-                unsigned long long delta;
-                long total_scan;
-                long max_pass;
-                long nr;
-                long new_nr;
-                long batch_size = shrinker->batch ? shrinker->batch
-                                                  : SHRINK_BATCH;
-
-                if (shrinker->count_objects)
-                        max_pass = shrinker->count_objects(shrinker, shrinkctl);
-                else
-                        max_pass = do_shrinker_shrink(shrinker, shrinkctl, 0);
-                if (max_pass == 0)
-                        continue;
-
-                /*
-                 * copy the current shrinker scan count into a local variable
-                 * and zero it so that other concurrent shrinker invocations
-                 * don't also do this scanning work.
-                 */
-                nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
-
-                total_scan = nr;
-                delta = (4 * nr_pages_scanned) / shrinker->seeks;
-                delta *= max_pass;
-                do_div(delta, lru_pages + 1);
-                total_scan += delta;
-                if (total_scan < 0) {
-                        printk(KERN_ERR
-                        "shrink_slab: %pF negative objects to delete nr=%ld\n",
-                                shrinker->shrink, total_scan);
-                        total_scan = max_pass;
-                }
-
-                /*
-                 * We need to avoid excessive windup on filesystem shrinkers
-                 * due to large numbers of GFP_NOFS allocations causing the
-                 * shrinkers to return -1 all the time. This results in a large
-                 * nr being built up so when a shrink that can do some work
-                 * comes along it empties the entire cache due to nr >>>
-                 * max_pass.  This is bad for sustaining a working set in
-                 * memory.
-                 *
-                 * Hence only allow the shrinker to scan the entire cache when
-                 * a large delta change is calculated directly.
-                 */
-                if (delta < max_pass / 4)
-                        total_scan = min(total_scan, max_pass / 2);
-
-                /*
-                 * Avoid risking looping forever due to too large nr value:
-                 * never try to free more than twice the estimate number of
-                 * freeable entries.
-                 */
-                if (total_scan > max_pass * 2)
-                        total_scan = max_pass * 2;
-
-                trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
-                                        nr_pages_scanned, lru_pages,
-                                        max_pass, delta, total_scan);
-
-                while (total_scan >= batch_size) {
-
-                        if (shrinker->scan_objects) {
-                                unsigned long ret;
-                                shrinkctl->nr_to_scan = batch_size;
-                                ret = shrinker->scan_objects(shrinker, shrinkctl);
+                for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
+                        if (!node_online(shrinkctl->nid))
+                                continue;
 
-                                if (ret == SHRINK_STOP)
-                                        break;
-                                freed += ret;
-                        } else {
-                                int nr_before;
-                                long ret;
-
-                                nr_before = do_shrinker_shrink(shrinker, shrinkctl, 0);
-                                ret = do_shrinker_shrink(shrinker, shrinkctl,
-                                                                batch_size);
-                                if (ret == -1)
-                                        break;
-                                if (ret < nr_before)
-                                        freed += nr_before - ret;
-                        }
+                        if (!(shrinker->flags & SHRINKER_NUMA_AWARE) &&
+                            (shrinkctl->nid != 0))
+                                break;
 
-                        count_vm_events(SLABS_SCANNED, batch_size);
-                        total_scan -= batch_size;
+                        freed += shrink_slab_node(shrinkctl, shrinker,
+                                 nr_pages_scanned, lru_pages);
 
-                        cond_resched();
                 }
-
-                /*
-                 * move the unused scan count back into the shrinker in a
-                 * manner that handles concurrent updates. If we exhausted the
-                 * scan, there is no need to do an update.
-                 */
-                if (total_scan > 0)
-                        new_nr = atomic_long_add_return(total_scan,
-                                        &shrinker->nr_in_batch);
-                else
-                        new_nr = atomic_long_read(&shrinker->nr_in_batch);
-
-                trace_mm_shrink_slab_end(shrinker, freed, nr, new_nr);
         }
         up_read(&shrinker_rwsem);
 out: