Diffstat (limited to 'include')
-rw-r--r--	include/linux/dcache.h		 14
-rw-r--r--	include/linux/fs.h		 26
-rw-r--r--	include/linux/list_lru.h	131
-rw-r--r--	include/linux/shrinker.h	 54
-rw-r--r--	include/trace/events/vmscan.h	  4
-rw-r--r--	include/uapi/linux/fs.h		  6
6 files changed, 196 insertions, 39 deletions
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index feaa8d88eef7..59066e0b4ff1 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -55,11 +55,11 @@ struct qstr {
 #define hashlen_len(hashlen) ((u32)((hashlen) >> 32))
 
 struct dentry_stat_t {
-	int nr_dentry;
-	int nr_unused;
-	int age_limit;		/* age in seconds */
-	int want_pages;		/* pages requested by system */
-	int dummy[2];
+	long nr_dentry;
+	long nr_unused;
+	long age_limit;		/* age in seconds */
+	long want_pages;	/* pages requested by system */
+	long dummy[2];
 };
 extern struct dentry_stat_t dentry_stat;
 
@@ -395,4 +395,8 @@ static inline bool d_mountpoint(const struct dentry *dentry)
 
 extern int sysctl_vfs_cache_pressure;
 
+static inline unsigned long vfs_pressure_ratio(unsigned long val)
+{
+	return mult_frac(val, sysctl_vfs_cache_pressure, 100);
+}
 #endif /* __LINUX_DCACHE_H */
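Note: the new vfs_pressure_ratio() helper centralises the sysctl_vfs_cache_pressure scaling that individual shrinkers previously open-coded. A minimal sketch of a caller — the function name and its nr_cached argument are illustrative, not part of this diff:

#include <linux/dcache.h>	/* vfs_pressure_ratio() */

/* Hypothetical: scale a raw freeable-object count by vfs_cache_pressure. */
static unsigned long example_pressure_count(unsigned long nr_cached)
{
	/*
	 * sysctl_vfs_cache_pressure == 100 reports the full count;
	 * lower values make the cache look smaller to reclaim,
	 * higher values make it look larger.
	 */
	return vfs_pressure_ratio(nr_cached);
}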
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 529d8711baba..a4acd3c61190 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -10,6 +10,7 @@
 #include <linux/stat.h>
 #include <linux/cache.h>
 #include <linux/list.h>
+#include <linux/list_lru.h>
 #include <linux/llist.h>
 #include <linux/radix-tree.h>
 #include <linux/rbtree.h>
@@ -1269,15 +1270,6 @@ struct super_block {
 	struct list_head	s_files;
 #endif
 	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
-	/* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
-	struct list_head	s_dentry_lru;	/* unused dentry lru */
-	int			s_nr_dentry_unused;	/* # of dentry on lru */
-
-	/* s_inode_lru_lock protects s_inode_lru and s_nr_inodes_unused */
-	spinlock_t		s_inode_lru_lock ____cacheline_aligned_in_smp;
-	struct list_head	s_inode_lru;		/* unused inode lru */
-	int			s_nr_inodes_unused;	/* # of inodes on lru */
-
 	struct block_device	*s_bdev;
 	struct backing_dev_info *s_bdi;
 	struct mtd_info		*s_mtd;
@@ -1331,11 +1323,14 @@ struct super_block {
 
 	/* AIO completions deferred from interrupt context */
 	struct workqueue_struct *s_dio_done_wq;
-};
 
-/* superblock cache pruning functions */
-extern void prune_icache_sb(struct super_block *sb, int nr_to_scan);
-extern void prune_dcache_sb(struct super_block *sb, int nr_to_scan);
+	/*
+	 * Keep the lru lists last in the structure so they always sit on their
+	 * own individual cachelines.
+	 */
+	struct list_lru		s_dentry_lru ____cacheline_aligned_in_smp;
+	struct list_lru		s_inode_lru ____cacheline_aligned_in_smp;
+};
 
 extern struct timespec current_fs_time(struct super_block *sb);
 
@@ -1629,8 +1624,8 @@ struct super_operations {
 	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
 #endif
 	int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
-	int (*nr_cached_objects)(struct super_block *);
-	void (*free_cached_objects)(struct super_block *, int);
+	long (*nr_cached_objects)(struct super_block *, int);
+	long (*free_cached_objects)(struct super_block *, long, int);
 };
 
 /*
@@ -2494,7 +2489,6 @@ extern const struct file_operations generic_ro_fops;
 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
 
 extern int vfs_readlink(struct dentry *, char __user *, int, const char *);
-extern int vfs_follow_link(struct nameidata *, const char *);
 extern int page_readlink(struct dentry *, char __user *, int);
 extern void *page_follow_link_light(struct dentry *, struct nameidata *);
 extern void page_put_link(struct dentry *, struct nameidata *, void *);
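Note: with the per-sb LRUs in place, the ->nr_cached_objects()/->free_cached_objects() hooks become node-aware and deal in longs. A sketch of how a filesystem might fill them in under the new signatures, using the list_lru API added below; example_fs, its fs_objects LRU, and example_isolate() are all hypothetical:

static long example_nr_cached_objects(struct super_block *sb, int nid)
{
	struct example_fs *fs = sb->s_fs_info;	/* hypothetical private data */

	/* Only report objects on the node currently being shrunk. */
	return list_lru_count_node(&fs->fs_objects, nid);
}

static long example_free_cached_objects(struct super_block *sb, long nr,
					int nid)
{
	struct example_fs *fs = sb->s_fs_info;
	unsigned long nr_to_walk = nr;

	/* example_isolate is a hypothetical list_lru_walk_cb. */
	return list_lru_walk_node(&fs->fs_objects, nid, example_isolate,
				  NULL, &nr_to_walk);
}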
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
new file mode 100644
index 000000000000..3ce541753c88
--- /dev/null
+++ b/include/linux/list_lru.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
+ * Authors: David Chinner and Glauber Costa
+ *
+ * Generic LRU infrastructure
+ */
+#ifndef _LRU_LIST_H
+#define _LRU_LIST_H
+
+#include <linux/list.h>
+#include <linux/nodemask.h>
+
+/* list_lru_walk_cb has to always return one of those */
+enum lru_status {
+	LRU_REMOVED,		/* item removed from list */
+	LRU_ROTATE,		/* item referenced, give another pass */
+	LRU_SKIP,		/* item cannot be locked, skip */
+	LRU_RETRY,		/* item not freeable. May drop the lock
+				   internally, but has to return locked. */
+};
+
+struct list_lru_node {
+	spinlock_t		lock;
+	struct list_head	list;
+	/* kept as signed so we can catch imbalance bugs */
+	long			nr_items;
+} ____cacheline_aligned_in_smp;
+
+struct list_lru {
+	struct list_lru_node	*node;
+	nodemask_t		active_nodes;
+};
+
+void list_lru_destroy(struct list_lru *lru);
+int list_lru_init(struct list_lru *lru);
+
+/**
+ * list_lru_add: add an element to the lru list's tail
+ * @list_lru: the lru pointer
+ * @item: the item to be added.
+ *
+ * If the element is already part of a list, this function returns without
+ * doing anything. Therefore the caller does not need to keep state about
+ * whether or not the element already belongs in the list and is allowed to
+ * lazily update it. Note however that this is valid for *a* list, not *this*
+ * list. If the caller organizes itself in a way that elements can be in more
+ * than one type of list, it is up to the caller to fully remove the item
+ * from the previous list (with list_lru_del() for instance) before moving it
+ * to @list_lru.
+ *
+ * Return value: true if the list was updated, false otherwise
+ */
+bool list_lru_add(struct list_lru *lru, struct list_head *item);
+
+/**
+ * list_lru_del: delete an element from the lru list
+ * @list_lru: the lru pointer
+ * @item: the item to be deleted.
+ *
+ * This function works analogously to list_lru_add in terms of list
+ * manipulation. The comments about an element already belonging to
+ * a list are also valid for list_lru_del.
+ *
+ * Return value: true if the list was updated, false otherwise
+ */
+bool list_lru_del(struct list_lru *lru, struct list_head *item);
+
+/**
+ * list_lru_count_node: return the number of objects currently held by @lru
+ * @lru: the lru pointer.
+ * @nid: the node id to count from.
+ *
+ * Always returns a non-negative number, 0 for empty lists. There is no
+ * guarantee that the list is not updated while the count is being computed.
+ * Callers that want such a guarantee need to provide an outer lock.
+ */
+unsigned long list_lru_count_node(struct list_lru *lru, int nid);
+static inline unsigned long list_lru_count(struct list_lru *lru)
+{
+	long count = 0;
+	int nid;
+
+	for_each_node_mask(nid, lru->active_nodes)
+		count += list_lru_count_node(lru, nid);
+
+	return count;
+}
+
+typedef enum lru_status
+(*list_lru_walk_cb)(struct list_head *item, spinlock_t *lock, void *cb_arg);
+/**
+ * list_lru_walk_node: walk a list_lru, isolating and disposing freeable items.
+ * @lru: the lru pointer.
+ * @nid: the node id to scan from.
+ * @isolate: callback function that is responsible for deciding what to do with
+ * the item currently being scanned
+ * @cb_arg: opaque type that will be passed to @isolate
+ * @nr_to_walk: how many items to scan.
+ *
+ * This function will scan all elements in a particular list_lru, calling the
+ * @isolate callback for each of those items, along with the current list
+ * spinlock and a caller-provided opaque argument. The @isolate callback can
+ * choose to drop the lock internally, but *must* return with the lock held.
+ * The callback will return an enum lru_status telling the list_lru
+ * infrastructure what to do with the object being scanned.
+ *
+ * Please note that nr_to_walk does not mean how many objects will be freed,
+ * just how many objects will be scanned.
+ *
+ * Return value: the number of objects effectively removed from the LRU.
+ */
+unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
+				 list_lru_walk_cb isolate, void *cb_arg,
+				 unsigned long *nr_to_walk);
+
+static inline unsigned long
+list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate,
+	      void *cb_arg, unsigned long nr_to_walk)
+{
+	long isolated = 0;
+	int nid;
+
+	for_each_node_mask(nid, lru->active_nodes) {
+		isolated += list_lru_walk_node(lru, nid, isolate,
+					       cb_arg, &nr_to_walk);
+		if (nr_to_walk <= 0)
+			break;
+	}
+	return isolated;
+}
+#endif /* _LRU_LIST_H */
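Note: taken together, the API above is used roughly as follows. This is an illustrative sketch only — my_obj, my_lru, and the freeing policy are assumptions, not part of the patch:

#include <linux/kernel.h>	/* container_of() */
#include <linux/list_lru.h>

struct my_obj {				/* hypothetical cached object */
	struct list_head	lru;
	int			referenced;
};

static struct list_lru my_lru;

static int __init my_cache_init(void)
{
	return list_lru_init(&my_lru);	/* allocates the per-node lists */
}

/* Called with the per-node lru lock held; must return with it held. */
static enum lru_status my_isolate(struct list_head *item, spinlock_t *lock,
				  void *cb_arg)
{
	struct my_obj *obj = container_of(item, struct my_obj, lru);

	if (obj->referenced) {
		obj->referenced = 0;
		return LRU_ROTATE;	/* recently used: give it another pass */
	}

	/* The callback removes the item itself before returning LRU_REMOVED. */
	list_del_init(item);
	/* ... queue obj for freeing, e.g. onto a dispose list in cb_arg ... */
	return LRU_REMOVED;
}

/* Lazy ageing: list_lru_add() is a cheap no-op if already on a list. */
static void my_obj_accessed(struct my_obj *obj)
{
	list_lru_add(&my_lru, &obj->lru);
}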
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index ac6b8ee07825..68c097077ef0 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -4,39 +4,67 @@
 /*
  * This struct is used to pass information from page reclaim to the shrinkers.
  * We consolidate the values for easier extension later.
+ *
+ * The 'gfpmask' refers to the allocation we are currently trying to
+ * fulfil.
  */
 struct shrink_control {
 	gfp_t gfp_mask;
 
-	/* How many slab objects shrinker() should scan and try to reclaim */
+	/*
+	 * How many objects scan_objects should scan and try to reclaim.
+	 * This is reset before every call, so it is safe for callees
+	 * to modify.
+	 */
 	unsigned long nr_to_scan;
+
+	/* shrink from these nodes */
+	nodemask_t nodes_to_scan;
+	/* current node being shrunk (for NUMA aware shrinkers) */
+	int nid;
 };
 
+#define SHRINK_STOP (~0UL)
 /*
  * A callback you can register to apply pressure to ageable caches.
  *
- * 'sc' is passed shrink_control which includes a count 'nr_to_scan'
- * and a 'gfpmask'.  It should look through the least-recently-used
- * 'nr_to_scan' entries and attempt to free them up.  It should return
- * the number of objects which remain in the cache.  If it returns -1, it means
- * it cannot do any scanning at this time (eg. there is a risk of deadlock).
+ * @count_objects should return the number of freeable items in the cache. If
+ * there are no objects to free or the number of freeable items cannot be
+ * determined, it should return 0. No deadlock checks should be done during the
+ * count callback - the shrinker relies on aggregating scan counts that couldn't
+ * be executed due to potential deadlocks to be run at a later call when the
+ * deadlock condition is no longer pending.
  *
- * The 'gfpmask' refers to the allocation we are currently trying to
- * fulfil.
+ * @scan_objects will only be called if @count_objects returned a non-zero
+ * value for the number of freeable objects. The callout should scan the cache
+ * and attempt to free items from the cache. It should then return the number
+ * of objects freed during the scan, or SHRINK_STOP if progress cannot be made
+ * due to potential deadlocks. If SHRINK_STOP is returned, then no further
+ * attempts to call the @scan_objects will be made from the current reclaim
+ * context.
  *
- * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is
- * querying the cache size, so a fastpath for that case is appropriate.
+ * @flags determine the shrinker abilities, like numa awareness
  */
 struct shrinker {
-	int (*shrink)(struct shrinker *, struct shrink_control *sc);
+	unsigned long (*count_objects)(struct shrinker *,
+				       struct shrink_control *sc);
+	unsigned long (*scan_objects)(struct shrinker *,
+				      struct shrink_control *sc);
+
 	int seeks;	/* seeks to recreate an obj */
 	long batch;	/* reclaim batch size, 0 = default */
+	unsigned long flags;
 
 	/* These are for internal use */
 	struct list_head list;
-	atomic_long_t nr_in_batch; /* objs pending delete */
+	/* objs pending delete, per node */
+	atomic_long_t *nr_deferred;
 };
 #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
-extern void register_shrinker(struct shrinker *);
+
+/* Flags */
+#define SHRINKER_NUMA_AWARE (1 << 0)
+
+extern int register_shrinker(struct shrinker *);
 extern void unregister_shrinker(struct shrinker *);
 #endif
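Note: a cache built on the list_lru sketch above would plug into the new count/scan interface along these lines; the names are again hypothetical, and the error check reflects register_shrinker()'s new int return type:

static unsigned long my_count_objects(struct shrinker *shrink,
				      struct shrink_control *sc)
{
	/* Counting only: no deadlock checks, no reclaim here. */
	return list_lru_count_node(&my_lru, sc->nid);
}

static unsigned long my_scan_objects(struct shrinker *shrink,
				     struct shrink_control *sc)
{
	unsigned long nr_to_walk = sc->nr_to_scan;

	if (!(sc->gfp_mask & __GFP_FS))
		return SHRINK_STOP;	/* cannot make progress safely */

	return list_lru_walk_node(&my_lru, sc->nid, my_isolate,
				  NULL, &nr_to_walk);
}

static struct shrinker my_shrinker = {
	.count_objects	= my_count_objects,
	.scan_objects	= my_scan_objects,
	.seeks		= DEFAULT_SEEKS,
	.flags		= SHRINKER_NUMA_AWARE,	/* called once per node */
};

static int __init my_shrinker_init(void)
{
	/* register_shrinker() can now fail: it allocates nr_deferred. */
	return register_shrinker(&my_shrinker);
}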
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 63cfcccaebb3..132a985aba8b 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -202,7 +202,7 @@ TRACE_EVENT(mm_shrink_slab_start,
 
 	TP_fast_assign(
 		__entry->shr = shr;
-		__entry->shrink = shr->shrink;
+		__entry->shrink = shr->scan_objects;
 		__entry->nr_objects_to_shrink = nr_objects_to_shrink;
 		__entry->gfp_flags = sc->gfp_mask;
 		__entry->pgs_scanned = pgs_scanned;
@@ -241,7 +241,7 @@ TRACE_EVENT(mm_shrink_slab_end,
 
 	TP_fast_assign(
 		__entry->shr = shr;
-		__entry->shrink = shr->shrink;
+		__entry->shrink = shr->scan_objects;
 		__entry->unused_scan = unused_scan_cnt;
 		__entry->new_scan = new_scan_cnt;
 		__entry->retval = shrinker_retval;
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index a4ed56cf0eac..6c28b61bb690 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -49,9 +49,9 @@ struct files_stat_struct {
 };
 
 struct inodes_stat_t {
-	int nr_inodes;
-	int nr_unused;
-	int dummy[5];		/* padding for sysctl ABI compatibility */
+	long nr_inodes;
+	long nr_unused;
+	long dummy[5];		/* padding for sysctl ABI compatibility */
 };
 
 