diff options
author | Glauber Costa <glommer@openvz.org> | 2013-08-27 20:17:53 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2013-09-10 18:56:29 -0400 |
commit | 3942c07ccf98e66b8893f396dca98f5b076f905f (patch) | |
tree | 063ec7aa542d9fa812482c02e2436205fe6a9e8e | |
parent | da5338c7498556b760871661ffecb053cc6f708f (diff) |
fs: bump inode and dentry counters to long
This series reworks our current object cache shrinking infrastructure in
two main ways:
* Noticing that a lot of users copy and paste their own version of LRU
lists for objects, we put some effort into providing a generic version.
It is modeled after the filesystem users: dentries, inodes, and xfs
(for various tasks), but we expect that other users could benefit in
the near future with little or no modification. Let us know if you
have any issues.
* The underlying list_lru being proposed automatically and
transparently keeps the elements in per-node lists, and is able to
manipulate the node lists individually. Given this infrastructure, we
are able to modify the up-to-now hammer called shrink_slab to proceed
with node-reclaim instead of always searching memory from all over like
it has been doing.
Per-node lru lists are also expected to lead to less contention in the lru
locks on multi-node scans, since we are now no longer fighting for a
global lock. The locks usually disappear from the profilers with this
change.
Although we have no official benchmarks for this version - be our guest to
independently evaluate this - earlier versions of this series were
performance tested (details at
http://permalink.gmane.org/gmane.linux.kernel.mm/100537) yielding no
visible performance regressions while yielding a better qualitative
behavior on NUMA machines.
With this infrastructure in place, we can use the list_lru entry point to
provide memcg isolation and per-memcg targeted reclaim. Historically,
those two pieces of work have been posted together. This version presents
only the infrastructure work, deferring the memcg work for a later time,
so we can focus on getting this part tested. You can see more about the
history of such work at http://lwn.net/Articles/552769/
Dave Chinner (18):
dcache: convert dentry_stat.nr_unused to per-cpu counters
dentry: move to per-sb LRU locks
dcache: remove dentries from LRU before putting on dispose list
mm: new shrinker API
shrinker: convert superblock shrinkers to new API
list: add a new LRU list type
inode: convert inode lru list to generic lru list code.
dcache: convert to use new lru list infrastructure
list_lru: per-node list infrastructure
shrinker: add node awareness
fs: convert inode and dentry shrinking to be node aware
xfs: convert buftarg LRU to generic code
xfs: rework buffer dispose list tracking
xfs: convert dquot cache lru to list_lru
fs: convert fs shrinkers to new scan/count API
drivers: convert shrinkers to new count/scan API
shrinker: convert remaining shrinkers to count/scan API
shrinker: Kill old ->shrink API.
Glauber Costa (7):
fs: bump inode and dentry counters to long
super: fix calculation of shrinkable objects for small numbers
list_lru: per-node API
vmscan: per-node deferred work
i915: bail out earlier when shrinker cannot acquire mutex
hugepage: convert huge zero page shrinker to new shrinker API
list_lru: dynamically adjust node arrays
This patch:
There are situations in very large machines in which we can have a large
quantity of dirty inodes, unused dentries, etc. This is particularly true
when umounting a filesystem, since every live object will eventually be
discarded.
Dave Chinner reported a problem with this while experimenting with the
shrinker revamp patchset. So we believe it is time for a change. This
patch just converts the relevant ints to longs. Machines where it matters should have a
big long anyway.
Signed-off-by: Glauber Costa <glommer@openvz.org>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Cc: Arve Hjønnevåg <arve@android.com>
Cc: Carlos Maiolino <cmaiolino@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: J. Bruce Fields <bfields@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Kent Overstreet <koverstreet@google.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Thomas Hellstrom <thellstrom@vmware.com>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- | fs/dcache.c | 8 | ||||
-rw-r--r-- | fs/inode.c | 18 | ||||
-rw-r--r-- | fs/internal.h | 2 | ||||
-rw-r--r-- | include/linux/dcache.h | 10 | ||||
-rw-r--r-- | include/linux/fs.h | 4 | ||||
-rw-r--r-- | include/uapi/linux/fs.h | 6 | ||||
-rw-r--r-- | kernel/sysctl.c | 6 |
7 files changed, 27 insertions, 27 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index 4d9df3c940e6..6ef1c2e1bbc4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -146,13 +146,13 @@ struct dentry_stat_t dentry_stat = { | |||
146 | .age_limit = 45, | 146 | .age_limit = 45, |
147 | }; | 147 | }; |
148 | 148 | ||
149 | static DEFINE_PER_CPU(unsigned int, nr_dentry); | 149 | static DEFINE_PER_CPU(long, nr_dentry); |
150 | 150 | ||
151 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) | 151 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) |
152 | static int get_nr_dentry(void) | 152 | static long get_nr_dentry(void) |
153 | { | 153 | { |
154 | int i; | 154 | int i; |
155 | int sum = 0; | 155 | long sum = 0; |
156 | for_each_possible_cpu(i) | 156 | for_each_possible_cpu(i) |
157 | sum += per_cpu(nr_dentry, i); | 157 | sum += per_cpu(nr_dentry, i); |
158 | return sum < 0 ? 0 : sum; | 158 | return sum < 0 ? 0 : sum; |
@@ -162,7 +162,7 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, | |||
162 | size_t *lenp, loff_t *ppos) | 162 | size_t *lenp, loff_t *ppos) |
163 | { | 163 | { |
164 | dentry_stat.nr_dentry = get_nr_dentry(); | 164 | dentry_stat.nr_dentry = get_nr_dentry(); |
165 | return proc_dointvec(table, write, buffer, lenp, ppos); | 165 | return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); |
166 | } | 166 | } |
167 | #endif | 167 | #endif |
168 | 168 | ||
diff --git a/fs/inode.c b/fs/inode.c index 93a0625b46e4..2a3c37ea823d 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -70,33 +70,33 @@ EXPORT_SYMBOL(empty_aops); | |||
70 | */ | 70 | */ |
71 | struct inodes_stat_t inodes_stat; | 71 | struct inodes_stat_t inodes_stat; |
72 | 72 | ||
73 | static DEFINE_PER_CPU(unsigned int, nr_inodes); | 73 | static DEFINE_PER_CPU(unsigned long, nr_inodes); |
74 | static DEFINE_PER_CPU(unsigned int, nr_unused); | 74 | static DEFINE_PER_CPU(unsigned long, nr_unused); |
75 | 75 | ||
76 | static struct kmem_cache *inode_cachep __read_mostly; | 76 | static struct kmem_cache *inode_cachep __read_mostly; |
77 | 77 | ||
78 | static int get_nr_inodes(void) | 78 | static long get_nr_inodes(void) |
79 | { | 79 | { |
80 | int i; | 80 | int i; |
81 | int sum = 0; | 81 | long sum = 0; |
82 | for_each_possible_cpu(i) | 82 | for_each_possible_cpu(i) |
83 | sum += per_cpu(nr_inodes, i); | 83 | sum += per_cpu(nr_inodes, i); |
84 | return sum < 0 ? 0 : sum; | 84 | return sum < 0 ? 0 : sum; |
85 | } | 85 | } |
86 | 86 | ||
87 | static inline int get_nr_inodes_unused(void) | 87 | static inline long get_nr_inodes_unused(void) |
88 | { | 88 | { |
89 | int i; | 89 | int i; |
90 | int sum = 0; | 90 | long sum = 0; |
91 | for_each_possible_cpu(i) | 91 | for_each_possible_cpu(i) |
92 | sum += per_cpu(nr_unused, i); | 92 | sum += per_cpu(nr_unused, i); |
93 | return sum < 0 ? 0 : sum; | 93 | return sum < 0 ? 0 : sum; |
94 | } | 94 | } |
95 | 95 | ||
96 | int get_nr_dirty_inodes(void) | 96 | long get_nr_dirty_inodes(void) |
97 | { | 97 | { |
98 | /* not actually dirty inodes, but a wild approximation */ | 98 | /* not actually dirty inodes, but a wild approximation */ |
99 | int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); | 99 | long nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); |
100 | return nr_dirty > 0 ? nr_dirty : 0; | 100 | return nr_dirty > 0 ? nr_dirty : 0; |
101 | } | 101 | } |
102 | 102 | ||
@@ -109,7 +109,7 @@ int proc_nr_inodes(ctl_table *table, int write, | |||
109 | { | 109 | { |
110 | inodes_stat.nr_inodes = get_nr_inodes(); | 110 | inodes_stat.nr_inodes = get_nr_inodes(); |
111 | inodes_stat.nr_unused = get_nr_inodes_unused(); | 111 | inodes_stat.nr_unused = get_nr_inodes_unused(); |
112 | return proc_dointvec(table, write, buffer, lenp, ppos); | 112 | return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); |
113 | } | 113 | } |
114 | #endif | 114 | #endif |
115 | 115 | ||
diff --git a/fs/internal.h b/fs/internal.h index 2be46ea5dd0b..b6495659d6e8 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -121,7 +121,7 @@ extern void inode_add_lru(struct inode *inode); | |||
121 | */ | 121 | */ |
122 | extern void inode_wb_list_del(struct inode *inode); | 122 | extern void inode_wb_list_del(struct inode *inode); |
123 | 123 | ||
124 | extern int get_nr_dirty_inodes(void); | 124 | extern long get_nr_dirty_inodes(void); |
125 | extern void evict_inodes(struct super_block *); | 125 | extern void evict_inodes(struct super_block *); |
126 | extern int invalidate_inodes(struct super_block *, bool); | 126 | extern int invalidate_inodes(struct super_block *, bool); |
127 | 127 | ||
diff --git a/include/linux/dcache.h b/include/linux/dcache.h index feaa8d88eef7..844a1ef387e4 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h | |||
@@ -55,11 +55,11 @@ struct qstr { | |||
55 | #define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) | 55 | #define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) |
56 | 56 | ||
57 | struct dentry_stat_t { | 57 | struct dentry_stat_t { |
58 | int nr_dentry; | 58 | long nr_dentry; |
59 | int nr_unused; | 59 | long nr_unused; |
60 | int age_limit; /* age in seconds */ | 60 | long age_limit; /* age in seconds */ |
61 | int want_pages; /* pages requested by system */ | 61 | long want_pages; /* pages requested by system */ |
62 | int dummy[2]; | 62 | long dummy[2]; |
63 | }; | 63 | }; |
64 | extern struct dentry_stat_t dentry_stat; | 64 | extern struct dentry_stat_t dentry_stat; |
65 | 65 | ||
diff --git a/include/linux/fs.h b/include/linux/fs.h index 49e71b0f0e9f..3b3edac75df2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -1271,12 +1271,12 @@ struct super_block { | |||
1271 | struct list_head s_mounts; /* list of mounts; _not_ for fs use */ | 1271 | struct list_head s_mounts; /* list of mounts; _not_ for fs use */ |
1272 | /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */ | 1272 | /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */ |
1273 | struct list_head s_dentry_lru; /* unused dentry lru */ | 1273 | struct list_head s_dentry_lru; /* unused dentry lru */ |
1274 | int s_nr_dentry_unused; /* # of dentry on lru */ | 1274 | long s_nr_dentry_unused; /* # of dentry on lru */ |
1275 | 1275 | ||
1276 | /* s_inode_lru_lock protects s_inode_lru and s_nr_inodes_unused */ | 1276 | /* s_inode_lru_lock protects s_inode_lru and s_nr_inodes_unused */ |
1277 | spinlock_t s_inode_lru_lock ____cacheline_aligned_in_smp; | 1277 | spinlock_t s_inode_lru_lock ____cacheline_aligned_in_smp; |
1278 | struct list_head s_inode_lru; /* unused inode lru */ | 1278 | struct list_head s_inode_lru; /* unused inode lru */ |
1279 | int s_nr_inodes_unused; /* # of inodes on lru */ | 1279 | long s_nr_inodes_unused; /* # of inodes on lru */ |
1280 | 1280 | ||
1281 | struct block_device *s_bdev; | 1281 | struct block_device *s_bdev; |
1282 | struct backing_dev_info *s_bdi; | 1282 | struct backing_dev_info *s_bdi; |
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index a4ed56cf0eac..6c28b61bb690 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h | |||
@@ -49,9 +49,9 @@ struct files_stat_struct { | |||
49 | }; | 49 | }; |
50 | 50 | ||
51 | struct inodes_stat_t { | 51 | struct inodes_stat_t { |
52 | int nr_inodes; | 52 | long nr_inodes; |
53 | int nr_unused; | 53 | long nr_unused; |
54 | int dummy[5]; /* padding for sysctl ABI compatibility */ | 54 | long dummy[5]; /* padding for sysctl ABI compatibility */ |
55 | }; | 55 | }; |
56 | 56 | ||
57 | 57 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 07f6fc468e17..7822cd88a95c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -1471,14 +1471,14 @@ static struct ctl_table fs_table[] = { | |||
1471 | { | 1471 | { |
1472 | .procname = "inode-nr", | 1472 | .procname = "inode-nr", |
1473 | .data = &inodes_stat, | 1473 | .data = &inodes_stat, |
1474 | .maxlen = 2*sizeof(int), | 1474 | .maxlen = 2*sizeof(long), |
1475 | .mode = 0444, | 1475 | .mode = 0444, |
1476 | .proc_handler = proc_nr_inodes, | 1476 | .proc_handler = proc_nr_inodes, |
1477 | }, | 1477 | }, |
1478 | { | 1478 | { |
1479 | .procname = "inode-state", | 1479 | .procname = "inode-state", |
1480 | .data = &inodes_stat, | 1480 | .data = &inodes_stat, |
1481 | .maxlen = 7*sizeof(int), | 1481 | .maxlen = 7*sizeof(long), |
1482 | .mode = 0444, | 1482 | .mode = 0444, |
1483 | .proc_handler = proc_nr_inodes, | 1483 | .proc_handler = proc_nr_inodes, |
1484 | }, | 1484 | }, |
@@ -1508,7 +1508,7 @@ static struct ctl_table fs_table[] = { | |||
1508 | { | 1508 | { |
1509 | .procname = "dentry-state", | 1509 | .procname = "dentry-state", |
1510 | .data = &dentry_stat, | 1510 | .data = &dentry_stat, |
1511 | .maxlen = 6*sizeof(int), | 1511 | .maxlen = 6*sizeof(long), |
1512 | .mode = 0444, | 1512 | .mode = 0444, |
1513 | .proc_handler = proc_nr_dentry, | 1513 | .proc_handler = proc_nr_dentry, |
1514 | }, | 1514 | }, |