aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/mount.h
diff options
context:
space:
mode:
authorNick Piggin <npiggin@kernel.dk>2011-01-07 01:50:11 -0500
committerNick Piggin <npiggin@kernel.dk>2011-01-07 01:50:33 -0500
commitb3e19d924b6eaf2ca7d22cba99a517c5171007b6 (patch)
tree8c1fa4074114a883a4e2de2f7d12eb29ed91bdf1 /include/linux/mount.h
parentc6653a838b1b2738561aff0b8c0f62a9b714bdd9 (diff)
fs: scale mntget/mntput
The problem that this patch aims to fix is vfsmount refcounting scalability. We need to take a reference on the vfsmount for every successful path lookup, which often go to the same mount point. The fundamental difficulty is that a "simple" reference count can never be made scalable, because any time a reference is dropped, we must check whether that was the last reference. To do that requires communication with all other CPUs that may have taken a reference count. We can make refcounts more scalable in a couple of ways, involving keeping distributed counters, and checking for the global-zero condition less frequently. - check the global sum once every interval (this will delay zero detection for some interval, so it's probably a showstopper for vfsmounts). - keep a local count and only taking the global sum when local reaches 0 (this is difficult for vfsmounts, because we can't hold preempt off for the life of a reference, so a counter would need to be per-thread or tied strongly to a particular CPU which requires more locking). - keep a local difference of increments and decrements, which allows us to sum the total difference and hence find the refcount when summing all CPUs. Then, keep a single integer "long" refcount for slow and long lasting references, and only take the global sum of local counters when the long refcount is 0. This last scheme is what I implemented here. Attached mounts and process root and working directory references are "long" references, and everything else is a short reference. This allows scalable vfsmount references during path walking over mounted subtrees and unattached (lazy umounted) mounts with processes still running in them. This results in one fewer atomic op in the fastpath: mntget is now just a per-CPU inc, rather than an atomic inc; and mntput just requires a spinlock and non-atomic decrement in the common case. However code is otherwise bigger and heavier, so single threaded performance is basically a wash. Signed-off-by: Nick Piggin <npiggin@kernel.dk>
Diffstat (limited to 'include/linux/mount.h')
-rw-r--r--include/linux/mount.h53
1 files changed, 17 insertions, 36 deletions
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 5e7a59408dd4..1869ea24a739 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -13,6 +13,7 @@
13#include <linux/list.h> 13#include <linux/list.h>
14#include <linux/nodemask.h> 14#include <linux/nodemask.h>
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/seqlock.h>
16#include <asm/atomic.h> 17#include <asm/atomic.h>
17 18
18struct super_block; 19struct super_block;
@@ -46,12 +47,24 @@ struct mnt_namespace;
46 47
47#define MNT_INTERNAL 0x4000 48#define MNT_INTERNAL 0x4000
48 49
50struct mnt_pcp {
51 int mnt_count;
52 int mnt_writers;
53};
54
49struct vfsmount { 55struct vfsmount {
50 struct list_head mnt_hash; 56 struct list_head mnt_hash;
51 struct vfsmount *mnt_parent; /* fs we are mounted on */ 57 struct vfsmount *mnt_parent; /* fs we are mounted on */
52 struct dentry *mnt_mountpoint; /* dentry of mountpoint */ 58 struct dentry *mnt_mountpoint; /* dentry of mountpoint */
53 struct dentry *mnt_root; /* root of the mounted tree */ 59 struct dentry *mnt_root; /* root of the mounted tree */
54 struct super_block *mnt_sb; /* pointer to superblock */ 60 struct super_block *mnt_sb; /* pointer to superblock */
61#ifdef CONFIG_SMP
62 struct mnt_pcp __percpu *mnt_pcp;
63 atomic_t mnt_longrefs;
64#else
65 int mnt_count;
66 int mnt_writers;
67#endif
55 struct list_head mnt_mounts; /* list of children, anchored here */ 68 struct list_head mnt_mounts; /* list of children, anchored here */
56 struct list_head mnt_child; /* and going through their mnt_child */ 69 struct list_head mnt_child; /* and going through their mnt_child */
57 int mnt_flags; 70 int mnt_flags;
@@ -70,57 +83,25 @@ struct vfsmount {
70 struct mnt_namespace *mnt_ns; /* containing namespace */ 83 struct mnt_namespace *mnt_ns; /* containing namespace */
71 int mnt_id; /* mount identifier */ 84 int mnt_id; /* mount identifier */
72 int mnt_group_id; /* peer group identifier */ 85 int mnt_group_id; /* peer group identifier */
73 /*
74 * We put mnt_count & mnt_expiry_mark at the end of struct vfsmount
75 * to let these frequently modified fields in a separate cache line
76 * (so that reads of mnt_flags wont ping-pong on SMP machines)
77 */
78 atomic_t mnt_count;
79 int mnt_expiry_mark; /* true if marked for expiry */ 86 int mnt_expiry_mark; /* true if marked for expiry */
80 int mnt_pinned; 87 int mnt_pinned;
81 int mnt_ghosts; 88 int mnt_ghosts;
82#ifdef CONFIG_SMP
83 int __percpu *mnt_writers;
84#else
85 int mnt_writers;
86#endif
87}; 89};
88 90
89static inline int *get_mnt_writers_ptr(struct vfsmount *mnt)
90{
91#ifdef CONFIG_SMP
92 return mnt->mnt_writers;
93#else
94 return &mnt->mnt_writers;
95#endif
96}
97
98static inline struct vfsmount *mntget(struct vfsmount *mnt)
99{
100 if (mnt)
101 atomic_inc(&mnt->mnt_count);
102 return mnt;
103}
104
105struct file; /* forward dec */ 91struct file; /* forward dec */
106 92
107extern int mnt_want_write(struct vfsmount *mnt); 93extern int mnt_want_write(struct vfsmount *mnt);
108extern int mnt_want_write_file(struct file *file); 94extern int mnt_want_write_file(struct file *file);
109extern int mnt_clone_write(struct vfsmount *mnt); 95extern int mnt_clone_write(struct vfsmount *mnt);
110extern void mnt_drop_write(struct vfsmount *mnt); 96extern void mnt_drop_write(struct vfsmount *mnt);
111extern void mntput_no_expire(struct vfsmount *mnt); 97extern void mntput(struct vfsmount *mnt);
98extern struct vfsmount *mntget(struct vfsmount *mnt);
99extern void mntput_long(struct vfsmount *mnt);
100extern struct vfsmount *mntget_long(struct vfsmount *mnt);
112extern void mnt_pin(struct vfsmount *mnt); 101extern void mnt_pin(struct vfsmount *mnt);
113extern void mnt_unpin(struct vfsmount *mnt); 102extern void mnt_unpin(struct vfsmount *mnt);
114extern int __mnt_is_readonly(struct vfsmount *mnt); 103extern int __mnt_is_readonly(struct vfsmount *mnt);
115 104
116static inline void mntput(struct vfsmount *mnt)
117{
118 if (mnt) {
119 mnt->mnt_expiry_mark = 0;
120 mntput_no_expire(mnt);
121 }
122}
123
124extern struct vfsmount *do_kern_mount(const char *fstype, int flags, 105extern struct vfsmount *do_kern_mount(const char *fstype, int flags,
125 const char *name, void *data); 106 const char *name, void *data);
126 107