aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndi Kleen <ak@linux.intel.com>2011-05-28 11:25:51 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2011-05-28 12:02:09 -0400
commit69b4573296469fd3f70cf7044693074980517067 (patch)
treeaea41eacb2a0f32748145a59bb8dc300b4485f36
parentd76ee18a8551e33ad7dbd55cac38bc7b094f3abb (diff)
Cache xattr security drop check for write v2
Some recent benchmarking on btrfs showed that a major scaling bottleneck on large systems on btrfs is currently the xattr lookup on every write. Why xattr lookup on every write I hear you ask? write wants to drop suid and security related xattrs that could set o capabilities for executables. To do that it currently looks up security.capability on EVERY write (even for non executables) to decide whether to drop it or not. In btrfs this causes an additional tree walk, hitting some per file system locks and quite bad scalability. In a simple read workload on a 8S system I saw over 90% CPU time in spinlocks related to that. Chris Mason tells me this is also a problem in ext4, where it hits the global mbcache lock. This patch adds a simple per inode to avoid this problem. We only do the lookup once per file and then if there is no xattr cache the decision. All xattr changes clear the flag. I also used the same flag to avoid the suid check, although that one is pretty cheap. A file system can also set this flag when it creates the inode, if it has a cheap way to do so. This is done for some common file systems in followon patches. With this patch a major part of the lock contention disappears for btrfs. Some testing on smaller systems didn't show significant performance changes, but at least it helps the larger systems and is generally more efficient. v2: Rename is_sgid. add file system helper. Cc: chris.mason@oracle.com Cc: josef@redhat.com Cc: viro@zeniv.linux.org.uk Cc: agruen@linbit.com Cc: Serge E. Hallyn <serue@us.ibm.com> Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--fs/attr.c7
-rw-r--r--fs/xattr.c7
-rw-r--r--include/linux/fs.h13
-rw-r--r--mm/filemap.c14
4 files changed, 37 insertions, 4 deletions
diff --git a/fs/attr.c b/fs/attr.c
index 91dbe2a107f2..caf2aa521e2b 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -175,6 +175,13 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
175 return -EPERM; 175 return -EPERM;
176 } 176 }
177 177
178 if ((ia_valid & ATTR_MODE)) {
179 mode_t amode = attr->ia_mode;
180 /* Flag setting protected by i_mutex */
181 if (is_sxid(amode))
182 inode->i_flags &= ~S_NOSEC;
183 }
184
178 now = current_fs_time(inode->i_sb); 185 now = current_fs_time(inode->i_sb);
179 186
180 attr->ia_ctime = now; 187 attr->ia_ctime = now;
diff --git a/fs/xattr.c b/fs/xattr.c
index 4be2e7666d02..f060663ab70c 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -91,7 +91,11 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
91{ 91{
92 struct inode *inode = dentry->d_inode; 92 struct inode *inode = dentry->d_inode;
93 int error = -EOPNOTSUPP; 93 int error = -EOPNOTSUPP;
94 int issec = !strncmp(name, XATTR_SECURITY_PREFIX,
95 XATTR_SECURITY_PREFIX_LEN);
94 96
97 if (issec)
98 inode->i_flags &= ~S_NOSEC;
95 if (inode->i_op->setxattr) { 99 if (inode->i_op->setxattr) {
96 error = inode->i_op->setxattr(dentry, name, value, size, flags); 100 error = inode->i_op->setxattr(dentry, name, value, size, flags);
97 if (!error) { 101 if (!error) {
@@ -99,8 +103,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
99 security_inode_post_setxattr(dentry, name, value, 103 security_inode_post_setxattr(dentry, name, value,
100 size, flags); 104 size, flags);
101 } 105 }
102 } else if (!strncmp(name, XATTR_SECURITY_PREFIX, 106 } else if (issec) {
103 XATTR_SECURITY_PREFIX_LEN)) {
104 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; 107 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
105 error = security_inode_setsecurity(inode, suffix, value, 108 error = security_inode_setsecurity(inode, suffix, value,
106 size, flags); 109 size, flags);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 573028df050d..c55d6b7cd5d6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -237,6 +237,7 @@ struct inodes_stat_t {
237#define S_PRIVATE 512 /* Inode is fs-internal */ 237#define S_PRIVATE 512 /* Inode is fs-internal */
238#define S_IMA 1024 /* Inode has an associated IMA struct */ 238#define S_IMA 1024 /* Inode has an associated IMA struct */
239#define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ 239#define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */
240#define S_NOSEC 4096 /* no suid or xattr security attributes */
240 241
241/* 242/*
242 * Note that nosuid etc flags are inode-specific: setting some file-system 243 * Note that nosuid etc flags are inode-specific: setting some file-system
@@ -273,6 +274,7 @@ struct inodes_stat_t {
273#define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) 274#define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE)
274#define IS_IMA(inode) ((inode)->i_flags & S_IMA) 275#define IS_IMA(inode) ((inode)->i_flags & S_IMA)
275#define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) 276#define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT)
277#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC)
276 278
277/* the read-only stuff doesn't really belong here, but any other place is 279/* the read-only stuff doesn't really belong here, but any other place is
278 probably as bad and I don't want to create yet another include file. */ 280 probably as bad and I don't want to create yet another include file. */
@@ -2582,5 +2584,16 @@ int __init get_filesystem_list(char *buf);
2582#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ 2584#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \
2583 (flag & __FMODE_NONOTIFY))) 2585 (flag & __FMODE_NONOTIFY)))
2584 2586
2587static inline int is_sxid(mode_t mode)
2588{
2589 return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP));
2590}
2591
2592static inline void inode_has_no_xattr(struct inode *inode)
2593{
2594 if (!is_sxid(inode->i_mode))
2595 inode->i_flags |= S_NOSEC;
2596}
2597
2585#endif /* __KERNEL__ */ 2598#endif /* __KERNEL__ */
2586#endif /* _LINUX_FS_H */ 2599#endif /* _LINUX_FS_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index dac95a24deac..d7b10578a64b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1982,16 +1982,26 @@ static int __remove_suid(struct dentry *dentry, int kill)
1982int file_remove_suid(struct file *file) 1982int file_remove_suid(struct file *file)
1983{ 1983{
1984 struct dentry *dentry = file->f_path.dentry; 1984 struct dentry *dentry = file->f_path.dentry;
1985 int killsuid = should_remove_suid(dentry); 1985 struct inode *inode = dentry->d_inode;
1986 int killpriv = security_inode_need_killpriv(dentry); 1986 int killsuid;
1987 int killpriv;
1987 int error = 0; 1988 int error = 0;
1988 1989
1990 /* Fast path for nothing security related */
1991 if (IS_NOSEC(inode))
1992 return 0;
1993
1994 killsuid = should_remove_suid(dentry);
1995 killpriv = security_inode_need_killpriv(dentry);
1996
1989 if (killpriv < 0) 1997 if (killpriv < 0)
1990 return killpriv; 1998 return killpriv;
1991 if (killpriv) 1999 if (killpriv)
1992 error = security_inode_killpriv(dentry); 2000 error = security_inode_killpriv(dentry);
1993 if (!error && killsuid) 2001 if (!error && killsuid)
1994 error = __remove_suid(dentry, killsuid); 2002 error = __remove_suid(dentry, killsuid);
2003 if (!error)
2004 inode->i_flags |= S_NOSEC;
1995 2005
1996 return error; 2006 return error;
1997} 2007}