aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYan, Zheng <zyan@redhat.com>2016-03-06 21:34:50 -0500
committerIlya Dryomov <idryomov@gmail.com>2016-03-25 13:51:55 -0400
commit315f24088048a51eed341c53be66ea477a3c7d16 (patch)
treeb0947756198ee556f500e2b5dd001104fec003b8
parent29dccfa5af388916e48a3edc5ac9dc8cc996d9b4 (diff)
ceph: fix security xattr deadlock
When security is enabled, the security module can call the filesystem's getxattr/setxattr callbacks during d_instantiate(). For cephfs, d_instantiate() is usually called by the MDS dispatch thread while it is handling an MDS reply. If the MDS reply does not include xattrs and the corresponding caps, getxattr/setxattr needs to send a new request to the MDS and wait for the reply. This makes the MDS dispatch thread sleep, so nobody handles later MDS replies. The fix is to make sure lookup/atomic_open replies include xattrs and the corresponding caps, so that getxattr can be served from cached xattrs. This requires some modification to both the MDS and the request message. (The client tells the MDS what caps it wants; the MDS encodes the proper caps in the reply.) The Smack security module may call setxattr during d_instantiate(). Unlike getxattr, we can't force the MDS to issue CEPH_CAP_XATTR_EXCL to us. So just make setxattr return an error when it is called by the MDS dispatch thread. Signed-off-by: Yan, Zheng <zyan@redhat.com>
-rw-r--r--fs/ceph/dir.c9
-rw-r--r--fs/ceph/export.c13
-rw-r--r--fs/ceph/file.c7
-rw-r--r--fs/ceph/inode.c18
-rw-r--r--fs/ceph/mds_client.c2
-rw-r--r--fs/ceph/super.h16
-rw-r--r--fs/ceph/xattr.c68
-rw-r--r--include/linux/ceph/ceph_fs.h3
8 files changed, 125 insertions, 11 deletions
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index fd11fb231a2e..b9f50a388aee 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -624,6 +624,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
624 struct ceph_mds_client *mdsc = fsc->mdsc; 624 struct ceph_mds_client *mdsc = fsc->mdsc;
625 struct ceph_mds_request *req; 625 struct ceph_mds_request *req;
626 int op; 626 int op;
627 int mask;
627 int err; 628 int err;
628 629
629 dout("lookup %p dentry %p '%pd'\n", 630 dout("lookup %p dentry %p '%pd'\n",
@@ -666,8 +667,12 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
666 return ERR_CAST(req); 667 return ERR_CAST(req);
667 req->r_dentry = dget(dentry); 668 req->r_dentry = dget(dentry);
668 req->r_num_caps = 2; 669 req->r_num_caps = 2;
669 /* we only need inode linkage */ 670
670 req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); 671 mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
672 if (ceph_security_xattr_wanted(dir))
673 mask |= CEPH_CAP_XATTR_SHARED;
674 req->r_args.getattr.mask = cpu_to_le32(mask);
675
671 req->r_locked_dir = dir; 676 req->r_locked_dir = dir;
672 err = ceph_mdsc_do_request(mdsc, NULL, req); 677 err = ceph_mdsc_do_request(mdsc, NULL, req);
673 err = ceph_handle_snapdir(req, dentry, err); 678 err = ceph_handle_snapdir(req, dentry, err);
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 3b3172357326..6e72c98162d5 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -71,12 +71,18 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
71 inode = ceph_find_inode(sb, vino); 71 inode = ceph_find_inode(sb, vino);
72 if (!inode) { 72 if (!inode) {
73 struct ceph_mds_request *req; 73 struct ceph_mds_request *req;
74 int mask;
74 75
75 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO, 76 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
76 USE_ANY_MDS); 77 USE_ANY_MDS);
77 if (IS_ERR(req)) 78 if (IS_ERR(req))
78 return ERR_CAST(req); 79 return ERR_CAST(req);
79 80
81 mask = CEPH_STAT_CAP_INODE;
82 if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
83 mask |= CEPH_CAP_XATTR_SHARED;
84 req->r_args.getattr.mask = cpu_to_le32(mask);
85
80 req->r_ino1 = vino; 86 req->r_ino1 = vino;
81 req->r_num_caps = 1; 87 req->r_num_caps = 1;
82 err = ceph_mdsc_do_request(mdsc, NULL, req); 88 err = ceph_mdsc_do_request(mdsc, NULL, req);
@@ -128,6 +134,7 @@ static struct dentry *__get_parent(struct super_block *sb,
128 struct ceph_mds_request *req; 134 struct ceph_mds_request *req;
129 struct inode *inode; 135 struct inode *inode;
130 struct dentry *dentry; 136 struct dentry *dentry;
137 int mask;
131 int err; 138 int err;
132 139
133 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT, 140 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
@@ -144,6 +151,12 @@ static struct dentry *__get_parent(struct super_block *sb,
144 .snap = CEPH_NOSNAP, 151 .snap = CEPH_NOSNAP,
145 }; 152 };
146 } 153 }
154
155 mask = CEPH_STAT_CAP_INODE;
156 if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
157 mask |= CEPH_CAP_XATTR_SHARED;
158 req->r_args.getattr.mask = cpu_to_le32(mask);
159
147 req->r_num_caps = 1; 160 req->r_num_caps = 1;
148 err = ceph_mdsc_do_request(mdsc, NULL, req); 161 err = ceph_mdsc_do_request(mdsc, NULL, req);
149 inode = req->r_target_inode; 162 inode = req->r_target_inode;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 389adacbc719..334a75170a3b 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -300,6 +300,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
300 struct ceph_mds_request *req; 300 struct ceph_mds_request *req;
301 struct dentry *dn; 301 struct dentry *dn;
302 struct ceph_acls_info acls = {}; 302 struct ceph_acls_info acls = {};
303 int mask;
303 int err; 304 int err;
304 305
305 dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n", 306 dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n",
@@ -335,6 +336,12 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
335 acls.pagelist = NULL; 336 acls.pagelist = NULL;
336 } 337 }
337 } 338 }
339
340 mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
341 if (ceph_security_xattr_wanted(dir))
342 mask |= CEPH_CAP_XATTR_SHARED;
343 req->r_args.open.mask = cpu_to_le32(mask);
344
338 req->r_locked_dir = dir; /* caller holds dir->i_mutex */ 345 req->r_locked_dir = dir; /* caller holds dir->i_mutex */
339 err = ceph_mdsc_do_request(mdsc, 346 err = ceph_mdsc_do_request(mdsc,
340 (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, 347 (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 66edef12c6f2..8b136dc0bc13 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1389,7 +1389,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
1389 struct qstr dname; 1389 struct qstr dname;
1390 struct dentry *dn; 1390 struct dentry *dn;
1391 struct inode *in; 1391 struct inode *in;
1392 int err = 0, ret, i; 1392 int err = 0, skipped = 0, ret, i;
1393 struct inode *snapdir = NULL; 1393 struct inode *snapdir = NULL;
1394 struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; 1394 struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
1395 struct ceph_dentry_info *di; 1395 struct ceph_dentry_info *di;
@@ -1501,7 +1501,17 @@ retry_lookup:
1501 } 1501 }
1502 1502
1503 if (d_really_is_negative(dn)) { 1503 if (d_really_is_negative(dn)) {
1504 struct dentry *realdn = splice_dentry(dn, in); 1504 struct dentry *realdn;
1505
1506 if (ceph_security_xattr_deadlock(in)) {
1507 dout(" skip splicing dn %p to inode %p"
1508 " (security xattr deadlock)\n", dn, in);
1509 iput(in);
1510 skipped++;
1511 goto next_item;
1512 }
1513
1514 realdn = splice_dentry(dn, in);
1505 if (IS_ERR(realdn)) { 1515 if (IS_ERR(realdn)) {
1506 err = PTR_ERR(realdn); 1516 err = PTR_ERR(realdn);
1507 d_drop(dn); 1517 d_drop(dn);
@@ -1518,7 +1528,7 @@ retry_lookup:
1518 req->r_session, 1528 req->r_session,
1519 req->r_request_started); 1529 req->r_request_started);
1520 1530
1521 if (err == 0 && cache_ctl.index >= 0) { 1531 if (err == 0 && skipped == 0 && cache_ctl.index >= 0) {
1522 ret = fill_readdir_cache(d_inode(parent), dn, 1532 ret = fill_readdir_cache(d_inode(parent), dn,
1523 &cache_ctl, req); 1533 &cache_ctl, req);
1524 if (ret < 0) 1534 if (ret < 0)
@@ -1529,7 +1539,7 @@ next_item:
1529 dput(dn); 1539 dput(dn);
1530 } 1540 }
1531out: 1541out:
1532 if (err == 0) { 1542 if (err == 0 && skipped == 0) {
1533 req->r_did_prepopulate = true; 1543 req->r_did_prepopulate = true;
1534 req->r_readdir_cache_idx = cache_ctl.index; 1544 req->r_readdir_cache_idx = cache_ctl.index;
1535 } 1545 }
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index aa43dcb5f9b9..44852c3ae531 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2540,6 +2540,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
2540 2540
2541 /* insert trace into our cache */ 2541 /* insert trace into our cache */
2542 mutex_lock(&req->r_fill_mutex); 2542 mutex_lock(&req->r_fill_mutex);
2543 current->journal_info = req;
2543 err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); 2544 err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
2544 if (err == 0) { 2545 if (err == 0) {
2545 if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || 2546 if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
@@ -2547,6 +2548,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
2547 ceph_readdir_prepopulate(req, req->r_session); 2548 ceph_readdir_prepopulate(req, req->r_session);
2548 ceph_unreserve_caps(mdsc, &req->r_caps_reservation); 2549 ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
2549 } 2550 }
2551 current->journal_info = NULL;
2550 mutex_unlock(&req->r_fill_mutex); 2552 mutex_unlock(&req->r_fill_mutex);
2551 2553
2552 up_read(&mdsc->snap_rwsem); 2554 up_read(&mdsc->snap_rwsem);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 57ac43d64322..2d48138da58e 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -468,7 +468,7 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
468#define CEPH_I_POOL_PERM (1 << 4) /* pool rd/wr bits are valid */ 468#define CEPH_I_POOL_PERM (1 << 4) /* pool rd/wr bits are valid */
469#define CEPH_I_POOL_RD (1 << 5) /* can read from pool */ 469#define CEPH_I_POOL_RD (1 << 5) /* can read from pool */
470#define CEPH_I_POOL_WR (1 << 6) /* can write to pool */ 470#define CEPH_I_POOL_WR (1 << 6) /* can write to pool */
471 471#define CEPH_I_SEC_INITED (1 << 7) /* security initialized */
472 472
473static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci, 473static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
474 long long release_count, 474 long long release_count,
@@ -804,6 +804,20 @@ extern void __init ceph_xattr_init(void);
804extern void ceph_xattr_exit(void); 804extern void ceph_xattr_exit(void);
805extern const struct xattr_handler *ceph_xattr_handlers[]; 805extern const struct xattr_handler *ceph_xattr_handlers[];
806 806
807#ifdef CONFIG_SECURITY
808extern bool ceph_security_xattr_deadlock(struct inode *in);
809extern bool ceph_security_xattr_wanted(struct inode *in);
810#else
811static inline bool ceph_security_xattr_deadlock(struct inode *in)
812{
813 return false;
814}
815static inline bool ceph_security_xattr_wanted(struct inode *in)
816{
817 return false;
818}
819#endif
820
807/* acl.c */ 821/* acl.c */
808struct ceph_acls_info { 822struct ceph_acls_info {
809 void *default_acl; 823 void *default_acl;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 139cdef8eb41..9410abdef3ce 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -714,13 +714,31 @@ void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
714 } 714 }
715} 715}
716 716
717static inline int __get_request_mask(struct inode *in) {
718 struct ceph_mds_request *req = current->journal_info;
719 int mask = 0;
720 if (req && req->r_target_inode == in) {
721 if (req->r_op == CEPH_MDS_OP_LOOKUP ||
722 req->r_op == CEPH_MDS_OP_LOOKUPINO ||
723 req->r_op == CEPH_MDS_OP_LOOKUPPARENT ||
724 req->r_op == CEPH_MDS_OP_GETATTR) {
725 mask = le32_to_cpu(req->r_args.getattr.mask);
726 } else if (req->r_op == CEPH_MDS_OP_OPEN ||
727 req->r_op == CEPH_MDS_OP_CREATE) {
728 mask = le32_to_cpu(req->r_args.open.mask);
729 }
730 }
731 return mask;
732}
733
717ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, 734ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
718 size_t size) 735 size_t size)
719{ 736{
720 struct ceph_inode_info *ci = ceph_inode(inode); 737 struct ceph_inode_info *ci = ceph_inode(inode);
721 int err;
722 struct ceph_inode_xattr *xattr; 738 struct ceph_inode_xattr *xattr;
723 struct ceph_vxattr *vxattr = NULL; 739 struct ceph_vxattr *vxattr = NULL;
740 int req_mask;
741 int err;
724 742
725 if (!ceph_is_valid_xattr(name)) 743 if (!ceph_is_valid_xattr(name))
726 return -ENODATA; 744 return -ENODATA;
@@ -734,13 +752,24 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
734 return err; 752 return err;
735 } 753 }
736 754
755 req_mask = __get_request_mask(inode);
756
737 spin_lock(&ci->i_ceph_lock); 757 spin_lock(&ci->i_ceph_lock);
738 dout("getxattr %p ver=%lld index_ver=%lld\n", inode, 758 dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
739 ci->i_xattrs.version, ci->i_xattrs.index_version); 759 ci->i_xattrs.version, ci->i_xattrs.index_version);
740 760
741 if (ci->i_xattrs.version == 0 || 761 if (ci->i_xattrs.version == 0 ||
742 !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) { 762 !((req_mask & CEPH_CAP_XATTR_SHARED) ||
763 __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
743 spin_unlock(&ci->i_ceph_lock); 764 spin_unlock(&ci->i_ceph_lock);
765
766 /* security module gets xattr while filling trace */
767 if (current->journal_info != NULL) {
768 pr_warn_ratelimited("sync getxattr %p "
769 "during filling trace\n", inode);
770 return -EBUSY;
771 }
772
744 /* get xattrs from mds (if we don't already have them) */ 773 /* get xattrs from mds (if we don't already have them) */
745 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true); 774 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
746 if (err) 775 if (err)
@@ -767,6 +796,9 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
767 796
768 memcpy(value, xattr->val, xattr->val_len); 797 memcpy(value, xattr->val, xattr->val_len);
769 798
799 if (current->journal_info != NULL &&
800 !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
801 ci->i_ceph_flags |= CEPH_I_SEC_INITED;
770out: 802out:
771 spin_unlock(&ci->i_ceph_lock); 803 spin_unlock(&ci->i_ceph_lock);
772 return err; 804 return err;
@@ -1017,7 +1049,15 @@ do_sync:
1017do_sync_unlocked: 1049do_sync_unlocked:
1018 if (lock_snap_rwsem) 1050 if (lock_snap_rwsem)
1019 up_read(&mdsc->snap_rwsem); 1051 up_read(&mdsc->snap_rwsem);
1020 err = ceph_sync_setxattr(dentry, name, value, size, flags); 1052
1053 /* security module set xattr while filling trace */
1054 if (current->journal_info != NULL) {
1055 pr_warn_ratelimited("sync setxattr %p "
1056 "during filling trace\n", inode);
1057 err = -EBUSY;
1058 } else {
1059 err = ceph_sync_setxattr(dentry, name, value, size, flags);
1060 }
1021out: 1061out:
1022 ceph_free_cap_flush(prealloc_cf); 1062 ceph_free_cap_flush(prealloc_cf);
1023 kfree(newname); 1063 kfree(newname);
@@ -1166,3 +1206,25 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
1166 1206
1167 return __ceph_removexattr(dentry, name); 1207 return __ceph_removexattr(dentry, name);
1168} 1208}
1209
1210#ifdef CONFIG_SECURITY
1211bool ceph_security_xattr_wanted(struct inode *in)
1212{
1213 return in->i_security != NULL;
1214}
1215
1216bool ceph_security_xattr_deadlock(struct inode *in)
1217{
1218 struct ceph_inode_info *ci;
1219 bool ret;
1220 if (in->i_security == NULL)
1221 return false;
1222 ci = ceph_inode(in);
1223 spin_lock(&ci->i_ceph_lock);
1224 ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) &&
1225 !(ci->i_xattrs.version > 0 &&
1226 __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0));
1227 spin_unlock(&ci->i_ceph_lock);
1228 return ret;
1229}
1230#endif
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index bf74005eedec..37f28bf55ce4 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -376,7 +376,8 @@ union ceph_mds_request_args {
376 __le32 stripe_count; /* ... */ 376 __le32 stripe_count; /* ... */
377 __le32 object_size; 377 __le32 object_size;
378 __le32 file_replication; 378 __le32 file_replication;
379 __le32 unused; /* used to be preferred osd */ 379 __le32 mask; /* CEPH_CAP_* */
380 __le32 old_size;
380 } __attribute__ ((packed)) open; 381 } __attribute__ ((packed)) open;
381 struct { 382 struct {
382 __le32 flags; 383 __le32 flags;