aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2014-11-09 03:38:39 -0500
committerChris Mason <clm@fb.com>2014-11-20 20:20:07 -0500
commit5f5bc6b1e2d5a6f827bc860ef2dc5b6f365d1339 (patch)
treed17afbf1a528b003f34d43cf557d2da62a7a6b1b /fs/btrfs
parentc7bc6319c59cc791743cf1b6e98f86be69444495 (diff)
Btrfs: make xattr replace operations atomic
Replacing a xattr consists of doing a lookup for its existing value, delete the current value from the respective leaf, release the search path and then finally insert the new value. This leaves a time window where readers (getxattr, listxattrs) won't see any value for the xattr. Xattrs are used to store ACLs, so this has security implications. This change also fixes 2 other existing issues which were: *) Deleting the old xattr value without verifying first if the new xattr will fit in the existing leaf item (in case multiple xattrs are packed in the same item due to name hash collision); *) Returning -EEXIST when the flag XATTR_CREATE is given and the xattr doesn't exist but we have have an existing item that packs muliple xattrs with the same name hash as the input xattr. In this case we should return ENOSPC. A test case for xfstests follows soon. Thanks to Alexandre Oliva for reporting the non-atomicity of the xattr replace implementation. Reported-by: Alexandre Oliva <oliva@gnu.org> Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/ctree.c2
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/dir-item.c10
-rw-r--r--fs/btrfs/xattr.c150
4 files changed, 102 insertions, 65 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 19bc6162fb8e..817234168a7f 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2939,7 +2939,7 @@ done:
2939 */ 2939 */
2940 if (!p->leave_spinning) 2940 if (!p->leave_spinning)
2941 btrfs_set_path_blocking(p); 2941 btrfs_set_path_blocking(p);
2942 if (ret < 0) 2942 if (ret < 0 && !p->skip_release_on_error)
2943 btrfs_release_path(p); 2943 btrfs_release_path(p);
2944 return ret; 2944 return ret;
2945} 2945}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index fa14081e3383..a9466e346358 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -607,6 +607,7 @@ struct btrfs_path {
607 unsigned int leave_spinning:1; 607 unsigned int leave_spinning:1;
608 unsigned int search_commit_root:1; 608 unsigned int search_commit_root:1;
609 unsigned int need_commit_sem:1; 609 unsigned int need_commit_sem:1;
610 unsigned int skip_release_on_error:1;
610}; 611};
611 612
612/* 613/*
@@ -3690,6 +3691,10 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
3690int verify_dir_item(struct btrfs_root *root, 3691int verify_dir_item(struct btrfs_root *root,
3691 struct extent_buffer *leaf, 3692 struct extent_buffer *leaf,
3692 struct btrfs_dir_item *dir_item); 3693 struct btrfs_dir_item *dir_item);
3694struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
3695 struct btrfs_path *path,
3696 const char *name,
3697 int name_len);
3693 3698
3694/* orphan.c */ 3699/* orphan.c */
3695int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, 3700int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index fc8df866e919..1752625fb4dd 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -21,10 +21,6 @@
21#include "hash.h" 21#include "hash.h"
22#include "transaction.h" 22#include "transaction.h"
23 23
24static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
25 struct btrfs_path *path,
26 const char *name, int name_len);
27
28/* 24/*
29 * insert a name into a directory, doing overflow properly if there is a hash 25 * insert a name into a directory, doing overflow properly if there is a hash
30 * collision. data_size indicates how big the item inserted should be. On 26 * collision. data_size indicates how big the item inserted should be. On
@@ -383,9 +379,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
383 * this walks through all the entries in a dir item and finds one 379 * this walks through all the entries in a dir item and finds one
384 * for a specific name. 380 * for a specific name.
385 */ 381 */
386static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, 382struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
387 struct btrfs_path *path, 383 struct btrfs_path *path,
388 const char *name, int name_len) 384 const char *name, int name_len)
389{ 385{
390 struct btrfs_dir_item *dir_item; 386 struct btrfs_dir_item *dir_item;
391 unsigned long name_ptr; 387 unsigned long name_ptr;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index dcf20131fbe4..47b19465f0dc 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -29,6 +29,7 @@
29#include "xattr.h" 29#include "xattr.h"
30#include "disk-io.h" 30#include "disk-io.h"
31#include "props.h" 31#include "props.h"
32#include "locking.h"
32 33
33 34
34ssize_t __btrfs_getxattr(struct inode *inode, const char *name, 35ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
@@ -91,7 +92,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
91 struct inode *inode, const char *name, 92 struct inode *inode, const char *name,
92 const void *value, size_t size, int flags) 93 const void *value, size_t size, int flags)
93{ 94{
94 struct btrfs_dir_item *di; 95 struct btrfs_dir_item *di = NULL;
95 struct btrfs_root *root = BTRFS_I(inode)->root; 96 struct btrfs_root *root = BTRFS_I(inode)->root;
96 struct btrfs_path *path; 97 struct btrfs_path *path;
97 size_t name_len = strlen(name); 98 size_t name_len = strlen(name);
@@ -103,84 +104,119 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
103 path = btrfs_alloc_path(); 104 path = btrfs_alloc_path();
104 if (!path) 105 if (!path)
105 return -ENOMEM; 106 return -ENOMEM;
107 path->skip_release_on_error = 1;
108
109 if (!value) {
110 di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
111 name, name_len, -1);
112 if (!di && (flags & XATTR_REPLACE))
113 ret = -ENODATA;
114 else if (di)
115 ret = btrfs_delete_one_dir_name(trans, root, path, di);
116 goto out;
117 }
106 118
119 /*
120 * For a replace we can't just do the insert blindly.
121 * Do a lookup first (read-only btrfs_search_slot), and return if xattr
122 * doesn't exist. If it exists, fall down below to the insert/replace
123 * path - we can't race with a concurrent xattr delete, because the VFS
124 * locks the inode's i_mutex before calling setxattr or removexattr.
125 */
107 if (flags & XATTR_REPLACE) { 126 if (flags & XATTR_REPLACE) {
108 di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name, 127 ASSERT(mutex_is_locked(&inode->i_mutex));
109 name_len, -1); 128 di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
110 if (IS_ERR(di)) { 129 name, name_len, 0);
111 ret = PTR_ERR(di); 130 if (!di) {
112 goto out;
113 } else if (!di) {
114 ret = -ENODATA; 131 ret = -ENODATA;
115 goto out; 132 goto out;
116 } 133 }
117 ret = btrfs_delete_one_dir_name(trans, root, path, di);
118 if (ret)
119 goto out;
120 btrfs_release_path(path); 134 btrfs_release_path(path);
135 di = NULL;
136 }
121 137
138 ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
139 name, name_len, value, size);
140 if (ret == -EOVERFLOW) {
122 /* 141 /*
123 * remove the attribute 142 * We have an existing item in a leaf, split_leaf couldn't
143 * expand it. That item might have or not a dir_item that
144 * matches our target xattr, so lets check.
124 */ 145 */
125 if (!value) 146 ret = 0;
126 goto out; 147 btrfs_assert_tree_locked(path->nodes[0]);
127 } else { 148 di = btrfs_match_dir_item_name(root, path, name, name_len);
128 di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), 149 if (!di && !(flags & XATTR_REPLACE)) {
129 name, name_len, 0); 150 ret = -ENOSPC;
130 if (IS_ERR(di)) {
131 ret = PTR_ERR(di);
132 goto out; 151 goto out;
133 } 152 }
134 if (!di && !value) 153 } else if (ret == -EEXIST) {
135 goto out; 154 ret = 0;
136 btrfs_release_path(path); 155 di = btrfs_match_dir_item_name(root, path, name, name_len);
156 ASSERT(di); /* logic error */
157 } else if (ret) {
158 goto out;
137 } 159 }
138 160
139again: 161 if (di && (flags & XATTR_CREATE)) {
140 ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
141 name, name_len, value, size);
142 /*
143 * If we're setting an xattr to a new value but the new value is say
144 * exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting
145 * back from split_leaf. This is because it thinks we'll be extending
146 * the existing item size, but we're asking for enough space to add the
147 * item itself. So if we get EOVERFLOW just set ret to EEXIST and let
148 * the rest of the function figure it out.
149 */
150 if (ret == -EOVERFLOW)
151 ret = -EEXIST; 162 ret = -EEXIST;
163 goto out;
164 }
152 165
153 if (ret == -EEXIST) { 166 if (di) {
154 if (flags & XATTR_CREATE)
155 goto out;
156 /* 167 /*
157 * We can't use the path we already have since we won't have the 168 * We're doing a replace, and it must be atomic, that is, at
158 * proper locking for a delete, so release the path and 169 * any point in time we have either the old or the new xattr
159 * re-lookup to delete the thing. 170 * value in the tree. We don't want readers (getxattr and
171 * listxattrs) to miss a value, this is specially important
172 * for ACLs.
160 */ 173 */
161 btrfs_release_path(path); 174 const int slot = path->slots[0];
162 di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), 175 struct extent_buffer *leaf = path->nodes[0];
163 name, name_len, -1); 176 const u16 old_data_len = btrfs_dir_data_len(leaf, di);
164 if (IS_ERR(di)) { 177 const u32 item_size = btrfs_item_size_nr(leaf, slot);
165 ret = PTR_ERR(di); 178 const u32 data_size = sizeof(*di) + name_len + size;
166 goto out; 179 struct btrfs_item *item;
167 } else if (!di) { 180 unsigned long data_ptr;
168 /* Shouldn't happen but just in case... */ 181 char *ptr;
169 btrfs_release_path(path); 182
170 goto again; 183 if (size > old_data_len) {
184 if (btrfs_leaf_free_space(root, leaf) <
185 (size - old_data_len)) {
186 ret = -ENOSPC;
187 goto out;
188 }
171 } 189 }
172 190
173 ret = btrfs_delete_one_dir_name(trans, root, path, di); 191 if (old_data_len + name_len + sizeof(*di) == item_size) {
174 if (ret) 192 /* No other xattrs packed in the same leaf item. */
175 goto out; 193 if (size > old_data_len)
194 btrfs_extend_item(root, path,
195 size - old_data_len);
196 else if (size < old_data_len)
197 btrfs_truncate_item(root, path, data_size, 1);
198 } else {
199 /* There are other xattrs packed in the same item. */
200 ret = btrfs_delete_one_dir_name(trans, root, path, di);
201 if (ret)
202 goto out;
203 btrfs_extend_item(root, path, data_size);
204 }
176 205
206 item = btrfs_item_nr(slot);
207 ptr = btrfs_item_ptr(leaf, slot, char);
208 ptr += btrfs_item_size(leaf, item) - data_size;
209 di = (struct btrfs_dir_item *)ptr;
210 btrfs_set_dir_data_len(leaf, di, size);
211 data_ptr = ((unsigned long)(di + 1)) + name_len;
212 write_extent_buffer(leaf, value, data_ptr, size);
213 btrfs_mark_buffer_dirty(leaf);
214 } else {
177 /* 215 /*
178 * We have a value to set, so go back and try to insert it now. 216 * Insert, and we had space for the xattr, so path->slots[0] is
217 * where our xattr dir_item is and btrfs_insert_xattr_item()
218 * filled it.
179 */ 219 */
180 if (value) {
181 btrfs_release_path(path);
182 goto again;
183 }
184 } 220 }
185out: 221out:
186 btrfs_free_path(path); 222 btrfs_free_path(path);