aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2014-10-15 00:46:01 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-10-15 00:46:01 -0400
commit: 6b0490816671b2f4126a99998c9bf3c8c0472de2 (patch)
tree: 016543455c2bdbe47b422fed6a3b4ffb991c97d6 /fs/ceph
parent: ce9d7f7b45930ed16c512aabcfe651d44f1c8619 (diff)
parent: 0bc62284ee3f2a228c64902ed818b6ba8e04159b (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
Pull Ceph updates from Sage Weil:
 "There is the long-awaited discard support for RBD (Guangliang Zhao,
  Josh Durgin), a pile of RBD bug fixes that didn't belong in late -rc's
  (Ilya Dryomov, Li RongQing), a pile of fs/ceph bug fixes and
  performance and debugging improvements (Yan, Zheng, John Spray), and a
  smattering of cleanups (Chao Yu, Fabian Frederick, Joe Perches)"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client: (40 commits)
  ceph: fix divide-by-zero in __validate_layout()
  rbd: rbd workqueues need a resque worker
  libceph: ceph-msgr workqueue needs a resque worker
  ceph: fix bool assignments
  libceph: separate multiple ops with commas in debugfs output
  libceph: sync osd op definitions in rados.h
  libceph: remove redundant declaration
  ceph: additional debugfs output
  ceph: export ceph_session_state_name function
  ceph: include the initial ACL in create/mkdir/mknod MDS requests
  ceph: use pagelist to present MDS request data
  libceph: reference counting pagelist
  ceph: fix llistxattr on symlink
  ceph: send client metadata to MDS
  ceph: remove redundant code for max file size verification
  ceph: remove redundant io_iter_advance()
  ceph: move ceph_find_inode() outside the s_mutex
  ceph: request xattrs if xattr_version is zero
  rbd: set the remaining discard properties to enable support
  rbd: use helpers to handle discard for layered images correctly
  ...
Diffstat (limited to 'fs/ceph')
-rw-r--r--  fs/ceph/acl.c         125
-rw-r--r--  fs/ceph/addr.c          9
-rw-r--r--  fs/ceph/caps.c         37
-rw-r--r--  fs/ceph/debugfs.c      46
-rw-r--r--  fs/ceph/dir.c          41
-rw-r--r--  fs/ceph/file.c         33
-rw-r--r--  fs/ceph/inode.c        16
-rw-r--r--  fs/ceph/ioctl.c         6
-rw-r--r--  fs/ceph/mds_client.c  136
-rw-r--r--  fs/ceph/mds_client.h    6
-rw-r--r--  fs/ceph/super.h        27
-rw-r--r--  fs/ceph/xattr.c        81
12 files changed, 386 insertions, 177 deletions
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index cebf2ebefb55..5bd853ba44ff 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -169,36 +169,109 @@ out:
169 return ret; 169 return ret;
170} 170}
171 171
172int ceph_init_acl(struct dentry *dentry, struct inode *inode, struct inode *dir) 172int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
173 struct ceph_acls_info *info)
173{ 174{
174 struct posix_acl *default_acl, *acl; 175 struct posix_acl *acl, *default_acl;
175 umode_t new_mode = inode->i_mode; 176 size_t val_size1 = 0, val_size2 = 0;
176 int error; 177 struct ceph_pagelist *pagelist = NULL;
177 178 void *tmp_buf = NULL;
178 error = posix_acl_create(dir, &new_mode, &default_acl, &acl); 179 int err;
179 if (error) 180
180 return error; 181 err = posix_acl_create(dir, mode, &default_acl, &acl);
181 182 if (err)
182 if (!default_acl && !acl) { 183 return err;
183 cache_no_acl(inode); 184
184 if (new_mode != inode->i_mode) { 185 if (acl) {
185 struct iattr newattrs = { 186 int ret = posix_acl_equiv_mode(acl, mode);
186 .ia_mode = new_mode, 187 if (ret < 0)
187 .ia_valid = ATTR_MODE, 188 goto out_err;
188 }; 189 if (ret == 0) {
189 error = ceph_setattr(dentry, &newattrs); 190 posix_acl_release(acl);
191 acl = NULL;
190 } 192 }
191 return error;
192 } 193 }
193 194
194 if (default_acl) { 195 if (!default_acl && !acl)
195 error = ceph_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); 196 return 0;
196 posix_acl_release(default_acl); 197
197 } 198 if (acl)
199 val_size1 = posix_acl_xattr_size(acl->a_count);
200 if (default_acl)
201 val_size2 = posix_acl_xattr_size(default_acl->a_count);
202
203 err = -ENOMEM;
204 tmp_buf = kmalloc(max(val_size1, val_size2), GFP_NOFS);
205 if (!tmp_buf)
206 goto out_err;
207 pagelist = kmalloc(sizeof(struct ceph_pagelist), GFP_NOFS);
208 if (!pagelist)
209 goto out_err;
210 ceph_pagelist_init(pagelist);
211
212 err = ceph_pagelist_reserve(pagelist, PAGE_SIZE);
213 if (err)
214 goto out_err;
215
216 ceph_pagelist_encode_32(pagelist, acl && default_acl ? 2 : 1);
217
198 if (acl) { 218 if (acl) {
199 if (!error) 219 size_t len = strlen(POSIX_ACL_XATTR_ACCESS);
200 error = ceph_set_acl(inode, acl, ACL_TYPE_ACCESS); 220 err = ceph_pagelist_reserve(pagelist, len + val_size1 + 8);
201 posix_acl_release(acl); 221 if (err)
222 goto out_err;
223 ceph_pagelist_encode_string(pagelist, POSIX_ACL_XATTR_ACCESS,
224 len);
225 err = posix_acl_to_xattr(&init_user_ns, acl,
226 tmp_buf, val_size1);
227 if (err < 0)
228 goto out_err;
229 ceph_pagelist_encode_32(pagelist, val_size1);
230 ceph_pagelist_append(pagelist, tmp_buf, val_size1);
202 } 231 }
203 return error; 232 if (default_acl) {
233 size_t len = strlen(POSIX_ACL_XATTR_DEFAULT);
234 err = ceph_pagelist_reserve(pagelist, len + val_size2 + 8);
235 if (err)
236 goto out_err;
237 err = ceph_pagelist_encode_string(pagelist,
238 POSIX_ACL_XATTR_DEFAULT, len);
239 err = posix_acl_to_xattr(&init_user_ns, default_acl,
240 tmp_buf, val_size2);
241 if (err < 0)
242 goto out_err;
243 ceph_pagelist_encode_32(pagelist, val_size2);
244 ceph_pagelist_append(pagelist, tmp_buf, val_size2);
245 }
246
247 kfree(tmp_buf);
248
249 info->acl = acl;
250 info->default_acl = default_acl;
251 info->pagelist = pagelist;
252 return 0;
253
254out_err:
255 posix_acl_release(acl);
256 posix_acl_release(default_acl);
257 kfree(tmp_buf);
258 if (pagelist)
259 ceph_pagelist_release(pagelist);
260 return err;
261}
262
263void ceph_init_inode_acls(struct inode* inode, struct ceph_acls_info *info)
264{
265 if (!inode)
266 return;
267 ceph_set_cached_acl(inode, ACL_TYPE_ACCESS, info->acl);
268 ceph_set_cached_acl(inode, ACL_TYPE_DEFAULT, info->default_acl);
269}
270
271void ceph_release_acls_info(struct ceph_acls_info *info)
272{
273 posix_acl_release(info->acl);
274 posix_acl_release(info->default_acl);
275 if (info->pagelist)
276 ceph_pagelist_release(info->pagelist);
204} 277}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 90b3954d48ed..18c06bbaf136 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1076,12 +1076,6 @@ retry_locked:
1076 /* past end of file? */ 1076 /* past end of file? */
1077 i_size = inode->i_size; /* caller holds i_mutex */ 1077 i_size = inode->i_size; /* caller holds i_mutex */
1078 1078
1079 if (i_size + len > inode->i_sb->s_maxbytes) {
1080 /* file is too big */
1081 r = -EINVAL;
1082 goto fail;
1083 }
1084
1085 if (page_off >= i_size || 1079 if (page_off >= i_size ||
1086 (pos_in_page == 0 && (pos+len) >= i_size && 1080 (pos_in_page == 0 && (pos+len) >= i_size &&
1087 end_in_page - pos_in_page != PAGE_CACHE_SIZE)) { 1081 end_in_page - pos_in_page != PAGE_CACHE_SIZE)) {
@@ -1099,9 +1093,6 @@ retry_locked:
1099 if (r < 0) 1093 if (r < 0)
1100 goto fail_nosnap; 1094 goto fail_nosnap;
1101 goto retry_locked; 1095 goto retry_locked;
1102
1103fail:
1104 up_read(&mdsc->snap_rwsem);
1105fail_nosnap: 1096fail_nosnap:
1106 unlock_page(page); 1097 unlock_page(page);
1107 return r; 1098 return r;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 6d1cd45dca89..659f2ea9e6f7 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2397,12 +2397,12 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
2397 u64 max_size = le64_to_cpu(grant->max_size); 2397 u64 max_size = le64_to_cpu(grant->max_size);
2398 struct timespec mtime, atime, ctime; 2398 struct timespec mtime, atime, ctime;
2399 int check_caps = 0; 2399 int check_caps = 0;
2400 bool wake = 0; 2400 bool wake = false;
2401 bool writeback = 0; 2401 bool writeback = false;
2402 bool queue_trunc = 0; 2402 bool queue_trunc = false;
2403 bool queue_invalidate = 0; 2403 bool queue_invalidate = false;
2404 bool queue_revalidate = 0; 2404 bool queue_revalidate = false;
2405 bool deleted_inode = 0; 2405 bool deleted_inode = false;
2406 2406
2407 dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", 2407 dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
2408 inode, cap, mds, seq, ceph_cap_string(newcaps)); 2408 inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2437,7 +2437,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
2437 /* there were locked pages.. invalidate later 2437 /* there were locked pages.. invalidate later
2438 in a separate thread. */ 2438 in a separate thread. */
2439 if (ci->i_rdcache_revoking != ci->i_rdcache_gen) { 2439 if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
2440 queue_invalidate = 1; 2440 queue_invalidate = true;
2441 ci->i_rdcache_revoking = ci->i_rdcache_gen; 2441 ci->i_rdcache_revoking = ci->i_rdcache_gen;
2442 } 2442 }
2443 } 2443 }
@@ -2466,7 +2466,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
2466 set_nlink(inode, le32_to_cpu(grant->nlink)); 2466 set_nlink(inode, le32_to_cpu(grant->nlink));
2467 if (inode->i_nlink == 0 && 2467 if (inode->i_nlink == 0 &&
2468 (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL))) 2468 (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
2469 deleted_inode = 1; 2469 deleted_inode = true;
2470 } 2470 }
2471 2471
2472 if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) { 2472 if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
@@ -2487,7 +2487,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
2487 /* Do we need to revalidate our fscache cookie. Don't bother on the 2487 /* Do we need to revalidate our fscache cookie. Don't bother on the
2488 * first cache cap as we already validate at cookie creation time. */ 2488 * first cache cap as we already validate at cookie creation time. */
2489 if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1) 2489 if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1)
2490 queue_revalidate = 1; 2490 queue_revalidate = true;
2491 2491
2492 if (newcaps & CEPH_CAP_ANY_RD) { 2492 if (newcaps & CEPH_CAP_ANY_RD) {
2493 /* ctime/mtime/atime? */ 2493 /* ctime/mtime/atime? */
@@ -2516,7 +2516,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
2516 ci->i_wanted_max_size = 0; /* reset */ 2516 ci->i_wanted_max_size = 0; /* reset */
2517 ci->i_requested_max_size = 0; 2517 ci->i_requested_max_size = 0;
2518 } 2518 }
2519 wake = 1; 2519 wake = true;
2520 } 2520 }
2521 } 2521 }
2522 2522
@@ -2546,7 +2546,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
2546 ceph_cap_string(newcaps), 2546 ceph_cap_string(newcaps),
2547 ceph_cap_string(revoking)); 2547 ceph_cap_string(revoking));
2548 if (revoking & used & CEPH_CAP_FILE_BUFFER) 2548 if (revoking & used & CEPH_CAP_FILE_BUFFER)
2549 writeback = 1; /* initiate writeback; will delay ack */ 2549 writeback = true; /* initiate writeback; will delay ack */
2550 else if (revoking == CEPH_CAP_FILE_CACHE && 2550 else if (revoking == CEPH_CAP_FILE_CACHE &&
2551 (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 && 2551 (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
2552 queue_invalidate) 2552 queue_invalidate)
@@ -2572,7 +2572,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
2572 cap->implemented |= newcaps; /* add bits only, to 2572 cap->implemented |= newcaps; /* add bits only, to
2573 * avoid stepping on a 2573 * avoid stepping on a
2574 * pending revocation */ 2574 * pending revocation */
2575 wake = 1; 2575 wake = true;
2576 } 2576 }
2577 BUG_ON(cap->issued & ~cap->implemented); 2577 BUG_ON(cap->issued & ~cap->implemented);
2578 2578
@@ -2586,7 +2586,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
2586 kick_flushing_inode_caps(mdsc, session, inode); 2586 kick_flushing_inode_caps(mdsc, session, inode);
2587 up_read(&mdsc->snap_rwsem); 2587 up_read(&mdsc->snap_rwsem);
2588 if (newcaps & ~issued) 2588 if (newcaps & ~issued)
2589 wake = 1; 2589 wake = true;
2590 } 2590 }
2591 2591
2592 if (queue_trunc) { 2592 if (queue_trunc) {
@@ -3045,6 +3045,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
3045 } 3045 }
3046 } 3046 }
3047 3047
3048 /* lookup ino */
3049 inode = ceph_find_inode(sb, vino);
3050 ci = ceph_inode(inode);
3051 dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
3052 vino.snap, inode);
3053
3048 mutex_lock(&session->s_mutex); 3054 mutex_lock(&session->s_mutex);
3049 session->s_seq++; 3055 session->s_seq++;
3050 dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, 3056 dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
@@ -3053,11 +3059,6 @@ void ceph_handle_caps(struct ceph_mds_session *session,
3053 if (op == CEPH_CAP_OP_IMPORT) 3059 if (op == CEPH_CAP_OP_IMPORT)
3054 ceph_add_cap_releases(mdsc, session); 3060 ceph_add_cap_releases(mdsc, session);
3055 3061
3056 /* lookup ino */
3057 inode = ceph_find_inode(sb, vino);
3058 ci = ceph_inode(inode);
3059 dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
3060 vino.snap, inode);
3061 if (!inode) { 3062 if (!inode) {
3062 dout(" i don't have ino %llx\n", vino.ino); 3063 dout(" i don't have ino %llx\n", vino.ino);
3063 3064
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 5a743ac141ab..5d5a4c8c8496 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -158,10 +158,47 @@ static int dentry_lru_show(struct seq_file *s, void *ptr)
158 return 0; 158 return 0;
159} 159}
160 160
161static int mds_sessions_show(struct seq_file *s, void *ptr)
162{
163 struct ceph_fs_client *fsc = s->private;
164 struct ceph_mds_client *mdsc = fsc->mdsc;
165 struct ceph_auth_client *ac = fsc->client->monc.auth;
166 struct ceph_options *opt = fsc->client->options;
167 int mds = -1;
168
169 mutex_lock(&mdsc->mutex);
170
171 /* The 'num' portion of an 'entity name' */
172 seq_printf(s, "global_id %llu\n", ac->global_id);
173
174 /* The -o name mount argument */
175 seq_printf(s, "name \"%s\"\n", opt->name ? opt->name : "");
176
177 /* The list of MDS session rank+state */
178 for (mds = 0; mds < mdsc->max_sessions; mds++) {
179 struct ceph_mds_session *session =
180 __ceph_lookup_mds_session(mdsc, mds);
181 if (!session) {
182 continue;
183 }
184 mutex_unlock(&mdsc->mutex);
185 seq_printf(s, "mds.%d %s\n",
186 session->s_mds,
187 ceph_session_state_name(session->s_state));
188
189 ceph_put_mds_session(session);
190 mutex_lock(&mdsc->mutex);
191 }
192 mutex_unlock(&mdsc->mutex);
193
194 return 0;
195}
196
161CEPH_DEFINE_SHOW_FUNC(mdsmap_show) 197CEPH_DEFINE_SHOW_FUNC(mdsmap_show)
162CEPH_DEFINE_SHOW_FUNC(mdsc_show) 198CEPH_DEFINE_SHOW_FUNC(mdsc_show)
163CEPH_DEFINE_SHOW_FUNC(caps_show) 199CEPH_DEFINE_SHOW_FUNC(caps_show)
164CEPH_DEFINE_SHOW_FUNC(dentry_lru_show) 200CEPH_DEFINE_SHOW_FUNC(dentry_lru_show)
201CEPH_DEFINE_SHOW_FUNC(mds_sessions_show)
165 202
166 203
167/* 204/*
@@ -193,6 +230,7 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
193 debugfs_remove(fsc->debugfs_bdi); 230 debugfs_remove(fsc->debugfs_bdi);
194 debugfs_remove(fsc->debugfs_congestion_kb); 231 debugfs_remove(fsc->debugfs_congestion_kb);
195 debugfs_remove(fsc->debugfs_mdsmap); 232 debugfs_remove(fsc->debugfs_mdsmap);
233 debugfs_remove(fsc->debugfs_mds_sessions);
196 debugfs_remove(fsc->debugfs_caps); 234 debugfs_remove(fsc->debugfs_caps);
197 debugfs_remove(fsc->debugfs_mdsc); 235 debugfs_remove(fsc->debugfs_mdsc);
198 debugfs_remove(fsc->debugfs_dentry_lru); 236 debugfs_remove(fsc->debugfs_dentry_lru);
@@ -231,6 +269,14 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
231 if (!fsc->debugfs_mdsmap) 269 if (!fsc->debugfs_mdsmap)
232 goto out; 270 goto out;
233 271
272 fsc->debugfs_mds_sessions = debugfs_create_file("mds_sessions",
273 0600,
274 fsc->client->debugfs_dir,
275 fsc,
276 &mds_sessions_show_fops);
277 if (!fsc->debugfs_mds_sessions)
278 goto out;
279
234 fsc->debugfs_mdsc = debugfs_create_file("mdsc", 280 fsc->debugfs_mdsc = debugfs_create_file("mdsc",
235 0600, 281 0600,
236 fsc->client->debugfs_dir, 282 fsc->client->debugfs_dir,
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index b6c59eaa4f64..e6d63f8f98c0 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -682,17 +682,22 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
682 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 682 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
683 struct ceph_mds_client *mdsc = fsc->mdsc; 683 struct ceph_mds_client *mdsc = fsc->mdsc;
684 struct ceph_mds_request *req; 684 struct ceph_mds_request *req;
685 struct ceph_acls_info acls = {};
685 int err; 686 int err;
686 687
687 if (ceph_snap(dir) != CEPH_NOSNAP) 688 if (ceph_snap(dir) != CEPH_NOSNAP)
688 return -EROFS; 689 return -EROFS;
689 690
691 err = ceph_pre_init_acls(dir, &mode, &acls);
692 if (err < 0)
693 return err;
694
690 dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n", 695 dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n",
691 dir, dentry, mode, rdev); 696 dir, dentry, mode, rdev);
692 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS); 697 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS);
693 if (IS_ERR(req)) { 698 if (IS_ERR(req)) {
694 d_drop(dentry); 699 err = PTR_ERR(req);
695 return PTR_ERR(req); 700 goto out;
696 } 701 }
697 req->r_dentry = dget(dentry); 702 req->r_dentry = dget(dentry);
698 req->r_num_caps = 2; 703 req->r_num_caps = 2;
@@ -701,15 +706,20 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
701 req->r_args.mknod.rdev = cpu_to_le32(rdev); 706 req->r_args.mknod.rdev = cpu_to_le32(rdev);
702 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 707 req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
703 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 708 req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
709 if (acls.pagelist) {
710 req->r_pagelist = acls.pagelist;
711 acls.pagelist = NULL;
712 }
704 err = ceph_mdsc_do_request(mdsc, dir, req); 713 err = ceph_mdsc_do_request(mdsc, dir, req);
705 if (!err && !req->r_reply_info.head->is_dentry) 714 if (!err && !req->r_reply_info.head->is_dentry)
706 err = ceph_handle_notrace_create(dir, dentry); 715 err = ceph_handle_notrace_create(dir, dentry);
707 ceph_mdsc_put_request(req); 716 ceph_mdsc_put_request(req);
708 717out:
709 if (!err) 718 if (!err)
710 ceph_init_acl(dentry, dentry->d_inode, dir); 719 ceph_init_inode_acls(dentry->d_inode, &acls);
711 else 720 else
712 d_drop(dentry); 721 d_drop(dentry);
722 ceph_release_acls_info(&acls);
713 return err; 723 return err;
714} 724}
715 725
@@ -733,8 +743,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
733 dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest); 743 dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
734 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS); 744 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
735 if (IS_ERR(req)) { 745 if (IS_ERR(req)) {
736 d_drop(dentry); 746 err = PTR_ERR(req);
737 return PTR_ERR(req); 747 goto out;
738 } 748 }
739 req->r_dentry = dget(dentry); 749 req->r_dentry = dget(dentry);
740 req->r_num_caps = 2; 750 req->r_num_caps = 2;
@@ -746,9 +756,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
746 if (!err && !req->r_reply_info.head->is_dentry) 756 if (!err && !req->r_reply_info.head->is_dentry)
747 err = ceph_handle_notrace_create(dir, dentry); 757 err = ceph_handle_notrace_create(dir, dentry);
748 ceph_mdsc_put_request(req); 758 ceph_mdsc_put_request(req);
749 if (!err) 759out:
750 ceph_init_acl(dentry, dentry->d_inode, dir); 760 if (err)
751 else
752 d_drop(dentry); 761 d_drop(dentry);
753 return err; 762 return err;
754} 763}
@@ -758,6 +767,7 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
758 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 767 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
759 struct ceph_mds_client *mdsc = fsc->mdsc; 768 struct ceph_mds_client *mdsc = fsc->mdsc;
760 struct ceph_mds_request *req; 769 struct ceph_mds_request *req;
770 struct ceph_acls_info acls = {};
761 int err = -EROFS; 771 int err = -EROFS;
762 int op; 772 int op;
763 773
@@ -772,6 +782,12 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
772 } else { 782 } else {
773 goto out; 783 goto out;
774 } 784 }
785
786 mode |= S_IFDIR;
787 err = ceph_pre_init_acls(dir, &mode, &acls);
788 if (err < 0)
789 goto out;
790
775 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); 791 req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
776 if (IS_ERR(req)) { 792 if (IS_ERR(req)) {
777 err = PTR_ERR(req); 793 err = PTR_ERR(req);
@@ -784,15 +800,20 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
784 req->r_args.mkdir.mode = cpu_to_le32(mode); 800 req->r_args.mkdir.mode = cpu_to_le32(mode);
785 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 801 req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
786 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 802 req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
803 if (acls.pagelist) {
804 req->r_pagelist = acls.pagelist;
805 acls.pagelist = NULL;
806 }
787 err = ceph_mdsc_do_request(mdsc, dir, req); 807 err = ceph_mdsc_do_request(mdsc, dir, req);
788 if (!err && !req->r_reply_info.head->is_dentry) 808 if (!err && !req->r_reply_info.head->is_dentry)
789 err = ceph_handle_notrace_create(dir, dentry); 809 err = ceph_handle_notrace_create(dir, dentry);
790 ceph_mdsc_put_request(req); 810 ceph_mdsc_put_request(req);
791out: 811out:
792 if (!err) 812 if (!err)
793 ceph_init_acl(dentry, dentry->d_inode, dir); 813 ceph_init_inode_acls(dentry->d_inode, &acls);
794 else 814 else
795 d_drop(dentry); 815 d_drop(dentry);
816 ceph_release_acls_info(&acls);
796 return err; 817 return err;
797} 818}
798 819
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 2eb02f80a0ab..d7e0da8366e6 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -235,6 +235,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
235 struct ceph_mds_client *mdsc = fsc->mdsc; 235 struct ceph_mds_client *mdsc = fsc->mdsc;
236 struct ceph_mds_request *req; 236 struct ceph_mds_request *req;
237 struct dentry *dn; 237 struct dentry *dn;
238 struct ceph_acls_info acls = {};
238 int err; 239 int err;
239 240
240 dout("atomic_open %p dentry %p '%.*s' %s flags %d mode 0%o\n", 241 dout("atomic_open %p dentry %p '%.*s' %s flags %d mode 0%o\n",
@@ -248,22 +249,34 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
248 if (err < 0) 249 if (err < 0)
249 return err; 250 return err;
250 251
252 if (flags & O_CREAT) {
253 err = ceph_pre_init_acls(dir, &mode, &acls);
254 if (err < 0)
255 return err;
256 }
257
251 /* do the open */ 258 /* do the open */
252 req = prepare_open_request(dir->i_sb, flags, mode); 259 req = prepare_open_request(dir->i_sb, flags, mode);
253 if (IS_ERR(req)) 260 if (IS_ERR(req)) {
254 return PTR_ERR(req); 261 err = PTR_ERR(req);
262 goto out_acl;
263 }
255 req->r_dentry = dget(dentry); 264 req->r_dentry = dget(dentry);
256 req->r_num_caps = 2; 265 req->r_num_caps = 2;
257 if (flags & O_CREAT) { 266 if (flags & O_CREAT) {
258 req->r_dentry_drop = CEPH_CAP_FILE_SHARED; 267 req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
259 req->r_dentry_unless = CEPH_CAP_FILE_EXCL; 268 req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
269 if (acls.pagelist) {
270 req->r_pagelist = acls.pagelist;
271 acls.pagelist = NULL;
272 }
260 } 273 }
261 req->r_locked_dir = dir; /* caller holds dir->i_mutex */ 274 req->r_locked_dir = dir; /* caller holds dir->i_mutex */
262 err = ceph_mdsc_do_request(mdsc, 275 err = ceph_mdsc_do_request(mdsc,
263 (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, 276 (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
264 req); 277 req);
265 if (err) 278 if (err)
266 goto out_err; 279 goto out_req;
267 280
268 err = ceph_handle_snapdir(req, dentry, err); 281 err = ceph_handle_snapdir(req, dentry, err);
269 if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) 282 if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
@@ -278,7 +291,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
278 dn = NULL; 291 dn = NULL;
279 } 292 }
280 if (err) 293 if (err)
281 goto out_err; 294 goto out_req;
282 if (dn || dentry->d_inode == NULL || S_ISLNK(dentry->d_inode->i_mode)) { 295 if (dn || dentry->d_inode == NULL || S_ISLNK(dentry->d_inode->i_mode)) {
283 /* make vfs retry on splice, ENOENT, or symlink */ 296 /* make vfs retry on splice, ENOENT, or symlink */
284 dout("atomic_open finish_no_open on dn %p\n", dn); 297 dout("atomic_open finish_no_open on dn %p\n", dn);
@@ -286,15 +299,17 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
286 } else { 299 } else {
287 dout("atomic_open finish_open on dn %p\n", dn); 300 dout("atomic_open finish_open on dn %p\n", dn);
288 if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) { 301 if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
289 ceph_init_acl(dentry, dentry->d_inode, dir); 302 ceph_init_inode_acls(dentry->d_inode, &acls);
290 *opened |= FILE_CREATED; 303 *opened |= FILE_CREATED;
291 } 304 }
292 err = finish_open(file, dentry, ceph_open, opened); 305 err = finish_open(file, dentry, ceph_open, opened);
293 } 306 }
294out_err: 307out_req:
295 if (!req->r_err && req->r_target_inode) 308 if (!req->r_err && req->r_target_inode)
296 ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode); 309 ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode);
297 ceph_mdsc_put_request(req); 310 ceph_mdsc_put_request(req);
311out_acl:
312 ceph_release_acls_info(&acls);
298 dout("atomic_open result=%d\n", err); 313 dout("atomic_open result=%d\n", err);
299 return err; 314 return err;
300} 315}
@@ -826,8 +841,7 @@ again:
826 ceph_put_cap_refs(ci, got); 841 ceph_put_cap_refs(ci, got);
827 842
828 if (checkeof && ret >= 0) { 843 if (checkeof && ret >= 0) {
829 int statret = ceph_do_getattr(inode, 844 int statret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
830 CEPH_STAT_CAP_SIZE);
831 845
832 /* hit EOF or hole? */ 846 /* hit EOF or hole? */
833 if (statret == 0 && iocb->ki_pos < inode->i_size && 847 if (statret == 0 && iocb->ki_pos < inode->i_size &&
@@ -836,7 +850,6 @@ again:
836 ", reading more\n", iocb->ki_pos, 850 ", reading more\n", iocb->ki_pos,
837 inode->i_size); 851 inode->i_size);
838 852
839 iov_iter_advance(to, ret);
840 read += ret; 853 read += ret;
841 len -= ret; 854 len -= ret;
842 checkeof = 0; 855 checkeof = 0;
@@ -995,7 +1008,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
995 mutex_lock(&inode->i_mutex); 1008 mutex_lock(&inode->i_mutex);
996 1009
997 if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) { 1010 if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
998 ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); 1011 ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
999 if (ret < 0) { 1012 if (ret < 0) {
1000 offset = ret; 1013 offset = ret;
1001 goto out; 1014 goto out;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 04c89c266cec..7b6139004401 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -766,7 +766,7 @@ static int fill_inode(struct inode *inode,
766 766
767 /* xattrs */ 767 /* xattrs */
768 /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */ 768 /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */
769 if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && 769 if ((ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) &&
770 le64_to_cpu(info->xattr_version) > ci->i_xattrs.version) { 770 le64_to_cpu(info->xattr_version) > ci->i_xattrs.version) {
771 if (ci->i_xattrs.blob) 771 if (ci->i_xattrs.blob)
772 ceph_buffer_put(ci->i_xattrs.blob); 772 ceph_buffer_put(ci->i_xattrs.blob);
@@ -1813,10 +1813,6 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1813 if (ia_valid & ATTR_SIZE) { 1813 if (ia_valid & ATTR_SIZE) {
1814 dout("setattr %p size %lld -> %lld\n", inode, 1814 dout("setattr %p size %lld -> %lld\n", inode,
1815 inode->i_size, attr->ia_size); 1815 inode->i_size, attr->ia_size);
1816 if (attr->ia_size > inode->i_sb->s_maxbytes) {
1817 err = -EINVAL;
1818 goto out;
1819 }
1820 if ((issued & CEPH_CAP_FILE_EXCL) && 1816 if ((issued & CEPH_CAP_FILE_EXCL) &&
1821 attr->ia_size > inode->i_size) { 1817 attr->ia_size > inode->i_size) {
1822 inode->i_size = attr->ia_size; 1818 inode->i_size = attr->ia_size;
@@ -1896,8 +1892,6 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1896 if (mask & CEPH_SETATTR_SIZE) 1892 if (mask & CEPH_SETATTR_SIZE)
1897 __ceph_do_pending_vmtruncate(inode); 1893 __ceph_do_pending_vmtruncate(inode);
1898 return err; 1894 return err;
1899out:
1900 spin_unlock(&ci->i_ceph_lock);
1901out_put: 1895out_put:
1902 ceph_mdsc_put_request(req); 1896 ceph_mdsc_put_request(req);
1903 return err; 1897 return err;
@@ -1907,7 +1901,7 @@ out_put:
1907 * Verify that we have a lease on the given mask. If not, 1901 * Verify that we have a lease on the given mask. If not,
1908 * do a getattr against an mds. 1902 * do a getattr against an mds.
1909 */ 1903 */
1910int ceph_do_getattr(struct inode *inode, int mask) 1904int ceph_do_getattr(struct inode *inode, int mask, bool force)
1911{ 1905{
1912 struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); 1906 struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
1913 struct ceph_mds_client *mdsc = fsc->mdsc; 1907 struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -1920,7 +1914,7 @@ int ceph_do_getattr(struct inode *inode, int mask)
1920 } 1914 }
1921 1915
1922 dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode); 1916 dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode);
1923 if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) 1917 if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
1924 return 0; 1918 return 0;
1925 1919
1926 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); 1920 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
@@ -1948,7 +1942,7 @@ int ceph_permission(struct inode *inode, int mask)
1948 if (mask & MAY_NOT_BLOCK) 1942 if (mask & MAY_NOT_BLOCK)
1949 return -ECHILD; 1943 return -ECHILD;
1950 1944
1951 err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED); 1945 err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED, false);
1952 1946
1953 if (!err) 1947 if (!err)
1954 err = generic_permission(inode, mask); 1948 err = generic_permission(inode, mask);
@@ -1966,7 +1960,7 @@ int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
1966 struct ceph_inode_info *ci = ceph_inode(inode); 1960 struct ceph_inode_info *ci = ceph_inode(inode);
1967 int err; 1961 int err;
1968 1962
1969 err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL); 1963 err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL, false);
1970 if (!err) { 1964 if (!err) {
1971 generic_fillattr(inode, stat); 1965 generic_fillattr(inode, stat);
1972 stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino); 1966 stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index a822a6e58290..f851d8d70158 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -19,7 +19,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
19 struct ceph_ioctl_layout l; 19 struct ceph_ioctl_layout l;
20 int err; 20 int err;
21 21
22 err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT); 22 err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT, false);
23 if (!err) { 23 if (!err) {
24 l.stripe_unit = ceph_file_layout_su(ci->i_layout); 24 l.stripe_unit = ceph_file_layout_su(ci->i_layout);
25 l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout); 25 l.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
@@ -41,7 +41,7 @@ static long __validate_layout(struct ceph_mds_client *mdsc,
41 /* validate striping parameters */ 41 /* validate striping parameters */
42 if ((l->object_size & ~PAGE_MASK) || 42 if ((l->object_size & ~PAGE_MASK) ||
43 (l->stripe_unit & ~PAGE_MASK) || 43 (l->stripe_unit & ~PAGE_MASK) ||
44 (l->stripe_unit != 0 && 44 ((unsigned)l->stripe_unit != 0 &&
45 ((unsigned)l->object_size % (unsigned)l->stripe_unit))) 45 ((unsigned)l->object_size % (unsigned)l->stripe_unit)))
46 return -EINVAL; 46 return -EINVAL;
47 47
@@ -74,7 +74,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
74 return -EFAULT; 74 return -EFAULT;
75 75
76 /* validate changed params against current layout */ 76 /* validate changed params against current layout */
77 err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT); 77 err = ceph_do_getattr(file_inode(file), CEPH_STAT_CAP_LAYOUT, false);
78 if (err) 78 if (err)
79 return err; 79 return err;
80 80
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index bad07c09f91e..a92d3f5c6c12 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -7,6 +7,7 @@
7#include <linux/sched.h> 7#include <linux/sched.h>
8#include <linux/debugfs.h> 8#include <linux/debugfs.h>
9#include <linux/seq_file.h> 9#include <linux/seq_file.h>
10#include <linux/utsname.h>
10 11
11#include "super.h" 12#include "super.h"
12#include "mds_client.h" 13#include "mds_client.h"
@@ -334,7 +335,7 @@ static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
334/* 335/*
335 * sessions 336 * sessions
336 */ 337 */
337static const char *session_state_name(int s) 338const char *ceph_session_state_name(int s)
338{ 339{
339 switch (s) { 340 switch (s) {
340 case CEPH_MDS_SESSION_NEW: return "new"; 341 case CEPH_MDS_SESSION_NEW: return "new";
@@ -542,6 +543,8 @@ void ceph_mdsc_release_request(struct kref *kref)
542 } 543 }
543 kfree(req->r_path1); 544 kfree(req->r_path1);
544 kfree(req->r_path2); 545 kfree(req->r_path2);
546 if (req->r_pagelist)
547 ceph_pagelist_release(req->r_pagelist);
545 put_request_session(req); 548 put_request_session(req);
546 ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation); 549 ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation);
547 kfree(req); 550 kfree(req);
@@ -812,6 +815,74 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq)
812 h = msg->front.iov_base; 815 h = msg->front.iov_base;
813 h->op = cpu_to_le32(op); 816 h->op = cpu_to_le32(op);
814 h->seq = cpu_to_le64(seq); 817 h->seq = cpu_to_le64(seq);
818
819 return msg;
820}
821
822/*
823 * session message, specialization for CEPH_SESSION_REQUEST_OPEN
824 * to include additional client metadata fields.
825 */
826static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u64 seq)
827{
828 struct ceph_msg *msg;
829 struct ceph_mds_session_head *h;
830 int i = -1;
831 int metadata_bytes = 0;
832 int metadata_key_count = 0;
833 struct ceph_options *opt = mdsc->fsc->client->options;
834 void *p;
835
836 const char* metadata[3][2] = {
837 {"hostname", utsname()->nodename},
838 {"entity_id", opt->name ? opt->name : ""},
839 {NULL, NULL}
840 };
841
842 /* Calculate serialized length of metadata */
843 metadata_bytes = 4; /* map length */
844 for (i = 0; metadata[i][0] != NULL; ++i) {
845 metadata_bytes += 8 + strlen(metadata[i][0]) +
846 strlen(metadata[i][1]);
847 metadata_key_count++;
848 }
849
850 /* Allocate the message */
851 msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + metadata_bytes,
852 GFP_NOFS, false);
853 if (!msg) {
854 pr_err("create_session_msg ENOMEM creating msg\n");
855 return NULL;
856 }
857 h = msg->front.iov_base;
858 h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN);
859 h->seq = cpu_to_le64(seq);
860
861 /*
862 * Serialize client metadata into waiting buffer space, using
863 * the format that userspace expects for map<string, string>
864 */
865 msg->hdr.version = 2; /* ClientSession messages with metadata are v2 */
866
867 /* The write pointer, following the session_head structure */
868 p = msg->front.iov_base + sizeof(*h);
869
870 /* Number of entries in the map */
871 ceph_encode_32(&p, metadata_key_count);
872
873 /* Two length-prefixed strings for each entry in the map */
874 for (i = 0; metadata[i][0] != NULL; ++i) {
875 size_t const key_len = strlen(metadata[i][0]);
876 size_t const val_len = strlen(metadata[i][1]);
877
878 ceph_encode_32(&p, key_len);
879 memcpy(p, metadata[i][0], key_len);
880 p += key_len;
881 ceph_encode_32(&p, val_len);
882 memcpy(p, metadata[i][1], val_len);
883 p += val_len;
884 }
885
815 return msg; 886 return msg;
816} 887}
817 888
@@ -835,7 +906,7 @@ static int __open_session(struct ceph_mds_client *mdsc,
835 session->s_renew_requested = jiffies; 906 session->s_renew_requested = jiffies;
836 907
837 /* send connect message */ 908 /* send connect message */
838 msg = create_session_msg(CEPH_SESSION_REQUEST_OPEN, session->s_seq); 909 msg = create_session_open_msg(mdsc, session->s_seq);
839 if (!msg) 910 if (!msg)
840 return -ENOMEM; 911 return -ENOMEM;
841 ceph_con_send(&session->s_con, msg); 912 ceph_con_send(&session->s_con, msg);
@@ -1164,7 +1235,7 @@ static int send_flushmsg_ack(struct ceph_mds_client *mdsc,
1164 struct ceph_msg *msg; 1235 struct ceph_msg *msg;
1165 1236
1166 dout("send_flushmsg_ack to mds%d (%s)s seq %lld\n", 1237 dout("send_flushmsg_ack to mds%d (%s)s seq %lld\n",
1167 session->s_mds, session_state_name(session->s_state), seq); 1238 session->s_mds, ceph_session_state_name(session->s_state), seq);
1168 msg = create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq); 1239 msg = create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq);
1169 if (!msg) 1240 if (!msg)
1170 return -ENOMEM; 1241 return -ENOMEM;
@@ -1216,7 +1287,7 @@ static int request_close_session(struct ceph_mds_client *mdsc,
1216 struct ceph_msg *msg; 1287 struct ceph_msg *msg;
1217 1288
1218 dout("request_close_session mds%d state %s seq %lld\n", 1289 dout("request_close_session mds%d state %s seq %lld\n",
1219 session->s_mds, session_state_name(session->s_state), 1290 session->s_mds, ceph_session_state_name(session->s_state),
1220 session->s_seq); 1291 session->s_seq);
1221 msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq); 1292 msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq);
1222 if (!msg) 1293 if (!msg)
@@ -1847,13 +1918,15 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1847 msg->front.iov_len = p - msg->front.iov_base; 1918 msg->front.iov_len = p - msg->front.iov_base;
1848 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 1919 msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
1849 1920
1850 if (req->r_data_len) { 1921 if (req->r_pagelist) {
1851 /* outbound data set only by ceph_sync_setxattr() */ 1922 struct ceph_pagelist *pagelist = req->r_pagelist;
1852 BUG_ON(!req->r_pages); 1923 atomic_inc(&pagelist->refcnt);
1853 ceph_msg_data_add_pages(msg, req->r_pages, req->r_data_len, 0); 1924 ceph_msg_data_add_pagelist(msg, pagelist);
1925 msg->hdr.data_len = cpu_to_le32(pagelist->length);
1926 } else {
1927 msg->hdr.data_len = 0;
1854 } 1928 }
1855 1929
1856 msg->hdr.data_len = cpu_to_le32(req->r_data_len);
1857 msg->hdr.data_off = cpu_to_le16(0); 1930 msg->hdr.data_off = cpu_to_le16(0);
1858 1931
1859out_free2: 1932out_free2:
@@ -2007,7 +2080,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
2007 req->r_session = get_session(session); 2080 req->r_session = get_session(session);
2008 2081
2009 dout("do_request mds%d session %p state %s\n", mds, session, 2082 dout("do_request mds%d session %p state %s\n", mds, session,
2010 session_state_name(session->s_state)); 2083 ceph_session_state_name(session->s_state));
2011 if (session->s_state != CEPH_MDS_SESSION_OPEN && 2084 if (session->s_state != CEPH_MDS_SESSION_OPEN &&
2012 session->s_state != CEPH_MDS_SESSION_HUNG) { 2085 session->s_state != CEPH_MDS_SESSION_HUNG) {
2013 if (session->s_state == CEPH_MDS_SESSION_NEW || 2086 if (session->s_state == CEPH_MDS_SESSION_NEW ||
@@ -2078,6 +2151,7 @@ static void kick_requests(struct ceph_mds_client *mdsc, int mds)
2078 if (req->r_session && 2151 if (req->r_session &&
2079 req->r_session->s_mds == mds) { 2152 req->r_session->s_mds == mds) {
2080 dout(" kicking tid %llu\n", req->r_tid); 2153 dout(" kicking tid %llu\n", req->r_tid);
2154 list_del_init(&req->r_wait);
2081 __do_request(mdsc, req); 2155 __do_request(mdsc, req);
2082 } 2156 }
2083 } 2157 }
@@ -2444,7 +2518,7 @@ static void handle_session(struct ceph_mds_session *session,
2444 2518
2445 dout("handle_session mds%d %s %p state %s seq %llu\n", 2519 dout("handle_session mds%d %s %p state %s seq %llu\n",
2446 mds, ceph_session_op_name(op), session, 2520 mds, ceph_session_op_name(op), session,
2447 session_state_name(session->s_state), seq); 2521 ceph_session_state_name(session->s_state), seq);
2448 2522
2449 if (session->s_state == CEPH_MDS_SESSION_HUNG) { 2523 if (session->s_state == CEPH_MDS_SESSION_HUNG) {
2450 session->s_state = CEPH_MDS_SESSION_OPEN; 2524 session->s_state = CEPH_MDS_SESSION_OPEN;
@@ -2471,9 +2545,8 @@ static void handle_session(struct ceph_mds_session *session,
2471 if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) 2545 if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
2472 pr_info("mds%d reconnect denied\n", session->s_mds); 2546 pr_info("mds%d reconnect denied\n", session->s_mds);
2473 remove_session_caps(session); 2547 remove_session_caps(session);
2474 wake = 1; /* for good measure */ 2548 wake = 2; /* for good measure */
2475 wake_up_all(&mdsc->session_close_wq); 2549 wake_up_all(&mdsc->session_close_wq);
2476 kick_requests(mdsc, mds);
2477 break; 2550 break;
2478 2551
2479 case CEPH_SESSION_STALE: 2552 case CEPH_SESSION_STALE:
@@ -2503,6 +2576,8 @@ static void handle_session(struct ceph_mds_session *session,
2503 if (wake) { 2576 if (wake) {
2504 mutex_lock(&mdsc->mutex); 2577 mutex_lock(&mdsc->mutex);
2505 __wake_requests(mdsc, &session->s_waiting); 2578 __wake_requests(mdsc, &session->s_waiting);
2579 if (wake == 2)
2580 kick_requests(mdsc, mds);
2506 mutex_unlock(&mdsc->mutex); 2581 mutex_unlock(&mdsc->mutex);
2507 } 2582 }
2508 return; 2583 return;
@@ -2695,18 +2770,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
2695 session->s_state = CEPH_MDS_SESSION_RECONNECTING; 2770 session->s_state = CEPH_MDS_SESSION_RECONNECTING;
2696 session->s_seq = 0; 2771 session->s_seq = 0;
2697 2772
2698 ceph_con_close(&session->s_con);
2699 ceph_con_open(&session->s_con,
2700 CEPH_ENTITY_TYPE_MDS, mds,
2701 ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
2702
2703 /* replay unsafe requests */
2704 replay_unsafe_requests(mdsc, session);
2705
2706 down_read(&mdsc->snap_rwsem);
2707
2708 dout("session %p state %s\n", session, 2773 dout("session %p state %s\n", session,
2709 session_state_name(session->s_state)); 2774 ceph_session_state_name(session->s_state));
2710 2775
2711 spin_lock(&session->s_gen_ttl_lock); 2776 spin_lock(&session->s_gen_ttl_lock);
2712 session->s_cap_gen++; 2777 session->s_cap_gen++;
@@ -2723,6 +2788,19 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
2723 discard_cap_releases(mdsc, session); 2788 discard_cap_releases(mdsc, session);
2724 spin_unlock(&session->s_cap_lock); 2789 spin_unlock(&session->s_cap_lock);
2725 2790
2791 /* trim unused caps to reduce MDS's cache rejoin time */
2792 shrink_dcache_parent(mdsc->fsc->sb->s_root);
2793
2794 ceph_con_close(&session->s_con);
2795 ceph_con_open(&session->s_con,
2796 CEPH_ENTITY_TYPE_MDS, mds,
2797 ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
2798
2799 /* replay unsafe requests */
2800 replay_unsafe_requests(mdsc, session);
2801
2802 down_read(&mdsc->snap_rwsem);
2803
2726 /* traverse this session's caps */ 2804 /* traverse this session's caps */
2727 s_nr_caps = session->s_nr_caps; 2805 s_nr_caps = session->s_nr_caps;
2728 err = ceph_pagelist_encode_32(pagelist, s_nr_caps); 2806 err = ceph_pagelist_encode_32(pagelist, s_nr_caps);
@@ -2791,7 +2869,6 @@ fail:
2791 mutex_unlock(&session->s_mutex); 2869 mutex_unlock(&session->s_mutex);
2792fail_nomsg: 2870fail_nomsg:
2793 ceph_pagelist_release(pagelist); 2871 ceph_pagelist_release(pagelist);
2794 kfree(pagelist);
2795fail_nopagelist: 2872fail_nopagelist:
2796 pr_err("error %d preparing reconnect for mds%d\n", err, mds); 2873 pr_err("error %d preparing reconnect for mds%d\n", err, mds);
2797 return; 2874 return;
@@ -2827,7 +2904,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
2827 ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "", 2904 ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "",
2828 ceph_mds_state_name(newstate), 2905 ceph_mds_state_name(newstate),
2829 ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", 2906 ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
2830 session_state_name(s->s_state)); 2907 ceph_session_state_name(s->s_state));
2831 2908
2832 if (i >= newmap->m_max_mds || 2909 if (i >= newmap->m_max_mds ||
2833 memcmp(ceph_mdsmap_get_addr(oldmap, i), 2910 memcmp(ceph_mdsmap_get_addr(oldmap, i),
@@ -2939,14 +3016,15 @@ static void handle_lease(struct ceph_mds_client *mdsc,
2939 if (dname.len != get_unaligned_le32(h+1)) 3016 if (dname.len != get_unaligned_le32(h+1))
2940 goto bad; 3017 goto bad;
2941 3018
2942 mutex_lock(&session->s_mutex);
2943 session->s_seq++;
2944
2945 /* lookup inode */ 3019 /* lookup inode */
2946 inode = ceph_find_inode(sb, vino); 3020 inode = ceph_find_inode(sb, vino);
2947 dout("handle_lease %s, ino %llx %p %.*s\n", 3021 dout("handle_lease %s, ino %llx %p %.*s\n",
2948 ceph_lease_op_name(h->action), vino.ino, inode, 3022 ceph_lease_op_name(h->action), vino.ino, inode,
2949 dname.len, dname.name); 3023 dname.len, dname.name);
3024
3025 mutex_lock(&session->s_mutex);
3026 session->s_seq++;
3027
2950 if (inode == NULL) { 3028 if (inode == NULL) {
2951 dout("handle_lease no inode %llx\n", vino.ino); 3029 dout("handle_lease no inode %llx\n", vino.ino);
2952 goto release; 3030 goto release;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index e00737cf523c..3288359353e9 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -202,9 +202,7 @@ struct ceph_mds_request {
202 bool r_direct_is_hash; /* true if r_direct_hash is valid */ 202 bool r_direct_is_hash; /* true if r_direct_hash is valid */
203 203
204 /* data payload is used for xattr ops */ 204 /* data payload is used for xattr ops */
205 struct page **r_pages; 205 struct ceph_pagelist *r_pagelist;
206 int r_num_pages;
207 int r_data_len;
208 206
209 /* what caps shall we drop? */ 207 /* what caps shall we drop? */
210 int r_inode_drop, r_inode_unless; 208 int r_inode_drop, r_inode_unless;
@@ -332,6 +330,8 @@ ceph_get_mds_session(struct ceph_mds_session *s)
332 return s; 330 return s;
333} 331}
334 332
333extern const char *ceph_session_state_name(int s);
334
335extern void ceph_put_mds_session(struct ceph_mds_session *s); 335extern void ceph_put_mds_session(struct ceph_mds_session *s);
336 336
337extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, 337extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc,
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 12b20744e386..b82f507979b8 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -95,6 +95,7 @@ struct ceph_fs_client {
95 struct dentry *debugfs_congestion_kb; 95 struct dentry *debugfs_congestion_kb;
96 struct dentry *debugfs_bdi; 96 struct dentry *debugfs_bdi;
97 struct dentry *debugfs_mdsc, *debugfs_mdsmap; 97 struct dentry *debugfs_mdsc, *debugfs_mdsmap;
98 struct dentry *debugfs_mds_sessions;
98#endif 99#endif
99 100
100#ifdef CONFIG_CEPH_FSCACHE 101#ifdef CONFIG_CEPH_FSCACHE
@@ -714,7 +715,7 @@ extern void ceph_queue_vmtruncate(struct inode *inode);
714extern void ceph_queue_invalidate(struct inode *inode); 715extern void ceph_queue_invalidate(struct inode *inode);
715extern void ceph_queue_writeback(struct inode *inode); 716extern void ceph_queue_writeback(struct inode *inode);
716 717
717extern int ceph_do_getattr(struct inode *inode, int mask); 718extern int ceph_do_getattr(struct inode *inode, int mask, bool force);
718extern int ceph_permission(struct inode *inode, int mask); 719extern int ceph_permission(struct inode *inode, int mask);
719extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); 720extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
720extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, 721extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -733,15 +734,23 @@ extern void __ceph_build_xattrs_blob(struct ceph_inode_info *ci);
733extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci); 734extern void __ceph_destroy_xattrs(struct ceph_inode_info *ci);
734extern void __init ceph_xattr_init(void); 735extern void __init ceph_xattr_init(void);
735extern void ceph_xattr_exit(void); 736extern void ceph_xattr_exit(void);
737extern const struct xattr_handler *ceph_xattr_handlers[];
736 738
737/* acl.c */ 739/* acl.c */
738extern const struct xattr_handler *ceph_xattr_handlers[]; 740struct ceph_acls_info {
741 void *default_acl;
742 void *acl;
743 struct ceph_pagelist *pagelist;
744};
739 745
740#ifdef CONFIG_CEPH_FS_POSIX_ACL 746#ifdef CONFIG_CEPH_FS_POSIX_ACL
741 747
742struct posix_acl *ceph_get_acl(struct inode *, int); 748struct posix_acl *ceph_get_acl(struct inode *, int);
743int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type); 749int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type);
744int ceph_init_acl(struct dentry *, struct inode *, struct inode *); 750int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
751 struct ceph_acls_info *info);
752void ceph_init_inode_acls(struct inode *inode, struct ceph_acls_info *info);
753void ceph_release_acls_info(struct ceph_acls_info *info);
745 754
746static inline void ceph_forget_all_cached_acls(struct inode *inode) 755static inline void ceph_forget_all_cached_acls(struct inode *inode)
747{ 756{
@@ -753,12 +762,18 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode)
753#define ceph_get_acl NULL 762#define ceph_get_acl NULL
754#define ceph_set_acl NULL 763#define ceph_set_acl NULL
755 764
756static inline int ceph_init_acl(struct dentry *dentry, struct inode *inode, 765static inline int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
757 struct inode *dir) 766 struct ceph_acls_info *info)
758{ 767{
759 return 0; 768 return 0;
760} 769}
761 770static inline void ceph_init_inode_acls(struct inode *inode,
771 struct ceph_acls_info *info)
772{
773}
774static inline void ceph_release_acls_info(struct ceph_acls_info *info)
775{
776}
762static inline int ceph_acl_chmod(struct dentry *dentry, struct inode *inode) 777static inline int ceph_acl_chmod(struct dentry *dentry, struct inode *inode)
763{ 778{
764 return 0; 779 return 0;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 12f58d22e017..678b0d2bbbc4 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -1,4 +1,5 @@
1#include <linux/ceph/ceph_debug.h> 1#include <linux/ceph/ceph_debug.h>
2#include <linux/ceph/pagelist.h>
2 3
3#include "super.h" 4#include "super.h"
4#include "mds_client.h" 5#include "mds_client.h"
@@ -284,8 +285,7 @@ static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
284 return ceph_dir_vxattrs_name_size; 285 return ceph_dir_vxattrs_name_size;
285 if (vxattrs == ceph_file_vxattrs) 286 if (vxattrs == ceph_file_vxattrs)
286 return ceph_file_vxattrs_name_size; 287 return ceph_file_vxattrs_name_size;
287 BUG(); 288 BUG_ON(vxattrs);
288
289 return 0; 289 return 0;
290} 290}
291 291
@@ -736,24 +736,20 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
736 dout("getxattr %p ver=%lld index_ver=%lld\n", inode, 736 dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
737 ci->i_xattrs.version, ci->i_xattrs.index_version); 737 ci->i_xattrs.version, ci->i_xattrs.index_version);
738 738
739 if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && 739 if (ci->i_xattrs.version == 0 ||
740 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { 740 !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) {
741 goto get_xattr;
742 } else {
743 spin_unlock(&ci->i_ceph_lock); 741 spin_unlock(&ci->i_ceph_lock);
744 /* get xattrs from mds (if we don't already have them) */ 742 /* get xattrs from mds (if we don't already have them) */
745 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR); 743 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
746 if (err) 744 if (err)
747 return err; 745 return err;
746 spin_lock(&ci->i_ceph_lock);
748 } 747 }
749 748
750 spin_lock(&ci->i_ceph_lock);
751
752 err = __build_xattrs(inode); 749 err = __build_xattrs(inode);
753 if (err < 0) 750 if (err < 0)
754 goto out; 751 goto out;
755 752
756get_xattr:
757 err = -ENODATA; /* == ENOATTR */ 753 err = -ENODATA; /* == ENOATTR */
758 xattr = __get_xattr(ci, name); 754 xattr = __get_xattr(ci, name);
759 if (!xattr) 755 if (!xattr)
@@ -798,23 +794,18 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
798 dout("listxattr %p ver=%lld index_ver=%lld\n", inode, 794 dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
799 ci->i_xattrs.version, ci->i_xattrs.index_version); 795 ci->i_xattrs.version, ci->i_xattrs.index_version);
800 796
801 if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && 797 if (ci->i_xattrs.version == 0 ||
802 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { 798 !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) {
803 goto list_xattr;
804 } else {
805 spin_unlock(&ci->i_ceph_lock); 799 spin_unlock(&ci->i_ceph_lock);
806 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR); 800 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
807 if (err) 801 if (err)
808 return err; 802 return err;
803 spin_lock(&ci->i_ceph_lock);
809 } 804 }
810 805
811 spin_lock(&ci->i_ceph_lock);
812
813 err = __build_xattrs(inode); 806 err = __build_xattrs(inode);
814 if (err < 0) 807 if (err < 0)
815 goto out; 808 goto out;
816
817list_xattr:
818 /* 809 /*
819 * Start with virtual dir xattr names (if any) (including 810 * Start with virtual dir xattr names (if any) (including
820 * terminating '\0' characters for each). 811 * terminating '\0' characters for each).
@@ -860,35 +851,25 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
860 struct ceph_inode_info *ci = ceph_inode(inode); 851 struct ceph_inode_info *ci = ceph_inode(inode);
861 struct ceph_mds_request *req; 852 struct ceph_mds_request *req;
862 struct ceph_mds_client *mdsc = fsc->mdsc; 853 struct ceph_mds_client *mdsc = fsc->mdsc;
854 struct ceph_pagelist *pagelist = NULL;
863 int err; 855 int err;
864 int i, nr_pages; 856
865 struct page **pages = NULL; 857 if (value) {
866 void *kaddr; 858 /* copy value into pagelist */
867 859 pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
868 /* copy value into some pages */ 860 if (!pagelist)
869 nr_pages = calc_pages_for(0, size);
870 if (nr_pages) {
871 pages = kmalloc(sizeof(pages[0])*nr_pages, GFP_NOFS);
872 if (!pages)
873 return -ENOMEM; 861 return -ENOMEM;
874 err = -ENOMEM; 862
875 for (i = 0; i < nr_pages; i++) { 863 ceph_pagelist_init(pagelist);
876 pages[i] = __page_cache_alloc(GFP_NOFS); 864 err = ceph_pagelist_append(pagelist, value, size);
877 if (!pages[i]) { 865 if (err)
878 nr_pages = i; 866 goto out;
879 goto out; 867 } else {
880 } 868 flags |= CEPH_XATTR_REMOVE;
881 kaddr = kmap(pages[i]);
882 memcpy(kaddr, value + i*PAGE_CACHE_SIZE,
883 min(PAGE_CACHE_SIZE, size-i*PAGE_CACHE_SIZE));
884 }
885 } 869 }
886 870
887 dout("setxattr value=%.*s\n", (int)size, value); 871 dout("setxattr value=%.*s\n", (int)size, value);
888 872
889 if (!value)
890 flags |= CEPH_XATTR_REMOVE;
891
892 /* do request */ 873 /* do request */
893 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR, 874 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR,
894 USE_AUTH_MDS); 875 USE_AUTH_MDS);
@@ -903,9 +884,8 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
903 req->r_args.setxattr.flags = cpu_to_le32(flags); 884 req->r_args.setxattr.flags = cpu_to_le32(flags);
904 req->r_path2 = kstrdup(name, GFP_NOFS); 885 req->r_path2 = kstrdup(name, GFP_NOFS);
905 886
906 req->r_pages = pages; 887 req->r_pagelist = pagelist;
907 req->r_num_pages = nr_pages; 888 pagelist = NULL;
908 req->r_data_len = size;
909 889
910 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); 890 dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
911 err = ceph_mdsc_do_request(mdsc, NULL, req); 891 err = ceph_mdsc_do_request(mdsc, NULL, req);
@@ -913,11 +893,8 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
913 dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); 893 dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
914 894
915out: 895out:
916 if (pages) { 896 if (pagelist)
917 for (i = 0; i < nr_pages; i++) 897 ceph_pagelist_release(pagelist);
918 __free_page(pages[i]);
919 kfree(pages);
920 }
921 return err; 898 return err;
922} 899}
923 900
@@ -968,7 +945,7 @@ int __ceph_setxattr(struct dentry *dentry, const char *name,
968retry: 945retry:
969 issued = __ceph_caps_issued(ci, NULL); 946 issued = __ceph_caps_issued(ci, NULL);
970 dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued)); 947 dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
971 if (!(issued & CEPH_CAP_XATTR_EXCL)) 948 if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
972 goto do_sync; 949 goto do_sync;
973 __build_xattrs(inode); 950 __build_xattrs(inode);
974 951
@@ -1077,7 +1054,7 @@ retry:
1077 issued = __ceph_caps_issued(ci, NULL); 1054 issued = __ceph_caps_issued(ci, NULL);
1078 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); 1055 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
1079 1056
1080 if (!(issued & CEPH_CAP_XATTR_EXCL)) 1057 if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
1081 goto do_sync; 1058 goto do_sync;
1082 __build_xattrs(inode); 1059 __build_xattrs(inode);
1083 1060