about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ceph
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2010-11-19 18:32:22 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2010-11-19 18:32:22 -0500
commit 76db8ac45fc738f7d7664fe9b56d15c594a45228 (patch)
tree eca23feab074d505b375e27714473f4ad337bd85 /fs/ceph
parent caf8394524fdc039b090cd3af99157e9e76f4f06 (diff)
parent 3105c19c450ac7c18ab28c19d364b588767261b3 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  ceph: fix readdir EOVERFLOW on 32-bit archs
  ceph: fix frag offset for non-leftmost frags
  ceph: fix dangling pointer
  ceph: explicitly specify page alignment in network messages
  ceph: make page alignment explicit in osd interface
  ceph: fix comment, remove extraneous args
  ceph: fix update of ctime from MDS
  ceph: fix version check on racing inode updates
  ceph: fix uid/gid on resent mds requests
  ceph: fix rdcache_gen usage and invalidate
  ceph: re-request max_size if cap auth changes
  ceph: only let auth caps update max_size
  ceph: fix open for write on clustered mds
  ceph: fix bad pointer dereference in ceph_fill_trace
  ceph: fix small seq message skipping
  Revert "ceph: update issue_seq on cap grant"
Diffstat (limited to 'fs/ceph')
-rw-r--r-- fs/ceph/addr.c 6
-rw-r--r-- fs/ceph/caps.c 17
-rw-r--r-- fs/ceph/dir.c 16
-rw-r--r-- fs/ceph/file.c 52
-rw-r--r-- fs/ceph/inode.c 49
-rw-r--r-- fs/ceph/mds_client.c 7
-rw-r--r-- fs/ceph/mds_client.h 2
-rw-r--r-- fs/ceph/super.h 4
8 files changed, 98 insertions, 55 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index e9c874abc9e1..561438b6a50c 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -204,7 +204,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
204 err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, 204 err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
205 page->index << PAGE_CACHE_SHIFT, &len, 205 page->index << PAGE_CACHE_SHIFT, &len,
206 ci->i_truncate_seq, ci->i_truncate_size, 206 ci->i_truncate_seq, ci->i_truncate_size,
207 &page, 1); 207 &page, 1, 0);
208 if (err == -ENOENT) 208 if (err == -ENOENT)
209 err = 0; 209 err = 0;
210 if (err < 0) { 210 if (err < 0) {
@@ -287,7 +287,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
287 rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, 287 rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
288 offset, &len, 288 offset, &len,
289 ci->i_truncate_seq, ci->i_truncate_size, 289 ci->i_truncate_seq, ci->i_truncate_size,
290 pages, nr_pages); 290 pages, nr_pages, 0);
291 if (rc == -ENOENT) 291 if (rc == -ENOENT)
292 rc = 0; 292 rc = 0;
293 if (rc < 0) 293 if (rc < 0)
@@ -774,7 +774,7 @@ get_more_pages:
774 snapc, do_sync, 774 snapc, do_sync,
775 ci->i_truncate_seq, 775 ci->i_truncate_seq,
776 ci->i_truncate_size, 776 ci->i_truncate_size,
777 &inode->i_mtime, true, 1); 777 &inode->i_mtime, true, 1, 0);
778 max_pages = req->r_num_pages; 778 max_pages = req->r_num_pages;
779 779
780 alloc_page_vec(fsc, req); 780 alloc_page_vec(fsc, req);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 98ab13e2b71d..60d27bc9eb83 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1430,8 +1430,8 @@ static int try_nonblocking_invalidate(struct inode *inode)
1430 invalidating_gen == ci->i_rdcache_gen) { 1430 invalidating_gen == ci->i_rdcache_gen) {
1431 /* success. */ 1431 /* success. */
1432 dout("try_nonblocking_invalidate %p success\n", inode); 1432 dout("try_nonblocking_invalidate %p success\n", inode);
1433 ci->i_rdcache_gen = 0; 1433 /* save any racing async invalidate some trouble */
1434 ci->i_rdcache_revoking = 0; 1434 ci->i_rdcache_revoking = ci->i_rdcache_gen - 1;
1435 return 0; 1435 return 0;
1436 } 1436 }
1437 dout("try_nonblocking_invalidate %p failed\n", inode); 1437 dout("try_nonblocking_invalidate %p failed\n", inode);
@@ -2273,8 +2273,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2273{ 2273{
2274 struct ceph_inode_info *ci = ceph_inode(inode); 2274 struct ceph_inode_info *ci = ceph_inode(inode);
2275 int mds = session->s_mds; 2275 int mds = session->s_mds;
2276 unsigned seq = le32_to_cpu(grant->seq); 2276 int seq = le32_to_cpu(grant->seq);
2277 unsigned issue_seq = le32_to_cpu(grant->issue_seq);
2278 int newcaps = le32_to_cpu(grant->caps); 2277 int newcaps = le32_to_cpu(grant->caps);
2279 int issued, implemented, used, wanted, dirty; 2278 int issued, implemented, used, wanted, dirty;
2280 u64 size = le64_to_cpu(grant->size); 2279 u64 size = le64_to_cpu(grant->size);
@@ -2286,8 +2285,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2286 int revoked_rdcache = 0; 2285 int revoked_rdcache = 0;
2287 int queue_invalidate = 0; 2286 int queue_invalidate = 0;
2288 2287
2289 dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n", 2288 dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
2290 inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps)); 2289 inode, cap, mds, seq, ceph_cap_string(newcaps));
2291 dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, 2290 dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
2292 inode->i_size); 2291 inode->i_size);
2293 2292
@@ -2383,7 +2382,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2383 } 2382 }
2384 2383
2385 cap->seq = seq; 2384 cap->seq = seq;
2386 cap->issue_seq = issue_seq;
2387 2385
2388 /* file layout may have changed */ 2386 /* file layout may have changed */
2389 ci->i_layout = grant->layout; 2387 ci->i_layout = grant->layout;
@@ -2691,6 +2689,11 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
2691 NULL /* no caps context */); 2689 NULL /* no caps context */);
2692 try_flush_caps(inode, session, NULL); 2690 try_flush_caps(inode, session, NULL);
2693 up_read(&mdsc->snap_rwsem); 2691 up_read(&mdsc->snap_rwsem);
2692
2693 /* make sure we re-request max_size, if necessary */
2694 spin_lock(&inode->i_lock);
2695 ci->i_requested_max_size = 0;
2696 spin_unlock(&inode->i_lock);
2694} 2697}
2695 2698
2696/* 2699/*
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index e0a2dc6fcafc..7d447af84ec4 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -336,7 +336,10 @@ more:
336 if (req->r_reply_info.dir_end) { 336 if (req->r_reply_info.dir_end) {
337 kfree(fi->last_name); 337 kfree(fi->last_name);
338 fi->last_name = NULL; 338 fi->last_name = NULL;
339 fi->next_offset = 2; 339 if (ceph_frag_is_rightmost(frag))
340 fi->next_offset = 2;
341 else
342 fi->next_offset = 0;
340 } else { 343 } else {
341 rinfo = &req->r_reply_info; 344 rinfo = &req->r_reply_info;
342 err = note_last_dentry(fi, 345 err = note_last_dentry(fi,
@@ -355,18 +358,22 @@ more:
355 u64 pos = ceph_make_fpos(frag, off); 358 u64 pos = ceph_make_fpos(frag, off);
356 struct ceph_mds_reply_inode *in = 359 struct ceph_mds_reply_inode *in =
357 rinfo->dir_in[off - fi->offset].in; 360 rinfo->dir_in[off - fi->offset].in;
361 struct ceph_vino vino;
362 ino_t ino;
363
358 dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", 364 dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n",
359 off, off - fi->offset, rinfo->dir_nr, pos, 365 off, off - fi->offset, rinfo->dir_nr, pos,
360 rinfo->dir_dname_len[off - fi->offset], 366 rinfo->dir_dname_len[off - fi->offset],
361 rinfo->dir_dname[off - fi->offset], in); 367 rinfo->dir_dname[off - fi->offset], in);
362 BUG_ON(!in); 368 BUG_ON(!in);
363 ftype = le32_to_cpu(in->mode) >> 12; 369 ftype = le32_to_cpu(in->mode) >> 12;
370 vino.ino = le64_to_cpu(in->ino);
371 vino.snap = le64_to_cpu(in->snapid);
372 ino = ceph_vino_to_ino(vino);
364 if (filldir(dirent, 373 if (filldir(dirent,
365 rinfo->dir_dname[off - fi->offset], 374 rinfo->dir_dname[off - fi->offset],
366 rinfo->dir_dname_len[off - fi->offset], 375 rinfo->dir_dname_len[off - fi->offset],
367 pos, 376 pos, ino, ftype) < 0) {
368 le64_to_cpu(in->ino),
369 ftype) < 0) {
370 dout("filldir stopping us...\n"); 377 dout("filldir stopping us...\n");
371 return 0; 378 return 0;
372 } 379 }
@@ -414,6 +421,7 @@ static void reset_readdir(struct ceph_file_info *fi)
414 fi->last_readdir = NULL; 421 fi->last_readdir = NULL;
415 } 422 }
416 kfree(fi->last_name); 423 kfree(fi->last_name);
424 fi->last_name = NULL;
417 fi->next_offset = 2; /* compensate for . and .. */ 425 fi->next_offset = 2; /* compensate for . and .. */
418 if (fi->dentry) { 426 if (fi->dentry) {
419 dput(fi->dentry); 427 dput(fi->dentry);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e77c28cf3690..8d79b8912e31 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -154,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file)
154 } 154 }
155 155
156 /* 156 /*
157 * No need to block if we have any caps. Update wanted set 157 * No need to block if we have caps on the auth MDS (for
158 * write) or any MDS (for read). Update wanted set
158 * asynchronously. 159 * asynchronously.
159 */ 160 */
160 spin_lock(&inode->i_lock); 161 spin_lock(&inode->i_lock);
161 if (__ceph_is_any_real_caps(ci)) { 162 if (__ceph_is_any_real_caps(ci) &&
163 (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
162 int mds_wanted = __ceph_caps_mds_wanted(ci); 164 int mds_wanted = __ceph_caps_mds_wanted(ci);
163 int issued = __ceph_caps_issued(ci, NULL); 165 int issued = __ceph_caps_issued(ci, NULL);
164 166
@@ -280,11 +282,12 @@ int ceph_release(struct inode *inode, struct file *file)
280static int striped_read(struct inode *inode, 282static int striped_read(struct inode *inode,
281 u64 off, u64 len, 283 u64 off, u64 len,
282 struct page **pages, int num_pages, 284 struct page **pages, int num_pages,
283 int *checkeof) 285 int *checkeof, bool align_to_pages)
284{ 286{
285 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 287 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
286 struct ceph_inode_info *ci = ceph_inode(inode); 288 struct ceph_inode_info *ci = ceph_inode(inode);
287 u64 pos, this_len; 289 u64 pos, this_len;
290 int io_align, page_align;
288 int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ 291 int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
289 int left, pages_left; 292 int left, pages_left;
290 int read; 293 int read;
@@ -300,14 +303,19 @@ static int striped_read(struct inode *inode,
300 page_pos = pages; 303 page_pos = pages;
301 pages_left = num_pages; 304 pages_left = num_pages;
302 read = 0; 305 read = 0;
306 io_align = off & ~PAGE_MASK;
303 307
304more: 308more:
309 if (align_to_pages)
310 page_align = (pos - io_align) & ~PAGE_MASK;
311 else
312 page_align = pos & ~PAGE_MASK;
305 this_len = left; 313 this_len = left;
306 ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), 314 ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
307 &ci->i_layout, pos, &this_len, 315 &ci->i_layout, pos, &this_len,
308 ci->i_truncate_seq, 316 ci->i_truncate_seq,
309 ci->i_truncate_size, 317 ci->i_truncate_size,
310 page_pos, pages_left); 318 page_pos, pages_left, page_align);
311 hit_stripe = this_len < left; 319 hit_stripe = this_len < left;
312 was_short = ret >= 0 && ret < this_len; 320 was_short = ret >= 0 && ret < this_len;
313 if (ret == -ENOENT) 321 if (ret == -ENOENT)
@@ -374,26 +382,25 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
374 dout("sync_read on file %p %llu~%u %s\n", file, off, len, 382 dout("sync_read on file %p %llu~%u %s\n", file, off, len,
375 (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); 383 (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
376 384
377 if (file->f_flags & O_DIRECT) { 385 if (file->f_flags & O_DIRECT)
378 pages = ceph_get_direct_page_vector(data, num_pages, off, len); 386 pages = ceph_get_direct_page_vector(data, num_pages);
379 387 else
380 /*
381 * flush any page cache pages in this range. this
382 * will make concurrent normal and O_DIRECT io slow,
383 * but it will at least behave sensibly when they are
384 * in sequence.
385 */
386 } else {
387 pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); 388 pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
388 }
389 if (IS_ERR(pages)) 389 if (IS_ERR(pages))
390 return PTR_ERR(pages); 390 return PTR_ERR(pages);
391 391
392 /*
393 * flush any page cache pages in this range. this
394 * will make concurrent normal and sync io slow,
395 * but it will at least behave sensibly when they are
396 * in sequence.
397 */
392 ret = filemap_write_and_wait(inode->i_mapping); 398 ret = filemap_write_and_wait(inode->i_mapping);
393 if (ret < 0) 399 if (ret < 0)
394 goto done; 400 goto done;
395 401
396 ret = striped_read(inode, off, len, pages, num_pages, checkeof); 402 ret = striped_read(inode, off, len, pages, num_pages, checkeof,
403 file->f_flags & O_DIRECT);
397 404
398 if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) 405 if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
399 ret = ceph_copy_page_vector_to_user(pages, data, off, ret); 406 ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
@@ -448,6 +455,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
448 int flags; 455 int flags;
449 int do_sync = 0; 456 int do_sync = 0;
450 int check_caps = 0; 457 int check_caps = 0;
458 int page_align, io_align;
451 int ret; 459 int ret;
452 struct timespec mtime = CURRENT_TIME; 460 struct timespec mtime = CURRENT_TIME;
453 461
@@ -462,6 +470,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
462 else 470 else
463 pos = *offset; 471 pos = *offset;
464 472
473 io_align = pos & ~PAGE_MASK;
474
465 ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); 475 ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
466 if (ret < 0) 476 if (ret < 0)
467 return ret; 477 return ret;
@@ -486,20 +496,26 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
486 */ 496 */
487more: 497more:
488 len = left; 498 len = left;
499 if (file->f_flags & O_DIRECT)
500 /* write from beginning of first page, regardless of
501 io alignment */
502 page_align = (pos - io_align) & ~PAGE_MASK;
503 else
504 page_align = pos & ~PAGE_MASK;
489 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, 505 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
490 ceph_vino(inode), pos, &len, 506 ceph_vino(inode), pos, &len,
491 CEPH_OSD_OP_WRITE, flags, 507 CEPH_OSD_OP_WRITE, flags,
492 ci->i_snap_realm->cached_context, 508 ci->i_snap_realm->cached_context,
493 do_sync, 509 do_sync,
494 ci->i_truncate_seq, ci->i_truncate_size, 510 ci->i_truncate_seq, ci->i_truncate_size,
495 &mtime, false, 2); 511 &mtime, false, 2, page_align);
496 if (!req) 512 if (!req)
497 return -ENOMEM; 513 return -ENOMEM;
498 514
499 num_pages = calc_pages_for(pos, len); 515 num_pages = calc_pages_for(pos, len);
500 516
501 if (file->f_flags & O_DIRECT) { 517 if (file->f_flags & O_DIRECT) {
502 pages = ceph_get_direct_page_vector(data, num_pages, pos, len); 518 pages = ceph_get_direct_page_vector(data, num_pages);
503 if (IS_ERR(pages)) { 519 if (IS_ERR(pages)) {
504 ret = PTR_ERR(pages); 520 ret = PTR_ERR(pages);
505 goto out; 521 goto out;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 524b80be4482..bf1286588f26 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -470,7 +470,9 @@ void ceph_fill_file_time(struct inode *inode, int issued,
470 470
471 if (issued & (CEPH_CAP_FILE_EXCL| 471 if (issued & (CEPH_CAP_FILE_EXCL|
472 CEPH_CAP_FILE_WR| 472 CEPH_CAP_FILE_WR|
473 CEPH_CAP_FILE_BUFFER)) { 473 CEPH_CAP_FILE_BUFFER|
474 CEPH_CAP_AUTH_EXCL|
475 CEPH_CAP_XATTR_EXCL)) {
474 if (timespec_compare(ctime, &inode->i_ctime) > 0) { 476 if (timespec_compare(ctime, &inode->i_ctime) > 0) {
475 dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", 477 dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
476 inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, 478 inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
@@ -510,7 +512,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
510 warn = 1; 512 warn = 1;
511 } 513 }
512 } else { 514 } else {
513 /* we have no write caps; whatever the MDS says is true */ 515 /* we have no write|excl caps; whatever the MDS says is true */
514 if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { 516 if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
515 inode->i_ctime = *ctime; 517 inode->i_ctime = *ctime;
516 inode->i_mtime = *mtime; 518 inode->i_mtime = *mtime;
@@ -566,12 +568,17 @@ static int fill_inode(struct inode *inode,
566 568
567 /* 569 /*
568 * provided version will be odd if inode value is projected, 570 * provided version will be odd if inode value is projected,
569 * even if stable. skip the update if we have a newer info 571 * even if stable. skip the update if we have newer stable
570 * (e.g., due to inode info racing form multiple MDSs), or if 572 * info (ours>=theirs, e.g. due to racing mds replies), unless
571 * we are getting projected (unstable) inode info. 573 * we are getting projected (unstable) info (in which case the
574 * version is odd, and we want ours>theirs).
575 * us them
576 * 2 2 skip
577 * 3 2 skip
578 * 3 3 update
572 */ 579 */
573 if (le64_to_cpu(info->version) > 0 && 580 if (le64_to_cpu(info->version) > 0 &&
574 (ci->i_version & ~1) > le64_to_cpu(info->version)) 581 (ci->i_version & ~1) >= le64_to_cpu(info->version))
575 goto no_change; 582 goto no_change;
576 583
577 issued = __ceph_caps_issued(ci, &implemented); 584 issued = __ceph_caps_issued(ci, &implemented);
@@ -605,7 +612,14 @@ static int fill_inode(struct inode *inode,
605 le32_to_cpu(info->time_warp_seq), 612 le32_to_cpu(info->time_warp_seq),
606 &ctime, &mtime, &atime); 613 &ctime, &mtime, &atime);
607 614
608 ci->i_max_size = le64_to_cpu(info->max_size); 615 /* only update max_size on auth cap */
616 if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
617 ci->i_max_size != le64_to_cpu(info->max_size)) {
618 dout("max_size %lld -> %llu\n", ci->i_max_size,
619 le64_to_cpu(info->max_size));
620 ci->i_max_size = le64_to_cpu(info->max_size);
621 }
622
609 ci->i_layout = info->layout; 623 ci->i_layout = info->layout;
610 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; 624 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
611 625
@@ -1054,7 +1068,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1054 ininfo = rinfo->targeti.in; 1068 ininfo = rinfo->targeti.in;
1055 vino.ino = le64_to_cpu(ininfo->ino); 1069 vino.ino = le64_to_cpu(ininfo->ino);
1056 vino.snap = le64_to_cpu(ininfo->snapid); 1070 vino.snap = le64_to_cpu(ininfo->snapid);
1057 if (!dn->d_inode) { 1071 in = dn->d_inode;
1072 if (!in) {
1058 in = ceph_get_inode(sb, vino); 1073 in = ceph_get_inode(sb, vino);
1059 if (IS_ERR(in)) { 1074 if (IS_ERR(in)) {
1060 pr_err("fill_trace bad get_inode " 1075 pr_err("fill_trace bad get_inode "
@@ -1385,11 +1400,8 @@ static void ceph_invalidate_work(struct work_struct *work)
1385 spin_lock(&inode->i_lock); 1400 spin_lock(&inode->i_lock);
1386 dout("invalidate_pages %p gen %d revoking %d\n", inode, 1401 dout("invalidate_pages %p gen %d revoking %d\n", inode,
1387 ci->i_rdcache_gen, ci->i_rdcache_revoking); 1402 ci->i_rdcache_gen, ci->i_rdcache_revoking);
1388 if (ci->i_rdcache_gen == 0 || 1403 if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
1389 ci->i_rdcache_revoking != ci->i_rdcache_gen) {
1390 BUG_ON(ci->i_rdcache_revoking > ci->i_rdcache_gen);
1391 /* nevermind! */ 1404 /* nevermind! */
1392 ci->i_rdcache_revoking = 0;
1393 spin_unlock(&inode->i_lock); 1405 spin_unlock(&inode->i_lock);
1394 goto out; 1406 goto out;
1395 } 1407 }
@@ -1399,15 +1411,16 @@ static void ceph_invalidate_work(struct work_struct *work)
1399 ceph_invalidate_nondirty_pages(inode->i_mapping); 1411 ceph_invalidate_nondirty_pages(inode->i_mapping);
1400 1412
1401 spin_lock(&inode->i_lock); 1413 spin_lock(&inode->i_lock);
1402 if (orig_gen == ci->i_rdcache_gen) { 1414 if (orig_gen == ci->i_rdcache_gen &&
1415 orig_gen == ci->i_rdcache_revoking) {
1403 dout("invalidate_pages %p gen %d successful\n", inode, 1416 dout("invalidate_pages %p gen %d successful\n", inode,
1404 ci->i_rdcache_gen); 1417 ci->i_rdcache_gen);
1405 ci->i_rdcache_gen = 0; 1418 ci->i_rdcache_revoking--;
1406 ci->i_rdcache_revoking = 0;
1407 check = 1; 1419 check = 1;
1408 } else { 1420 } else {
1409 dout("invalidate_pages %p gen %d raced, gen now %d\n", 1421 dout("invalidate_pages %p gen %d raced, now %d revoking %d\n",
1410 inode, orig_gen, ci->i_rdcache_gen); 1422 inode, orig_gen, ci->i_rdcache_gen,
1423 ci->i_rdcache_revoking);
1411 } 1424 }
1412 spin_unlock(&inode->i_lock); 1425 spin_unlock(&inode->i_lock);
1413 1426
@@ -1738,7 +1751,7 @@ int ceph_do_getattr(struct inode *inode, int mask)
1738 return 0; 1751 return 0;
1739 } 1752 }
1740 1753
1741 dout("do_getattr inode %p mask %s\n", inode, ceph_cap_string(mask)); 1754 dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode);
1742 if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) 1755 if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
1743 return 0; 1756 return 0;
1744 1757
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 7799cac2b629..098b18508479 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -528,6 +528,9 @@ static void __register_request(struct ceph_mds_client *mdsc,
528 ceph_mdsc_get_request(req); 528 ceph_mdsc_get_request(req);
529 __insert_request(mdsc, req); 529 __insert_request(mdsc, req);
530 530
531 req->r_uid = current_fsuid();
532 req->r_gid = current_fsgid();
533
531 if (dir) { 534 if (dir) {
532 struct ceph_inode_info *ci = ceph_inode(dir); 535 struct ceph_inode_info *ci = ceph_inode(dir);
533 536
@@ -1587,8 +1590,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1587 1590
1588 head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); 1591 head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
1589 head->op = cpu_to_le32(req->r_op); 1592 head->op = cpu_to_le32(req->r_op);
1590 head->caller_uid = cpu_to_le32(current_fsuid()); 1593 head->caller_uid = cpu_to_le32(req->r_uid);
1591 head->caller_gid = cpu_to_le32(current_fsgid()); 1594 head->caller_gid = cpu_to_le32(req->r_gid);
1592 head->args = req->r_args; 1595 head->args = req->r_args;
1593 1596
1594 ceph_encode_filepath(&p, end, ino1, path1); 1597 ceph_encode_filepath(&p, end, ino1, path1);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index d66d63c72355..9341fd4f1432 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -170,6 +170,8 @@ struct ceph_mds_request {
170 170
171 union ceph_mds_request_args r_args; 171 union ceph_mds_request_args r_args;
172 int r_fmode; /* file mode, if expecting cap */ 172 int r_fmode; /* file mode, if expecting cap */
173 uid_t r_uid;
174 gid_t r_gid;
173 175
174 /* for choosing which mds to send this request to */ 176 /* for choosing which mds to send this request to */
175 int r_direct_mode; 177 int r_direct_mode;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 1886294e12f7..7f01728a4657 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -293,9 +293,7 @@ struct ceph_inode_info {
293 int i_rd_ref, i_rdcache_ref, i_wr_ref; 293 int i_rd_ref, i_rdcache_ref, i_wr_ref;
294 int i_wrbuffer_ref, i_wrbuffer_ref_head; 294 int i_wrbuffer_ref, i_wrbuffer_ref_head;
295 u32 i_shared_gen; /* increment each time we get FILE_SHARED */ 295 u32 i_shared_gen; /* increment each time we get FILE_SHARED */
296 u32 i_rdcache_gen; /* we increment this each time we get 296 u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */
297 FILE_CACHE. If it's non-zero, we
298 _may_ have cached pages. */
299 u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ 297 u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
300 298
301 struct list_head i_unsafe_writes; /* uncommitted sync writes */ 299 struct list_head i_unsafe_writes; /* uncommitted sync writes */