diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-11-19 18:32:22 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-11-19 18:32:22 -0500 |
commit | 76db8ac45fc738f7d7664fe9b56d15c594a45228 (patch) | |
tree | eca23feab074d505b375e27714473f4ad337bd85 /fs/ceph | |
parent | caf8394524fdc039b090cd3af99157e9e76f4f06 (diff) | |
parent | 3105c19c450ac7c18ab28c19d364b588767261b3 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: fix readdir EOVERFLOW on 32-bit archs
ceph: fix frag offset for non-leftmost frags
ceph: fix dangling pointer
ceph: explicitly specify page alignment in network messages
ceph: make page alignment explicit in osd interface
ceph: fix comment, remove extraneous args
ceph: fix update of ctime from MDS
ceph: fix version check on racing inode updates
ceph: fix uid/gid on resent mds requests
ceph: fix rdcache_gen usage and invalidate
ceph: re-request max_size if cap auth changes
ceph: only let auth caps update max_size
ceph: fix open for write on clustered mds
ceph: fix bad pointer dereference in ceph_fill_trace
ceph: fix small seq message skipping
Revert "ceph: update issue_seq on cap grant"
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/addr.c | 6 | ||||
-rw-r--r-- | fs/ceph/caps.c | 17 | ||||
-rw-r--r-- | fs/ceph/dir.c | 16 | ||||
-rw-r--r-- | fs/ceph/file.c | 52 | ||||
-rw-r--r-- | fs/ceph/inode.c | 49 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 7 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 2 | ||||
-rw-r--r-- | fs/ceph/super.h | 4 |
8 files changed, 98 insertions, 55 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index e9c874abc9e1..561438b6a50c 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -204,7 +204,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) | |||
204 | err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, | 204 | err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, |
205 | page->index << PAGE_CACHE_SHIFT, &len, | 205 | page->index << PAGE_CACHE_SHIFT, &len, |
206 | ci->i_truncate_seq, ci->i_truncate_size, | 206 | ci->i_truncate_seq, ci->i_truncate_size, |
207 | &page, 1); | 207 | &page, 1, 0); |
208 | if (err == -ENOENT) | 208 | if (err == -ENOENT) |
209 | err = 0; | 209 | err = 0; |
210 | if (err < 0) { | 210 | if (err < 0) { |
@@ -287,7 +287,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
287 | rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, | 287 | rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, |
288 | offset, &len, | 288 | offset, &len, |
289 | ci->i_truncate_seq, ci->i_truncate_size, | 289 | ci->i_truncate_seq, ci->i_truncate_size, |
290 | pages, nr_pages); | 290 | pages, nr_pages, 0); |
291 | if (rc == -ENOENT) | 291 | if (rc == -ENOENT) |
292 | rc = 0; | 292 | rc = 0; |
293 | if (rc < 0) | 293 | if (rc < 0) |
@@ -774,7 +774,7 @@ get_more_pages: | |||
774 | snapc, do_sync, | 774 | snapc, do_sync, |
775 | ci->i_truncate_seq, | 775 | ci->i_truncate_seq, |
776 | ci->i_truncate_size, | 776 | ci->i_truncate_size, |
777 | &inode->i_mtime, true, 1); | 777 | &inode->i_mtime, true, 1, 0); |
778 | max_pages = req->r_num_pages; | 778 | max_pages = req->r_num_pages; |
779 | 779 | ||
780 | alloc_page_vec(fsc, req); | 780 | alloc_page_vec(fsc, req); |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 98ab13e2b71d..60d27bc9eb83 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -1430,8 +1430,8 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
1430 | invalidating_gen == ci->i_rdcache_gen) { | 1430 | invalidating_gen == ci->i_rdcache_gen) { |
1431 | /* success. */ | 1431 | /* success. */ |
1432 | dout("try_nonblocking_invalidate %p success\n", inode); | 1432 | dout("try_nonblocking_invalidate %p success\n", inode); |
1433 | ci->i_rdcache_gen = 0; | 1433 | /* save any racing async invalidate some trouble */ |
1434 | ci->i_rdcache_revoking = 0; | 1434 | ci->i_rdcache_revoking = ci->i_rdcache_gen - 1; |
1435 | return 0; | 1435 | return 0; |
1436 | } | 1436 | } |
1437 | dout("try_nonblocking_invalidate %p failed\n", inode); | 1437 | dout("try_nonblocking_invalidate %p failed\n", inode); |
@@ -2273,8 +2273,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2273 | { | 2273 | { |
2274 | struct ceph_inode_info *ci = ceph_inode(inode); | 2274 | struct ceph_inode_info *ci = ceph_inode(inode); |
2275 | int mds = session->s_mds; | 2275 | int mds = session->s_mds; |
2276 | unsigned seq = le32_to_cpu(grant->seq); | 2276 | int seq = le32_to_cpu(grant->seq); |
2277 | unsigned issue_seq = le32_to_cpu(grant->issue_seq); | ||
2278 | int newcaps = le32_to_cpu(grant->caps); | 2277 | int newcaps = le32_to_cpu(grant->caps); |
2279 | int issued, implemented, used, wanted, dirty; | 2278 | int issued, implemented, used, wanted, dirty; |
2280 | u64 size = le64_to_cpu(grant->size); | 2279 | u64 size = le64_to_cpu(grant->size); |
@@ -2286,8 +2285,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2286 | int revoked_rdcache = 0; | 2285 | int revoked_rdcache = 0; |
2287 | int queue_invalidate = 0; | 2286 | int queue_invalidate = 0; |
2288 | 2287 | ||
2289 | dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n", | 2288 | dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", |
2290 | inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps)); | 2289 | inode, cap, mds, seq, ceph_cap_string(newcaps)); |
2291 | dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, | 2290 | dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, |
2292 | inode->i_size); | 2291 | inode->i_size); |
2293 | 2292 | ||
@@ -2383,7 +2382,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2383 | } | 2382 | } |
2384 | 2383 | ||
2385 | cap->seq = seq; | 2384 | cap->seq = seq; |
2386 | cap->issue_seq = issue_seq; | ||
2387 | 2385 | ||
2388 | /* file layout may have changed */ | 2386 | /* file layout may have changed */ |
2389 | ci->i_layout = grant->layout; | 2387 | ci->i_layout = grant->layout; |
@@ -2691,6 +2689,11 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, | |||
2691 | NULL /* no caps context */); | 2689 | NULL /* no caps context */); |
2692 | try_flush_caps(inode, session, NULL); | 2690 | try_flush_caps(inode, session, NULL); |
2693 | up_read(&mdsc->snap_rwsem); | 2691 | up_read(&mdsc->snap_rwsem); |
2692 | |||
2693 | /* make sure we re-request max_size, if necessary */ | ||
2694 | spin_lock(&inode->i_lock); | ||
2695 | ci->i_requested_max_size = 0; | ||
2696 | spin_unlock(&inode->i_lock); | ||
2694 | } | 2697 | } |
2695 | 2698 | ||
2696 | /* | 2699 | /* |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index e0a2dc6fcafc..7d447af84ec4 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -336,7 +336,10 @@ more: | |||
336 | if (req->r_reply_info.dir_end) { | 336 | if (req->r_reply_info.dir_end) { |
337 | kfree(fi->last_name); | 337 | kfree(fi->last_name); |
338 | fi->last_name = NULL; | 338 | fi->last_name = NULL; |
339 | fi->next_offset = 2; | 339 | if (ceph_frag_is_rightmost(frag)) |
340 | fi->next_offset = 2; | ||
341 | else | ||
342 | fi->next_offset = 0; | ||
340 | } else { | 343 | } else { |
341 | rinfo = &req->r_reply_info; | 344 | rinfo = &req->r_reply_info; |
342 | err = note_last_dentry(fi, | 345 | err = note_last_dentry(fi, |
@@ -355,18 +358,22 @@ more: | |||
355 | u64 pos = ceph_make_fpos(frag, off); | 358 | u64 pos = ceph_make_fpos(frag, off); |
356 | struct ceph_mds_reply_inode *in = | 359 | struct ceph_mds_reply_inode *in = |
357 | rinfo->dir_in[off - fi->offset].in; | 360 | rinfo->dir_in[off - fi->offset].in; |
361 | struct ceph_vino vino; | ||
362 | ino_t ino; | ||
363 | |||
358 | dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", | 364 | dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", |
359 | off, off - fi->offset, rinfo->dir_nr, pos, | 365 | off, off - fi->offset, rinfo->dir_nr, pos, |
360 | rinfo->dir_dname_len[off - fi->offset], | 366 | rinfo->dir_dname_len[off - fi->offset], |
361 | rinfo->dir_dname[off - fi->offset], in); | 367 | rinfo->dir_dname[off - fi->offset], in); |
362 | BUG_ON(!in); | 368 | BUG_ON(!in); |
363 | ftype = le32_to_cpu(in->mode) >> 12; | 369 | ftype = le32_to_cpu(in->mode) >> 12; |
370 | vino.ino = le64_to_cpu(in->ino); | ||
371 | vino.snap = le64_to_cpu(in->snapid); | ||
372 | ino = ceph_vino_to_ino(vino); | ||
364 | if (filldir(dirent, | 373 | if (filldir(dirent, |
365 | rinfo->dir_dname[off - fi->offset], | 374 | rinfo->dir_dname[off - fi->offset], |
366 | rinfo->dir_dname_len[off - fi->offset], | 375 | rinfo->dir_dname_len[off - fi->offset], |
367 | pos, | 376 | pos, ino, ftype) < 0) { |
368 | le64_to_cpu(in->ino), | ||
369 | ftype) < 0) { | ||
370 | dout("filldir stopping us...\n"); | 377 | dout("filldir stopping us...\n"); |
371 | return 0; | 378 | return 0; |
372 | } | 379 | } |
@@ -414,6 +421,7 @@ static void reset_readdir(struct ceph_file_info *fi) | |||
414 | fi->last_readdir = NULL; | 421 | fi->last_readdir = NULL; |
415 | } | 422 | } |
416 | kfree(fi->last_name); | 423 | kfree(fi->last_name); |
424 | fi->last_name = NULL; | ||
417 | fi->next_offset = 2; /* compensate for . and .. */ | 425 | fi->next_offset = 2; /* compensate for . and .. */ |
418 | if (fi->dentry) { | 426 | if (fi->dentry) { |
419 | dput(fi->dentry); | 427 | dput(fi->dentry); |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index e77c28cf3690..8d79b8912e31 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -154,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file) | |||
154 | } | 154 | } |
155 | 155 | ||
156 | /* | 156 | /* |
157 | * No need to block if we have any caps. Update wanted set | 157 | * No need to block if we have caps on the auth MDS (for |
158 | * write) or any MDS (for read). Update wanted set | ||
158 | * asynchronously. | 159 | * asynchronously. |
159 | */ | 160 | */ |
160 | spin_lock(&inode->i_lock); | 161 | spin_lock(&inode->i_lock); |
161 | if (__ceph_is_any_real_caps(ci)) { | 162 | if (__ceph_is_any_real_caps(ci) && |
163 | (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { | ||
162 | int mds_wanted = __ceph_caps_mds_wanted(ci); | 164 | int mds_wanted = __ceph_caps_mds_wanted(ci); |
163 | int issued = __ceph_caps_issued(ci, NULL); | 165 | int issued = __ceph_caps_issued(ci, NULL); |
164 | 166 | ||
@@ -280,11 +282,12 @@ int ceph_release(struct inode *inode, struct file *file) | |||
280 | static int striped_read(struct inode *inode, | 282 | static int striped_read(struct inode *inode, |
281 | u64 off, u64 len, | 283 | u64 off, u64 len, |
282 | struct page **pages, int num_pages, | 284 | struct page **pages, int num_pages, |
283 | int *checkeof) | 285 | int *checkeof, bool align_to_pages) |
284 | { | 286 | { |
285 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | 287 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
286 | struct ceph_inode_info *ci = ceph_inode(inode); | 288 | struct ceph_inode_info *ci = ceph_inode(inode); |
287 | u64 pos, this_len; | 289 | u64 pos, this_len; |
290 | int io_align, page_align; | ||
288 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ | 291 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ |
289 | int left, pages_left; | 292 | int left, pages_left; |
290 | int read; | 293 | int read; |
@@ -300,14 +303,19 @@ static int striped_read(struct inode *inode, | |||
300 | page_pos = pages; | 303 | page_pos = pages; |
301 | pages_left = num_pages; | 304 | pages_left = num_pages; |
302 | read = 0; | 305 | read = 0; |
306 | io_align = off & ~PAGE_MASK; | ||
303 | 307 | ||
304 | more: | 308 | more: |
309 | if (align_to_pages) | ||
310 | page_align = (pos - io_align) & ~PAGE_MASK; | ||
311 | else | ||
312 | page_align = pos & ~PAGE_MASK; | ||
305 | this_len = left; | 313 | this_len = left; |
306 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), | 314 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), |
307 | &ci->i_layout, pos, &this_len, | 315 | &ci->i_layout, pos, &this_len, |
308 | ci->i_truncate_seq, | 316 | ci->i_truncate_seq, |
309 | ci->i_truncate_size, | 317 | ci->i_truncate_size, |
310 | page_pos, pages_left); | 318 | page_pos, pages_left, page_align); |
311 | hit_stripe = this_len < left; | 319 | hit_stripe = this_len < left; |
312 | was_short = ret >= 0 && ret < this_len; | 320 | was_short = ret >= 0 && ret < this_len; |
313 | if (ret == -ENOENT) | 321 | if (ret == -ENOENT) |
@@ -374,26 +382,25 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
374 | dout("sync_read on file %p %llu~%u %s\n", file, off, len, | 382 | dout("sync_read on file %p %llu~%u %s\n", file, off, len, |
375 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); | 383 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); |
376 | 384 | ||
377 | if (file->f_flags & O_DIRECT) { | 385 | if (file->f_flags & O_DIRECT) |
378 | pages = ceph_get_direct_page_vector(data, num_pages, off, len); | 386 | pages = ceph_get_direct_page_vector(data, num_pages); |
379 | 387 | else | |
380 | /* | ||
381 | * flush any page cache pages in this range. this | ||
382 | * will make concurrent normal and O_DIRECT io slow, | ||
383 | * but it will at least behave sensibly when they are | ||
384 | * in sequence. | ||
385 | */ | ||
386 | } else { | ||
387 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); | 388 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); |
388 | } | ||
389 | if (IS_ERR(pages)) | 389 | if (IS_ERR(pages)) |
390 | return PTR_ERR(pages); | 390 | return PTR_ERR(pages); |
391 | 391 | ||
392 | /* | ||
393 | * flush any page cache pages in this range. this | ||
394 | * will make concurrent normal and sync io slow, | ||
395 | * but it will at least behave sensibly when they are | ||
396 | * in sequence. | ||
397 | */ | ||
392 | ret = filemap_write_and_wait(inode->i_mapping); | 398 | ret = filemap_write_and_wait(inode->i_mapping); |
393 | if (ret < 0) | 399 | if (ret < 0) |
394 | goto done; | 400 | goto done; |
395 | 401 | ||
396 | ret = striped_read(inode, off, len, pages, num_pages, checkeof); | 402 | ret = striped_read(inode, off, len, pages, num_pages, checkeof, |
403 | file->f_flags & O_DIRECT); | ||
397 | 404 | ||
398 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) | 405 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) |
399 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); | 406 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); |
@@ -448,6 +455,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
448 | int flags; | 455 | int flags; |
449 | int do_sync = 0; | 456 | int do_sync = 0; |
450 | int check_caps = 0; | 457 | int check_caps = 0; |
458 | int page_align, io_align; | ||
451 | int ret; | 459 | int ret; |
452 | struct timespec mtime = CURRENT_TIME; | 460 | struct timespec mtime = CURRENT_TIME; |
453 | 461 | ||
@@ -462,6 +470,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
462 | else | 470 | else |
463 | pos = *offset; | 471 | pos = *offset; |
464 | 472 | ||
473 | io_align = pos & ~PAGE_MASK; | ||
474 | |||
465 | ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); | 475 | ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); |
466 | if (ret < 0) | 476 | if (ret < 0) |
467 | return ret; | 477 | return ret; |
@@ -486,20 +496,26 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
486 | */ | 496 | */ |
487 | more: | 497 | more: |
488 | len = left; | 498 | len = left; |
499 | if (file->f_flags & O_DIRECT) | ||
500 | /* write from beginning of first page, regardless of | ||
501 | io alignment */ | ||
502 | page_align = (pos - io_align) & ~PAGE_MASK; | ||
503 | else | ||
504 | page_align = pos & ~PAGE_MASK; | ||
489 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, | 505 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, |
490 | ceph_vino(inode), pos, &len, | 506 | ceph_vino(inode), pos, &len, |
491 | CEPH_OSD_OP_WRITE, flags, | 507 | CEPH_OSD_OP_WRITE, flags, |
492 | ci->i_snap_realm->cached_context, | 508 | ci->i_snap_realm->cached_context, |
493 | do_sync, | 509 | do_sync, |
494 | ci->i_truncate_seq, ci->i_truncate_size, | 510 | ci->i_truncate_seq, ci->i_truncate_size, |
495 | &mtime, false, 2); | 511 | &mtime, false, 2, page_align); |
496 | if (!req) | 512 | if (!req) |
497 | return -ENOMEM; | 513 | return -ENOMEM; |
498 | 514 | ||
499 | num_pages = calc_pages_for(pos, len); | 515 | num_pages = calc_pages_for(pos, len); |
500 | 516 | ||
501 | if (file->f_flags & O_DIRECT) { | 517 | if (file->f_flags & O_DIRECT) { |
502 | pages = ceph_get_direct_page_vector(data, num_pages, pos, len); | 518 | pages = ceph_get_direct_page_vector(data, num_pages); |
503 | if (IS_ERR(pages)) { | 519 | if (IS_ERR(pages)) { |
504 | ret = PTR_ERR(pages); | 520 | ret = PTR_ERR(pages); |
505 | goto out; | 521 | goto out; |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 524b80be4482..bf1286588f26 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -470,7 +470,9 @@ void ceph_fill_file_time(struct inode *inode, int issued, | |||
470 | 470 | ||
471 | if (issued & (CEPH_CAP_FILE_EXCL| | 471 | if (issued & (CEPH_CAP_FILE_EXCL| |
472 | CEPH_CAP_FILE_WR| | 472 | CEPH_CAP_FILE_WR| |
473 | CEPH_CAP_FILE_BUFFER)) { | 473 | CEPH_CAP_FILE_BUFFER| |
474 | CEPH_CAP_AUTH_EXCL| | ||
475 | CEPH_CAP_XATTR_EXCL)) { | ||
474 | if (timespec_compare(ctime, &inode->i_ctime) > 0) { | 476 | if (timespec_compare(ctime, &inode->i_ctime) > 0) { |
475 | dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", | 477 | dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", |
476 | inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, | 478 | inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, |
@@ -510,7 +512,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, | |||
510 | warn = 1; | 512 | warn = 1; |
511 | } | 513 | } |
512 | } else { | 514 | } else { |
513 | /* we have no write caps; whatever the MDS says is true */ | 515 | /* we have no write|excl caps; whatever the MDS says is true */ |
514 | if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { | 516 | if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { |
515 | inode->i_ctime = *ctime; | 517 | inode->i_ctime = *ctime; |
516 | inode->i_mtime = *mtime; | 518 | inode->i_mtime = *mtime; |
@@ -566,12 +568,17 @@ static int fill_inode(struct inode *inode, | |||
566 | 568 | ||
567 | /* | 569 | /* |
568 | * provided version will be odd if inode value is projected, | 570 | * provided version will be odd if inode value is projected, |
569 | * even if stable. skip the update if we have a newer info | 571 | * even if stable. skip the update if we have newer stable |
570 | * (e.g., due to inode info racing form multiple MDSs), or if | 572 | * info (ours>=theirs, e.g. due to racing mds replies), unless |
571 | * we are getting projected (unstable) inode info. | 573 | * we are getting projected (unstable) info (in which case the |
574 | * version is odd, and we want ours>theirs). | ||
575 | * us them | ||
576 | * 2 2 skip | ||
577 | * 3 2 skip | ||
578 | * 3 3 update | ||
572 | */ | 579 | */ |
573 | if (le64_to_cpu(info->version) > 0 && | 580 | if (le64_to_cpu(info->version) > 0 && |
574 | (ci->i_version & ~1) > le64_to_cpu(info->version)) | 581 | (ci->i_version & ~1) >= le64_to_cpu(info->version)) |
575 | goto no_change; | 582 | goto no_change; |
576 | 583 | ||
577 | issued = __ceph_caps_issued(ci, &implemented); | 584 | issued = __ceph_caps_issued(ci, &implemented); |
@@ -605,7 +612,14 @@ static int fill_inode(struct inode *inode, | |||
605 | le32_to_cpu(info->time_warp_seq), | 612 | le32_to_cpu(info->time_warp_seq), |
606 | &ctime, &mtime, &atime); | 613 | &ctime, &mtime, &atime); |
607 | 614 | ||
608 | ci->i_max_size = le64_to_cpu(info->max_size); | 615 | /* only update max_size on auth cap */ |
616 | if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && | ||
617 | ci->i_max_size != le64_to_cpu(info->max_size)) { | ||
618 | dout("max_size %lld -> %llu\n", ci->i_max_size, | ||
619 | le64_to_cpu(info->max_size)); | ||
620 | ci->i_max_size = le64_to_cpu(info->max_size); | ||
621 | } | ||
622 | |||
609 | ci->i_layout = info->layout; | 623 | ci->i_layout = info->layout; |
610 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; | 624 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; |
611 | 625 | ||
@@ -1054,7 +1068,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1054 | ininfo = rinfo->targeti.in; | 1068 | ininfo = rinfo->targeti.in; |
1055 | vino.ino = le64_to_cpu(ininfo->ino); | 1069 | vino.ino = le64_to_cpu(ininfo->ino); |
1056 | vino.snap = le64_to_cpu(ininfo->snapid); | 1070 | vino.snap = le64_to_cpu(ininfo->snapid); |
1057 | if (!dn->d_inode) { | 1071 | in = dn->d_inode; |
1072 | if (!in) { | ||
1058 | in = ceph_get_inode(sb, vino); | 1073 | in = ceph_get_inode(sb, vino); |
1059 | if (IS_ERR(in)) { | 1074 | if (IS_ERR(in)) { |
1060 | pr_err("fill_trace bad get_inode " | 1075 | pr_err("fill_trace bad get_inode " |
@@ -1385,11 +1400,8 @@ static void ceph_invalidate_work(struct work_struct *work) | |||
1385 | spin_lock(&inode->i_lock); | 1400 | spin_lock(&inode->i_lock); |
1386 | dout("invalidate_pages %p gen %d revoking %d\n", inode, | 1401 | dout("invalidate_pages %p gen %d revoking %d\n", inode, |
1387 | ci->i_rdcache_gen, ci->i_rdcache_revoking); | 1402 | ci->i_rdcache_gen, ci->i_rdcache_revoking); |
1388 | if (ci->i_rdcache_gen == 0 || | 1403 | if (ci->i_rdcache_revoking != ci->i_rdcache_gen) { |
1389 | ci->i_rdcache_revoking != ci->i_rdcache_gen) { | ||
1390 | BUG_ON(ci->i_rdcache_revoking > ci->i_rdcache_gen); | ||
1391 | /* nevermind! */ | 1404 | /* nevermind! */ |
1392 | ci->i_rdcache_revoking = 0; | ||
1393 | spin_unlock(&inode->i_lock); | 1405 | spin_unlock(&inode->i_lock); |
1394 | goto out; | 1406 | goto out; |
1395 | } | 1407 | } |
@@ -1399,15 +1411,16 @@ static void ceph_invalidate_work(struct work_struct *work) | |||
1399 | ceph_invalidate_nondirty_pages(inode->i_mapping); | 1411 | ceph_invalidate_nondirty_pages(inode->i_mapping); |
1400 | 1412 | ||
1401 | spin_lock(&inode->i_lock); | 1413 | spin_lock(&inode->i_lock); |
1402 | if (orig_gen == ci->i_rdcache_gen) { | 1414 | if (orig_gen == ci->i_rdcache_gen && |
1415 | orig_gen == ci->i_rdcache_revoking) { | ||
1403 | dout("invalidate_pages %p gen %d successful\n", inode, | 1416 | dout("invalidate_pages %p gen %d successful\n", inode, |
1404 | ci->i_rdcache_gen); | 1417 | ci->i_rdcache_gen); |
1405 | ci->i_rdcache_gen = 0; | 1418 | ci->i_rdcache_revoking--; |
1406 | ci->i_rdcache_revoking = 0; | ||
1407 | check = 1; | 1419 | check = 1; |
1408 | } else { | 1420 | } else { |
1409 | dout("invalidate_pages %p gen %d raced, gen now %d\n", | 1421 | dout("invalidate_pages %p gen %d raced, now %d revoking %d\n", |
1410 | inode, orig_gen, ci->i_rdcache_gen); | 1422 | inode, orig_gen, ci->i_rdcache_gen, |
1423 | ci->i_rdcache_revoking); | ||
1411 | } | 1424 | } |
1412 | spin_unlock(&inode->i_lock); | 1425 | spin_unlock(&inode->i_lock); |
1413 | 1426 | ||
@@ -1738,7 +1751,7 @@ int ceph_do_getattr(struct inode *inode, int mask) | |||
1738 | return 0; | 1751 | return 0; |
1739 | } | 1752 | } |
1740 | 1753 | ||
1741 | dout("do_getattr inode %p mask %s\n", inode, ceph_cap_string(mask)); | 1754 | dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode); |
1742 | if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) | 1755 | if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) |
1743 | return 0; | 1756 | return 0; |
1744 | 1757 | ||
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 7799cac2b629..098b18508479 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -528,6 +528,9 @@ static void __register_request(struct ceph_mds_client *mdsc, | |||
528 | ceph_mdsc_get_request(req); | 528 | ceph_mdsc_get_request(req); |
529 | __insert_request(mdsc, req); | 529 | __insert_request(mdsc, req); |
530 | 530 | ||
531 | req->r_uid = current_fsuid(); | ||
532 | req->r_gid = current_fsgid(); | ||
533 | |||
531 | if (dir) { | 534 | if (dir) { |
532 | struct ceph_inode_info *ci = ceph_inode(dir); | 535 | struct ceph_inode_info *ci = ceph_inode(dir); |
533 | 536 | ||
@@ -1587,8 +1590,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
1587 | 1590 | ||
1588 | head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); | 1591 | head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); |
1589 | head->op = cpu_to_le32(req->r_op); | 1592 | head->op = cpu_to_le32(req->r_op); |
1590 | head->caller_uid = cpu_to_le32(current_fsuid()); | 1593 | head->caller_uid = cpu_to_le32(req->r_uid); |
1591 | head->caller_gid = cpu_to_le32(current_fsgid()); | 1594 | head->caller_gid = cpu_to_le32(req->r_gid); |
1592 | head->args = req->r_args; | 1595 | head->args = req->r_args; |
1593 | 1596 | ||
1594 | ceph_encode_filepath(&p, end, ino1, path1); | 1597 | ceph_encode_filepath(&p, end, ino1, path1); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index d66d63c72355..9341fd4f1432 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -170,6 +170,8 @@ struct ceph_mds_request { | |||
170 | 170 | ||
171 | union ceph_mds_request_args r_args; | 171 | union ceph_mds_request_args r_args; |
172 | int r_fmode; /* file mode, if expecting cap */ | 172 | int r_fmode; /* file mode, if expecting cap */ |
173 | uid_t r_uid; | ||
174 | gid_t r_gid; | ||
173 | 175 | ||
174 | /* for choosing which mds to send this request to */ | 176 | /* for choosing which mds to send this request to */ |
175 | int r_direct_mode; | 177 | int r_direct_mode; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 1886294e12f7..7f01728a4657 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -293,9 +293,7 @@ struct ceph_inode_info { | |||
293 | int i_rd_ref, i_rdcache_ref, i_wr_ref; | 293 | int i_rd_ref, i_rdcache_ref, i_wr_ref; |
294 | int i_wrbuffer_ref, i_wrbuffer_ref_head; | 294 | int i_wrbuffer_ref, i_wrbuffer_ref_head; |
295 | u32 i_shared_gen; /* increment each time we get FILE_SHARED */ | 295 | u32 i_shared_gen; /* increment each time we get FILE_SHARED */ |
296 | u32 i_rdcache_gen; /* we increment this each time we get | 296 | u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ |
297 | FILE_CACHE. If it's non-zero, we | ||
298 | _may_ have cached pages. */ | ||
299 | u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ | 297 | u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ |
300 | 298 | ||
301 | struct list_head i_unsafe_writes; /* uncommitted sync writes */ | 299 | struct list_head i_unsafe_writes; /* uncommitted sync writes */ |