diff options
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/addr.c | 6 | ||||
-rw-r--r-- | fs/ceph/caps.c | 17 | ||||
-rw-r--r-- | fs/ceph/dir.c | 23 | ||||
-rw-r--r-- | fs/ceph/file.c | 65 | ||||
-rw-r--r-- | fs/ceph/inode.c | 50 | ||||
-rw-r--r-- | fs/ceph/ioctl.h | 2 | ||||
-rw-r--r-- | fs/ceph/locks.c | 94 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 49 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 33 | ||||
-rw-r--r-- | fs/ceph/super.h | 4 |
10 files changed, 221 insertions, 122 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index e9c874abc9e1..561438b6a50c 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -204,7 +204,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) | |||
204 | err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, | 204 | err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, |
205 | page->index << PAGE_CACHE_SHIFT, &len, | 205 | page->index << PAGE_CACHE_SHIFT, &len, |
206 | ci->i_truncate_seq, ci->i_truncate_size, | 206 | ci->i_truncate_seq, ci->i_truncate_size, |
207 | &page, 1); | 207 | &page, 1, 0); |
208 | if (err == -ENOENT) | 208 | if (err == -ENOENT) |
209 | err = 0; | 209 | err = 0; |
210 | if (err < 0) { | 210 | if (err < 0) { |
@@ -287,7 +287,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
287 | rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, | 287 | rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, |
288 | offset, &len, | 288 | offset, &len, |
289 | ci->i_truncate_seq, ci->i_truncate_size, | 289 | ci->i_truncate_seq, ci->i_truncate_size, |
290 | pages, nr_pages); | 290 | pages, nr_pages, 0); |
291 | if (rc == -ENOENT) | 291 | if (rc == -ENOENT) |
292 | rc = 0; | 292 | rc = 0; |
293 | if (rc < 0) | 293 | if (rc < 0) |
@@ -774,7 +774,7 @@ get_more_pages: | |||
774 | snapc, do_sync, | 774 | snapc, do_sync, |
775 | ci->i_truncate_seq, | 775 | ci->i_truncate_seq, |
776 | ci->i_truncate_size, | 776 | ci->i_truncate_size, |
777 | &inode->i_mtime, true, 1); | 777 | &inode->i_mtime, true, 1, 0); |
778 | max_pages = req->r_num_pages; | 778 | max_pages = req->r_num_pages; |
779 | 779 | ||
780 | alloc_page_vec(fsc, req); | 780 | alloc_page_vec(fsc, req); |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 98ab13e2b71d..60d27bc9eb83 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -1430,8 +1430,8 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
1430 | invalidating_gen == ci->i_rdcache_gen) { | 1430 | invalidating_gen == ci->i_rdcache_gen) { |
1431 | /* success. */ | 1431 | /* success. */ |
1432 | dout("try_nonblocking_invalidate %p success\n", inode); | 1432 | dout("try_nonblocking_invalidate %p success\n", inode); |
1433 | ci->i_rdcache_gen = 0; | 1433 | /* save any racing async invalidate some trouble */ |
1434 | ci->i_rdcache_revoking = 0; | 1434 | ci->i_rdcache_revoking = ci->i_rdcache_gen - 1; |
1435 | return 0; | 1435 | return 0; |
1436 | } | 1436 | } |
1437 | dout("try_nonblocking_invalidate %p failed\n", inode); | 1437 | dout("try_nonblocking_invalidate %p failed\n", inode); |
@@ -2273,8 +2273,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2273 | { | 2273 | { |
2274 | struct ceph_inode_info *ci = ceph_inode(inode); | 2274 | struct ceph_inode_info *ci = ceph_inode(inode); |
2275 | int mds = session->s_mds; | 2275 | int mds = session->s_mds; |
2276 | unsigned seq = le32_to_cpu(grant->seq); | 2276 | int seq = le32_to_cpu(grant->seq); |
2277 | unsigned issue_seq = le32_to_cpu(grant->issue_seq); | ||
2278 | int newcaps = le32_to_cpu(grant->caps); | 2277 | int newcaps = le32_to_cpu(grant->caps); |
2279 | int issued, implemented, used, wanted, dirty; | 2278 | int issued, implemented, used, wanted, dirty; |
2280 | u64 size = le64_to_cpu(grant->size); | 2279 | u64 size = le64_to_cpu(grant->size); |
@@ -2286,8 +2285,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2286 | int revoked_rdcache = 0; | 2285 | int revoked_rdcache = 0; |
2287 | int queue_invalidate = 0; | 2286 | int queue_invalidate = 0; |
2288 | 2287 | ||
2289 | dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n", | 2288 | dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", |
2290 | inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps)); | 2289 | inode, cap, mds, seq, ceph_cap_string(newcaps)); |
2291 | dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, | 2290 | dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, |
2292 | inode->i_size); | 2291 | inode->i_size); |
2293 | 2292 | ||
@@ -2383,7 +2382,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2383 | } | 2382 | } |
2384 | 2383 | ||
2385 | cap->seq = seq; | 2384 | cap->seq = seq; |
2386 | cap->issue_seq = issue_seq; | ||
2387 | 2385 | ||
2388 | /* file layout may have changed */ | 2386 | /* file layout may have changed */ |
2389 | ci->i_layout = grant->layout; | 2387 | ci->i_layout = grant->layout; |
@@ -2691,6 +2689,11 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, | |||
2691 | NULL /* no caps context */); | 2689 | NULL /* no caps context */); |
2692 | try_flush_caps(inode, session, NULL); | 2690 | try_flush_caps(inode, session, NULL); |
2693 | up_read(&mdsc->snap_rwsem); | 2691 | up_read(&mdsc->snap_rwsem); |
2692 | |||
2693 | /* make sure we re-request max_size, if necessary */ | ||
2694 | spin_lock(&inode->i_lock); | ||
2695 | ci->i_requested_max_size = 0; | ||
2696 | spin_unlock(&inode->i_lock); | ||
2694 | } | 2697 | } |
2695 | 2698 | ||
2696 | /* | 2699 | /* |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index e0a2dc6fcafc..d902948a90d8 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -40,7 +40,8 @@ int ceph_init_dentry(struct dentry *dentry) | |||
40 | if (dentry->d_fsdata) | 40 | if (dentry->d_fsdata) |
41 | return 0; | 41 | return 0; |
42 | 42 | ||
43 | if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) | 43 | if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ |
44 | ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) | ||
44 | dentry->d_op = &ceph_dentry_ops; | 45 | dentry->d_op = &ceph_dentry_ops; |
45 | else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) | 46 | else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) |
46 | dentry->d_op = &ceph_snapdir_dentry_ops; | 47 | dentry->d_op = &ceph_snapdir_dentry_ops; |
@@ -114,8 +115,8 @@ static int __dcache_readdir(struct file *filp, | |||
114 | spin_lock(&dcache_lock); | 115 | spin_lock(&dcache_lock); |
115 | 116 | ||
116 | /* start at beginning? */ | 117 | /* start at beginning? */ |
117 | if (filp->f_pos == 2 || (last && | 118 | if (filp->f_pos == 2 || last == NULL || |
118 | filp->f_pos < ceph_dentry(last)->offset)) { | 119 | filp->f_pos < ceph_dentry(last)->offset) { |
119 | if (list_empty(&parent->d_subdirs)) | 120 | if (list_empty(&parent->d_subdirs)) |
120 | goto out_unlock; | 121 | goto out_unlock; |
121 | p = parent->d_subdirs.prev; | 122 | p = parent->d_subdirs.prev; |
@@ -336,7 +337,10 @@ more: | |||
336 | if (req->r_reply_info.dir_end) { | 337 | if (req->r_reply_info.dir_end) { |
337 | kfree(fi->last_name); | 338 | kfree(fi->last_name); |
338 | fi->last_name = NULL; | 339 | fi->last_name = NULL; |
339 | fi->next_offset = 2; | 340 | if (ceph_frag_is_rightmost(frag)) |
341 | fi->next_offset = 2; | ||
342 | else | ||
343 | fi->next_offset = 0; | ||
340 | } else { | 344 | } else { |
341 | rinfo = &req->r_reply_info; | 345 | rinfo = &req->r_reply_info; |
342 | err = note_last_dentry(fi, | 346 | err = note_last_dentry(fi, |
@@ -355,18 +359,22 @@ more: | |||
355 | u64 pos = ceph_make_fpos(frag, off); | 359 | u64 pos = ceph_make_fpos(frag, off); |
356 | struct ceph_mds_reply_inode *in = | 360 | struct ceph_mds_reply_inode *in = |
357 | rinfo->dir_in[off - fi->offset].in; | 361 | rinfo->dir_in[off - fi->offset].in; |
362 | struct ceph_vino vino; | ||
363 | ino_t ino; | ||
364 | |||
358 | dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", | 365 | dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", |
359 | off, off - fi->offset, rinfo->dir_nr, pos, | 366 | off, off - fi->offset, rinfo->dir_nr, pos, |
360 | rinfo->dir_dname_len[off - fi->offset], | 367 | rinfo->dir_dname_len[off - fi->offset], |
361 | rinfo->dir_dname[off - fi->offset], in); | 368 | rinfo->dir_dname[off - fi->offset], in); |
362 | BUG_ON(!in); | 369 | BUG_ON(!in); |
363 | ftype = le32_to_cpu(in->mode) >> 12; | 370 | ftype = le32_to_cpu(in->mode) >> 12; |
371 | vino.ino = le64_to_cpu(in->ino); | ||
372 | vino.snap = le64_to_cpu(in->snapid); | ||
373 | ino = ceph_vino_to_ino(vino); | ||
364 | if (filldir(dirent, | 374 | if (filldir(dirent, |
365 | rinfo->dir_dname[off - fi->offset], | 375 | rinfo->dir_dname[off - fi->offset], |
366 | rinfo->dir_dname_len[off - fi->offset], | 376 | rinfo->dir_dname_len[off - fi->offset], |
367 | pos, | 377 | pos, ino, ftype) < 0) { |
368 | le64_to_cpu(in->ino), | ||
369 | ftype) < 0) { | ||
370 | dout("filldir stopping us...\n"); | 378 | dout("filldir stopping us...\n"); |
371 | return 0; | 379 | return 0; |
372 | } | 380 | } |
@@ -414,6 +422,7 @@ static void reset_readdir(struct ceph_file_info *fi) | |||
414 | fi->last_readdir = NULL; | 422 | fi->last_readdir = NULL; |
415 | } | 423 | } |
416 | kfree(fi->last_name); | 424 | kfree(fi->last_name); |
425 | fi->last_name = NULL; | ||
417 | fi->next_offset = 2; /* compensate for . and .. */ | 426 | fi->next_offset = 2; /* compensate for . and .. */ |
418 | if (fi->dentry) { | 427 | if (fi->dentry) { |
419 | dput(fi->dentry); | 428 | dput(fi->dentry); |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index e77c28cf3690..7d0e4a82d898 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -154,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file) | |||
154 | } | 154 | } |
155 | 155 | ||
156 | /* | 156 | /* |
157 | * No need to block if we have any caps. Update wanted set | 157 | * No need to block if we have caps on the auth MDS (for |
158 | * write) or any MDS (for read). Update wanted set | ||
158 | * asynchronously. | 159 | * asynchronously. |
159 | */ | 160 | */ |
160 | spin_lock(&inode->i_lock); | 161 | spin_lock(&inode->i_lock); |
161 | if (__ceph_is_any_real_caps(ci)) { | 162 | if (__ceph_is_any_real_caps(ci) && |
163 | (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { | ||
162 | int mds_wanted = __ceph_caps_mds_wanted(ci); | 164 | int mds_wanted = __ceph_caps_mds_wanted(ci); |
163 | int issued = __ceph_caps_issued(ci, NULL); | 165 | int issued = __ceph_caps_issued(ci, NULL); |
164 | 166 | ||
@@ -280,11 +282,13 @@ int ceph_release(struct inode *inode, struct file *file) | |||
280 | static int striped_read(struct inode *inode, | 282 | static int striped_read(struct inode *inode, |
281 | u64 off, u64 len, | 283 | u64 off, u64 len, |
282 | struct page **pages, int num_pages, | 284 | struct page **pages, int num_pages, |
283 | int *checkeof) | 285 | int *checkeof, bool align_to_pages, |
286 | unsigned long buf_align) | ||
284 | { | 287 | { |
285 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | 288 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
286 | struct ceph_inode_info *ci = ceph_inode(inode); | 289 | struct ceph_inode_info *ci = ceph_inode(inode); |
287 | u64 pos, this_len; | 290 | u64 pos, this_len; |
291 | int io_align, page_align; | ||
288 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ | 292 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ |
289 | int left, pages_left; | 293 | int left, pages_left; |
290 | int read; | 294 | int read; |
@@ -300,14 +304,19 @@ static int striped_read(struct inode *inode, | |||
300 | page_pos = pages; | 304 | page_pos = pages; |
301 | pages_left = num_pages; | 305 | pages_left = num_pages; |
302 | read = 0; | 306 | read = 0; |
307 | io_align = off & ~PAGE_MASK; | ||
303 | 308 | ||
304 | more: | 309 | more: |
310 | if (align_to_pages) | ||
311 | page_align = (pos - io_align + buf_align) & ~PAGE_MASK; | ||
312 | else | ||
313 | page_align = pos & ~PAGE_MASK; | ||
305 | this_len = left; | 314 | this_len = left; |
306 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), | 315 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), |
307 | &ci->i_layout, pos, &this_len, | 316 | &ci->i_layout, pos, &this_len, |
308 | ci->i_truncate_seq, | 317 | ci->i_truncate_seq, |
309 | ci->i_truncate_size, | 318 | ci->i_truncate_size, |
310 | page_pos, pages_left); | 319 | page_pos, pages_left, page_align); |
311 | hit_stripe = this_len < left; | 320 | hit_stripe = this_len < left; |
312 | was_short = ret >= 0 && ret < this_len; | 321 | was_short = ret >= 0 && ret < this_len; |
313 | if (ret == -ENOENT) | 322 | if (ret == -ENOENT) |
@@ -368,32 +377,34 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
368 | struct inode *inode = file->f_dentry->d_inode; | 377 | struct inode *inode = file->f_dentry->d_inode; |
369 | struct page **pages; | 378 | struct page **pages; |
370 | u64 off = *poff; | 379 | u64 off = *poff; |
371 | int num_pages = calc_pages_for(off, len); | 380 | int num_pages, ret; |
372 | int ret; | ||
373 | 381 | ||
374 | dout("sync_read on file %p %llu~%u %s\n", file, off, len, | 382 | dout("sync_read on file %p %llu~%u %s\n", file, off, len, |
375 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); | 383 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); |
376 | 384 | ||
377 | if (file->f_flags & O_DIRECT) { | 385 | if (file->f_flags & O_DIRECT) { |
378 | pages = ceph_get_direct_page_vector(data, num_pages, off, len); | 386 | num_pages = calc_pages_for((unsigned long)data, len); |
379 | 387 | pages = ceph_get_direct_page_vector(data, num_pages, true); | |
380 | /* | ||
381 | * flush any page cache pages in this range. this | ||
382 | * will make concurrent normal and O_DIRECT io slow, | ||
383 | * but it will at least behave sensibly when they are | ||
384 | * in sequence. | ||
385 | */ | ||
386 | } else { | 388 | } else { |
389 | num_pages = calc_pages_for(off, len); | ||
387 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); | 390 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); |
388 | } | 391 | } |
389 | if (IS_ERR(pages)) | 392 | if (IS_ERR(pages)) |
390 | return PTR_ERR(pages); | 393 | return PTR_ERR(pages); |
391 | 394 | ||
395 | /* | ||
396 | * flush any page cache pages in this range. this | ||
397 | * will make concurrent normal and sync io slow, | ||
398 | * but it will at least behave sensibly when they are | ||
399 | * in sequence. | ||
400 | */ | ||
392 | ret = filemap_write_and_wait(inode->i_mapping); | 401 | ret = filemap_write_and_wait(inode->i_mapping); |
393 | if (ret < 0) | 402 | if (ret < 0) |
394 | goto done; | 403 | goto done; |
395 | 404 | ||
396 | ret = striped_read(inode, off, len, pages, num_pages, checkeof); | 405 | ret = striped_read(inode, off, len, pages, num_pages, checkeof, |
406 | file->f_flags & O_DIRECT, | ||
407 | (unsigned long)data & ~PAGE_MASK); | ||
397 | 408 | ||
398 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) | 409 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) |
399 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); | 410 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); |
@@ -402,7 +413,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
402 | 413 | ||
403 | done: | 414 | done: |
404 | if (file->f_flags & O_DIRECT) | 415 | if (file->f_flags & O_DIRECT) |
405 | ceph_put_page_vector(pages, num_pages); | 416 | ceph_put_page_vector(pages, num_pages, true); |
406 | else | 417 | else |
407 | ceph_release_page_vector(pages, num_pages); | 418 | ceph_release_page_vector(pages, num_pages); |
408 | dout("sync_read result %d\n", ret); | 419 | dout("sync_read result %d\n", ret); |
@@ -448,6 +459,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
448 | int flags; | 459 | int flags; |
449 | int do_sync = 0; | 460 | int do_sync = 0; |
450 | int check_caps = 0; | 461 | int check_caps = 0; |
462 | int page_align, io_align; | ||
463 | unsigned long buf_align; | ||
451 | int ret; | 464 | int ret; |
452 | struct timespec mtime = CURRENT_TIME; | 465 | struct timespec mtime = CURRENT_TIME; |
453 | 466 | ||
@@ -462,6 +475,9 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
462 | else | 475 | else |
463 | pos = *offset; | 476 | pos = *offset; |
464 | 477 | ||
478 | io_align = pos & ~PAGE_MASK; | ||
479 | buf_align = (unsigned long)data & ~PAGE_MASK; | ||
480 | |||
465 | ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); | 481 | ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); |
466 | if (ret < 0) | 482 | if (ret < 0) |
467 | return ret; | 483 | return ret; |
@@ -486,20 +502,27 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
486 | */ | 502 | */ |
487 | more: | 503 | more: |
488 | len = left; | 504 | len = left; |
505 | if (file->f_flags & O_DIRECT) { | ||
506 | /* write from beginning of first page, regardless of | ||
507 | io alignment */ | ||
508 | page_align = (pos - io_align + buf_align) & ~PAGE_MASK; | ||
509 | num_pages = calc_pages_for((unsigned long)data, len); | ||
510 | } else { | ||
511 | page_align = pos & ~PAGE_MASK; | ||
512 | num_pages = calc_pages_for(pos, len); | ||
513 | } | ||
489 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, | 514 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, |
490 | ceph_vino(inode), pos, &len, | 515 | ceph_vino(inode), pos, &len, |
491 | CEPH_OSD_OP_WRITE, flags, | 516 | CEPH_OSD_OP_WRITE, flags, |
492 | ci->i_snap_realm->cached_context, | 517 | ci->i_snap_realm->cached_context, |
493 | do_sync, | 518 | do_sync, |
494 | ci->i_truncate_seq, ci->i_truncate_size, | 519 | ci->i_truncate_seq, ci->i_truncate_size, |
495 | &mtime, false, 2); | 520 | &mtime, false, 2, page_align); |
496 | if (!req) | 521 | if (!req) |
497 | return -ENOMEM; | 522 | return -ENOMEM; |
498 | 523 | ||
499 | num_pages = calc_pages_for(pos, len); | ||
500 | |||
501 | if (file->f_flags & O_DIRECT) { | 524 | if (file->f_flags & O_DIRECT) { |
502 | pages = ceph_get_direct_page_vector(data, num_pages, pos, len); | 525 | pages = ceph_get_direct_page_vector(data, num_pages, false); |
503 | if (IS_ERR(pages)) { | 526 | if (IS_ERR(pages)) { |
504 | ret = PTR_ERR(pages); | 527 | ret = PTR_ERR(pages); |
505 | goto out; | 528 | goto out; |
@@ -549,7 +572,7 @@ more: | |||
549 | } | 572 | } |
550 | 573 | ||
551 | if (file->f_flags & O_DIRECT) | 574 | if (file->f_flags & O_DIRECT) |
552 | ceph_put_page_vector(pages, num_pages); | 575 | ceph_put_page_vector(pages, num_pages, false); |
553 | else if (file->f_flags & O_SYNC) | 576 | else if (file->f_flags & O_SYNC) |
554 | ceph_release_page_vector(pages, num_pages); | 577 | ceph_release_page_vector(pages, num_pages); |
555 | 578 | ||
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 1d6a45b5a04c..bf1286588f26 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -2,7 +2,6 @@ | |||
2 | 2 | ||
3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
4 | #include <linux/fs.h> | 4 | #include <linux/fs.h> |
5 | #include <linux/smp_lock.h> | ||
6 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
7 | #include <linux/string.h> | 6 | #include <linux/string.h> |
8 | #include <linux/uaccess.h> | 7 | #include <linux/uaccess.h> |
@@ -471,7 +470,9 @@ void ceph_fill_file_time(struct inode *inode, int issued, | |||
471 | 470 | ||
472 | if (issued & (CEPH_CAP_FILE_EXCL| | 471 | if (issued & (CEPH_CAP_FILE_EXCL| |
473 | CEPH_CAP_FILE_WR| | 472 | CEPH_CAP_FILE_WR| |
474 | CEPH_CAP_FILE_BUFFER)) { | 473 | CEPH_CAP_FILE_BUFFER| |
474 | CEPH_CAP_AUTH_EXCL| | ||
475 | CEPH_CAP_XATTR_EXCL)) { | ||
475 | if (timespec_compare(ctime, &inode->i_ctime) > 0) { | 476 | if (timespec_compare(ctime, &inode->i_ctime) > 0) { |
476 | dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", | 477 | dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", |
477 | inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, | 478 | inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, |
@@ -511,7 +512,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, | |||
511 | warn = 1; | 512 | warn = 1; |
512 | } | 513 | } |
513 | } else { | 514 | } else { |
514 | /* we have no write caps; whatever the MDS says is true */ | 515 | /* we have no write|excl caps; whatever the MDS says is true */ |
515 | if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { | 516 | if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { |
516 | inode->i_ctime = *ctime; | 517 | inode->i_ctime = *ctime; |
517 | inode->i_mtime = *mtime; | 518 | inode->i_mtime = *mtime; |
@@ -567,12 +568,17 @@ static int fill_inode(struct inode *inode, | |||
567 | 568 | ||
568 | /* | 569 | /* |
569 | * provided version will be odd if inode value is projected, | 570 | * provided version will be odd if inode value is projected, |
570 | * even if stable. skip the update if we have a newer info | 571 | * even if stable. skip the update if we have newer stable |
571 | * (e.g., due to inode info racing form multiple MDSs), or if | 572 | * info (ours>=theirs, e.g. due to racing mds replies), unless |
572 | * we are getting projected (unstable) inode info. | 573 | * we are getting projected (unstable) info (in which case the |
574 | * version is odd, and we want ours>theirs). | ||
575 | * us them | ||
576 | * 2 2 skip | ||
577 | * 3 2 skip | ||
578 | * 3 3 update | ||
573 | */ | 579 | */ |
574 | if (le64_to_cpu(info->version) > 0 && | 580 | if (le64_to_cpu(info->version) > 0 && |
575 | (ci->i_version & ~1) > le64_to_cpu(info->version)) | 581 | (ci->i_version & ~1) >= le64_to_cpu(info->version)) |
576 | goto no_change; | 582 | goto no_change; |
577 | 583 | ||
578 | issued = __ceph_caps_issued(ci, &implemented); | 584 | issued = __ceph_caps_issued(ci, &implemented); |
@@ -606,7 +612,14 @@ static int fill_inode(struct inode *inode, | |||
606 | le32_to_cpu(info->time_warp_seq), | 612 | le32_to_cpu(info->time_warp_seq), |
607 | &ctime, &mtime, &atime); | 613 | &ctime, &mtime, &atime); |
608 | 614 | ||
609 | ci->i_max_size = le64_to_cpu(info->max_size); | 615 | /* only update max_size on auth cap */ |
616 | if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && | ||
617 | ci->i_max_size != le64_to_cpu(info->max_size)) { | ||
618 | dout("max_size %lld -> %llu\n", ci->i_max_size, | ||
619 | le64_to_cpu(info->max_size)); | ||
620 | ci->i_max_size = le64_to_cpu(info->max_size); | ||
621 | } | ||
622 | |||
610 | ci->i_layout = info->layout; | 623 | ci->i_layout = info->layout; |
611 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; | 624 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; |
612 | 625 | ||
@@ -1055,7 +1068,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1055 | ininfo = rinfo->targeti.in; | 1068 | ininfo = rinfo->targeti.in; |
1056 | vino.ino = le64_to_cpu(ininfo->ino); | 1069 | vino.ino = le64_to_cpu(ininfo->ino); |
1057 | vino.snap = le64_to_cpu(ininfo->snapid); | 1070 | vino.snap = le64_to_cpu(ininfo->snapid); |
1058 | if (!dn->d_inode) { | 1071 | in = dn->d_inode; |
1072 | if (!in) { | ||
1059 | in = ceph_get_inode(sb, vino); | 1073 | in = ceph_get_inode(sb, vino); |
1060 | if (IS_ERR(in)) { | 1074 | if (IS_ERR(in)) { |
1061 | pr_err("fill_trace bad get_inode " | 1075 | pr_err("fill_trace bad get_inode " |
@@ -1386,11 +1400,8 @@ static void ceph_invalidate_work(struct work_struct *work) | |||
1386 | spin_lock(&inode->i_lock); | 1400 | spin_lock(&inode->i_lock); |
1387 | dout("invalidate_pages %p gen %d revoking %d\n", inode, | 1401 | dout("invalidate_pages %p gen %d revoking %d\n", inode, |
1388 | ci->i_rdcache_gen, ci->i_rdcache_revoking); | 1402 | ci->i_rdcache_gen, ci->i_rdcache_revoking); |
1389 | if (ci->i_rdcache_gen == 0 || | 1403 | if (ci->i_rdcache_revoking != ci->i_rdcache_gen) { |
1390 | ci->i_rdcache_revoking != ci->i_rdcache_gen) { | ||
1391 | BUG_ON(ci->i_rdcache_revoking > ci->i_rdcache_gen); | ||
1392 | /* nevermind! */ | 1404 | /* nevermind! */ |
1393 | ci->i_rdcache_revoking = 0; | ||
1394 | spin_unlock(&inode->i_lock); | 1405 | spin_unlock(&inode->i_lock); |
1395 | goto out; | 1406 | goto out; |
1396 | } | 1407 | } |
@@ -1400,15 +1411,16 @@ static void ceph_invalidate_work(struct work_struct *work) | |||
1400 | ceph_invalidate_nondirty_pages(inode->i_mapping); | 1411 | ceph_invalidate_nondirty_pages(inode->i_mapping); |
1401 | 1412 | ||
1402 | spin_lock(&inode->i_lock); | 1413 | spin_lock(&inode->i_lock); |
1403 | if (orig_gen == ci->i_rdcache_gen) { | 1414 | if (orig_gen == ci->i_rdcache_gen && |
1415 | orig_gen == ci->i_rdcache_revoking) { | ||
1404 | dout("invalidate_pages %p gen %d successful\n", inode, | 1416 | dout("invalidate_pages %p gen %d successful\n", inode, |
1405 | ci->i_rdcache_gen); | 1417 | ci->i_rdcache_gen); |
1406 | ci->i_rdcache_gen = 0; | 1418 | ci->i_rdcache_revoking--; |
1407 | ci->i_rdcache_revoking = 0; | ||
1408 | check = 1; | 1419 | check = 1; |
1409 | } else { | 1420 | } else { |
1410 | dout("invalidate_pages %p gen %d raced, gen now %d\n", | 1421 | dout("invalidate_pages %p gen %d raced, now %d revoking %d\n", |
1411 | inode, orig_gen, ci->i_rdcache_gen); | 1422 | inode, orig_gen, ci->i_rdcache_gen, |
1423 | ci->i_rdcache_revoking); | ||
1412 | } | 1424 | } |
1413 | spin_unlock(&inode->i_lock); | 1425 | spin_unlock(&inode->i_lock); |
1414 | 1426 | ||
@@ -1739,7 +1751,7 @@ int ceph_do_getattr(struct inode *inode, int mask) | |||
1739 | return 0; | 1751 | return 0; |
1740 | } | 1752 | } |
1741 | 1753 | ||
1742 | dout("do_getattr inode %p mask %s\n", inode, ceph_cap_string(mask)); | 1754 | dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode); |
1743 | if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) | 1755 | if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) |
1744 | return 0; | 1756 | return 0; |
1745 | 1757 | ||
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h index a6ce54e94eb5..52e8fd74d450 100644 --- a/fs/ceph/ioctl.h +++ b/fs/ceph/ioctl.h | |||
@@ -4,7 +4,7 @@ | |||
4 | #include <linux/ioctl.h> | 4 | #include <linux/ioctl.h> |
5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
6 | 6 | ||
7 | #define CEPH_IOCTL_MAGIC 0x98 | 7 | #define CEPH_IOCTL_MAGIC 0x97 |
8 | 8 | ||
9 | /* just use u64 to align sanely on all archs */ | 9 | /* just use u64 to align sanely on all archs */ |
10 | struct ceph_ioctl_layout { | 10 | struct ceph_ioctl_layout { |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 40abde93c345..476b329867d4 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
@@ -11,40 +11,68 @@ | |||
11 | * Implement fcntl and flock locking functions. | 11 | * Implement fcntl and flock locking functions. |
12 | */ | 12 | */ |
13 | static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | 13 | static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, |
14 | u64 pid, u64 pid_ns, | 14 | int cmd, u8 wait, struct file_lock *fl) |
15 | int cmd, u64 start, u64 length, u8 wait) | ||
16 | { | 15 | { |
17 | struct inode *inode = file->f_dentry->d_inode; | 16 | struct inode *inode = file->f_dentry->d_inode; |
18 | struct ceph_mds_client *mdsc = | 17 | struct ceph_mds_client *mdsc = |
19 | ceph_sb_to_client(inode->i_sb)->mdsc; | 18 | ceph_sb_to_client(inode->i_sb)->mdsc; |
20 | struct ceph_mds_request *req; | 19 | struct ceph_mds_request *req; |
21 | int err; | 20 | int err; |
21 | u64 length = 0; | ||
22 | 22 | ||
23 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); | 23 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); |
24 | if (IS_ERR(req)) | 24 | if (IS_ERR(req)) |
25 | return PTR_ERR(req); | 25 | return PTR_ERR(req); |
26 | req->r_inode = igrab(inode); | 26 | req->r_inode = igrab(inode); |
27 | 27 | ||
28 | /* mds requires start and length rather than start and end */ | ||
29 | if (LLONG_MAX == fl->fl_end) | ||
30 | length = 0; | ||
31 | else | ||
32 | length = fl->fl_end - fl->fl_start + 1; | ||
33 | |||
28 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | 34 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " |
29 | "length: %llu, wait: %d, type`: %d", (int)lock_type, | 35 | "length: %llu, wait: %d, type`: %d", (int)lock_type, |
30 | (int)operation, pid, start, length, wait, cmd); | 36 | (int)operation, (u64)fl->fl_pid, fl->fl_start, |
37 | length, wait, fl->fl_type); | ||
38 | |||
31 | 39 | ||
32 | req->r_args.filelock_change.rule = lock_type; | 40 | req->r_args.filelock_change.rule = lock_type; |
33 | req->r_args.filelock_change.type = cmd; | 41 | req->r_args.filelock_change.type = cmd; |
34 | req->r_args.filelock_change.pid = cpu_to_le64(pid); | 42 | req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); |
35 | /* This should be adjusted, but I'm not sure if | 43 | /* This should be adjusted, but I'm not sure if |
36 | namespaces actually get id numbers*/ | 44 | namespaces actually get id numbers*/ |
37 | req->r_args.filelock_change.pid_namespace = | 45 | req->r_args.filelock_change.pid_namespace = |
38 | cpu_to_le64((u64)pid_ns); | 46 | cpu_to_le64((u64)(unsigned long)fl->fl_nspid); |
39 | req->r_args.filelock_change.start = cpu_to_le64(start); | 47 | req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start); |
40 | req->r_args.filelock_change.length = cpu_to_le64(length); | 48 | req->r_args.filelock_change.length = cpu_to_le64(length); |
41 | req->r_args.filelock_change.wait = wait; | 49 | req->r_args.filelock_change.wait = wait; |
42 | 50 | ||
43 | err = ceph_mdsc_do_request(mdsc, inode, req); | 51 | err = ceph_mdsc_do_request(mdsc, inode, req); |
52 | |||
53 | if ( operation == CEPH_MDS_OP_GETFILELOCK){ | ||
54 | fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); | ||
55 | if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) | ||
56 | fl->fl_type = F_RDLCK; | ||
57 | else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type) | ||
58 | fl->fl_type = F_WRLCK; | ||
59 | else | ||
60 | fl->fl_type = F_UNLCK; | ||
61 | |||
62 | fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start); | ||
63 | length = le64_to_cpu(req->r_reply_info.filelock_reply->start) + | ||
64 | le64_to_cpu(req->r_reply_info.filelock_reply->length); | ||
65 | if (length >= 1) | ||
66 | fl->fl_end = length -1; | ||
67 | else | ||
68 | fl->fl_end = 0; | ||
69 | |||
70 | } | ||
44 | ceph_mdsc_put_request(req); | 71 | ceph_mdsc_put_request(req); |
45 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | 72 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " |
46 | "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type, | 73 | "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type, |
47 | (int)operation, pid, start, length, wait, cmd, err); | 74 | (int)operation, (u64)fl->fl_pid, fl->fl_start, |
75 | length, wait, fl->fl_type, err); | ||
48 | return err; | 76 | return err; |
49 | } | 77 | } |
50 | 78 | ||
@@ -54,7 +82,6 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |||
54 | */ | 82 | */ |
55 | int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | 83 | int ceph_lock(struct file *file, int cmd, struct file_lock *fl) |
56 | { | 84 | { |
57 | u64 length; | ||
58 | u8 lock_cmd; | 85 | u8 lock_cmd; |
59 | int err; | 86 | int err; |
60 | u8 wait = 0; | 87 | u8 wait = 0; |
@@ -76,29 +103,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
76 | else | 103 | else |
77 | lock_cmd = CEPH_LOCK_UNLOCK; | 104 | lock_cmd = CEPH_LOCK_UNLOCK; |
78 | 105 | ||
79 | if (LLONG_MAX == fl->fl_end) | 106 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); |
80 | length = 0; | ||
81 | else | ||
82 | length = fl->fl_end - fl->fl_start + 1; | ||
83 | |||
84 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | ||
85 | (u64)fl->fl_pid, | ||
86 | (u64)(unsigned long)fl->fl_nspid, | ||
87 | lock_cmd, fl->fl_start, | ||
88 | length, wait); | ||
89 | if (!err) { | 107 | if (!err) { |
90 | dout("mds locked, locking locally"); | 108 | if ( op != CEPH_MDS_OP_GETFILELOCK ){ |
91 | err = posix_lock_file(file, fl, NULL); | 109 | dout("mds locked, locking locally"); |
92 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { | 110 | err = posix_lock_file(file, fl, NULL); |
93 | /* undo! This should only happen if the kernel detects | 111 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { |
94 | * local deadlock. */ | 112 | /* undo! This should only happen if the kernel detects |
95 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, | 113 | * local deadlock. */ |
96 | (u64)fl->fl_pid, | 114 | ceph_lock_message(CEPH_LOCK_FCNTL, op, file, |
97 | (u64)(unsigned long)fl->fl_nspid, | 115 | CEPH_LOCK_UNLOCK, 0, fl); |
98 | CEPH_LOCK_UNLOCK, fl->fl_start, | 116 | dout("got %d on posix_lock_file, undid lock", err); |
99 | length, 0); | 117 | } |
100 | dout("got %d on posix_lock_file, undid lock", err); | ||
101 | } | 118 | } |
119 | |||
102 | } else { | 120 | } else { |
103 | dout("mds returned error code %d", err); | 121 | dout("mds returned error code %d", err); |
104 | } | 122 | } |
@@ -107,7 +125,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
107 | 125 | ||
108 | int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | 126 | int ceph_flock(struct file *file, int cmd, struct file_lock *fl) |
109 | { | 127 | { |
110 | u64 length; | ||
111 | u8 lock_cmd; | 128 | u8 lock_cmd; |
112 | int err; | 129 | int err; |
113 | u8 wait = 1; | 130 | u8 wait = 1; |
@@ -127,26 +144,15 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
127 | lock_cmd = CEPH_LOCK_EXCL; | 144 | lock_cmd = CEPH_LOCK_EXCL; |
128 | else | 145 | else |
129 | lock_cmd = CEPH_LOCK_UNLOCK; | 146 | lock_cmd = CEPH_LOCK_UNLOCK; |
130 | /* mds requires start and length rather than start and end */ | ||
131 | if (LLONG_MAX == fl->fl_end) | ||
132 | length = 0; | ||
133 | else | ||
134 | length = fl->fl_end - fl->fl_start + 1; | ||
135 | 147 | ||
136 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, | 148 | err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, |
137 | file, (u64)fl->fl_pid, | 149 | file, lock_cmd, wait, fl); |
138 | (u64)(unsigned long)fl->fl_nspid, | ||
139 | lock_cmd, fl->fl_start, | ||
140 | length, wait); | ||
141 | if (!err) { | 150 | if (!err) { |
142 | err = flock_lock_file_wait(file, fl); | 151 | err = flock_lock_file_wait(file, fl); |
143 | if (err) { | 152 | if (err) { |
144 | ceph_lock_message(CEPH_LOCK_FLOCK, | 153 | ceph_lock_message(CEPH_LOCK_FLOCK, |
145 | CEPH_MDS_OP_SETFILELOCK, | 154 | CEPH_MDS_OP_SETFILELOCK, |
146 | file, (u64)fl->fl_pid, | 155 | file, CEPH_LOCK_UNLOCK, 0, fl); |
147 | (u64)(unsigned long)fl->fl_nspid, | ||
148 | CEPH_LOCK_UNLOCK, fl->fl_start, | ||
149 | length, 0); | ||
150 | dout("got %d on flock_lock_file_wait, undid lock", err); | 156 | dout("got %d on flock_lock_file_wait, undid lock", err); |
151 | } | 157 | } |
152 | } else { | 158 | } else { |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 3142b15940c2..38800eaa81d0 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -6,7 +6,6 @@ | |||
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/debugfs.h> | 7 | #include <linux/debugfs.h> |
8 | #include <linux/seq_file.h> | 8 | #include <linux/seq_file.h> |
9 | #include <linux/smp_lock.h> | ||
10 | 9 | ||
11 | #include "super.h" | 10 | #include "super.h" |
12 | #include "mds_client.h" | 11 | #include "mds_client.h" |
@@ -203,6 +202,38 @@ out_bad: | |||
203 | } | 202 | } |
204 | 203 | ||
205 | /* | 204 | /* |
205 | * parse fcntl F_GETLK results | ||
206 | */ | ||
207 | static int parse_reply_info_filelock(void **p, void *end, | ||
208 | struct ceph_mds_reply_info_parsed *info) | ||
209 | { | ||
210 | if (*p + sizeof(*info->filelock_reply) > end) | ||
211 | goto bad; | ||
212 | |||
213 | info->filelock_reply = *p; | ||
214 | *p += sizeof(*info->filelock_reply); | ||
215 | |||
216 | if (unlikely(*p != end)) | ||
217 | goto bad; | ||
218 | return 0; | ||
219 | |||
220 | bad: | ||
221 | return -EIO; | ||
222 | } | ||
223 | |||
224 | /* | ||
225 | * parse extra results | ||
226 | */ | ||
227 | static int parse_reply_info_extra(void **p, void *end, | ||
228 | struct ceph_mds_reply_info_parsed *info) | ||
229 | { | ||
230 | if (info->head->op == CEPH_MDS_OP_GETFILELOCK) | ||
231 | return parse_reply_info_filelock(p, end, info); | ||
232 | else | ||
233 | return parse_reply_info_dir(p, end, info); | ||
234 | } | ||
235 | |||
236 | /* | ||
206 | * parse entire mds reply | 237 | * parse entire mds reply |
207 | */ | 238 | */ |
208 | static int parse_reply_info(struct ceph_msg *msg, | 239 | static int parse_reply_info(struct ceph_msg *msg, |
@@ -224,10 +255,10 @@ static int parse_reply_info(struct ceph_msg *msg, | |||
224 | goto out_bad; | 255 | goto out_bad; |
225 | } | 256 | } |
226 | 257 | ||
227 | /* dir content */ | 258 | /* extra */ |
228 | ceph_decode_32_safe(&p, end, len, bad); | 259 | ceph_decode_32_safe(&p, end, len, bad); |
229 | if (len > 0) { | 260 | if (len > 0) { |
230 | err = parse_reply_info_dir(&p, p+len, info); | 261 | err = parse_reply_info_extra(&p, p+len, info); |
231 | if (err < 0) | 262 | if (err < 0) |
232 | goto out_bad; | 263 | goto out_bad; |
233 | } | 264 | } |
@@ -529,6 +560,9 @@ static void __register_request(struct ceph_mds_client *mdsc, | |||
529 | ceph_mdsc_get_request(req); | 560 | ceph_mdsc_get_request(req); |
530 | __insert_request(mdsc, req); | 561 | __insert_request(mdsc, req); |
531 | 562 | ||
563 | req->r_uid = current_fsuid(); | ||
564 | req->r_gid = current_fsgid(); | ||
565 | |||
532 | if (dir) { | 566 | if (dir) { |
533 | struct ceph_inode_info *ci = ceph_inode(dir); | 567 | struct ceph_inode_info *ci = ceph_inode(dir); |
534 | 568 | ||
@@ -1588,8 +1622,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
1588 | 1622 | ||
1589 | head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); | 1623 | head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); |
1590 | head->op = cpu_to_le32(req->r_op); | 1624 | head->op = cpu_to_le32(req->r_op); |
1591 | head->caller_uid = cpu_to_le32(current_fsuid()); | 1625 | head->caller_uid = cpu_to_le32(req->r_uid); |
1592 | head->caller_gid = cpu_to_le32(current_fsgid()); | 1626 | head->caller_gid = cpu_to_le32(req->r_gid); |
1593 | head->args = req->r_args; | 1627 | head->args = req->r_args; |
1594 | 1628 | ||
1595 | ceph_encode_filepath(&p, end, ino1, path1); | 1629 | ceph_encode_filepath(&p, end, ino1, path1); |
@@ -2072,7 +2106,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
2072 | 2106 | ||
2073 | mutex_lock(&session->s_mutex); | 2107 | mutex_lock(&session->s_mutex); |
2074 | if (err < 0) { | 2108 | if (err < 0) { |
2075 | pr_err("mdsc_handle_reply got corrupt reply mds%d\n", mds); | 2109 | pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid); |
2076 | ceph_msg_dump(msg); | 2110 | ceph_msg_dump(msg); |
2077 | goto out_err; | 2111 | goto out_err; |
2078 | } | 2112 | } |
@@ -2092,7 +2126,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
2092 | mutex_lock(&req->r_fill_mutex); | 2126 | mutex_lock(&req->r_fill_mutex); |
2093 | err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); | 2127 | err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); |
2094 | if (err == 0) { | 2128 | if (err == 0) { |
2095 | if (result == 0 && rinfo->dir_nr) | 2129 | if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK && |
2130 | rinfo->dir_nr) | ||
2096 | ceph_readdir_prepopulate(req, req->r_session); | 2131 | ceph_readdir_prepopulate(req, req->r_session); |
2097 | ceph_unreserve_caps(mdsc, &req->r_caps_reservation); | 2132 | ceph_unreserve_caps(mdsc, &req->r_caps_reservation); |
2098 | } | 2133 | } |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index d66d63c72355..aabe563b54db 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -42,26 +42,37 @@ struct ceph_mds_reply_info_in { | |||
42 | }; | 42 | }; |
43 | 43 | ||
44 | /* | 44 | /* |
45 | * parsed info about an mds reply, including information about the | 45 | * parsed info about an mds reply, including information about |
46 | * target inode and/or its parent directory and dentry, and directory | 46 | * either: 1) the target inode and/or its parent directory and dentry, |
47 | * contents (for readdir results). | 47 | * and directory contents (for readdir results), or |
48 | * 2) the file range lock info (for fcntl F_GETLK results). | ||
48 | */ | 49 | */ |
49 | struct ceph_mds_reply_info_parsed { | 50 | struct ceph_mds_reply_info_parsed { |
50 | struct ceph_mds_reply_head *head; | 51 | struct ceph_mds_reply_head *head; |
51 | 52 | ||
53 | /* trace */ | ||
52 | struct ceph_mds_reply_info_in diri, targeti; | 54 | struct ceph_mds_reply_info_in diri, targeti; |
53 | struct ceph_mds_reply_dirfrag *dirfrag; | 55 | struct ceph_mds_reply_dirfrag *dirfrag; |
54 | char *dname; | 56 | char *dname; |
55 | u32 dname_len; | 57 | u32 dname_len; |
56 | struct ceph_mds_reply_lease *dlease; | 58 | struct ceph_mds_reply_lease *dlease; |
57 | 59 | ||
58 | struct ceph_mds_reply_dirfrag *dir_dir; | 60 | /* extra */ |
59 | int dir_nr; | 61 | union { |
60 | char **dir_dname; | 62 | /* for fcntl F_GETLK results */ |
61 | u32 *dir_dname_len; | 63 | struct ceph_filelock *filelock_reply; |
62 | struct ceph_mds_reply_lease **dir_dlease; | 64 | |
63 | struct ceph_mds_reply_info_in *dir_in; | 65 | /* for readdir results */ |
64 | u8 dir_complete, dir_end; | 66 | struct { |
67 | struct ceph_mds_reply_dirfrag *dir_dir; | ||
68 | int dir_nr; | ||
69 | char **dir_dname; | ||
70 | u32 *dir_dname_len; | ||
71 | struct ceph_mds_reply_lease **dir_dlease; | ||
72 | struct ceph_mds_reply_info_in *dir_in; | ||
73 | u8 dir_complete, dir_end; | ||
74 | }; | ||
75 | }; | ||
65 | 76 | ||
66 | /* encoded blob describing snapshot contexts for certain | 77 | /* encoded blob describing snapshot contexts for certain |
67 | operations (e.g., open) */ | 78 | operations (e.g., open) */ |
@@ -170,6 +181,8 @@ struct ceph_mds_request { | |||
170 | 181 | ||
171 | union ceph_mds_request_args r_args; | 182 | union ceph_mds_request_args r_args; |
172 | int r_fmode; /* file mode, if expecting cap */ | 183 | int r_fmode; /* file mode, if expecting cap */ |
184 | uid_t r_uid; | ||
185 | gid_t r_gid; | ||
173 | 186 | ||
174 | /* for choosing which mds to send this request to */ | 187 | /* for choosing which mds to send this request to */ |
175 | int r_direct_mode; | 188 | int r_direct_mode; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 1886294e12f7..7f01728a4657 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -293,9 +293,7 @@ struct ceph_inode_info { | |||
293 | int i_rd_ref, i_rdcache_ref, i_wr_ref; | 293 | int i_rd_ref, i_rdcache_ref, i_wr_ref; |
294 | int i_wrbuffer_ref, i_wrbuffer_ref_head; | 294 | int i_wrbuffer_ref, i_wrbuffer_ref_head; |
295 | u32 i_shared_gen; /* increment each time we get FILE_SHARED */ | 295 | u32 i_shared_gen; /* increment each time we get FILE_SHARED */ |
296 | u32 i_rdcache_gen; /* we increment this each time we get | 296 | u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ |
297 | FILE_CACHE. If it's non-zero, we | ||
298 | _may_ have cached pages. */ | ||
299 | u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ | 297 | u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ |
300 | 298 | ||
301 | struct list_head i_unsafe_writes; /* uncommitted sync writes */ | 299 | struct list_head i_unsafe_writes; /* uncommitted sync writes */ |