diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-11-19 18:32:22 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-11-19 18:32:22 -0500 |
commit | 76db8ac45fc738f7d7664fe9b56d15c594a45228 (patch) | |
tree | eca23feab074d505b375e27714473f4ad337bd85 | |
parent | caf8394524fdc039b090cd3af99157e9e76f4f06 (diff) | |
parent | 3105c19c450ac7c18ab28c19d364b588767261b3 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
ceph: fix readdir EOVERFLOW on 32-bit archs
ceph: fix frag offset for non-leftmost frags
ceph: fix dangling pointer
ceph: explicitly specify page alignment in network messages
ceph: make page alignment explicit in osd interface
ceph: fix comment, remove extraneous args
ceph: fix update of ctime from MDS
ceph: fix version check on racing inode updates
ceph: fix uid/gid on resent mds requests
ceph: fix rdcache_gen usage and invalidate
ceph: re-request max_size if cap auth changes
ceph: only let auth caps update max_size
ceph: fix open for write on clustered mds
ceph: fix bad pointer dereference in ceph_fill_trace
ceph: fix small seq message skipping
Revert "ceph: update issue_seq on cap grant"
-rw-r--r-- | fs/ceph/addr.c | 6 | ||||
-rw-r--r-- | fs/ceph/caps.c | 17 | ||||
-rw-r--r-- | fs/ceph/dir.c | 16 | ||||
-rw-r--r-- | fs/ceph/file.c | 52 | ||||
-rw-r--r-- | fs/ceph/inode.c | 49 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 7 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 2 | ||||
-rw-r--r-- | fs/ceph/super.h | 4 | ||||
-rw-r--r-- | include/linux/ceph/libceph.h | 3 | ||||
-rw-r--r-- | include/linux/ceph/messenger.h | 1 | ||||
-rw-r--r-- | include/linux/ceph/osd_client.h | 7 | ||||
-rw-r--r-- | net/ceph/messenger.c | 13 | ||||
-rw-r--r-- | net/ceph/osd_client.c | 25 | ||||
-rw-r--r-- | net/ceph/pagevec.c | 3 |
14 files changed, 129 insertions, 76 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index e9c874abc9e..561438b6a50 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -204,7 +204,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) | |||
204 | err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, | 204 | err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, |
205 | page->index << PAGE_CACHE_SHIFT, &len, | 205 | page->index << PAGE_CACHE_SHIFT, &len, |
206 | ci->i_truncate_seq, ci->i_truncate_size, | 206 | ci->i_truncate_seq, ci->i_truncate_size, |
207 | &page, 1); | 207 | &page, 1, 0); |
208 | if (err == -ENOENT) | 208 | if (err == -ENOENT) |
209 | err = 0; | 209 | err = 0; |
210 | if (err < 0) { | 210 | if (err < 0) { |
@@ -287,7 +287,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
287 | rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, | 287 | rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, |
288 | offset, &len, | 288 | offset, &len, |
289 | ci->i_truncate_seq, ci->i_truncate_size, | 289 | ci->i_truncate_seq, ci->i_truncate_size, |
290 | pages, nr_pages); | 290 | pages, nr_pages, 0); |
291 | if (rc == -ENOENT) | 291 | if (rc == -ENOENT) |
292 | rc = 0; | 292 | rc = 0; |
293 | if (rc < 0) | 293 | if (rc < 0) |
@@ -774,7 +774,7 @@ get_more_pages: | |||
774 | snapc, do_sync, | 774 | snapc, do_sync, |
775 | ci->i_truncate_seq, | 775 | ci->i_truncate_seq, |
776 | ci->i_truncate_size, | 776 | ci->i_truncate_size, |
777 | &inode->i_mtime, true, 1); | 777 | &inode->i_mtime, true, 1, 0); |
778 | max_pages = req->r_num_pages; | 778 | max_pages = req->r_num_pages; |
779 | 779 | ||
780 | alloc_page_vec(fsc, req); | 780 | alloc_page_vec(fsc, req); |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 98ab13e2b71..60d27bc9eb8 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -1430,8 +1430,8 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
1430 | invalidating_gen == ci->i_rdcache_gen) { | 1430 | invalidating_gen == ci->i_rdcache_gen) { |
1431 | /* success. */ | 1431 | /* success. */ |
1432 | dout("try_nonblocking_invalidate %p success\n", inode); | 1432 | dout("try_nonblocking_invalidate %p success\n", inode); |
1433 | ci->i_rdcache_gen = 0; | 1433 | /* save any racing async invalidate some trouble */ |
1434 | ci->i_rdcache_revoking = 0; | 1434 | ci->i_rdcache_revoking = ci->i_rdcache_gen - 1; |
1435 | return 0; | 1435 | return 0; |
1436 | } | 1436 | } |
1437 | dout("try_nonblocking_invalidate %p failed\n", inode); | 1437 | dout("try_nonblocking_invalidate %p failed\n", inode); |
@@ -2273,8 +2273,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2273 | { | 2273 | { |
2274 | struct ceph_inode_info *ci = ceph_inode(inode); | 2274 | struct ceph_inode_info *ci = ceph_inode(inode); |
2275 | int mds = session->s_mds; | 2275 | int mds = session->s_mds; |
2276 | unsigned seq = le32_to_cpu(grant->seq); | 2276 | int seq = le32_to_cpu(grant->seq); |
2277 | unsigned issue_seq = le32_to_cpu(grant->issue_seq); | ||
2278 | int newcaps = le32_to_cpu(grant->caps); | 2277 | int newcaps = le32_to_cpu(grant->caps); |
2279 | int issued, implemented, used, wanted, dirty; | 2278 | int issued, implemented, used, wanted, dirty; |
2280 | u64 size = le64_to_cpu(grant->size); | 2279 | u64 size = le64_to_cpu(grant->size); |
@@ -2286,8 +2285,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2286 | int revoked_rdcache = 0; | 2285 | int revoked_rdcache = 0; |
2287 | int queue_invalidate = 0; | 2286 | int queue_invalidate = 0; |
2288 | 2287 | ||
2289 | dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n", | 2288 | dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", |
2290 | inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps)); | 2289 | inode, cap, mds, seq, ceph_cap_string(newcaps)); |
2291 | dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, | 2290 | dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, |
2292 | inode->i_size); | 2291 | inode->i_size); |
2293 | 2292 | ||
@@ -2383,7 +2382,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2383 | } | 2382 | } |
2384 | 2383 | ||
2385 | cap->seq = seq; | 2384 | cap->seq = seq; |
2386 | cap->issue_seq = issue_seq; | ||
2387 | 2385 | ||
2388 | /* file layout may have changed */ | 2386 | /* file layout may have changed */ |
2389 | ci->i_layout = grant->layout; | 2387 | ci->i_layout = grant->layout; |
@@ -2691,6 +2689,11 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, | |||
2691 | NULL /* no caps context */); | 2689 | NULL /* no caps context */); |
2692 | try_flush_caps(inode, session, NULL); | 2690 | try_flush_caps(inode, session, NULL); |
2693 | up_read(&mdsc->snap_rwsem); | 2691 | up_read(&mdsc->snap_rwsem); |
2692 | |||
2693 | /* make sure we re-request max_size, if necessary */ | ||
2694 | spin_lock(&inode->i_lock); | ||
2695 | ci->i_requested_max_size = 0; | ||
2696 | spin_unlock(&inode->i_lock); | ||
2694 | } | 2697 | } |
2695 | 2698 | ||
2696 | /* | 2699 | /* |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index e0a2dc6fcaf..7d447af84ec 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -336,7 +336,10 @@ more: | |||
336 | if (req->r_reply_info.dir_end) { | 336 | if (req->r_reply_info.dir_end) { |
337 | kfree(fi->last_name); | 337 | kfree(fi->last_name); |
338 | fi->last_name = NULL; | 338 | fi->last_name = NULL; |
339 | fi->next_offset = 2; | 339 | if (ceph_frag_is_rightmost(frag)) |
340 | fi->next_offset = 2; | ||
341 | else | ||
342 | fi->next_offset = 0; | ||
340 | } else { | 343 | } else { |
341 | rinfo = &req->r_reply_info; | 344 | rinfo = &req->r_reply_info; |
342 | err = note_last_dentry(fi, | 345 | err = note_last_dentry(fi, |
@@ -355,18 +358,22 @@ more: | |||
355 | u64 pos = ceph_make_fpos(frag, off); | 358 | u64 pos = ceph_make_fpos(frag, off); |
356 | struct ceph_mds_reply_inode *in = | 359 | struct ceph_mds_reply_inode *in = |
357 | rinfo->dir_in[off - fi->offset].in; | 360 | rinfo->dir_in[off - fi->offset].in; |
361 | struct ceph_vino vino; | ||
362 | ino_t ino; | ||
363 | |||
358 | dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", | 364 | dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", |
359 | off, off - fi->offset, rinfo->dir_nr, pos, | 365 | off, off - fi->offset, rinfo->dir_nr, pos, |
360 | rinfo->dir_dname_len[off - fi->offset], | 366 | rinfo->dir_dname_len[off - fi->offset], |
361 | rinfo->dir_dname[off - fi->offset], in); | 367 | rinfo->dir_dname[off - fi->offset], in); |
362 | BUG_ON(!in); | 368 | BUG_ON(!in); |
363 | ftype = le32_to_cpu(in->mode) >> 12; | 369 | ftype = le32_to_cpu(in->mode) >> 12; |
370 | vino.ino = le64_to_cpu(in->ino); | ||
371 | vino.snap = le64_to_cpu(in->snapid); | ||
372 | ino = ceph_vino_to_ino(vino); | ||
364 | if (filldir(dirent, | 373 | if (filldir(dirent, |
365 | rinfo->dir_dname[off - fi->offset], | 374 | rinfo->dir_dname[off - fi->offset], |
366 | rinfo->dir_dname_len[off - fi->offset], | 375 | rinfo->dir_dname_len[off - fi->offset], |
367 | pos, | 376 | pos, ino, ftype) < 0) { |
368 | le64_to_cpu(in->ino), | ||
369 | ftype) < 0) { | ||
370 | dout("filldir stopping us...\n"); | 377 | dout("filldir stopping us...\n"); |
371 | return 0; | 378 | return 0; |
372 | } | 379 | } |
@@ -414,6 +421,7 @@ static void reset_readdir(struct ceph_file_info *fi) | |||
414 | fi->last_readdir = NULL; | 421 | fi->last_readdir = NULL; |
415 | } | 422 | } |
416 | kfree(fi->last_name); | 423 | kfree(fi->last_name); |
424 | fi->last_name = NULL; | ||
417 | fi->next_offset = 2; /* compensate for . and .. */ | 425 | fi->next_offset = 2; /* compensate for . and .. */ |
418 | if (fi->dentry) { | 426 | if (fi->dentry) { |
419 | dput(fi->dentry); | 427 | dput(fi->dentry); |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index e77c28cf369..8d79b8912e3 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -154,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file) | |||
154 | } | 154 | } |
155 | 155 | ||
156 | /* | 156 | /* |
157 | * No need to block if we have any caps. Update wanted set | 157 | * No need to block if we have caps on the auth MDS (for |
158 | * write) or any MDS (for read). Update wanted set | ||
158 | * asynchronously. | 159 | * asynchronously. |
159 | */ | 160 | */ |
160 | spin_lock(&inode->i_lock); | 161 | spin_lock(&inode->i_lock); |
161 | if (__ceph_is_any_real_caps(ci)) { | 162 | if (__ceph_is_any_real_caps(ci) && |
163 | (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { | ||
162 | int mds_wanted = __ceph_caps_mds_wanted(ci); | 164 | int mds_wanted = __ceph_caps_mds_wanted(ci); |
163 | int issued = __ceph_caps_issued(ci, NULL); | 165 | int issued = __ceph_caps_issued(ci, NULL); |
164 | 166 | ||
@@ -280,11 +282,12 @@ int ceph_release(struct inode *inode, struct file *file) | |||
280 | static int striped_read(struct inode *inode, | 282 | static int striped_read(struct inode *inode, |
281 | u64 off, u64 len, | 283 | u64 off, u64 len, |
282 | struct page **pages, int num_pages, | 284 | struct page **pages, int num_pages, |
283 | int *checkeof) | 285 | int *checkeof, bool align_to_pages) |
284 | { | 286 | { |
285 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | 287 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
286 | struct ceph_inode_info *ci = ceph_inode(inode); | 288 | struct ceph_inode_info *ci = ceph_inode(inode); |
287 | u64 pos, this_len; | 289 | u64 pos, this_len; |
290 | int io_align, page_align; | ||
288 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ | 291 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ |
289 | int left, pages_left; | 292 | int left, pages_left; |
290 | int read; | 293 | int read; |
@@ -300,14 +303,19 @@ static int striped_read(struct inode *inode, | |||
300 | page_pos = pages; | 303 | page_pos = pages; |
301 | pages_left = num_pages; | 304 | pages_left = num_pages; |
302 | read = 0; | 305 | read = 0; |
306 | io_align = off & ~PAGE_MASK; | ||
303 | 307 | ||
304 | more: | 308 | more: |
309 | if (align_to_pages) | ||
310 | page_align = (pos - io_align) & ~PAGE_MASK; | ||
311 | else | ||
312 | page_align = pos & ~PAGE_MASK; | ||
305 | this_len = left; | 313 | this_len = left; |
306 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), | 314 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), |
307 | &ci->i_layout, pos, &this_len, | 315 | &ci->i_layout, pos, &this_len, |
308 | ci->i_truncate_seq, | 316 | ci->i_truncate_seq, |
309 | ci->i_truncate_size, | 317 | ci->i_truncate_size, |
310 | page_pos, pages_left); | 318 | page_pos, pages_left, page_align); |
311 | hit_stripe = this_len < left; | 319 | hit_stripe = this_len < left; |
312 | was_short = ret >= 0 && ret < this_len; | 320 | was_short = ret >= 0 && ret < this_len; |
313 | if (ret == -ENOENT) | 321 | if (ret == -ENOENT) |
@@ -374,26 +382,25 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
374 | dout("sync_read on file %p %llu~%u %s\n", file, off, len, | 382 | dout("sync_read on file %p %llu~%u %s\n", file, off, len, |
375 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); | 383 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); |
376 | 384 | ||
377 | if (file->f_flags & O_DIRECT) { | 385 | if (file->f_flags & O_DIRECT) |
378 | pages = ceph_get_direct_page_vector(data, num_pages, off, len); | 386 | pages = ceph_get_direct_page_vector(data, num_pages); |
379 | 387 | else | |
380 | /* | ||
381 | * flush any page cache pages in this range. this | ||
382 | * will make concurrent normal and O_DIRECT io slow, | ||
383 | * but it will at least behave sensibly when they are | ||
384 | * in sequence. | ||
385 | */ | ||
386 | } else { | ||
387 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); | 388 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); |
388 | } | ||
389 | if (IS_ERR(pages)) | 389 | if (IS_ERR(pages)) |
390 | return PTR_ERR(pages); | 390 | return PTR_ERR(pages); |
391 | 391 | ||
392 | /* | ||
393 | * flush any page cache pages in this range. this | ||
394 | * will make concurrent normal and sync io slow, | ||
395 | * but it will at least behave sensibly when they are | ||
396 | * in sequence. | ||
397 | */ | ||
392 | ret = filemap_write_and_wait(inode->i_mapping); | 398 | ret = filemap_write_and_wait(inode->i_mapping); |
393 | if (ret < 0) | 399 | if (ret < 0) |
394 | goto done; | 400 | goto done; |
395 | 401 | ||
396 | ret = striped_read(inode, off, len, pages, num_pages, checkeof); | 402 | ret = striped_read(inode, off, len, pages, num_pages, checkeof, |
403 | file->f_flags & O_DIRECT); | ||
397 | 404 | ||
398 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) | 405 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) |
399 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); | 406 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); |
@@ -448,6 +455,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
448 | int flags; | 455 | int flags; |
449 | int do_sync = 0; | 456 | int do_sync = 0; |
450 | int check_caps = 0; | 457 | int check_caps = 0; |
458 | int page_align, io_align; | ||
451 | int ret; | 459 | int ret; |
452 | struct timespec mtime = CURRENT_TIME; | 460 | struct timespec mtime = CURRENT_TIME; |
453 | 461 | ||
@@ -462,6 +470,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
462 | else | 470 | else |
463 | pos = *offset; | 471 | pos = *offset; |
464 | 472 | ||
473 | io_align = pos & ~PAGE_MASK; | ||
474 | |||
465 | ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); | 475 | ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); |
466 | if (ret < 0) | 476 | if (ret < 0) |
467 | return ret; | 477 | return ret; |
@@ -486,20 +496,26 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
486 | */ | 496 | */ |
487 | more: | 497 | more: |
488 | len = left; | 498 | len = left; |
499 | if (file->f_flags & O_DIRECT) | ||
500 | /* write from beginning of first page, regardless of | ||
501 | io alignment */ | ||
502 | page_align = (pos - io_align) & ~PAGE_MASK; | ||
503 | else | ||
504 | page_align = pos & ~PAGE_MASK; | ||
489 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, | 505 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, |
490 | ceph_vino(inode), pos, &len, | 506 | ceph_vino(inode), pos, &len, |
491 | CEPH_OSD_OP_WRITE, flags, | 507 | CEPH_OSD_OP_WRITE, flags, |
492 | ci->i_snap_realm->cached_context, | 508 | ci->i_snap_realm->cached_context, |
493 | do_sync, | 509 | do_sync, |
494 | ci->i_truncate_seq, ci->i_truncate_size, | 510 | ci->i_truncate_seq, ci->i_truncate_size, |
495 | &mtime, false, 2); | 511 | &mtime, false, 2, page_align); |
496 | if (!req) | 512 | if (!req) |
497 | return -ENOMEM; | 513 | return -ENOMEM; |
498 | 514 | ||
499 | num_pages = calc_pages_for(pos, len); | 515 | num_pages = calc_pages_for(pos, len); |
500 | 516 | ||
501 | if (file->f_flags & O_DIRECT) { | 517 | if (file->f_flags & O_DIRECT) { |
502 | pages = ceph_get_direct_page_vector(data, num_pages, pos, len); | 518 | pages = ceph_get_direct_page_vector(data, num_pages); |
503 | if (IS_ERR(pages)) { | 519 | if (IS_ERR(pages)) { |
504 | ret = PTR_ERR(pages); | 520 | ret = PTR_ERR(pages); |
505 | goto out; | 521 | goto out; |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 524b80be448..bf1286588f2 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -470,7 +470,9 @@ void ceph_fill_file_time(struct inode *inode, int issued, | |||
470 | 470 | ||
471 | if (issued & (CEPH_CAP_FILE_EXCL| | 471 | if (issued & (CEPH_CAP_FILE_EXCL| |
472 | CEPH_CAP_FILE_WR| | 472 | CEPH_CAP_FILE_WR| |
473 | CEPH_CAP_FILE_BUFFER)) { | 473 | CEPH_CAP_FILE_BUFFER| |
474 | CEPH_CAP_AUTH_EXCL| | ||
475 | CEPH_CAP_XATTR_EXCL)) { | ||
474 | if (timespec_compare(ctime, &inode->i_ctime) > 0) { | 476 | if (timespec_compare(ctime, &inode->i_ctime) > 0) { |
475 | dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", | 477 | dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", |
476 | inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, | 478 | inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, |
@@ -510,7 +512,7 @@ void ceph_fill_file_time(struct inode *inode, int issued, | |||
510 | warn = 1; | 512 | warn = 1; |
511 | } | 513 | } |
512 | } else { | 514 | } else { |
513 | /* we have no write caps; whatever the MDS says is true */ | 515 | /* we have no write|excl caps; whatever the MDS says is true */ |
514 | if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { | 516 | if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { |
515 | inode->i_ctime = *ctime; | 517 | inode->i_ctime = *ctime; |
516 | inode->i_mtime = *mtime; | 518 | inode->i_mtime = *mtime; |
@@ -566,12 +568,17 @@ static int fill_inode(struct inode *inode, | |||
566 | 568 | ||
567 | /* | 569 | /* |
568 | * provided version will be odd if inode value is projected, | 570 | * provided version will be odd if inode value is projected, |
569 | * even if stable. skip the update if we have a newer info | 571 | * even if stable. skip the update if we have newer stable |
570 | * (e.g., due to inode info racing form multiple MDSs), or if | 572 | * info (ours>=theirs, e.g. due to racing mds replies), unless |
571 | * we are getting projected (unstable) inode info. | 573 | * we are getting projected (unstable) info (in which case the |
574 | * version is odd, and we want ours>theirs). | ||
575 | * us them | ||
576 | * 2 2 skip | ||
577 | * 3 2 skip | ||
578 | * 3 3 update | ||
572 | */ | 579 | */ |
573 | if (le64_to_cpu(info->version) > 0 && | 580 | if (le64_to_cpu(info->version) > 0 && |
574 | (ci->i_version & ~1) > le64_to_cpu(info->version)) | 581 | (ci->i_version & ~1) >= le64_to_cpu(info->version)) |
575 | goto no_change; | 582 | goto no_change; |
576 | 583 | ||
577 | issued = __ceph_caps_issued(ci, &implemented); | 584 | issued = __ceph_caps_issued(ci, &implemented); |
@@ -605,7 +612,14 @@ static int fill_inode(struct inode *inode, | |||
605 | le32_to_cpu(info->time_warp_seq), | 612 | le32_to_cpu(info->time_warp_seq), |
606 | &ctime, &mtime, &atime); | 613 | &ctime, &mtime, &atime); |
607 | 614 | ||
608 | ci->i_max_size = le64_to_cpu(info->max_size); | 615 | /* only update max_size on auth cap */ |
616 | if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && | ||
617 | ci->i_max_size != le64_to_cpu(info->max_size)) { | ||
618 | dout("max_size %lld -> %llu\n", ci->i_max_size, | ||
619 | le64_to_cpu(info->max_size)); | ||
620 | ci->i_max_size = le64_to_cpu(info->max_size); | ||
621 | } | ||
622 | |||
609 | ci->i_layout = info->layout; | 623 | ci->i_layout = info->layout; |
610 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; | 624 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; |
611 | 625 | ||
@@ -1054,7 +1068,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1054 | ininfo = rinfo->targeti.in; | 1068 | ininfo = rinfo->targeti.in; |
1055 | vino.ino = le64_to_cpu(ininfo->ino); | 1069 | vino.ino = le64_to_cpu(ininfo->ino); |
1056 | vino.snap = le64_to_cpu(ininfo->snapid); | 1070 | vino.snap = le64_to_cpu(ininfo->snapid); |
1057 | if (!dn->d_inode) { | 1071 | in = dn->d_inode; |
1072 | if (!in) { | ||
1058 | in = ceph_get_inode(sb, vino); | 1073 | in = ceph_get_inode(sb, vino); |
1059 | if (IS_ERR(in)) { | 1074 | if (IS_ERR(in)) { |
1060 | pr_err("fill_trace bad get_inode " | 1075 | pr_err("fill_trace bad get_inode " |
@@ -1385,11 +1400,8 @@ static void ceph_invalidate_work(struct work_struct *work) | |||
1385 | spin_lock(&inode->i_lock); | 1400 | spin_lock(&inode->i_lock); |
1386 | dout("invalidate_pages %p gen %d revoking %d\n", inode, | 1401 | dout("invalidate_pages %p gen %d revoking %d\n", inode, |
1387 | ci->i_rdcache_gen, ci->i_rdcache_revoking); | 1402 | ci->i_rdcache_gen, ci->i_rdcache_revoking); |
1388 | if (ci->i_rdcache_gen == 0 || | 1403 | if (ci->i_rdcache_revoking != ci->i_rdcache_gen) { |
1389 | ci->i_rdcache_revoking != ci->i_rdcache_gen) { | ||
1390 | BUG_ON(ci->i_rdcache_revoking > ci->i_rdcache_gen); | ||
1391 | /* nevermind! */ | 1404 | /* nevermind! */ |
1392 | ci->i_rdcache_revoking = 0; | ||
1393 | spin_unlock(&inode->i_lock); | 1405 | spin_unlock(&inode->i_lock); |
1394 | goto out; | 1406 | goto out; |
1395 | } | 1407 | } |
@@ -1399,15 +1411,16 @@ static void ceph_invalidate_work(struct work_struct *work) | |||
1399 | ceph_invalidate_nondirty_pages(inode->i_mapping); | 1411 | ceph_invalidate_nondirty_pages(inode->i_mapping); |
1400 | 1412 | ||
1401 | spin_lock(&inode->i_lock); | 1413 | spin_lock(&inode->i_lock); |
1402 | if (orig_gen == ci->i_rdcache_gen) { | 1414 | if (orig_gen == ci->i_rdcache_gen && |
1415 | orig_gen == ci->i_rdcache_revoking) { | ||
1403 | dout("invalidate_pages %p gen %d successful\n", inode, | 1416 | dout("invalidate_pages %p gen %d successful\n", inode, |
1404 | ci->i_rdcache_gen); | 1417 | ci->i_rdcache_gen); |
1405 | ci->i_rdcache_gen = 0; | 1418 | ci->i_rdcache_revoking--; |
1406 | ci->i_rdcache_revoking = 0; | ||
1407 | check = 1; | 1419 | check = 1; |
1408 | } else { | 1420 | } else { |
1409 | dout("invalidate_pages %p gen %d raced, gen now %d\n", | 1421 | dout("invalidate_pages %p gen %d raced, now %d revoking %d\n", |
1410 | inode, orig_gen, ci->i_rdcache_gen); | 1422 | inode, orig_gen, ci->i_rdcache_gen, |
1423 | ci->i_rdcache_revoking); | ||
1411 | } | 1424 | } |
1412 | spin_unlock(&inode->i_lock); | 1425 | spin_unlock(&inode->i_lock); |
1413 | 1426 | ||
@@ -1738,7 +1751,7 @@ int ceph_do_getattr(struct inode *inode, int mask) | |||
1738 | return 0; | 1751 | return 0; |
1739 | } | 1752 | } |
1740 | 1753 | ||
1741 | dout("do_getattr inode %p mask %s\n", inode, ceph_cap_string(mask)); | 1754 | dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode); |
1742 | if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) | 1755 | if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) |
1743 | return 0; | 1756 | return 0; |
1744 | 1757 | ||
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 7799cac2b62..098b1850847 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -528,6 +528,9 @@ static void __register_request(struct ceph_mds_client *mdsc, | |||
528 | ceph_mdsc_get_request(req); | 528 | ceph_mdsc_get_request(req); |
529 | __insert_request(mdsc, req); | 529 | __insert_request(mdsc, req); |
530 | 530 | ||
531 | req->r_uid = current_fsuid(); | ||
532 | req->r_gid = current_fsgid(); | ||
533 | |||
531 | if (dir) { | 534 | if (dir) { |
532 | struct ceph_inode_info *ci = ceph_inode(dir); | 535 | struct ceph_inode_info *ci = ceph_inode(dir); |
533 | 536 | ||
@@ -1587,8 +1590,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, | |||
1587 | 1590 | ||
1588 | head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); | 1591 | head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); |
1589 | head->op = cpu_to_le32(req->r_op); | 1592 | head->op = cpu_to_le32(req->r_op); |
1590 | head->caller_uid = cpu_to_le32(current_fsuid()); | 1593 | head->caller_uid = cpu_to_le32(req->r_uid); |
1591 | head->caller_gid = cpu_to_le32(current_fsgid()); | 1594 | head->caller_gid = cpu_to_le32(req->r_gid); |
1592 | head->args = req->r_args; | 1595 | head->args = req->r_args; |
1593 | 1596 | ||
1594 | ceph_encode_filepath(&p, end, ino1, path1); | 1597 | ceph_encode_filepath(&p, end, ino1, path1); |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index d66d63c7235..9341fd4f143 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -170,6 +170,8 @@ struct ceph_mds_request { | |||
170 | 170 | ||
171 | union ceph_mds_request_args r_args; | 171 | union ceph_mds_request_args r_args; |
172 | int r_fmode; /* file mode, if expecting cap */ | 172 | int r_fmode; /* file mode, if expecting cap */ |
173 | uid_t r_uid; | ||
174 | gid_t r_gid; | ||
173 | 175 | ||
174 | /* for choosing which mds to send this request to */ | 176 | /* for choosing which mds to send this request to */ |
175 | int r_direct_mode; | 177 | int r_direct_mode; |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 1886294e12f..7f01728a465 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -293,9 +293,7 @@ struct ceph_inode_info { | |||
293 | int i_rd_ref, i_rdcache_ref, i_wr_ref; | 293 | int i_rd_ref, i_rdcache_ref, i_wr_ref; |
294 | int i_wrbuffer_ref, i_wrbuffer_ref_head; | 294 | int i_wrbuffer_ref, i_wrbuffer_ref_head; |
295 | u32 i_shared_gen; /* increment each time we get FILE_SHARED */ | 295 | u32 i_shared_gen; /* increment each time we get FILE_SHARED */ |
296 | u32 i_rdcache_gen; /* we increment this each time we get | 296 | u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */ |
297 | FILE_CACHE. If it's non-zero, we | ||
298 | _may_ have cached pages. */ | ||
299 | u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ | 297 | u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ |
300 | 298 | ||
301 | struct list_head i_unsafe_writes; /* uncommitted sync writes */ | 299 | struct list_head i_unsafe_writes; /* uncommitted sync writes */ |
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index f22b2e94168..9e76d35670d 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h | |||
@@ -227,8 +227,7 @@ extern int ceph_open_session(struct ceph_client *client); | |||
227 | extern void ceph_release_page_vector(struct page **pages, int num_pages); | 227 | extern void ceph_release_page_vector(struct page **pages, int num_pages); |
228 | 228 | ||
229 | extern struct page **ceph_get_direct_page_vector(const char __user *data, | 229 | extern struct page **ceph_get_direct_page_vector(const char __user *data, |
230 | int num_pages, | 230 | int num_pages); |
231 | loff_t off, size_t len); | ||
232 | extern void ceph_put_page_vector(struct page **pages, int num_pages); | 231 | extern void ceph_put_page_vector(struct page **pages, int num_pages); |
233 | extern void ceph_release_page_vector(struct page **pages, int num_pages); | 232 | extern void ceph_release_page_vector(struct page **pages, int num_pages); |
234 | extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); | 233 | extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); |
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 5956d62c305..a108b425fee 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h | |||
@@ -82,6 +82,7 @@ struct ceph_msg { | |||
82 | struct ceph_buffer *middle; | 82 | struct ceph_buffer *middle; |
83 | struct page **pages; /* data payload. NOT OWNER. */ | 83 | struct page **pages; /* data payload. NOT OWNER. */ |
84 | unsigned nr_pages; /* size of page array */ | 84 | unsigned nr_pages; /* size of page array */ |
85 | unsigned page_alignment; /* io offset in first page */ | ||
85 | struct ceph_pagelist *pagelist; /* instead of pages */ | 86 | struct ceph_pagelist *pagelist; /* instead of pages */ |
86 | struct list_head list_head; | 87 | struct list_head list_head; |
87 | struct kref kref; | 88 | struct kref kref; |
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 6c91fb032c3..a1af29648fb 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h | |||
@@ -79,6 +79,7 @@ struct ceph_osd_request { | |||
79 | struct ceph_file_layout r_file_layout; | 79 | struct ceph_file_layout r_file_layout; |
80 | struct ceph_snap_context *r_snapc; /* snap context for writes */ | 80 | struct ceph_snap_context *r_snapc; /* snap context for writes */ |
81 | unsigned r_num_pages; /* size of page array (follows) */ | 81 | unsigned r_num_pages; /* size of page array (follows) */ |
82 | unsigned r_page_alignment; /* io offset in first page */ | ||
82 | struct page **r_pages; /* pages for data payload */ | 83 | struct page **r_pages; /* pages for data payload */ |
83 | int r_pages_from_pool; | 84 | int r_pages_from_pool; |
84 | int r_own_pages; /* if true, i own page list */ | 85 | int r_own_pages; /* if true, i own page list */ |
@@ -194,7 +195,8 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, | |||
194 | int do_sync, u32 truncate_seq, | 195 | int do_sync, u32 truncate_seq, |
195 | u64 truncate_size, | 196 | u64 truncate_size, |
196 | struct timespec *mtime, | 197 | struct timespec *mtime, |
197 | bool use_mempool, int num_reply); | 198 | bool use_mempool, int num_reply, |
199 | int page_align); | ||
198 | 200 | ||
199 | static inline void ceph_osdc_get_request(struct ceph_osd_request *req) | 201 | static inline void ceph_osdc_get_request(struct ceph_osd_request *req) |
200 | { | 202 | { |
@@ -218,7 +220,8 @@ extern int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||
218 | struct ceph_file_layout *layout, | 220 | struct ceph_file_layout *layout, |
219 | u64 off, u64 *plen, | 221 | u64 off, u64 *plen, |
220 | u32 truncate_seq, u64 truncate_size, | 222 | u32 truncate_seq, u64 truncate_size, |
221 | struct page **pages, int nr_pages); | 223 | struct page **pages, int nr_pages, |
224 | int page_align); | ||
222 | 225 | ||
223 | extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, | 226 | extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, |
224 | struct ceph_vino vino, | 227 | struct ceph_vino vino, |
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 0e8157ee5d4..1c7a2ec4f3c 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c | |||
@@ -540,8 +540,7 @@ static void prepare_write_message(struct ceph_connection *con) | |||
540 | /* initialize page iterator */ | 540 | /* initialize page iterator */ |
541 | con->out_msg_pos.page = 0; | 541 | con->out_msg_pos.page = 0; |
542 | if (m->pages) | 542 | if (m->pages) |
543 | con->out_msg_pos.page_pos = | 543 | con->out_msg_pos.page_pos = m->page_alignment; |
544 | le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK; | ||
545 | else | 544 | else |
546 | con->out_msg_pos.page_pos = 0; | 545 | con->out_msg_pos.page_pos = 0; |
547 | con->out_msg_pos.data_pos = 0; | 546 | con->out_msg_pos.data_pos = 0; |
@@ -1491,7 +1490,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
1491 | struct ceph_msg *m = con->in_msg; | 1490 | struct ceph_msg *m = con->in_msg; |
1492 | int ret; | 1491 | int ret; |
1493 | int to, left; | 1492 | int to, left; |
1494 | unsigned front_len, middle_len, data_len, data_off; | 1493 | unsigned front_len, middle_len, data_len; |
1495 | int datacrc = con->msgr->nocrc; | 1494 | int datacrc = con->msgr->nocrc; |
1496 | int skip; | 1495 | int skip; |
1497 | u64 seq; | 1496 | u64 seq; |
@@ -1527,19 +1526,17 @@ static int read_partial_message(struct ceph_connection *con) | |||
1527 | data_len = le32_to_cpu(con->in_hdr.data_len); | 1526 | data_len = le32_to_cpu(con->in_hdr.data_len); |
1528 | if (data_len > CEPH_MSG_MAX_DATA_LEN) | 1527 | if (data_len > CEPH_MSG_MAX_DATA_LEN) |
1529 | return -EIO; | 1528 | return -EIO; |
1530 | data_off = le16_to_cpu(con->in_hdr.data_off); | ||
1531 | 1529 | ||
1532 | /* verify seq# */ | 1530 | /* verify seq# */ |
1533 | seq = le64_to_cpu(con->in_hdr.seq); | 1531 | seq = le64_to_cpu(con->in_hdr.seq); |
1534 | if ((s64)seq - (s64)con->in_seq < 1) { | 1532 | if ((s64)seq - (s64)con->in_seq < 1) { |
1535 | pr_info("skipping %s%lld %s seq %lld, expected %lld\n", | 1533 | pr_info("skipping %s%lld %s seq %lld expected %lld\n", |
1536 | ENTITY_NAME(con->peer_name), | 1534 | ENTITY_NAME(con->peer_name), |
1537 | ceph_pr_addr(&con->peer_addr.in_addr), | 1535 | ceph_pr_addr(&con->peer_addr.in_addr), |
1538 | seq, con->in_seq + 1); | 1536 | seq, con->in_seq + 1); |
1539 | con->in_base_pos = -front_len - middle_len - data_len - | 1537 | con->in_base_pos = -front_len - middle_len - data_len - |
1540 | sizeof(m->footer); | 1538 | sizeof(m->footer); |
1541 | con->in_tag = CEPH_MSGR_TAG_READY; | 1539 | con->in_tag = CEPH_MSGR_TAG_READY; |
1542 | con->in_seq++; | ||
1543 | return 0; | 1540 | return 0; |
1544 | } else if ((s64)seq - (s64)con->in_seq > 1) { | 1541 | } else if ((s64)seq - (s64)con->in_seq > 1) { |
1545 | pr_err("read_partial_message bad seq %lld expected %lld\n", | 1542 | pr_err("read_partial_message bad seq %lld expected %lld\n", |
@@ -1576,7 +1573,7 @@ static int read_partial_message(struct ceph_connection *con) | |||
1576 | 1573 | ||
1577 | con->in_msg_pos.page = 0; | 1574 | con->in_msg_pos.page = 0; |
1578 | if (m->pages) | 1575 | if (m->pages) |
1579 | con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; | 1576 | con->in_msg_pos.page_pos = m->page_alignment; |
1580 | else | 1577 | else |
1581 | con->in_msg_pos.page_pos = 0; | 1578 | con->in_msg_pos.page_pos = 0; |
1582 | con->in_msg_pos.data_pos = 0; | 1579 | con->in_msg_pos.data_pos = 0; |
@@ -2301,6 +2298,7 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) | |||
2301 | 2298 | ||
2302 | /* data */ | 2299 | /* data */ |
2303 | m->nr_pages = 0; | 2300 | m->nr_pages = 0; |
2301 | m->page_alignment = 0; | ||
2304 | m->pages = NULL; | 2302 | m->pages = NULL; |
2305 | m->pagelist = NULL; | 2303 | m->pagelist = NULL; |
2306 | m->bio = NULL; | 2304 | m->bio = NULL; |
@@ -2370,6 +2368,7 @@ static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | |||
2370 | type, front_len); | 2368 | type, front_len); |
2371 | return NULL; | 2369 | return NULL; |
2372 | } | 2370 | } |
2371 | msg->page_alignment = le16_to_cpu(hdr->data_off); | ||
2373 | } | 2372 | } |
2374 | memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); | 2373 | memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); |
2375 | 2374 | ||
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 79391994b3e..3e20a122ffa 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c | |||
@@ -71,6 +71,7 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc, | |||
71 | op->extent.length = objlen; | 71 | op->extent.length = objlen; |
72 | } | 72 | } |
73 | req->r_num_pages = calc_pages_for(off, *plen); | 73 | req->r_num_pages = calc_pages_for(off, *plen); |
74 | req->r_page_alignment = off & ~PAGE_MASK; | ||
74 | if (op->op == CEPH_OSD_OP_WRITE) | 75 | if (op->op == CEPH_OSD_OP_WRITE) |
75 | op->payload_len = *plen; | 76 | op->payload_len = *plen; |
76 | 77 | ||
@@ -390,6 +391,8 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, | |||
390 | req->r_request->hdr.data_len = cpu_to_le32(data_len); | 391 | req->r_request->hdr.data_len = cpu_to_le32(data_len); |
391 | } | 392 | } |
392 | 393 | ||
394 | req->r_request->page_alignment = req->r_page_alignment; | ||
395 | |||
393 | BUG_ON(p > msg->front.iov_base + msg->front.iov_len); | 396 | BUG_ON(p > msg->front.iov_base + msg->front.iov_len); |
394 | msg_size = p - msg->front.iov_base; | 397 | msg_size = p - msg->front.iov_base; |
395 | msg->front.iov_len = msg_size; | 398 | msg->front.iov_len = msg_size; |
@@ -419,7 +422,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
419 | u32 truncate_seq, | 422 | u32 truncate_seq, |
420 | u64 truncate_size, | 423 | u64 truncate_size, |
421 | struct timespec *mtime, | 424 | struct timespec *mtime, |
422 | bool use_mempool, int num_reply) | 425 | bool use_mempool, int num_reply, |
426 | int page_align) | ||
423 | { | 427 | { |
424 | struct ceph_osd_req_op ops[3]; | 428 | struct ceph_osd_req_op ops[3]; |
425 | struct ceph_osd_request *req; | 429 | struct ceph_osd_request *req; |
@@ -447,6 +451,10 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | |||
447 | calc_layout(osdc, vino, layout, off, plen, req, ops); | 451 | calc_layout(osdc, vino, layout, off, plen, req, ops); |
448 | req->r_file_layout = *layout; /* keep a copy */ | 452 | req->r_file_layout = *layout; /* keep a copy */ |
449 | 453 | ||
454 | /* in case it differs from natural alignment that calc_layout | ||
455 | filled in for us */ | ||
456 | req->r_page_alignment = page_align; | ||
457 | |||
450 | ceph_osdc_build_request(req, off, plen, ops, | 458 | ceph_osdc_build_request(req, off, plen, ops, |
451 | snapc, | 459 | snapc, |
452 | mtime, | 460 | mtime, |
@@ -1489,7 +1497,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||
1489 | struct ceph_vino vino, struct ceph_file_layout *layout, | 1497 | struct ceph_vino vino, struct ceph_file_layout *layout, |
1490 | u64 off, u64 *plen, | 1498 | u64 off, u64 *plen, |
1491 | u32 truncate_seq, u64 truncate_size, | 1499 | u32 truncate_seq, u64 truncate_size, |
1492 | struct page **pages, int num_pages) | 1500 | struct page **pages, int num_pages, int page_align) |
1493 | { | 1501 | { |
1494 | struct ceph_osd_request *req; | 1502 | struct ceph_osd_request *req; |
1495 | int rc = 0; | 1503 | int rc = 0; |
@@ -1499,15 +1507,15 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, | |||
1499 | req = ceph_osdc_new_request(osdc, layout, vino, off, plen, | 1507 | req = ceph_osdc_new_request(osdc, layout, vino, off, plen, |
1500 | CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, | 1508 | CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, |
1501 | NULL, 0, truncate_seq, truncate_size, NULL, | 1509 | NULL, 0, truncate_seq, truncate_size, NULL, |
1502 | false, 1); | 1510 | false, 1, page_align); |
1503 | if (!req) | 1511 | if (!req) |
1504 | return -ENOMEM; | 1512 | return -ENOMEM; |
1505 | 1513 | ||
1506 | /* it may be a short read due to an object boundary */ | 1514 | /* it may be a short read due to an object boundary */ |
1507 | req->r_pages = pages; | 1515 | req->r_pages = pages; |
1508 | 1516 | ||
1509 | dout("readpages final extent is %llu~%llu (%d pages)\n", | 1517 | dout("readpages final extent is %llu~%llu (%d pages align %d)\n", |
1510 | off, *plen, req->r_num_pages); | 1518 | off, *plen, req->r_num_pages, page_align); |
1511 | 1519 | ||
1512 | rc = ceph_osdc_start_request(osdc, req, false); | 1520 | rc = ceph_osdc_start_request(osdc, req, false); |
1513 | if (!rc) | 1521 | if (!rc) |
@@ -1533,6 +1541,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
1533 | { | 1541 | { |
1534 | struct ceph_osd_request *req; | 1542 | struct ceph_osd_request *req; |
1535 | int rc = 0; | 1543 | int rc = 0; |
1544 | int page_align = off & ~PAGE_MASK; | ||
1536 | 1545 | ||
1537 | BUG_ON(vino.snap != CEPH_NOSNAP); | 1546 | BUG_ON(vino.snap != CEPH_NOSNAP); |
1538 | req = ceph_osdc_new_request(osdc, layout, vino, off, &len, | 1547 | req = ceph_osdc_new_request(osdc, layout, vino, off, &len, |
@@ -1541,7 +1550,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, | |||
1541 | CEPH_OSD_FLAG_WRITE, | 1550 | CEPH_OSD_FLAG_WRITE, |
1542 | snapc, do_sync, | 1551 | snapc, do_sync, |
1543 | truncate_seq, truncate_size, mtime, | 1552 | truncate_seq, truncate_size, mtime, |
1544 | nofail, 1); | 1553 | nofail, 1, page_align); |
1545 | if (!req) | 1554 | if (!req) |
1546 | return -ENOMEM; | 1555 | return -ENOMEM; |
1547 | 1556 | ||
@@ -1638,8 +1647,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
1638 | m = ceph_msg_get(req->r_reply); | 1647 | m = ceph_msg_get(req->r_reply); |
1639 | 1648 | ||
1640 | if (data_len > 0) { | 1649 | if (data_len > 0) { |
1641 | unsigned data_off = le16_to_cpu(hdr->data_off); | 1650 | int want = calc_pages_for(req->r_page_alignment, data_len); |
1642 | int want = calc_pages_for(data_off & ~PAGE_MASK, data_len); | ||
1643 | 1651 | ||
1644 | if (unlikely(req->r_num_pages < want)) { | 1652 | if (unlikely(req->r_num_pages < want)) { |
1645 | pr_warning("tid %lld reply %d > expected %d pages\n", | 1653 | pr_warning("tid %lld reply %d > expected %d pages\n", |
@@ -1651,6 +1659,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, | |||
1651 | } | 1659 | } |
1652 | m->pages = req->r_pages; | 1660 | m->pages = req->r_pages; |
1653 | m->nr_pages = req->r_num_pages; | 1661 | m->nr_pages = req->r_num_pages; |
1662 | m->page_alignment = req->r_page_alignment; | ||
1654 | #ifdef CONFIG_BLOCK | 1663 | #ifdef CONFIG_BLOCK |
1655 | m->bio = req->r_bio; | 1664 | m->bio = req->r_bio; |
1656 | #endif | 1665 | #endif |
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 54caf068715..ac34feeb2b3 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c | |||
@@ -13,8 +13,7 @@ | |||
13 | * build a vector of user pages | 13 | * build a vector of user pages |
14 | */ | 14 | */ |
15 | struct page **ceph_get_direct_page_vector(const char __user *data, | 15 | struct page **ceph_get_direct_page_vector(const char __user *data, |
16 | int num_pages, | 16 | int num_pages) |
17 | loff_t off, size_t len) | ||
18 | { | 17 | { |
19 | struct page **pages; | 18 | struct page **pages; |
20 | int rc; | 19 | int rc; |