diff options
author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
---|---|---|
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500 |
commit | c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch) | |
tree | ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /fs/ceph/file.c | |
parent | ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff) | |
parent | 6a00f206debf8a5c8899055726ad127dbeeed098 (diff) |
Merge branch 'mpi-master' into wip-k-fmlp (wip-k-fmlp)
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'fs/ceph/file.c')
-rw-r--r-- | fs/ceph/file.c | 312 |
1 files changed, 96 insertions, 216 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 66e4da6dba22..4698a5c553dc 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -1,5 +1,6 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/module.h> | ||
3 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
5 | #include <linux/file.h> | 6 | #include <linux/file.h> |
@@ -38,8 +39,8 @@ | |||
38 | static struct ceph_mds_request * | 39 | static struct ceph_mds_request * |
39 | prepare_open_request(struct super_block *sb, int flags, int create_mode) | 40 | prepare_open_request(struct super_block *sb, int flags, int create_mode) |
40 | { | 41 | { |
41 | struct ceph_client *client = ceph_sb_to_client(sb); | 42 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
42 | struct ceph_mds_client *mdsc = &client->mdsc; | 43 | struct ceph_mds_client *mdsc = fsc->mdsc; |
43 | struct ceph_mds_request *req; | 44 | struct ceph_mds_request *req; |
44 | int want_auth = USE_ANY_MDS; | 45 | int want_auth = USE_ANY_MDS; |
45 | int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; | 46 | int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; |
@@ -117,8 +118,8 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) | |||
117 | int ceph_open(struct inode *inode, struct file *file) | 118 | int ceph_open(struct inode *inode, struct file *file) |
118 | { | 119 | { |
119 | struct ceph_inode_info *ci = ceph_inode(inode); | 120 | struct ceph_inode_info *ci = ceph_inode(inode); |
120 | struct ceph_client *client = ceph_sb_to_client(inode->i_sb); | 121 | struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); |
121 | struct ceph_mds_client *mdsc = &client->mdsc; | 122 | struct ceph_mds_client *mdsc = fsc->mdsc; |
122 | struct ceph_mds_request *req; | 123 | struct ceph_mds_request *req; |
123 | struct ceph_file_info *cf = file->private_data; | 124 | struct ceph_file_info *cf = file->private_data; |
124 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; | 125 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; |
@@ -153,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file) | |||
153 | } | 154 | } |
154 | 155 | ||
155 | /* | 156 | /* |
156 | * No need to block if we have any caps. Update wanted set | 157 | * No need to block if we have caps on the auth MDS (for |
158 | * write) or any MDS (for read). Update wanted set | ||
157 | * asynchronously. | 159 | * asynchronously. |
158 | */ | 160 | */ |
159 | spin_lock(&inode->i_lock); | 161 | spin_lock(&inode->i_lock); |
160 | if (__ceph_is_any_real_caps(ci)) { | 162 | if (__ceph_is_any_real_caps(ci) && |
163 | (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { | ||
161 | int mds_wanted = __ceph_caps_mds_wanted(ci); | 164 | int mds_wanted = __ceph_caps_mds_wanted(ci); |
162 | int issued = __ceph_caps_issued(ci, NULL); | 165 | int issued = __ceph_caps_issued(ci, NULL); |
163 | 166 | ||
@@ -188,7 +191,8 @@ int ceph_open(struct inode *inode, struct file *file) | |||
188 | err = PTR_ERR(req); | 191 | err = PTR_ERR(req); |
189 | goto out; | 192 | goto out; |
190 | } | 193 | } |
191 | req->r_inode = igrab(inode); | 194 | req->r_inode = inode; |
195 | ihold(inode); | ||
192 | req->r_num_caps = 1; | 196 | req->r_num_caps = 1; |
193 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | 197 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); |
194 | if (!err) | 198 | if (!err) |
@@ -216,8 +220,8 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, | |||
216 | struct nameidata *nd, int mode, | 220 | struct nameidata *nd, int mode, |
217 | int locked_dir) | 221 | int locked_dir) |
218 | { | 222 | { |
219 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 223 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
220 | struct ceph_mds_client *mdsc = &client->mdsc; | 224 | struct ceph_mds_client *mdsc = fsc->mdsc; |
221 | struct file *file = nd->intent.open.file; | 225 | struct file *file = nd->intent.open.file; |
222 | struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); | 226 | struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); |
223 | struct ceph_mds_request *req; | 227 | struct ceph_mds_request *req; |
@@ -270,163 +274,6 @@ int ceph_release(struct inode *inode, struct file *file) | |||
270 | } | 274 | } |
271 | 275 | ||
272 | /* | 276 | /* |
273 | * build a vector of user pages | ||
274 | */ | ||
275 | static struct page **get_direct_page_vector(const char __user *data, | ||
276 | int num_pages, | ||
277 | loff_t off, size_t len) | ||
278 | { | ||
279 | struct page **pages; | ||
280 | int rc; | ||
281 | |||
282 | pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); | ||
283 | if (!pages) | ||
284 | return ERR_PTR(-ENOMEM); | ||
285 | |||
286 | down_read(&current->mm->mmap_sem); | ||
287 | rc = get_user_pages(current, current->mm, (unsigned long)data, | ||
288 | num_pages, 0, 0, pages, NULL); | ||
289 | up_read(&current->mm->mmap_sem); | ||
290 | if (rc < 0) | ||
291 | goto fail; | ||
292 | return pages; | ||
293 | |||
294 | fail: | ||
295 | kfree(pages); | ||
296 | return ERR_PTR(rc); | ||
297 | } | ||
298 | |||
299 | static void put_page_vector(struct page **pages, int num_pages) | ||
300 | { | ||
301 | int i; | ||
302 | |||
303 | for (i = 0; i < num_pages; i++) | ||
304 | put_page(pages[i]); | ||
305 | kfree(pages); | ||
306 | } | ||
307 | |||
308 | void ceph_release_page_vector(struct page **pages, int num_pages) | ||
309 | { | ||
310 | int i; | ||
311 | |||
312 | for (i = 0; i < num_pages; i++) | ||
313 | __free_pages(pages[i], 0); | ||
314 | kfree(pages); | ||
315 | } | ||
316 | |||
317 | /* | ||
318 | * allocate a vector new pages | ||
319 | */ | ||
320 | static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) | ||
321 | { | ||
322 | struct page **pages; | ||
323 | int i; | ||
324 | |||
325 | pages = kmalloc(sizeof(*pages) * num_pages, flags); | ||
326 | if (!pages) | ||
327 | return ERR_PTR(-ENOMEM); | ||
328 | for (i = 0; i < num_pages; i++) { | ||
329 | pages[i] = __page_cache_alloc(flags); | ||
330 | if (pages[i] == NULL) { | ||
331 | ceph_release_page_vector(pages, i); | ||
332 | return ERR_PTR(-ENOMEM); | ||
333 | } | ||
334 | } | ||
335 | return pages; | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * copy user data into a page vector | ||
340 | */ | ||
341 | static int copy_user_to_page_vector(struct page **pages, | ||
342 | const char __user *data, | ||
343 | loff_t off, size_t len) | ||
344 | { | ||
345 | int i = 0; | ||
346 | int po = off & ~PAGE_CACHE_MASK; | ||
347 | int left = len; | ||
348 | int l, bad; | ||
349 | |||
350 | while (left > 0) { | ||
351 | l = min_t(int, PAGE_CACHE_SIZE-po, left); | ||
352 | bad = copy_from_user(page_address(pages[i]) + po, data, l); | ||
353 | if (bad == l) | ||
354 | return -EFAULT; | ||
355 | data += l - bad; | ||
356 | left -= l - bad; | ||
357 | po += l - bad; | ||
358 | if (po == PAGE_CACHE_SIZE) { | ||
359 | po = 0; | ||
360 | i++; | ||
361 | } | ||
362 | } | ||
363 | return len; | ||
364 | } | ||
365 | |||
366 | /* | ||
367 | * copy user data from a page vector into a user pointer | ||
368 | */ | ||
369 | static int copy_page_vector_to_user(struct page **pages, char __user *data, | ||
370 | loff_t off, size_t len) | ||
371 | { | ||
372 | int i = 0; | ||
373 | int po = off & ~PAGE_CACHE_MASK; | ||
374 | int left = len; | ||
375 | int l, bad; | ||
376 | |||
377 | while (left > 0) { | ||
378 | l = min_t(int, left, PAGE_CACHE_SIZE-po); | ||
379 | bad = copy_to_user(data, page_address(pages[i]) + po, l); | ||
380 | if (bad == l) | ||
381 | return -EFAULT; | ||
382 | data += l - bad; | ||
383 | left -= l - bad; | ||
384 | if (po) { | ||
385 | po += l - bad; | ||
386 | if (po == PAGE_CACHE_SIZE) | ||
387 | po = 0; | ||
388 | } | ||
389 | i++; | ||
390 | } | ||
391 | return len; | ||
392 | } | ||
393 | |||
394 | /* | ||
395 | * Zero an extent within a page vector. Offset is relative to the | ||
396 | * start of the first page. | ||
397 | */ | ||
398 | static void zero_page_vector_range(int off, int len, struct page **pages) | ||
399 | { | ||
400 | int i = off >> PAGE_CACHE_SHIFT; | ||
401 | |||
402 | off &= ~PAGE_CACHE_MASK; | ||
403 | |||
404 | dout("zero_page_vector_page %u~%u\n", off, len); | ||
405 | |||
406 | /* leading partial page? */ | ||
407 | if (off) { | ||
408 | int end = min((int)PAGE_CACHE_SIZE, off + len); | ||
409 | dout("zeroing %d %p head from %d\n", i, pages[i], | ||
410 | (int)off); | ||
411 | zero_user_segment(pages[i], off, end); | ||
412 | len -= (end - off); | ||
413 | i++; | ||
414 | } | ||
415 | while (len >= PAGE_CACHE_SIZE) { | ||
416 | dout("zeroing %d %p len=%d\n", i, pages[i], len); | ||
417 | zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); | ||
418 | len -= PAGE_CACHE_SIZE; | ||
419 | i++; | ||
420 | } | ||
421 | /* trailing partial page? */ | ||
422 | if (len) { | ||
423 | dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len); | ||
424 | zero_user_segment(pages[i], 0, len); | ||
425 | } | ||
426 | } | ||
427 | |||
428 | |||
429 | /* | ||
430 | * Read a range of bytes striped over one or more objects. Iterate over | 277 | * Read a range of bytes striped over one or more objects. Iterate over |
431 | * objects we stripe over. (That's not atomic, but good enough for now.) | 278 | * objects we stripe over. (That's not atomic, but good enough for now.) |
432 | * | 279 | * |
@@ -436,12 +283,13 @@ static void zero_page_vector_range(int off, int len, struct page **pages) | |||
436 | static int striped_read(struct inode *inode, | 283 | static int striped_read(struct inode *inode, |
437 | u64 off, u64 len, | 284 | u64 off, u64 len, |
438 | struct page **pages, int num_pages, | 285 | struct page **pages, int num_pages, |
439 | int *checkeof) | 286 | int *checkeof, bool o_direct, |
287 | unsigned long buf_align) | ||
440 | { | 288 | { |
441 | struct ceph_client *client = ceph_inode_to_client(inode); | 289 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
442 | struct ceph_inode_info *ci = ceph_inode(inode); | 290 | struct ceph_inode_info *ci = ceph_inode(inode); |
443 | u64 pos, this_len; | 291 | u64 pos, this_len; |
444 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ | 292 | int io_align, page_align; |
445 | int left, pages_left; | 293 | int left, pages_left; |
446 | int read; | 294 | int read; |
447 | struct page **page_pos; | 295 | struct page **page_pos; |
@@ -456,29 +304,33 @@ static int striped_read(struct inode *inode, | |||
456 | page_pos = pages; | 304 | page_pos = pages; |
457 | pages_left = num_pages; | 305 | pages_left = num_pages; |
458 | read = 0; | 306 | read = 0; |
307 | io_align = off & ~PAGE_MASK; | ||
459 | 308 | ||
460 | more: | 309 | more: |
310 | if (o_direct) | ||
311 | page_align = (pos - io_align + buf_align) & ~PAGE_MASK; | ||
312 | else | ||
313 | page_align = pos & ~PAGE_MASK; | ||
461 | this_len = left; | 314 | this_len = left; |
462 | ret = ceph_osdc_readpages(&client->osdc, ceph_vino(inode), | 315 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), |
463 | &ci->i_layout, pos, &this_len, | 316 | &ci->i_layout, pos, &this_len, |
464 | ci->i_truncate_seq, | 317 | ci->i_truncate_seq, |
465 | ci->i_truncate_size, | 318 | ci->i_truncate_size, |
466 | page_pos, pages_left); | 319 | page_pos, pages_left, page_align); |
467 | hit_stripe = this_len < left; | ||
468 | was_short = ret >= 0 && ret < this_len; | ||
469 | if (ret == -ENOENT) | 320 | if (ret == -ENOENT) |
470 | ret = 0; | 321 | ret = 0; |
322 | hit_stripe = this_len < left; | ||
323 | was_short = ret >= 0 && ret < this_len; | ||
471 | dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read, | 324 | dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read, |
472 | ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); | 325 | ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); |
473 | 326 | ||
474 | if (ret > 0) { | 327 | if (ret > 0) { |
475 | int didpages = | 328 | int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT; |
476 | ((pos & ~PAGE_CACHE_MASK) + ret) >> PAGE_CACHE_SHIFT; | ||
477 | 329 | ||
478 | if (read < pos - off) { | 330 | if (read < pos - off) { |
479 | dout(" zero gap %llu to %llu\n", off + read, pos); | 331 | dout(" zero gap %llu to %llu\n", off + read, pos); |
480 | zero_page_vector_range(page_off + read, | 332 | ceph_zero_page_vector_range(page_align + read, |
481 | pos - off - read, pages); | 333 | pos - off - read, pages); |
482 | } | 334 | } |
483 | pos += ret; | 335 | pos += ret; |
484 | read = pos - off; | 336 | read = pos - off; |
@@ -492,20 +344,22 @@ more: | |||
492 | } | 344 | } |
493 | 345 | ||
494 | if (was_short) { | 346 | if (was_short) { |
495 | /* was original extent fully inside i_size? */ | 347 | /* did we bounce off eof? */ |
496 | if (pos + left <= inode->i_size) { | 348 | if (pos + left > inode->i_size) |
497 | dout("zero tail\n"); | 349 | *checkeof = 1; |
498 | zero_page_vector_range(page_off + read, len - read, | 350 | |
499 | pages); | 351 | /* zero trailing bytes (inside i_size) */ |
500 | read = len; | 352 | if (left > 0 && pos < inode->i_size) { |
501 | goto out; | 353 | if (pos + left > inode->i_size) |
354 | left = inode->i_size - pos; | ||
355 | |||
356 | dout("zero tail %d\n", left); | ||
357 | ceph_zero_page_vector_range(page_align + read, left, | ||
358 | pages); | ||
359 | read += left; | ||
502 | } | 360 | } |
503 | |||
504 | /* check i_size */ | ||
505 | *checkeof = 1; | ||
506 | } | 361 | } |
507 | 362 | ||
508 | out: | ||
509 | if (ret >= 0) | 363 | if (ret >= 0) |
510 | ret = read; | 364 | ret = read; |
511 | dout("striped_read returns %d\n", ret); | 365 | dout("striped_read returns %d\n", ret); |
@@ -524,41 +378,43 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
524 | struct inode *inode = file->f_dentry->d_inode; | 378 | struct inode *inode = file->f_dentry->d_inode; |
525 | struct page **pages; | 379 | struct page **pages; |
526 | u64 off = *poff; | 380 | u64 off = *poff; |
527 | int num_pages = calc_pages_for(off, len); | 381 | int num_pages, ret; |
528 | int ret; | ||
529 | 382 | ||
530 | dout("sync_read on file %p %llu~%u %s\n", file, off, len, | 383 | dout("sync_read on file %p %llu~%u %s\n", file, off, len, |
531 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); | 384 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); |
532 | 385 | ||
533 | if (file->f_flags & O_DIRECT) { | 386 | if (file->f_flags & O_DIRECT) { |
534 | pages = get_direct_page_vector(data, num_pages, off, len); | 387 | num_pages = calc_pages_for((unsigned long)data, len); |
535 | 388 | pages = ceph_get_direct_page_vector(data, num_pages, true); | |
536 | /* | ||
537 | * flush any page cache pages in this range. this | ||
538 | * will make concurrent normal and O_DIRECT io slow, | ||
539 | * but it will at least behave sensibly when they are | ||
540 | * in sequence. | ||
541 | */ | ||
542 | } else { | 389 | } else { |
390 | num_pages = calc_pages_for(off, len); | ||
543 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); | 391 | pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); |
544 | } | 392 | } |
545 | if (IS_ERR(pages)) | 393 | if (IS_ERR(pages)) |
546 | return PTR_ERR(pages); | 394 | return PTR_ERR(pages); |
547 | 395 | ||
396 | /* | ||
397 | * flush any page cache pages in this range. this | ||
398 | * will make concurrent normal and sync io slow, | ||
399 | * but it will at least behave sensibly when they are | ||
400 | * in sequence. | ||
401 | */ | ||
548 | ret = filemap_write_and_wait(inode->i_mapping); | 402 | ret = filemap_write_and_wait(inode->i_mapping); |
549 | if (ret < 0) | 403 | if (ret < 0) |
550 | goto done; | 404 | goto done; |
551 | 405 | ||
552 | ret = striped_read(inode, off, len, pages, num_pages, checkeof); | 406 | ret = striped_read(inode, off, len, pages, num_pages, checkeof, |
407 | file->f_flags & O_DIRECT, | ||
408 | (unsigned long)data & ~PAGE_MASK); | ||
553 | 409 | ||
554 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) | 410 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) |
555 | ret = copy_page_vector_to_user(pages, data, off, ret); | 411 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); |
556 | if (ret >= 0) | 412 | if (ret >= 0) |
557 | *poff = off + ret; | 413 | *poff = off + ret; |
558 | 414 | ||
559 | done: | 415 | done: |
560 | if (file->f_flags & O_DIRECT) | 416 | if (file->f_flags & O_DIRECT) |
561 | put_page_vector(pages, num_pages); | 417 | ceph_put_page_vector(pages, num_pages, true); |
562 | else | 418 | else |
563 | ceph_release_page_vector(pages, num_pages); | 419 | ceph_release_page_vector(pages, num_pages); |
564 | dout("sync_read result %d\n", ret); | 420 | dout("sync_read result %d\n", ret); |
@@ -594,7 +450,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
594 | { | 450 | { |
595 | struct inode *inode = file->f_dentry->d_inode; | 451 | struct inode *inode = file->f_dentry->d_inode; |
596 | struct ceph_inode_info *ci = ceph_inode(inode); | 452 | struct ceph_inode_info *ci = ceph_inode(inode); |
597 | struct ceph_client *client = ceph_inode_to_client(inode); | 453 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
598 | struct ceph_osd_request *req; | 454 | struct ceph_osd_request *req; |
599 | struct page **pages; | 455 | struct page **pages; |
600 | int num_pages; | 456 | int num_pages; |
@@ -604,6 +460,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
604 | int flags; | 460 | int flags; |
605 | int do_sync = 0; | 461 | int do_sync = 0; |
606 | int check_caps = 0; | 462 | int check_caps = 0; |
463 | int page_align, io_align; | ||
464 | unsigned long buf_align; | ||
607 | int ret; | 465 | int ret; |
608 | struct timespec mtime = CURRENT_TIME; | 466 | struct timespec mtime = CURRENT_TIME; |
609 | 467 | ||
@@ -641,21 +499,30 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
641 | * boundary. this isn't atomic, unfortunately. :( | 499 | * boundary. this isn't atomic, unfortunately. :( |
642 | */ | 500 | */ |
643 | more: | 501 | more: |
502 | io_align = pos & ~PAGE_MASK; | ||
503 | buf_align = (unsigned long)data & ~PAGE_MASK; | ||
644 | len = left; | 504 | len = left; |
645 | req = ceph_osdc_new_request(&client->osdc, &ci->i_layout, | 505 | if (file->f_flags & O_DIRECT) { |
506 | /* write from beginning of first page, regardless of | ||
507 | io alignment */ | ||
508 | page_align = (pos - io_align + buf_align) & ~PAGE_MASK; | ||
509 | num_pages = calc_pages_for((unsigned long)data, len); | ||
510 | } else { | ||
511 | page_align = pos & ~PAGE_MASK; | ||
512 | num_pages = calc_pages_for(pos, len); | ||
513 | } | ||
514 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, | ||
646 | ceph_vino(inode), pos, &len, | 515 | ceph_vino(inode), pos, &len, |
647 | CEPH_OSD_OP_WRITE, flags, | 516 | CEPH_OSD_OP_WRITE, flags, |
648 | ci->i_snap_realm->cached_context, | 517 | ci->i_snap_realm->cached_context, |
649 | do_sync, | 518 | do_sync, |
650 | ci->i_truncate_seq, ci->i_truncate_size, | 519 | ci->i_truncate_seq, ci->i_truncate_size, |
651 | &mtime, false, 2); | 520 | &mtime, false, 2, page_align); |
652 | if (!req) | 521 | if (!req) |
653 | return -ENOMEM; | 522 | return -ENOMEM; |
654 | 523 | ||
655 | num_pages = calc_pages_for(pos, len); | ||
656 | |||
657 | if (file->f_flags & O_DIRECT) { | 524 | if (file->f_flags & O_DIRECT) { |
658 | pages = get_direct_page_vector(data, num_pages, pos, len); | 525 | pages = ceph_get_direct_page_vector(data, num_pages, false); |
659 | if (IS_ERR(pages)) { | 526 | if (IS_ERR(pages)) { |
660 | ret = PTR_ERR(pages); | 527 | ret = PTR_ERR(pages); |
661 | goto out; | 528 | goto out; |
@@ -673,7 +540,7 @@ more: | |||
673 | ret = PTR_ERR(pages); | 540 | ret = PTR_ERR(pages); |
674 | goto out; | 541 | goto out; |
675 | } | 542 | } |
676 | ret = copy_user_to_page_vector(pages, data, pos, len); | 543 | ret = ceph_copy_user_to_page_vector(pages, data, pos, len); |
677 | if (ret < 0) { | 544 | if (ret < 0) { |
678 | ceph_release_page_vector(pages, num_pages); | 545 | ceph_release_page_vector(pages, num_pages); |
679 | goto out; | 546 | goto out; |
@@ -689,7 +556,7 @@ more: | |||
689 | req->r_num_pages = num_pages; | 556 | req->r_num_pages = num_pages; |
690 | req->r_inode = inode; | 557 | req->r_inode = inode; |
691 | 558 | ||
692 | ret = ceph_osdc_start_request(&client->osdc, req, false); | 559 | ret = ceph_osdc_start_request(&fsc->client->osdc, req, false); |
693 | if (!ret) { | 560 | if (!ret) { |
694 | if (req->r_safe_callback) { | 561 | if (req->r_safe_callback) { |
695 | /* | 562 | /* |
@@ -697,15 +564,23 @@ more: | |||
697 | * start_request so that a tid has been assigned. | 564 | * start_request so that a tid has been assigned. |
698 | */ | 565 | */ |
699 | spin_lock(&ci->i_unsafe_lock); | 566 | spin_lock(&ci->i_unsafe_lock); |
700 | list_add(&req->r_unsafe_item, &ci->i_unsafe_writes); | 567 | list_add_tail(&req->r_unsafe_item, |
568 | &ci->i_unsafe_writes); | ||
701 | spin_unlock(&ci->i_unsafe_lock); | 569 | spin_unlock(&ci->i_unsafe_lock); |
702 | ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); | 570 | ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); |
703 | } | 571 | } |
704 | ret = ceph_osdc_wait_request(&client->osdc, req); | 572 | |
573 | ret = ceph_osdc_wait_request(&fsc->client->osdc, req); | ||
574 | if (ret < 0 && req->r_safe_callback) { | ||
575 | spin_lock(&ci->i_unsafe_lock); | ||
576 | list_del_init(&req->r_unsafe_item); | ||
577 | spin_unlock(&ci->i_unsafe_lock); | ||
578 | ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR); | ||
579 | } | ||
705 | } | 580 | } |
706 | 581 | ||
707 | if (file->f_flags & O_DIRECT) | 582 | if (file->f_flags & O_DIRECT) |
708 | put_page_vector(pages, num_pages); | 583 | ceph_put_page_vector(pages, num_pages, false); |
709 | else if (file->f_flags & O_SYNC) | 584 | else if (file->f_flags & O_SYNC) |
710 | ceph_release_page_vector(pages, num_pages); | 585 | ceph_release_page_vector(pages, num_pages); |
711 | 586 | ||
@@ -715,6 +590,7 @@ out: | |||
715 | pos += len; | 590 | pos += len; |
716 | written += len; | 591 | written += len; |
717 | left -= len; | 592 | left -= len; |
593 | data += written; | ||
718 | if (left) | 594 | if (left) |
719 | goto more; | 595 | goto more; |
720 | 596 | ||
@@ -783,7 +659,7 @@ out: | |||
783 | 659 | ||
784 | /* hit EOF or hole? */ | 660 | /* hit EOF or hole? */ |
785 | if (statret == 0 && *ppos < inode->i_size) { | 661 | if (statret == 0 && *ppos < inode->i_size) { |
786 | dout("aio_read sync_read hit hole, reading more\n"); | 662 | dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size); |
787 | read += ret; | 663 | read += ret; |
788 | base += ret; | 664 | base += ret; |
789 | len -= ret; | 665 | len -= ret; |
@@ -814,7 +690,8 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
814 | struct ceph_file_info *fi = file->private_data; | 690 | struct ceph_file_info *fi = file->private_data; |
815 | struct inode *inode = file->f_dentry->d_inode; | 691 | struct inode *inode = file->f_dentry->d_inode; |
816 | struct ceph_inode_info *ci = ceph_inode(inode); | 692 | struct ceph_inode_info *ci = ceph_inode(inode); |
817 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; | 693 | struct ceph_osd_client *osdc = |
694 | &ceph_sb_to_client(inode->i_sb)->client->osdc; | ||
818 | loff_t endoff = pos + iov->iov_len; | 695 | loff_t endoff = pos + iov->iov_len; |
819 | int want, got = 0; | 696 | int want, got = 0; |
820 | int ret, err; | 697 | int ret, err; |
@@ -858,9 +735,12 @@ retry_snap: | |||
858 | } | 735 | } |
859 | } | 736 | } |
860 | if (ret >= 0) { | 737 | if (ret >= 0) { |
738 | int dirty; | ||
861 | spin_lock(&inode->i_lock); | 739 | spin_lock(&inode->i_lock); |
862 | __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); | 740 | dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); |
863 | spin_unlock(&inode->i_lock); | 741 | spin_unlock(&inode->i_lock); |
742 | if (dirty) | ||
743 | __mark_inode_dirty(inode, dirty); | ||
864 | } | 744 | } |
865 | 745 | ||
866 | out: | 746 | out: |