diff options
Diffstat (limited to 'fs/ocfs2/file.c')
-rw-r--r-- | fs/ocfs2/file.c | 744 |
1 files changed, 643 insertions, 101 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 4979b6675717..5727cd18302a 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/splice.h> | 34 | #include <linux/splice.h> |
35 | #include <linux/mount.h> | 35 | #include <linux/mount.h> |
36 | #include <linux/writeback.h> | 36 | #include <linux/writeback.h> |
37 | #include <linux/falloc.h> | ||
37 | 38 | ||
38 | #define MLOG_MASK_PREFIX ML_INODE | 39 | #define MLOG_MASK_PREFIX ML_INODE |
39 | #include <cluster/masklog.h> | 40 | #include <cluster/masklog.h> |
@@ -263,6 +264,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, | |||
263 | int status; | 264 | int status; |
264 | handle_t *handle; | 265 | handle_t *handle; |
265 | struct ocfs2_dinode *di; | 266 | struct ocfs2_dinode *di; |
267 | u64 cluster_bytes; | ||
266 | 268 | ||
267 | mlog_entry_void(); | 269 | mlog_entry_void(); |
268 | 270 | ||
@@ -286,7 +288,9 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, | |||
286 | /* | 288 | /* |
287 | * Do this before setting i_size. | 289 | * Do this before setting i_size. |
288 | */ | 290 | */ |
289 | status = ocfs2_zero_tail_for_truncate(inode, handle, new_i_size); | 291 | cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size); |
292 | status = ocfs2_zero_range_for_truncate(inode, handle, new_i_size, | ||
293 | cluster_bytes); | ||
290 | if (status) { | 294 | if (status) { |
291 | mlog_errno(status); | 295 | mlog_errno(status); |
292 | goto out_commit; | 296 | goto out_commit; |
@@ -326,9 +330,6 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
326 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 330 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
327 | (unsigned long long)new_i_size); | 331 | (unsigned long long)new_i_size); |
328 | 332 | ||
329 | unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); | ||
330 | truncate_inode_pages(inode->i_mapping, new_i_size); | ||
331 | |||
332 | fe = (struct ocfs2_dinode *) di_bh->b_data; | 333 | fe = (struct ocfs2_dinode *) di_bh->b_data; |
333 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 334 | if (!OCFS2_IS_VALID_DINODE(fe)) { |
334 | OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); | 335 | OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); |
@@ -363,16 +364,23 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
363 | if (new_i_size == le64_to_cpu(fe->i_size)) | 364 | if (new_i_size == le64_to_cpu(fe->i_size)) |
364 | goto bail; | 365 | goto bail; |
365 | 366 | ||
367 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
368 | |||
366 | /* This forces other nodes to sync and drop their pages. Do | 369 | /* This forces other nodes to sync and drop their pages. Do |
367 | * this even if we have a truncate without allocation change - | 370 | * this even if we have a truncate without allocation change - |
368 | * ocfs2 cluster sizes can be much greater than page size, so | 371 | * ocfs2 cluster sizes can be much greater than page size, so |
369 | * we have to truncate them anyway. */ | 372 | * we have to truncate them anyway. */ |
370 | status = ocfs2_data_lock(inode, 1); | 373 | status = ocfs2_data_lock(inode, 1); |
371 | if (status < 0) { | 374 | if (status < 0) { |
375 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
376 | |||
372 | mlog_errno(status); | 377 | mlog_errno(status); |
373 | goto bail; | 378 | goto bail; |
374 | } | 379 | } |
375 | 380 | ||
381 | unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); | ||
382 | truncate_inode_pages(inode->i_mapping, new_i_size); | ||
383 | |||
376 | /* alright, we're going to need to do a full blown alloc size | 384 | /* alright, we're going to need to do a full blown alloc size |
377 | * change. Orphan the inode so that recovery can complete the | 385 | * change. Orphan the inode so that recovery can complete the |
378 | * truncate if necessary. This does the task of marking | 386 | * truncate if necessary. This does the task of marking |
@@ -399,6 +407,8 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
399 | bail_unlock_data: | 407 | bail_unlock_data: |
400 | ocfs2_data_unlock(inode, 1); | 408 | ocfs2_data_unlock(inode, 1); |
401 | 409 | ||
410 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
411 | |||
402 | bail: | 412 | bail: |
403 | 413 | ||
404 | mlog_exit(status); | 414 | mlog_exit(status); |
@@ -419,6 +429,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | |||
419 | struct inode *inode, | 429 | struct inode *inode, |
420 | u32 *logical_offset, | 430 | u32 *logical_offset, |
421 | u32 clusters_to_add, | 431 | u32 clusters_to_add, |
432 | int mark_unwritten, | ||
422 | struct buffer_head *fe_bh, | 433 | struct buffer_head *fe_bh, |
423 | handle_t *handle, | 434 | handle_t *handle, |
424 | struct ocfs2_alloc_context *data_ac, | 435 | struct ocfs2_alloc_context *data_ac, |
@@ -431,9 +442,13 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | |||
431 | enum ocfs2_alloc_restarted reason = RESTART_NONE; | 442 | enum ocfs2_alloc_restarted reason = RESTART_NONE; |
432 | u32 bit_off, num_bits; | 443 | u32 bit_off, num_bits; |
433 | u64 block; | 444 | u64 block; |
445 | u8 flags = 0; | ||
434 | 446 | ||
435 | BUG_ON(!clusters_to_add); | 447 | BUG_ON(!clusters_to_add); |
436 | 448 | ||
449 | if (mark_unwritten) | ||
450 | flags = OCFS2_EXT_UNWRITTEN; | ||
451 | |||
437 | free_extents = ocfs2_num_free_extents(osb, inode, fe); | 452 | free_extents = ocfs2_num_free_extents(osb, inode, fe); |
438 | if (free_extents < 0) { | 453 | if (free_extents < 0) { |
439 | status = free_extents; | 454 | status = free_extents; |
@@ -483,7 +498,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | |||
483 | num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); | 498 | num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); |
484 | status = ocfs2_insert_extent(osb, handle, inode, fe_bh, | 499 | status = ocfs2_insert_extent(osb, handle, inode, fe_bh, |
485 | *logical_offset, block, num_bits, | 500 | *logical_offset, block, num_bits, |
486 | meta_ac); | 501 | flags, meta_ac); |
487 | if (status < 0) { | 502 | if (status < 0) { |
488 | mlog_errno(status); | 503 | mlog_errno(status); |
489 | goto leave; | 504 | goto leave; |
@@ -516,25 +531,31 @@ leave: | |||
516 | * For a given allocation, determine which allocators will need to be | 531 | * For a given allocation, determine which allocators will need to be |
517 | * accessed, and lock them, reserving the appropriate number of bits. | 532 | * accessed, and lock them, reserving the appropriate number of bits. |
518 | * | 533 | * |
519 | * Called from ocfs2_extend_allocation() for file systems which don't | 534 | * Sparse file systems call this from ocfs2_write_begin_nolock() |
520 | * support holes, and from ocfs2_write() for file systems which | 535 | * and ocfs2_allocate_unwritten_extents(). |
521 | * understand sparse inodes. | 536 | * |
537 | * File systems which don't support holes call this from | ||
538 | * ocfs2_extend_allocation(). | ||
522 | */ | 539 | */ |
523 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | 540 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, |
524 | u32 clusters_to_add, | 541 | u32 clusters_to_add, u32 extents_to_split, |
525 | struct ocfs2_alloc_context **data_ac, | 542 | struct ocfs2_alloc_context **data_ac, |
526 | struct ocfs2_alloc_context **meta_ac) | 543 | struct ocfs2_alloc_context **meta_ac) |
527 | { | 544 | { |
528 | int ret, num_free_extents; | 545 | int ret = 0, num_free_extents; |
546 | unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; | ||
529 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 547 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
530 | 548 | ||
531 | *meta_ac = NULL; | 549 | *meta_ac = NULL; |
532 | *data_ac = NULL; | 550 | if (data_ac) |
551 | *data_ac = NULL; | ||
552 | |||
553 | BUG_ON(clusters_to_add != 0 && data_ac == NULL); | ||
533 | 554 | ||
534 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " | 555 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " |
535 | "clusters_to_add = %u\n", | 556 | "clusters_to_add = %u, extents_to_split = %u\n", |
536 | (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), | 557 | (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), |
537 | le32_to_cpu(di->i_clusters), clusters_to_add); | 558 | le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split); |
538 | 559 | ||
539 | num_free_extents = ocfs2_num_free_extents(osb, inode, di); | 560 | num_free_extents = ocfs2_num_free_extents(osb, inode, di); |
540 | if (num_free_extents < 0) { | 561 | if (num_free_extents < 0) { |
@@ -552,9 +573,12 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | |||
552 | * | 573 | * |
553 | * Most of the time we'll only be seeing this 1 cluster at a time | 574 | * Most of the time we'll only be seeing this 1 cluster at a time |
554 | * anyway. | 575 | * anyway. |
576 | * | ||
577 | * Always lock for any unwritten extents - we might want to | ||
578 | * add blocks during a split. | ||
555 | */ | 579 | */ |
556 | if (!num_free_extents || | 580 | if (!num_free_extents || |
557 | (ocfs2_sparse_alloc(osb) && num_free_extents < clusters_to_add)) { | 581 | (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) { |
558 | ret = ocfs2_reserve_new_metadata(osb, di, meta_ac); | 582 | ret = ocfs2_reserve_new_metadata(osb, di, meta_ac); |
559 | if (ret < 0) { | 583 | if (ret < 0) { |
560 | if (ret != -ENOSPC) | 584 | if (ret != -ENOSPC) |
@@ -563,6 +587,9 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | |||
563 | } | 587 | } |
564 | } | 588 | } |
565 | 589 | ||
590 | if (clusters_to_add == 0) | ||
591 | goto out; | ||
592 | |||
566 | ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); | 593 | ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); |
567 | if (ret < 0) { | 594 | if (ret < 0) { |
568 | if (ret != -ENOSPC) | 595 | if (ret != -ENOSPC) |
@@ -585,14 +612,13 @@ out: | |||
585 | return ret; | 612 | return ret; |
586 | } | 613 | } |
587 | 614 | ||
588 | static int ocfs2_extend_allocation(struct inode *inode, | 615 | static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, |
589 | u32 clusters_to_add) | 616 | u32 clusters_to_add, int mark_unwritten) |
590 | { | 617 | { |
591 | int status = 0; | 618 | int status = 0; |
592 | int restart_func = 0; | 619 | int restart_func = 0; |
593 | int drop_alloc_sem = 0; | ||
594 | int credits; | 620 | int credits; |
595 | u32 prev_clusters, logical_start; | 621 | u32 prev_clusters; |
596 | struct buffer_head *bh = NULL; | 622 | struct buffer_head *bh = NULL; |
597 | struct ocfs2_dinode *fe = NULL; | 623 | struct ocfs2_dinode *fe = NULL; |
598 | handle_t *handle = NULL; | 624 | handle_t *handle = NULL; |
@@ -607,7 +633,7 @@ static int ocfs2_extend_allocation(struct inode *inode, | |||
607 | * This function only exists for file systems which don't | 633 | * This function only exists for file systems which don't |
608 | * support holes. | 634 | * support holes. |
609 | */ | 635 | */ |
610 | BUG_ON(ocfs2_sparse_alloc(osb)); | 636 | BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); |
611 | 637 | ||
612 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, | 638 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, |
613 | OCFS2_BH_CACHED, inode); | 639 | OCFS2_BH_CACHED, inode); |
@@ -623,19 +649,10 @@ static int ocfs2_extend_allocation(struct inode *inode, | |||
623 | goto leave; | 649 | goto leave; |
624 | } | 650 | } |
625 | 651 | ||
626 | logical_start = OCFS2_I(inode)->ip_clusters; | ||
627 | |||
628 | restart_all: | 652 | restart_all: |
629 | BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); | 653 | BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); |
630 | 654 | ||
631 | /* blocks peope in read/write from reading our allocation | 655 | status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac, |
632 | * until we're done changing it. We depend on i_mutex to block | ||
633 | * other extend/truncate calls while we're here. Ordering wrt | ||
634 | * start_trans is important here -- always do it before! */ | ||
635 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
636 | drop_alloc_sem = 1; | ||
637 | |||
638 | status = ocfs2_lock_allocators(inode, fe, clusters_to_add, &data_ac, | ||
639 | &meta_ac); | 656 | &meta_ac); |
640 | if (status) { | 657 | if (status) { |
641 | mlog_errno(status); | 658 | mlog_errno(status); |
@@ -668,6 +685,7 @@ restarted_transaction: | |||
668 | inode, | 685 | inode, |
669 | &logical_start, | 686 | &logical_start, |
670 | clusters_to_add, | 687 | clusters_to_add, |
688 | mark_unwritten, | ||
671 | bh, | 689 | bh, |
672 | handle, | 690 | handle, |
673 | data_ac, | 691 | data_ac, |
@@ -720,10 +738,6 @@ restarted_transaction: | |||
720 | OCFS2_I(inode)->ip_clusters, i_size_read(inode)); | 738 | OCFS2_I(inode)->ip_clusters, i_size_read(inode)); |
721 | 739 | ||
722 | leave: | 740 | leave: |
723 | if (drop_alloc_sem) { | ||
724 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
725 | drop_alloc_sem = 0; | ||
726 | } | ||
727 | if (handle) { | 741 | if (handle) { |
728 | ocfs2_commit_trans(osb, handle); | 742 | ocfs2_commit_trans(osb, handle); |
729 | handle = NULL; | 743 | handle = NULL; |
@@ -749,6 +763,25 @@ leave: | |||
749 | return status; | 763 | return status; |
750 | } | 764 | } |
751 | 765 | ||
766 | static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | ||
767 | u32 clusters_to_add, int mark_unwritten) | ||
768 | { | ||
769 | int ret; | ||
770 | |||
771 | /* | ||
772 | * The alloc sem blocks peope in read/write from reading our | ||
773 | * allocation until we're done changing it. We depend on | ||
774 | * i_mutex to block other extend/truncate calls while we're | ||
775 | * here. | ||
776 | */ | ||
777 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
778 | ret = __ocfs2_extend_allocation(inode, logical_start, clusters_to_add, | ||
779 | mark_unwritten); | ||
780 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
781 | |||
782 | return ret; | ||
783 | } | ||
784 | |||
752 | /* Some parts of this taken from generic_cont_expand, which turned out | 785 | /* Some parts of this taken from generic_cont_expand, which turned out |
753 | * to be too fragile to do exactly what we need without us having to | 786 | * to be too fragile to do exactly what we need without us having to |
754 | * worry about recursive locking in ->prepare_write() and | 787 | * worry about recursive locking in ->prepare_write() and |
@@ -890,7 +923,9 @@ static int ocfs2_extend_file(struct inode *inode, | |||
890 | } | 923 | } |
891 | 924 | ||
892 | if (clusters_to_add) { | 925 | if (clusters_to_add) { |
893 | ret = ocfs2_extend_allocation(inode, clusters_to_add); | 926 | ret = ocfs2_extend_allocation(inode, |
927 | OCFS2_I(inode)->ip_clusters, | ||
928 | clusters_to_add, 0); | ||
894 | if (ret < 0) { | 929 | if (ret < 0) { |
895 | mlog_errno(ret); | 930 | mlog_errno(ret); |
896 | goto out_unlock; | 931 | goto out_unlock; |
@@ -995,6 +1030,13 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
995 | goto bail_unlock; | 1030 | goto bail_unlock; |
996 | } | 1031 | } |
997 | 1032 | ||
1033 | /* | ||
1034 | * This will intentionally not wind up calling vmtruncate(), | ||
1035 | * since all the work for a size change has been done above. | ||
1036 | * Otherwise, we could get into problems with truncate as | ||
1037 | * ip_alloc_sem is used there to protect against i_size | ||
1038 | * changes. | ||
1039 | */ | ||
998 | status = inode_setattr(inode, attr); | 1040 | status = inode_setattr(inode, attr); |
999 | if (status < 0) { | 1041 | if (status < 0) { |
1000 | mlog_errno(status); | 1042 | mlog_errno(status); |
@@ -1070,17 +1112,16 @@ out: | |||
1070 | return ret; | 1112 | return ret; |
1071 | } | 1113 | } |
1072 | 1114 | ||
1073 | static int ocfs2_write_remove_suid(struct inode *inode) | 1115 | static int __ocfs2_write_remove_suid(struct inode *inode, |
1116 | struct buffer_head *bh) | ||
1074 | { | 1117 | { |
1075 | int ret; | 1118 | int ret; |
1076 | struct buffer_head *bh = NULL; | ||
1077 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
1078 | handle_t *handle; | 1119 | handle_t *handle; |
1079 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1120 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1080 | struct ocfs2_dinode *di; | 1121 | struct ocfs2_dinode *di; |
1081 | 1122 | ||
1082 | mlog_entry("(Inode %llu, mode 0%o)\n", | 1123 | mlog_entry("(Inode %llu, mode 0%o)\n", |
1083 | (unsigned long long)oi->ip_blkno, inode->i_mode); | 1124 | (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode); |
1084 | 1125 | ||
1085 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 1126 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
1086 | if (handle == NULL) { | 1127 | if (handle == NULL) { |
@@ -1089,17 +1130,11 @@ static int ocfs2_write_remove_suid(struct inode *inode) | |||
1089 | goto out; | 1130 | goto out; |
1090 | } | 1131 | } |
1091 | 1132 | ||
1092 | ret = ocfs2_read_block(osb, oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode); | ||
1093 | if (ret < 0) { | ||
1094 | mlog_errno(ret); | ||
1095 | goto out_trans; | ||
1096 | } | ||
1097 | |||
1098 | ret = ocfs2_journal_access(handle, inode, bh, | 1133 | ret = ocfs2_journal_access(handle, inode, bh, |
1099 | OCFS2_JOURNAL_ACCESS_WRITE); | 1134 | OCFS2_JOURNAL_ACCESS_WRITE); |
1100 | if (ret < 0) { | 1135 | if (ret < 0) { |
1101 | mlog_errno(ret); | 1136 | mlog_errno(ret); |
1102 | goto out_bh; | 1137 | goto out_trans; |
1103 | } | 1138 | } |
1104 | 1139 | ||
1105 | inode->i_mode &= ~S_ISUID; | 1140 | inode->i_mode &= ~S_ISUID; |
@@ -1112,8 +1147,7 @@ static int ocfs2_write_remove_suid(struct inode *inode) | |||
1112 | ret = ocfs2_journal_dirty(handle, bh); | 1147 | ret = ocfs2_journal_dirty(handle, bh); |
1113 | if (ret < 0) | 1148 | if (ret < 0) |
1114 | mlog_errno(ret); | 1149 | mlog_errno(ret); |
1115 | out_bh: | 1150 | |
1116 | brelse(bh); | ||
1117 | out_trans: | 1151 | out_trans: |
1118 | ocfs2_commit_trans(osb, handle); | 1152 | ocfs2_commit_trans(osb, handle); |
1119 | out: | 1153 | out: |
@@ -1159,6 +1193,499 @@ out: | |||
1159 | return ret; | 1193 | return ret; |
1160 | } | 1194 | } |
1161 | 1195 | ||
1196 | static int ocfs2_write_remove_suid(struct inode *inode) | ||
1197 | { | ||
1198 | int ret; | ||
1199 | struct buffer_head *bh = NULL; | ||
1200 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
1201 | |||
1202 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | ||
1203 | oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode); | ||
1204 | if (ret < 0) { | ||
1205 | mlog_errno(ret); | ||
1206 | goto out; | ||
1207 | } | ||
1208 | |||
1209 | ret = __ocfs2_write_remove_suid(inode, bh); | ||
1210 | out: | ||
1211 | brelse(bh); | ||
1212 | return ret; | ||
1213 | } | ||
1214 | |||
1215 | /* | ||
1216 | * Allocate enough extents to cover the region starting at byte offset | ||
1217 | * start for len bytes. Existing extents are skipped, any extents | ||
1218 | * added are marked as "unwritten". | ||
1219 | */ | ||
1220 | static int ocfs2_allocate_unwritten_extents(struct inode *inode, | ||
1221 | u64 start, u64 len) | ||
1222 | { | ||
1223 | int ret; | ||
1224 | u32 cpos, phys_cpos, clusters, alloc_size; | ||
1225 | |||
1226 | /* | ||
1227 | * We consider both start and len to be inclusive. | ||
1228 | */ | ||
1229 | cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits; | ||
1230 | clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len); | ||
1231 | clusters -= cpos; | ||
1232 | |||
1233 | while (clusters) { | ||
1234 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, | ||
1235 | &alloc_size, NULL); | ||
1236 | if (ret) { | ||
1237 | mlog_errno(ret); | ||
1238 | goto out; | ||
1239 | } | ||
1240 | |||
1241 | /* | ||
1242 | * Hole or existing extent len can be arbitrary, so | ||
1243 | * cap it to our own allocation request. | ||
1244 | */ | ||
1245 | if (alloc_size > clusters) | ||
1246 | alloc_size = clusters; | ||
1247 | |||
1248 | if (phys_cpos) { | ||
1249 | /* | ||
1250 | * We already have an allocation at this | ||
1251 | * region so we can safely skip it. | ||
1252 | */ | ||
1253 | goto next; | ||
1254 | } | ||
1255 | |||
1256 | ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1); | ||
1257 | if (ret) { | ||
1258 | if (ret != -ENOSPC) | ||
1259 | mlog_errno(ret); | ||
1260 | goto out; | ||
1261 | } | ||
1262 | |||
1263 | next: | ||
1264 | cpos += alloc_size; | ||
1265 | clusters -= alloc_size; | ||
1266 | } | ||
1267 | |||
1268 | ret = 0; | ||
1269 | out: | ||
1270 | return ret; | ||
1271 | } | ||
1272 | |||
1273 | static int __ocfs2_remove_inode_range(struct inode *inode, | ||
1274 | struct buffer_head *di_bh, | ||
1275 | u32 cpos, u32 phys_cpos, u32 len, | ||
1276 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
1277 | { | ||
1278 | int ret; | ||
1279 | u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
1280 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1281 | struct inode *tl_inode = osb->osb_tl_inode; | ||
1282 | handle_t *handle; | ||
1283 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
1284 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
1285 | |||
1286 | ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac); | ||
1287 | if (ret) { | ||
1288 | mlog_errno(ret); | ||
1289 | return ret; | ||
1290 | } | ||
1291 | |||
1292 | mutex_lock(&tl_inode->i_mutex); | ||
1293 | |||
1294 | if (ocfs2_truncate_log_needs_flush(osb)) { | ||
1295 | ret = __ocfs2_flush_truncate_log(osb); | ||
1296 | if (ret < 0) { | ||
1297 | mlog_errno(ret); | ||
1298 | goto out; | ||
1299 | } | ||
1300 | } | ||
1301 | |||
1302 | handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); | ||
1303 | if (handle == NULL) { | ||
1304 | ret = -ENOMEM; | ||
1305 | mlog_errno(ret); | ||
1306 | goto out; | ||
1307 | } | ||
1308 | |||
1309 | ret = ocfs2_journal_access(handle, inode, di_bh, | ||
1310 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
1311 | if (ret) { | ||
1312 | mlog_errno(ret); | ||
1313 | goto out; | ||
1314 | } | ||
1315 | |||
1316 | ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac, | ||
1317 | dealloc); | ||
1318 | if (ret) { | ||
1319 | mlog_errno(ret); | ||
1320 | goto out_commit; | ||
1321 | } | ||
1322 | |||
1323 | OCFS2_I(inode)->ip_clusters -= len; | ||
1324 | di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); | ||
1325 | |||
1326 | ret = ocfs2_journal_dirty(handle, di_bh); | ||
1327 | if (ret) { | ||
1328 | mlog_errno(ret); | ||
1329 | goto out_commit; | ||
1330 | } | ||
1331 | |||
1332 | ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); | ||
1333 | if (ret) | ||
1334 | mlog_errno(ret); | ||
1335 | |||
1336 | out_commit: | ||
1337 | ocfs2_commit_trans(osb, handle); | ||
1338 | out: | ||
1339 | mutex_unlock(&tl_inode->i_mutex); | ||
1340 | |||
1341 | if (meta_ac) | ||
1342 | ocfs2_free_alloc_context(meta_ac); | ||
1343 | |||
1344 | return ret; | ||
1345 | } | ||
1346 | |||
1347 | /* | ||
1348 | * Truncate a byte range, avoiding pages within partial clusters. This | ||
1349 | * preserves those pages for the zeroing code to write to. | ||
1350 | */ | ||
1351 | static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start, | ||
1352 | u64 byte_len) | ||
1353 | { | ||
1354 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1355 | loff_t start, end; | ||
1356 | struct address_space *mapping = inode->i_mapping; | ||
1357 | |||
1358 | start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start); | ||
1359 | end = byte_start + byte_len; | ||
1360 | end = end & ~(osb->s_clustersize - 1); | ||
1361 | |||
1362 | if (start < end) { | ||
1363 | unmap_mapping_range(mapping, start, end - start, 0); | ||
1364 | truncate_inode_pages_range(mapping, start, end - 1); | ||
1365 | } | ||
1366 | } | ||
1367 | |||
1368 | static int ocfs2_zero_partial_clusters(struct inode *inode, | ||
1369 | u64 start, u64 len) | ||
1370 | { | ||
1371 | int ret = 0; | ||
1372 | u64 tmpend, end = start + len; | ||
1373 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1374 | unsigned int csize = osb->s_clustersize; | ||
1375 | handle_t *handle; | ||
1376 | |||
1377 | /* | ||
1378 | * The "start" and "end" values are NOT necessarily part of | ||
1379 | * the range whose allocation is being deleted. Rather, this | ||
1380 | * is what the user passed in with the request. We must zero | ||
1381 | * partial clusters here. There's no need to worry about | ||
1382 | * physical allocation - the zeroing code knows to skip holes. | ||
1383 | */ | ||
1384 | mlog(0, "byte start: %llu, end: %llu\n", | ||
1385 | (unsigned long long)start, (unsigned long long)end); | ||
1386 | |||
1387 | /* | ||
1388 | * If both edges are on a cluster boundary then there's no | ||
1389 | * zeroing required as the region is part of the allocation to | ||
1390 | * be truncated. | ||
1391 | */ | ||
1392 | if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0) | ||
1393 | goto out; | ||
1394 | |||
1395 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||
1396 | if (handle == NULL) { | ||
1397 | ret = -ENOMEM; | ||
1398 | mlog_errno(ret); | ||
1399 | goto out; | ||
1400 | } | ||
1401 | |||
1402 | /* | ||
1403 | * We want to get the byte offset of the end of the 1st cluster. | ||
1404 | */ | ||
1405 | tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1)); | ||
1406 | if (tmpend > end) | ||
1407 | tmpend = end; | ||
1408 | |||
1409 | mlog(0, "1st range: start: %llu, tmpend: %llu\n", | ||
1410 | (unsigned long long)start, (unsigned long long)tmpend); | ||
1411 | |||
1412 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend); | ||
1413 | if (ret) | ||
1414 | mlog_errno(ret); | ||
1415 | |||
1416 | if (tmpend < end) { | ||
1417 | /* | ||
1418 | * This may make start and end equal, but the zeroing | ||
1419 | * code will skip any work in that case so there's no | ||
1420 | * need to catch it up here. | ||
1421 | */ | ||
1422 | start = end & ~(osb->s_clustersize - 1); | ||
1423 | |||
1424 | mlog(0, "2nd range: start: %llu, end: %llu\n", | ||
1425 | (unsigned long long)start, (unsigned long long)end); | ||
1426 | |||
1427 | ret = ocfs2_zero_range_for_truncate(inode, handle, start, end); | ||
1428 | if (ret) | ||
1429 | mlog_errno(ret); | ||
1430 | } | ||
1431 | |||
1432 | ocfs2_commit_trans(osb, handle); | ||
1433 | out: | ||
1434 | return ret; | ||
1435 | } | ||
1436 | |||
1437 | static int ocfs2_remove_inode_range(struct inode *inode, | ||
1438 | struct buffer_head *di_bh, u64 byte_start, | ||
1439 | u64 byte_len) | ||
1440 | { | ||
1441 | int ret = 0; | ||
1442 | u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size; | ||
1443 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1444 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
1445 | |||
1446 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
1447 | |||
1448 | if (byte_len == 0) | ||
1449 | return 0; | ||
1450 | |||
1451 | trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); | ||
1452 | trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; | ||
1453 | if (trunc_len >= trunc_start) | ||
1454 | trunc_len -= trunc_start; | ||
1455 | else | ||
1456 | trunc_len = 0; | ||
1457 | |||
1458 | mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n", | ||
1459 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
1460 | (unsigned long long)byte_start, | ||
1461 | (unsigned long long)byte_len, trunc_start, trunc_len); | ||
1462 | |||
1463 | ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len); | ||
1464 | if (ret) { | ||
1465 | mlog_errno(ret); | ||
1466 | goto out; | ||
1467 | } | ||
1468 | |||
1469 | cpos = trunc_start; | ||
1470 | while (trunc_len) { | ||
1471 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, | ||
1472 | &alloc_size, NULL); | ||
1473 | if (ret) { | ||
1474 | mlog_errno(ret); | ||
1475 | goto out; | ||
1476 | } | ||
1477 | |||
1478 | if (alloc_size > trunc_len) | ||
1479 | alloc_size = trunc_len; | ||
1480 | |||
1481 | /* Only do work for non-holes */ | ||
1482 | if (phys_cpos != 0) { | ||
1483 | ret = __ocfs2_remove_inode_range(inode, di_bh, cpos, | ||
1484 | phys_cpos, alloc_size, | ||
1485 | &dealloc); | ||
1486 | if (ret) { | ||
1487 | mlog_errno(ret); | ||
1488 | goto out; | ||
1489 | } | ||
1490 | } | ||
1491 | |||
1492 | cpos += alloc_size; | ||
1493 | trunc_len -= alloc_size; | ||
1494 | } | ||
1495 | |||
1496 | ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); | ||
1497 | |||
1498 | out: | ||
1499 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
1500 | ocfs2_run_deallocs(osb, &dealloc); | ||
1501 | |||
1502 | return ret; | ||
1503 | } | ||
1504 | |||
1505 | /* | ||
1506 | * Parts of this function taken from xfs_change_file_space() | ||
1507 | */ | ||
1508 | static int __ocfs2_change_file_space(struct file *file, struct inode *inode, | ||
1509 | loff_t f_pos, unsigned int cmd, | ||
1510 | struct ocfs2_space_resv *sr, | ||
1511 | int change_size) | ||
1512 | { | ||
1513 | int ret; | ||
1514 | s64 llen; | ||
1515 | loff_t size; | ||
1516 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1517 | struct buffer_head *di_bh = NULL; | ||
1518 | handle_t *handle; | ||
1519 | unsigned long long max_off = ocfs2_max_file_offset(inode->i_sb->s_blocksize_bits); | ||
1520 | |||
1521 | if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) | ||
1522 | return -EROFS; | ||
1523 | |||
1524 | mutex_lock(&inode->i_mutex); | ||
1525 | |||
1526 | /* | ||
1527 | * This prevents concurrent writes on other nodes | ||
1528 | */ | ||
1529 | ret = ocfs2_rw_lock(inode, 1); | ||
1530 | if (ret) { | ||
1531 | mlog_errno(ret); | ||
1532 | goto out; | ||
1533 | } | ||
1534 | |||
1535 | ret = ocfs2_meta_lock(inode, &di_bh, 1); | ||
1536 | if (ret) { | ||
1537 | mlog_errno(ret); | ||
1538 | goto out_rw_unlock; | ||
1539 | } | ||
1540 | |||
1541 | if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { | ||
1542 | ret = -EPERM; | ||
1543 | goto out_meta_unlock; | ||
1544 | } | ||
1545 | |||
1546 | switch (sr->l_whence) { | ||
1547 | case 0: /*SEEK_SET*/ | ||
1548 | break; | ||
1549 | case 1: /*SEEK_CUR*/ | ||
1550 | sr->l_start += f_pos; | ||
1551 | break; | ||
1552 | case 2: /*SEEK_END*/ | ||
1553 | sr->l_start += i_size_read(inode); | ||
1554 | break; | ||
1555 | default: | ||
1556 | ret = -EINVAL; | ||
1557 | goto out_meta_unlock; | ||
1558 | } | ||
1559 | sr->l_whence = 0; | ||
1560 | |||
1561 | llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len; | ||
1562 | |||
1563 | if (sr->l_start < 0 | ||
1564 | || sr->l_start > max_off | ||
1565 | || (sr->l_start + llen) < 0 | ||
1566 | || (sr->l_start + llen) > max_off) { | ||
1567 | ret = -EINVAL; | ||
1568 | goto out_meta_unlock; | ||
1569 | } | ||
1570 | size = sr->l_start + sr->l_len; | ||
1571 | |||
1572 | if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) { | ||
1573 | if (sr->l_len <= 0) { | ||
1574 | ret = -EINVAL; | ||
1575 | goto out_meta_unlock; | ||
1576 | } | ||
1577 | } | ||
1578 | |||
1579 | if (file && should_remove_suid(file->f_path.dentry)) { | ||
1580 | ret = __ocfs2_write_remove_suid(inode, di_bh); | ||
1581 | if (ret) { | ||
1582 | mlog_errno(ret); | ||
1583 | goto out_meta_unlock; | ||
1584 | } | ||
1585 | } | ||
1586 | |||
1587 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
1588 | switch (cmd) { | ||
1589 | case OCFS2_IOC_RESVSP: | ||
1590 | case OCFS2_IOC_RESVSP64: | ||
1591 | /* | ||
1592 | * This takes unsigned offsets, but the signed ones we | ||
1593 | * pass have been checked against overflow above. | ||
1594 | */ | ||
1595 | ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start, | ||
1596 | sr->l_len); | ||
1597 | break; | ||
1598 | case OCFS2_IOC_UNRESVSP: | ||
1599 | case OCFS2_IOC_UNRESVSP64: | ||
1600 | ret = ocfs2_remove_inode_range(inode, di_bh, sr->l_start, | ||
1601 | sr->l_len); | ||
1602 | break; | ||
1603 | default: | ||
1604 | ret = -EINVAL; | ||
1605 | } | ||
1606 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | ||
1607 | if (ret) { | ||
1608 | mlog_errno(ret); | ||
1609 | goto out_meta_unlock; | ||
1610 | } | ||
1611 | |||
1612 | /* | ||
1613 | * We update c/mtime for these changes | ||
1614 | */ | ||
1615 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||
1616 | if (IS_ERR(handle)) { | ||
1617 | ret = PTR_ERR(handle); | ||
1618 | mlog_errno(ret); | ||
1619 | goto out_meta_unlock; | ||
1620 | } | ||
1621 | |||
1622 | if (change_size && i_size_read(inode) < size) | ||
1623 | i_size_write(inode, size); | ||
1624 | |||
1625 | inode->i_ctime = inode->i_mtime = CURRENT_TIME; | ||
1626 | ret = ocfs2_mark_inode_dirty(handle, inode, di_bh); | ||
1627 | if (ret < 0) | ||
1628 | mlog_errno(ret); | ||
1629 | |||
1630 | ocfs2_commit_trans(osb, handle); | ||
1631 | |||
1632 | out_meta_unlock: | ||
1633 | brelse(di_bh); | ||
1634 | ocfs2_meta_unlock(inode, 1); | ||
1635 | out_rw_unlock: | ||
1636 | ocfs2_rw_unlock(inode, 1); | ||
1637 | |||
1638 | mutex_unlock(&inode->i_mutex); | ||
1639 | out: | ||
1640 | return ret; | ||
1641 | } | ||
1642 | |||
1643 | int ocfs2_change_file_space(struct file *file, unsigned int cmd, | ||
1644 | struct ocfs2_space_resv *sr) | ||
1645 | { | ||
1646 | struct inode *inode = file->f_path.dentry->d_inode; | ||
1647 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);; | ||
1648 | |||
1649 | if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) && | ||
1650 | !ocfs2_writes_unwritten_extents(osb)) | ||
1651 | return -ENOTTY; | ||
1652 | else if ((cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) && | ||
1653 | !ocfs2_sparse_alloc(osb)) | ||
1654 | return -ENOTTY; | ||
1655 | |||
1656 | if (!S_ISREG(inode->i_mode)) | ||
1657 | return -EINVAL; | ||
1658 | |||
1659 | if (!(file->f_mode & FMODE_WRITE)) | ||
1660 | return -EBADF; | ||
1661 | |||
1662 | return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0); | ||
1663 | } | ||
1664 | |||
1665 | static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset, | ||
1666 | loff_t len) | ||
1667 | { | ||
1668 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1669 | struct ocfs2_space_resv sr; | ||
1670 | int change_size = 1; | ||
1671 | |||
1672 | if (!ocfs2_writes_unwritten_extents(osb)) | ||
1673 | return -EOPNOTSUPP; | ||
1674 | |||
1675 | if (S_ISDIR(inode->i_mode)) | ||
1676 | return -ENODEV; | ||
1677 | |||
1678 | if (mode & FALLOC_FL_KEEP_SIZE) | ||
1679 | change_size = 0; | ||
1680 | |||
1681 | sr.l_whence = 0; | ||
1682 | sr.l_start = (s64)offset; | ||
1683 | sr.l_len = (s64)len; | ||
1684 | |||
1685 | return __ocfs2_change_file_space(NULL, inode, offset, | ||
1686 | OCFS2_IOC_RESVSP64, &sr, change_size); | ||
1687 | } | ||
1688 | |||
1162 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | 1689 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, |
1163 | loff_t *ppos, | 1690 | loff_t *ppos, |
1164 | size_t count, | 1691 | size_t count, |
@@ -1329,15 +1856,16 @@ ocfs2_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes) | |||
1329 | *basep = base; | 1856 | *basep = base; |
1330 | } | 1857 | } |
1331 | 1858 | ||
1332 | static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp, | 1859 | static struct page * ocfs2_get_write_source(char **ret_src_buf, |
1333 | const struct iovec *cur_iov, | 1860 | const struct iovec *cur_iov, |
1334 | size_t iov_offset) | 1861 | size_t iov_offset) |
1335 | { | 1862 | { |
1336 | int ret; | 1863 | int ret; |
1337 | char *buf; | 1864 | char *buf = cur_iov->iov_base + iov_offset; |
1338 | struct page *src_page = NULL; | 1865 | struct page *src_page = NULL; |
1866 | unsigned long off; | ||
1339 | 1867 | ||
1340 | buf = cur_iov->iov_base + iov_offset; | 1868 | off = (unsigned long)(buf) & ~PAGE_CACHE_MASK; |
1341 | 1869 | ||
1342 | if (!segment_eq(get_fs(), KERNEL_DS)) { | 1870 | if (!segment_eq(get_fs(), KERNEL_DS)) { |
1343 | /* | 1871 | /* |
@@ -1349,18 +1877,17 @@ static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp | |||
1349 | (unsigned long)buf & PAGE_CACHE_MASK, 1, | 1877 | (unsigned long)buf & PAGE_CACHE_MASK, 1, |
1350 | 0, 0, &src_page, NULL); | 1878 | 0, 0, &src_page, NULL); |
1351 | if (ret == 1) | 1879 | if (ret == 1) |
1352 | bp->b_src_buf = kmap(src_page); | 1880 | *ret_src_buf = kmap(src_page) + off; |
1353 | else | 1881 | else |
1354 | src_page = ERR_PTR(-EFAULT); | 1882 | src_page = ERR_PTR(-EFAULT); |
1355 | } else { | 1883 | } else { |
1356 | bp->b_src_buf = buf; | 1884 | *ret_src_buf = buf; |
1357 | } | 1885 | } |
1358 | 1886 | ||
1359 | return src_page; | 1887 | return src_page; |
1360 | } | 1888 | } |
1361 | 1889 | ||
1362 | static void ocfs2_put_write_source(struct ocfs2_buffered_write_priv *bp, | 1890 | static void ocfs2_put_write_source(struct page *page) |
1363 | struct page *page) | ||
1364 | { | 1891 | { |
1365 | if (page) { | 1892 | if (page) { |
1366 | kunmap(page); | 1893 | kunmap(page); |
@@ -1376,10 +1903,13 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos, | |||
1376 | { | 1903 | { |
1377 | int ret = 0; | 1904 | int ret = 0; |
1378 | ssize_t copied, total = 0; | 1905 | ssize_t copied, total = 0; |
1379 | size_t iov_offset = 0; | 1906 | size_t iov_offset = 0, bytes; |
1907 | loff_t pos; | ||
1380 | const struct iovec *cur_iov = iov; | 1908 | const struct iovec *cur_iov = iov; |
1381 | struct ocfs2_buffered_write_priv bp; | 1909 | struct page *user_page, *page; |
1382 | struct page *page; | 1910 | char * uninitialized_var(buf); |
1911 | char *dst; | ||
1912 | void *fsdata; | ||
1383 | 1913 | ||
1384 | /* | 1914 | /* |
1385 | * handle partial DIO write. Adjust cur_iov if needed. | 1915 | * handle partial DIO write. Adjust cur_iov if needed. |
@@ -1387,21 +1917,38 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos, | |||
1387 | ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written); | 1917 | ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written); |
1388 | 1918 | ||
1389 | do { | 1919 | do { |
1390 | bp.b_cur_off = iov_offset; | 1920 | pos = *ppos; |
1391 | bp.b_cur_iov = cur_iov; | ||
1392 | 1921 | ||
1393 | page = ocfs2_get_write_source(&bp, cur_iov, iov_offset); | 1922 | user_page = ocfs2_get_write_source(&buf, cur_iov, iov_offset); |
1394 | if (IS_ERR(page)) { | 1923 | if (IS_ERR(user_page)) { |
1395 | ret = PTR_ERR(page); | 1924 | ret = PTR_ERR(user_page); |
1396 | goto out; | 1925 | goto out; |
1397 | } | 1926 | } |
1398 | 1927 | ||
1399 | copied = ocfs2_buffered_write_cluster(file, *ppos, count, | 1928 | /* Stay within our page boundaries */ |
1400 | ocfs2_map_and_write_user_data, | 1929 | bytes = min((PAGE_CACHE_SIZE - ((unsigned long)pos & ~PAGE_CACHE_MASK)), |
1401 | &bp); | 1930 | (PAGE_CACHE_SIZE - ((unsigned long)buf & ~PAGE_CACHE_MASK))); |
1931 | /* Stay within the vector boundary */ | ||
1932 | bytes = min_t(size_t, bytes, cur_iov->iov_len - iov_offset); | ||
1933 | /* Stay within count */ | ||
1934 | bytes = min(bytes, count); | ||
1935 | |||
1936 | page = NULL; | ||
1937 | ret = ocfs2_write_begin(file, file->f_mapping, pos, bytes, 0, | ||
1938 | &page, &fsdata); | ||
1939 | if (ret) { | ||
1940 | mlog_errno(ret); | ||
1941 | goto out; | ||
1942 | } | ||
1402 | 1943 | ||
1403 | ocfs2_put_write_source(&bp, page); | 1944 | dst = kmap_atomic(page, KM_USER0); |
1945 | memcpy(dst + (pos & (PAGE_CACHE_SIZE - 1)), buf, bytes); | ||
1946 | kunmap_atomic(dst, KM_USER0); | ||
1947 | flush_dcache_page(page); | ||
1948 | ocfs2_put_write_source(user_page); | ||
1404 | 1949 | ||
1950 | copied = ocfs2_write_end(file, file->f_mapping, pos, bytes, | ||
1951 | bytes, page, fsdata); | ||
1405 | if (copied < 0) { | 1952 | if (copied < 0) { |
1406 | mlog_errno(copied); | 1953 | mlog_errno(copied); |
1407 | ret = copied; | 1954 | ret = copied; |
@@ -1409,7 +1956,7 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos, | |||
1409 | } | 1956 | } |
1410 | 1957 | ||
1411 | total += copied; | 1958 | total += copied; |
1412 | *ppos = *ppos + copied; | 1959 | *ppos = pos + copied; |
1413 | count -= copied; | 1960 | count -= copied; |
1414 | 1961 | ||
1415 | ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied); | 1962 | ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied); |
@@ -1579,52 +2126,46 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe, | |||
1579 | struct pipe_buffer *buf, | 2126 | struct pipe_buffer *buf, |
1580 | struct splice_desc *sd) | 2127 | struct splice_desc *sd) |
1581 | { | 2128 | { |
1582 | int ret, count, total = 0; | 2129 | int ret, count; |
1583 | ssize_t copied = 0; | 2130 | ssize_t copied = 0; |
1584 | struct ocfs2_splice_write_priv sp; | 2131 | struct file *file = sd->u.file; |
2132 | unsigned int offset; | ||
2133 | struct page *page = NULL; | ||
2134 | void *fsdata; | ||
2135 | char *src, *dst; | ||
1585 | 2136 | ||
1586 | ret = buf->ops->confirm(pipe, buf); | 2137 | ret = buf->ops->confirm(pipe, buf); |
1587 | if (ret) | 2138 | if (ret) |
1588 | goto out; | 2139 | goto out; |
1589 | 2140 | ||
1590 | sp.s_sd = sd; | 2141 | offset = sd->pos & ~PAGE_CACHE_MASK; |
1591 | sp.s_buf = buf; | ||
1592 | sp.s_pipe = pipe; | ||
1593 | sp.s_offset = sd->pos & ~PAGE_CACHE_MASK; | ||
1594 | sp.s_buf_offset = buf->offset; | ||
1595 | |||
1596 | count = sd->len; | 2142 | count = sd->len; |
1597 | if (count + sp.s_offset > PAGE_CACHE_SIZE) | 2143 | if (count + offset > PAGE_CACHE_SIZE) |
1598 | count = PAGE_CACHE_SIZE - sp.s_offset; | 2144 | count = PAGE_CACHE_SIZE - offset; |
1599 | 2145 | ||
1600 | do { | 2146 | ret = ocfs2_write_begin(file, file->f_mapping, sd->pos, count, 0, |
1601 | /* | 2147 | &page, &fsdata); |
1602 | * splice wants us to copy up to one page at a | 2148 | if (ret) { |
1603 | * time. For pagesize > cluster size, this means we | 2149 | mlog_errno(ret); |
1604 | * might enter ocfs2_buffered_write_cluster() more | 2150 | goto out; |
1605 | * than once, so keep track of our progress here. | 2151 | } |
1606 | */ | ||
1607 | copied = ocfs2_buffered_write_cluster(sd->u.file, | ||
1608 | (loff_t)sd->pos + total, | ||
1609 | count, | ||
1610 | ocfs2_map_and_write_splice_data, | ||
1611 | &sp); | ||
1612 | if (copied < 0) { | ||
1613 | mlog_errno(copied); | ||
1614 | ret = copied; | ||
1615 | goto out; | ||
1616 | } | ||
1617 | 2152 | ||
1618 | count -= copied; | 2153 | src = buf->ops->map(pipe, buf, 1); |
1619 | sp.s_offset += copied; | 2154 | dst = kmap_atomic(page, KM_USER1); |
1620 | sp.s_buf_offset += copied; | 2155 | memcpy(dst + offset, src + buf->offset, count); |
1621 | total += copied; | 2156 | kunmap_atomic(page, KM_USER1); |
1622 | } while (count); | 2157 | buf->ops->unmap(pipe, buf, src); |
1623 | 2158 | ||
1624 | ret = 0; | 2159 | copied = ocfs2_write_end(file, file->f_mapping, sd->pos, count, count, |
2160 | page, fsdata); | ||
2161 | if (copied < 0) { | ||
2162 | mlog_errno(copied); | ||
2163 | ret = copied; | ||
2164 | goto out; | ||
2165 | } | ||
1625 | out: | 2166 | out: |
1626 | 2167 | ||
1627 | return total ? total : ret; | 2168 | return copied ? copied : ret; |
1628 | } | 2169 | } |
1629 | 2170 | ||
1630 | static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe, | 2171 | static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe, |
@@ -1811,6 +2352,7 @@ const struct inode_operations ocfs2_file_iops = { | |||
1811 | .setattr = ocfs2_setattr, | 2352 | .setattr = ocfs2_setattr, |
1812 | .getattr = ocfs2_getattr, | 2353 | .getattr = ocfs2_getattr, |
1813 | .permission = ocfs2_permission, | 2354 | .permission = ocfs2_permission, |
2355 | .fallocate = ocfs2_fallocate, | ||
1814 | }; | 2356 | }; |
1815 | 2357 | ||
1816 | const struct inode_operations ocfs2_special_file_iops = { | 2358 | const struct inode_operations ocfs2_special_file_iops = { |