diff options
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_file.c')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_file.c | 587 |
1 files changed, 361 insertions, 226 deletions
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index ba8ad422a165..a55c1b46b219 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -37,10 +37,45 @@ | |||
37 | #include "xfs_trace.h" | 37 | #include "xfs_trace.h" |
38 | 38 | ||
39 | #include <linux/dcache.h> | 39 | #include <linux/dcache.h> |
40 | #include <linux/falloc.h> | ||
40 | 41 | ||
41 | static const struct vm_operations_struct xfs_file_vm_ops; | 42 | static const struct vm_operations_struct xfs_file_vm_ops; |
42 | 43 | ||
43 | /* | 44 | /* |
45 | * Locking primitives for read and write IO paths to ensure we consistently use | ||
46 | * and order the inode->i_mutex, ip->i_lock and ip->i_iolock. | ||
47 | */ | ||
48 | static inline void | ||
49 | xfs_rw_ilock( | ||
50 | struct xfs_inode *ip, | ||
51 | int type) | ||
52 | { | ||
53 | if (type & XFS_IOLOCK_EXCL) | ||
54 | mutex_lock(&VFS_I(ip)->i_mutex); | ||
55 | xfs_ilock(ip, type); | ||
56 | } | ||
57 | |||
58 | static inline void | ||
59 | xfs_rw_iunlock( | ||
60 | struct xfs_inode *ip, | ||
61 | int type) | ||
62 | { | ||
63 | xfs_iunlock(ip, type); | ||
64 | if (type & XFS_IOLOCK_EXCL) | ||
65 | mutex_unlock(&VFS_I(ip)->i_mutex); | ||
66 | } | ||
67 | |||
68 | static inline void | ||
69 | xfs_rw_ilock_demote( | ||
70 | struct xfs_inode *ip, | ||
71 | int type) | ||
72 | { | ||
73 | xfs_ilock_demote(ip, type); | ||
74 | if (type & XFS_IOLOCK_EXCL) | ||
75 | mutex_unlock(&VFS_I(ip)->i_mutex); | ||
76 | } | ||
77 | |||
78 | /* | ||
44 | * xfs_iozero | 79 | * xfs_iozero |
45 | * | 80 | * |
46 | * xfs_iozero clears the specified range of buffer supplied, | 81 | * xfs_iozero clears the specified range of buffer supplied, |
@@ -262,22 +297,21 @@ xfs_file_aio_read( | |||
262 | if (XFS_FORCED_SHUTDOWN(mp)) | 297 | if (XFS_FORCED_SHUTDOWN(mp)) |
263 | return -EIO; | 298 | return -EIO; |
264 | 299 | ||
265 | if (unlikely(ioflags & IO_ISDIRECT)) | ||
266 | mutex_lock(&inode->i_mutex); | ||
267 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | ||
268 | |||
269 | if (unlikely(ioflags & IO_ISDIRECT)) { | 300 | if (unlikely(ioflags & IO_ISDIRECT)) { |
301 | xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); | ||
302 | |||
270 | if (inode->i_mapping->nrpages) { | 303 | if (inode->i_mapping->nrpages) { |
271 | ret = -xfs_flushinval_pages(ip, | 304 | ret = -xfs_flushinval_pages(ip, |
272 | (iocb->ki_pos & PAGE_CACHE_MASK), | 305 | (iocb->ki_pos & PAGE_CACHE_MASK), |
273 | -1, FI_REMAPF_LOCKED); | 306 | -1, FI_REMAPF_LOCKED); |
307 | if (ret) { | ||
308 | xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); | ||
309 | return ret; | ||
310 | } | ||
274 | } | 311 | } |
275 | mutex_unlock(&inode->i_mutex); | 312 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
276 | if (ret) { | 313 | } else |
277 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 314 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); |
278 | return ret; | ||
279 | } | ||
280 | } | ||
281 | 315 | ||
282 | trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); | 316 | trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); |
283 | 317 | ||
@@ -285,7 +319,7 @@ xfs_file_aio_read( | |||
285 | if (ret > 0) | 319 | if (ret > 0) |
286 | XFS_STATS_ADD(xs_read_bytes, ret); | 320 | XFS_STATS_ADD(xs_read_bytes, ret); |
287 | 321 | ||
288 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 322 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); |
289 | return ret; | 323 | return ret; |
290 | } | 324 | } |
291 | 325 | ||
@@ -309,7 +343,7 @@ xfs_file_splice_read( | |||
309 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 343 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
310 | return -EIO; | 344 | return -EIO; |
311 | 345 | ||
312 | xfs_ilock(ip, XFS_IOLOCK_SHARED); | 346 | xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); |
313 | 347 | ||
314 | trace_xfs_file_splice_read(ip, count, *ppos, ioflags); | 348 | trace_xfs_file_splice_read(ip, count, *ppos, ioflags); |
315 | 349 | ||
@@ -317,10 +351,61 @@ xfs_file_splice_read( | |||
317 | if (ret > 0) | 351 | if (ret > 0) |
318 | XFS_STATS_ADD(xs_read_bytes, ret); | 352 | XFS_STATS_ADD(xs_read_bytes, ret); |
319 | 353 | ||
320 | xfs_iunlock(ip, XFS_IOLOCK_SHARED); | 354 | xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); |
321 | return ret; | 355 | return ret; |
322 | } | 356 | } |
323 | 357 | ||
358 | STATIC void | ||
359 | xfs_aio_write_isize_update( | ||
360 | struct inode *inode, | ||
361 | loff_t *ppos, | ||
362 | ssize_t bytes_written) | ||
363 | { | ||
364 | struct xfs_inode *ip = XFS_I(inode); | ||
365 | xfs_fsize_t isize = i_size_read(inode); | ||
366 | |||
367 | if (bytes_written > 0) | ||
368 | XFS_STATS_ADD(xs_write_bytes, bytes_written); | ||
369 | |||
370 | if (unlikely(bytes_written < 0 && bytes_written != -EFAULT && | ||
371 | *ppos > isize)) | ||
372 | *ppos = isize; | ||
373 | |||
374 | if (*ppos > ip->i_size) { | ||
375 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
376 | if (*ppos > ip->i_size) | ||
377 | ip->i_size = *ppos; | ||
378 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
379 | } | ||
380 | } | ||
381 | |||
382 | /* | ||
383 | * If this was a direct or synchronous I/O that failed (such as ENOSPC) then | ||
384 | * part of the I/O may have been written to disk before the error occurred. In | ||
385 | * this case the on-disk file size may have been adjusted beyond the in-memory | ||
386 | * file size and now needs to be truncated back. | ||
387 | */ | ||
388 | STATIC void | ||
389 | xfs_aio_write_newsize_update( | ||
390 | struct xfs_inode *ip) | ||
391 | { | ||
392 | if (ip->i_new_size) { | ||
393 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL); | ||
394 | ip->i_new_size = 0; | ||
395 | if (ip->i_d.di_size > ip->i_size) | ||
396 | ip->i_d.di_size = ip->i_size; | ||
397 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); | ||
398 | } | ||
399 | } | ||
400 | |||
401 | /* | ||
402 | * xfs_file_splice_write() does not use xfs_rw_ilock() because | ||
403 | * generic_file_splice_write() takes the i_mutex itself. This, in theory, | ||
404 | * could cause lock inversions between the aio_write path and the splice path | ||
405 | * if someone is doing concurrent splice(2) based writes and write(2) based | ||
406 | * writes to the same inode. The only real way to fix this is to re-implement | ||
407 | * the generic code here with correct locking orders. | ||
408 | */ | ||
324 | STATIC ssize_t | 409 | STATIC ssize_t |
325 | xfs_file_splice_write( | 410 | xfs_file_splice_write( |
326 | struct pipe_inode_info *pipe, | 411 | struct pipe_inode_info *pipe, |
@@ -331,7 +416,7 @@ xfs_file_splice_write( | |||
331 | { | 416 | { |
332 | struct inode *inode = outfilp->f_mapping->host; | 417 | struct inode *inode = outfilp->f_mapping->host; |
333 | struct xfs_inode *ip = XFS_I(inode); | 418 | struct xfs_inode *ip = XFS_I(inode); |
334 | xfs_fsize_t isize, new_size; | 419 | xfs_fsize_t new_size; |
335 | int ioflags = 0; | 420 | int ioflags = 0; |
336 | ssize_t ret; | 421 | ssize_t ret; |
337 | 422 | ||
@@ -355,27 +440,9 @@ xfs_file_splice_write( | |||
355 | trace_xfs_file_splice_write(ip, count, *ppos, ioflags); | 440 | trace_xfs_file_splice_write(ip, count, *ppos, ioflags); |
356 | 441 | ||
357 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); | 442 | ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); |
358 | if (ret > 0) | ||
359 | XFS_STATS_ADD(xs_write_bytes, ret); | ||
360 | |||
361 | isize = i_size_read(inode); | ||
362 | if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) | ||
363 | *ppos = isize; | ||
364 | |||
365 | if (*ppos > ip->i_size) { | ||
366 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
367 | if (*ppos > ip->i_size) | ||
368 | ip->i_size = *ppos; | ||
369 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
370 | } | ||
371 | 443 | ||
372 | if (ip->i_new_size) { | 444 | xfs_aio_write_isize_update(inode, ppos, ret); |
373 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 445 | xfs_aio_write_newsize_update(ip); |
374 | ip->i_new_size = 0; | ||
375 | if (ip->i_d.di_size > ip->i_size) | ||
376 | ip->i_d.di_size = ip->i_size; | ||
377 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
378 | } | ||
379 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); | 446 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
380 | return ret; | 447 | return ret; |
381 | } | 448 | } |
@@ -562,247 +629,314 @@ out_lock: | |||
562 | return error; | 629 | return error; |
563 | } | 630 | } |
564 | 631 | ||
632 | /* | ||
633 | * Common pre-write limit and setup checks. | ||
634 | * | ||
635 | * Returns with iolock held according to @iolock. | ||
636 | */ | ||
565 | STATIC ssize_t | 637 | STATIC ssize_t |
566 | xfs_file_aio_write( | 638 | xfs_file_aio_write_checks( |
567 | struct kiocb *iocb, | 639 | struct file *file, |
568 | const struct iovec *iovp, | 640 | loff_t *pos, |
569 | unsigned long nr_segs, | 641 | size_t *count, |
570 | loff_t pos) | 642 | int *iolock) |
571 | { | 643 | { |
572 | struct file *file = iocb->ki_filp; | 644 | struct inode *inode = file->f_mapping->host; |
573 | struct address_space *mapping = file->f_mapping; | ||
574 | struct inode *inode = mapping->host; | ||
575 | struct xfs_inode *ip = XFS_I(inode); | 645 | struct xfs_inode *ip = XFS_I(inode); |
576 | struct xfs_mount *mp = ip->i_mount; | 646 | xfs_fsize_t new_size; |
577 | ssize_t ret = 0, error = 0; | 647 | int error = 0; |
578 | int ioflags = 0; | ||
579 | xfs_fsize_t isize, new_size; | ||
580 | int iolock; | ||
581 | size_t ocount = 0, count; | ||
582 | int need_i_mutex; | ||
583 | 648 | ||
584 | XFS_STATS_INC(xs_write_calls); | 649 | error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); |
650 | if (error) { | ||
651 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); | ||
652 | *iolock = 0; | ||
653 | return error; | ||
654 | } | ||
585 | 655 | ||
586 | BUG_ON(iocb->ki_pos != pos); | 656 | new_size = *pos + *count; |
657 | if (new_size > ip->i_size) | ||
658 | ip->i_new_size = new_size; | ||
587 | 659 | ||
588 | if (unlikely(file->f_flags & O_DIRECT)) | 660 | if (likely(!(file->f_mode & FMODE_NOCMTIME))) |
589 | ioflags |= IO_ISDIRECT; | 661 | file_update_time(file); |
590 | if (file->f_mode & FMODE_NOCMTIME) | 662 | |
591 | ioflags |= IO_INVIS; | 663 | /* |
664 | * If the offset is beyond the size of the file, we need to zero any | ||
665 | * blocks that fall between the existing EOF and the start of this | ||
666 | * write. | ||
667 | */ | ||
668 | if (*pos > ip->i_size) | ||
669 | error = -xfs_zero_eof(ip, *pos, ip->i_size); | ||
592 | 670 | ||
593 | error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); | 671 | xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); |
594 | if (error) | 672 | if (error) |
595 | return error; | 673 | return error; |
596 | 674 | ||
597 | count = ocount; | 675 | /* |
598 | if (count == 0) | 676 | * If we're writing the file then make sure to clear the setuid and |
599 | return 0; | 677 | * setgid bits if the process is not being run by root. This keeps |
600 | 678 | * people from modifying setuid and setgid binaries. | |
601 | xfs_wait_for_freeze(mp, SB_FREEZE_WRITE); | 679 | */ |
680 | return file_remove_suid(file); | ||
602 | 681 | ||
603 | if (XFS_FORCED_SHUTDOWN(mp)) | 682 | } |
604 | return -EIO; | ||
605 | 683 | ||
606 | relock: | 684 | /* |
607 | if (ioflags & IO_ISDIRECT) { | 685 | * xfs_file_dio_aio_write - handle direct IO writes |
608 | iolock = XFS_IOLOCK_SHARED; | 686 | * |
609 | need_i_mutex = 0; | 687 | * Lock the inode appropriately to prepare for and issue a direct IO write. |
610 | } else { | 688 | * By separating it from the buffered write path we remove all the tricky to |
611 | iolock = XFS_IOLOCK_EXCL; | 689 | * follow locking changes and looping. |
612 | need_i_mutex = 1; | 690 | * |
613 | mutex_lock(&inode->i_mutex); | 691 | * If there are cached pages or we're extending the file, we need IOLOCK_EXCL |
692 | * until we're sure the bytes at the new EOF have been zeroed and/or the cached | ||
693 | * pages are flushed out. | ||
694 | * | ||
695 | * In most cases the direct IO writes will be done holding IOLOCK_SHARED | ||
696 | * allowing them to be done in parallel with reads and other direct IO writes. | ||
697 | * However, if the IO is not aligned to filesystem blocks, the direct IO layer | ||
698 | * needs to do sub-block zeroing and that requires serialisation against other | ||
699 | * direct IOs to the same block. In this case we need to serialise the | ||
700 | * submission of the unaligned IOs so that we don't get racing block zeroing in | ||
701 | * the dio layer. To avoid the problem with aio, we also need to wait for | ||
702 | * outstanding IOs to complete so that unwritten extent conversion is completed | ||
703 | * before we try to map the overlapping block. This is currently implemented by | ||
704 | * hitting it with a big hammer (i.e. xfs_ioend_wait()). | ||
705 | * | ||
706 | * Returns with locks held indicated by @iolock and errors indicated by | ||
707 | * negative return values. | ||
708 | */ | ||
709 | STATIC ssize_t | ||
710 | xfs_file_dio_aio_write( | ||
711 | struct kiocb *iocb, | ||
712 | const struct iovec *iovp, | ||
713 | unsigned long nr_segs, | ||
714 | loff_t pos, | ||
715 | size_t ocount, | ||
716 | int *iolock) | ||
717 | { | ||
718 | struct file *file = iocb->ki_filp; | ||
719 | struct address_space *mapping = file->f_mapping; | ||
720 | struct inode *inode = mapping->host; | ||
721 | struct xfs_inode *ip = XFS_I(inode); | ||
722 | struct xfs_mount *mp = ip->i_mount; | ||
723 | ssize_t ret = 0; | ||
724 | size_t count = ocount; | ||
725 | int unaligned_io = 0; | ||
726 | struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? | ||
727 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
728 | |||
729 | *iolock = 0; | ||
730 | if ((pos & target->bt_smask) || (count & target->bt_smask)) | ||
731 | return -XFS_ERROR(EINVAL); | ||
732 | |||
733 | if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) | ||
734 | unaligned_io = 1; | ||
735 | |||
736 | if (unaligned_io || mapping->nrpages || pos > ip->i_size) | ||
737 | *iolock = XFS_IOLOCK_EXCL; | ||
738 | else | ||
739 | *iolock = XFS_IOLOCK_SHARED; | ||
740 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); | ||
741 | |||
742 | ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); | ||
743 | if (ret) | ||
744 | return ret; | ||
745 | |||
746 | if (mapping->nrpages) { | ||
747 | WARN_ON(*iolock != XFS_IOLOCK_EXCL); | ||
748 | ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, | ||
749 | FI_REMAPF_LOCKED); | ||
750 | if (ret) | ||
751 | return ret; | ||
614 | } | 752 | } |
615 | 753 | ||
616 | xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); | 754 | /* |
617 | 755 | * If we are doing unaligned IO, wait for all other IO to drain, | |
618 | start: | 756 | * otherwise demote the lock if we had to flush cached pages |
619 | error = -generic_write_checks(file, &pos, &count, | 757 | */ |
620 | S_ISBLK(inode->i_mode)); | 758 | if (unaligned_io) |
621 | if (error) { | 759 | xfs_ioend_wait(ip); |
622 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 760 | else if (*iolock == XFS_IOLOCK_EXCL) { |
623 | goto out_unlock_mutex; | 761 | xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); |
762 | *iolock = XFS_IOLOCK_SHARED; | ||
624 | } | 763 | } |
625 | 764 | ||
626 | if (ioflags & IO_ISDIRECT) { | 765 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); |
627 | xfs_buftarg_t *target = | 766 | ret = generic_file_direct_write(iocb, iovp, |
628 | XFS_IS_REALTIME_INODE(ip) ? | 767 | &nr_segs, pos, &iocb->ki_pos, count, ocount); |
629 | mp->m_rtdev_targp : mp->m_ddev_targp; | ||
630 | 768 | ||
631 | if ((pos & target->bt_smask) || (count & target->bt_smask)) { | 769 | /* No fallback to buffered IO on errors for XFS. */ |
632 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 770 | ASSERT(ret < 0 || ret == count); |
633 | return XFS_ERROR(-EINVAL); | 771 | return ret; |
634 | } | 772 | } |
635 | 773 | ||
636 | if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) { | 774 | STATIC ssize_t |
637 | xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); | 775 | xfs_file_buffered_aio_write( |
638 | iolock = XFS_IOLOCK_EXCL; | 776 | struct kiocb *iocb, |
639 | need_i_mutex = 1; | 777 | const struct iovec *iovp, |
640 | mutex_lock(&inode->i_mutex); | 778 | unsigned long nr_segs, |
641 | xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); | 779 | loff_t pos, |
642 | goto start; | 780 | size_t ocount, |
643 | } | 781 | int *iolock) |
644 | } | 782 | { |
783 | struct file *file = iocb->ki_filp; | ||
784 | struct address_space *mapping = file->f_mapping; | ||
785 | struct inode *inode = mapping->host; | ||
786 | struct xfs_inode *ip = XFS_I(inode); | ||
787 | ssize_t ret; | ||
788 | int enospc = 0; | ||
789 | size_t count = ocount; | ||
645 | 790 | ||
646 | new_size = pos + count; | 791 | *iolock = XFS_IOLOCK_EXCL; |
647 | if (new_size > ip->i_size) | 792 | xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); |
648 | ip->i_new_size = new_size; | ||
649 | 793 | ||
650 | if (likely(!(ioflags & IO_INVIS))) | 794 | ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); |
651 | file_update_time(file); | 795 | if (ret) |
796 | return ret; | ||
652 | 797 | ||
798 | /* We can write back this queue in page reclaim */ | ||
799 | current->backing_dev_info = mapping->backing_dev_info; | ||
800 | |||
801 | write_retry: | ||
802 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); | ||
803 | ret = generic_file_buffered_write(iocb, iovp, nr_segs, | ||
804 | pos, &iocb->ki_pos, count, ret); | ||
653 | /* | 805 | /* |
654 | * If the offset is beyond the size of the file, we have a couple | 806 | * if we just got an ENOSPC, flush the inode now we aren't holding any |
655 | * of things to do. First, if there is already space allocated | 807 | * page locks and retry *once* |
656 | * we need to either create holes or zero the disk or ... | ||
657 | * | ||
658 | * If there is a page where the previous size lands, we need | ||
659 | * to zero it out up to the new size. | ||
660 | */ | 808 | */ |
661 | 809 | if (ret == -ENOSPC && !enospc) { | |
662 | if (pos > ip->i_size) { | 810 | ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE); |
663 | error = xfs_zero_eof(ip, pos, ip->i_size); | 811 | if (ret) |
664 | if (error) { | 812 | return ret; |
665 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 813 | enospc = 1; |
666 | goto out_unlock_internal; | 814 | goto write_retry; |
667 | } | ||
668 | } | 815 | } |
669 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 816 | current->backing_dev_info = NULL; |
817 | return ret; | ||
818 | } | ||
670 | 819 | ||
671 | /* | 820 | STATIC ssize_t |
672 | * If we're writing the file then make sure to clear the | 821 | xfs_file_aio_write( |
673 | * setuid and setgid bits if the process is not being run | 822 | struct kiocb *iocb, |
674 | * by root. This keeps people from modifying setuid and | 823 | const struct iovec *iovp, |
675 | * setgid binaries. | 824 | unsigned long nr_segs, |
676 | */ | 825 | loff_t pos) |
677 | error = -file_remove_suid(file); | 826 | { |
678 | if (unlikely(error)) | 827 | struct file *file = iocb->ki_filp; |
679 | goto out_unlock_internal; | 828 | struct address_space *mapping = file->f_mapping; |
829 | struct inode *inode = mapping->host; | ||
830 | struct xfs_inode *ip = XFS_I(inode); | ||
831 | ssize_t ret; | ||
832 | int iolock; | ||
833 | size_t ocount = 0; | ||
680 | 834 | ||
681 | /* We can write back this queue in page reclaim */ | 835 | XFS_STATS_INC(xs_write_calls); |
682 | current->backing_dev_info = mapping->backing_dev_info; | ||
683 | 836 | ||
684 | if ((ioflags & IO_ISDIRECT)) { | 837 | BUG_ON(iocb->ki_pos != pos); |
685 | if (mapping->nrpages) { | ||
686 | WARN_ON(need_i_mutex == 0); | ||
687 | error = xfs_flushinval_pages(ip, | ||
688 | (pos & PAGE_CACHE_MASK), | ||
689 | -1, FI_REMAPF_LOCKED); | ||
690 | if (error) | ||
691 | goto out_unlock_internal; | ||
692 | } | ||
693 | 838 | ||
694 | if (need_i_mutex) { | 839 | ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); |
695 | /* demote the lock now the cached pages are gone */ | 840 | if (ret) |
696 | xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); | 841 | return ret; |
697 | mutex_unlock(&inode->i_mutex); | ||
698 | 842 | ||
699 | iolock = XFS_IOLOCK_SHARED; | 843 | if (ocount == 0) |
700 | need_i_mutex = 0; | 844 | return 0; |
701 | } | ||
702 | 845 | ||
703 | trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags); | 846 | xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE); |
704 | ret = generic_file_direct_write(iocb, iovp, | ||
705 | &nr_segs, pos, &iocb->ki_pos, count, ocount); | ||
706 | 847 | ||
707 | /* | 848 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
708 | * direct-io write to a hole: fall through to buffered I/O | 849 | return -EIO; |
709 | * for completing the rest of the request. | ||
710 | */ | ||
711 | if (ret >= 0 && ret != count) { | ||
712 | XFS_STATS_ADD(xs_write_bytes, ret); | ||
713 | 850 | ||
714 | pos += ret; | 851 | if (unlikely(file->f_flags & O_DIRECT)) |
715 | count -= ret; | 852 | ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, |
853 | ocount, &iolock); | ||
854 | else | ||
855 | ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, | ||
856 | ocount, &iolock); | ||
716 | 857 | ||
717 | ioflags &= ~IO_ISDIRECT; | 858 | xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); |
718 | xfs_iunlock(ip, iolock); | ||
719 | goto relock; | ||
720 | } | ||
721 | } else { | ||
722 | int enospc = 0; | ||
723 | ssize_t ret2 = 0; | ||
724 | 859 | ||
725 | write_retry: | 860 | if (ret <= 0) |
726 | trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags); | 861 | goto out_unlock; |
727 | ret2 = generic_file_buffered_write(iocb, iovp, nr_segs, | ||
728 | pos, &iocb->ki_pos, count, ret); | ||
729 | /* | ||
730 | * if we just got an ENOSPC, flush the inode now we | ||
731 | * aren't holding any page locks and retry *once* | ||
732 | */ | ||
733 | if (ret2 == -ENOSPC && !enospc) { | ||
734 | error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE); | ||
735 | if (error) | ||
736 | goto out_unlock_internal; | ||
737 | enospc = 1; | ||
738 | goto write_retry; | ||
739 | } | ||
740 | ret = ret2; | ||
741 | } | ||
742 | 862 | ||
743 | current->backing_dev_info = NULL; | 863 | /* Handle various SYNC-type writes */ |
864 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | ||
865 | loff_t end = pos + ret - 1; | ||
866 | int error, error2; | ||
744 | 867 | ||
745 | isize = i_size_read(inode); | 868 | xfs_rw_iunlock(ip, iolock); |
746 | if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize)) | 869 | error = filemap_write_and_wait_range(mapping, pos, end); |
747 | iocb->ki_pos = isize; | 870 | xfs_rw_ilock(ip, iolock); |
748 | 871 | ||
749 | if (iocb->ki_pos > ip->i_size) { | 872 | error2 = -xfs_file_fsync(file, |
750 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 873 | (file->f_flags & __O_SYNC) ? 0 : 1); |
751 | if (iocb->ki_pos > ip->i_size) | 874 | if (error) |
752 | ip->i_size = iocb->ki_pos; | 875 | ret = error; |
753 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 876 | else if (error2) |
877 | ret = error2; | ||
754 | } | 878 | } |
755 | 879 | ||
756 | error = -ret; | 880 | out_unlock: |
757 | if (ret <= 0) | 881 | xfs_aio_write_newsize_update(ip); |
758 | goto out_unlock_internal; | 882 | xfs_rw_iunlock(ip, iolock); |
883 | return ret; | ||
884 | } | ||
759 | 885 | ||
760 | XFS_STATS_ADD(xs_write_bytes, ret); | 886 | STATIC long |
887 | xfs_file_fallocate( | ||
888 | struct file *file, | ||
889 | int mode, | ||
890 | loff_t offset, | ||
891 | loff_t len) | ||
892 | { | ||
893 | struct inode *inode = file->f_path.dentry->d_inode; | ||
894 | long error; | ||
895 | loff_t new_size = 0; | ||
896 | xfs_flock64_t bf; | ||
897 | xfs_inode_t *ip = XFS_I(inode); | ||
898 | int cmd = XFS_IOC_RESVSP; | ||
761 | 899 | ||
762 | /* Handle various SYNC-type writes */ | 900 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) |
763 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | 901 | return -EOPNOTSUPP; |
764 | loff_t end = pos + ret - 1; | ||
765 | int error2; | ||
766 | 902 | ||
767 | xfs_iunlock(ip, iolock); | 903 | bf.l_whence = 0; |
768 | if (need_i_mutex) | 904 | bf.l_start = offset; |
769 | mutex_unlock(&inode->i_mutex); | 905 | bf.l_len = len; |
770 | 906 | ||
771 | error2 = filemap_write_and_wait_range(mapping, pos, end); | 907 | xfs_ilock(ip, XFS_IOLOCK_EXCL); |
772 | if (!error) | ||
773 | error = error2; | ||
774 | if (need_i_mutex) | ||
775 | mutex_lock(&inode->i_mutex); | ||
776 | xfs_ilock(ip, iolock); | ||
777 | 908 | ||
778 | error2 = -xfs_file_fsync(file, | 909 | if (mode & FALLOC_FL_PUNCH_HOLE) |
779 | (file->f_flags & __O_SYNC) ? 0 : 1); | 910 | cmd = XFS_IOC_UNRESVSP; |
780 | if (!error) | 911 | |
781 | error = error2; | 912 | /* check the new inode size is valid before allocating */ |
913 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | ||
914 | offset + len > i_size_read(inode)) { | ||
915 | new_size = offset + len; | ||
916 | error = inode_newsize_ok(inode, new_size); | ||
917 | if (error) | ||
918 | goto out_unlock; | ||
782 | } | 919 | } |
783 | 920 | ||
784 | out_unlock_internal: | 921 | error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK); |
785 | if (ip->i_new_size) { | 922 | if (error) |
786 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 923 | goto out_unlock; |
787 | ip->i_new_size = 0; | 924 | |
788 | /* | 925 | /* Change file size if needed */ |
789 | * If this was a direct or synchronous I/O that failed (such | 926 | if (new_size) { |
790 | * as ENOSPC) then part of the I/O may have been written to | 927 | struct iattr iattr; |
791 | * disk before the error occurred. In this case the on-disk | 928 | |
792 | * file size may have been adjusted beyond the in-memory file | 929 | iattr.ia_valid = ATTR_SIZE; |
793 | * size and now needs to be truncated back. | 930 | iattr.ia_size = new_size; |
794 | */ | 931 | error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); |
795 | if (ip->i_d.di_size > ip->i_size) | ||
796 | ip->i_d.di_size = ip->i_size; | ||
797 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
798 | } | 932 | } |
799 | xfs_iunlock(ip, iolock); | 933 | |
800 | out_unlock_mutex: | 934 | out_unlock: |
801 | if (need_i_mutex) | 935 | xfs_iunlock(ip, XFS_IOLOCK_EXCL); |
802 | mutex_unlock(&inode->i_mutex); | 936 | return error; |
803 | return -error; | ||
804 | } | 937 | } |
805 | 938 | ||
939 | |||
806 | STATIC int | 940 | STATIC int |
807 | xfs_file_open( | 941 | xfs_file_open( |
808 | struct inode *inode, | 942 | struct inode *inode, |
@@ -921,6 +1055,7 @@ const struct file_operations xfs_file_operations = { | |||
921 | .open = xfs_file_open, | 1055 | .open = xfs_file_open, |
922 | .release = xfs_file_release, | 1056 | .release = xfs_file_release, |
923 | .fsync = xfs_file_fsync, | 1057 | .fsync = xfs_file_fsync, |
1058 | .fallocate = xfs_file_fallocate, | ||
924 | }; | 1059 | }; |
925 | 1060 | ||
926 | const struct file_operations xfs_dir_file_operations = { | 1061 | const struct file_operations xfs_dir_file_operations = { |