-rw-r--r--  Documentation/filesystems/Locking | 2
-rw-r--r--  Documentation/filesystems/vfs.txt | 2
-rw-r--r--  arch/mn10300/include/asm/highmem.h | 4
-rw-r--r--  arch/powerpc/configs/ppc6xx_defconfig | 1
-rw-r--r--  arch/powerpc/configs/ps3_defconfig | 1
-rw-r--r--  arch/s390/configs/default_defconfig | 1
-rw-r--r--  arch/sh/configs/rsk7203_defconfig | 1
-rw-r--r--  arch/xtensa/configs/iss_defconfig | 1
-rw-r--r--  arch/xtensa/configs/s6105_defconfig | 1
-rw-r--r--  block/blk-map.c | 2
-rw-r--r--  drivers/block/drbd/drbd_receiver.c | 12
-rw-r--r--  drivers/block/nbd.c | 48
-rw-r--r--  drivers/char/virtio_console.c | 4
-rw-r--r--  drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.c | 60
-rw-r--r--  drivers/staging/lustre/lustre/libcfs/linux/linux-tcpip.c | 24
-rw-r--r--  drivers/staging/lustre/lustre/llite/symlink.c | 23
-rw-r--r--  drivers/staging/usbip/stub_dev.c | 8
-rw-r--r--  drivers/staging/usbip/usbip_common.c | 25
-rw-r--r--  drivers/staging/usbip/usbip_common.h | 1
-rw-r--r--  drivers/staging/usbip/vhci_hcd.c | 4
-rw-r--r--  drivers/staging/usbip/vhci_sysfs.c | 6
-rw-r--r--  drivers/vhost/net.c | 14
-rw-r--r--  fs/bio.c | 10
-rw-r--r--  fs/block_dev.c | 2
-rw-r--r--  fs/btrfs/file.c | 16
-rw-r--r--  fs/buffer.c | 6
-rw-r--r--  fs/cachefiles/bind.c | 1
-rw-r--r--  fs/cachefiles/namei.c | 3
-rw-r--r--  fs/ceph/file.c | 12
-rw-r--r--  fs/cifs/cifsfs.c | 1
-rw-r--r--  fs/cifs/file.c | 128
-rw-r--r--  fs/exec.c | 2
-rw-r--r--  fs/ext4/file.c | 2
-rw-r--r--  fs/file.c | 11
-rw-r--r--  fs/file_table.c | 43
-rw-r--r--  fs/fuse/dev.c | 14
-rw-r--r--  fs/fuse/file.c | 5
-rw-r--r--  fs/mount.h | 5
-rw-r--r--  fs/namei.c | 67
-rw-r--r--  fs/namespace.c | 56
-rw-r--r--  fs/ncpfs/inode.c | 50
-rw-r--r--  fs/ncpfs/ncp_fs_sb.h | 2
-rw-r--r--  fs/ntfs/inode.c | 2
-rw-r--r--  fs/ocfs2/cluster/tcp.c | 49
-rw-r--r--  fs/ocfs2/file.c | 9
-rw-r--r--  fs/open.c | 68
-rw-r--r--  fs/pipe.c | 133
-rw-r--r--  fs/pnode.c | 198
-rw-r--r--  fs/pnode.h | 3
-rw-r--r--  fs/proc/namespaces.c | 14
-rw-r--r--  fs/proc/self.c | 2
-rw-r--r--  fs/proc_namespace.c | 1
-rw-r--r--  fs/splice.c | 126
-rw-r--r--  fs/udf/file.c | 2
-rw-r--r--  fs/xfs/xfs_file.c | 13
-rw-r--r--  fs/xfs/xfs_ioctl.c | 28
-rw-r--r--  include/linux/bio.h | 5
-rw-r--r--  include/linux/blkdev.h | 4
-rw-r--r--  include/linux/buffer_head.h | 4
-rw-r--r--  include/linux/fdtable.h | 2
-rw-r--r--  include/linux/fs.h | 97
-rw-r--r--  include/linux/mount.h | 3
-rw-r--r--  include/linux/nbd.h | 3
-rw-r--r--  include/linux/pipe_fs_i.h | 19
-rw-r--r--  include/linux/uio.h | 52
-rw-r--r--  kernel/relay.c | 4
-rw-r--r--  kernel/trace/trace.c | 8
-rw-r--r--  lib/Kconfig.debug | 10
-rw-r--r--  mm/Makefile | 3
-rw-r--r--  mm/filemap.c | 344
-rw-r--r--  mm/iov_iter.c | 224
-rw-r--r--  mm/process_vm_access.c | 250
-rw-r--r--  mm/shmem.c | 79
-rw-r--r--  security/integrity/evm/evm_crypto.c | 2
-rw-r--r--  security/integrity/evm/evm_main.c | 2
-rw-r--r--  security/tomoyo/realpath.c | 4
76 files changed, 911 insertions, 1537 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index efca5c1bbb10..eba790134253 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -202,7 +202,7 @@ prototypes:
 	unsigned long *);
 	int (*migratepage)(struct address_space *, struct page *, struct page *);
 	int (*launder_page)(struct page *);
-	int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long);
+	int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
 	int (*error_remove_page)(struct address_space *, struct page *);
 	int (*swap_activate)(struct file *);
 	int (*swap_deactivate)(struct file *);
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 94eb86287bcb..617f6d70c077 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -596,7 +596,7 @@ struct address_space_operations {
 	/* migrate the contents of a page to the specified target */
 	int (*migratepage) (struct page *, struct page *);
 	int (*launder_page) (struct page *);
-	int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
+	int (*is_partially_uptodate) (struct page *, unsigned long,
 					unsigned long);
 	void (*is_dirty_writeback) (struct page *, bool *, bool *);
 	int (*error_remove_page) (struct mapping *mapping, struct page *page);
diff --git a/arch/mn10300/include/asm/highmem.h b/arch/mn10300/include/asm/highmem.h
index 7c137cd8aa37..2fbbe4d920aa 100644
--- a/arch/mn10300/include/asm/highmem.h
+++ b/arch/mn10300/include/asm/highmem.h
@@ -70,7 +70,7 @@ static inline void kunmap(struct page *page)
  * be used in IRQ contexts, so in some (very limited) cases we need
  * it.
  */
-static inline unsigned long kmap_atomic(struct page *page)
+static inline void *kmap_atomic(struct page *page)
 {
 	unsigned long vaddr;
 	int idx, type;
@@ -89,7 +89,7 @@ static inline unsigned long kmap_atomic(struct page *page)
 	set_pte(kmap_pte - idx, mk_pte(page, kmap_prot));
 	local_flush_tlb_one(vaddr);
 
-	return vaddr;
+	return (void *)vaddr;
 }
 
 static inline void __kunmap_atomic(unsigned long vaddr)
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index c2353bf059fd..175a8b99c196 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -1244,7 +1244,6 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y
 CONFIG_DEBUG_HIGHMEM=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_VM=y
-CONFIG_DEBUG_WRITECOUNT=y
 CONFIG_DEBUG_LIST=y
 CONFIG_DEBUG_SG=y
 # CONFIG_RCU_CPU_STALL_DETECTOR is not set
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index 139a8308070c..fdee37fab81c 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -174,7 +174,6 @@ CONFIG_DETECT_HUNG_TASK=y
 CONFIG_PROVE_LOCKING=y
 CONFIG_DEBUG_LOCKDEP=y
 CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_WRITECOUNT=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_DEBUG_LIST=y
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index ddaae2f5c913..8df022c43af7 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -581,7 +581,6 @@ CONFIG_LOCK_STAT=y
 CONFIG_DEBUG_LOCKDEP=y
 CONFIG_DEBUG_ATOMIC_SLEEP=y
 CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
-CONFIG_DEBUG_WRITECOUNT=y
 CONFIG_DEBUG_LIST=y
 CONFIG_DEBUG_SG=y
 CONFIG_DEBUG_NOTIFIERS=y
diff --git a/arch/sh/configs/rsk7203_defconfig b/arch/sh/configs/rsk7203_defconfig
index 4e5229b0c5bb..47236573db83 100644
--- a/arch/sh/configs/rsk7203_defconfig
+++ b/arch/sh/configs/rsk7203_defconfig
@@ -128,7 +128,6 @@ CONFIG_DEBUG_MUTEXES=y
 CONFIG_DEBUG_SPINLOCK_SLEEP=y
 CONFIG_DEBUG_INFO=y
 CONFIG_DEBUG_VM=y
-CONFIG_DEBUG_WRITECOUNT=y
 CONFIG_DEBUG_LIST=y
 CONFIG_DEBUG_SG=y
 CONFIG_FRAME_POINTER=y
diff --git a/arch/xtensa/configs/iss_defconfig b/arch/xtensa/configs/iss_defconfig
index d57d917ff240..1493c68352d1 100644
--- a/arch/xtensa/configs/iss_defconfig
+++ b/arch/xtensa/configs/iss_defconfig
@@ -627,7 +627,6 @@ CONFIG_SCHED_DEBUG=y
 # CONFIG_DEBUG_KOBJECT is not set
 # CONFIG_DEBUG_INFO is not set
 # CONFIG_DEBUG_VM is not set
-# CONFIG_DEBUG_WRITECOUNT is not set
 # CONFIG_DEBUG_MEMORY_INIT is not set
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_DEBUG_SG is not set
diff --git a/arch/xtensa/configs/s6105_defconfig b/arch/xtensa/configs/s6105_defconfig
index 583c2b0974ca..12a492ab6d17 100644
--- a/arch/xtensa/configs/s6105_defconfig
+++ b/arch/xtensa/configs/s6105_defconfig
@@ -569,7 +569,6 @@ CONFIG_DEBUG_SPINLOCK_SLEEP=y
 # CONFIG_DEBUG_INFO is not set
 # CONFIG_DEBUG_VM is not set
 CONFIG_DEBUG_NOMMU_REGIONS=y
-# CONFIG_DEBUG_WRITECOUNT is not set
 # CONFIG_DEBUG_MEMORY_INIT is not set
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_DEBUG_SG is not set
diff --git a/block/blk-map.c b/block/blk-map.c
index cca6356d216d..f7b22bc21518 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -188,7 +188,7 @@ EXPORT_SYMBOL(blk_rq_map_user);
  * unmapping.
  */
 int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
-			struct rq_map_data *map_data, struct sg_iovec *iov,
+			struct rq_map_data *map_data, const struct sg_iovec *iov,
 			int iov_count, unsigned int len, gfp_t gfp_mask)
 {
 	struct bio *bio;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 18c76e84d540..68e3992e8838 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -469,24 +469,14 @@ static void drbd_wait_ee_list_empty(struct drbd_device *device,
 
 static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
 {
-	mm_segment_t oldfs;
 	struct kvec iov = {
 		.iov_base = buf,
 		.iov_len = size,
 	};
 	struct msghdr msg = {
-		.msg_iovlen = 1,
-		.msg_iov = (struct iovec *)&iov,
 		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
 	};
-	int rv;
-
-	oldfs = get_fs();
-	set_fs(KERNEL_DS);
-	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
-	set_fs(oldfs);
-
-	return rv;
+	return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
 }
 
 static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
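Note on the pattern above: kernel_recvmsg() takes the kernel-space buffer as a struct kvec and does the address-limit switch internally, which is what lets the hand-rolled get_fs()/set_fs(KERNEL_DS) bracket disappear. A minimal sketch of the idiom, assuming a connected struct socket *sock, a kernel buffer buf, and a length size:

	struct kvec iov = { .iov_base = buf, .iov_len = size };
	struct msghdr msg = { .msg_flags = MSG_WAITALL };
	/* returns bytes received, or a negative errno */
	int rv = kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);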
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 55298db36b2d..3a70ea2f7cd6 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -630,37 +630,29 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 	}
 
 	case NBD_CLEAR_SOCK: {
-		struct file *file;
-
+		struct socket *sock = nbd->sock;
 		nbd->sock = NULL;
-		file = nbd->file;
-		nbd->file = NULL;
 		nbd_clear_que(nbd);
 		BUG_ON(!list_empty(&nbd->queue_head));
 		BUG_ON(!list_empty(&nbd->waiting_queue));
 		kill_bdev(bdev);
-		if (file)
-			fput(file);
+		if (sock)
+			sockfd_put(sock);
 		return 0;
 	}
 
 	case NBD_SET_SOCK: {
-		struct file *file;
-		if (nbd->file)
+		struct socket *sock;
+		int err;
+		if (nbd->sock)
 			return -EBUSY;
-		file = fget(arg);
-		if (file) {
-			struct inode *inode = file_inode(file);
-			if (S_ISSOCK(inode->i_mode)) {
-				nbd->file = file;
-				nbd->sock = SOCKET_I(inode);
-				if (max_part > 0)
-					bdev->bd_invalidated = 1;
-				nbd->disconnect = 0; /* we're connected now */
-				return 0;
-			} else {
-				fput(file);
-			}
+		sock = sockfd_lookup(arg, &err);
+		if (sock) {
+			nbd->sock = sock;
+			if (max_part > 0)
+				bdev->bd_invalidated = 1;
+			nbd->disconnect = 0; /* we're connected now */
+			return 0;
 		}
 		return -EINVAL;
 	}
@@ -697,12 +689,12 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 
 	case NBD_DO_IT: {
 		struct task_struct *thread;
-		struct file *file;
+		struct socket *sock;
 		int error;
 
 		if (nbd->pid)
 			return -EBUSY;
-		if (!nbd->file)
+		if (!nbd->sock)
 			return -EINVAL;
 
 		mutex_unlock(&nbd->tx_lock);
@@ -731,15 +723,15 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		if (error)
 			return error;
 		sock_shutdown(nbd, 0);
-		file = nbd->file;
-		nbd->file = NULL;
+		sock = nbd->sock;
+		nbd->sock = NULL;
 		nbd_clear_que(nbd);
 		dev_warn(disk_to_dev(nbd->disk), "queue cleared\n");
 		kill_bdev(bdev);
 		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
 		set_device_ro(bdev, false);
-		if (file)
-			fput(file);
+		if (sock)
+			sockfd_put(sock);
 		nbd->flags = 0;
 		nbd->bytesize = 0;
 		bdev->bd_inode->i_size = 0;
@@ -875,9 +867,7 @@ static int __init nbd_init(void)
 
 	for (i = 0; i < nbds_max; i++) {
 		struct gendisk *disk = nbd_dev[i].disk;
-		nbd_dev[i].file = NULL;
 		nbd_dev[i].magic = NBD_MAGIC;
-		nbd_dev[i].flags = 0;
 		INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
 		spin_lock_init(&nbd_dev[i].queue_lock);
 		INIT_LIST_HEAD(&nbd_dev[i].queue_head);
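The sockfd_lookup()/sockfd_put() pair that replaces the open-coded fget() + S_ISSOCK() dance above resolves a userspace descriptor to a struct socket while holding a reference on the underlying file. A minimal sketch, with fd standing in for the descriptor passed by the ioctl:

	int err;
	struct socket *sock = sockfd_lookup(fd, &err);	/* takes a file reference */
	if (!sock)
		return err;		/* negative errno: -EBADF or -ENOTSOCK */
	/* ... use sock ... */
	sockfd_put(sock);		/* fput() on sock->file */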
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 6928d094451d..60aafb8a1f2e 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -901,9 +901,9 @@ static int pipe_to_sg(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 	if (len + offset > PAGE_SIZE)
 		len = PAGE_SIZE - offset;
 
-	src = buf->ops->map(pipe, buf, 1);
+	src = kmap_atomic(buf->page);
 	memcpy(page_address(page) + offset, src + buf->offset, len);
-	buf->ops->unmap(pipe, buf, src);
+	kunmap_atomic(src);
 
 	sg_set_page(&(sgl->sg[sgl->n]), page, len, offset);
 }
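With the pipe_buf_operations map()/unmap() hooks gone from this series, pipe pages are mapped directly. The replacement idiom, sketched with a hypothetical destination dst and length len:

	char *src = kmap_atomic(buf->page);	/* short-lived mapping; no sleeping allowed */
	memcpy(dst, src + buf->offset, len);
	kunmap_atomic(src);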
diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.c
index a54b506ba7ca..b87b246111c0 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.c
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib-linux.c
@@ -99,16 +99,7 @@ ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
 	struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
 	unsigned int niov = tx->tx_niov;
 #endif
-	struct msghdr msg = {
-		.msg_name = NULL,
-		.msg_namelen = 0,
-		.msg_iov = scratchiov,
-		.msg_iovlen = niov,
-		.msg_control = NULL,
-		.msg_controllen = 0,
-		.msg_flags = MSG_DONTWAIT
-	};
-	mm_segment_t oldmm = get_fs();
+	struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
 	int i;
 
 	for (nob = i = 0; i < niov; i++) {
@@ -120,9 +111,7 @@ ksocknal_lib_send_iov (ksock_conn_t *conn, ksock_tx_t *tx)
 		    nob < tx->tx_resid)
 			msg.msg_flags |= MSG_MORE;
 
-		set_fs (KERNEL_DS);
-		rc = sock_sendmsg(sock, &msg, nob);
-		set_fs (oldmm);
+		rc = kernel_sendmsg(sock, &msg, (struct kvec *)scratchiov, niov, nob);
 	}
 	return rc;
 }
@@ -174,16 +163,7 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
 	struct iovec *scratchiov = conn->ksnc_scheduler->kss_scratch_iov;
 	unsigned int niov = tx->tx_nkiov;
 #endif
-	struct msghdr msg = {
-		.msg_name = NULL,
-		.msg_namelen = 0,
-		.msg_iov = scratchiov,
-		.msg_iovlen = niov,
-		.msg_control = NULL,
-		.msg_controllen = 0,
-		.msg_flags = MSG_DONTWAIT
-	};
-	mm_segment_t oldmm = get_fs();
+	struct msghdr msg = {.msg_flags = MSG_DONTWAIT};
 	int i;
 
 	for (nob = i = 0; i < niov; i++) {
@@ -196,9 +176,7 @@ ksocknal_lib_send_kiov (ksock_conn_t *conn, ksock_tx_t *tx)
 		    nob < tx->tx_resid)
 			msg.msg_flags |= MSG_MORE;
 
-		set_fs (KERNEL_DS);
-		rc = sock_sendmsg(sock, &msg, nob);
-		set_fs (oldmm);
+		rc = kernel_sendmsg(sock, &msg, (struct kvec *)scratchiov, niov, nob);
 
 	for (i = 0; i < niov; i++)
 		kunmap(kiov[i].kiov_page);
@@ -237,15 +215,8 @@ ksocknal_lib_recv_iov (ksock_conn_t *conn)
 #endif
 	struct iovec *iov = conn->ksnc_rx_iov;
 	struct msghdr msg = {
-		.msg_name = NULL,
-		.msg_namelen = 0,
-		.msg_iov = scratchiov,
-		.msg_iovlen = niov,
-		.msg_control = NULL,
-		.msg_controllen = 0,
 		.msg_flags = 0
 	};
-	mm_segment_t oldmm = get_fs();
 	int nob;
 	int i;
 	int rc;
@@ -263,10 +234,8 @@ ksocknal_lib_recv_iov (ksock_conn_t *conn)
 	}
 	LASSERT (nob <= conn->ksnc_rx_nob_wanted);
 
-	set_fs (KERNEL_DS);
-	rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
-	/* NB this is just a boolean..........................^ */
-	set_fs (oldmm);
+	rc = kernel_recvmsg(conn->ksnc_sock, &msg,
+			(struct kvec *)scratchiov, niov, nob, MSG_DONTWAIT);
 
 	saved_csum = 0;
 	if (conn->ksnc_proto == &ksocknal_protocol_v2x) {
@@ -355,14 +324,8 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn)
 #endif
 	lnet_kiov_t *kiov = conn->ksnc_rx_kiov;
 	struct msghdr msg = {
-		.msg_name = NULL,
-		.msg_namelen = 0,
-		.msg_iov = scratchiov,
-		.msg_control = NULL,
-		.msg_controllen = 0,
 		.msg_flags = 0
 	};
-	mm_segment_t oldmm = get_fs();
 	int nob;
 	int i;
 	int rc;
@@ -370,13 +333,14 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn)
 	void *addr;
 	int sum;
 	int fragnob;
+	int n;
 
 	/* NB we can't trust socket ops to either consume our iovs
 	 * or leave them alone. */
 	addr = ksocknal_lib_kiov_vmap(kiov, niov, scratchiov, pages);
 	if (addr != NULL) {
 		nob = scratchiov[0].iov_len;
-		msg.msg_iovlen = 1;
+		n = 1;
 
 	} else {
 		for (nob = i = 0; i < niov; i++) {
@@ -384,15 +348,13 @@ ksocknal_lib_recv_kiov (ksock_conn_t *conn)
 			scratchiov[i].iov_base = kmap(kiov[i].kiov_page) +
 						 kiov[i].kiov_offset;
 		}
-		msg.msg_iovlen = niov;
+		n = niov;
 	}
 
 	LASSERT (nob <= conn->ksnc_rx_nob_wanted);
 
-	set_fs (KERNEL_DS);
-	rc = sock_recvmsg (conn->ksnc_sock, &msg, nob, MSG_DONTWAIT);
-	/* NB this is just a boolean.......................^ */
-	set_fs (oldmm);
+	rc = kernel_recvmsg(conn->ksnc_sock, &msg,
+			(struct kvec *)scratchiov, n, nob, MSG_DONTWAIT);
 
 	if (conn->ksnc_msg.ksm_csum != 0) {
 		for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) {
diff --git a/drivers/staging/lustre/lustre/libcfs/linux/linux-tcpip.c b/drivers/staging/lustre/lustre/libcfs/linux/linux-tcpip.c
index e6069d78af6b..7539fe16d76f 100644
--- a/drivers/staging/lustre/lustre/libcfs/linux/linux-tcpip.c
+++ b/drivers/staging/lustre/lustre/libcfs/linux/linux-tcpip.c
@@ -265,17 +265,11 @@ libcfs_sock_write (struct socket *sock, void *buffer, int nob, int timeout)
 	 * empty enough to take the whole message immediately */
 
 	for (;;) {
-		struct iovec iov = {
+		struct kvec iov = {
 			.iov_base = buffer,
 			.iov_len = nob
 		};
 		struct msghdr msg = {
-			.msg_name = NULL,
-			.msg_namelen = 0,
-			.msg_iov = &iov,
-			.msg_iovlen = 1,
-			.msg_control = NULL,
-			.msg_controllen = 0,
 			.msg_flags = (timeout == 0) ? MSG_DONTWAIT : 0
 		};
 
@@ -297,11 +291,9 @@ libcfs_sock_write (struct socket *sock, void *buffer, int nob, int timeout)
 			}
 		}
 
-		set_fs (KERNEL_DS);
 		then = jiffies;
-		rc = sock_sendmsg (sock, &msg, iov.iov_len);
+		rc = kernel_sendmsg(sock, &msg, &iov, 1, nob);
 		ticks -= jiffies - then;
-		set_fs (oldmm);
 
 		if (rc == nob)
 			return 0;
@@ -338,17 +330,11 @@ libcfs_sock_read (struct socket *sock, void *buffer, int nob, int timeout)
 	LASSERT (ticks > 0);
 
 	for (;;) {
-		struct iovec iov = {
+		struct kvec iov = {
 			.iov_base = buffer,
 			.iov_len = nob
 		};
 		struct msghdr msg = {
-			.msg_name = NULL,
-			.msg_namelen = 0,
-			.msg_iov = &iov,
-			.msg_iovlen = 1,
-			.msg_control = NULL,
-			.msg_controllen = 0,
 			.msg_flags = 0
 		};
 
@@ -367,11 +353,9 @@ libcfs_sock_read (struct socket *sock, void *buffer, int nob, int timeout)
 			return rc;
 		}
 
-		set_fs(KERNEL_DS);
 		then = jiffies;
-		rc = sock_recvmsg(sock, &msg, iov.iov_len, 0);
+		rc = kernel_recvmsg(sock, &msg, &iov, 1, nob, 0);
 		ticks -= jiffies - then;
-		set_fs(oldmm);
 
 		if (rc < 0)
 			return rc;
diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c
index ab06891f7fc7..80d48b5ae247 100644
--- a/drivers/staging/lustre/lustre/llite/symlink.c
+++ b/drivers/staging/lustre/lustre/llite/symlink.c
@@ -115,27 +115,6 @@ failed:
 	return rc;
 }
 
-static int ll_readlink(struct dentry *dentry, char *buffer, int buflen)
-{
-	struct inode *inode = dentry->d_inode;
-	struct ptlrpc_request *request;
-	char *symname;
-	int rc;
-
-	CDEBUG(D_VFSTRACE, "VFS Op\n");
-
-	ll_inode_size_lock(inode);
-	rc = ll_readlink_internal(inode, &request, &symname);
-	if (rc)
-		GOTO(out, rc);
-
-	rc = vfs_readlink(dentry, buffer, buflen, symname);
- out:
-	ptlrpc_req_finished(request);
-	ll_inode_size_unlock(inode);
-	return rc;
-}
-
 static void *ll_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
 	struct inode *inode = dentry->d_inode;
@@ -175,7 +154,7 @@ static void ll_put_link(struct dentry *dentry, struct nameidata *nd, void *cooki
 }
 
 struct inode_operations ll_fast_symlink_inode_operations = {
-	.readlink	= ll_readlink,
+	.readlink	= generic_readlink,
 	.setattr	= ll_setattr,
 	.follow_link	= ll_follow_link,
 	.put_link	= ll_put_link,
diff --git a/drivers/staging/usbip/stub_dev.c b/drivers/staging/usbip/stub_dev.c
index 773d8ca07a00..de692d7011a5 100644
--- a/drivers/staging/usbip/stub_dev.c
+++ b/drivers/staging/usbip/stub_dev.c
@@ -86,7 +86,6 @@ static ssize_t store_sockfd(struct device *dev, struct device_attribute *attr,
 	struct stub_device *sdev = dev_get_drvdata(dev);
 	int sockfd = 0;
 	struct socket *socket;
-	ssize_t err = -EINVAL;
 	int rv;
 
 	if (!sdev) {
@@ -99,6 +98,7 @@ static ssize_t store_sockfd(struct device *dev, struct device_attribute *attr,
 		return -EINVAL;
 
 	if (sockfd != -1) {
+		int err;
 		dev_info(dev, "stub up\n");
 
 		spin_lock_irq(&sdev->ud.lock);
@@ -108,7 +108,7 @@ static ssize_t store_sockfd(struct device *dev, struct device_attribute *attr,
 			goto err;
 		}
 
-		socket = sockfd_to_socket(sockfd);
+		socket = sockfd_lookup(sockfd, &err);
 		if (!socket)
 			goto err;
 
@@ -141,7 +141,7 @@ static ssize_t store_sockfd(struct device *dev, struct device_attribute *attr,
 
 err:
 	spin_unlock_irq(&sdev->ud.lock);
-	return err;
+	return -EINVAL;
 }
 static DEVICE_ATTR(usbip_sockfd, S_IWUSR, NULL, store_sockfd);
 
@@ -211,7 +211,7 @@ static void stub_shutdown_connection(struct usbip_device *ud)
 	 * not touch NULL socket.
 	 */
 	if (ud->tcp_socket) {
-		fput(ud->tcp_socket->file);
+		sockfd_put(ud->tcp_socket);
 		ud->tcp_socket = NULL;
 	}
 
diff --git a/drivers/staging/usbip/usbip_common.c b/drivers/staging/usbip/usbip_common.c
index 184fa70365db..facaaf003f19 100644
--- a/drivers/staging/usbip/usbip_common.c
+++ b/drivers/staging/usbip/usbip_common.c
@@ -382,31 +382,6 @@ err:
 }
 EXPORT_SYMBOL_GPL(usbip_recv);
 
-struct socket *sockfd_to_socket(unsigned int sockfd)
-{
-	struct socket *socket;
-	struct file *file;
-	struct inode *inode;
-
-	file = fget(sockfd);
-	if (!file) {
-		pr_err("invalid sockfd\n");
-		return NULL;
-	}
-
-	inode = file_inode(file);
-
-	if (!inode || !S_ISSOCK(inode->i_mode)) {
-		fput(file);
-		return NULL;
-	}
-
-	socket = SOCKET_I(inode);
-
-	return socket;
-}
-EXPORT_SYMBOL_GPL(sockfd_to_socket);
-
 /* there may be more cases to tweak the flags. */
 static unsigned int tweak_transfer_flags(unsigned int flags)
 {
diff --git a/drivers/staging/usbip/usbip_common.h b/drivers/staging/usbip/usbip_common.h
index 732fb636a1e5..f555d834f134 100644
--- a/drivers/staging/usbip/usbip_common.h
+++ b/drivers/staging/usbip/usbip_common.h
@@ -299,7 +299,6 @@ void usbip_dump_urb(struct urb *purb);
 void usbip_dump_header(struct usbip_header *pdu);
 
 int usbip_recv(struct socket *sock, void *buf, int size);
-struct socket *sockfd_to_socket(unsigned int sockfd);
 
 void usbip_pack_pdu(struct usbip_header *pdu, struct urb *urb, int cmd,
 		    int pack);
diff --git a/drivers/staging/usbip/vhci_hcd.c b/drivers/staging/usbip/vhci_hcd.c
index 1e84577230ef..70e17551943d 100644
--- a/drivers/staging/usbip/vhci_hcd.c
+++ b/drivers/staging/usbip/vhci_hcd.c
@@ -788,7 +788,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud)
 
 	/* active connection is closed */
 	if (vdev->ud.tcp_socket) {
-		fput(vdev->ud.tcp_socket->file);
+		sockfd_put(vdev->ud.tcp_socket);
 		vdev->ud.tcp_socket = NULL;
 	}
 	pr_info("release socket\n");
@@ -835,7 +835,7 @@ static void vhci_device_reset(struct usbip_device *ud)
 	vdev->udev = NULL;
 
 	if (ud->tcp_socket) {
-		fput(ud->tcp_socket->file);
+		sockfd_put(ud->tcp_socket);
 		ud->tcp_socket = NULL;
 	}
 	ud->status = VDEV_ST_NULL;
diff --git a/drivers/staging/usbip/vhci_sysfs.c b/drivers/staging/usbip/vhci_sysfs.c
index e0980324fb03..47bddcdde0a6 100644
--- a/drivers/staging/usbip/vhci_sysfs.c
+++ b/drivers/staging/usbip/vhci_sysfs.c
@@ -176,6 +176,7 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
 	struct socket *socket;
 	int sockfd = 0;
 	__u32 rhport = 0, devid = 0, speed = 0;
+	int err;
 
 	/*
 	 * @rhport: port number of vhci_hcd
@@ -194,8 +195,7 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
 		return -EINVAL;
 
 	/* Extract socket from fd. */
-	/* The correct way to clean this up is to fput(socket->file). */
-	socket = sockfd_to_socket(sockfd);
+	socket = sockfd_lookup(sockfd, &err);
 	if (!socket)
 		return -EINVAL;
 
@@ -211,7 +211,7 @@ static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
 		spin_unlock(&vdev->ud.lock);
 		spin_unlock(&the_controller->lock);
 
-		fput(socket->file);
+		sockfd_put(socket);
 
 		dev_err(dev, "port %d already used\n", rhport);
 		return -EINVAL;
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index e1e22e0f01e8..be414d2b2b22 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -818,9 +818,9 @@ static int vhost_net_release(struct inode *inode, struct file *f)
 	vhost_dev_cleanup(&n->dev, false);
 	vhost_net_vq_reset(n);
 	if (tx_sock)
-		fput(tx_sock->file);
+		sockfd_put(tx_sock);
 	if (rx_sock)
-		fput(rx_sock->file);
+		sockfd_put(rx_sock);
 	/* Make sure no callbacks are outstanding */
 	synchronize_rcu_bh();
 	/* We do an extra flush before freeing memory,
@@ -860,7 +860,7 @@ static struct socket *get_raw_socket(int fd)
 	}
 	return sock;
 err:
-	fput(sock->file);
+	sockfd_put(sock);
 	return ERR_PTR(r);
 }
 
@@ -966,7 +966,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 
 	if (oldsock) {
 		vhost_net_flush_vq(n, index);
-		fput(oldsock->file);
+		sockfd_put(oldsock);
 	}
 
 	mutex_unlock(&n->dev.mutex);
@@ -978,7 +978,7 @@ err_used:
 	if (ubufs)
 		vhost_net_ubuf_put_wait_and_free(ubufs);
 err_ubufs:
-	fput(sock->file);
+	sockfd_put(sock);
 err_vq:
 	mutex_unlock(&vq->mutex);
 err:
@@ -1009,9 +1009,9 @@ static long vhost_net_reset_owner(struct vhost_net *n)
 done:
 	mutex_unlock(&n->dev.mutex);
 	if (tx_sock)
-		fput(tx_sock->file);
+		sockfd_put(tx_sock);
 	if (rx_sock)
-		fput(rx_sock->file);
+		sockfd_put(rx_sock);
 	return err;
 }
 
diff --git a/fs/bio.c b/fs/bio.c
index b1bc722b89aa..6f0362b77806 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1002,7 +1002,7 @@ struct bio_map_data {
 };
 
 static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
-			     struct sg_iovec *iov, int iov_count,
+			     const struct sg_iovec *iov, int iov_count,
 			     int is_our_pages)
 {
 	memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
@@ -1022,7 +1022,7 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs,
 		       sizeof(struct sg_iovec) * iov_count, gfp_mask);
 }
 
-static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
+static int __bio_copy_iov(struct bio *bio, const struct sg_iovec *iov, int iov_count,
 			  int to_user, int from_user, int do_free_page)
 {
 	int ret = 0, i;
@@ -1120,7 +1120,7 @@ EXPORT_SYMBOL(bio_uncopy_user);
  */
 struct bio *bio_copy_user_iov(struct request_queue *q,
 			      struct rq_map_data *map_data,
-			      struct sg_iovec *iov, int iov_count,
+			      const struct sg_iovec *iov, int iov_count,
 			      int write_to_vm, gfp_t gfp_mask)
 {
 	struct bio_map_data *bmd;
@@ -1259,7 +1259,7 @@ EXPORT_SYMBOL(bio_copy_user);
 
 static struct bio *__bio_map_user_iov(struct request_queue *q,
 				      struct block_device *bdev,
-				      struct sg_iovec *iov, int iov_count,
+				      const struct sg_iovec *iov, int iov_count,
 				      int write_to_vm, gfp_t gfp_mask)
 {
 	int i, j;
@@ -1407,7 +1407,7 @@ EXPORT_SYMBOL(bio_map_user);
  *	device. Returns an error pointer in case of error.
  */
 struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev,
-			     struct sg_iovec *iov, int iov_count,
+			     const struct sg_iovec *iov, int iov_count,
 			     int write_to_vm, gfp_t gfp_mask)
 {
 	struct bio *bio;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ba0d2b05bb78..552a8d13bc32 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1518,7 +1518,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	BUG_ON(iocb->ki_pos != pos);
 
 	blk_start_plug(&plug);
-	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+	ret = __generic_file_aio_write(iocb, iov, nr_segs);
 	if (ret > 0) {
 		ssize_t err;
 
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c5998477fe60..eb742c07e7a4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -425,13 +425,8 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
 		struct page *page = prepared_pages[pg];
 		/*
 		 * Copy data from userspace to the current page
-		 *
-		 * Disable pagefault to avoid recursive lock since
-		 * the pages are already locked
 		 */
-		pagefault_disable();
 		copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
-		pagefault_enable();
 
 		/* Flush processor's dcache for this page */
 		flush_dcache_page(page);
@@ -1665,7 +1660,7 @@ again:
 static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 				    const struct iovec *iov,
 				    unsigned long nr_segs, loff_t pos,
-				    loff_t *ppos, size_t count, size_t ocount)
+				    size_t count, size_t ocount)
 {
 	struct file *file = iocb->ki_filp;
 	struct iov_iter i;
@@ -1674,7 +1669,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 	loff_t endbyte;
 	int err;
 
-	written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
+	written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
 					    count, ocount);
 
 	if (written < 0 || written == count)
@@ -1693,7 +1688,7 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 	if (err)
 		goto out;
 	written += written_buffered;
-	*ppos = pos + written_buffered;
+	iocb->ki_pos = pos + written_buffered;
 	invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
 				 endbyte >> PAGE_CACHE_SHIFT);
 out:
@@ -1725,7 +1720,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file);
 	struct btrfs_root *root = BTRFS_I(inode)->root;
-	loff_t *ppos = &iocb->ki_pos;
 	u64 start_pos;
 	u64 end_pos;
 	ssize_t num_written = 0;
@@ -1796,7 +1790,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 
 	if (unlikely(file->f_flags & O_DIRECT)) {
 		num_written = __btrfs_direct_write(iocb, iov, nr_segs,
-						   pos, ppos, count, ocount);
+						   pos, count, ocount);
 	} else {
 		struct iov_iter i;
 
@@ -1804,7 +1798,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 
 		num_written = __btrfs_buffered_write(file, &i, pos);
 		if (num_written > 0)
-			*ppos = pos + num_written;
+			iocb->ki_pos = pos + num_written;
 	}
 
 	mutex_unlock(&inode->i_mutex);
diff --git a/fs/buffer.c b/fs/buffer.c
index 8c53a2b15ecb..9ddb9fc7d923 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2114,8 +2114,8 @@ EXPORT_SYMBOL(generic_write_end);
  * Returns true if all buffers which correspond to a file portion
  * we want to read are uptodate.
  */
-int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
-					unsigned long from)
+int block_is_partially_uptodate(struct page *page, unsigned long from,
+					unsigned long count)
 {
 	unsigned block_start, block_end, blocksize;
 	unsigned to;
@@ -2127,7 +2127,7 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
 
 	head = page_buffers(page);
 	blocksize = head->b_size;
-	to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
+	to = min_t(unsigned, PAGE_CACHE_SIZE - from, count);
 	to = from + to;
 	if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
 		return 0;
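The read_descriptor_t parameter is gone from ->is_partially_uptodate(); callers now pass the offset into the page and the byte count directly. A sketch of how a read path might consult the new hook (the helper and its name are illustrative, not part of the patch):

	static bool range_uptodate(const struct address_space_operations *ops,
				   struct page *page, unsigned long from,
				   unsigned long count)
	{
		if (PageUptodate(page))
			return true;
		/* new signature: page offset and byte count, no read_descriptor_t */
		return ops->is_partially_uptodate &&
		       ops->is_partially_uptodate(page, from, count);
	}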
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index 622f4696e484..5b99bafc31d1 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -124,7 +124,6 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
 	/* check parameters */
 	ret = -EOPNOTSUPP;
 	if (!root->d_inode ||
-	    !root->d_inode->i_op ||
 	    !root->d_inode->i_op->lookup ||
 	    !root->d_inode->i_op->mkdir ||
 	    !root->d_inode->i_op->setxattr ||
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 6494d9f673aa..c0a681705104 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -779,8 +779,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
 	}
 
 	ret = -EPERM;
-	if (!subdir->d_inode->i_op ||
-	    !subdir->d_inode->i_op->setxattr ||
+	if (!subdir->d_inode->i_op->setxattr ||
 	    !subdir->d_inode->i_op->getxattr ||
 	    !subdir->d_inode->i_op->lookup ||
 	    !subdir->d_inode->i_op->mkdir ||
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 66075a4ad979..39da1c2efa50 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -601,7 +601,7 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
 					    false);
 		if (IS_ERR(req)) {
 			ret = PTR_ERR(req);
-			goto out;
+			break;
 		}
 
 		num_pages = calc_pages_for(page_align, len);
@@ -719,7 +719,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
 				    false);
 		if (IS_ERR(req)) {
 			ret = PTR_ERR(req);
-			goto out;
+			break;
 		}
 
 		/*
@@ -972,6 +972,7 @@ retry_snap:
 		}
 	} else {
 		loff_t old_size = inode->i_size;
+		struct iov_iter from;
 		/*
 		 * No need to acquire the i_truncate_mutex. Because
 		 * the MDS revokes Fwb caps before sending truncate
@@ -979,9 +980,10 @@ retry_snap:
 		 * are pending vmtruncate. So write and vmtruncate
 		 * can not run at the same time
 		 */
-		written = generic_file_buffered_write(iocb, iov, nr_segs,
-						      pos, &iocb->ki_pos,
-						      count, 0);
+		iov_iter_init(&from, iov, nr_segs, count, 0);
+		written = generic_perform_write(file, &from, pos);
+		if (likely(written >= 0))
+			iocb->ki_pos = pos + written;
 		if (inode->i_size > old_size)
 			ceph_fscache_update_objectsize(inode);
 		mutex_unlock(&inode->i_mutex);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 2c70cbe35d39..df9c9141c099 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -850,7 +850,6 @@ const struct inode_operations cifs_file_inode_ops = {
 /* revalidate:cifs_revalidate, */
 	.setattr = cifs_setattr,
 	.getattr = cifs_getattr, /* do we need this anymore? */
-	.rename = cifs_rename,
 	.permission = cifs_permission,
 #ifdef CONFIG_CIFS_XATTR
 	.setxattr = cifs_setxattr,
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 216d7e99f921..8807442c94dd 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2579,19 +2579,32 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
 	struct cifsInodeInfo *cinode = CIFS_I(inode);
 	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
 	ssize_t rc = -EACCES;
-	loff_t lock_pos = pos;
+	loff_t lock_pos = iocb->ki_pos;
 
-	if (file->f_flags & O_APPEND)
-		lock_pos = i_size_read(inode);
 	/*
 	 * We need to hold the sem to be sure nobody modifies lock list
 	 * with a brlock that prevents writing.
 	 */
 	down_read(&cinode->lock_sem);
+	mutex_lock(&inode->i_mutex);
+	if (file->f_flags & O_APPEND)
+		lock_pos = i_size_read(inode);
 	if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs),
 				     server->vals->exclusive_lock_type, NULL,
-				     CIFS_WRITE_OP))
-		rc = generic_file_aio_write(iocb, iov, nr_segs, pos);
+				     CIFS_WRITE_OP)) {
+		rc = __generic_file_aio_write(iocb, iov, nr_segs);
+		mutex_unlock(&inode->i_mutex);
+
+		if (rc > 0) {
+			ssize_t err;
+
+			err = generic_write_sync(file, iocb->ki_pos - rc, rc);
+			if (err < 0)
+				rc = err;
+		}
+	} else {
+		mutex_unlock(&inode->i_mutex);
+	}
 	up_read(&cinode->lock_sem);
 	return rc;
 }
@@ -2727,56 +2740,27 @@ cifs_retry_async_readv(struct cifs_readdata *rdata)
 /**
  * cifs_readdata_to_iov - copy data from pages in response to an iovec
  * @rdata:	the readdata response with list of pages holding data
- * @iov:	vector in which we should copy the data
- * @nr_segs:	number of segments in vector
- * @offset:	offset into file of the first iovec
- * @copied:	used to return the amount of data copied to the iov
+ * @iter:	destination for our data
  *
  * This function copies data from a list of pages in a readdata response into
  * an array of iovecs. It will first calculate where the data should go
  * based on the info in the readdata and then copy the data into that spot.
  */
-static ssize_t
-cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
-			unsigned long nr_segs, loff_t offset, ssize_t *copied)
+static int
+cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
 {
-	int rc = 0;
-	struct iov_iter ii;
-	size_t pos = rdata->offset - offset;
-	ssize_t remaining = rdata->bytes;
-	unsigned char *pdata;
+	size_t remaining = rdata->bytes;
 	unsigned int i;
 
-	/* set up iov_iter and advance to the correct offset */
-	iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
-	iov_iter_advance(&ii, pos);
-
-	*copied = 0;
 	for (i = 0; i < rdata->nr_pages; i++) {
-		ssize_t copy;
 		struct page *page = rdata->pages[i];
-
-		/* copy a whole page or whatever's left */
-		copy = min_t(ssize_t, remaining, PAGE_SIZE);
-
-		/* ...but limit it to whatever space is left in the iov */
-		copy = min_t(ssize_t, copy, iov_iter_count(&ii));
-
-		/* go while there's data to be copied and no errors */
-		if (copy && !rc) {
-			pdata = kmap(page);
-			rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
-						(int)copy);
-			kunmap(page);
-			if (!rc) {
-				*copied += copy;
-				remaining -= copy;
-				iov_iter_advance(&ii, copy);
-			}
-		}
+		size_t copy = min(remaining, PAGE_SIZE);
+		size_t written = copy_page_to_iter(page, 0, copy, iter);
+		remaining -= written;
+		if (written < copy && iov_iter_count(iter) > 0)
+			break;
 	}
-
-	return rc;
+	return remaining ? -EFAULT : 0;
 }
 
 static void
@@ -2837,20 +2821,21 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
 	return total_read > 0 ? total_read : result;
 }
 
-static ssize_t
-cifs_iovec_read(struct file *file, const struct iovec *iov,
-		 unsigned long nr_segs, loff_t *poffset)
+ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
+			unsigned long nr_segs, loff_t pos)
 {
+	struct file *file = iocb->ki_filp;
 	ssize_t rc;
 	size_t len, cur_len;
 	ssize_t total_read = 0;
-	loff_t offset = *poffset;
+	loff_t offset = pos;
 	unsigned int npages;
 	struct cifs_sb_info *cifs_sb;
 	struct cifs_tcon *tcon;
 	struct cifsFileInfo *open_file;
 	struct cifs_readdata *rdata, *tmp;
 	struct list_head rdata_list;
+	struct iov_iter to;
 	pid_t pid;
 
 	if (!nr_segs)
@@ -2860,6 +2845,8 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
 	if (!len)
 		return 0;
 
+	iov_iter_init(&to, iov, nr_segs, len, 0);
+
 	INIT_LIST_HEAD(&rdata_list);
 	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	open_file = file->private_data;
@@ -2917,55 +2904,44 @@ error:
 	if (!list_empty(&rdata_list))
 		rc = 0;
 
+	len = iov_iter_count(&to);
 	/* the loop below should proceed in the order of increasing offsets */
-restart_loop:
 	list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
+	again:
 		if (!rc) {
-			ssize_t copied;
-
 			/* FIXME: freezable sleep too? */
 			rc = wait_for_completion_killable(&rdata->done);
 			if (rc)
 				rc = -EINTR;
-			else if (rdata->result)
+			else if (rdata->result) {
 				rc = rdata->result;
-			else {
-				rc = cifs_readdata_to_iov(rdata, iov,
-							nr_segs, *poffset,
-							&copied);
-				total_read += copied;
+				/* resend call if it's a retryable error */
+				if (rc == -EAGAIN) {
+					rc = cifs_retry_async_readv(rdata);
+					goto again;
+				}
+			} else {
+				rc = cifs_readdata_to_iov(rdata, &to);
 			}
 
-			/* resend call if it's a retryable error */
-			if (rc == -EAGAIN) {
-				rc = cifs_retry_async_readv(rdata);
-				goto restart_loop;
-			}
 		}
 		list_del_init(&rdata->list);
 		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
 	}
 
+	total_read = len - iov_iter_count(&to);
+
 	cifs_stats_bytes_read(tcon, total_read);
-	*poffset += total_read;
 
 	/* mask nodata case */
 	if (rc == -ENODATA)
 		rc = 0;
 
-	return total_read ? total_read : rc;
-}
-
-ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
-			unsigned long nr_segs, loff_t pos)
-{
-	ssize_t read;
-
-	read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
-	if (read > 0)
-		iocb->ki_pos = pos;
-
-	return read;
+	if (total_read) {
+		iocb->ki_pos = pos + total_read;
+		return total_read;
+	}
+	return rc;
 }
 
 ssize_t
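copy_page_to_iter() is what makes the rewritten cifs_readdata_to_iov() so much shorter: it replaces the kmap()/memcpy_toiovecend()/kunmap() sequence, copies out of a page into whatever the iov_iter describes, advances the iterator, and returns the bytes actually copied (short only if the iterator runs out or faults). A sketch of the general pattern for draining a page array (helper name and signature are illustrative):

	static size_t drain_pages(struct page **pages, unsigned int npages,
				  size_t bytes, struct iov_iter *iter)
	{
		size_t remaining = bytes;
		unsigned int i;

		for (i = 0; i < npages && remaining; i++) {
			size_t want = min_t(size_t, remaining, PAGE_SIZE);
			size_t done = copy_page_to_iter(pages[i], 0, want, iter);

			remaining -= done;
			if (done < want)	/* iterator exhausted or faulted */
				break;
		}
		return bytes - remaining;	/* bytes actually copied */
	}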
diff --git a/fs/exec.c b/fs/exec.c
index 9e81c630dfa7..476f3ebf437e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -813,7 +813,7 @@ EXPORT_SYMBOL(kernel_read);
 
 ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
 {
-	ssize_t res = file->f_op->read(file, (void __user *)addr, len, &pos);
+	ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
 	if (res > 0)
 		flush_icache_range(addr, addr + len);
 	return res;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 4e508fc83dcf..ca7502d89fde 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -146,7 +146,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
 		overwrite = 1;
 	}
 
-	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
+	ret = __generic_file_aio_write(iocb, iov, nr_segs);
 	mutex_unlock(&inode->i_mutex);
 
 	if (ret > 0) {
diff --git a/fs/file.c b/fs/file.c
index b61293badfb1..8f294cfac697 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -25,7 +25,10 @@
25 25
26int sysctl_nr_open __read_mostly = 1024*1024; 26int sysctl_nr_open __read_mostly = 1024*1024;
27int sysctl_nr_open_min = BITS_PER_LONG; 27int sysctl_nr_open_min = BITS_PER_LONG;
28int sysctl_nr_open_max = 1024 * 1024; /* raised later */ 28/* our max() is unusable in constant expressions ;-/ */
29#define __const_max(x, y) ((x) < (y) ? (x) : (y))
30int sysctl_nr_open_max = __const_max(INT_MAX, ~(size_t)0/sizeof(void *)) &
31 -BITS_PER_LONG;
29 32
30static void *alloc_fdmem(size_t size) 33static void *alloc_fdmem(size_t size)
31{ 34{
@@ -429,12 +432,6 @@ void exit_files(struct task_struct *tsk)
429 } 432 }
430} 433}
431 434
432void __init files_defer_init(void)
433{
434 sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) &
435 -BITS_PER_LONG;
436}
437
438struct files_struct init_files = { 435struct files_struct init_files = {
439 .count = ATOMIC_INIT(1), 436 .count = ATOMIC_INIT(1),
440 .fdt = &init_files.fdtab, 437 .fdt = &init_files.fdtab,
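
files_defer_init() existed only to clamp sysctl_nr_open_max at boot; the change above folds the clamp into the initializer. Note that __const_max(), despite its name, evaluates to the smaller of its arguments here (the kernel's min()/max() expand to statement expressions and cannot initialize a global, as the source comment grumbles), and & -BITS_PER_LONG rounds the result down to a multiple of the word size. A standalone userspace sketch of the same constant-expression trick (values assume a 64-bit build):

	#include <limits.h>
	#include <stdio.h>

	#define __const_max(x, y) ((x) < (y) ? (x) : (y)) /* picks the smaller */

	static const long nr_open_max =
		__const_max((long)INT_MAX, (long)(~(size_t)0 / sizeof(void *))) & -64L;

	int main(void)
	{
		printf("%ld\n", nr_open_max); /* 2147483584 == INT_MAX & -64 */
		return 0;
	}
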
diff --git a/fs/file_table.c b/fs/file_table.c
index 01071c4d752e..a374f5033e97 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -52,7 +52,6 @@ static void file_free_rcu(struct rcu_head *head)
52static inline void file_free(struct file *f) 52static inline void file_free(struct file *f)
53{ 53{
54 percpu_counter_dec(&nr_files); 54 percpu_counter_dec(&nr_files);
55 file_check_state(f);
56 call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); 55 call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
57} 56}
58 57
@@ -178,47 +177,12 @@ struct file *alloc_file(struct path *path, fmode_t mode,
178 file->f_mapping = path->dentry->d_inode->i_mapping; 177 file->f_mapping = path->dentry->d_inode->i_mapping;
179 file->f_mode = mode; 178 file->f_mode = mode;
180 file->f_op = fop; 179 file->f_op = fop;
181
182 /*
183 * These mounts don't really matter in practice
184 * for r/o bind mounts. They aren't userspace-
185 * visible. We do this for consistency, and so
186 * that we can do debugging checks at __fput()
187 */
188 if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) {
189 file_take_write(file);
190 WARN_ON(mnt_clone_write(path->mnt));
191 }
192 if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) 180 if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
193 i_readcount_inc(path->dentry->d_inode); 181 i_readcount_inc(path->dentry->d_inode);
194 return file; 182 return file;
195} 183}
196EXPORT_SYMBOL(alloc_file); 184EXPORT_SYMBOL(alloc_file);
197 185
198/**
199 * drop_file_write_access - give up ability to write to a file
200 * @file: the file to which we will stop writing
201 *
202 * This is a central place which will give up the ability
203 * to write to @file, along with access to write through
204 * its vfsmount.
205 */
206static void drop_file_write_access(struct file *file)
207{
208 struct vfsmount *mnt = file->f_path.mnt;
209 struct dentry *dentry = file->f_path.dentry;
210 struct inode *inode = dentry->d_inode;
211
212 put_write_access(inode);
213
214 if (special_file(inode->i_mode))
215 return;
216 if (file_check_writeable(file) != 0)
217 return;
218 __mnt_drop_write(mnt);
219 file_release_write(file);
220}
221
222/* the real guts of fput() - releasing the last reference to file 186/* the real guts of fput() - releasing the last reference to file
223 */ 187 */
224static void __fput(struct file *file) 188static void __fput(struct file *file)
@@ -253,8 +217,10 @@ static void __fput(struct file *file)
253 put_pid(file->f_owner.pid); 217 put_pid(file->f_owner.pid);
254 if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) 218 if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
255 i_readcount_dec(inode); 219 i_readcount_dec(inode);
256 if (file->f_mode & FMODE_WRITE) 220 if (file->f_mode & FMODE_WRITER) {
257 drop_file_write_access(file); 221 put_write_access(inode);
222 __mnt_drop_write(mnt);
223 }
258 file->f_path.dentry = NULL; 224 file->f_path.dentry = NULL;
259 file->f_path.mnt = NULL; 225 file->f_path.mnt = NULL;
260 file->f_inode = NULL; 226 file->f_inode = NULL;
@@ -359,6 +325,5 @@ void __init files_init(unsigned long mempages)
359 325
360 n = (mempages * (PAGE_SIZE / 1024)) / 10; 326 n = (mempages * (PAGE_SIZE / 1024)) / 10;
361 files_stat.max_files = max_t(unsigned long, n, NR_FILE); 327 files_stat.max_files = max_t(unsigned long, n, NR_FILE);
362 files_defer_init();
363 percpu_counter_init(&nr_files, 0); 328 percpu_counter_init(&nr_files, 0);
364} 329}
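
With drop_file_write_access() gone, __fput() needs only the FMODE_WRITER bit to know that both write counts were taken at open time; the special_file() and file_check_writeable() tests disappear because the open path (see the fs/open.c hunks below) never sets FMODE_WRITER for special files in the first place. Sketch of the release side, assuming the matching acquisition shown later:

	static void release_write(struct file *file)
	{
		if (file->f_mode & FMODE_WRITER) {
			put_write_access(file_inode(file)); /* balances get_write_access() */
			__mnt_drop_write(file->f_path.mnt); /* balances __mnt_want_write() */
		}
	}
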
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 0a648bb455ae..aac71ce373e4 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -667,15 +667,15 @@ static void fuse_copy_finish(struct fuse_copy_state *cs)
667 struct pipe_buffer *buf = cs->currbuf; 667 struct pipe_buffer *buf = cs->currbuf;
668 668
669 if (!cs->write) { 669 if (!cs->write) {
670 buf->ops->unmap(cs->pipe, buf, cs->mapaddr); 670 kunmap_atomic(cs->mapaddr);
671 } else { 671 } else {
672 kunmap(buf->page); 672 kunmap_atomic(cs->mapaddr);
673 buf->len = PAGE_SIZE - cs->len; 673 buf->len = PAGE_SIZE - cs->len;
674 } 674 }
675 cs->currbuf = NULL; 675 cs->currbuf = NULL;
676 cs->mapaddr = NULL; 676 cs->mapaddr = NULL;
677 } else if (cs->mapaddr) { 677 } else if (cs->mapaddr) {
678 kunmap(cs->pg); 678 kunmap_atomic(cs->mapaddr);
679 if (cs->write) { 679 if (cs->write) {
680 flush_dcache_page(cs->pg); 680 flush_dcache_page(cs->pg);
681 set_page_dirty_lock(cs->pg); 681 set_page_dirty_lock(cs->pg);
@@ -706,7 +706,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
706 706
707 BUG_ON(!cs->nr_segs); 707 BUG_ON(!cs->nr_segs);
708 cs->currbuf = buf; 708 cs->currbuf = buf;
709 cs->mapaddr = buf->ops->map(cs->pipe, buf, 0); 709 cs->mapaddr = kmap_atomic(buf->page);
710 cs->len = buf->len; 710 cs->len = buf->len;
711 cs->buf = cs->mapaddr + buf->offset; 711 cs->buf = cs->mapaddr + buf->offset;
712 cs->pipebufs++; 712 cs->pipebufs++;
@@ -726,7 +726,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
726 buf->len = 0; 726 buf->len = 0;
727 727
728 cs->currbuf = buf; 728 cs->currbuf = buf;
729 cs->mapaddr = kmap(page); 729 cs->mapaddr = kmap_atomic(page);
730 cs->buf = cs->mapaddr; 730 cs->buf = cs->mapaddr;
731 cs->len = PAGE_SIZE; 731 cs->len = PAGE_SIZE;
732 cs->pipebufs++; 732 cs->pipebufs++;
@@ -745,7 +745,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
745 return err; 745 return err;
746 BUG_ON(err != 1); 746 BUG_ON(err != 1);
747 offset = cs->addr % PAGE_SIZE; 747 offset = cs->addr % PAGE_SIZE;
748 cs->mapaddr = kmap(cs->pg); 748 cs->mapaddr = kmap_atomic(cs->pg);
749 cs->buf = cs->mapaddr + offset; 749 cs->buf = cs->mapaddr + offset;
750 cs->len = min(PAGE_SIZE - offset, cs->seglen); 750 cs->len = min(PAGE_SIZE - offset, cs->seglen);
751 cs->seglen -= cs->len; 751 cs->seglen -= cs->len;
@@ -874,7 +874,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
874out_fallback_unlock: 874out_fallback_unlock:
875 unlock_page(newpage); 875 unlock_page(newpage);
876out_fallback: 876out_fallback:
877 cs->mapaddr = buf->ops->map(cs->pipe, buf, 1); 877 cs->mapaddr = kmap_atomic(buf->page);
878 cs->buf = cs->mapaddr + buf->offset; 878 cs->buf = cs->mapaddr + buf->offset;
879 879
880 err = lock_request(cs->fc, cs->req); 880 err = lock_request(cs->fc, cs->req);
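
All of the fuse_copy paths above now map pipe and scratch pages with kmap_atomic(), replacing both the pipe buffer ->map() op and the sleeping kmap(). Since kmap_atomic() disables preemption and pagefaults, the mapping must be dropped before anything that can sleep, which is exactly what fuse_copy_finish() does. A generic sketch of the pattern, assuming the copy itself cannot fault:

	static void copy_from_pipe_buf(struct pipe_buffer *buf,
				       void *dst, size_t len)
	{
		char *src = kmap_atomic(buf->page);

		memcpy(dst, src + buf->offset, len); /* no sleeping inside the window */
		kunmap_atomic(src);
	}
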
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 48992cac714b..13f8bdec5110 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1086,9 +1086,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
1086 if (mapping_writably_mapped(mapping)) 1086 if (mapping_writably_mapped(mapping))
1087 flush_dcache_page(page); 1087 flush_dcache_page(page);
1088 1088
1089 pagefault_disable();
1090 tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); 1089 tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
1091 pagefault_enable();
1092 flush_dcache_page(page); 1090 flush_dcache_page(page);
1093 1091
1094 mark_page_accessed(page); 1092 mark_page_accessed(page);
@@ -1237,8 +1235,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1237 goto out; 1235 goto out;
1238 1236
1239 if (file->f_flags & O_DIRECT) { 1237 if (file->f_flags & O_DIRECT) {
1240 written = generic_file_direct_write(iocb, iov, &nr_segs, 1238 written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
1241 pos, &iocb->ki_pos,
1242 count, ocount); 1239 count, ocount);
1243 if (written < 0 || written == count) 1240 if (written < 0 || written == count)
1244 goto out; 1241 goto out;
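
The explicit pagefault_disable()/pagefault_enable() bracket around iov_iter_copy_from_user_atomic() can go because the helper maps the destination page with kmap_atomic(), which already disables pagefaults for the duration of the copy. A single-segment sketch of the helper's shape (an assumption-laden simplification; the real code in mm/iov_iter.c also walks multi-segment iovecs):

	size_t copy_from_user_atomic_sketch(struct page *page, struct iov_iter *i,
					    unsigned long offset, size_t bytes)
	{
		char *kaddr = kmap_atomic(page); /* implies pagefault_disable() */
		size_t left = __copy_from_user_inatomic(kaddr + offset,
					i->iov->iov_base + i->iov_offset, bytes);

		kunmap_atomic(kaddr);
		return bytes - left; /* bytes actually copied */
	}
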
diff --git a/fs/mount.h b/fs/mount.h
index b29e42f05f34..d55297f2fa05 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -10,7 +10,7 @@ struct mnt_namespace {
10 struct user_namespace *user_ns; 10 struct user_namespace *user_ns;
11 u64 seq; /* Sequence number to prevent loops */ 11 u64 seq; /* Sequence number to prevent loops */
12 wait_queue_head_t poll; 12 wait_queue_head_t poll;
13 int event; 13 u64 event;
14}; 14};
15 15
16struct mnt_pcp { 16struct mnt_pcp {
@@ -104,6 +104,9 @@ struct proc_mounts {
104 struct mnt_namespace *ns; 104 struct mnt_namespace *ns;
105 struct path root; 105 struct path root;
106 int (*show)(struct seq_file *, struct vfsmount *); 106 int (*show)(struct seq_file *, struct vfsmount *);
107 void *cached_mount;
108 u64 cached_event;
109 loff_t cached_index;
107}; 110};
108 111
109#define proc_mounts(p) (container_of((p), struct proc_mounts, m)) 112#define proc_mounts(p) (container_of((p), struct proc_mounts, m))
diff --git a/fs/namei.c b/fs/namei.c
index 88339f59efb5..c6157c894fce 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -358,6 +358,7 @@ int generic_permission(struct inode *inode, int mask)
358 358
359 return -EACCES; 359 return -EACCES;
360} 360}
361EXPORT_SYMBOL(generic_permission);
361 362
362/* 363/*
363 * We _really_ want to just do "generic_permission()" without 364 * We _really_ want to just do "generic_permission()" without
@@ -455,6 +456,7 @@ int inode_permission(struct inode *inode, int mask)
455 return retval; 456 return retval;
456 return __inode_permission(inode, mask); 457 return __inode_permission(inode, mask);
457} 458}
459EXPORT_SYMBOL(inode_permission);
458 460
459/** 461/**
460 * path_get - get a reference to a path 462 * path_get - get a reference to a path
@@ -924,6 +926,7 @@ int follow_up(struct path *path)
924 path->mnt = &parent->mnt; 926 path->mnt = &parent->mnt;
925 return 1; 927 return 1;
926} 928}
929EXPORT_SYMBOL(follow_up);
927 930
928/* 931/*
929 * Perform an automount 932 * Perform an automount
@@ -1085,6 +1088,7 @@ int follow_down_one(struct path *path)
1085 } 1088 }
1086 return 0; 1089 return 0;
1087} 1090}
1091EXPORT_SYMBOL(follow_down_one);
1088 1092
1089static inline bool managed_dentry_might_block(struct dentry *dentry) 1093static inline bool managed_dentry_might_block(struct dentry *dentry)
1090{ 1094{
@@ -1223,6 +1227,7 @@ int follow_down(struct path *path)
1223 } 1227 }
1224 return 0; 1228 return 0;
1225} 1229}
1230EXPORT_SYMBOL(follow_down);
1226 1231
1227/* 1232/*
1228 * Skip to top of mountpoint pile in refwalk mode for follow_dotdot() 1233 * Skip to top of mountpoint pile in refwalk mode for follow_dotdot()
@@ -2025,6 +2030,7 @@ int kern_path(const char *name, unsigned int flags, struct path *path)
2025 *path = nd.path; 2030 *path = nd.path;
2026 return res; 2031 return res;
2027} 2032}
2033EXPORT_SYMBOL(kern_path);
2028 2034
2029/** 2035/**
2030 * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair 2036 * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
@@ -2049,6 +2055,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
2049 *path = nd.path; 2055 *path = nd.path;
2050 return err; 2056 return err;
2051} 2057}
2058EXPORT_SYMBOL(vfs_path_lookup);
2052 2059
2053/* 2060/*
2054 * Restricted form of lookup. Doesn't follow links, single-component only, 2061 * Restricted form of lookup. Doesn't follow links, single-component only,
@@ -2111,6 +2118,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
2111 2118
2112 return __lookup_hash(&this, base, 0); 2119 return __lookup_hash(&this, base, 0);
2113} 2120}
2121EXPORT_SYMBOL(lookup_one_len);
2114 2122
2115int user_path_at_empty(int dfd, const char __user *name, unsigned flags, 2123int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
2116 struct path *path, int *empty) 2124 struct path *path, int *empty)
@@ -2135,6 +2143,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
2135{ 2143{
2136 return user_path_at_empty(dfd, name, flags, path, NULL); 2144 return user_path_at_empty(dfd, name, flags, path, NULL);
2137} 2145}
2146EXPORT_SYMBOL(user_path_at);
2138 2147
2139/* 2148/*
2140 * NB: most callers don't do anything directly with the reference to the 2149 * NB: most callers don't do anything directly with the reference to the
@@ -2477,6 +2486,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
2477 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); 2486 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
2478 return NULL; 2487 return NULL;
2479} 2488}
2489EXPORT_SYMBOL(lock_rename);
2480 2490
2481void unlock_rename(struct dentry *p1, struct dentry *p2) 2491void unlock_rename(struct dentry *p1, struct dentry *p2)
2482{ 2492{
@@ -2486,6 +2496,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
2486 mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex); 2496 mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
2487 } 2497 }
2488} 2498}
2499EXPORT_SYMBOL(unlock_rename);
2489 2500
2490int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 2501int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2491 bool want_excl) 2502 bool want_excl)
@@ -2506,6 +2517,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2506 fsnotify_create(dir, dentry); 2517 fsnotify_create(dir, dentry);
2507 return error; 2518 return error;
2508} 2519}
2520EXPORT_SYMBOL(vfs_create);
2509 2521
2510static int may_open(struct path *path, int acc_mode, int flag) 2522static int may_open(struct path *path, int acc_mode, int flag)
2511{ 2523{
@@ -3375,6 +3387,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
3375 fsnotify_create(dir, dentry); 3387 fsnotify_create(dir, dentry);
3376 return error; 3388 return error;
3377} 3389}
3390EXPORT_SYMBOL(vfs_mknod);
3378 3391
3379static int may_mknod(umode_t mode) 3392static int may_mknod(umode_t mode)
3380{ 3393{
@@ -3464,6 +3477,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
3464 fsnotify_mkdir(dir, dentry); 3477 fsnotify_mkdir(dir, dentry);
3465 return error; 3478 return error;
3466} 3479}
3480EXPORT_SYMBOL(vfs_mkdir);
3467 3481
3468SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) 3482SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
3469{ 3483{
@@ -3518,6 +3532,7 @@ void dentry_unhash(struct dentry *dentry)
3518 __d_drop(dentry); 3532 __d_drop(dentry);
3519 spin_unlock(&dentry->d_lock); 3533 spin_unlock(&dentry->d_lock);
3520} 3534}
3535EXPORT_SYMBOL(dentry_unhash);
3521 3536
3522int vfs_rmdir(struct inode *dir, struct dentry *dentry) 3537int vfs_rmdir(struct inode *dir, struct dentry *dentry)
3523{ 3538{
@@ -3555,6 +3570,7 @@ out:
3555 d_delete(dentry); 3570 d_delete(dentry);
3556 return error; 3571 return error;
3557} 3572}
3573EXPORT_SYMBOL(vfs_rmdir);
3558 3574
3559static long do_rmdir(int dfd, const char __user *pathname) 3575static long do_rmdir(int dfd, const char __user *pathname)
3560{ 3576{
@@ -3672,6 +3688,7 @@ out:
3672 3688
3673 return error; 3689 return error;
3674} 3690}
3691EXPORT_SYMBOL(vfs_unlink);
3675 3692
3676/* 3693/*
3677 * Make sure that the actual truncation of the file will occur outside its 3694 * Make sure that the actual truncation of the file will occur outside its
@@ -3785,6 +3802,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
3785 fsnotify_create(dir, dentry); 3802 fsnotify_create(dir, dentry);
3786 return error; 3803 return error;
3787} 3804}
3805EXPORT_SYMBOL(vfs_symlink);
3788 3806
3789SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, 3807SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
3790 int, newdfd, const char __user *, newname) 3808 int, newdfd, const char __user *, newname)
@@ -3893,6 +3911,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
3893 fsnotify_link(dir, inode, new_dentry); 3911 fsnotify_link(dir, inode, new_dentry);
3894 return error; 3912 return error;
3895} 3913}
3914EXPORT_SYMBOL(vfs_link);
3896 3915
3897/* 3916/*
3898 * Hardlinks are often used in delicate situations. We avoid 3917 * Hardlinks are often used in delicate situations. We avoid
@@ -4152,6 +4171,7 @@ out:
4152 4171
4153 return error; 4172 return error;
4154} 4173}
4174EXPORT_SYMBOL(vfs_rename);
4155 4175
4156SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, 4176SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname,
4157 int, newdfd, const char __user *, newname, unsigned int, flags) 4177 int, newdfd, const char __user *, newname, unsigned int, flags)
@@ -4304,11 +4324,9 @@ SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newna
4304 return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); 4324 return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
4305} 4325}
4306 4326
4307int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) 4327int readlink_copy(char __user *buffer, int buflen, const char *link)
4308{ 4328{
4309 int len; 4329 int len = PTR_ERR(link);
4310
4311 len = PTR_ERR(link);
4312 if (IS_ERR(link)) 4330 if (IS_ERR(link))
4313 goto out; 4331 goto out;
4314 4332
@@ -4320,6 +4338,7 @@ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const c
4320out: 4338out:
4321 return len; 4339 return len;
4322} 4340}
4341EXPORT_SYMBOL(readlink_copy);
4323 4342
4324/* 4343/*
4325 * A helper for ->readlink(). This should be used *ONLY* for symlinks that 4344 * A helper for ->readlink(). This should be used *ONLY* for symlinks that
@@ -4337,11 +4356,12 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
4337 if (IS_ERR(cookie)) 4356 if (IS_ERR(cookie))
4338 return PTR_ERR(cookie); 4357 return PTR_ERR(cookie);
4339 4358
4340 res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); 4359 res = readlink_copy(buffer, buflen, nd_get_link(&nd));
4341 if (dentry->d_inode->i_op->put_link) 4360 if (dentry->d_inode->i_op->put_link)
4342 dentry->d_inode->i_op->put_link(dentry, &nd, cookie); 4361 dentry->d_inode->i_op->put_link(dentry, &nd, cookie);
4343 return res; 4362 return res;
4344} 4363}
4364EXPORT_SYMBOL(generic_readlink);
4345 4365
4346/* get the link contents into pagecache */ 4366/* get the link contents into pagecache */
4347static char *page_getlink(struct dentry * dentry, struct page **ppage) 4367static char *page_getlink(struct dentry * dentry, struct page **ppage)
@@ -4361,14 +4381,14 @@ static char *page_getlink(struct dentry * dentry, struct page **ppage)
4361int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) 4381int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
4362{ 4382{
4363 struct page *page = NULL; 4383 struct page *page = NULL;
4364 char *s = page_getlink(dentry, &page); 4384 int res = readlink_copy(buffer, buflen, page_getlink(dentry, &page));
4365 int res = vfs_readlink(dentry,buffer,buflen,s);
4366 if (page) { 4385 if (page) {
4367 kunmap(page); 4386 kunmap(page);
4368 page_cache_release(page); 4387 page_cache_release(page);
4369 } 4388 }
4370 return res; 4389 return res;
4371} 4390}
4391EXPORT_SYMBOL(page_readlink);
4372 4392
4373void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd) 4393void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
4374{ 4394{
@@ -4376,6 +4396,7 @@ void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
4376 nd_set_link(nd, page_getlink(dentry, &page)); 4396 nd_set_link(nd, page_getlink(dentry, &page));
4377 return page; 4397 return page;
4378} 4398}
4399EXPORT_SYMBOL(page_follow_link_light);
4379 4400
4380void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) 4401void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
4381{ 4402{
@@ -4386,6 +4407,7 @@ void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
4386 page_cache_release(page); 4407 page_cache_release(page);
4387 } 4408 }
4388} 4409}
4410EXPORT_SYMBOL(page_put_link);
4389 4411
4390/* 4412/*
4391 * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS 4413 * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
@@ -4423,45 +4445,18 @@ retry:
4423fail: 4445fail:
4424 return err; 4446 return err;
4425} 4447}
4448EXPORT_SYMBOL(__page_symlink);
4426 4449
4427int page_symlink(struct inode *inode, const char *symname, int len) 4450int page_symlink(struct inode *inode, const char *symname, int len)
4428{ 4451{
4429 return __page_symlink(inode, symname, len, 4452 return __page_symlink(inode, symname, len,
4430 !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS)); 4453 !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS));
4431} 4454}
4455EXPORT_SYMBOL(page_symlink);
4432 4456
4433const struct inode_operations page_symlink_inode_operations = { 4457const struct inode_operations page_symlink_inode_operations = {
4434 .readlink = generic_readlink, 4458 .readlink = generic_readlink,
4435 .follow_link = page_follow_link_light, 4459 .follow_link = page_follow_link_light,
4436 .put_link = page_put_link, 4460 .put_link = page_put_link,
4437}; 4461};
4438
4439EXPORT_SYMBOL(user_path_at);
4440EXPORT_SYMBOL(follow_down_one);
4441EXPORT_SYMBOL(follow_down);
4442EXPORT_SYMBOL(follow_up);
4443EXPORT_SYMBOL(get_write_access); /* nfsd */
4444EXPORT_SYMBOL(lock_rename);
4445EXPORT_SYMBOL(lookup_one_len);
4446EXPORT_SYMBOL(page_follow_link_light);
4447EXPORT_SYMBOL(page_put_link);
4448EXPORT_SYMBOL(page_readlink);
4449EXPORT_SYMBOL(__page_symlink);
4450EXPORT_SYMBOL(page_symlink);
4451EXPORT_SYMBOL(page_symlink_inode_operations); 4462EXPORT_SYMBOL(page_symlink_inode_operations);
4452EXPORT_SYMBOL(kern_path);
4453EXPORT_SYMBOL(vfs_path_lookup);
4454EXPORT_SYMBOL(inode_permission);
4455EXPORT_SYMBOL(unlock_rename);
4456EXPORT_SYMBOL(vfs_create);
4457EXPORT_SYMBOL(vfs_link);
4458EXPORT_SYMBOL(vfs_mkdir);
4459EXPORT_SYMBOL(vfs_mknod);
4460EXPORT_SYMBOL(generic_permission);
4461EXPORT_SYMBOL(vfs_readlink);
4462EXPORT_SYMBOL(vfs_rename);
4463EXPORT_SYMBOL(vfs_rmdir);
4464EXPORT_SYMBOL(vfs_symlink);
4465EXPORT_SYMBOL(vfs_unlink);
4466EXPORT_SYMBOL(dentry_unhash);
4467EXPORT_SYMBOL(generic_readlink);
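
Besides moving each EXPORT_SYMBOL() next to its definition, the fs/namei.c hunks drop the unused dentry argument from vfs_readlink() and rename it readlink_copy(). The new helper deliberately accepts an ERR_PTR() in place of the string, so callers like page_readlink() can feed it a lookup result without an intermediate IS_ERR() dance. A sketch of the contract, simplified from the hunk above:

	int readlink_copy(char __user *buffer, int buflen, const char *link)
	{
		int len = PTR_ERR(link);

		if (IS_ERR(link))
			return len;	/* propagate the lookup error */
		len = strlen(link);
		if (len > buflen)
			len = buflen;
		if (copy_to_user(buffer, link, len))
			return -EFAULT;
		return len;
	}
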
diff --git a/fs/namespace.c b/fs/namespace.c
index 2ffc5a2905d4..182bc41cd887 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -52,7 +52,7 @@ static int __init set_mphash_entries(char *str)
52} 52}
53__setup("mphash_entries=", set_mphash_entries); 53__setup("mphash_entries=", set_mphash_entries);
54 54
55static int event; 55static u64 event;
56static DEFINE_IDA(mnt_id_ida); 56static DEFINE_IDA(mnt_id_ida);
57static DEFINE_IDA(mnt_group_ida); 57static DEFINE_IDA(mnt_group_ida);
58static DEFINE_SPINLOCK(mnt_id_lock); 58static DEFINE_SPINLOCK(mnt_id_lock);
@@ -414,9 +414,7 @@ EXPORT_SYMBOL_GPL(mnt_clone_write);
414 */ 414 */
415int __mnt_want_write_file(struct file *file) 415int __mnt_want_write_file(struct file *file)
416{ 416{
417 struct inode *inode = file_inode(file); 417 if (!(file->f_mode & FMODE_WRITER))
418
419 if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
420 return __mnt_want_write(file->f_path.mnt); 418 return __mnt_want_write(file->f_path.mnt);
421 else 419 else
422 return mnt_clone_write(file->f_path.mnt); 420 return mnt_clone_write(file->f_path.mnt);
@@ -570,13 +568,17 @@ int sb_prepare_remount_readonly(struct super_block *sb)
570static void free_vfsmnt(struct mount *mnt) 568static void free_vfsmnt(struct mount *mnt)
571{ 569{
572 kfree(mnt->mnt_devname); 570 kfree(mnt->mnt_devname);
573 mnt_free_id(mnt);
574#ifdef CONFIG_SMP 571#ifdef CONFIG_SMP
575 free_percpu(mnt->mnt_pcp); 572 free_percpu(mnt->mnt_pcp);
576#endif 573#endif
577 kmem_cache_free(mnt_cache, mnt); 574 kmem_cache_free(mnt_cache, mnt);
578} 575}
579 576
577static void delayed_free_vfsmnt(struct rcu_head *head)
578{
579 free_vfsmnt(container_of(head, struct mount, mnt_rcu));
580}
581
580/* call under rcu_read_lock */ 582/* call under rcu_read_lock */
581bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) 583bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
582{ 584{
@@ -848,6 +850,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
848 850
849 root = mount_fs(type, flags, name, data); 851 root = mount_fs(type, flags, name, data);
850 if (IS_ERR(root)) { 852 if (IS_ERR(root)) {
853 mnt_free_id(mnt);
851 free_vfsmnt(mnt); 854 free_vfsmnt(mnt);
852 return ERR_CAST(root); 855 return ERR_CAST(root);
853 } 856 }
@@ -885,7 +888,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
885 goto out_free; 888 goto out_free;
886 } 889 }
887 890
888 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; 891 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
889 /* Don't allow unprivileged users to change mount flags */ 892 /* Don't allow unprivileged users to change mount flags */
890 if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY)) 893 if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
891 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY; 894 mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
@@ -928,20 +931,11 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
928 return mnt; 931 return mnt;
929 932
930 out_free: 933 out_free:
934 mnt_free_id(mnt);
931 free_vfsmnt(mnt); 935 free_vfsmnt(mnt);
932 return ERR_PTR(err); 936 return ERR_PTR(err);
933} 937}
934 938
935static void delayed_free(struct rcu_head *head)
936{
937 struct mount *mnt = container_of(head, struct mount, mnt_rcu);
938 kfree(mnt->mnt_devname);
939#ifdef CONFIG_SMP
940 free_percpu(mnt->mnt_pcp);
941#endif
942 kmem_cache_free(mnt_cache, mnt);
943}
944
945static void mntput_no_expire(struct mount *mnt) 939static void mntput_no_expire(struct mount *mnt)
946{ 940{
947put_again: 941put_again:
@@ -991,7 +985,7 @@ put_again:
991 dput(mnt->mnt.mnt_root); 985 dput(mnt->mnt.mnt_root);
992 deactivate_super(mnt->mnt.mnt_sb); 986 deactivate_super(mnt->mnt.mnt_sb);
993 mnt_free_id(mnt); 987 mnt_free_id(mnt);
994 call_rcu(&mnt->mnt_rcu, delayed_free); 988 call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
995} 989}
996 990
997void mntput(struct vfsmount *mnt) 991void mntput(struct vfsmount *mnt)
@@ -1100,14 +1094,29 @@ static void *m_start(struct seq_file *m, loff_t *pos)
1100 struct proc_mounts *p = proc_mounts(m); 1094 struct proc_mounts *p = proc_mounts(m);
1101 1095
1102 down_read(&namespace_sem); 1096 down_read(&namespace_sem);
1103 return seq_list_start(&p->ns->list, *pos); 1097 if (p->cached_event == p->ns->event) {
1098 void *v = p->cached_mount;
1099 if (*pos == p->cached_index)
1100 return v;
1101 if (*pos == p->cached_index + 1) {
1102 v = seq_list_next(v, &p->ns->list, &p->cached_index);
1103 return p->cached_mount = v;
1104 }
1105 }
1106
1107 p->cached_event = p->ns->event;
1108 p->cached_mount = seq_list_start(&p->ns->list, *pos);
1109 p->cached_index = *pos;
1110 return p->cached_mount;
1104} 1111}
1105 1112
1106static void *m_next(struct seq_file *m, void *v, loff_t *pos) 1113static void *m_next(struct seq_file *m, void *v, loff_t *pos)
1107{ 1114{
1108 struct proc_mounts *p = proc_mounts(m); 1115 struct proc_mounts *p = proc_mounts(m);
1109 1116
1110 return seq_list_next(v, &p->ns->list, pos); 1117 p->cached_mount = seq_list_next(v, &p->ns->list, pos);
1118 p->cached_index = *pos;
1119 return p->cached_mount;
1111} 1120}
1112 1121
1113static void m_stop(struct seq_file *m, void *v) 1122static void m_stop(struct seq_file *m, void *v)
@@ -1661,9 +1670,9 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1661 if (err) 1670 if (err)
1662 goto out; 1671 goto out;
1663 err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); 1672 err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
1673 lock_mount_hash();
1664 if (err) 1674 if (err)
1665 goto out_cleanup_ids; 1675 goto out_cleanup_ids;
1666 lock_mount_hash();
1667 for (p = source_mnt; p; p = next_mnt(p, source_mnt)) 1676 for (p = source_mnt; p; p = next_mnt(p, source_mnt))
1668 set_mnt_shared(p); 1677 set_mnt_shared(p);
1669 } else { 1678 } else {
@@ -1690,6 +1699,11 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1690 return 0; 1699 return 0;
1691 1700
1692 out_cleanup_ids: 1701 out_cleanup_ids:
1702 while (!hlist_empty(&tree_list)) {
1703 child = hlist_entry(tree_list.first, struct mount, mnt_hash);
1704 umount_tree(child, 0);
1705 }
1706 unlock_mount_hash();
1693 cleanup_group_ids(source_mnt, NULL); 1707 cleanup_group_ids(source_mnt, NULL);
1694 out: 1708 out:
1695 return err; 1709 return err;
@@ -2044,7 +2058,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
2044 struct mount *parent; 2058 struct mount *parent;
2045 int err; 2059 int err;
2046 2060
2047 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT); 2061 mnt_flags &= ~MNT_INTERNAL_FLAGS;
2048 2062
2049 mp = lock_mount(path); 2063 mp = lock_mount(path);
2050 if (IS_ERR(mp)) 2064 if (IS_ERR(mp))
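
The m_start()/m_next() changes pair with the new proc_mounts fields from fs/mount.h: walking a seq_list from the head made every read() chunk of a large /proc/mounts O(n), so the last (event, index, mount) triple is cached and reused as long as ns->event (now a u64, so wraparound stops being a practical worry) has not moved. A simplified sketch of the resume rule, omitting the index+1 fast path in the real code:

	static void *m_start_cached(struct proc_mounts *p, loff_t *pos)
	{
		if (p->cached_event == p->ns->event && *pos == p->cached_index)
			return p->cached_mount;	/* resume in O(1) */

		p->cached_event = p->ns->event;	/* (re)prime the cache */
		p->cached_mount = seq_list_start(&p->ns->list, *pos);
		p->cached_index = *pos;
		return p->cached_mount;
	}

mounts_open_common() (in the fs/proc_namespace.c hunk below) initializes cached_event to ~0ULL so the first m_start() always misses.
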
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 81b4f643ecef..e31e589369a4 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -470,9 +470,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
470{ 470{
471 struct ncp_mount_data_kernel data; 471 struct ncp_mount_data_kernel data;
472 struct ncp_server *server; 472 struct ncp_server *server;
473 struct file *ncp_filp;
474 struct inode *root_inode; 473 struct inode *root_inode;
475 struct inode *sock_inode;
476 struct socket *sock; 474 struct socket *sock;
477 int error; 475 int error;
478 int default_bufsize; 476 int default_bufsize;
@@ -541,18 +539,10 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
541 if (!uid_valid(data.mounted_uid) || !uid_valid(data.uid) || 539 if (!uid_valid(data.mounted_uid) || !uid_valid(data.uid) ||
542 !gid_valid(data.gid)) 540 !gid_valid(data.gid))
543 goto out; 541 goto out;
544 error = -EBADF; 542 sock = sockfd_lookup(data.ncp_fd, &error);
545 ncp_filp = fget(data.ncp_fd);
546 if (!ncp_filp)
547 goto out;
548 error = -ENOTSOCK;
549 sock_inode = file_inode(ncp_filp);
550 if (!S_ISSOCK(sock_inode->i_mode))
551 goto out_fput;
552 sock = SOCKET_I(sock_inode);
553 if (!sock) 543 if (!sock)
554 goto out_fput; 544 goto out;
555 545
556 if (sock->type == SOCK_STREAM) 546 if (sock->type == SOCK_STREAM)
557 default_bufsize = 0xF000; 547 default_bufsize = 0xF000;
558 else 548 else
@@ -574,27 +564,16 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
574 if (error) 564 if (error)
575 goto out_fput; 565 goto out_fput;
576 566
577 server->ncp_filp = ncp_filp;
578 server->ncp_sock = sock; 567 server->ncp_sock = sock;
579 568
580 if (data.info_fd != -1) { 569 if (data.info_fd != -1) {
581 struct socket *info_sock; 570 struct socket *info_sock = sockfd_lookup(data.info_fd, &error);
582
583 error = -EBADF;
584 server->info_filp = fget(data.info_fd);
585 if (!server->info_filp)
586 goto out_bdi;
587 error = -ENOTSOCK;
588 sock_inode = file_inode(server->info_filp);
589 if (!S_ISSOCK(sock_inode->i_mode))
590 goto out_fput2;
591 info_sock = SOCKET_I(sock_inode);
592 if (!info_sock) 571 if (!info_sock)
593 goto out_fput2; 572 goto out_bdi;
573 server->info_sock = info_sock;
594 error = -EBADFD; 574 error = -EBADFD;
595 if (info_sock->type != SOCK_STREAM) 575 if (info_sock->type != SOCK_STREAM)
596 goto out_fput2; 576 goto out_fput2;
597 server->info_sock = info_sock;
598 } 577 }
599 578
600/* server->lock = 0; */ 579/* server->lock = 0; */
@@ -766,17 +745,12 @@ out_nls:
766 mutex_destroy(&server->root_setup_lock); 745 mutex_destroy(&server->root_setup_lock);
767 mutex_destroy(&server->mutex); 746 mutex_destroy(&server->mutex);
768out_fput2: 747out_fput2:
769 if (server->info_filp) 748 if (server->info_sock)
770 fput(server->info_filp); 749 sockfd_put(server->info_sock);
771out_bdi: 750out_bdi:
772 bdi_destroy(&server->bdi); 751 bdi_destroy(&server->bdi);
773out_fput: 752out_fput:
774 /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>: 753 sockfd_put(sock);
775 *
776 * The previously used put_filp(ncp_filp); was bogus, since
777 * it doesn't perform proper unlocking.
778 */
779 fput(ncp_filp);
780out: 754out:
781 put_pid(data.wdog_pid); 755 put_pid(data.wdog_pid);
782 sb->s_fs_info = NULL; 756 sb->s_fs_info = NULL;
@@ -809,9 +783,9 @@ static void ncp_put_super(struct super_block *sb)
809 mutex_destroy(&server->root_setup_lock); 783 mutex_destroy(&server->root_setup_lock);
810 mutex_destroy(&server->mutex); 784 mutex_destroy(&server->mutex);
811 785
812 if (server->info_filp) 786 if (server->info_sock)
813 fput(server->info_filp); 787 sockfd_put(server->info_sock);
814 fput(server->ncp_filp); 788 sockfd_put(server->ncp_sock);
815 kill_pid(server->m.wdog_pid, SIGTERM, 1); 789 kill_pid(server->m.wdog_pid, SIGTERM, 1);
816 put_pid(server->m.wdog_pid); 790 put_pid(server->m.wdog_pid);
817 791
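
The open-coded fget()/S_ISSOCK()/SOCKET_I() sequence in ncp_fill_super() collapses into sockfd_lookup(), which performs the same fd-to-socket translation, sets *error to -EBADF or -ENOTSOCK on failure, and holds the underlying file reference until sockfd_put(); the ncp_filp/info_filp fields become unnecessary. Usage sketch (use_socket() is a hypothetical consumer):

	static int with_socket(int fd)
	{
		int error;
		struct socket *sock = sockfd_lookup(fd, &error);

		if (!sock)
			return error;	/* -EBADF or -ENOTSOCK */
		use_socket(sock);
		sockfd_put(sock);	/* drops the file reference taken above */
		return 0;
	}
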
diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h
index b81e97adc5a9..7fa17e459366 100644
--- a/fs/ncpfs/ncp_fs_sb.h
+++ b/fs/ncpfs/ncp_fs_sb.h
@@ -45,9 +45,7 @@ struct ncp_server {
45 45
46 __u8 name_space[NCP_NUMBER_OF_VOLUMES + 2]; 46 __u8 name_space[NCP_NUMBER_OF_VOLUMES + 2];
47 47
48 struct file *ncp_filp; /* File pointer to ncp socket */
49 struct socket *ncp_sock;/* ncp socket */ 48 struct socket *ncp_sock;/* ncp socket */
50 struct file *info_filp;
51 struct socket *info_sock; 49 struct socket *info_sock;
52 50
53 u8 sequence; 51 u8 sequence;
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 9d8153ebacfb..f47af5e6e230 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1704,8 +1704,6 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
1704 iput(bvi); 1704 iput(bvi);
1705skip_large_index_stuff: 1705skip_large_index_stuff:
1706 /* Setup the operations for this index inode. */ 1706 /* Setup the operations for this index inode. */
1707 vi->i_op = NULL;
1708 vi->i_fop = NULL;
1709 vi->i_mapping->a_ops = &ntfs_mst_aops; 1707 vi->i_mapping->a_ops = &ntfs_mst_aops;
1710 vi->i_blocks = ni->allocated_size >> 9; 1708 vi->i_blocks = ni->allocated_size >> 9;
1711 /* 1709 /*
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index eb649d23a4de..dfda2ffdb16c 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -916,57 +916,30 @@ static struct o2net_msg_handler *o2net_handler_get(u32 msg_type, u32 key)
916 916
917static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len) 917static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len)
918{ 918{
919 int ret; 919 struct kvec vec = { .iov_len = len, .iov_base = data, };
920 mm_segment_t oldfs; 920 struct msghdr msg = { .msg_flags = MSG_DONTWAIT, };
921 struct kvec vec = { 921 return kernel_recvmsg(sock, &msg, &vec, 1, len, msg.msg_flags);
922 .iov_len = len,
923 .iov_base = data,
924 };
925 struct msghdr msg = {
926 .msg_iovlen = 1,
927 .msg_iov = (struct iovec *)&vec,
928 .msg_flags = MSG_DONTWAIT,
929 };
930
931 oldfs = get_fs();
932 set_fs(get_ds());
933 ret = sock_recvmsg(sock, &msg, len, msg.msg_flags);
934 set_fs(oldfs);
935
936 return ret;
937} 922}
938 923
939static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec, 924static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec,
940 size_t veclen, size_t total) 925 size_t veclen, size_t total)
941{ 926{
942 int ret; 927 int ret;
943 mm_segment_t oldfs; 928 struct msghdr msg;
944 struct msghdr msg = {
945 .msg_iov = (struct iovec *)vec,
946 .msg_iovlen = veclen,
947 };
948 929
949 if (sock == NULL) { 930 if (sock == NULL) {
950 ret = -EINVAL; 931 ret = -EINVAL;
951 goto out; 932 goto out;
952 } 933 }
953 934
954 oldfs = get_fs(); 935 ret = kernel_sendmsg(sock, &msg, vec, veclen, total);
955 set_fs(get_ds()); 936 if (likely(ret == total))
956 ret = sock_sendmsg(sock, &msg, total); 937 return 0;
957 set_fs(oldfs); 938 mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret, total);
958 if (ret != total) { 939 if (ret >= 0)
959 mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret, 940 ret = -EPIPE; /* should be smarter, I bet */
960 total);
961 if (ret >= 0)
962 ret = -EPIPE; /* should be smarter, I bet */
963 goto out;
964 }
965
966 ret = 0;
967out: 941out:
968 if (ret < 0) 942 mlog(0, "returning error: %d\n", ret);
969 mlog(0, "returning error: %d\n", ret);
970 return ret; 943 return ret;
971} 944}
972 945
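
Both o2net helpers now go through the kernel_*msg() wrappers, which accept kernel-space kvecs directly and handle the address-limit juggling internally, so the get_fs()/set_fs(get_ds()) bracket and the kvec-to-iovec cast both disappear. The receive side in the hunk above is already minimal; the send side reduces to the same shape (a sketch, with the error mapping from the hunk):

	static int send_all(struct socket *sock, struct kvec *vec,
			    size_t veclen, size_t total)
	{
		struct msghdr msg = { };	/* flags and addr left default */
		int ret = kernel_sendmsg(sock, &msg, vec, veclen, total);

		if (ret == total)
			return 0;
		return ret < 0 ? ret : -EPIPE;	/* short send treated as broken pipe */
	}
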
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ff33c5ef87f2..8970dcf74de5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2367,15 +2367,18 @@ relock:
2367 2367
2368 if (direct_io) { 2368 if (direct_io) {
2369 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, 2369 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
2370 ppos, count, ocount); 2370 count, ocount);
2371 if (written < 0) { 2371 if (written < 0) {
2372 ret = written; 2372 ret = written;
2373 goto out_dio; 2373 goto out_dio;
2374 } 2374 }
2375 } else { 2375 } else {
2376 struct iov_iter from;
2377 iov_iter_init(&from, iov, nr_segs, count, 0);
2376 current->backing_dev_info = file->f_mapping->backing_dev_info; 2378 current->backing_dev_info = file->f_mapping->backing_dev_info;
2377 written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos, 2379 written = generic_perform_write(file, &from, *ppos);
2378 ppos, count, 0); 2380 if (likely(written >= 0))
2381 iocb->ki_pos = *ppos + written;
2379 current->backing_dev_info = NULL; 2382 current->backing_dev_info = NULL;
2380 } 2383 }
2381 2384
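
generic_file_buffered_write() is on its way out across this series: the buffered branch above builds an iov_iter over the caller's segments and hands it to generic_perform_write(), then updates ki_pos itself now that the old ppos out-parameter is gone. The recurring three-step shape:

	struct iov_iter from;

	iov_iter_init(&from, iov, nr_segs, count, 0); /* wrap the user iovecs */
	written = generic_perform_write(file, &from, pos);
	if (likely(written >= 0))
		iocb->ki_pos = pos + written;	/* caller advances the position */
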
diff --git a/fs/open.c b/fs/open.c
index 631aea815def..3d30eb1fc95e 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -655,35 +655,6 @@ out:
655 return error; 655 return error;
656} 656}
657 657
658/*
659 * You have to be very careful that these write
660 * counts get cleaned up in error cases and
661 * upon __fput(). This should probably never
662 * be called outside of __dentry_open().
663 */
664static inline int __get_file_write_access(struct inode *inode,
665 struct vfsmount *mnt)
666{
667 int error;
668 error = get_write_access(inode);
669 if (error)
670 return error;
671 /*
672 * Do not take mount writer counts on
673 * special files since no writes to
674 * the mount itself will occur.
675 */
676 if (!special_file(inode->i_mode)) {
677 /*
678 * Balanced in __fput()
679 */
680 error = __mnt_want_write(mnt);
681 if (error)
682 put_write_access(inode);
683 }
684 return error;
685}
686
687int open_check_o_direct(struct file *f) 658int open_check_o_direct(struct file *f)
688{ 659{
689 /* NB: we're sure to have correct a_ops only after f_op->open */ 660 /* NB: we're sure to have correct a_ops only after f_op->open */
@@ -708,26 +679,28 @@ static int do_dentry_open(struct file *f,
708 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | 679 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
709 FMODE_PREAD | FMODE_PWRITE; 680 FMODE_PREAD | FMODE_PWRITE;
710 681
711 if (unlikely(f->f_flags & O_PATH))
712 f->f_mode = FMODE_PATH;
713
714 path_get(&f->f_path); 682 path_get(&f->f_path);
715 inode = f->f_inode = f->f_path.dentry->d_inode; 683 inode = f->f_inode = f->f_path.dentry->d_inode;
716 if (f->f_mode & FMODE_WRITE) {
717 error = __get_file_write_access(inode, f->f_path.mnt);
718 if (error)
719 goto cleanup_file;
720 if (!special_file(inode->i_mode))
721 file_take_write(f);
722 }
723
724 f->f_mapping = inode->i_mapping; 684 f->f_mapping = inode->i_mapping;
725 685
726 if (unlikely(f->f_mode & FMODE_PATH)) { 686 if (unlikely(f->f_flags & O_PATH)) {
687 f->f_mode = FMODE_PATH;
727 f->f_op = &empty_fops; 688 f->f_op = &empty_fops;
728 return 0; 689 return 0;
729 } 690 }
730 691
692 if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
693 error = get_write_access(inode);
694 if (unlikely(error))
695 goto cleanup_file;
696 error = __mnt_want_write(f->f_path.mnt);
697 if (unlikely(error)) {
698 put_write_access(inode);
699 goto cleanup_file;
700 }
701 f->f_mode |= FMODE_WRITER;
702 }
703
731 /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */ 704 /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
732 if (S_ISREG(inode->i_mode)) 705 if (S_ISREG(inode->i_mode))
733 f->f_mode |= FMODE_ATOMIC_POS; 706 f->f_mode |= FMODE_ATOMIC_POS;
@@ -764,18 +737,9 @@ static int do_dentry_open(struct file *f,
764 737
765cleanup_all: 738cleanup_all:
766 fops_put(f->f_op); 739 fops_put(f->f_op);
767 if (f->f_mode & FMODE_WRITE) { 740 if (f->f_mode & FMODE_WRITER) {
768 put_write_access(inode); 741 put_write_access(inode);
769 if (!special_file(inode->i_mode)) { 742 __mnt_drop_write(f->f_path.mnt);
770 /*
771 * We don't consider this a real
772 * mnt_want/drop_write() pair
773 * because it all happenend right
774 * here, so just reset the state.
775 */
776 file_reset_write(f);
777 __mnt_drop_write(f->f_path.mnt);
778 }
779 } 743 }
780cleanup_file: 744cleanup_file:
781 path_put(&f->f_path); 745 path_put(&f->f_path);
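
Two ordering changes matter in do_dentry_open(): the O_PATH check moves ahead of any write accounting, so pure path handles never touch i_writecount or mnt_writers, and the write-access block now takes both counts back to back, recording success in FMODE_WRITER, the single bit that both cleanup_all here and __fput() (above) key off. The acquisition half, matching the release sketched earlier:

	if ((f->f_mode & FMODE_WRITE) && !special_file(inode->i_mode)) {
		error = get_write_access(inode);
		if (unlikely(error))
			goto cleanup_file;
		error = __mnt_want_write(f->f_path.mnt);
		if (unlikely(error)) {
			put_write_access(inode); /* undo the half we got */
			goto cleanup_file;
		}
		f->f_mode |= FMODE_WRITER;
	}
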
diff --git a/fs/pipe.c b/fs/pipe.c
index 78fd0d0788db..034bffac3f97 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -142,55 +142,6 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
142 return 0; 142 return 0;
143} 143}
144 144
145static int
146pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
147 int atomic)
148{
149 unsigned long copy;
150
151 while (len > 0) {
152 while (!iov->iov_len)
153 iov++;
154 copy = min_t(unsigned long, len, iov->iov_len);
155
156 if (atomic) {
157 if (__copy_to_user_inatomic(iov->iov_base, from, copy))
158 return -EFAULT;
159 } else {
160 if (copy_to_user(iov->iov_base, from, copy))
161 return -EFAULT;
162 }
163 from += copy;
164 len -= copy;
165 iov->iov_base += copy;
166 iov->iov_len -= copy;
167 }
168 return 0;
169}
170
171/*
172 * Attempt to pre-fault in the user memory, so we can use atomic copies.
173 * Returns the number of bytes not faulted in.
174 */
175static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
176{
177 while (!iov->iov_len)
178 iov++;
179
180 while (len > 0) {
181 unsigned long this_len;
182
183 this_len = min_t(unsigned long, len, iov->iov_len);
184 if (fault_in_pages_writeable(iov->iov_base, this_len))
185 break;
186
187 len -= this_len;
188 iov++;
189 }
190
191 return len;
192}
193
194/* 145/*
195 * Pre-fault in the user memory, so we can use atomic copies. 146 * Pre-fault in the user memory, so we can use atomic copies.
196 */ 147 */
@@ -226,52 +177,6 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
226} 177}
227 178
228/** 179/**
229 * generic_pipe_buf_map - virtually map a pipe buffer
230 * @pipe: the pipe that the buffer belongs to
231 * @buf: the buffer that should be mapped
232 * @atomic: whether to use an atomic map
233 *
234 * Description:
235 * This function returns a kernel virtual address mapping for the
236 * pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided
237 * and the caller has to be careful not to fault before calling
238 * the unmap function.
239 *
240 * Note that this function calls kmap_atomic() if @atomic != 0.
241 */
242void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
243 struct pipe_buffer *buf, int atomic)
244{
245 if (atomic) {
246 buf->flags |= PIPE_BUF_FLAG_ATOMIC;
247 return kmap_atomic(buf->page);
248 }
249
250 return kmap(buf->page);
251}
252EXPORT_SYMBOL(generic_pipe_buf_map);
253
254/**
255 * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
256 * @pipe: the pipe that the buffer belongs to
257 * @buf: the buffer that should be unmapped
258 * @map_data: the data that the mapping function returned
259 *
260 * Description:
261 * This function undoes the mapping that ->map() provided.
262 */
263void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
264 struct pipe_buffer *buf, void *map_data)
265{
266 if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
267 buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
268 kunmap_atomic(map_data);
269 } else
270 kunmap(buf->page);
271}
272EXPORT_SYMBOL(generic_pipe_buf_unmap);
273
274/**
275 * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer 180 * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
276 * @pipe: the pipe that the buffer belongs to 181 * @pipe: the pipe that the buffer belongs to
277 * @buf: the buffer to attempt to steal 182 * @buf: the buffer to attempt to steal
@@ -351,8 +256,6 @@ EXPORT_SYMBOL(generic_pipe_buf_release);
351 256
352static const struct pipe_buf_operations anon_pipe_buf_ops = { 257static const struct pipe_buf_operations anon_pipe_buf_ops = {
353 .can_merge = 1, 258 .can_merge = 1,
354 .map = generic_pipe_buf_map,
355 .unmap = generic_pipe_buf_unmap,
356 .confirm = generic_pipe_buf_confirm, 259 .confirm = generic_pipe_buf_confirm,
357 .release = anon_pipe_buf_release, 260 .release = anon_pipe_buf_release,
358 .steal = generic_pipe_buf_steal, 261 .steal = generic_pipe_buf_steal,
@@ -361,8 +264,6 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
361 264
362static const struct pipe_buf_operations packet_pipe_buf_ops = { 265static const struct pipe_buf_operations packet_pipe_buf_ops = {
363 .can_merge = 0, 266 .can_merge = 0,
364 .map = generic_pipe_buf_map,
365 .unmap = generic_pipe_buf_unmap,
366 .confirm = generic_pipe_buf_confirm, 267 .confirm = generic_pipe_buf_confirm,
367 .release = anon_pipe_buf_release, 268 .release = anon_pipe_buf_release,
368 .steal = generic_pipe_buf_steal, 269 .steal = generic_pipe_buf_steal,
@@ -379,12 +280,15 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
379 ssize_t ret; 280 ssize_t ret;
380 struct iovec *iov = (struct iovec *)_iov; 281 struct iovec *iov = (struct iovec *)_iov;
381 size_t total_len; 282 size_t total_len;
283 struct iov_iter iter;
382 284
383 total_len = iov_length(iov, nr_segs); 285 total_len = iov_length(iov, nr_segs);
384 /* Null read succeeds. */ 286 /* Null read succeeds. */
385 if (unlikely(total_len == 0)) 287 if (unlikely(total_len == 0))
386 return 0; 288 return 0;
387 289
290 iov_iter_init(&iter, iov, nr_segs, total_len, 0);
291
388 do_wakeup = 0; 292 do_wakeup = 0;
389 ret = 0; 293 ret = 0;
390 __pipe_lock(pipe); 294 __pipe_lock(pipe);
@@ -394,9 +298,9 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
394 int curbuf = pipe->curbuf; 298 int curbuf = pipe->curbuf;
395 struct pipe_buffer *buf = pipe->bufs + curbuf; 299 struct pipe_buffer *buf = pipe->bufs + curbuf;
396 const struct pipe_buf_operations *ops = buf->ops; 300 const struct pipe_buf_operations *ops = buf->ops;
397 void *addr;
398 size_t chars = buf->len; 301 size_t chars = buf->len;
399 int error, atomic; 302 size_t written;
303 int error;
400 304
401 if (chars > total_len) 305 if (chars > total_len)
402 chars = total_len; 306 chars = total_len;
@@ -408,21 +312,10 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
408 break; 312 break;
409 } 313 }
410 314
411 atomic = !iov_fault_in_pages_write(iov, chars); 315 written = copy_page_to_iter(buf->page, buf->offset, chars, &iter);
412redo: 316 if (unlikely(written < chars)) {
413 addr = ops->map(pipe, buf, atomic);
414 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
415 ops->unmap(pipe, buf, addr);
416 if (unlikely(error)) {
417 /*
418 * Just retry with the slow path if we failed.
419 */
420 if (atomic) {
421 atomic = 0;
422 goto redo;
423 }
424 if (!ret) 317 if (!ret)
425 ret = error; 318 ret = -EFAULT;
426 break; 319 break;
427 } 320 }
428 ret += chars; 321 ret += chars;
@@ -538,10 +431,16 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
538 431
539 iov_fault_in_pages_read(iov, chars); 432 iov_fault_in_pages_read(iov, chars);
540redo1: 433redo1:
541 addr = ops->map(pipe, buf, atomic); 434 if (atomic)
435 addr = kmap_atomic(buf->page);
436 else
437 addr = kmap(buf->page);
542 error = pipe_iov_copy_from_user(offset + addr, iov, 438 error = pipe_iov_copy_from_user(offset + addr, iov,
543 chars, atomic); 439 chars, atomic);
544 ops->unmap(pipe, buf, addr); 440 if (atomic)
441 kunmap_atomic(addr);
442 else
443 kunmap(buf->page);
545 ret = error; 444 ret = error;
546 do_wakeup = 1; 445 do_wakeup = 1;
547 if (error) { 446 if (error) {
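
pipe_read() loses the map/fault/retry dance entirely: copy_page_to_iter() performs the atomic-first, sleeping-fallback copy and advances the iterator itself, so a short return simply means the user buffer faulted. Conceptually (a single-segment sketch, not the exact mm/iov_iter.c code):

	size_t copy_page_to_iter_sketch(struct page *page, size_t offset,
					size_t bytes, struct iov_iter *i)
	{
		void __user *to = i->iov->iov_base + i->iov_offset;
		char *kaddr = kmap_atomic(page);
		size_t left = __copy_to_user_inatomic(to, kaddr + offset, bytes);

		kunmap_atomic(kaddr);
		if (left) {	/* faulted: retry with the sleeping copy */
			kaddr = kmap(page);
			left = copy_to_user(to, kaddr + offset, bytes);
			kunmap(page);
		}
		iov_iter_advance(i, bytes - left);
		return bytes - left;
	}

pipe_write(), by contrast, keeps its open-coded kmap_atomic()/kmap() pair here, only dropping the now-removed ->map()/->unmap() ops.
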
diff --git a/fs/pnode.c b/fs/pnode.c
index 88396df725b4..302bf22c4a30 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -164,46 +164,94 @@ static struct mount *propagation_next(struct mount *m,
164 } 164 }
165} 165}
166 166
167/* 167static struct mount *next_group(struct mount *m, struct mount *origin)
168 * return the source mount to be used for cloning
169 *
170 * @dest the current destination mount
171 * @last_dest the last seen destination mount
172 * @last_src the last seen source mount
173 * @type return CL_SLAVE if the new mount has to be
174 * cloned as a slave.
175 */
176static struct mount *get_source(struct mount *dest,
177 struct mount *last_dest,
178 struct mount *last_src,
179 int *type)
180{ 168{
181 struct mount *p_last_src = NULL; 169 while (1) {
182 struct mount *p_last_dest = NULL; 170 while (1) {
183 171 struct mount *next;
184 while (last_dest != dest->mnt_master) { 172 if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
185 p_last_dest = last_dest; 173 return first_slave(m);
186 p_last_src = last_src; 174 next = next_peer(m);
187 last_dest = last_dest->mnt_master; 175 if (m->mnt_group_id == origin->mnt_group_id) {
188 last_src = last_src->mnt_master; 176 if (next == origin)
177 return NULL;
178 } else if (m->mnt_slave.next != &next->mnt_slave)
179 break;
180 m = next;
181 }
182 /* m is the last peer */
183 while (1) {
184 struct mount *master = m->mnt_master;
185 if (m->mnt_slave.next != &master->mnt_slave_list)
186 return next_slave(m);
187 m = next_peer(master);
188 if (master->mnt_group_id == origin->mnt_group_id)
189 break;
190 if (master->mnt_slave.next == &m->mnt_slave)
191 break;
192 m = master;
193 }
194 if (m == origin)
195 return NULL;
189 } 196 }
197}
190 198
191 if (p_last_dest) { 199/* all accesses are serialized by namespace_sem */
192 do { 200static struct user_namespace *user_ns;
193 p_last_dest = next_peer(p_last_dest); 201static struct mount *last_dest, *last_source, *dest_master;
194 } while (IS_MNT_NEW(p_last_dest)); 202static struct mountpoint *mp;
195 /* is that a peer of the earlier? */ 203static struct hlist_head *list;
196 if (dest == p_last_dest) { 204
197 *type = CL_MAKE_SHARED; 205static int propagate_one(struct mount *m)
198 return p_last_src; 206{
207 struct mount *child;
208 int type;
209 /* skip ones added by this propagate_mnt() */
210 if (IS_MNT_NEW(m))
211 return 0;
212 /* skip if mountpoint isn't covered by it */
213 if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
214 return 0;
215 if (m->mnt_group_id == last_dest->mnt_group_id) {
216 type = CL_MAKE_SHARED;
217 } else {
218 struct mount *n, *p;
219 for (n = m; ; n = p) {
220 p = n->mnt_master;
221 if (p == dest_master || IS_MNT_MARKED(p)) {
222 while (last_dest->mnt_master != p) {
223 last_source = last_source->mnt_master;
224 last_dest = last_source->mnt_parent;
225 }
226 if (n->mnt_group_id != last_dest->mnt_group_id) {
227 last_source = last_source->mnt_master;
228 last_dest = last_source->mnt_parent;
229 }
230 break;
231 }
199 } 232 }
233 type = CL_SLAVE;
234 /* beginning of peer group among the slaves? */
235 if (IS_MNT_SHARED(m))
236 type |= CL_MAKE_SHARED;
200 } 237 }
201 /* slave of the earlier, then */ 238
202 *type = CL_SLAVE; 239 /* Notice when we are propagating across user namespaces */
203 /* beginning of peer group among the slaves? */ 240 if (m->mnt_ns->user_ns != user_ns)
204 if (IS_MNT_SHARED(dest)) 241 type |= CL_UNPRIVILEGED;
205 *type |= CL_MAKE_SHARED; 242 child = copy_tree(last_source, last_source->mnt.mnt_root, type);
206 return last_src; 243 if (IS_ERR(child))
244 return PTR_ERR(child);
245 mnt_set_mountpoint(m, mp, child);
246 last_dest = m;
247 last_source = child;
248 if (m->mnt_master != dest_master) {
249 read_seqlock_excl(&mount_lock);
250 SET_MNT_MARK(m->mnt_master);
251 read_sequnlock_excl(&mount_lock);
252 }
253 hlist_add_head(&child->mnt_hash, list);
254 return 0;
207} 255}
208 256
209/* 257/*
@@ -222,56 +270,48 @@ static struct mount *get_source(struct mount *dest,
222int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, 270int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
223 struct mount *source_mnt, struct hlist_head *tree_list) 271 struct mount *source_mnt, struct hlist_head *tree_list)
224{ 272{
225 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; 273 struct mount *m, *n;
226 struct mount *m, *child;
227 int ret = 0; 274 int ret = 0;
228 struct mount *prev_dest_mnt = dest_mnt; 275
229 struct mount *prev_src_mnt = source_mnt; 276 /*
230 HLIST_HEAD(tmp_list); 277 * we don't want to bother passing tons of arguments to
231 278 * propagate_one(); everything is serialized by namespace_sem,
232 for (m = propagation_next(dest_mnt, dest_mnt); m; 279 * so globals will do just fine.
233 m = propagation_next(m, dest_mnt)) { 280 */
234 int type; 281 user_ns = current->nsproxy->mnt_ns->user_ns;
235 struct mount *source; 282 last_dest = dest_mnt;
236 283 last_source = source_mnt;
237 if (IS_MNT_NEW(m)) 284 mp = dest_mp;
238 continue; 285 list = tree_list;
239 286 dest_master = dest_mnt->mnt_master;
240 source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); 287
241 288 /* all peers of dest_mnt, except dest_mnt itself */
242 /* Notice when we are propagating across user namespaces */ 289 for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
243 if (m->mnt_ns->user_ns != user_ns) 290 ret = propagate_one(n);
244 type |= CL_UNPRIVILEGED; 291 if (ret)
245
246 child = copy_tree(source, source->mnt.mnt_root, type);
247 if (IS_ERR(child)) {
248 ret = PTR_ERR(child);
249 tmp_list = *tree_list;
250 tmp_list.first->pprev = &tmp_list.first;
251 INIT_HLIST_HEAD(tree_list);
252 goto out; 292 goto out;
253 } 293 }
254 294
255 if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) { 295 /* all slave groups */
256 mnt_set_mountpoint(m, dest_mp, child); 296 for (m = next_group(dest_mnt, dest_mnt); m;
257 hlist_add_head(&child->mnt_hash, tree_list); 297 m = next_group(m, dest_mnt)) {
258 } else { 298 /* everything in that slave group */
259 /* 299 n = m;
260 * This can happen if the parent mount was bind mounted 300 do {
261 * on some subdirectory of a shared/slave mount. 301 ret = propagate_one(n);
262 */ 302 if (ret)
263 hlist_add_head(&child->mnt_hash, &tmp_list); 303 goto out;
264 } 304 n = next_peer(n);
265 prev_dest_mnt = m; 305 } while (n != m);
266 prev_src_mnt = child;
267 } 306 }
268out: 307out:
269 lock_mount_hash(); 308 read_seqlock_excl(&mount_lock);
270 while (!hlist_empty(&tmp_list)) { 309 hlist_for_each_entry(n, tree_list, mnt_hash) {
271 child = hlist_entry(tmp_list.first, struct mount, mnt_hash); 310 m = n->mnt_parent;
272 umount_tree(child, 0); 311 if (m->mnt_master != dest_mnt->mnt_master)
312 CLEAR_MNT_MARK(m->mnt_master);
273 } 313 }
274 unlock_mount_hash(); 314 read_sequnlock_excl(&mount_lock);
275 return ret; 315 return ret;
276} 316}
277 317
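
The propagate_mnt() rewrite replaces per-destination source recomputation (get_source()) with a stateful walk: propagate_one() carries a rolling (last_dest, last_source) pair, next_group() enumerates the slave peer groups, and MNT_MARKED on master mounts records which masters have already been crossed so the correct clone source can be found when descending into slaves; the marks are cleared at the end under mount_lock. Error unwinding also moves to the caller, which is why attach_recursive_mnt() (in the fs/namespace.c hunk above) now empties tree_list itself. The outer walk, reduced to its shape:

	/* every peer of dest except dest itself */
	for (n = next_peer(dest); n != dest; n = next_peer(n))
		if ((ret = propagate_one(n)))
			goto out;

	/* then every member of every slave group */
	for (m = next_group(dest, dest); m; m = next_group(m, dest)) {
		n = m;
		do {
			if ((ret = propagate_one(n)))
				goto out;
		} while ((n = next_peer(n)) != m);
	}
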
diff --git a/fs/pnode.h b/fs/pnode.h
index fc28a27fa892..4a246358b031 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -16,6 +16,9 @@
16#define IS_MNT_NEW(m) (!(m)->mnt_ns) 16#define IS_MNT_NEW(m) (!(m)->mnt_ns)
17#define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED) 17#define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
18#define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE) 18#define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
19#define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
20#define SET_MNT_MARK(m) ((m)->mnt.mnt_flags |= MNT_MARKED)
21#define CLEAR_MNT_MARK(m) ((m)->mnt.mnt_flags &= ~MNT_MARKED)
19 22
20#define CL_EXPIRE 0x01 23#define CL_EXPIRE 0x01
21#define CL_SLAVE 0x02 24#define CL_SLAVE 0x02
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 9ae46b87470d..89026095f2b5 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -146,7 +146,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
146 struct task_struct *task; 146 struct task_struct *task;
147 void *ns; 147 void *ns;
148 char name[50]; 148 char name[50];
149 int len = -EACCES; 149 int res = -EACCES;
150 150
151 task = get_proc_task(inode); 151 task = get_proc_task(inode);
152 if (!task) 152 if (!task)
@@ -155,24 +155,18 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
155 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 155 if (!ptrace_may_access(task, PTRACE_MODE_READ))
156 goto out_put_task; 156 goto out_put_task;
157 157
158 len = -ENOENT; 158 res = -ENOENT;
159 ns = ns_ops->get(task); 159 ns = ns_ops->get(task);
160 if (!ns) 160 if (!ns)
161 goto out_put_task; 161 goto out_put_task;
162 162
163 snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns)); 163 snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns));
164 len = strlen(name); 164 res = readlink_copy(buffer, buflen, name);
165
166 if (len > buflen)
167 len = buflen;
168 if (copy_to_user(buffer, name, len))
169 len = -EFAULT;
170
171 ns_ops->put(ns); 165 ns_ops->put(ns);
172out_put_task: 166out_put_task:
173 put_task_struct(task); 167 put_task_struct(task);
174out: 168out:
175 return len; 169 return res;
176} 170}
177 171
178static const struct inode_operations proc_ns_link_inode_operations = { 172static const struct inode_operations proc_ns_link_inode_operations = {
diff --git a/fs/proc/self.c b/fs/proc/self.c
index ffeb202ec942..4348bb8907c2 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -16,7 +16,7 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
16 if (!tgid) 16 if (!tgid)
17 return -ENOENT; 17 return -ENOENT;
18 sprintf(tmp, "%d", tgid); 18 sprintf(tmp, "%d", tgid);
19 return vfs_readlink(dentry,buffer,buflen,tmp); 19 return readlink_copy(buffer, buflen, tmp);
20} 20}
21 21
22static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) 22static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
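Both /proc call sites above now delegate to readlink_copy(), the renamed and argument-trimmed successor of vfs_readlink() (see the include/linux/fs.h hunk below). Its job is exactly the strlen/truncate/copy_to_user sequence that proc_ns_readlink() used to open-code; a sketch reconstructed from the code this patch removes (the real body lives in fs/namei.c):

	/* Sketch of readlink_copy(), pieced together from the removed
	 * open-coded copies: return the number of bytes placed in the
	 * user buffer (truncating to buflen), or a negative errno. */
	int readlink_copy(char __user *buffer, int buflen, const char *link)
	{
		int len = PTR_ERR(link);

		if (IS_ERR(link))
			return len;	/* e.g. a failed ->follow_link() */

		len = strlen(link);
		if (len > (unsigned int) buflen)
			len = buflen;
		if (copy_to_user(buffer, link, len))
			return -EFAULT;
		return len;
	}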
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 7be26f03a3f5..1a81373947f3 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -267,6 +267,7 @@ static int mounts_open_common(struct inode *inode, struct file *file,
267 p->root = root; 267 p->root = root;
268 p->m.poll_event = ns->event; 268 p->m.poll_event = ns->event;
269 p->show = show; 269 p->show = show;
270 p->cached_event = ~0ULL;
270 271
271 return 0; 272 return 0;
272 273
diff --git a/fs/splice.c b/fs/splice.c
index 12028fa41def..9bc07d2b53cf 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -136,8 +136,6 @@ error:
136 136
137const struct pipe_buf_operations page_cache_pipe_buf_ops = { 137const struct pipe_buf_operations page_cache_pipe_buf_ops = {
138 .can_merge = 0, 138 .can_merge = 0,
139 .map = generic_pipe_buf_map,
140 .unmap = generic_pipe_buf_unmap,
141 .confirm = page_cache_pipe_buf_confirm, 139 .confirm = page_cache_pipe_buf_confirm,
142 .release = page_cache_pipe_buf_release, 140 .release = page_cache_pipe_buf_release,
143 .steal = page_cache_pipe_buf_steal, 141 .steal = page_cache_pipe_buf_steal,
@@ -156,8 +154,6 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
156 154
157static const struct pipe_buf_operations user_page_pipe_buf_ops = { 155static const struct pipe_buf_operations user_page_pipe_buf_ops = {
158 .can_merge = 0, 156 .can_merge = 0,
159 .map = generic_pipe_buf_map,
160 .unmap = generic_pipe_buf_unmap,
161 .confirm = generic_pipe_buf_confirm, 157 .confirm = generic_pipe_buf_confirm,
162 .release = page_cache_pipe_buf_release, 158 .release = page_cache_pipe_buf_release,
163 .steal = user_page_pipe_buf_steal, 159 .steal = user_page_pipe_buf_steal,
@@ -547,8 +543,6 @@ EXPORT_SYMBOL(generic_file_splice_read);
547 543
548static const struct pipe_buf_operations default_pipe_buf_ops = { 544static const struct pipe_buf_operations default_pipe_buf_ops = {
549 .can_merge = 0, 545 .can_merge = 0,
550 .map = generic_pipe_buf_map,
551 .unmap = generic_pipe_buf_unmap,
552 .confirm = generic_pipe_buf_confirm, 546 .confirm = generic_pipe_buf_confirm,
553 .release = generic_pipe_buf_release, 547 .release = generic_pipe_buf_release,
554 .steal = generic_pipe_buf_steal, 548 .steal = generic_pipe_buf_steal,
@@ -564,8 +558,6 @@ static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
564/* Pipe buffer operations for a socket and similar. */ 558/* Pipe buffer operations for a socket and similar. */
565const struct pipe_buf_operations nosteal_pipe_buf_ops = { 559const struct pipe_buf_operations nosteal_pipe_buf_ops = {
566 .can_merge = 0, 560 .can_merge = 0,
567 .map = generic_pipe_buf_map,
568 .unmap = generic_pipe_buf_unmap,
569 .confirm = generic_pipe_buf_confirm, 561 .confirm = generic_pipe_buf_confirm,
570 .release = generic_pipe_buf_release, 562 .release = generic_pipe_buf_release,
571 .steal = generic_pipe_buf_nosteal, 563 .steal = generic_pipe_buf_nosteal,
@@ -767,13 +759,13 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
767 goto out; 759 goto out;
768 760
769 if (buf->page != page) { 761 if (buf->page != page) {
770 char *src = buf->ops->map(pipe, buf, 1); 762 char *src = kmap_atomic(buf->page);
771 char *dst = kmap_atomic(page); 763 char *dst = kmap_atomic(page);
772 764
773 memcpy(dst + offset, src + buf->offset, this_len); 765 memcpy(dst + offset, src + buf->offset, this_len);
774 flush_dcache_page(page); 766 flush_dcache_page(page);
775 kunmap_atomic(dst); 767 kunmap_atomic(dst);
776 buf->ops->unmap(pipe, buf, src); 768 kunmap_atomic(src);
777 } 769 }
778 ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len, 770 ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
779 page, fsdata); 771 page, fsdata);
@@ -1067,9 +1059,9 @@ static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
1067 void *data; 1059 void *data;
1068 loff_t tmp = sd->pos; 1060 loff_t tmp = sd->pos;
1069 1061
1070 data = buf->ops->map(pipe, buf, 0); 1062 data = kmap(buf->page);
1071 ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp); 1063 ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
1072 buf->ops->unmap(pipe, buf, data); 1064 kunmap(buf->page);
1073 1065
1074 return ret; 1066 return ret;
1075} 1067}
@@ -1528,116 +1520,48 @@ static int get_iovec_page_array(const struct iovec __user *iov,
1528static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 1520static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
1529 struct splice_desc *sd) 1521 struct splice_desc *sd)
1530{ 1522{
1531 char *src;
1532 int ret;
1533
1534 /*
1535 * See if we can use the atomic maps, by prefaulting in the
1536 * pages and doing an atomic copy
1537 */
1538 if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
1539 src = buf->ops->map(pipe, buf, 1);
1540 ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
1541 sd->len);
1542 buf->ops->unmap(pipe, buf, src);
1543 if (!ret) {
1544 ret = sd->len;
1545 goto out;
1546 }
1547 }
1548
1549 /*
1550 * No dice, use slow non-atomic map and copy
1551 */
1552 src = buf->ops->map(pipe, buf, 0);
1553
1554 ret = sd->len;
1555 if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
1556 ret = -EFAULT;
1557
1558 buf->ops->unmap(pipe, buf, src);
1559out:
1560 if (ret > 0)
1561 sd->u.userptr += ret;
1562 return ret;
1563}
1523 int n = copy_page_to_iter(buf->page, buf->offset, sd->len, sd->u.data);
1524 return n == sd->len ? n : -EFAULT;
1525}
1564 1526
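Note the convention being bridged here: copy_page_to_iter() reports progress as a (possibly short) byte count and never returns an error, while splice actors must return bytes consumed or a negative errno, so pipe_to_user() maps a short copy to -EFAULT. The same translation, spelled out:

	/* Sketch: adapting copy_page_to_iter()'s short-count convention
	 * to the splice-actor contract used by __splice_from_pipe(). */
	int n = copy_page_to_iter(buf->page, buf->offset, sd->len, sd->u.data);
	if (n != sd->len)
		return -EFAULT;		/* user page faulted mid-copy */
	return n;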
1565/* 1527/*
1566 * For lack of a better implementation, implement vmsplice() to userspace 1528 * For lack of a better implementation, implement vmsplice() to userspace
1567 * as a simple copy of the pipe's pages to the user iov. 1529 * as a simple copy of the pipe's pages to the user iov.
1568 */ 1530 */
1569static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
1570 unsigned long nr_segs, unsigned int flags)
1571{
1572 struct pipe_inode_info *pipe;
1573 struct splice_desc sd;
1574 ssize_t size;
1575 int error;
1576 long ret;
1577
1578 pipe = get_pipe_info(file);
1579 if (!pipe)
1580 return -EBADF;
1581
1582 pipe_lock(pipe);
1583
1584 error = ret = 0;
1585 while (nr_segs) {
1586 void __user *base;
1587 size_t len;
1588
1589 /*
1590 * Get user address base and length for this iovec.
1591 */
1592 error = get_user(base, &iov->iov_base);
1593 if (unlikely(error))
1594 break;
1595 error = get_user(len, &iov->iov_len);
1596 if (unlikely(error))
1597 break;
1598
1599 /*
1600 * Sanity check this iovec. 0 read succeeds.
1601 */
1602 if (unlikely(!len))
1603 break;
1604 if (unlikely(!base)) {
1605 error = -EFAULT;
1606 break;
1607 }
1608
1609 if (unlikely(!access_ok(VERIFY_WRITE, base, len))) {
1610 error = -EFAULT;
1611 break;
1612 }
1613
1614 sd.len = 0;
1615 sd.total_len = len;
1616 sd.flags = flags;
1617 sd.u.userptr = base;
1618 sd.pos = 0;
1619
1620 size = __splice_from_pipe(pipe, &sd, pipe_to_user);
1621 if (size < 0) {
1622 if (!ret)
1623 ret = size;
1624
1625 break;
1626 }
1627
1628 ret += size;
1629
1630 if (size < len)
1631 break;
1632
1633 nr_segs--;
1634 iov++;
1635 }
1636
1637 pipe_unlock(pipe);
1638
1639 if (!ret)
1640 ret = error;
1641
1642 return ret;
1643}
1531static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
1532 unsigned long nr_segs, unsigned int flags)
1533{
1534 struct pipe_inode_info *pipe;
1535 struct splice_desc sd;
1536 long ret;
1537 struct iovec iovstack[UIO_FASTIOV];
1538 struct iovec *iov = iovstack;
1539 struct iov_iter iter;
1540 ssize_t count = 0;
1541
1542 pipe = get_pipe_info(file);
1543 if (!pipe)
1544 return -EBADF;
1545
1546 ret = rw_copy_check_uvector(READ, uiov, nr_segs,
1547 ARRAY_SIZE(iovstack), iovstack, &iov);
1548 if (ret <= 0)
1549 return ret;
1550
1551 iov_iter_init(&iter, iov, nr_segs, count, 0);
1552
1553 sd.len = 0;
1554 sd.total_len = count;
1555 sd.flags = flags;
1556 sd.u.data = &iter;
1557 sd.pos = 0;
1558
1559 pipe_lock(pipe);
1560 ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
1561 pipe_unlock(pipe);
1562
1563 if (iov != iovstack)
1564 kfree(iov);
1565
1566 return ret;
1567}
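The rewritten vmsplice_to_user() is the template for a pattern this series applies repeatedly: rw_copy_check_uvector() validates the whole user iovec array up front (spilling from the on-stack UIO_FASTIOV array to kmalloc only when needed) and returns the total byte count, an iov_iter is initialized over the result, and the iterator is handed to generic code. In condensed form, with the returned byte total fed to the iterator (the iterator's count is what limits how much gets copied):

	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	ssize_t count;

	/* validate, and possibly reallocate, the user iovec array;
	 * returns total bytes or a negative errno */
	count = rw_copy_check_uvector(READ, uiov, nr_segs,
				      ARRAY_SIZE(iovstack), iovstack, &iov);
	if (count <= 0)
		return count;

	iov_iter_init(&iter, iov, nr_segs, count, 0);
	/* ... pass &iter to the generic helper ... */

	if (iov != iovstack)	/* rw_copy_check_uvector() switched to kmalloc */
		kfree(iov);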
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 1037637957c7..d2c170f8b035 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -171,7 +171,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
171 } else 171 } else
172 up_write(&iinfo->i_data_sem); 172 up_write(&iinfo->i_data_sem);
173 173
174 retval = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); 174 retval = __generic_file_aio_write(iocb, iov, nr_segs);
175 mutex_unlock(&inode->i_mutex); 175 mutex_unlock(&inode->i_mutex);
176 176
177 if (retval > 0) { 177 if (retval > 0) {
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 003c0051b62f..79e96ce98733 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -699,7 +699,7 @@ xfs_file_dio_aio_write(
699 699
700 trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); 700 trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
701 ret = generic_file_direct_write(iocb, iovp, 701 ret = generic_file_direct_write(iocb, iovp,
702 &nr_segs, pos, &iocb->ki_pos, count, ocount); 702 &nr_segs, pos, count, ocount);
703 703
704out: 704out:
705 xfs_rw_iunlock(ip, iolock); 705 xfs_rw_iunlock(ip, iolock);
@@ -715,7 +715,7 @@ xfs_file_buffered_aio_write(
715 const struct iovec *iovp, 715 const struct iovec *iovp,
716 unsigned long nr_segs, 716 unsigned long nr_segs,
717 loff_t pos, 717 loff_t pos,
718 size_t ocount) 718 size_t count)
719{ 719{
720 struct file *file = iocb->ki_filp; 720 struct file *file = iocb->ki_filp;
721 struct address_space *mapping = file->f_mapping; 721 struct address_space *mapping = file->f_mapping;
@@ -724,7 +724,7 @@ xfs_file_buffered_aio_write(
724 ssize_t ret; 724 ssize_t ret;
725 int enospc = 0; 725 int enospc = 0;
726 int iolock = XFS_IOLOCK_EXCL; 726 int iolock = XFS_IOLOCK_EXCL;
727 size_t count = ocount; 727 struct iov_iter from;
728 728
729 xfs_rw_ilock(ip, iolock); 729 xfs_rw_ilock(ip, iolock);
730 730
@@ -732,14 +732,15 @@ xfs_file_buffered_aio_write(
732 if (ret) 732 if (ret)
733 goto out; 733 goto out;
734 734
735 iov_iter_init(&from, iovp, nr_segs, count, 0);
735 /* We can write back this queue in page reclaim */ 736 /* We can write back this queue in page reclaim */
736 current->backing_dev_info = mapping->backing_dev_info; 737 current->backing_dev_info = mapping->backing_dev_info;
737 738
738write_retry: 739write_retry:
739 trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); 740 trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
740 ret = generic_file_buffered_write(iocb, iovp, nr_segs, 741 ret = generic_perform_write(file, &from, pos);
741 pos, &iocb->ki_pos, count, 0); 742 if (likely(ret >= 0))
742 743 iocb->ki_pos = pos + ret;
743 /* 744 /*
744 * If we just got an ENOSPC, try to write back all dirty inodes to 745 * If we just got an ENOSPC, try to write back all dirty inodes to
745 * convert delalloc space to free up some of the excess reserved 746 * convert delalloc space to free up some of the excess reserved
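xfs_file_buffered_aio_write() shows the new division of labour for buffered writes: the caller builds the iov_iter once, generic_perform_write() consumes it, and the caller, not the helper, advances iocb->ki_pos. Because the iterator records how much has been consumed, the write_retry loop after an ENOSPC flush can simply call the helper again with the same iterator. The caller-side contract, reduced to its core:

	/* The converted caller pattern: one iterator, explicit ki_pos update.
	 * 'iovp', 'nr_segs', 'count' and 'pos' as in the xfs code above. */
	struct iov_iter from;
	ssize_t ret;

	iov_iter_init(&from, iovp, nr_segs, count, 0);
	ret = generic_perform_write(file, &from, pos);
	if (likely(ret >= 0))
		iocb->ki_pos = pos + ret;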
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index bcfe61202115..0b18776b075e 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -271,32 +271,6 @@ xfs_open_by_handle(
271 return error; 271 return error;
272} 272}
273 273
274/*
275 * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's
276 * unused first argument.
277 */
278STATIC int
279do_readlink(
280 char __user *buffer,
281 int buflen,
282 const char *link)
283{
284 int len;
285
286 len = PTR_ERR(link);
287 if (IS_ERR(link))
288 goto out;
289
290 len = strlen(link);
291 if (len > (unsigned) buflen)
292 len = buflen;
293 if (copy_to_user(buffer, link, len))
294 len = -EFAULT;
295 out:
296 return len;
297}
298
299
300int 274int
301xfs_readlink_by_handle( 275xfs_readlink_by_handle(
302 struct file *parfilp, 276 struct file *parfilp,
@@ -334,7 +308,7 @@ xfs_readlink_by_handle(
334 error = -xfs_readlink(XFS_I(dentry->d_inode), link); 308 error = -xfs_readlink(XFS_I(dentry->d_inode), link);
335 if (error) 309 if (error)
336 goto out_kfree; 310 goto out_kfree;
337 error = do_readlink(hreq->ohandle, olen, link); 311 error = readlink_copy(hreq->ohandle, olen, link);
338 if (error) 312 if (error)
339 goto out_kfree; 313 goto out_kfree;
340 314
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5aa372a7380c..bba550826921 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -388,7 +388,7 @@ struct sg_iovec;
388struct rq_map_data; 388struct rq_map_data;
389extern struct bio *bio_map_user_iov(struct request_queue *, 389extern struct bio *bio_map_user_iov(struct request_queue *,
390 struct block_device *, 390 struct block_device *,
391 struct sg_iovec *, int, int, gfp_t); 391 const struct sg_iovec *, int, int, gfp_t);
392extern void bio_unmap_user(struct bio *); 392extern void bio_unmap_user(struct bio *);
393extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, 393extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
394 gfp_t); 394 gfp_t);
@@ -414,7 +414,8 @@ extern int bio_alloc_pages(struct bio *bio, gfp_t gfp);
414extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, 414extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *,
415 unsigned long, unsigned int, int, gfp_t); 415 unsigned long, unsigned int, int, gfp_t);
416extern struct bio *bio_copy_user_iov(struct request_queue *, 416extern struct bio *bio_copy_user_iov(struct request_queue *,
417 struct rq_map_data *, struct sg_iovec *, 417 struct rq_map_data *,
418 const struct sg_iovec *,
418 int, int, gfp_t); 419 int, int, gfp_t);
419extern int bio_uncopy_user(struct bio *); 420extern int bio_uncopy_user(struct bio *);
420void zero_fill_bio(struct bio *bio); 421void zero_fill_bio(struct bio *bio);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 99617cf7dd1a..0d84981ee03f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -835,8 +835,8 @@ extern int blk_rq_map_user(struct request_queue *, struct request *,
835extern int blk_rq_unmap_user(struct bio *); 835extern int blk_rq_unmap_user(struct bio *);
836extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); 836extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
837extern int blk_rq_map_user_iov(struct request_queue *, struct request *, 837extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
838 struct rq_map_data *, struct sg_iovec *, int, 838 struct rq_map_data *, const struct sg_iovec *,
839 unsigned int, gfp_t); 839 int, unsigned int, gfp_t);
840extern int blk_execute_rq(struct request_queue *, struct gendisk *, 840extern int blk_execute_rq(struct request_queue *, struct gendisk *,
841 struct request *, int); 841 struct request *, int);
842extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, 842extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index d77797a52b7b..c40302f909ce 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -210,8 +210,8 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
210int block_write_full_page_endio(struct page *page, get_block_t *get_block, 210int block_write_full_page_endio(struct page *page, get_block_t *get_block,
211 struct writeback_control *wbc, bh_end_io_t *handler); 211 struct writeback_control *wbc, bh_end_io_t *handler);
212int block_read_full_page(struct page*, get_block_t*); 212int block_read_full_page(struct page*, get_block_t*);
213int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, 213int block_is_partially_uptodate(struct page *page, unsigned long from,
214 unsigned long from); 214 unsigned long count);
215int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, 215int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
216 unsigned flags, struct page **pagep, get_block_t *get_block); 216 unsigned flags, struct page **pagep, get_block_t *get_block);
217int __block_write_begin(struct page *page, loff_t pos, unsigned len, 217int __block_write_begin(struct page *page, loff_t pos, unsigned len,
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 70e8e21c0a30..230f87bdf5ad 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -63,8 +63,6 @@ struct file_operations;
63struct vfsmount; 63struct vfsmount;
64struct dentry; 64struct dentry;
65 65
66extern void __init files_defer_init(void);
67
68#define rcu_dereference_check_fdtable(files, fdtfd) \ 66#define rcu_dereference_check_fdtable(files, fdtfd) \
69 rcu_dereference_check((fdtfd), lockdep_is_held(&(files)->file_lock)) 67 rcu_dereference_check((fdtfd), lockdep_is_held(&(files)->file_lock))
70 68
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 81048f9bc783..7a9c5bca2b76 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -48,6 +48,7 @@ struct cred;
48struct swap_info_struct; 48struct swap_info_struct;
49struct seq_file; 49struct seq_file;
50struct workqueue_struct; 50struct workqueue_struct;
51struct iov_iter;
51 52
52extern void __init inode_init(void); 53extern void __init inode_init(void);
53extern void __init inode_init_early(void); 54extern void __init inode_init_early(void);
@@ -125,6 +126,8 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
125 126
126/* File needs atomic accesses to f_pos */ 127/* File needs atomic accesses to f_pos */
127#define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) 128#define FMODE_ATOMIC_POS ((__force fmode_t)0x8000)
129/* Write access to underlying fs */
130#define FMODE_WRITER ((__force fmode_t)0x10000)
128 131
129/* File was opened by fanotify and shouldn't generate fanotify events */ 132/* File was opened by fanotify and shouldn't generate fanotify events */
130#define FMODE_NONOTIFY ((__force fmode_t)0x1000000) 133#define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
@@ -293,38 +296,6 @@ struct page;
293struct address_space; 296struct address_space;
294struct writeback_control; 297struct writeback_control;
295 298
296struct iov_iter {
297 const struct iovec *iov;
298 unsigned long nr_segs;
299 size_t iov_offset;
300 size_t count;
301};
302
303size_t iov_iter_copy_from_user_atomic(struct page *page,
304 struct iov_iter *i, unsigned long offset, size_t bytes);
305size_t iov_iter_copy_from_user(struct page *page,
306 struct iov_iter *i, unsigned long offset, size_t bytes);
307void iov_iter_advance(struct iov_iter *i, size_t bytes);
308int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
309size_t iov_iter_single_seg_count(const struct iov_iter *i);
310
311static inline void iov_iter_init(struct iov_iter *i,
312 const struct iovec *iov, unsigned long nr_segs,
313 size_t count, size_t written)
314{
315 i->iov = iov;
316 i->nr_segs = nr_segs;
317 i->iov_offset = 0;
318 i->count = count + written;
319
320 iov_iter_advance(i, written);
321}
322
323static inline size_t iov_iter_count(struct iov_iter *i)
324{
325 return i->count;
326}
327
328/* 299/*
329 * "descriptor" for what we're up to with a read. 300 * "descriptor" for what we're up to with a read.
330 * This allows us to use the same read code yet 301 * This allows us to use the same read code yet
@@ -383,7 +354,7 @@ struct address_space_operations {
383 int (*migratepage) (struct address_space *, 354 int (*migratepage) (struct address_space *,
384 struct page *, struct page *, enum migrate_mode); 355 struct page *, struct page *, enum migrate_mode);
385 int (*launder_page) (struct page *); 356 int (*launder_page) (struct page *);
386 int (*is_partially_uptodate) (struct page *, read_descriptor_t *, 357 int (*is_partially_uptodate) (struct page *, unsigned long,
387 unsigned long); 358 unsigned long);
388 void (*is_dirty_writeback) (struct page *, bool *, bool *); 359 void (*is_dirty_writeback) (struct page *, bool *, bool *);
389 int (*error_remove_page)(struct address_space *, struct page *); 360 int (*error_remove_page)(struct address_space *, struct page *);
@@ -770,9 +741,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
770 index < ra->start + ra->size); 741 index < ra->start + ra->size);
771} 742}
772 743
773#define FILE_MNT_WRITE_TAKEN 1
774#define FILE_MNT_WRITE_RELEASED 2
775
776struct file { 744struct file {
777 union { 745 union {
778 struct llist_node fu_llist; 746 struct llist_node fu_llist;
@@ -810,9 +778,6 @@ struct file {
810 struct list_head f_tfile_llink; 778 struct list_head f_tfile_llink;
811#endif /* #ifdef CONFIG_EPOLL */ 779#endif /* #ifdef CONFIG_EPOLL */
812 struct address_space *f_mapping; 780 struct address_space *f_mapping;
813#ifdef CONFIG_DEBUG_WRITECOUNT
814 unsigned long f_mnt_write_state;
815#endif
816} __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ 781} __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
817 782
818struct file_handle { 783struct file_handle {
@@ -830,49 +795,6 @@ static inline struct file *get_file(struct file *f)
830#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) 795#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
831#define file_count(x) atomic_long_read(&(x)->f_count) 796#define file_count(x) atomic_long_read(&(x)->f_count)
832 797
833#ifdef CONFIG_DEBUG_WRITECOUNT
834static inline void file_take_write(struct file *f)
835{
836 WARN_ON(f->f_mnt_write_state != 0);
837 f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN;
838}
839static inline void file_release_write(struct file *f)
840{
841 f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED;
842}
843static inline void file_reset_write(struct file *f)
844{
845 f->f_mnt_write_state = 0;
846}
847static inline void file_check_state(struct file *f)
848{
849 /*
850 * At this point, either both or neither of these bits
851 * should be set.
852 */
853 WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN);
854 WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED);
855}
856static inline int file_check_writeable(struct file *f)
857{
858 if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN)
859 return 0;
860 printk(KERN_WARNING "writeable file with no "
861 "mnt_want_write()\n");
862 WARN_ON(1);
863 return -EINVAL;
864}
865#else /* !CONFIG_DEBUG_WRITECOUNT */
866static inline void file_take_write(struct file *filp) {}
867static inline void file_release_write(struct file *filp) {}
868static inline void file_reset_write(struct file *filp) {}
869static inline void file_check_state(struct file *filp) {}
870static inline int file_check_writeable(struct file *filp)
871{
872 return 0;
873}
874#endif /* CONFIG_DEBUG_WRITECOUNT */
875
876#define MAX_NON_LFS ((1UL<<31) - 1) 798#define MAX_NON_LFS ((1UL<<31) - 1)
877 799
878/* Page cache limit. The filesystems should put that into their s_maxbytes 800/* Page cache limit. The filesystems should put that into their s_maxbytes
@@ -2481,16 +2403,13 @@ extern int generic_file_mmap(struct file *, struct vm_area_struct *);
2481extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); 2403extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
2482extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, 2404extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
2483 unsigned long size, pgoff_t pgoff); 2405 unsigned long size, pgoff_t pgoff);
2484extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
2485int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); 2406int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
2486extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); 2407extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
2487extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, 2408extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long);
2488 loff_t *);
2489extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); 2409extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
2490extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, 2410extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
2491 unsigned long *, loff_t, loff_t *, size_t, size_t); 2411 unsigned long *, loff_t, size_t, size_t);
2492extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, 2412extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
2493 unsigned long, loff_t, loff_t *, size_t, ssize_t);
2494extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); 2413extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
2495extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); 2414extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
2496extern int generic_segment_checks(const struct iovec *iov, 2415extern int generic_segment_checks(const struct iovec *iov,
@@ -2582,7 +2501,7 @@ extern const struct file_operations generic_ro_fops;
2582 2501
2583#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) 2502#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
2584 2503
2585extern int vfs_readlink(struct dentry *, char __user *, int, const char *); 2504extern int readlink_copy(char __user *, int, const char *);
2586extern int page_readlink(struct dentry *, char __user *, int); 2505extern int page_readlink(struct dentry *, char __user *, int);
2587extern void *page_follow_link_light(struct dentry *, struct nameidata *); 2506extern void *page_follow_link_light(struct dentry *, struct nameidata *);
2588extern void page_put_link(struct dentry *, struct nameidata *, void *); 2507extern void page_put_link(struct dentry *, struct nameidata *, void *);
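Two threads of the series meet in this header: the iov_iter machinery migrates to <linux/uio.h> (see the hunks below), and the CONFIG_DEBUG_WRITECOUNT state machine in struct file is replaced by a single FMODE_WRITER mode bit. The open and fput paths are not part of this excerpt, so the following pairing is an assumption about how the bit is used, not code from the patch:

	/* Hypothetical sketch, NOT from this patch: FMODE_WRITER lets
	 * generic code pair the write access taken at open time with
	 * exactly one release at fput time, with no per-file debug
	 * state machine left to audit. */
	static int take_write_access(struct file *f, struct vfsmount *mnt)
	{
		int err = mnt_want_write(mnt);

		if (!err)
			f->f_mode |= FMODE_WRITER;
		return err;
	}

	static void drop_write_access(struct file *f, struct vfsmount *mnt)
	{
		if (f->f_mode & FMODE_WRITER)
			mnt_drop_write(mnt);
	}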
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 371d346fa270..839bac270904 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -44,6 +44,8 @@ struct mnt_namespace;
44#define MNT_SHARED_MASK (MNT_UNBINDABLE) 44#define MNT_SHARED_MASK (MNT_UNBINDABLE)
45#define MNT_PROPAGATION_MASK (MNT_SHARED | MNT_UNBINDABLE) 45#define MNT_PROPAGATION_MASK (MNT_SHARED | MNT_UNBINDABLE)
46 46
47#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
48 MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
47 49
48#define MNT_INTERNAL 0x4000 50#define MNT_INTERNAL 0x4000
49 51
@@ -51,6 +53,7 @@ struct mnt_namespace;
51#define MNT_LOCKED 0x800000 53#define MNT_LOCKED 0x800000
52#define MNT_DOOMED 0x1000000 54#define MNT_DOOMED 0x1000000
53#define MNT_SYNC_UMOUNT 0x2000000 55#define MNT_SYNC_UMOUNT 0x2000000
56#define MNT_MARKED 0x4000000
54 57
55struct vfsmount { 58struct vfsmount {
56 struct dentry *mnt_root; /* root of the mounted tree */ 59 struct dentry *mnt_root; /* root of the mounted tree */
diff --git a/include/linux/nbd.h b/include/linux/nbd.h
index ae4981ebd18e..f62f78aef4ac 100644
--- a/include/linux/nbd.h
+++ b/include/linux/nbd.h
@@ -24,8 +24,7 @@ struct request;
24struct nbd_device { 24struct nbd_device {
25 int flags; 25 int flags;
26 int harderror; /* Code of hard error */ 26 int harderror; /* Code of hard error */
27 struct socket * sock; 27 struct socket * sock; /* If == NULL, device is not ready, yet */
28 struct file * file; /* If == NULL, device is not ready, yet */
29 int magic; 28 int magic;
30 29
31 spinlock_t queue_lock; 30 spinlock_t queue_lock;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 4d9389c79e61..eb8b8ac6df3c 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -83,23 +83,6 @@ struct pipe_buf_operations {
83 int can_merge; 83 int can_merge;
84 84
85 /* 85 /*
86 * ->map() returns a virtual address mapping of the pipe buffer.
87 * The last integer flag reflects whether this should be an atomic
88 * mapping or not. The atomic map is faster, however you can't take
89 * page faults before calling ->unmap() again. So if you need to eg
90 * access user data through copy_to/from_user(), then you must get
91 * a non-atomic map. ->map() uses the kmap_atomic slot for
92 * atomic maps, you have to be careful if mapping another page as
93 * source or destination for a copy.
94 */
95 void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int);
96
97 /*
98 * Undoes ->map(), finishes the virtual mapping of the pipe buffer.
99 */
100 void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *);
101
102 /*
103 * ->confirm() verifies that the data in the pipe buffer is there 86 * ->confirm() verifies that the data in the pipe buffer is there
104 * and that the contents are good. If the pages in the pipe belong 87 * and that the contents are good. If the pages in the pipe belong
105 * to a file system, we may need to wait for IO completion in this 88 * to a file system, we may need to wait for IO completion in this
@@ -150,8 +133,6 @@ struct pipe_inode_info *alloc_pipe_info(void);
150void free_pipe_info(struct pipe_inode_info *); 133void free_pipe_info(struct pipe_inode_info *);
151 134
152/* Generic pipe buffer ops functions */ 135/* Generic pipe buffer ops functions */
153void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int);
154void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *);
155void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); 136void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
156int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); 137int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
157int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); 138int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
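With ->map()/->unmap() gone from pipe_buf_operations, pipe buffers are treated as plain pages and every consumer maps buf->page directly, as the fs/splice.c hunks above illustrate: kmap_atomic() where nothing can fault or sleep between map and unmap, kmap() where the copy may block (e.g. around __kernel_write()). The two replacement idioms, after the usual buf->ops->confirm() call:

	/* atomic: cheap, but no faults or sleeping until kunmap_atomic() */
	char *src = kmap_atomic(buf->page);
	memcpy(dst + offset, src + buf->offset, this_len);
	kunmap_atomic(src);

	/* sleeping: for copies that may block or fault */
	void *data = kmap(buf->page);
	ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
	kunmap(buf->page);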
diff --git a/include/linux/uio.h b/include/linux/uio.h
index c55ce243cc09..199bcc34241b 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -9,14 +9,23 @@
9#ifndef __LINUX_UIO_H 9#ifndef __LINUX_UIO_H
10#define __LINUX_UIO_H 10#define __LINUX_UIO_H
11 11
12#include <linux/kernel.h>
12#include <uapi/linux/uio.h> 13#include <uapi/linux/uio.h>
13 14
15struct page;
14 16
15struct kvec { 17struct kvec {
16 void *iov_base; /* and that should *never* hold a userland pointer */ 18 void *iov_base; /* and that should *never* hold a userland pointer */
17 size_t iov_len; 19 size_t iov_len;
18}; 20};
19 21
22struct iov_iter {
23 const struct iovec *iov;
24 unsigned long nr_segs;
25 size_t iov_offset;
26 size_t count;
27};
28
20/* 29/*
21 * Total number of bytes covered by an iovec. 30 * Total number of bytes covered by an iovec.
22 * 31 *
@@ -34,8 +43,51 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
34 return ret; 43 return ret;
35} 44}
36 45
46static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
47{
48 return (struct iovec) {
49 .iov_base = iter->iov->iov_base + iter->iov_offset,
50 .iov_len = min(iter->count,
51 iter->iov->iov_len - iter->iov_offset),
52 };
53}
54
55#define iov_for_each(iov, iter, start) \
56 for (iter = (start); \
57 (iter).count && \
58 ((iov = iov_iter_iovec(&(iter))), 1); \
59 iov_iter_advance(&(iter), (iov).iov_len))
60
37unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to); 61unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to);
38 62
63size_t iov_iter_copy_from_user_atomic(struct page *page,
64 struct iov_iter *i, unsigned long offset, size_t bytes);
65size_t iov_iter_copy_from_user(struct page *page,
66 struct iov_iter *i, unsigned long offset, size_t bytes);
67void iov_iter_advance(struct iov_iter *i, size_t bytes);
68int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
69size_t iov_iter_single_seg_count(const struct iov_iter *i);
70size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
71 struct iov_iter *i);
72
73static inline void iov_iter_init(struct iov_iter *i,
74 const struct iovec *iov, unsigned long nr_segs,
75 size_t count, size_t written)
76{
77 i->iov = iov;
78 i->nr_segs = nr_segs;
79 i->iov_offset = 0;
80 i->count = count + written;
81
82 iov_iter_advance(i, written);
83}
84
85static inline size_t iov_iter_count(struct iov_iter *i)
86{
87 return i->count;
88}
89
39int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len); 90int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
40int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len); 91int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len);
92
41#endif 93#endif
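iov_iter_iovec() materializes the iterator's current segment, clamping its length both to the segment and to the bytes the iterator still covers, and iov_for_each() walks a by-value copy of the iterator so the caller's position is untouched. A small illustration (the helper name is invented for the example):

	/* Illustrative: count the segments an iterator still spans.
	 * 'start' is copied by value; only the local copy advances. */
	static unsigned long nr_segs_left(struct iov_iter start)
	{
		struct iovec iov;
		struct iov_iter i;
		unsigned long segs = 0;

		iov_for_each(iov, i, start)
			segs++;
		return segs;
	}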
diff --git a/kernel/relay.c b/kernel/relay.c
index 52d6a6f56261..5a56d3c8dc03 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1195,8 +1195,6 @@ static void relay_pipe_buf_release(struct pipe_inode_info *pipe,
1195 1195
1196static const struct pipe_buf_operations relay_pipe_buf_ops = { 1196static const struct pipe_buf_operations relay_pipe_buf_ops = {
1197 .can_merge = 0, 1197 .can_merge = 0,
1198 .map = generic_pipe_buf_map,
1199 .unmap = generic_pipe_buf_unmap,
1200 .confirm = generic_pipe_buf_confirm, 1198 .confirm = generic_pipe_buf_confirm,
1201 .release = relay_pipe_buf_release, 1199 .release = relay_pipe_buf_release,
1202 .steal = generic_pipe_buf_steal, 1200 .steal = generic_pipe_buf_steal,
@@ -1253,7 +1251,7 @@ static ssize_t subbuf_splice_actor(struct file *in,
1253 subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT; 1251 subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT;
1254 pidx = (read_start / PAGE_SIZE) % subbuf_pages; 1252 pidx = (read_start / PAGE_SIZE) % subbuf_pages;
1255 poff = read_start & ~PAGE_MASK; 1253 poff = read_start & ~PAGE_MASK;
1256 nr_pages = min_t(unsigned int, subbuf_pages, pipe->buffers); 1254 nr_pages = min_t(unsigned int, subbuf_pages, spd.nr_pages_max);
1257 1255
1258 for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) { 1256 for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) {
1259 unsigned int this_len, this_end, private; 1257 unsigned int this_len, this_end, private;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e3e665685ee5..737b0efa1a62 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4392,8 +4392,6 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4392 4392
4393static const struct pipe_buf_operations tracing_pipe_buf_ops = { 4393static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4394 .can_merge = 0, 4394 .can_merge = 0,
4395 .map = generic_pipe_buf_map,
4396 .unmap = generic_pipe_buf_unmap,
4397 .confirm = generic_pipe_buf_confirm, 4395 .confirm = generic_pipe_buf_confirm,
4398 .release = generic_pipe_buf_release, 4396 .release = generic_pipe_buf_release,
4399 .steal = generic_pipe_buf_steal, 4397 .steal = generic_pipe_buf_steal,
@@ -4488,7 +4486,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
4488 trace_access_lock(iter->cpu_file); 4486 trace_access_lock(iter->cpu_file);
4489 4487
4490 /* Fill as many pages as possible. */ 4488 /* Fill as many pages as possible. */
4491 for (i = 0, rem = len; i < pipe->buffers && rem; i++) { 4489 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4492 spd.pages[i] = alloc_page(GFP_KERNEL); 4490 spd.pages[i] = alloc_page(GFP_KERNEL);
4493 if (!spd.pages[i]) 4491 if (!spd.pages[i])
4494 break; 4492 break;
@@ -5281,8 +5279,6 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5281/* Pipe buffer operations for a buffer. */ 5279/* Pipe buffer operations for a buffer. */
5282static const struct pipe_buf_operations buffer_pipe_buf_ops = { 5280static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5283 .can_merge = 0, 5281 .can_merge = 0,
5284 .map = generic_pipe_buf_map,
5285 .unmap = generic_pipe_buf_unmap,
5286 .confirm = generic_pipe_buf_confirm, 5282 .confirm = generic_pipe_buf_confirm,
5287 .release = buffer_pipe_buf_release, 5283 .release = buffer_pipe_buf_release,
5288 .steal = generic_pipe_buf_steal, 5284 .steal = generic_pipe_buf_steal,
@@ -5358,7 +5354,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5358 trace_access_lock(iter->cpu_file); 5354 trace_access_lock(iter->cpu_file);
5359 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file); 5355 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5360 5356
5361 for (i = 0; i < pipe->buffers && len && entries; i++, len -= PAGE_SIZE) { 5357 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5362 struct page *page; 5358 struct page *page;
5363 int r; 5359 int r;
5364 5360
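Both relay and ftrace used to bound their fill loops by pipe->buffers, but a pipe can be resized with fcntl(F_SETPIPE_SZ), so the only bound that matches the spd.pages array is the capacity recorded in the splice_pipe_desc itself. The descriptor setup these loops now rely on looks roughly like this (field values are the conventional ones, not copied from this diff):

	struct page *pages[PIPE_DEF_BUFFERS];
	struct partial_page partial[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages		= pages,
		.partial	= partial,
		.nr_pages_max	= PIPE_DEF_BUFFERS,	/* matches the arrays */
		.ops		= &relay_pipe_buf_ops,
		.spd_release	= relay_page_release,	/* assumed hook name */
	};

	/* fill at most spd.nr_pages_max entries, whatever the pipe size */
	for (spd.nr_pages = 0; spd.nr_pages < spd.nr_pages_max; spd.nr_pages++)
		/* ... stage one page per iteration ... */;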
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index dd7f8858188a..140b66a874c1 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1045,16 +1045,6 @@ config DEBUG_BUGVERBOSE
1045 of the BUG call as well as the EIP and oops trace. This aids 1045 of the BUG call as well as the EIP and oops trace. This aids
1046 debugging but costs about 70-100K of memory. 1046 debugging but costs about 70-100K of memory.
1047 1047
1048config DEBUG_WRITECOUNT
1049 bool "Debug filesystem writers count"
1050 depends on DEBUG_KERNEL
1051 help
1052 Enable this to catch wrong use of the writers count in struct
1053 vfsmount. This will increase the size of each file struct by
1054 32 bits.
1055
1056 If unsure, say N.
1057
1058config DEBUG_LIST 1048config DEBUG_LIST
1059 bool "Debug linked list manipulation" 1049 bool "Debug linked list manipulation"
1060 depends on DEBUG_KERNEL 1050 depends on DEBUG_KERNEL
diff --git a/mm/Makefile b/mm/Makefile
index 9e5aaf92197d..b484452dac57 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -17,7 +17,8 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
17 util.o mmzone.o vmstat.o backing-dev.o \ 17 util.o mmzone.o vmstat.o backing-dev.o \
18 mm_init.o mmu_context.o percpu.o slab_common.o \ 18 mm_init.o mmu_context.o percpu.o slab_common.o \
19 compaction.o balloon_compaction.o vmacache.o \ 19 compaction.o balloon_compaction.o vmacache.o \
20 interval_tree.o list_lru.o workingset.o $(mmu-y) 20 interval_tree.o list_lru.o workingset.o \
21 iov_iter.o $(mmu-y)
21 22
22obj-y += init-mm.o 23obj-y += init-mm.o
23 24
diff --git a/mm/filemap.c b/mm/filemap.c
index 27ebc0c9571b..a82fbe4c9e8e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -77,7 +77,7 @@
77 * ->mmap_sem 77 * ->mmap_sem
78 * ->lock_page (access_process_vm) 78 * ->lock_page (access_process_vm)
79 * 79 *
80 * ->i_mutex (generic_file_buffered_write) 80 * ->i_mutex (generic_perform_write)
81 * ->mmap_sem (fault_in_pages_readable->do_page_fault) 81 * ->mmap_sem (fault_in_pages_readable->do_page_fault)
82 * 82 *
83 * bdi->wb.list_lock 83 * bdi->wb.list_lock
@@ -1428,7 +1428,8 @@ static void shrink_readahead_size_eio(struct file *filp,
1428 * do_generic_file_read - generic file read routine 1428 * do_generic_file_read - generic file read routine
1429 * @filp: the file to read 1429 * @filp: the file to read
1430 * @ppos: current file position 1430 * @ppos: current file position
1431 * @desc: read_descriptor 1431 * @iter: data destination
1432 * @written: already copied
1432 * 1433 *
1433 * This is a generic file read routine, and uses the 1434 * This is a generic file read routine, and uses the
1434 * mapping->a_ops->readpage() function for the actual low-level stuff. 1435 * mapping->a_ops->readpage() function for the actual low-level stuff.
@@ -1436,8 +1437,8 @@ static void shrink_readahead_size_eio(struct file *filp,
1436 * This is really ugly. But the goto's actually try to clarify some 1437 * This is really ugly. But the goto's actually try to clarify some
1437 * of the logic when it comes to error handling etc. 1438 * of the logic when it comes to error handling etc.
1438 */ 1439 */
1439static void do_generic_file_read(struct file *filp, loff_t *ppos, 1440static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos,
1440 read_descriptor_t *desc) 1441 struct iov_iter *iter, ssize_t written)
1441{ 1442{
1442 struct address_space *mapping = filp->f_mapping; 1443 struct address_space *mapping = filp->f_mapping;
1443 struct inode *inode = mapping->host; 1444 struct inode *inode = mapping->host;
@@ -1447,12 +1448,12 @@ static void do_generic_file_read(struct file *filp, loff_t *ppos,
1447 pgoff_t prev_index; 1448 pgoff_t prev_index;
1448 unsigned long offset; /* offset into pagecache page */ 1449 unsigned long offset; /* offset into pagecache page */
1449 unsigned int prev_offset; 1450 unsigned int prev_offset;
1450 int error; 1451 int error = 0;
1451 1452
1452 index = *ppos >> PAGE_CACHE_SHIFT; 1453 index = *ppos >> PAGE_CACHE_SHIFT;
1453 prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT; 1454 prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT;
1454 prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1); 1455 prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1);
1455 last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; 1456 last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
1456 offset = *ppos & ~PAGE_CACHE_MASK; 1457 offset = *ppos & ~PAGE_CACHE_MASK;
1457 1458
1458 for (;;) { 1459 for (;;) {
@@ -1487,7 +1488,7 @@ find_page:
1487 if (!page->mapping) 1488 if (!page->mapping)
1488 goto page_not_up_to_date_locked; 1489 goto page_not_up_to_date_locked;
1489 if (!mapping->a_ops->is_partially_uptodate(page, 1490 if (!mapping->a_ops->is_partially_uptodate(page,
1490 desc, offset)) 1491 offset, iter->count))
1491 goto page_not_up_to_date_locked; 1492 goto page_not_up_to_date_locked;
1492 unlock_page(page); 1493 unlock_page(page);
1493 } 1494 }
@@ -1537,24 +1538,23 @@ page_ok:
1537 /* 1538 /*
1538 * Ok, we have the page, and it's up-to-date, so 1539 * Ok, we have the page, and it's up-to-date, so
1539 * now we can copy it to user space... 1540 * now we can copy it to user space...
1540 *
1541 * The file_read_actor routine returns how many bytes were
1542 * actually used..
1543 * NOTE! This may not be the same as how much of a user buffer
1544 * we filled up (we may be padding etc), so we can only update
1545 * "pos" here (the actor routine has to update the user buffer
1546 * pointers and the remaining count).
1547 */ 1541 */
1548 ret = file_read_actor(desc, page, offset, nr); 1542
1543 ret = copy_page_to_iter(page, offset, nr, iter);
1549 offset += ret; 1544 offset += ret;
1550 index += offset >> PAGE_CACHE_SHIFT; 1545 index += offset >> PAGE_CACHE_SHIFT;
1551 offset &= ~PAGE_CACHE_MASK; 1546 offset &= ~PAGE_CACHE_MASK;
1552 prev_offset = offset; 1547 prev_offset = offset;
1553 1548
1554 page_cache_release(page); 1549 page_cache_release(page);
1555 if (ret == nr && desc->count) 1550 written += ret;
1556 continue; 1551 if (!iov_iter_count(iter))
1557 goto out; 1552 goto out;
1553 if (ret < nr) {
1554 error = -EFAULT;
1555 goto out;
1556 }
1557 continue;
1558 1558
1559page_not_up_to_date: 1559page_not_up_to_date:
1560 /* Get exclusive access to the page ... */ 1560 /* Get exclusive access to the page ... */
@@ -1589,6 +1589,7 @@ readpage:
1589 if (unlikely(error)) { 1589 if (unlikely(error)) {
1590 if (error == AOP_TRUNCATED_PAGE) { 1590 if (error == AOP_TRUNCATED_PAGE) {
1591 page_cache_release(page); 1591 page_cache_release(page);
1592 error = 0;
1592 goto find_page; 1593 goto find_page;
1593 } 1594 }
1594 goto readpage_error; 1595 goto readpage_error;
@@ -1619,7 +1620,6 @@ readpage:
1619 1620
1620readpage_error: 1621readpage_error:
1621 /* UHHUH! A synchronous read error occurred. Report it */ 1622 /* UHHUH! A synchronous read error occurred. Report it */
1622 desc->error = error;
1623 page_cache_release(page); 1623 page_cache_release(page);
1624 goto out; 1624 goto out;
1625 1625
@@ -1630,16 +1630,17 @@ no_cached_page:
1630 */ 1630 */
1631 page = page_cache_alloc_cold(mapping); 1631 page = page_cache_alloc_cold(mapping);
1632 if (!page) { 1632 if (!page) {
1633 desc->error = -ENOMEM; 1633 error = -ENOMEM;
1634 goto out; 1634 goto out;
1635 } 1635 }
1636 error = add_to_page_cache_lru(page, mapping, 1636 error = add_to_page_cache_lru(page, mapping,
1637 index, GFP_KERNEL); 1637 index, GFP_KERNEL);
1638 if (error) { 1638 if (error) {
1639 page_cache_release(page); 1639 page_cache_release(page);
1640 if (error == -EEXIST) 1640 if (error == -EEXIST) {
1641 error = 0;
1641 goto find_page; 1642 goto find_page;
1642 desc->error = error; 1643 }
1643 goto out; 1644 goto out;
1644 } 1645 }
1645 goto readpage; 1646 goto readpage;
@@ -1652,44 +1653,7 @@ out:
1652 1653
1653 *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset; 1654 *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset;
1654 file_accessed(filp); 1655 file_accessed(filp);
1655} 1656 return written ? written : error;
1656
1657int file_read_actor(read_descriptor_t *desc, struct page *page,
1658 unsigned long offset, unsigned long size)
1659{
1660 char *kaddr;
1661 unsigned long left, count = desc->count;
1662
1663 if (size > count)
1664 size = count;
1665
1666 /*
1667 * Faults on the destination of a read are common, so do it before
1668 * taking the kmap.
1669 */
1670 if (!fault_in_pages_writeable(desc->arg.buf, size)) {
1671 kaddr = kmap_atomic(page);
1672 left = __copy_to_user_inatomic(desc->arg.buf,
1673 kaddr + offset, size);
1674 kunmap_atomic(kaddr);
1675 if (left == 0)
1676 goto success;
1677 }
1678
1679 /* Do it the slow way */
1680 kaddr = kmap(page);
1681 left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
1682 kunmap(page);
1683
1684 if (left) {
1685 size -= left;
1686 desc->error = -EFAULT;
1687 }
1688success:
1689 desc->count = count - size;
1690 desc->written += size;
1691 desc->arg.buf += size;
1692 return size;
1693} 1657}
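copy_page_to_iter() is the generalization of the just-removed file_read_actor(): the same job, but spread over the iterator's segments and reporting progress through the iterator instead of a read_descriptor_t. Its body is not in this diff (it lands in mm/iov_iter.c, per the mm/Makefile hunk), so the following is only a slow-path sketch; the real version presumably keeps the prefault-plus-atomic-copy fast path seen in the removed code:

	/* Slow-path sketch of copy_page_to_iter(): copy up to 'bytes' from
	 * page+offset into the iterator's user segments, advancing the
	 * iterator as we go.  Returns bytes copied; a short count means a
	 * user page fault. */
	static size_t copy_page_to_iter_sketch(struct page *page, size_t offset,
					       size_t bytes, struct iov_iter *i)
	{
		char *kaddr = kmap(page);
		size_t done = 0;

		bytes = min(bytes, iov_iter_count(i));
		while (bytes) {
			struct iovec v = iov_iter_iovec(i); /* current segment */
			size_t n = min(bytes, v.iov_len);
			size_t left = __copy_to_user(v.iov_base,
						     kaddr + offset + done, n);

			iov_iter_advance(i, n - left);
			done += n - left;
			bytes -= n - left;
			if (left)
				break;	/* faulted: report short copy */
		}
		kunmap(page);
		return done;
	}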
1694 1658
1695/* 1659/*
@@ -1747,14 +1711,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1747{ 1711{
1748 struct file *filp = iocb->ki_filp; 1712 struct file *filp = iocb->ki_filp;
1749 ssize_t retval; 1713 ssize_t retval;
1750 unsigned long seg = 0;
1751 size_t count; 1714 size_t count;
1752 loff_t *ppos = &iocb->ki_pos; 1715 loff_t *ppos = &iocb->ki_pos;
1716 struct iov_iter i;
1753 1717
1754 count = 0; 1718 count = 0;
1755 retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); 1719 retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
1756 if (retval) 1720 if (retval)
1757 return retval; 1721 return retval;
1722 iov_iter_init(&i, iov, nr_segs, count, 0);
1758 1723
1759 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ 1724 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
1760 if (filp->f_flags & O_DIRECT) { 1725 if (filp->f_flags & O_DIRECT) {
@@ -1776,6 +1741,11 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1776 if (retval > 0) { 1741 if (retval > 0) {
1777 *ppos = pos + retval; 1742 *ppos = pos + retval;
1778 count -= retval; 1743 count -= retval;
1744 /*
1745 * If we did a short DIO read we need to skip the
1746 * section of the iov that we've already read data into.
1747 */
1748 iov_iter_advance(&i, retval);
1779 } 1749 }
1780 1750
1781 /* 1751 /*
@@ -1792,39 +1762,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1792 } 1762 }
1793 } 1763 }
1794 1764
1795 count = retval; 1765 retval = do_generic_file_read(filp, ppos, &i, retval);
1796 for (seg = 0; seg < nr_segs; seg++) {
1797 read_descriptor_t desc;
1798 loff_t offset = 0;
1799
1800 /*
1801 * If we did a short DIO read we need to skip the section of the
1802 * iov that we've already read data into.
1803 */
1804 if (count) {
1805 if (count > iov[seg].iov_len) {
1806 count -= iov[seg].iov_len;
1807 continue;
1808 }
1809 offset = count;
1810 count = 0;
1811 }
1812
1813 desc.written = 0;
1814 desc.arg.buf = iov[seg].iov_base + offset;
1815 desc.count = iov[seg].iov_len - offset;
1816 if (desc.count == 0)
1817 continue;
1818 desc.error = 0;
1819 do_generic_file_read(filp, ppos, &desc);
1820 retval += desc.written;
1821 if (desc.error) {
1822 retval = retval ?: desc.error;
1823 break;
1824 }
1825 if (desc.count > 0)
1826 break;
1827 }
1828out: 1766out:
1829 return retval; 1767 return retval;
1830} 1768}
@@ -2335,150 +2273,6 @@ struct page *read_cache_page_gfp(struct address_space *mapping,
2335} 2273}
2336EXPORT_SYMBOL(read_cache_page_gfp); 2274EXPORT_SYMBOL(read_cache_page_gfp);
2337 2275
2338static size_t __iovec_copy_from_user_inatomic(char *vaddr,
2339 const struct iovec *iov, size_t base, size_t bytes)
2340{
2341 size_t copied = 0, left = 0;
2342
2343 while (bytes) {
2344 char __user *buf = iov->iov_base + base;
2345 int copy = min(bytes, iov->iov_len - base);
2346
2347 base = 0;
2348 left = __copy_from_user_inatomic(vaddr, buf, copy);
2349 copied += copy;
2350 bytes -= copy;
2351 vaddr += copy;
2352 iov++;
2353
2354 if (unlikely(left))
2355 break;
2356 }
2357 return copied - left;
2358}
2359
2360/*
2361 * Copy as much as we can into the page and return the number of bytes which
2362 * were successfully copied. If a fault is encountered then return the number of
2363 * bytes which were copied.
2364 */
2365size_t iov_iter_copy_from_user_atomic(struct page *page,
2366 struct iov_iter *i, unsigned long offset, size_t bytes)
2367{
2368 char *kaddr;
2369 size_t copied;
2370
2371 BUG_ON(!in_atomic());
2372 kaddr = kmap_atomic(page);
2373 if (likely(i->nr_segs == 1)) {
2374 int left;
2375 char __user *buf = i->iov->iov_base + i->iov_offset;
2376 left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
2377 copied = bytes - left;
2378 } else {
2379 copied = __iovec_copy_from_user_inatomic(kaddr + offset,
2380 i->iov, i->iov_offset, bytes);
2381 }
2382 kunmap_atomic(kaddr);
2383
2384 return copied;
2385}
2386EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
2387
2388/*
2389 * This has the same sideeffects and return value as
2390 * iov_iter_copy_from_user_atomic().
2391 * The difference is that it attempts to resolve faults.
2392 * Page must not be locked.
2393 */
2394size_t iov_iter_copy_from_user(struct page *page,
2395 struct iov_iter *i, unsigned long offset, size_t bytes)
2396{
2397 char *kaddr;
2398 size_t copied;
2399
2400 kaddr = kmap(page);
2401 if (likely(i->nr_segs == 1)) {
2402 int left;
2403 char __user *buf = i->iov->iov_base + i->iov_offset;
2404 left = __copy_from_user(kaddr + offset, buf, bytes);
2405 copied = bytes - left;
2406 } else {
2407 copied = __iovec_copy_from_user_inatomic(kaddr + offset,
2408 i->iov, i->iov_offset, bytes);
2409 }
2410 kunmap(page);
2411 return copied;
2412}
2413EXPORT_SYMBOL(iov_iter_copy_from_user);
2414
2415void iov_iter_advance(struct iov_iter *i, size_t bytes)
2416{
2417 BUG_ON(i->count < bytes);
2418
2419 if (likely(i->nr_segs == 1)) {
2420 i->iov_offset += bytes;
2421 i->count -= bytes;
2422 } else {
2423 const struct iovec *iov = i->iov;
2424 size_t base = i->iov_offset;
2425 unsigned long nr_segs = i->nr_segs;
2426
2427 /*
2428 * The !iov->iov_len check ensures we skip over unlikely
2429 * zero-length segments (without overruning the iovec).
2430 */
2431 while (bytes || unlikely(i->count && !iov->iov_len)) {
2432 int copy;
2433
2434 copy = min(bytes, iov->iov_len - base);
2435 BUG_ON(!i->count || i->count < copy);
2436 i->count -= copy;
2437 bytes -= copy;
2438 base += copy;
2439 if (iov->iov_len == base) {
2440 iov++;
2441 nr_segs--;
2442 base = 0;
2443 }
2444 }
2445 i->iov = iov;
2446 i->iov_offset = base;
2447 i->nr_segs = nr_segs;
2448 }
2449}
2450EXPORT_SYMBOL(iov_iter_advance);
2451
2452/*
2453 * Fault in the first iovec of the given iov_iter, to a maximum length
2454 * of bytes. Returns 0 on success, or non-zero if the memory could not be
2455 * accessed (ie. because it is an invalid address).
2456 *
2457 * writev-intensive code may want this to prefault several iovecs -- that
2458 * would be possible (callers must not rely on the fact that _only_ the
2459 * first iovec will be faulted with the current implementation).
2460 */
2461int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
2462{
2463 char __user *buf = i->iov->iov_base + i->iov_offset;
2464 bytes = min(bytes, i->iov->iov_len - i->iov_offset);
2465 return fault_in_pages_readable(buf, bytes);
2466}
2467EXPORT_SYMBOL(iov_iter_fault_in_readable);
2468
2469/*
2470 * Return the count of just the current iov_iter segment.
2471 */
2472size_t iov_iter_single_seg_count(const struct iov_iter *i)
2473{
2474 const struct iovec *iov = i->iov;
2475 if (i->nr_segs == 1)
2476 return i->count;
2477 else
2478 return min(i->count, iov->iov_len - i->iov_offset);
2479}
2480EXPORT_SYMBOL(iov_iter_single_seg_count);
2481
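None of the helpers removed above actually die: mm/Makefile now builds mm/iov_iter.c, which is presumably their new home, joined by copy_page_to_iter(). One behavioural detail does change, visible in the generic_perform_write() hunk below: the pagefault_disable()/pagefault_enable() bracket around iov_iter_copy_from_user_atomic() disappears, which only works because kmap_atomic() itself disables page faults. Sketched with that assumption made explicit (the moved body is not shown in this diff):

	/* Sketch: the atomic copy is self-contained because kmap_atomic()
	 * implies pagefault_disable(), so callers no longer need their
	 * own bracket around it. */
	size_t iov_iter_copy_from_user_atomic(struct page *page,
			struct iov_iter *i, unsigned long offset, size_t bytes)
	{
		char *kaddr = kmap_atomic(page);	/* disables page faults */
		size_t copied = __iovec_copy_from_user_inatomic(kaddr + offset,
						i->iov, i->iov_offset, bytes);
		kunmap_atomic(kaddr);
		return copied;
	}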
2482/* 2276/*
2483 * Performs necessary checks before doing a write 2277 * Performs necessary checks before doing a write
2484 * 2278 *
@@ -2585,7 +2379,7 @@ EXPORT_SYMBOL(pagecache_write_end);
2585 2379
2586ssize_t 2380ssize_t
2587generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, 2381generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
2588 unsigned long *nr_segs, loff_t pos, loff_t *ppos, 2382 unsigned long *nr_segs, loff_t pos,
2589 size_t count, size_t ocount) 2383 size_t count, size_t ocount)
2590{ 2384{
2591 struct file *file = iocb->ki_filp; 2385 struct file *file = iocb->ki_filp;
@@ -2646,7 +2440,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
2646 i_size_write(inode, pos); 2440 i_size_write(inode, pos);
2647 mark_inode_dirty(inode); 2441 mark_inode_dirty(inode);
2648 } 2442 }
2649 *ppos = pos; 2443 iocb->ki_pos = pos;
2650 } 2444 }
2651out: 2445out:
2652 return written; 2446 return written;
@@ -2692,7 +2486,7 @@ found:
2692} 2486}
2693EXPORT_SYMBOL(grab_cache_page_write_begin); 2487EXPORT_SYMBOL(grab_cache_page_write_begin);
2694 2488
2695static ssize_t generic_perform_write(struct file *file, 2489ssize_t generic_perform_write(struct file *file,
2696 struct iov_iter *i, loff_t pos) 2490 struct iov_iter *i, loff_t pos)
2697{ 2491{
2698 struct address_space *mapping = file->f_mapping; 2492 struct address_space *mapping = file->f_mapping;
@@ -2742,9 +2536,7 @@ again:
2742 if (mapping_writably_mapped(mapping)) 2536 if (mapping_writably_mapped(mapping))
2743 flush_dcache_page(page); 2537 flush_dcache_page(page);
2744 2538
2745 pagefault_disable();
2746 copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); 2539 copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
2747 pagefault_enable();
2748 flush_dcache_page(page); 2540 flush_dcache_page(page);
2749 2541
2750 mark_page_accessed(page); 2542 mark_page_accessed(page);
@@ -2782,27 +2574,7 @@ again:
2782 2574
2783 return written ? written : status; 2575 return written ? written : status;
2784} 2576}
2785 2577EXPORT_SYMBOL(generic_perform_write);
2786ssize_t
2787generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
2788 unsigned long nr_segs, loff_t pos, loff_t *ppos,
2789 size_t count, ssize_t written)
2790{
2791 struct file *file = iocb->ki_filp;
2792 ssize_t status;
2793 struct iov_iter i;
2794
2795 iov_iter_init(&i, iov, nr_segs, count, written);
2796 status = generic_perform_write(file, &i, pos);
2797
2798 if (likely(status >= 0)) {
2799 written += status;
2800 *ppos = pos + status;
2801 }
2802
2803 return written ? written : status;
2804}
2805EXPORT_SYMBOL(generic_file_buffered_write);
2806 2578
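With generic_perform_write() no longer static (and exported), a filesystem can drive buffered writes through an iov_iter directly instead of going through the removed generic_file_buffered_write(). Note also that the pagefault_disable()/pagefault_enable() pair around iov_iter_copy_from_user_atomic() is dropped in the hunk above, presumably because kmap_atomic() already disables pagefaults. A minimal sketch of a caller, using only the signatures visible in this patch; the myfs_ name is hypothetical:

	/* Hypothetical caller of the newly exported generic_perform_write(). */
	static ssize_t myfs_buffered_write(struct kiocb *iocb, const struct iovec *iov,
					   unsigned long nr_segs, size_t count)
	{
		struct iov_iter from;
		ssize_t written;

		iov_iter_init(&from, iov, nr_segs, count, 0);
		written = generic_perform_write(iocb->ki_filp, &from, iocb->ki_pos);
		if (likely(written >= 0))
			iocb->ki_pos += written;	/* caller owns the position update */
		return written;
	}
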
2807/** 2579/**
2808 * __generic_file_aio_write - write data to a file 2580 * __generic_file_aio_write - write data to a file
@@ -2824,16 +2596,18 @@ EXPORT_SYMBOL(generic_file_buffered_write);
2824 * avoid syncing under i_mutex. 2596 * avoid syncing under i_mutex.
2825 */ 2597 */
2826ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, 2598ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2827 unsigned long nr_segs, loff_t *ppos) 2599 unsigned long nr_segs)
2828{ 2600{
2829 struct file *file = iocb->ki_filp; 2601 struct file *file = iocb->ki_filp;
2830 struct address_space * mapping = file->f_mapping; 2602 struct address_space * mapping = file->f_mapping;
2831 size_t ocount; /* original count */ 2603 size_t ocount; /* original count */
2832 size_t count; /* after file limit checks */ 2604 size_t count; /* after file limit checks */
2833 struct inode *inode = mapping->host; 2605 struct inode *inode = mapping->host;
2834 loff_t pos; 2606 loff_t pos = iocb->ki_pos;
2835 ssize_t written; 2607 ssize_t written = 0;
2836 ssize_t err; 2608 ssize_t err;
2609 ssize_t status;
2610 struct iov_iter from;
2837 2611
2838 ocount = 0; 2612 ocount = 0;
2839 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); 2613 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
@@ -2841,12 +2615,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2841 return err; 2615 return err;
2842 2616
2843 count = ocount; 2617 count = ocount;
2844 pos = *ppos;
2845 2618
2846 /* We can write back this queue in page reclaim */ 2619 /* We can write back this queue in page reclaim */
2847 current->backing_dev_info = mapping->backing_dev_info; 2620 current->backing_dev_info = mapping->backing_dev_info;
2848 written = 0;
2849
2850 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); 2621 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
2851 if (err) 2622 if (err)
2852 goto out; 2623 goto out;
@@ -2862,45 +2633,47 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2862 if (err) 2633 if (err)
2863 goto out; 2634 goto out;
2864 2635
2636 iov_iter_init(&from, iov, nr_segs, count, 0);
2637
2865 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ 2638 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
2866 if (unlikely(file->f_flags & O_DIRECT)) { 2639 if (unlikely(file->f_flags & O_DIRECT)) {
2867 loff_t endbyte; 2640 loff_t endbyte;
2868 ssize_t written_buffered;
2869 2641
2870 written = generic_file_direct_write(iocb, iov, &nr_segs, pos, 2642 written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos,
2871 ppos, count, ocount); 2643 count, ocount);
2872 if (written < 0 || written == count) 2644 if (written < 0 || written == count)
2873 goto out; 2645 goto out;
2646 iov_iter_advance(&from, written);
2647
2874 /* 2648 /*
2875 * direct-io write to a hole: fall through to buffered I/O 2649 * direct-io write to a hole: fall through to buffered I/O
2876 * for completing the rest of the request. 2650 * for completing the rest of the request.
2877 */ 2651 */
2878 pos += written; 2652 pos += written;
2879 count -= written; 2653 count -= written;
2880 written_buffered = generic_file_buffered_write(iocb, iov, 2654
2881 nr_segs, pos, ppos, count, 2655 status = generic_perform_write(file, &from, pos);
2882 written);
2883 /* 2656 /*
2884 * If generic_file_buffered_write() retuned a synchronous error 2657 * If generic_perform_write() returned a synchronous error
2885 * then we want to return the number of bytes which were 2658 * then we want to return the number of bytes which were
2886 * direct-written, or the error code if that was zero. Note 2659 * direct-written, or the error code if that was zero. Note
2887 * that this differs from normal direct-io semantics, which 2660 * that this differs from normal direct-io semantics, which
2888 * will return -EFOO even if some bytes were written. 2661 * will return -EFOO even if some bytes were written.
2889 */ 2662 */
2890 if (written_buffered < 0) { 2663 if (unlikely(status < 0) && !written) {
2891 err = written_buffered; 2664 err = status;
2892 goto out; 2665 goto out;
2893 } 2666 }
2894 2667 iocb->ki_pos = pos + status;
2895 /* 2668 /*
2896 * We need to ensure that the page cache pages are written to 2669 * We need to ensure that the page cache pages are written to
2897 * disk and invalidated to preserve the expected O_DIRECT 2670 * disk and invalidated to preserve the expected O_DIRECT
2898 * semantics. 2671 * semantics.
2899 */ 2672 */
2900 endbyte = pos + written_buffered - written - 1; 2673 endbyte = pos + status - 1;
2901 err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte); 2674 err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
2902 if (err == 0) { 2675 if (err == 0) {
2903 written = written_buffered; 2676 written += status;
2904 invalidate_mapping_pages(mapping, 2677 invalidate_mapping_pages(mapping,
2905 pos >> PAGE_CACHE_SHIFT, 2678 pos >> PAGE_CACHE_SHIFT,
2906 endbyte >> PAGE_CACHE_SHIFT); 2679 endbyte >> PAGE_CACHE_SHIFT);
@@ -2911,8 +2684,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2911 */ 2684 */
2912 } 2685 }
2913 } else { 2686 } else {
2914 written = generic_file_buffered_write(iocb, iov, nr_segs, 2687 written = generic_perform_write(file, &from, pos);
2915 pos, ppos, count, written); 2688 if (likely(written >= 0))
2689 iocb->ki_pos = pos + written;
2916 } 2690 }
2917out: 2691out:
2918 current->backing_dev_info = NULL; 2692 current->backing_dev_info = NULL;
@@ -2941,7 +2715,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2941 BUG_ON(iocb->ki_pos != pos); 2715 BUG_ON(iocb->ki_pos != pos);
2942 2716
2943 mutex_lock(&inode->i_mutex); 2717 mutex_lock(&inode->i_mutex);
2944 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); 2718 ret = __generic_file_aio_write(iocb, iov, nr_segs);
2945 mutex_unlock(&inode->i_mutex); 2719 mutex_unlock(&inode->i_mutex);
2946 2720
2947 if (ret > 0) { 2721 if (ret > 0) {
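The net effect on the write path: __generic_file_aio_write() loses its loff_t *ppos parameter and works on iocb->ki_pos throughout, and the direct-IO fallback advances a single iov_iter with iov_iter_advance() instead of re-walking the iovec array. For an external caller the conversion is mechanical (a sketch, assuming i_mutex is held as before):

	/* Old convention: position passed by reference. */
	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);

	/* New convention: position is read from, and written back to, iocb->ki_pos. */
	ret = __generic_file_aio_write(iocb, iov, nr_segs);
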
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
new file mode 100644
index 000000000000..10e46cd721de
--- /dev/null
+++ b/mm/iov_iter.c
@@ -0,0 +1,224 @@
1#include <linux/export.h>
2#include <linux/uio.h>
3#include <linux/pagemap.h>
4
5size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
6 struct iov_iter *i)
7{
8 size_t skip, copy, left, wanted;
9 const struct iovec *iov;
10 char __user *buf;
11 void *kaddr, *from;
12
13 if (unlikely(bytes > i->count))
14 bytes = i->count;
15
16 if (unlikely(!bytes))
17 return 0;
18
19 wanted = bytes;
20 iov = i->iov;
21 skip = i->iov_offset;
22 buf = iov->iov_base + skip;
23 copy = min(bytes, iov->iov_len - skip);
24
25 if (!fault_in_pages_writeable(buf, copy)) {
26 kaddr = kmap_atomic(page);
27 from = kaddr + offset;
28
29 /* first chunk, usually the only one */
30 left = __copy_to_user_inatomic(buf, from, copy);
31 copy -= left;
32 skip += copy;
33 from += copy;
34 bytes -= copy;
35
36 while (unlikely(!left && bytes)) {
37 iov++;
38 buf = iov->iov_base;
39 copy = min(bytes, iov->iov_len);
40 left = __copy_to_user_inatomic(buf, from, copy);
41 copy -= left;
42 skip = copy;
43 from += copy;
44 bytes -= copy;
45 }
46 if (likely(!bytes)) {
47 kunmap_atomic(kaddr);
48 goto done;
49 }
50 offset = from - kaddr;
51 buf += copy;
52 kunmap_atomic(kaddr);
53 copy = min(bytes, iov->iov_len - skip);
54 }
55 /* Too bad - revert to non-atomic kmap */
56 kaddr = kmap(page);
57 from = kaddr + offset;
58 left = __copy_to_user(buf, from, copy);
59 copy -= left;
60 skip += copy;
61 from += copy;
62 bytes -= copy;
63 while (unlikely(!left && bytes)) {
64 iov++;
65 buf = iov->iov_base;
66 copy = min(bytes, iov->iov_len);
67 left = __copy_to_user(buf, from, copy);
68 copy -= left;
69 skip = copy;
70 from += copy;
71 bytes -= copy;
72 }
73 kunmap(page);
74done:
75 i->count -= wanted - bytes;
76 i->nr_segs -= iov - i->iov;
77 i->iov = iov;
78 i->iov_offset = skip;
79 return wanted - bytes;
80}
81EXPORT_SYMBOL(copy_page_to_iter);
82
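copy_page_to_iter() is the read-side workhorse of the new file: it copies from a (possibly highmem) page into however many iovecs it takes, trying the atomic kmap path after prefaulting the first chunk and falling back to plain kmap()/__copy_to_user() if that still faults, and it advances the iterator itself. A short return therefore means an unreadable user address; callers in this patch turn that into -EFAULT only when iterator space remains. A condensed sketch of that convention, names as in this patch:

	size_t nr = min_t(size_t, PAGE_SIZE - offset, iov_iter_count(&iter));
	size_t copied = copy_page_to_iter(page, offset, nr, &iter);
	/* the iterator has already been advanced by 'copied' */
	if (copied < nr && iov_iter_count(&iter))
		return -EFAULT;		/* short copy with room left: bad user address */
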
83static size_t __iovec_copy_from_user_inatomic(char *vaddr,
84 const struct iovec *iov, size_t base, size_t bytes)
85{
86 size_t copied = 0, left = 0;
87
88 while (bytes) {
89 char __user *buf = iov->iov_base + base;
90 int copy = min(bytes, iov->iov_len - base);
91
92 base = 0;
93 left = __copy_from_user_inatomic(vaddr, buf, copy);
94 copied += copy;
95 bytes -= copy;
96 vaddr += copy;
97 iov++;
98
99 if (unlikely(left))
100 break;
101 }
102 return copied - left;
103}
104
105/*
106 * Copy as much as we can into the page and return the number of bytes which
107 * were successfully copied. If a fault is encountered then return the number of
108 * bytes which were copied.
109 */
110size_t iov_iter_copy_from_user_atomic(struct page *page,
111 struct iov_iter *i, unsigned long offset, size_t bytes)
112{
113 char *kaddr;
114 size_t copied;
115
116 kaddr = kmap_atomic(page);
117 if (likely(i->nr_segs == 1)) {
118 int left;
119 char __user *buf = i->iov->iov_base + i->iov_offset;
120 left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
121 copied = bytes - left;
122 } else {
123 copied = __iovec_copy_from_user_inatomic(kaddr + offset,
124 i->iov, i->iov_offset, bytes);
125 }
126 kunmap_atomic(kaddr);
127
128 return copied;
129}
130EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
131
132/*
133 * This has the same sideeffects and return value as
134 * iov_iter_copy_from_user_atomic().
135 * The difference is that it attempts to resolve faults.
136 * Page must not be locked.
137 */
138size_t iov_iter_copy_from_user(struct page *page,
139 struct iov_iter *i, unsigned long offset, size_t bytes)
140{
141 char *kaddr;
142 size_t copied;
143
144 kaddr = kmap(page);
145 if (likely(i->nr_segs == 1)) {
146 int left;
147 char __user *buf = i->iov->iov_base + i->iov_offset;
148 left = __copy_from_user(kaddr + offset, buf, bytes);
149 copied = bytes - left;
150 } else {
151 copied = __iovec_copy_from_user_inatomic(kaddr + offset,
152 i->iov, i->iov_offset, bytes);
153 }
154 kunmap(page);
155 return copied;
156}
157EXPORT_SYMBOL(iov_iter_copy_from_user);
158
159void iov_iter_advance(struct iov_iter *i, size_t bytes)
160{
161 BUG_ON(i->count < bytes);
162
163 if (likely(i->nr_segs == 1)) {
164 i->iov_offset += bytes;
165 i->count -= bytes;
166 } else {
167 const struct iovec *iov = i->iov;
168 size_t base = i->iov_offset;
169 unsigned long nr_segs = i->nr_segs;
170
171 /*
172 * The !iov->iov_len check ensures we skip over unlikely
 173 * zero-length segments (without overrunning the iovec).
174 */
175 while (bytes || unlikely(i->count && !iov->iov_len)) {
176 int copy;
177
178 copy = min(bytes, iov->iov_len - base);
179 BUG_ON(!i->count || i->count < copy);
180 i->count -= copy;
181 bytes -= copy;
182 base += copy;
183 if (iov->iov_len == base) {
184 iov++;
185 nr_segs--;
186 base = 0;
187 }
188 }
189 i->iov = iov;
190 i->iov_offset = base;
191 i->nr_segs = nr_segs;
192 }
193}
194EXPORT_SYMBOL(iov_iter_advance);
195
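iov_iter_advance() replaces the open-coded segment walking callers used to do: it consumes bytes from the front of the iterator, stepping across segment boundaries and past zero-length iovecs. The O_DIRECT fallback in the filemap.c hunk above is the canonical user (sketch, names from this patch):

	written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos,
					    count, ocount);
	if (written > 0)
		iov_iter_advance(&from, written);	/* may cross several iovecs */
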
196/*
197 * Fault in the first iovec of the given iov_iter, to a maximum length
198 * of bytes. Returns 0 on success, or non-zero if the memory could not be
 199 * accessed (i.e. because it is an invalid address).
200 *
201 * writev-intensive code may want this to prefault several iovecs -- that
202 * would be possible (callers must not rely on the fact that _only_ the
203 * first iovec will be faulted with the current implementation).
204 */
205int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
206{
207 char __user *buf = i->iov->iov_base + i->iov_offset;
208 bytes = min(bytes, i->iov->iov_len - i->iov_offset);
209 return fault_in_pages_readable(buf, bytes);
210}
211EXPORT_SYMBOL(iov_iter_fault_in_readable);
212
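iov_iter_fault_in_readable() exists so that an atomic copy has a fighting chance: generic_perform_write() prefaults the first segment before taking the page lock, then copies with pagefaults implicitly disabled and retries on a short copy. A condensed sketch of that loop (error handling trimmed; this mirrors the in-tree caller rather than adding anything new):

	again:
		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
			status = -EFAULT;	/* first segment not mappable */
			break;
		}
		/* ... ->write_begin() to get a locked page, then: */
		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
		if (unlikely(copied == 0)) {
			bytes = iov_iter_single_seg_count(i);
			goto again;		/* faulted under the lock; shrink and retry */
		}
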
213/*
214 * Return the count of just the current iov_iter segment.
215 */
216size_t iov_iter_single_seg_count(const struct iov_iter *i)
217{
218 const struct iovec *iov = i->iov;
219 if (i->nr_segs == 1)
220 return i->count;
221 else
222 return min(i->count, iov->iov_len - i->iov_offset);
223}
224EXPORT_SYMBOL(iov_iter_single_seg_count);
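Taken together, the new file gives a small, self-contained iterator API: init, copy in/out, advance, prefault, and a per-segment count. A sketch of draining an iterator one contiguous segment at a time using only these helpers (iov_iter_count() is assumed to be the byte-count accessor added to linux/uio.h; iov, nr_segs and count are given):

	struct iov_iter i;

	iov_iter_init(&i, iov, nr_segs, count, 0);
	while (iov_iter_count(&i)) {
		size_t seg = iov_iter_single_seg_count(&i);
		/* ... consume up to 'seg' bytes at i.iov->iov_base + i.iov_offset ... */
		iov_iter_advance(&i, seg);	/* also skips zero-length segments */
	}
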
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index cb79065c19e5..8505c9262b35 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -23,129 +23,44 @@
23 23
24/** 24/**
25 * process_vm_rw_pages - read/write pages from task specified 25 * process_vm_rw_pages - read/write pages from task specified
26 * @task: task to read/write from 26 * @pages: array of pointers to pages we want to copy
27 * @mm: mm for task
28 * @process_pages: struct pages area that can store at least
29 * nr_pages_to_copy struct page pointers
30 * @pa: address of page in task to start copying from/to
 31 * @start_offset: offset in page to start copying from/to 27 * @offset: offset in page to start copying from/to
32 * @len: number of bytes to copy 28 * @len: number of bytes to copy
33 * @lvec: iovec array specifying where to copy to/from 29 * @iter: where to copy to/from locally
34 * @lvec_cnt: number of elements in iovec array
35 * @lvec_current: index in iovec array we are up to
36 * @lvec_offset: offset in bytes from current iovec iov_base we are up to
37 * @vm_write: 0 means copy from, 1 means copy to 30 * @vm_write: 0 means copy from, 1 means copy to
38 * @nr_pages_to_copy: number of pages to copy
39 * @bytes_copied: returns number of bytes successfully copied
40 * Returns 0 on success, error code otherwise 31 * Returns 0 on success, error code otherwise
41 */ 32 */
42static int process_vm_rw_pages(struct task_struct *task, 33static int process_vm_rw_pages(struct page **pages,
43 struct mm_struct *mm, 34 unsigned offset,
44 struct page **process_pages, 35 size_t len,
45 unsigned long pa, 36 struct iov_iter *iter,
46 unsigned long start_offset, 37 int vm_write)
47 unsigned long len,
48 const struct iovec *lvec,
49 unsigned long lvec_cnt,
50 unsigned long *lvec_current,
51 size_t *lvec_offset,
52 int vm_write,
53 unsigned int nr_pages_to_copy,
54 ssize_t *bytes_copied)
55{ 38{
56 int pages_pinned;
57 void *target_kaddr;
58 int pgs_copied = 0;
59 int j;
60 int ret;
61 ssize_t bytes_to_copy;
62 ssize_t rc = 0;
63
64 *bytes_copied = 0;
65
66 /* Get the pages we're interested in */
67 down_read(&mm->mmap_sem);
68 pages_pinned = get_user_pages(task, mm, pa,
69 nr_pages_to_copy,
70 vm_write, 0, process_pages, NULL);
71 up_read(&mm->mmap_sem);
72
73 if (pages_pinned != nr_pages_to_copy) {
74 rc = -EFAULT;
75 goto end;
76 }
77
78 /* Do the copy for each page */ 39 /* Do the copy for each page */
79 for (pgs_copied = 0; 40 while (len && iov_iter_count(iter)) {
80 (pgs_copied < nr_pages_to_copy) && (*lvec_current < lvec_cnt); 41 struct page *page = *pages++;
81 pgs_copied++) { 42 size_t copy = PAGE_SIZE - offset;
82 /* Make sure we have a non zero length iovec */ 43 size_t copied;
83 while (*lvec_current < lvec_cnt 44
84 && lvec[*lvec_current].iov_len == 0) 45 if (copy > len)
85 (*lvec_current)++; 46 copy = len;
86 if (*lvec_current == lvec_cnt) 47
87 break; 48 if (vm_write) {
88 49 if (copy > iov_iter_count(iter))
89 /* 50 copy = iov_iter_count(iter);
90 * Will copy smallest of: 51 copied = iov_iter_copy_from_user(page, iter,
91 * - bytes remaining in page 52 offset, copy);
92 * - bytes remaining in destination iovec 53 iov_iter_advance(iter, copied);
93 */ 54 set_page_dirty_lock(page);
94 bytes_to_copy = min_t(ssize_t, PAGE_SIZE - start_offset,
95 len - *bytes_copied);
96 bytes_to_copy = min_t(ssize_t, bytes_to_copy,
97 lvec[*lvec_current].iov_len
98 - *lvec_offset);
99
100 target_kaddr = kmap(process_pages[pgs_copied]) + start_offset;
101
102 if (vm_write)
103 ret = copy_from_user(target_kaddr,
104 lvec[*lvec_current].iov_base
105 + *lvec_offset,
106 bytes_to_copy);
107 else
108 ret = copy_to_user(lvec[*lvec_current].iov_base
109 + *lvec_offset,
110 target_kaddr, bytes_to_copy);
111 kunmap(process_pages[pgs_copied]);
112 if (ret) {
113 *bytes_copied += bytes_to_copy - ret;
114 pgs_copied++;
115 rc = -EFAULT;
116 goto end;
117 }
118 *bytes_copied += bytes_to_copy;
119 *lvec_offset += bytes_to_copy;
120 if (*lvec_offset == lvec[*lvec_current].iov_len) {
121 /*
122 * Need to copy remaining part of page into the
123 * next iovec if there are any bytes left in page
124 */
125 (*lvec_current)++;
126 *lvec_offset = 0;
127 start_offset = (start_offset + bytes_to_copy)
128 % PAGE_SIZE;
129 if (start_offset)
130 pgs_copied--;
131 } else { 55 } else {
132 start_offset = 0; 56 copied = copy_page_to_iter(page, offset, copy, iter);
133 }
134 }
135
136end:
137 if (vm_write) {
138 for (j = 0; j < pages_pinned; j++) {
139 if (j < pgs_copied)
140 set_page_dirty_lock(process_pages[j]);
141 put_page(process_pages[j]);
142 } 57 }
143 } else { 58 len -= copied;
144 for (j = 0; j < pages_pinned; j++) 59 if (copied < copy && iov_iter_count(iter))
145 put_page(process_pages[j]); 60 return -EFAULT;
61 offset = 0;
146 } 62 }
147 63 return 0;
148 return rc;
149} 64}
150 65
151/* Maximum number of pages kmalloc'd to hold struct page's during copy */ 66/* Maximum number of pages kmalloc'd to hold struct page's during copy */
@@ -155,67 +70,60 @@ end:
155 * process_vm_rw_single_vec - read/write pages from task specified 70 * process_vm_rw_single_vec - read/write pages from task specified
156 * @addr: start memory address of target process 71 * @addr: start memory address of target process
157 * @len: size of area to copy to/from 72 * @len: size of area to copy to/from
158 * @lvec: iovec array specifying where to copy to/from locally 73 * @iter: where to copy to/from locally
159 * @lvec_cnt: number of elements in iovec array
160 * @lvec_current: index in iovec array we are up to
161 * @lvec_offset: offset in bytes from current iovec iov_base we are up to
162 * @process_pages: struct pages area that can store at least 74 * @process_pages: struct pages area that can store at least
163 * nr_pages_to_copy struct page pointers 75 * nr_pages_to_copy struct page pointers
164 * @mm: mm for task 76 * @mm: mm for task
165 * @task: task to read/write from 77 * @task: task to read/write from
166 * @vm_write: 0 means copy from, 1 means copy to 78 * @vm_write: 0 means copy from, 1 means copy to
167 * @bytes_copied: returns number of bytes successfully copied
 168 * Returns 0 on success, or an error code on failure 79 * Returns 0 on success, or an error code on failure
169 */ 80 */
170static int process_vm_rw_single_vec(unsigned long addr, 81static int process_vm_rw_single_vec(unsigned long addr,
171 unsigned long len, 82 unsigned long len,
172 const struct iovec *lvec, 83 struct iov_iter *iter,
173 unsigned long lvec_cnt,
174 unsigned long *lvec_current,
175 size_t *lvec_offset,
176 struct page **process_pages, 84 struct page **process_pages,
177 struct mm_struct *mm, 85 struct mm_struct *mm,
178 struct task_struct *task, 86 struct task_struct *task,
179 int vm_write, 87 int vm_write)
180 ssize_t *bytes_copied)
181{ 88{
182 unsigned long pa = addr & PAGE_MASK; 89 unsigned long pa = addr & PAGE_MASK;
183 unsigned long start_offset = addr - pa; 90 unsigned long start_offset = addr - pa;
184 unsigned long nr_pages; 91 unsigned long nr_pages;
185 ssize_t bytes_copied_loop;
186 ssize_t rc = 0; 92 ssize_t rc = 0;
187 unsigned long nr_pages_copied = 0;
188 unsigned long nr_pages_to_copy;
189 unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES 93 unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES
 190 / sizeof(struct page *); 94 / sizeof(struct page *);
191 95
192 *bytes_copied = 0;
193
194 /* Work out address and page range required */ 96 /* Work out address and page range required */
195 if (len == 0) 97 if (len == 0)
196 return 0; 98 return 0;
197 nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1; 99 nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1;
198 100
199 while ((nr_pages_copied < nr_pages) && (*lvec_current < lvec_cnt)) { 101 while (!rc && nr_pages && iov_iter_count(iter)) {
200 nr_pages_to_copy = min(nr_pages - nr_pages_copied, 102 int pages = min(nr_pages, max_pages_per_loop);
201 max_pages_per_loop); 103 size_t bytes;
202 104
203 rc = process_vm_rw_pages(task, mm, process_pages, pa, 105 /* Get the pages we're interested in */
204 start_offset, len, 106 down_read(&mm->mmap_sem);
205 lvec, lvec_cnt, 107 pages = get_user_pages(task, mm, pa, pages,
206 lvec_current, lvec_offset, 108 vm_write, 0, process_pages, NULL);
207 vm_write, nr_pages_to_copy, 109 up_read(&mm->mmap_sem);
208 &bytes_copied_loop);
209 start_offset = 0;
210 *bytes_copied += bytes_copied_loop;
211 110
212 if (rc < 0) { 111 if (pages <= 0)
213 return rc; 112 return -EFAULT;
214 } else { 113
215 len -= bytes_copied_loop; 114 bytes = pages * PAGE_SIZE - start_offset;
216 nr_pages_copied += nr_pages_to_copy; 115 if (bytes > len)
217 pa += nr_pages_to_copy * PAGE_SIZE; 116 bytes = len;
218 } 117
118 rc = process_vm_rw_pages(process_pages,
119 start_offset, bytes, iter,
120 vm_write);
121 len -= bytes;
122 start_offset = 0;
123 nr_pages -= pages;
124 pa += pages * PAGE_SIZE;
125 while (pages)
126 put_page(process_pages[--pages]);
219 } 127 }
220 128
221 return rc; 129 return rc;
@@ -228,8 +136,7 @@ static int process_vm_rw_single_vec(unsigned long addr,
228/** 136/**
229 * process_vm_rw_core - core of reading/writing pages from task specified 137 * process_vm_rw_core - core of reading/writing pages from task specified
230 * @pid: PID of process to read/write from/to 138 * @pid: PID of process to read/write from/to
231 * @lvec: iovec array specifying where to copy to/from locally 139 * @iter: where to copy to/from locally
232 * @liovcnt: size of lvec array
233 * @rvec: iovec array specifying where to copy to/from in the other process 140 * @rvec: iovec array specifying where to copy to/from in the other process
234 * @riovcnt: size of rvec array 141 * @riovcnt: size of rvec array
235 * @flags: currently unused 142 * @flags: currently unused
@@ -238,8 +145,7 @@ static int process_vm_rw_single_vec(unsigned long addr,
 238 * return fewer bytes than expected if an error occurs during the copying 145 * return fewer bytes than expected if an error occurs during the copying
239 * process. 146 * process.
240 */ 147 */
241static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec, 148static ssize_t process_vm_rw_core(pid_t pid, struct iov_iter *iter,
242 unsigned long liovcnt,
243 const struct iovec *rvec, 149 const struct iovec *rvec,
244 unsigned long riovcnt, 150 unsigned long riovcnt,
245 unsigned long flags, int vm_write) 151 unsigned long flags, int vm_write)
@@ -250,13 +156,10 @@ static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec,
250 struct mm_struct *mm; 156 struct mm_struct *mm;
251 unsigned long i; 157 unsigned long i;
252 ssize_t rc = 0; 158 ssize_t rc = 0;
253 ssize_t bytes_copied_loop;
254 ssize_t bytes_copied = 0;
255 unsigned long nr_pages = 0; 159 unsigned long nr_pages = 0;
256 unsigned long nr_pages_iov; 160 unsigned long nr_pages_iov;
257 unsigned long iov_l_curr_idx = 0;
258 size_t iov_l_curr_offset = 0;
259 ssize_t iov_len; 161 ssize_t iov_len;
162 size_t total_len = iov_iter_count(iter);
260 163
261 /* 164 /*
262 * Work out how many pages of struct pages we're going to need 165 * Work out how many pages of struct pages we're going to need
@@ -310,24 +213,20 @@ static ssize_t process_vm_rw_core(pid_t pid, const struct iovec *lvec,
310 goto put_task_struct; 213 goto put_task_struct;
311 } 214 }
312 215
313 for (i = 0; i < riovcnt && iov_l_curr_idx < liovcnt; i++) { 216 for (i = 0; i < riovcnt && iov_iter_count(iter) && !rc; i++)
314 rc = process_vm_rw_single_vec( 217 rc = process_vm_rw_single_vec(
315 (unsigned long)rvec[i].iov_base, rvec[i].iov_len, 218 (unsigned long)rvec[i].iov_base, rvec[i].iov_len,
316 lvec, liovcnt, &iov_l_curr_idx, &iov_l_curr_offset, 219 iter, process_pages, mm, task, vm_write);
317 process_pages, mm, task, vm_write, &bytes_copied_loop); 220
318 bytes_copied += bytes_copied_loop; 221 /* copied = space before - space after */
319 if (rc != 0) { 222 total_len -= iov_iter_count(iter);
320 /* If we have managed to copy any data at all then 223
321 we return the number of bytes copied. Otherwise 224 /* If we have managed to copy any data at all then
322 we return the error code */ 225 we return the number of bytes copied. Otherwise
323 if (bytes_copied) 226 we return the error code */
324 rc = bytes_copied; 227 if (total_len)
325 goto put_mm; 228 rc = total_len;
326 }
327 }
328 229
329 rc = bytes_copied;
330put_mm:
331 mmput(mm); 230 mmput(mm);
332 231
333put_task_struct: 232put_task_struct:
@@ -363,6 +262,7 @@ static ssize_t process_vm_rw(pid_t pid,
363 struct iovec iovstack_r[UIO_FASTIOV]; 262 struct iovec iovstack_r[UIO_FASTIOV];
364 struct iovec *iov_l = iovstack_l; 263 struct iovec *iov_l = iovstack_l;
365 struct iovec *iov_r = iovstack_r; 264 struct iovec *iov_r = iovstack_r;
265 struct iov_iter iter;
366 ssize_t rc; 266 ssize_t rc;
367 267
368 if (flags != 0) 268 if (flags != 0)
@@ -378,13 +278,14 @@ static ssize_t process_vm_rw(pid_t pid,
378 if (rc <= 0) 278 if (rc <= 0)
379 goto free_iovecs; 279 goto free_iovecs;
380 280
281 iov_iter_init(&iter, iov_l, liovcnt, rc, 0);
282
381 rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV, 283 rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
382 iovstack_r, &iov_r); 284 iovstack_r, &iov_r);
383 if (rc <= 0) 285 if (rc <= 0)
384 goto free_iovecs; 286 goto free_iovecs;
385 287
386 rc = process_vm_rw_core(pid, iov_l, liovcnt, iov_r, riovcnt, flags, 288 rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);
387 vm_write);
388 289
389free_iovecs: 290free_iovecs:
390 if (iov_r != iovstack_r) 291 if (iov_r != iovstack_r)
@@ -424,6 +325,7 @@ compat_process_vm_rw(compat_pid_t pid,
424 struct iovec iovstack_r[UIO_FASTIOV]; 325 struct iovec iovstack_r[UIO_FASTIOV];
425 struct iovec *iov_l = iovstack_l; 326 struct iovec *iov_l = iovstack_l;
426 struct iovec *iov_r = iovstack_r; 327 struct iovec *iov_r = iovstack_r;
328 struct iov_iter iter;
427 ssize_t rc = -EFAULT; 329 ssize_t rc = -EFAULT;
428 330
429 if (flags != 0) 331 if (flags != 0)
@@ -439,14 +341,14 @@ compat_process_vm_rw(compat_pid_t pid,
439 &iov_l); 341 &iov_l);
440 if (rc <= 0) 342 if (rc <= 0)
441 goto free_iovecs; 343 goto free_iovecs;
344 iov_iter_init(&iter, iov_l, liovcnt, rc, 0);
442 rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, 345 rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
443 UIO_FASTIOV, iovstack_r, 346 UIO_FASTIOV, iovstack_r,
444 &iov_r); 347 &iov_r);
445 if (rc <= 0) 348 if (rc <= 0)
446 goto free_iovecs; 349 goto free_iovecs;
447 350
448 rc = process_vm_rw_core(pid, iov_l, liovcnt, iov_r, riovcnt, flags, 351 rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);
449 vm_write);
450 352
451free_iovecs: 353free_iovecs:
452 if (iov_r != iovstack_r) 354 if (iov_r != iovstack_r)
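For userspace nothing changes: process_vm_readv()/process_vm_writev() keep their six-argument form and their partial-copy semantics; only the in-kernel bookkeeping now rides on an iov_iter. A quick exercise of the converted path through the glibc wrapper (pid and remote_addr are assumed to name a readable region in the target process):

	#include <sys/uio.h>	/* process_vm_readv(), glibc 2.15+ */

	char buf[4096];
	struct iovec local  = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct iovec remote = { .iov_base = (void *)remote_addr,
				.iov_len  = sizeof(buf) };

	ssize_t n = process_vm_readv(pid, &local, 1, &remote, 1, 0);
	/* n may be short: partial copies report bytes copied, not an error. */
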
diff --git a/mm/shmem.c b/mm/shmem.c
index 70273f8df586..8f1a95406bae 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1402,13 +1402,25 @@ shmem_write_end(struct file *file, struct address_space *mapping,
1402 return copied; 1402 return copied;
1403} 1403}
1404 1404
1405static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor) 1405static ssize_t shmem_file_aio_read(struct kiocb *iocb,
1406 const struct iovec *iov, unsigned long nr_segs, loff_t pos)
1406{ 1407{
1407 struct inode *inode = file_inode(filp); 1408 struct file *file = iocb->ki_filp;
1409 struct inode *inode = file_inode(file);
1408 struct address_space *mapping = inode->i_mapping; 1410 struct address_space *mapping = inode->i_mapping;
1409 pgoff_t index; 1411 pgoff_t index;
1410 unsigned long offset; 1412 unsigned long offset;
1411 enum sgp_type sgp = SGP_READ; 1413 enum sgp_type sgp = SGP_READ;
1414 int error;
1415 ssize_t retval;
1416 size_t count;
1417 loff_t *ppos = &iocb->ki_pos;
1418 struct iov_iter iter;
1419
1420 retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
1421 if (retval)
1422 return retval;
1423 iov_iter_init(&iter, iov, nr_segs, count, 0);
1412 1424
1413 /* 1425 /*
1414 * Might this read be for a stacking filesystem? Then when reading 1426 * Might this read be for a stacking filesystem? Then when reading
@@ -1436,10 +1448,10 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
1436 break; 1448 break;
1437 } 1449 }
1438 1450
1439 desc->error = shmem_getpage(inode, index, &page, sgp, NULL); 1451 error = shmem_getpage(inode, index, &page, sgp, NULL);
1440 if (desc->error) { 1452 if (error) {
1441 if (desc->error == -EINVAL) 1453 if (error == -EINVAL)
1442 desc->error = 0; 1454 error = 0;
1443 break; 1455 break;
1444 } 1456 }
1445 if (page) 1457 if (page)
@@ -1483,61 +1495,26 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
1483 /* 1495 /*
1484 * Ok, we have the page, and it's up-to-date, so 1496 * Ok, we have the page, and it's up-to-date, so
1485 * now we can copy it to user space... 1497 * now we can copy it to user space...
1486 *
1487 * The actor routine returns how many bytes were actually used..
1488 * NOTE! This may not be the same as how much of a user buffer
1489 * we filled up (we may be padding etc), so we can only update
1490 * "pos" here (the actor routine has to update the user buffer
1491 * pointers and the remaining count).
1492 */ 1498 */
1493 ret = actor(desc, page, offset, nr); 1499 ret = copy_page_to_iter(page, offset, nr, &iter);
1500 retval += ret;
1494 offset += ret; 1501 offset += ret;
1495 index += offset >> PAGE_CACHE_SHIFT; 1502 index += offset >> PAGE_CACHE_SHIFT;
1496 offset &= ~PAGE_CACHE_MASK; 1503 offset &= ~PAGE_CACHE_MASK;
1497 1504
1498 page_cache_release(page); 1505 page_cache_release(page);
1499 if (ret != nr || !desc->count) 1506 if (!iov_iter_count(&iter))
1500 break; 1507 break;
1501 1508 if (ret < nr) {
1509 error = -EFAULT;
1510 break;
1511 }
1502 cond_resched(); 1512 cond_resched();
1503 } 1513 }
1504 1514
1505 *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; 1515 *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
1506 file_accessed(filp); 1516 file_accessed(file);
1507} 1517 return retval ? retval : error;
1508
1509static ssize_t shmem_file_aio_read(struct kiocb *iocb,
1510 const struct iovec *iov, unsigned long nr_segs, loff_t pos)
1511{
1512 struct file *filp = iocb->ki_filp;
1513 ssize_t retval;
1514 unsigned long seg;
1515 size_t count;
1516 loff_t *ppos = &iocb->ki_pos;
1517
1518 retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
1519 if (retval)
1520 return retval;
1521
1522 for (seg = 0; seg < nr_segs; seg++) {
1523 read_descriptor_t desc;
1524
1525 desc.written = 0;
1526 desc.arg.buf = iov[seg].iov_base;
1527 desc.count = iov[seg].iov_len;
1528 if (desc.count == 0)
1529 continue;
1530 desc.error = 0;
1531 do_shmem_file_read(filp, ppos, &desc, file_read_actor);
1532 retval += desc.written;
1533 if (desc.error) {
1534 retval = retval ?: desc.error;
1535 break;
1536 }
1537 if (desc.count > 0)
1538 break;
1539 }
1540 return retval;
1541} 1518}
1542 1519
1543static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, 1520static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
@@ -1576,7 +1553,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
1576 index = *ppos >> PAGE_CACHE_SHIFT; 1553 index = *ppos >> PAGE_CACHE_SHIFT;
1577 loff = *ppos & ~PAGE_CACHE_MASK; 1554 loff = *ppos & ~PAGE_CACHE_MASK;
1578 req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1555 req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1579 nr_pages = min(req_pages, pipe->buffers); 1556 nr_pages = min(req_pages, spd.nr_pages_max);
1580 1557
1581 spd.nr_pages = find_get_pages_contig(mapping, index, 1558 spd.nr_pages = find_get_pages_contig(mapping, index,
1582 nr_pages, spd.pages); 1559 nr_pages, spd.pages);
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c
index babd8626bf96..6b540f1822e0 100644
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -139,7 +139,7 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry,
139 int error; 139 int error;
140 int size; 140 int size;
141 141
142 if (!inode->i_op || !inode->i_op->getxattr) 142 if (!inode->i_op->getxattr)
143 return -EOPNOTSUPP; 143 return -EOPNOTSUPP;
144 desc = init_desc(type); 144 desc = init_desc(type);
145 if (IS_ERR(desc)) 145 if (IS_ERR(desc))
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 996092f21b64..6e0bd933b6a9 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -64,7 +64,7 @@ static int evm_find_protected_xattrs(struct dentry *dentry)
64 int error; 64 int error;
65 int count = 0; 65 int count = 0;
66 66
67 if (!inode->i_op || !inode->i_op->getxattr) 67 if (!inode->i_op->getxattr)
68 return -EOPNOTSUPP; 68 return -EOPNOTSUPP;
69 69
70 for (xattr = evm_config_xattrnames; *xattr != NULL; xattr++) { 70 for (xattr = evm_config_xattrnames; *xattr != NULL; xattr++) {
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index 80a09c37cac8..a3386d119425 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -173,7 +173,7 @@ static char *tomoyo_get_local_path(struct dentry *dentry, char * const buffer,
173 * Use filesystem name if filesystem does not support rename() 173 * Use filesystem name if filesystem does not support rename()
174 * operation. 174 * operation.
175 */ 175 */
176 if (inode->i_op && !inode->i_op->rename) 176 if (!inode->i_op->rename)
177 goto prepend_filesystem_name; 177 goto prepend_filesystem_name;
178 } 178 }
179 /* Prepend device name. */ 179 /* Prepend device name. */
@@ -282,7 +282,7 @@ char *tomoyo_realpath_from_path(struct path *path)
282 * Get local name for filesystems without rename() operation 282 * Get local name for filesystems without rename() operation
283 * or dentry without vfsmount. 283 * or dentry without vfsmount.
284 */ 284 */
285 if (!path->mnt || (inode->i_op && !inode->i_op->rename)) 285 if (!path->mnt || !inode->i_op->rename)
286 pos = tomoyo_get_local_path(path->dentry, buf, 286 pos = tomoyo_get_local_path(path->dentry, buf,
287 buf_len - 1); 287 buf_len - 1);
288 /* Get absolute name for the rest. */ 288 /* Get absolute name for the rest. */
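The three security hunks above are the same mechanical cleanup: inode allocation installs a default operations table (inode_init_always() sets i_op to empty_iops), so inode->i_op is never NULL and only the individual method pointer needs testing:

	/* i_op itself is always non-NULL; only the method may be absent. */
	if (!inode->i_op->getxattr)
		return -EOPNOTSUPP;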