aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/aio.c224
-rw-r--r--include/linux/aio.h26
2 files changed, 85 insertions, 165 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 5d7dad365f5f..c5b1a8c10411 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -903,30 +903,21 @@ static void aio_advance_iovec(struct kiocb *iocb, ssize_t ret)
903 BUG_ON(ret > 0 && iocb->ki_left == 0); 903 BUG_ON(ret > 0 && iocb->ki_left == 0);
904} 904}
905 905
906static ssize_t aio_rw_vect_retry(struct kiocb *iocb) 906typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
907 unsigned long, loff_t);
908
909static ssize_t aio_rw_vect_retry(struct kiocb *iocb, int rw, aio_rw_op *rw_op)
907{ 910{
908 struct file *file = iocb->ki_filp; 911 struct file *file = iocb->ki_filp;
909 struct address_space *mapping = file->f_mapping; 912 struct address_space *mapping = file->f_mapping;
910 struct inode *inode = mapping->host; 913 struct inode *inode = mapping->host;
911 ssize_t (*rw_op)(struct kiocb *, const struct iovec *,
912 unsigned long, loff_t);
913 ssize_t ret = 0; 914 ssize_t ret = 0;
914 unsigned short opcode;
915
916 if ((iocb->ki_opcode == IOCB_CMD_PREADV) ||
917 (iocb->ki_opcode == IOCB_CMD_PREAD)) {
918 rw_op = file->f_op->aio_read;
919 opcode = IOCB_CMD_PREADV;
920 } else {
921 rw_op = file->f_op->aio_write;
922 opcode = IOCB_CMD_PWRITEV;
923 }
924 915
925 /* This matches the pread()/pwrite() logic */ 916 /* This matches the pread()/pwrite() logic */
926 if (iocb->ki_pos < 0) 917 if (iocb->ki_pos < 0)
927 return -EINVAL; 918 return -EINVAL;
928 919
929 if (opcode == IOCB_CMD_PWRITEV) 920 if (rw == WRITE)
930 file_start_write(file); 921 file_start_write(file);
931 do { 922 do {
932 ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg], 923 ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg],
@@ -938,9 +929,9 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
938 /* retry all partial writes. retry partial reads as long as its a 929 /* retry all partial writes. retry partial reads as long as its a
939 * regular file. */ 930 * regular file. */
940 } while (ret > 0 && iocb->ki_left > 0 && 931 } while (ret > 0 && iocb->ki_left > 0 &&
941 (opcode == IOCB_CMD_PWRITEV || 932 (rw == WRITE ||
942 (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)))); 933 (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode))));
943 if (opcode == IOCB_CMD_PWRITEV) 934 if (rw == WRITE)
944 file_end_write(file); 935 file_end_write(file);
945 936
946 /* This means we must have transferred all that we could */ 937 /* This means we must have transferred all that we could */
@@ -950,7 +941,7 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
950 941
951 /* If we managed to write some out we return that, rather than 942 /* If we managed to write some out we return that, rather than
952 * the eventual error. */ 943 * the eventual error. */
953 if (opcode == IOCB_CMD_PWRITEV 944 if (rw == WRITE
954 && ret < 0 && ret != -EIOCBQUEUED 945 && ret < 0 && ret != -EIOCBQUEUED
955 && iocb->ki_nbytes - iocb->ki_left) 946 && iocb->ki_nbytes - iocb->ki_left)
956 ret = iocb->ki_nbytes - iocb->ki_left; 947 ret = iocb->ki_nbytes - iocb->ki_left;
@@ -958,73 +949,41 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
958 return ret; 949 return ret;
959} 950}
960 951
961static ssize_t aio_fdsync(struct kiocb *iocb) 952static ssize_t aio_setup_vectored_rw(int rw, struct kiocb *kiocb, bool compat)
962{
963 struct file *file = iocb->ki_filp;
964 ssize_t ret = -EINVAL;
965
966 if (file->f_op->aio_fsync)
967 ret = file->f_op->aio_fsync(iocb, 1);
968 return ret;
969}
970
971static ssize_t aio_fsync(struct kiocb *iocb)
972{
973 struct file *file = iocb->ki_filp;
974 ssize_t ret = -EINVAL;
975
976 if (file->f_op->aio_fsync)
977 ret = file->f_op->aio_fsync(iocb, 0);
978 return ret;
979}
980
981static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat)
982{ 953{
983 ssize_t ret; 954 ssize_t ret;
984 955
956 kiocb->ki_nr_segs = kiocb->ki_nbytes;
957
985#ifdef CONFIG_COMPAT 958#ifdef CONFIG_COMPAT
986 if (compat) 959 if (compat)
987 ret = compat_rw_copy_check_uvector(type, 960 ret = compat_rw_copy_check_uvector(rw,
988 (struct compat_iovec __user *)kiocb->ki_buf, 961 (struct compat_iovec __user *)kiocb->ki_buf,
989 kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec, 962 kiocb->ki_nr_segs, 1, &kiocb->ki_inline_vec,
990 &kiocb->ki_iovec); 963 &kiocb->ki_iovec);
991 else 964 else
992#endif 965#endif
993 ret = rw_copy_check_uvector(type, 966 ret = rw_copy_check_uvector(rw,
994 (struct iovec __user *)kiocb->ki_buf, 967 (struct iovec __user *)kiocb->ki_buf,
995 kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec, 968 kiocb->ki_nr_segs, 1, &kiocb->ki_inline_vec,
996 &kiocb->ki_iovec); 969 &kiocb->ki_iovec);
997 if (ret < 0) 970 if (ret < 0)
998 goto out; 971 return ret;
999
1000 ret = rw_verify_area(type, kiocb->ki_filp, &kiocb->ki_pos, ret);
1001 if (ret < 0)
1002 goto out;
1003 972
1004 kiocb->ki_nr_segs = kiocb->ki_nbytes; 973 /* ki_nbytes now reflect bytes instead of segs */
1005 kiocb->ki_cur_seg = 0;
1006 /* ki_nbytes/left now reflect bytes instead of segs */
1007 kiocb->ki_nbytes = ret; 974 kiocb->ki_nbytes = ret;
1008 kiocb->ki_left = ret; 975 return 0;
1009
1010 ret = 0;
1011out:
1012 return ret;
1013} 976}
1014 977
1015static ssize_t aio_setup_single_vector(int type, struct file * file, struct kiocb *kiocb) 978static ssize_t aio_setup_single_vector(int rw, struct kiocb *kiocb)
1016{ 979{
1017 int bytes; 980 if (unlikely(!access_ok(!rw, kiocb->ki_buf, kiocb->ki_nbytes)))
1018 981 return -EFAULT;
1019 bytes = rw_verify_area(type, file, &kiocb->ki_pos, kiocb->ki_left);
1020 if (bytes < 0)
1021 return bytes;
1022 982
1023 kiocb->ki_iovec = &kiocb->ki_inline_vec; 983 kiocb->ki_iovec = &kiocb->ki_inline_vec;
1024 kiocb->ki_iovec->iov_base = kiocb->ki_buf; 984 kiocb->ki_iovec->iov_base = kiocb->ki_buf;
1025 kiocb->ki_iovec->iov_len = bytes; 985 kiocb->ki_iovec->iov_len = kiocb->ki_nbytes;
1026 kiocb->ki_nr_segs = 1; 986 kiocb->ki_nr_segs = 1;
1027 kiocb->ki_cur_seg = 0;
1028 return 0; 987 return 0;
1029} 988}
1030 989
@@ -1033,81 +992,82 @@ static ssize_t aio_setup_single_vector(int type, struct file * file, struct kioc
1033 * Performs the initial checks and aio retry method 992 * Performs the initial checks and aio retry method
1034 * setup for the kiocb at the time of io submission. 993 * setup for the kiocb at the time of io submission.
1035 */ 994 */
1036static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat) 995static ssize_t aio_run_iocb(struct kiocb *req, bool compat)
1037{ 996{
1038 struct file *file = kiocb->ki_filp; 997 struct file *file = req->ki_filp;
1039 ssize_t ret = 0; 998 ssize_t ret;
999 int rw;
1000 fmode_t mode;
1001 aio_rw_op *rw_op;
1040 1002
1041 switch (kiocb->ki_opcode) { 1003 switch (req->ki_opcode) {
1042 case IOCB_CMD_PREAD: 1004 case IOCB_CMD_PREAD:
1043 ret = -EBADF;
1044 if (unlikely(!(file->f_mode & FMODE_READ)))
1045 break;
1046 ret = -EFAULT;
1047 if (unlikely(!access_ok(VERIFY_WRITE, kiocb->ki_buf,
1048 kiocb->ki_left)))
1049 break;
1050 ret = aio_setup_single_vector(READ, file, kiocb);
1051 if (ret)
1052 break;
1053 ret = -EINVAL;
1054 if (file->f_op->aio_read)
1055 kiocb->ki_retry = aio_rw_vect_retry;
1056 break;
1057 case IOCB_CMD_PWRITE:
1058 ret = -EBADF;
1059 if (unlikely(!(file->f_mode & FMODE_WRITE)))
1060 break;
1061 ret = -EFAULT;
1062 if (unlikely(!access_ok(VERIFY_READ, kiocb->ki_buf,
1063 kiocb->ki_left)))
1064 break;
1065 ret = aio_setup_single_vector(WRITE, file, kiocb);
1066 if (ret)
1067 break;
1068 ret = -EINVAL;
1069 if (file->f_op->aio_write)
1070 kiocb->ki_retry = aio_rw_vect_retry;
1071 break;
1072 case IOCB_CMD_PREADV: 1005 case IOCB_CMD_PREADV:
1073 ret = -EBADF; 1006 mode = FMODE_READ;
1074 if (unlikely(!(file->f_mode & FMODE_READ))) 1007 rw = READ;
1075 break; 1008 rw_op = file->f_op->aio_read;
1076 ret = aio_setup_vectored_rw(READ, kiocb, compat); 1009 goto rw_common;
1077 if (ret) 1010
1078 break; 1011 case IOCB_CMD_PWRITE:
1079 ret = -EINVAL;
1080 if (file->f_op->aio_read)
1081 kiocb->ki_retry = aio_rw_vect_retry;
1082 break;
1083 case IOCB_CMD_PWRITEV: 1012 case IOCB_CMD_PWRITEV:
1084 ret = -EBADF; 1013 mode = FMODE_WRITE;
1085 if (unlikely(!(file->f_mode & FMODE_WRITE))) 1014 rw = WRITE;
1086 break; 1015 rw_op = file->f_op->aio_write;
1087 ret = aio_setup_vectored_rw(WRITE, kiocb, compat); 1016 goto rw_common;
1017rw_common:
1018 if (unlikely(!(file->f_mode & mode)))
1019 return -EBADF;
1020
1021 if (!rw_op)
1022 return -EINVAL;
1023
1024 ret = (req->ki_opcode == IOCB_CMD_PREADV ||
1025 req->ki_opcode == IOCB_CMD_PWRITEV)
1026 ? aio_setup_vectored_rw(rw, req, compat)
1027 : aio_setup_single_vector(rw, req);
1088 if (ret) 1028 if (ret)
1089 break; 1029 return ret;
1090 ret = -EINVAL; 1030
1091 if (file->f_op->aio_write) 1031 ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
1092 kiocb->ki_retry = aio_rw_vect_retry; 1032 if (ret < 0)
1033 return ret;
1034
1035 req->ki_nbytes = ret;
1036 req->ki_left = ret;
1037
1038 ret = aio_rw_vect_retry(req, rw, rw_op);
1093 break; 1039 break;
1040
1094 case IOCB_CMD_FDSYNC: 1041 case IOCB_CMD_FDSYNC:
1095 ret = -EINVAL; 1042 if (!file->f_op->aio_fsync)
1096 if (file->f_op->aio_fsync) 1043 return -EINVAL;
1097 kiocb->ki_retry = aio_fdsync; 1044
1045 ret = file->f_op->aio_fsync(req, 1);
1098 break; 1046 break;
1047
1099 case IOCB_CMD_FSYNC: 1048 case IOCB_CMD_FSYNC:
1100 ret = -EINVAL; 1049 if (!file->f_op->aio_fsync)
1101 if (file->f_op->aio_fsync) 1050 return -EINVAL;
1102 kiocb->ki_retry = aio_fsync; 1051
1052 ret = file->f_op->aio_fsync(req, 0);
1103 break; 1053 break;
1054
1104 default: 1055 default:
1105 pr_debug("EINVAL: no operation provided\n"); 1056 pr_debug("EINVAL: no operation provided\n");
1106 ret = -EINVAL; 1057 return -EINVAL;
1107 } 1058 }
1108 1059
1109 if (!kiocb->ki_retry) 1060 if (ret != -EIOCBQUEUED) {
1110 return ret; 1061 /*
1062 * There's no easy way to restart the syscall since other AIO's
1063 * may be already running. Just fail this IO with EINTR.
1064 */
1065 if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
1066 ret == -ERESTARTNOHAND ||
1067 ret == -ERESTART_RESTARTBLOCK))
1068 ret = -EINTR;
1069 aio_complete(req, ret, 0);
1070 }
1111 1071
1112 return 0; 1072 return 0;
1113} 1073}
@@ -1134,7 +1094,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1134 return -EINVAL; 1094 return -EINVAL;
1135 } 1095 }
1136 1096
1137 req = aio_get_req(ctx); /* returns with 2 references to req */ 1097 req = aio_get_req(ctx);
1138 if (unlikely(!req)) 1098 if (unlikely(!req))
1139 return -EAGAIN; 1099 return -EAGAIN;
1140 1100
@@ -1173,26 +1133,12 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
1173 req->ki_left = req->ki_nbytes = iocb->aio_nbytes; 1133 req->ki_left = req->ki_nbytes = iocb->aio_nbytes;
1174 req->ki_opcode = iocb->aio_lio_opcode; 1134 req->ki_opcode = iocb->aio_lio_opcode;
1175 1135
1176 ret = aio_setup_iocb(req, compat); 1136 ret = aio_run_iocb(req, compat);
1177 if (ret) 1137 if (ret)
1178 goto out_put_req; 1138 goto out_put_req;
1179 1139
1180 ret = req->ki_retry(req);
1181 if (ret != -EIOCBQUEUED) {
1182 /*
1183 * There's no easy way to restart the syscall since other AIO's
1184 * may be already running. Just fail this IO with EINTR.
1185 */
1186 if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
1187 ret == -ERESTARTNOHAND ||
1188 ret == -ERESTART_RESTARTBLOCK))
1189 ret = -EINTR;
1190 aio_complete(req, ret, 0);
1191 }
1192
1193 aio_put_req(req); /* drop extra ref to req */ 1140 aio_put_req(req); /* drop extra ref to req */
1194 return 0; 1141 return 0;
1195
1196out_put_req: 1142out_put_req:
1197 atomic_dec(&ctx->reqs_active); 1143 atomic_dec(&ctx->reqs_active);
1198 aio_put_req(req); /* drop extra ref to req */ 1144 aio_put_req(req); /* drop extra ref to req */
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 7308836dd045..1bdf965339f9 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -29,38 +29,12 @@ struct kiocb;
29 29
30typedef int (kiocb_cancel_fn)(struct kiocb *, struct io_event *); 30typedef int (kiocb_cancel_fn)(struct kiocb *, struct io_event *);
31 31
32/* is there a better place to document function pointer methods? */
33/**
34 * ki_retry - iocb forward progress callback
35 * @kiocb: The kiocb struct to advance by performing an operation.
36 *
37 * This callback is called when the AIO core wants a given AIO operation
38 * to make forward progress. The kiocb argument describes the operation
39 * that is to be performed. As the operation proceeds, perhaps partially,
40 * ki_retry is expected to update the kiocb with progress made. Typically
41 * ki_retry is set in the AIO core and it itself calls file_operations
42 * helpers.
43 *
44 * ki_retry's return value determines when the AIO operation is completed
45 * and an event is generated in the AIO event ring. Except the special
46 * return values described below, the value that is returned from ki_retry
47 * is transferred directly into the completion ring as the operation's
48 * resulting status. Once this has happened ki_retry *MUST NOT* reference
49 * the kiocb pointer again.
50 *
51 * If ki_retry returns -EIOCBQUEUED it has made a promise that aio_complete()
52 * will be called on the kiocb pointer in the future. The AIO core will
53 * not ask the method again -- ki_retry must ensure forward progress.
54 * aio_complete() must be called once and only once in the future, multiple
55 * calls may result in undefined behaviour.
56 */
57struct kiocb { 32struct kiocb {
58 atomic_t ki_users; 33 atomic_t ki_users;
59 34
60 struct file *ki_filp; 35 struct file *ki_filp;
61 struct kioctx *ki_ctx; /* NULL for sync ops */ 36 struct kioctx *ki_ctx; /* NULL for sync ops */
62 kiocb_cancel_fn *ki_cancel; 37 kiocb_cancel_fn *ki_cancel;
63 ssize_t (*ki_retry)(struct kiocb *);
64 void (*ki_dtor)(struct kiocb *); 38 void (*ki_dtor)(struct kiocb *);
65 39
66 union { 40 union {