about | summary | refs | log | tree | commit | diff | stats
path: root/fs/direct-io.c
diff options
context:
space:
mode:
author: Andi Kleen <ak@linux.intel.com> 2011-08-02 00:38:09 -0400
committer: root <root@serles.lst.de> 2011-10-28 08:58:58 -0400
commit: 847cc6371ba820763773e993000410d6d8d23515 (patch)
tree: ffc36e702c82fc3d9c371a72e9b0663e5ec16f80 /fs/direct-io.c
parent: ba253fbf6d3502c54e1ac8792e7ac8290a1f5b8d (diff)
direct-io: merge direct_io_worker into __blockdev_direct_IO
This doesn't change anything for the compiler, but hch thought it would make the code clearer. I moved the reference counting into its own little inline function.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs/direct-io.c')
-rw-r--r-- fs/direct-io.c 271
1 files changed, 132 insertions, 139 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 6d425821be66..d740ab67ff6e 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1043,136 +1043,10 @@ out:
1043 return ret; 1043 return ret;
1044} 1044}
1045 1045
1046static inline ssize_t 1046static inline int drop_refcount(struct dio *dio)
1047direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1048 const struct iovec *iov, loff_t offset, unsigned long nr_segs,
1049 unsigned blkbits, get_block_t get_block, dio_iodone_t end_io,
1050 dio_submit_t submit_io, struct dio *dio, struct dio_submit *sdio)
1051{ 1047{
1052 unsigned long user_addr; 1048 int ret2;
1053 unsigned long flags; 1049 unsigned long flags;
1054 int seg;
1055 ssize_t ret = 0;
1056 ssize_t ret2;
1057 size_t bytes;
1058 struct buffer_head map_bh = { 0, };
1059
1060 dio->inode = inode;
1061 dio->rw = rw;
1062 sdio->blkbits = blkbits;
1063 sdio->blkfactor = inode->i_blkbits - blkbits;
1064 sdio->block_in_file = offset >> blkbits;
1065
1066 sdio->get_block = get_block;
1067 dio->end_io = end_io;
1068 sdio->submit_io = submit_io;
1069 sdio->final_block_in_bio = -1;
1070 sdio->next_block_for_io = -1;
1071
1072 dio->iocb = iocb;
1073 dio->i_size = i_size_read(inode);
1074
1075 spin_lock_init(&dio->bio_lock);
1076 dio->refcount = 1;
1077
1078 /*
1079 * In case of non-aligned buffers, we may need 2 more
1080 * pages since we need to zero out first and last block.
1081 */
1082 if (unlikely(sdio->blkfactor))
1083 sdio->pages_in_io = 2;
1084
1085 for (seg = 0; seg < nr_segs; seg++) {
1086 user_addr = (unsigned long)iov[seg].iov_base;
1087 sdio->pages_in_io +=
1088 ((user_addr+iov[seg].iov_len +PAGE_SIZE-1)/PAGE_SIZE
1089 - user_addr/PAGE_SIZE);
1090 }
1091
1092 for (seg = 0; seg < nr_segs; seg++) {
1093 user_addr = (unsigned long)iov[seg].iov_base;
1094 sdio->size += bytes = iov[seg].iov_len;
1095
1096 /* Index into the first page of the first block */
1097 sdio->first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits;
1098 sdio->final_block_in_request = sdio->block_in_file +
1099 (bytes >> blkbits);
1100 /* Page fetching state */
1101 sdio->head = 0;
1102 sdio->tail = 0;
1103 sdio->curr_page = 0;
1104
1105 sdio->total_pages = 0;
1106 if (user_addr & (PAGE_SIZE-1)) {
1107 sdio->total_pages++;
1108 bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1));
1109 }
1110 sdio->total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
1111 sdio->curr_user_address = user_addr;
1112
1113 ret = do_direct_IO(dio, sdio, &map_bh);
1114
1115 dio->result += iov[seg].iov_len -
1116 ((sdio->final_block_in_request - sdio->block_in_file) <<
1117 blkbits);
1118
1119 if (ret) {
1120 dio_cleanup(dio, sdio);
1121 break;
1122 }
1123 } /* end iovec loop */
1124
1125 if (ret == -ENOTBLK) {
1126 /*
1127 * The remaining part of the request will be
1128 * be handled by buffered I/O when we return
1129 */
1130 ret = 0;
1131 }
1132 /*
1133 * There may be some unwritten disk at the end of a part-written
1134 * fs-block-sized block. Go zero that now.
1135 */
1136 dio_zero_block(dio, sdio, 1, &map_bh);
1137
1138 if (sdio->cur_page) {
1139 ret2 = dio_send_cur_page(dio, sdio, &map_bh);
1140 if (ret == 0)
1141 ret = ret2;
1142 page_cache_release(sdio->cur_page);
1143 sdio->cur_page = NULL;
1144 }
1145 if (sdio->bio)
1146 dio_bio_submit(dio, sdio);
1147
1148 /*
1149 * It is possible that, we return short IO due to end of file.
1150 * In that case, we need to release all the pages we got hold on.
1151 */
1152 dio_cleanup(dio, sdio);
1153
1154 /*
1155 * All block lookups have been performed. For READ requests
1156 * we can let i_mutex go now that its achieved its purpose
1157 * of protecting us from looking up uninitialized blocks.
1158 */
1159 if (rw == READ && (dio->flags & DIO_LOCKING))
1160 mutex_unlock(&dio->inode->i_mutex);
1161
1162 /*
1163 * The only time we want to leave bios in flight is when a successful
1164 * partial aio read or full aio write have been setup. In that case
1165 * bio completion will call aio_complete. The only time it's safe to
1166 * call aio_complete is when we return -EIOCBQUEUED, so we key on that.
1167 * This had *better* be the only place that raises -EIOCBQUEUED.
1168 */
1169 BUG_ON(ret == -EIOCBQUEUED);
1170 if (dio->is_async && ret == 0 && dio->result &&
1171 ((rw & READ) || (dio->result == sdio->size)))
1172 ret = -EIOCBQUEUED;
1173
1174 if (ret != -EIOCBQUEUED)
1175 dio_await_completion(dio);
1176 1050
1177 /* 1051 /*
1178 * Sync will always be dropping the final ref and completing the 1052 * Sync will always be dropping the final ref and completing the
@@ -1188,14 +1062,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1188 spin_lock_irqsave(&dio->bio_lock, flags); 1062 spin_lock_irqsave(&dio->bio_lock, flags);
1189 ret2 = --dio->refcount; 1063 ret2 = --dio->refcount;
1190 spin_unlock_irqrestore(&dio->bio_lock, flags); 1064 spin_unlock_irqrestore(&dio->bio_lock, flags);
1191 1065 return ret2;
1192 if (ret2 == 0) {
1193 ret = dio_complete(dio, offset, ret, false);
1194 kmem_cache_free(dio_cache, dio);
1195 } else
1196 BUG_ON(ret != -EIOCBQUEUED);
1197
1198 return ret;
1199} 1066}
1200 1067
1201/* 1068/*
@@ -1239,6 +1106,9 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1239 loff_t end = offset; 1106 loff_t end = offset;
1240 struct dio *dio; 1107 struct dio *dio;
1241 struct dio_submit sdio = { 0, }; 1108 struct dio_submit sdio = { 0, };
1109 unsigned long user_addr;
1110 size_t bytes;
1111 struct buffer_head map_bh = { 0, };
1242 1112
1243 if (rw & WRITE) 1113 if (rw & WRITE)
1244 rw = WRITE_ODIRECT; 1114 rw = WRITE_ODIRECT;
@@ -1316,9 +1186,132 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1316 dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) && 1186 dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) &&
1317 (end > i_size_read(inode))); 1187 (end > i_size_read(inode)));
1318 1188
1319 retval = direct_io_worker(rw, iocb, inode, iov, offset, 1189 retval = 0;
1320 nr_segs, blkbits, get_block, end_io, 1190
1321 submit_io, dio, &sdio); 1191 dio->inode = inode;
1192 dio->rw = rw;
1193 sdio.blkbits = blkbits;
1194 sdio.blkfactor = inode->i_blkbits - blkbits;
1195 sdio.block_in_file = offset >> blkbits;
1196
1197 sdio.get_block = get_block;
1198 dio->end_io = end_io;
1199 sdio.submit_io = submit_io;
1200 sdio.final_block_in_bio = -1;
1201 sdio.next_block_for_io = -1;
1202
1203 dio->iocb = iocb;
1204 dio->i_size = i_size_read(inode);
1205
1206 spin_lock_init(&dio->bio_lock);
1207 dio->refcount = 1;
1208
1209 /*
1210 * In case of non-aligned buffers, we may need 2 more
1211 * pages since we need to zero out first and last block.
1212 */
1213 if (unlikely(sdio.blkfactor))
1214 sdio.pages_in_io = 2;
1215
1216 for (seg = 0; seg < nr_segs; seg++) {
1217 user_addr = (unsigned long)iov[seg].iov_base;
1218 sdio.pages_in_io +=
1219 ((user_addr + iov[seg].iov_len + PAGE_SIZE-1) /
1220 PAGE_SIZE - user_addr / PAGE_SIZE);
1221 }
1222
1223 for (seg = 0; seg < nr_segs; seg++) {
1224 user_addr = (unsigned long)iov[seg].iov_base;
1225 sdio.size += bytes = iov[seg].iov_len;
1226
1227 /* Index into the first page of the first block */
1228 sdio.first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits;
1229 sdio.final_block_in_request = sdio.block_in_file +
1230 (bytes >> blkbits);
1231 /* Page fetching state */
1232 sdio.head = 0;
1233 sdio.tail = 0;
1234 sdio.curr_page = 0;
1235
1236 sdio.total_pages = 0;
1237 if (user_addr & (PAGE_SIZE-1)) {
1238 sdio.total_pages++;
1239 bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1));
1240 }
1241 sdio.total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
1242 sdio.curr_user_address = user_addr;
1243
1244 retval = do_direct_IO(dio, &sdio, &map_bh);
1245
1246 dio->result += iov[seg].iov_len -
1247 ((sdio.final_block_in_request - sdio.block_in_file) <<
1248 blkbits);
1249
1250 if (retval) {
1251 dio_cleanup(dio, &sdio);
1252 break;
1253 }
1254 } /* end iovec loop */
1255
1256 if (retval == -ENOTBLK) {
1257 /*
1258 * The remaining part of the request will be
1259 * be handled by buffered I/O when we return
1260 */
1261 retval = 0;
1262 }
1263 /*
1264 * There may be some unwritten disk at the end of a part-written
1265 * fs-block-sized block. Go zero that now.
1266 */
1267 dio_zero_block(dio, &sdio, 1, &map_bh);
1268
1269 if (sdio.cur_page) {
1270 ssize_t ret2;
1271
1272 ret2 = dio_send_cur_page(dio, &sdio, &map_bh);
1273 if (retval == 0)
1274 retval = ret2;
1275 page_cache_release(sdio.cur_page);
1276 sdio.cur_page = NULL;
1277 }
1278 if (sdio.bio)
1279 dio_bio_submit(dio, &sdio);
1280
1281 /*
1282 * It is possible that, we return short IO due to end of file.
1283 * In that case, we need to release all the pages we got hold on.
1284 */
1285 dio_cleanup(dio, &sdio);
1286
1287 /*
1288 * All block lookups have been performed. For READ requests
1289 * we can let i_mutex go now that its achieved its purpose
1290 * of protecting us from looking up uninitialized blocks.
1291 */
1292 if (rw == READ && (dio->flags & DIO_LOCKING))
1293 mutex_unlock(&dio->inode->i_mutex);
1294
1295 /*
1296 * The only time we want to leave bios in flight is when a successful
1297 * partial aio read or full aio write have been setup. In that case
1298 * bio completion will call aio_complete. The only time it's safe to
1299 * call aio_complete is when we return -EIOCBQUEUED, so we key on that.
1300 * This had *better* be the only place that raises -EIOCBQUEUED.
1301 */
1302 BUG_ON(retval == -EIOCBQUEUED);
1303 if (dio->is_async && retval == 0 && dio->result &&
1304 ((rw & READ) || (dio->result == sdio.size)))
1305 retval = -EIOCBQUEUED;
1306
1307 if (retval != -EIOCBQUEUED)
1308 dio_await_completion(dio);
1309
1310 if (drop_refcount(dio) == 0) {
1311 retval = dio_complete(dio, offset, retval, false);
1312 kmem_cache_free(dio_cache, dio);
1313 } else
1314 BUG_ON(retval != -EIOCBQUEUED);
1322 1315
1323out: 1316out:
1324 return retval; 1317 return retval;