diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2014-02-03 17:07:03 -0500 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2014-04-01 23:19:21 -0400 |
commit | 6e58e79db8a16222b31fc8da1ca2ac2dccfc4237 (patch) | |
tree | 3babba99823f4f723a119a7ed61372baed4786bc /mm/filemap.c | |
parent | 9223687863ffa63fa655f52ef64148ee08dee4d1 (diff) |
introduce copy_page_to_iter, kill loop over iovec in generic_file_aio_read()
generic_file_aio_read() was looping over the target iovec, with a loop over
the (source) pages nested inside that. Just set an iov_iter up and pass *that*
to do_generic_file_read(), with copy_page_to_iter() doing all the work
of mapping and copying a page to the iovec and advancing the iov_iter.
Switch shmem_file_aio_read() to the same and kill file_read_actor(), while
we are at it.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 202 |
1 files changed, 109 insertions, 93 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index bfb7a97d6d0f..a16eb2c4f316 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -1085,11 +1085,90 @@ static void shrink_readahead_size_eio(struct file *filp, | |||
1085 | ra->ra_pages /= 4; | 1085 | ra->ra_pages /= 4; |
1086 | } | 1086 | } |
1087 | 1087 | ||
1088 | size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, | ||
1089 | struct iov_iter *i) | ||
1090 | { | ||
1091 | size_t skip, copy, left, wanted; | ||
1092 | const struct iovec *iov; | ||
1093 | char __user *buf; | ||
1094 | void *kaddr, *from; | ||
1095 | |||
1096 | if (unlikely(bytes > i->count)) | ||
1097 | bytes = i->count; | ||
1098 | |||
1099 | if (unlikely(!bytes)) | ||
1100 | return 0; | ||
1101 | |||
1102 | wanted = bytes; | ||
1103 | iov = i->iov; | ||
1104 | skip = i->iov_offset; | ||
1105 | buf = iov->iov_base + skip; | ||
1106 | copy = min(bytes, iov->iov_len - skip); | ||
1107 | |||
1108 | if (!fault_in_pages_writeable(buf, copy)) { | ||
1109 | kaddr = kmap_atomic(page); | ||
1110 | from = kaddr + offset; | ||
1111 | |||
1112 | /* first chunk, usually the only one */ | ||
1113 | left = __copy_to_user_inatomic(buf, from, copy); | ||
1114 | copy -= left; | ||
1115 | skip += copy; | ||
1116 | from += copy; | ||
1117 | bytes -= copy; | ||
1118 | |||
1119 | while (unlikely(!left && bytes)) { | ||
1120 | iov++; | ||
1121 | buf = iov->iov_base; | ||
1122 | copy = min(bytes, iov->iov_len); | ||
1123 | left = __copy_to_user_inatomic(buf, from, copy); | ||
1124 | copy -= left; | ||
1125 | skip = copy; | ||
1126 | from += copy; | ||
1127 | bytes -= copy; | ||
1128 | } | ||
1129 | if (likely(!bytes)) { | ||
1130 | kunmap_atomic(kaddr); | ||
1131 | goto done; | ||
1132 | } | ||
1133 | offset = from - kaddr; | ||
1134 | buf += copy; | ||
1135 | kunmap_atomic(kaddr); | ||
1136 | copy = min(bytes, iov->iov_len - skip); | ||
1137 | } | ||
1138 | /* Too bad - revert to non-atomic kmap */ | ||
1139 | kaddr = kmap(page); | ||
1140 | from = kaddr + offset; | ||
1141 | left = __copy_to_user(buf, from, copy); | ||
1142 | copy -= left; | ||
1143 | skip += copy; | ||
1144 | from += copy; | ||
1145 | bytes -= copy; | ||
1146 | while (unlikely(!left && bytes)) { | ||
1147 | iov++; | ||
1148 | buf = iov->iov_base; | ||
1149 | copy = min(bytes, iov->iov_len); | ||
1150 | left = __copy_to_user(buf, from, copy); | ||
1151 | copy -= left; | ||
1152 | skip = copy; | ||
1153 | from += copy; | ||
1154 | bytes -= copy; | ||
1155 | } | ||
1156 | kunmap(page); | ||
1157 | done: | ||
1158 | i->count -= wanted - bytes; | ||
1159 | i->nr_segs -= iov - i->iov; | ||
1160 | i->iov = iov; | ||
1161 | i->iov_offset = skip; | ||
1162 | return wanted - bytes; | ||
1163 | } | ||
1164 | EXPORT_SYMBOL(copy_page_to_iter); | ||
1165 | |||
1088 | /** | 1166 | /** |
1089 | * do_generic_file_read - generic file read routine | 1167 | * do_generic_file_read - generic file read routine |
1090 | * @filp: the file to read | 1168 | * @filp: the file to read |
1091 | * @ppos: current file position | 1169 | * @ppos: current file position |
1092 | * @desc: read_descriptor | 1170 | * @iter: data destination |
1171 | * @written: already copied | ||
1093 | * | 1172 | * |
1094 | * This is a generic file read routine, and uses the | 1173 | * This is a generic file read routine, and uses the |
1095 | * mapping->a_ops->readpage() function for the actual low-level stuff. | 1174 | * mapping->a_ops->readpage() function for the actual low-level stuff. |
@@ -1097,8 +1176,8 @@ static void shrink_readahead_size_eio(struct file *filp, | |||
1097 | * This is really ugly. But the goto's actually try to clarify some | 1176 | * This is really ugly. But the goto's actually try to clarify some |
1098 | * of the logic when it comes to error handling etc. | 1177 | * of the logic when it comes to error handling etc. |
1099 | */ | 1178 | */ |
1100 | static void do_generic_file_read(struct file *filp, loff_t *ppos, | 1179 | static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, |
1101 | read_descriptor_t *desc) | 1180 | struct iov_iter *iter, ssize_t written) |
1102 | { | 1181 | { |
1103 | struct address_space *mapping = filp->f_mapping; | 1182 | struct address_space *mapping = filp->f_mapping; |
1104 | struct inode *inode = mapping->host; | 1183 | struct inode *inode = mapping->host; |
@@ -1108,12 +1187,12 @@ static void do_generic_file_read(struct file *filp, loff_t *ppos, | |||
1108 | pgoff_t prev_index; | 1187 | pgoff_t prev_index; |
1109 | unsigned long offset; /* offset into pagecache page */ | 1188 | unsigned long offset; /* offset into pagecache page */ |
1110 | unsigned int prev_offset; | 1189 | unsigned int prev_offset; |
1111 | int error; | 1190 | int error = 0; |
1112 | 1191 | ||
1113 | index = *ppos >> PAGE_CACHE_SHIFT; | 1192 | index = *ppos >> PAGE_CACHE_SHIFT; |
1114 | prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT; | 1193 | prev_index = ra->prev_pos >> PAGE_CACHE_SHIFT; |
1115 | prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1); | 1194 | prev_offset = ra->prev_pos & (PAGE_CACHE_SIZE-1); |
1116 | last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; | 1195 | last_index = (*ppos + iter->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; |
1117 | offset = *ppos & ~PAGE_CACHE_MASK; | 1196 | offset = *ppos & ~PAGE_CACHE_MASK; |
1118 | 1197 | ||
1119 | for (;;) { | 1198 | for (;;) { |
@@ -1148,7 +1227,7 @@ find_page: | |||
1148 | if (!page->mapping) | 1227 | if (!page->mapping) |
1149 | goto page_not_up_to_date_locked; | 1228 | goto page_not_up_to_date_locked; |
1150 | if (!mapping->a_ops->is_partially_uptodate(page, | 1229 | if (!mapping->a_ops->is_partially_uptodate(page, |
1151 | offset, desc->count)) | 1230 | offset, iter->count)) |
1152 | goto page_not_up_to_date_locked; | 1231 | goto page_not_up_to_date_locked; |
1153 | unlock_page(page); | 1232 | unlock_page(page); |
1154 | } | 1233 | } |
@@ -1198,24 +1277,23 @@ page_ok: | |||
1198 | /* | 1277 | /* |
1199 | * Ok, we have the page, and it's up-to-date, so | 1278 | * Ok, we have the page, and it's up-to-date, so |
1200 | * now we can copy it to user space... | 1279 | * now we can copy it to user space... |
1201 | * | ||
1202 | * The file_read_actor routine returns how many bytes were | ||
1203 | * actually used.. | ||
1204 | * NOTE! This may not be the same as how much of a user buffer | ||
1205 | * we filled up (we may be padding etc), so we can only update | ||
1206 | * "pos" here (the actor routine has to update the user buffer | ||
1207 | * pointers and the remaining count). | ||
1208 | */ | 1280 | */ |
1209 | ret = file_read_actor(desc, page, offset, nr); | 1281 | |
1282 | ret = copy_page_to_iter(page, offset, nr, iter); | ||
1210 | offset += ret; | 1283 | offset += ret; |
1211 | index += offset >> PAGE_CACHE_SHIFT; | 1284 | index += offset >> PAGE_CACHE_SHIFT; |
1212 | offset &= ~PAGE_CACHE_MASK; | 1285 | offset &= ~PAGE_CACHE_MASK; |
1213 | prev_offset = offset; | 1286 | prev_offset = offset; |
1214 | 1287 | ||
1215 | page_cache_release(page); | 1288 | page_cache_release(page); |
1216 | if (ret == nr && desc->count) | 1289 | written += ret; |
1217 | continue; | 1290 | if (!iov_iter_count(iter)) |
1218 | goto out; | 1291 | goto out; |
1292 | if (ret < nr) { | ||
1293 | error = -EFAULT; | ||
1294 | goto out; | ||
1295 | } | ||
1296 | continue; | ||
1219 | 1297 | ||
1220 | page_not_up_to_date: | 1298 | page_not_up_to_date: |
1221 | /* Get exclusive access to the page ... */ | 1299 | /* Get exclusive access to the page ... */ |
@@ -1250,6 +1328,7 @@ readpage: | |||
1250 | if (unlikely(error)) { | 1328 | if (unlikely(error)) { |
1251 | if (error == AOP_TRUNCATED_PAGE) { | 1329 | if (error == AOP_TRUNCATED_PAGE) { |
1252 | page_cache_release(page); | 1330 | page_cache_release(page); |
1331 | error = 0; | ||
1253 | goto find_page; | 1332 | goto find_page; |
1254 | } | 1333 | } |
1255 | goto readpage_error; | 1334 | goto readpage_error; |
@@ -1280,7 +1359,6 @@ readpage: | |||
1280 | 1359 | ||
1281 | readpage_error: | 1360 | readpage_error: |
1282 | /* UHHUH! A synchronous read error occurred. Report it */ | 1361 | /* UHHUH! A synchronous read error occurred. Report it */ |
1283 | desc->error = error; | ||
1284 | page_cache_release(page); | 1362 | page_cache_release(page); |
1285 | goto out; | 1363 | goto out; |
1286 | 1364 | ||
@@ -1291,16 +1369,17 @@ no_cached_page: | |||
1291 | */ | 1369 | */ |
1292 | page = page_cache_alloc_cold(mapping); | 1370 | page = page_cache_alloc_cold(mapping); |
1293 | if (!page) { | 1371 | if (!page) { |
1294 | desc->error = -ENOMEM; | 1372 | error = -ENOMEM; |
1295 | goto out; | 1373 | goto out; |
1296 | } | 1374 | } |
1297 | error = add_to_page_cache_lru(page, mapping, | 1375 | error = add_to_page_cache_lru(page, mapping, |
1298 | index, GFP_KERNEL); | 1376 | index, GFP_KERNEL); |
1299 | if (error) { | 1377 | if (error) { |
1300 | page_cache_release(page); | 1378 | page_cache_release(page); |
1301 | if (error == -EEXIST) | 1379 | if (error == -EEXIST) { |
1380 | error = 0; | ||
1302 | goto find_page; | 1381 | goto find_page; |
1303 | desc->error = error; | 1382 | } |
1304 | goto out; | 1383 | goto out; |
1305 | } | 1384 | } |
1306 | goto readpage; | 1385 | goto readpage; |
@@ -1313,44 +1392,7 @@ out: | |||
1313 | 1392 | ||
1314 | *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset; | 1393 | *ppos = ((loff_t)index << PAGE_CACHE_SHIFT) + offset; |
1315 | file_accessed(filp); | 1394 | file_accessed(filp); |
1316 | } | 1395 | return written ? written : error; |
1317 | |||
1318 | int file_read_actor(read_descriptor_t *desc, struct page *page, | ||
1319 | unsigned long offset, unsigned long size) | ||
1320 | { | ||
1321 | char *kaddr; | ||
1322 | unsigned long left, count = desc->count; | ||
1323 | |||
1324 | if (size > count) | ||
1325 | size = count; | ||
1326 | |||
1327 | /* | ||
1328 | * Faults on the destination of a read are common, so do it before | ||
1329 | * taking the kmap. | ||
1330 | */ | ||
1331 | if (!fault_in_pages_writeable(desc->arg.buf, size)) { | ||
1332 | kaddr = kmap_atomic(page); | ||
1333 | left = __copy_to_user_inatomic(desc->arg.buf, | ||
1334 | kaddr + offset, size); | ||
1335 | kunmap_atomic(kaddr); | ||
1336 | if (left == 0) | ||
1337 | goto success; | ||
1338 | } | ||
1339 | |||
1340 | /* Do it the slow way */ | ||
1341 | kaddr = kmap(page); | ||
1342 | left = __copy_to_user(desc->arg.buf, kaddr + offset, size); | ||
1343 | kunmap(page); | ||
1344 | |||
1345 | if (left) { | ||
1346 | size -= left; | ||
1347 | desc->error = -EFAULT; | ||
1348 | } | ||
1349 | success: | ||
1350 | desc->count = count - size; | ||
1351 | desc->written += size; | ||
1352 | desc->arg.buf += size; | ||
1353 | return size; | ||
1354 | } | 1396 | } |
1355 | 1397 | ||
1356 | /* | 1398 | /* |
@@ -1408,14 +1450,15 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
1408 | { | 1450 | { |
1409 | struct file *filp = iocb->ki_filp; | 1451 | struct file *filp = iocb->ki_filp; |
1410 | ssize_t retval; | 1452 | ssize_t retval; |
1411 | unsigned long seg = 0; | ||
1412 | size_t count; | 1453 | size_t count; |
1413 | loff_t *ppos = &iocb->ki_pos; | 1454 | loff_t *ppos = &iocb->ki_pos; |
1455 | struct iov_iter i; | ||
1414 | 1456 | ||
1415 | count = 0; | 1457 | count = 0; |
1416 | retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); | 1458 | retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); |
1417 | if (retval) | 1459 | if (retval) |
1418 | return retval; | 1460 | return retval; |
1461 | iov_iter_init(&i, iov, nr_segs, count, 0); | ||
1419 | 1462 | ||
1420 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ | 1463 | /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ |
1421 | if (filp->f_flags & O_DIRECT) { | 1464 | if (filp->f_flags & O_DIRECT) { |
@@ -1437,6 +1480,11 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
1437 | if (retval > 0) { | 1480 | if (retval > 0) { |
1438 | *ppos = pos + retval; | 1481 | *ppos = pos + retval; |
1439 | count -= retval; | 1482 | count -= retval; |
1483 | /* | ||
1484 | * If we did a short DIO read we need to skip the | ||
1485 | * section of the iov that we've already read data into. | ||
1486 | */ | ||
1487 | iov_iter_advance(&i, retval); | ||
1440 | } | 1488 | } |
1441 | 1489 | ||
1442 | /* | 1490 | /* |
@@ -1453,39 +1501,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
1453 | } | 1501 | } |
1454 | } | 1502 | } |
1455 | 1503 | ||
1456 | count = retval; | 1504 | retval = do_generic_file_read(filp, ppos, &i, retval); |
1457 | for (seg = 0; seg < nr_segs; seg++) { | ||
1458 | read_descriptor_t desc; | ||
1459 | loff_t offset = 0; | ||
1460 | |||
1461 | /* | ||
1462 | * If we did a short DIO read we need to skip the section of the | ||
1463 | * iov that we've already read data into. | ||
1464 | */ | ||
1465 | if (count) { | ||
1466 | if (count > iov[seg].iov_len) { | ||
1467 | count -= iov[seg].iov_len; | ||
1468 | continue; | ||
1469 | } | ||
1470 | offset = count; | ||
1471 | count = 0; | ||
1472 | } | ||
1473 | |||
1474 | desc.written = 0; | ||
1475 | desc.arg.buf = iov[seg].iov_base + offset; | ||
1476 | desc.count = iov[seg].iov_len - offset; | ||
1477 | if (desc.count == 0) | ||
1478 | continue; | ||
1479 | desc.error = 0; | ||
1480 | do_generic_file_read(filp, ppos, &desc); | ||
1481 | retval += desc.written; | ||
1482 | if (desc.error) { | ||
1483 | retval = retval ?: desc.error; | ||
1484 | break; | ||
1485 | } | ||
1486 | if (desc.count > 0) | ||
1487 | break; | ||
1488 | } | ||
1489 | out: | 1505 | out: |
1490 | return retval; | 1506 | return retval; |
1491 | } | 1507 | } |