diff options
author | Zheng Liu <wenqing.lz@taobao.com> | 2012-11-08 21:57:40 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2012-11-08 21:57:40 -0500 |
commit | c8c0df241cc2719b1262e627f999638411934f60 (patch) | |
tree | 0961d5d82c43240e03951b2f1b942aad7cfbe7cb /fs/ext4/file.c | |
parent | b3aff3e3f61d13586fd46d1ee6f7353ab3050b6d (diff) |
ext4: introduce lseek SEEK_DATA/SEEK_HOLE support
This patch makes ext4 really support the SEEK_DATA/SEEK_HOLE flags. Block-mapped
and extent-mapped files are both implemented together because ext4_map_blocks()
hides the differences between them.
After applying this patch, xfstest #285 will fail when the file is
block-mapped, because block-mapped files do not support fallocate(2).
I tried using ext4_ext_walk_space() to retrieve the offset for an
extent-mapped file, but in the end I decided to keep using ext4_map_blocks() to
support SEEK_DATA/SEEK_HOLE because ext4_map_blocks() hides the difference
between block-mapped and extent-mapped files. Moreover, as a next step, the
extent status tree will track all extent status, and we will be able to get all
mappings from this tree. So I think that using ext4_map_blocks() is the better choice.
CC: Hugh Dickins <hughd@google.com>
Signed-off-by: Jie Liu <jeff.liu@oracle.com>
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/file.c')
-rw-r--r-- | fs/ext4/file.c | 334 |
1 files changed, 332 insertions, 2 deletions
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index bf3966bccd34..2f5759eb9f89 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/mount.h> | 24 | #include <linux/mount.h> |
25 | #include <linux/path.h> | 25 | #include <linux/path.h> |
26 | #include <linux/quotaops.h> | 26 | #include <linux/quotaops.h> |
27 | #include <linux/pagevec.h> | ||
27 | #include "ext4.h" | 28 | #include "ext4.h" |
28 | #include "ext4_jbd2.h" | 29 | #include "ext4_jbd2.h" |
29 | #include "xattr.h" | 30 | #include "xattr.h" |
@@ -286,6 +287,324 @@ static int ext4_file_open(struct inode * inode, struct file * filp) | |||
286 | } | 287 | } |
287 | 288 | ||
288 | /* | 289 | /* |
290 | * Here we use ext4_map_blocks() to get a block mapping for an extent-based | |
291 | * file rather than ext4_ext_walk_space(), because this lets us implement | |
292 | * SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped files in the same | |
293 | * function. Once the extent status tree has been fully implemented, it will | |
294 | * track all extent status for a file and we can use it directly to | |
295 | * retrieve the offset for SEEK_DATA/SEEK_HOLE. | |
296 | */ | ||
297 | |||
298 | /* | |
299 | * When retrieving the offset for SEEK_DATA/SEEK_HOLE, we need to look up | |
300 | * the page cache to check whether there is any data in the range | |
301 | * [startoff, endoff], because if this range is an unwritten extent we | |
302 | * treat it as data or as a hole according to whether the page cache has | |
303 | * data or not. Returns 1 if found (*offset may have been updated), else 0. | |
304 | */ | |
305 | static int ext4_find_unwritten_pgoff(struct inode *inode, | |
306 | int origin, | |
307 | struct ext4_map_blocks *map, | |
308 | loff_t *offset) | |
309 | { | |
310 | struct pagevec pvec; | |
311 | unsigned int blkbits; | |
312 | pgoff_t index; | |
313 | pgoff_t end; | |
314 | loff_t endoff; | |
315 | loff_t startoff; | |
316 | loff_t lastoff; | |
317 | int found = 0; | |
318 | |||
319 | blkbits = inode->i_sb->s_blocksize_bits; | |
320 | startoff = *offset; | |
321 | lastoff = startoff; | |
322 | endoff = (map->m_lblk + map->m_len) << blkbits; /* NOTE(review): the shift appears to happen in the width of m_lblk/m_len, not loff_t -- confirm it cannot overflow */ | |
323 | |||
324 | index = startoff >> PAGE_CACHE_SHIFT; | |
325 | end = endoff >> PAGE_CACHE_SHIFT; | |
326 | |||
327 | pagevec_init(&pvec, 0); | |
328 | do { | |
329 | int i, num; | |
330 | unsigned long nr_pages; | |
331 | |||
332 | num = min_t(pgoff_t, end - index, PAGEVEC_SIZE); | |
333 | nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, | |
334 | (pgoff_t)num); | |
335 | if (nr_pages == 0) { | |
336 | if (origin == SEEK_DATA) | |
337 | break; | |
338 | |||
339 | BUG_ON(origin != SEEK_HOLE); | |
340 | /* | |
341 | * No pages were found. If this is the first pass through | |
342 | * the loop, or the last examined offset is still before | |
343 | * the end offset, there is a hole at this offset. | |
344 | */ | |
345 | if (lastoff == startoff || lastoff < endoff) | |
346 | found = 1; | |
347 | break; | |
348 | } | |
349 | |||
350 | /* | |
351 | * If this is the first pass through the loop and the | |
352 | * offset is below the first page's offset, there is a | |
353 | * hole at this offset. | |
354 | */ | |
355 | if (lastoff == startoff && origin == SEEK_HOLE && | |
356 | lastoff < page_offset(pvec.pages[0])) { | |
357 | found = 1; | |
358 | break; | |
359 | } | |
360 | |||
361 | for (i = 0; i < nr_pages; i++) { | |
362 | struct page *page = pvec.pages[i]; | |
363 | struct buffer_head *bh, *head; | |
364 | |||
365 | /* | |
366 | * If the current offset is still inside the given range | |
367 | * but this page lies beyond it, there is a hole. | |
368 | */ | |
369 | if (lastoff < endoff && origin == SEEK_HOLE && | |
370 | page->index > end) { | |
371 | found = 1; | |
372 | *offset = lastoff; | |
373 | goto out; | |
374 | } | |
375 | |||
376 | lock_page(page); | |
377 | |||
378 | if (unlikely(page->mapping != inode->i_mapping)) { | |
379 | unlock_page(page); | |
380 | continue; | |
381 | } | |
382 | |||
383 | if (!page_has_buffers(page)) { | |
384 | unlock_page(page); | |
385 | continue; | |
386 | } | |
387 | |||
388 | if (page_has_buffers(page)) { | |
389 | lastoff = page_offset(page); | |
390 | bh = head = page_buffers(page); | |
391 | do { | |
392 | if (buffer_uptodate(bh) || | |
393 | buffer_unwritten(bh)) { | |
394 | if (origin == SEEK_DATA) | |
395 | found = 1; | |
396 | } else { | |
397 | if (origin == SEEK_HOLE) | |
398 | found = 1; | |
399 | } | |
400 | if (found) { | |
401 | *offset = max_t(loff_t, | |
402 | startoff, lastoff); | |
403 | unlock_page(page); | |
404 | goto out; | |
405 | } | |
406 | lastoff += bh->b_size; | |
407 | bh = bh->b_this_page; | |
408 | } while (bh != head); | |
409 | } | |
410 | |||
411 | lastoff = page_offset(page) + PAGE_SIZE; | |
412 | unlock_page(page); | |
413 | } | |
414 | |||
415 | /* | |
416 | * Fewer pages were returned than requested, so there must be | |
417 | * a hole in the range. | |
418 | */ | |
419 | if (nr_pages < num && origin == SEEK_HOLE) { | |
420 | found = 1; | |
421 | *offset = lastoff; | |
422 | break; | |
423 | } | |
424 | |||
425 | index = pvec.pages[i - 1]->index + 1; | |
426 | pagevec_release(&pvec); | |
427 | } while (index <= end); | |
428 | |||
429 | out: | |
430 | pagevec_release(&pvec); | |
431 | return found; | |
432 | } | |
433 | |||
434 | /* | |
435 | * ext4_seek_data() retrieves the offset for SEEK_DATA while holding i_mutex; it returns that offset (updating f_pos) or -ENXIO/-EINVAL. | |
436 | */ | |
437 | static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |
438 | { | |
439 | struct inode *inode = file->f_mapping->host; | |
440 | struct ext4_map_blocks map; | |
441 | struct extent_status es; | |
442 | ext4_lblk_t start, last, end; | |
443 | loff_t dataoff, isize; | |
444 | int blkbits; | |
445 | int ret = 0; | |
446 | |||
447 | mutex_lock(&inode->i_mutex); | |
448 | |||
449 | isize = i_size_read(inode); | |
450 | if (offset >= isize) { | |
451 | mutex_unlock(&inode->i_mutex); | |
452 | return -ENXIO; | |
453 | } | |
454 | |||
455 | blkbits = inode->i_sb->s_blocksize_bits; | |
456 | start = offset >> blkbits; | |
457 | last = start; | |
458 | end = isize >> blkbits; | |
459 | dataoff = offset; | |
460 | |||
461 | do { | |
462 | map.m_lblk = last; | |
463 | map.m_len = end - last + 1; | |
464 | ret = ext4_map_blocks(NULL, inode, &map, 0); | |
465 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { | |
466 | if (last != start) | |
467 | dataoff = last << blkbits; /* NOTE(review): shift is done in (promoted) ext4_lblk_t width before assignment to loff_t -- confirm no overflow */ | |
468 | break; | |
469 | } | |
470 | |||
471 | /* | |
472 | * If there is a delayed extent at this offset, | |
473 | * it is treated as data. | |
474 | */ | |
475 | es.start = last; | |
476 | (void)ext4_es_find_extent(inode, &es); | |
477 | if (last >= es.start && | |
478 | last < es.start + es.len) { | |
479 | if (last != start) | |
480 | dataoff = last << blkbits; | |
481 | break; | |
482 | } | |
483 | |||
484 | /* | |
485 | * If there is an unwritten extent at this offset, | |
486 | * it is treated as data or as a hole depending on | |
487 | * whether the page cache has data for it. | |
488 | */ | |
489 | if (map.m_flags & EXT4_MAP_UNWRITTEN) { | |
490 | int unwritten; | |
491 | unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA, | |
492 | &map, &dataoff); | |
493 | if (unwritten) | |
494 | break; | |
495 | } | |
496 | |||
497 | last++; | |
498 | dataoff = last << blkbits; | |
499 | } while (last <= end); | |
500 | |||
501 | mutex_unlock(&inode->i_mutex); | |
502 | |||
503 | if (dataoff > isize) | |
504 | return -ENXIO; | |
505 | |||
506 | if (dataoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) | |
507 | return -EINVAL; | |
508 | if (dataoff > maxsize) | |
509 | return -EINVAL; | |
510 | |||
511 | if (dataoff != file->f_pos) { | |
512 | file->f_pos = dataoff; | |
513 | file->f_version = 0; | |
514 | } | |
515 | |||
516 | return dataoff; | |
517 | } | |
518 | |||
519 | /* | |
520 | * ext4_seek_hole() retrieves the offset for SEEK_HOLE while holding i_mutex; the result is clamped to i_size, and -ENXIO/-EINVAL is returned on failure. | |
521 | */ | |
522 | static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | |
523 | { | |
524 | struct inode *inode = file->f_mapping->host; | |
525 | struct ext4_map_blocks map; | |
526 | struct extent_status es; | |
527 | ext4_lblk_t start, last, end; | |
528 | loff_t holeoff, isize; | |
529 | int blkbits; | |
530 | int ret = 0; | |
531 | |||
532 | mutex_lock(&inode->i_mutex); | |
533 | |||
534 | isize = i_size_read(inode); | |
535 | if (offset >= isize) { | |
536 | mutex_unlock(&inode->i_mutex); | |
537 | return -ENXIO; | |
538 | } | |
539 | |||
540 | blkbits = inode->i_sb->s_blocksize_bits; | |
541 | start = offset >> blkbits; | |
542 | last = start; | |
543 | end = isize >> blkbits; | |
544 | holeoff = offset; | |
545 | |||
546 | do { | |
547 | map.m_lblk = last; | |
548 | map.m_len = end - last + 1; | |
549 | ret = ext4_map_blocks(NULL, inode, &map, 0); | |
550 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { | |
551 | last += ret; | |
552 | holeoff = last << blkbits; /* NOTE(review): shift is done in (promoted) ext4_lblk_t width before assignment to loff_t -- confirm no overflow */ | |
553 | continue; | |
554 | } | |
555 | |||
556 | /* | |
557 | * If there is a delayed extent at this offset, | |
558 | * skip over this extent. | |
559 | */ | |
560 | es.start = last; | |
561 | (void)ext4_es_find_extent(inode, &es); | |
562 | if (last >= es.start && | |
563 | last < es.start + es.len) { | |
564 | last = es.start + es.len; | |
565 | holeoff = last << blkbits; | |
566 | continue; | |
567 | } | |
568 | |||
569 | /* | |
570 | * If there is an unwritten extent at this offset, | |
571 | * it is treated as data or as a hole depending on | |
572 | * whether the page cache has data for it. | |
573 | */ | |
574 | if (map.m_flags & EXT4_MAP_UNWRITTEN) { | |
575 | int unwritten; | |
576 | unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE, | |
577 | &map, &holeoff); | |
578 | if (!unwritten) { | |
579 | last += ret; | |
580 | holeoff = last << blkbits; | |
581 | continue; | |
582 | } | |
583 | } | |
584 | |||
585 | /* found a hole */ | |
586 | break; | |
587 | } while (last <= end); | |
588 | |||
589 | mutex_unlock(&inode->i_mutex); | |
590 | |||
591 | if (holeoff > isize) | |
592 | holeoff = isize; | |
593 | |||
594 | if (holeoff < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) | |
595 | return -EINVAL; | |
596 | if (holeoff > maxsize) | |
597 | return -EINVAL; | |
598 | |||
599 | if (holeoff != file->f_pos) { | |
600 | file->f_pos = holeoff; | |
601 | file->f_version = 0; | |
602 | } | |
603 | |||
604 | return holeoff; | |
605 | } | |
606 | |||
607 | /* | ||
289 | * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values | 608 | * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values |
290 | * by calling generic_file_llseek_size() with the appropriate maxbytes | 609 | * by calling generic_file_llseek_size() with the appropriate maxbytes |
291 | * value for each. | 610 | * value for each. |
@@ -300,8 +619,19 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin) | |||
300 | else | 619 | else |
301 | maxbytes = inode->i_sb->s_maxbytes; | 620 | maxbytes = inode->i_sb->s_maxbytes; |
302 | 621 | ||
303 | return generic_file_llseek_size(file, offset, origin, | 622 | switch (origin) { |
304 | maxbytes, i_size_read(inode)); | 623 | case SEEK_SET: |
624 | case SEEK_CUR: | ||
625 | case SEEK_END: | ||
626 | return generic_file_llseek_size(file, offset, origin, | ||
627 | maxbytes, i_size_read(inode)); | ||
628 | case SEEK_DATA: | ||
629 | return ext4_seek_data(file, offset, maxbytes); | ||
630 | case SEEK_HOLE: | ||
631 | return ext4_seek_hole(file, offset, maxbytes); | ||
632 | } | ||
633 | |||
634 | return -EINVAL; | ||
305 | } | 635 | } |
306 | 636 | ||
307 | const struct file_operations ext4_file_operations = { | 637 | const struct file_operations ext4_file_operations = { |