diff options
Diffstat (limited to 'fs/ocfs2/refcounttree.c')
-rw-r--r-- | fs/ocfs2/refcounttree.c | 814 |
1 files changed, 814 insertions, 0 deletions
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index e72dbdd3b6e8..4e7df8b8fd4f 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -31,6 +31,27 @@ | |||
31 | #include "sysfile.h" | 31 | #include "sysfile.h" |
32 | #include "dlmglue.h" | 32 | #include "dlmglue.h" |
33 | #include "extent_map.h" | 33 | #include "extent_map.h" |
34 | #include "aops.h" | ||
35 | |||
36 | #include <linux/bio.h> | ||
37 | #include <linux/blkdev.h> | ||
38 | #include <linux/gfp.h> | ||
39 | #include <linux/slab.h> | ||
40 | #include <linux/writeback.h> | ||
41 | #include <linux/pagevec.h> | ||
42 | #include <linux/swap.h> | ||
43 | |||
44 | struct ocfs2_cow_context { | ||
45 | struct inode *inode; | ||
46 | u32 cow_start; | ||
47 | u32 cow_len; | ||
48 | struct ocfs2_extent_tree di_et; | ||
49 | struct ocfs2_caching_info *ref_ci; | ||
50 | struct buffer_head *ref_root_bh; | ||
51 | struct ocfs2_alloc_context *meta_ac; | ||
52 | struct ocfs2_alloc_context *data_ac; | ||
53 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
54 | }; | ||
34 | 55 | ||
35 | static inline struct ocfs2_refcount_tree * | 56 | static inline struct ocfs2_refcount_tree * |
36 | cache_info_to_refcount(struct ocfs2_caching_info *ci) | 57 | cache_info_to_refcount(struct ocfs2_caching_info *ci) |
@@ -2404,3 +2425,796 @@ out: | |||
2404 | brelse(ref_root_bh); | 2425 | brelse(ref_root_bh); |
2405 | return ret; | 2426 | return ret; |
2406 | } | 2427 | } |
2428 | |||
/* Granularity, in bytes, at which CoW tries to split extents. */
#define MAX_CONTIG_BYTES	1048576

/* MAX_CONTIG_BYTES converted to a cluster count for @sb. */
static inline unsigned int ocfs2_cow_contig_clusters(struct super_block *sb)
{
	unsigned int clusters = ocfs2_clusters_for_bytes(sb, MAX_CONTIG_BYTES);

	return clusters;
}
2435 | |||
/*
 * Bit mask that clears the low (contig_clusters - 1) bits of a cluster
 * count.
 */
static inline unsigned int ocfs2_cow_contig_mask(struct super_block *sb)
{
	unsigned int low_bits = ocfs2_cow_contig_clusters(sb) - 1;

	return ~low_bits;
}
2440 | |||
/*
 * For an extent beginning at 'start' and an I/O beginning at 'cpos',
 * return the offset (start + (n * contig_clusters)) that lies closest to
 * cpos without exceeding it.
 *
 * This lets the caller break the extent at a multiple of contig_clusters.
 * It is a bug to call this with start > cpos.
 */
static inline unsigned int ocfs2_cow_align_start(struct super_block *sb,
						 unsigned int start,
						 unsigned int cpos)
{
	unsigned int delta;

	BUG_ON(start > cpos);

	/* Distance into the extent, rounded down by the contig mask. */
	delta = cpos - start;
	delta &= ocfs2_cow_contig_mask(sb);

	return start + delta;
}
2456 | |||
2457 | /* | ||
2458 | * Given a cluster count of len, pad it out so that it is a multiple | ||
2459 | * of contig_clusters. | ||
2460 | */ | ||
2461 | static inline unsigned int ocfs2_cow_align_length(struct super_block *sb, | ||
2462 | unsigned int len) | ||
2463 | { | ||
2464 | unsigned int padded = | ||
2465 | (len + (ocfs2_cow_contig_clusters(sb) - 1)) & | ||
2466 | ocfs2_cow_contig_mask(sb); | ||
2467 | |||
2468 | /* Did we wrap? */ | ||
2469 | if (padded < len) | ||
2470 | padded = UINT_MAX; | ||
2471 | |||
2472 | return padded; | ||
2473 | } | ||
2474 | |||
2475 | /* | ||
2476 | * Calculate out the start and number of virtual clusters we need to to CoW. | ||
2477 | * | ||
2478 | * cpos is vitual start cluster position we want to do CoW in a | ||
2479 | * file and write_len is the cluster length. | ||
2480 | * | ||
2481 | * Normal we will start CoW from the beginning of extent record cotaining cpos. | ||
2482 | * We try to break up extents on boundaries of MAX_CONTIG_BYTES so that we | ||
2483 | * get good I/O from the resulting extent tree. | ||
2484 | */ | ||
2485 | static int ocfs2_refcount_cal_cow_clusters(struct inode *inode, | ||
2486 | struct buffer_head *di_bh, | ||
2487 | u32 cpos, | ||
2488 | u32 write_len, | ||
2489 | u32 *cow_start, | ||
2490 | u32 *cow_len) | ||
2491 | { | ||
2492 | int ret = 0; | ||
2493 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; | ||
2494 | struct ocfs2_extent_list *el = &di->id2.i_list; | ||
2495 | int tree_height = le16_to_cpu(el->l_tree_depth), i; | ||
2496 | struct buffer_head *eb_bh = NULL; | ||
2497 | struct ocfs2_extent_block *eb = NULL; | ||
2498 | struct ocfs2_extent_rec *rec; | ||
2499 | unsigned int want_clusters, rec_end = 0; | ||
2500 | int contig_clusters = ocfs2_cow_contig_clusters(inode->i_sb); | ||
2501 | int leaf_clusters; | ||
2502 | |||
2503 | if (tree_height > 0) { | ||
2504 | ret = ocfs2_find_leaf(INODE_CACHE(inode), el, cpos, &eb_bh); | ||
2505 | if (ret) { | ||
2506 | mlog_errno(ret); | ||
2507 | goto out; | ||
2508 | } | ||
2509 | |||
2510 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
2511 | el = &eb->h_list; | ||
2512 | |||
2513 | if (el->l_tree_depth) { | ||
2514 | ocfs2_error(inode->i_sb, | ||
2515 | "Inode %lu has non zero tree depth in " | ||
2516 | "leaf block %llu\n", inode->i_ino, | ||
2517 | (unsigned long long)eb_bh->b_blocknr); | ||
2518 | ret = -EROFS; | ||
2519 | goto out; | ||
2520 | } | ||
2521 | } | ||
2522 | |||
2523 | *cow_len = 0; | ||
2524 | for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { | ||
2525 | rec = &el->l_recs[i]; | ||
2526 | |||
2527 | if (ocfs2_is_empty_extent(rec)) { | ||
2528 | mlog_bug_on_msg(i != 0, "Inode %lu has empty record in " | ||
2529 | "index %d\n", inode->i_ino, i); | ||
2530 | continue; | ||
2531 | } | ||
2532 | |||
2533 | if (le32_to_cpu(rec->e_cpos) + | ||
2534 | le16_to_cpu(rec->e_leaf_clusters) <= cpos) | ||
2535 | continue; | ||
2536 | |||
2537 | if (*cow_len == 0) { | ||
2538 | /* | ||
2539 | * We should find a refcounted record in the | ||
2540 | * first pass. | ||
2541 | */ | ||
2542 | BUG_ON(!(rec->e_flags & OCFS2_EXT_REFCOUNTED)); | ||
2543 | *cow_start = le32_to_cpu(rec->e_cpos); | ||
2544 | } | ||
2545 | |||
2546 | /* | ||
2547 | * If we encounter a hole or a non-refcounted record, | ||
2548 | * stop the search. | ||
2549 | */ | ||
2550 | if ((!(rec->e_flags & OCFS2_EXT_REFCOUNTED)) || | ||
2551 | (*cow_len && rec_end != le32_to_cpu(rec->e_cpos))) | ||
2552 | break; | ||
2553 | |||
2554 | leaf_clusters = le16_to_cpu(rec->e_leaf_clusters); | ||
2555 | rec_end = le32_to_cpu(rec->e_cpos) + leaf_clusters; | ||
2556 | |||
2557 | /* | ||
2558 | * How many clusters do we actually need from | ||
2559 | * this extent? First we see how many we actually | ||
2560 | * need to complete the write. If that's smaller | ||
2561 | * than contig_clusters, we try for contig_clusters. | ||
2562 | */ | ||
2563 | if (!*cow_len) | ||
2564 | want_clusters = write_len; | ||
2565 | else | ||
2566 | want_clusters = (cpos + write_len) - | ||
2567 | (*cow_start + *cow_len); | ||
2568 | if (want_clusters < contig_clusters) | ||
2569 | want_clusters = contig_clusters; | ||
2570 | |||
2571 | /* | ||
2572 | * If the write does not cover the whole extent, we | ||
2573 | * need to calculate how we're going to split the extent. | ||
2574 | * We try to do it on contig_clusters boundaries. | ||
2575 | * | ||
2576 | * Any extent smaller than contig_clusters will be | ||
2577 | * CoWed in its entirety. | ||
2578 | */ | ||
2579 | if (leaf_clusters <= contig_clusters) | ||
2580 | *cow_len += leaf_clusters; | ||
2581 | else if (*cow_len || (*cow_start == cpos)) { | ||
2582 | /* | ||
2583 | * This extent needs to be CoW'd from its | ||
2584 | * beginning, so all we have to do is compute | ||
2585 | * how many clusters to grab. We align | ||
2586 | * want_clusters to the edge of contig_clusters | ||
2587 | * to get better I/O. | ||
2588 | */ | ||
2589 | want_clusters = ocfs2_cow_align_length(inode->i_sb, | ||
2590 | want_clusters); | ||
2591 | |||
2592 | if (leaf_clusters < want_clusters) | ||
2593 | *cow_len += leaf_clusters; | ||
2594 | else | ||
2595 | *cow_len += want_clusters; | ||
2596 | } else if ((*cow_start + contig_clusters) >= | ||
2597 | (cpos + write_len)) { | ||
2598 | /* | ||
2599 | * Breaking off contig_clusters at the front | ||
2600 | * of the extent will cover our write. That's | ||
2601 | * easy. | ||
2602 | */ | ||
2603 | *cow_len = contig_clusters; | ||
2604 | } else if ((rec_end - cpos) <= contig_clusters) { | ||
2605 | /* | ||
2606 | * Breaking off contig_clusters at the tail of | ||
2607 | * this extent will cover cpos. | ||
2608 | */ | ||
2609 | *cow_start = rec_end - contig_clusters; | ||
2610 | *cow_len = contig_clusters; | ||
2611 | } else if ((rec_end - cpos) <= want_clusters) { | ||
2612 | /* | ||
2613 | * While we can't fit the entire write in this | ||
2614 | * extent, we know that the write goes from cpos | ||
2615 | * to the end of the extent. Break that off. | ||
2616 | * We try to break it at some multiple of | ||
2617 | * contig_clusters from the front of the extent. | ||
2618 | * Failing that (ie, cpos is within | ||
2619 | * contig_clusters of the front), we'll CoW the | ||
2620 | * entire extent. | ||
2621 | */ | ||
2622 | *cow_start = ocfs2_cow_align_start(inode->i_sb, | ||
2623 | *cow_start, cpos); | ||
2624 | *cow_len = rec_end - *cow_start; | ||
2625 | } else { | ||
2626 | /* | ||
2627 | * Ok, the entire write lives in the middle of | ||
2628 | * this extent. Let's try to slice the extent up | ||
2629 | * nicely. Optimally, our CoW region starts at | ||
2630 | * m*contig_clusters from the beginning of the | ||
2631 | * extent and goes for n*contig_clusters, | ||
2632 | * covering the entire write. | ||
2633 | */ | ||
2634 | *cow_start = ocfs2_cow_align_start(inode->i_sb, | ||
2635 | *cow_start, cpos); | ||
2636 | |||
2637 | want_clusters = (cpos + write_len) - *cow_start; | ||
2638 | want_clusters = ocfs2_cow_align_length(inode->i_sb, | ||
2639 | want_clusters); | ||
2640 | if (*cow_start + want_clusters <= rec_end) | ||
2641 | *cow_len = want_clusters; | ||
2642 | else | ||
2643 | *cow_len = rec_end - *cow_start; | ||
2644 | } | ||
2645 | |||
2646 | /* Have we covered our entire write yet? */ | ||
2647 | if ((*cow_start + *cow_len) >= (cpos + write_len)) | ||
2648 | break; | ||
2649 | |||
2650 | /* | ||
2651 | * If we reach the end of the extent block and don't get enough | ||
2652 | * clusters, continue with the next extent block if possible. | ||
2653 | */ | ||
2654 | if (i + 1 == le16_to_cpu(el->l_next_free_rec) && | ||
2655 | eb && eb->h_next_leaf_blk) { | ||
2656 | brelse(eb_bh); | ||
2657 | eb_bh = NULL; | ||
2658 | |||
2659 | ret = ocfs2_read_extent_block(INODE_CACHE(inode), | ||
2660 | le64_to_cpu(eb->h_next_leaf_blk), | ||
2661 | &eb_bh); | ||
2662 | if (ret) { | ||
2663 | mlog_errno(ret); | ||
2664 | goto out; | ||
2665 | } | ||
2666 | |||
2667 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
2668 | el = &eb->h_list; | ||
2669 | i = -1; | ||
2670 | } | ||
2671 | } | ||
2672 | |||
2673 | out: | ||
2674 | brelse(eb_bh); | ||
2675 | return ret; | ||
2676 | } | ||
2677 | |||
2678 | /* | ||
2679 | * Prepare meta_ac, data_ac and calculate credits when we want to add some | ||
2680 | * num_clusters in data_tree "et" and change the refcount for the old | ||
2681 | * clusters(starting form p_cluster) in the refcount tree. | ||
2682 | * | ||
2683 | * Note: | ||
2684 | * 1. since we may split the old tree, so we at most will need num_clusters + 2 | ||
2685 | * more new leaf records. | ||
2686 | * 2. In some case, we may not need to reserve new clusters(e.g, reflink), so | ||
2687 | * just give data_ac = NULL. | ||
2688 | */ | ||
2689 | static int ocfs2_lock_refcount_allocators(struct super_block *sb, | ||
2690 | u32 p_cluster, u32 num_clusters, | ||
2691 | struct ocfs2_extent_tree *et, | ||
2692 | struct ocfs2_caching_info *ref_ci, | ||
2693 | struct buffer_head *ref_root_bh, | ||
2694 | struct ocfs2_alloc_context **meta_ac, | ||
2695 | struct ocfs2_alloc_context **data_ac, | ||
2696 | int *credits) | ||
2697 | { | ||
2698 | int ret = 0, meta_add = 0; | ||
2699 | int num_free_extents = ocfs2_num_free_extents(OCFS2_SB(sb), et); | ||
2700 | |||
2701 | if (num_free_extents < 0) { | ||
2702 | ret = num_free_extents; | ||
2703 | mlog_errno(ret); | ||
2704 | goto out; | ||
2705 | } | ||
2706 | |||
2707 | if (num_free_extents < num_clusters + 2) | ||
2708 | meta_add = | ||
2709 | ocfs2_extend_meta_needed(et->et_root_el); | ||
2710 | |||
2711 | *credits += ocfs2_calc_extend_credits(sb, et->et_root_el, | ||
2712 | num_clusters + 2); | ||
2713 | |||
2714 | ret = ocfs2_calc_refcount_meta_credits(sb, ref_ci, ref_root_bh, | ||
2715 | p_cluster, num_clusters, | ||
2716 | &meta_add, credits); | ||
2717 | if (ret) { | ||
2718 | mlog_errno(ret); | ||
2719 | goto out; | ||
2720 | } | ||
2721 | |||
2722 | mlog(0, "reserve new metadata %d, clusters %u, credits = %d\n", | ||
2723 | meta_add, num_clusters, *credits); | ||
2724 | ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(sb), meta_add, | ||
2725 | meta_ac); | ||
2726 | if (ret) { | ||
2727 | mlog_errno(ret); | ||
2728 | goto out; | ||
2729 | } | ||
2730 | |||
2731 | if (data_ac) { | ||
2732 | ret = ocfs2_reserve_clusters(OCFS2_SB(sb), num_clusters, | ||
2733 | data_ac); | ||
2734 | if (ret) | ||
2735 | mlog_errno(ret); | ||
2736 | } | ||
2737 | |||
2738 | out: | ||
2739 | if (ret) { | ||
2740 | if (*meta_ac) { | ||
2741 | ocfs2_free_alloc_context(*meta_ac); | ||
2742 | *meta_ac = NULL; | ||
2743 | } | ||
2744 | } | ||
2745 | |||
2746 | return ret; | ||
2747 | } | ||
2748 | |||
2749 | static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh) | ||
2750 | { | ||
2751 | BUG_ON(buffer_dirty(bh)); | ||
2752 | |||
2753 | clear_buffer_mapped(bh); | ||
2754 | |||
2755 | return 0; | ||
2756 | } | ||
2757 | |||
2758 | static int ocfs2_duplicate_clusters(handle_t *handle, | ||
2759 | struct ocfs2_cow_context *context, | ||
2760 | u32 cpos, u32 old_cluster, | ||
2761 | u32 new_cluster, u32 new_len) | ||
2762 | { | ||
2763 | int ret = 0, partial; | ||
2764 | struct ocfs2_caching_info *ci = context->di_et.et_ci; | ||
2765 | struct super_block *sb = ocfs2_metadata_cache_get_super(ci); | ||
2766 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); | ||
2767 | struct page *page; | ||
2768 | pgoff_t page_index; | ||
2769 | unsigned int from, to; | ||
2770 | loff_t offset, end, map_end; | ||
2771 | struct address_space *mapping = context->inode->i_mapping; | ||
2772 | |||
2773 | mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster, | ||
2774 | new_cluster, new_len, cpos); | ||
2775 | |||
2776 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; | ||
2777 | end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); | ||
2778 | |||
2779 | while (offset < end) { | ||
2780 | page_index = offset >> PAGE_CACHE_SHIFT; | ||
2781 | map_end = (page_index + 1) << PAGE_CACHE_SHIFT; | ||
2782 | if (map_end > end) | ||
2783 | map_end = end; | ||
2784 | |||
2785 | /* from, to is the offset within the page. */ | ||
2786 | from = offset & (PAGE_CACHE_SIZE - 1); | ||
2787 | to = PAGE_CACHE_SIZE; | ||
2788 | if (map_end & (PAGE_CACHE_SIZE - 1)) | ||
2789 | to = map_end & (PAGE_CACHE_SIZE - 1); | ||
2790 | |||
2791 | page = grab_cache_page(mapping, page_index); | ||
2792 | |||
2793 | /* This page can't be dirtied before we CoW it out. */ | ||
2794 | BUG_ON(PageDirty(page)); | ||
2795 | |||
2796 | if (!PageUptodate(page)) { | ||
2797 | ret = block_read_full_page(page, ocfs2_get_block); | ||
2798 | if (ret) { | ||
2799 | mlog_errno(ret); | ||
2800 | goto unlock; | ||
2801 | } | ||
2802 | lock_page(page); | ||
2803 | } | ||
2804 | |||
2805 | if (page_has_buffers(page)) { | ||
2806 | ret = walk_page_buffers(handle, page_buffers(page), | ||
2807 | from, to, &partial, | ||
2808 | ocfs2_clear_cow_buffer); | ||
2809 | if (ret) { | ||
2810 | mlog_errno(ret); | ||
2811 | goto unlock; | ||
2812 | } | ||
2813 | } | ||
2814 | |||
2815 | ocfs2_map_and_dirty_page(context->inode, | ||
2816 | handle, from, to, | ||
2817 | page, 0, &new_block); | ||
2818 | mark_page_accessed(page); | ||
2819 | unlock: | ||
2820 | unlock_page(page); | ||
2821 | page_cache_release(page); | ||
2822 | page = NULL; | ||
2823 | offset = map_end; | ||
2824 | if (ret) | ||
2825 | break; | ||
2826 | } | ||
2827 | |||
2828 | return ret; | ||
2829 | } | ||
2830 | |||
2831 | static int ocfs2_clear_ext_refcount(handle_t *handle, | ||
2832 | struct ocfs2_extent_tree *et, | ||
2833 | u32 cpos, u32 p_cluster, u32 len, | ||
2834 | unsigned int ext_flags, | ||
2835 | struct ocfs2_alloc_context *meta_ac, | ||
2836 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
2837 | { | ||
2838 | int ret, index; | ||
2839 | struct ocfs2_extent_rec replace_rec; | ||
2840 | struct ocfs2_path *path = NULL; | ||
2841 | struct ocfs2_extent_list *el; | ||
2842 | struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci); | ||
2843 | u64 ino = ocfs2_metadata_cache_owner(et->et_ci); | ||
2844 | |||
2845 | mlog(0, "inode %llu cpos %u, len %u, p_cluster %u, ext_flags %u\n", | ||
2846 | (unsigned long long)ino, cpos, len, p_cluster, ext_flags); | ||
2847 | |||
2848 | memset(&replace_rec, 0, sizeof(replace_rec)); | ||
2849 | replace_rec.e_cpos = cpu_to_le32(cpos); | ||
2850 | replace_rec.e_leaf_clusters = cpu_to_le16(len); | ||
2851 | replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(sb, | ||
2852 | p_cluster)); | ||
2853 | replace_rec.e_flags = ext_flags; | ||
2854 | replace_rec.e_flags &= ~OCFS2_EXT_REFCOUNTED; | ||
2855 | |||
2856 | path = ocfs2_new_path_from_et(et); | ||
2857 | if (!path) { | ||
2858 | ret = -ENOMEM; | ||
2859 | mlog_errno(ret); | ||
2860 | goto out; | ||
2861 | } | ||
2862 | |||
2863 | ret = ocfs2_find_path(et->et_ci, path, cpos); | ||
2864 | if (ret) { | ||
2865 | mlog_errno(ret); | ||
2866 | goto out; | ||
2867 | } | ||
2868 | |||
2869 | el = path_leaf_el(path); | ||
2870 | |||
2871 | index = ocfs2_search_extent_list(el, cpos); | ||
2872 | if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { | ||
2873 | ocfs2_error(sb, | ||
2874 | "Inode %llu has an extent at cpos %u which can no " | ||
2875 | "longer be found.\n", | ||
2876 | (unsigned long long)ino, cpos); | ||
2877 | ret = -EROFS; | ||
2878 | goto out; | ||
2879 | } | ||
2880 | |||
2881 | ret = ocfs2_split_extent(handle, et, path, index, | ||
2882 | &replace_rec, meta_ac, dealloc); | ||
2883 | if (ret) | ||
2884 | mlog_errno(ret); | ||
2885 | |||
2886 | out: | ||
2887 | ocfs2_free_path(path); | ||
2888 | return ret; | ||
2889 | } | ||
2890 | |||
2891 | static int ocfs2_replace_clusters(handle_t *handle, | ||
2892 | struct ocfs2_cow_context *context, | ||
2893 | u32 cpos, u32 old, | ||
2894 | u32 new, u32 len, | ||
2895 | unsigned int ext_flags) | ||
2896 | { | ||
2897 | int ret; | ||
2898 | struct ocfs2_caching_info *ci = context->di_et.et_ci; | ||
2899 | u64 ino = ocfs2_metadata_cache_owner(ci); | ||
2900 | |||
2901 | mlog(0, "inode %llu, cpos %u, old %u, new %u, len %u, ext_flags %u\n", | ||
2902 | (unsigned long long)ino, cpos, old, new, len, ext_flags); | ||
2903 | |||
2904 | /*If the old clusters is unwritten, no need to duplicate. */ | ||
2905 | if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { | ||
2906 | ret = ocfs2_duplicate_clusters(handle, context, cpos, | ||
2907 | old, new, len); | ||
2908 | if (ret) { | ||
2909 | mlog_errno(ret); | ||
2910 | goto out; | ||
2911 | } | ||
2912 | } | ||
2913 | |||
2914 | ret = ocfs2_clear_ext_refcount(handle, &context->di_et, | ||
2915 | cpos, new, len, ext_flags, | ||
2916 | context->meta_ac, &context->dealloc); | ||
2917 | if (ret) | ||
2918 | mlog_errno(ret); | ||
2919 | out: | ||
2920 | return ret; | ||
2921 | } | ||
2922 | |||
2923 | static int ocfs2_cow_sync_writeback(struct super_block *sb, | ||
2924 | struct ocfs2_cow_context *context, | ||
2925 | u32 cpos, u32 num_clusters) | ||
2926 | { | ||
2927 | int ret = 0; | ||
2928 | loff_t offset, end, map_end; | ||
2929 | pgoff_t page_index; | ||
2930 | struct page *page; | ||
2931 | |||
2932 | if (ocfs2_should_order_data(context->inode)) | ||
2933 | return 0; | ||
2934 | |||
2935 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; | ||
2936 | end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits); | ||
2937 | |||
2938 | ret = filemap_fdatawrite_range(context->inode->i_mapping, | ||
2939 | offset, end - 1); | ||
2940 | if (ret < 0) { | ||
2941 | mlog_errno(ret); | ||
2942 | return ret; | ||
2943 | } | ||
2944 | |||
2945 | while (offset < end) { | ||
2946 | page_index = offset >> PAGE_CACHE_SHIFT; | ||
2947 | map_end = (page_index + 1) << PAGE_CACHE_SHIFT; | ||
2948 | if (map_end > end) | ||
2949 | map_end = end; | ||
2950 | |||
2951 | page = grab_cache_page(context->inode->i_mapping, page_index); | ||
2952 | BUG_ON(!page); | ||
2953 | |||
2954 | wait_on_page_writeback(page); | ||
2955 | if (PageError(page)) { | ||
2956 | ret = -EIO; | ||
2957 | mlog_errno(ret); | ||
2958 | } else | ||
2959 | mark_page_accessed(page); | ||
2960 | |||
2961 | unlock_page(page); | ||
2962 | page_cache_release(page); | ||
2963 | page = NULL; | ||
2964 | offset = map_end; | ||
2965 | if (ret) | ||
2966 | break; | ||
2967 | } | ||
2968 | |||
2969 | return ret; | ||
2970 | } | ||
2971 | |||
2972 | static int ocfs2_make_clusters_writable(struct super_block *sb, | ||
2973 | struct ocfs2_cow_context *context, | ||
2974 | u32 cpos, u32 p_cluster, | ||
2975 | u32 num_clusters, unsigned int e_flags) | ||
2976 | { | ||
2977 | int ret, credits = 0; | ||
2978 | u32 new_bit, new_len; | ||
2979 | struct ocfs2_super *osb = OCFS2_SB(sb); | ||
2980 | handle_t *handle; | ||
2981 | |||
2982 | ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters, | ||
2983 | &context->di_et, | ||
2984 | context->ref_ci, | ||
2985 | context->ref_root_bh, | ||
2986 | &context->meta_ac, | ||
2987 | &context->data_ac, &credits); | ||
2988 | if (ret) { | ||
2989 | mlog_errno(ret); | ||
2990 | return ret; | ||
2991 | } | ||
2992 | |||
2993 | handle = ocfs2_start_trans(osb, credits); | ||
2994 | if (IS_ERR(handle)) { | ||
2995 | ret = PTR_ERR(handle); | ||
2996 | mlog_errno(ret); | ||
2997 | goto out; | ||
2998 | } | ||
2999 | |||
3000 | while (num_clusters) { | ||
3001 | ret = __ocfs2_claim_clusters(osb, handle, context->data_ac, | ||
3002 | 1, num_clusters, | ||
3003 | &new_bit, &new_len); | ||
3004 | if (ret) { | ||
3005 | mlog_errno(ret); | ||
3006 | goto out_commit; | ||
3007 | } | ||
3008 | |||
3009 | ret = ocfs2_replace_clusters(handle, context, | ||
3010 | cpos, p_cluster, new_bit, | ||
3011 | new_len, e_flags); | ||
3012 | if (ret) { | ||
3013 | mlog_errno(ret); | ||
3014 | goto out_commit; | ||
3015 | } | ||
3016 | |||
3017 | cpos += new_len; | ||
3018 | p_cluster += new_len; | ||
3019 | num_clusters -= new_len; | ||
3020 | } | ||
3021 | |||
3022 | ret = __ocfs2_decrease_refcount(handle, context->ref_ci, | ||
3023 | context->ref_root_bh, | ||
3024 | p_cluster, num_clusters, | ||
3025 | context->meta_ac, | ||
3026 | &context->dealloc); | ||
3027 | if (ret) { | ||
3028 | mlog_errno(ret); | ||
3029 | goto out_commit; | ||
3030 | } | ||
3031 | |||
3032 | /* | ||
3033 | * Here we should write the new page out first if we are | ||
3034 | * in write-back mode. | ||
3035 | */ | ||
3036 | ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters); | ||
3037 | if (ret) | ||
3038 | mlog_errno(ret); | ||
3039 | |||
3040 | out_commit: | ||
3041 | ocfs2_commit_trans(osb, handle); | ||
3042 | |||
3043 | out: | ||
3044 | if (context->data_ac) { | ||
3045 | ocfs2_free_alloc_context(context->data_ac); | ||
3046 | context->data_ac = NULL; | ||
3047 | } | ||
3048 | if (context->meta_ac) { | ||
3049 | ocfs2_free_alloc_context(context->meta_ac); | ||
3050 | context->meta_ac = NULL; | ||
3051 | } | ||
3052 | |||
3053 | return ret; | ||
3054 | } | ||
3055 | |||
3056 | static int ocfs2_replace_cow(struct inode *inode, | ||
3057 | struct buffer_head *di_bh, | ||
3058 | struct buffer_head *ref_root_bh, | ||
3059 | struct ocfs2_caching_info *ref_ci, | ||
3060 | u32 cow_start, u32 cow_len) | ||
3061 | { | ||
3062 | int ret = 0; | ||
3063 | u32 p_cluster, num_clusters, start = cow_start; | ||
3064 | unsigned int ext_flags; | ||
3065 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
3066 | struct ocfs2_cow_context *context; | ||
3067 | |||
3068 | if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) { | ||
3069 | ocfs2_error(inode->i_sb, "Inode %lu want to use refcount " | ||
3070 | "tree, but the feature bit is not set in the " | ||
3071 | "super block.", inode->i_ino); | ||
3072 | return -EROFS; | ||
3073 | } | ||
3074 | |||
3075 | context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); | ||
3076 | if (!context) { | ||
3077 | ret = -ENOMEM; | ||
3078 | mlog_errno(ret); | ||
3079 | return ret; | ||
3080 | } | ||
3081 | |||
3082 | context->inode = inode; | ||
3083 | context->cow_start = cow_start; | ||
3084 | context->cow_len = cow_len; | ||
3085 | context->ref_ci = ref_ci; | ||
3086 | context->ref_root_bh = ref_root_bh; | ||
3087 | |||
3088 | ocfs2_init_dealloc_ctxt(&context->dealloc); | ||
3089 | ocfs2_init_dinode_extent_tree(&context->di_et, | ||
3090 | INODE_CACHE(inode), di_bh); | ||
3091 | |||
3092 | while (cow_len) { | ||
3093 | ret = ocfs2_get_clusters(inode, cow_start, &p_cluster, | ||
3094 | &num_clusters, &ext_flags); | ||
3095 | if (ret) { | ||
3096 | mlog_errno(ret); | ||
3097 | break; | ||
3098 | } | ||
3099 | |||
3100 | BUG_ON(!(ext_flags & OCFS2_EXT_REFCOUNTED)); | ||
3101 | |||
3102 | if (cow_len < num_clusters) | ||
3103 | num_clusters = cow_len; | ||
3104 | |||
3105 | ret = ocfs2_make_clusters_writable(inode->i_sb, context, | ||
3106 | cow_start, p_cluster, | ||
3107 | num_clusters, ext_flags); | ||
3108 | if (ret) { | ||
3109 | mlog_errno(ret); | ||
3110 | break; | ||
3111 | } | ||
3112 | |||
3113 | cow_len -= num_clusters; | ||
3114 | cow_start += num_clusters; | ||
3115 | } | ||
3116 | |||
3117 | |||
3118 | /* | ||
3119 | * truncate the extent map here since no matter whether we meet with | ||
3120 | * any error during the action, we shouldn't trust cached extent map | ||
3121 | * any more. | ||
3122 | */ | ||
3123 | ocfs2_extent_map_trunc(inode, start); | ||
3124 | |||
3125 | if (ocfs2_dealloc_has_cluster(&context->dealloc)) { | ||
3126 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
3127 | ocfs2_run_deallocs(osb, &context->dealloc); | ||
3128 | } | ||
3129 | |||
3130 | kfree(context); | ||
3131 | return ret; | ||
3132 | } | ||
3133 | |||
3134 | /* | ||
3135 | * Starting at cpos, try to CoW write_len clusters. | ||
3136 | * This will stop when it runs into a hole or an unrefcounted extent. | ||
3137 | */ | ||
3138 | static int ocfs2_refcount_cow_hunk(struct inode *inode, | ||
3139 | struct buffer_head *di_bh, | ||
3140 | u32 cpos, u32 write_len) | ||
3141 | { | ||
3142 | int ret; | ||
3143 | u32 cow_start = 0, cow_len = 0; | ||
3144 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
3145 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
3146 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
3147 | struct buffer_head *ref_root_bh = NULL; | ||
3148 | struct ocfs2_refcount_tree *ref_tree; | ||
3149 | |||
3150 | BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); | ||
3151 | |||
3152 | ret = ocfs2_refcount_cal_cow_clusters(inode, di_bh, cpos, write_len, | ||
3153 | &cow_start, &cow_len); | ||
3154 | if (ret) { | ||
3155 | mlog_errno(ret); | ||
3156 | goto out; | ||
3157 | } | ||
3158 | mlog(0, "CoW inode %lu, cpos %u, write_len %u, cow_start %u, " | ||
3159 | "cow_len %u\n", inode->i_ino, | ||
3160 | cpos, write_len, cow_start, cow_len); | ||
3161 | |||
3162 | BUG_ON(cow_len == 0); | ||
3163 | |||
3164 | ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc), | ||
3165 | 1, &ref_tree, &ref_root_bh); | ||
3166 | if (ret) { | ||
3167 | mlog_errno(ret); | ||
3168 | goto out; | ||
3169 | } | ||
3170 | |||
3171 | ret = ocfs2_replace_cow(inode, di_bh, ref_root_bh, &ref_tree->rf_ci, | ||
3172 | cow_start, cow_len); | ||
3173 | if (ret) | ||
3174 | mlog_errno(ret); | ||
3175 | |||
3176 | ocfs2_unlock_refcount_tree(osb, ref_tree, 1); | ||
3177 | brelse(ref_root_bh); | ||
3178 | out: | ||
3179 | return ret; | ||
3180 | } | ||
3181 | |||
3182 | /* | ||
3183 | * CoW any and all clusters between cpos and cpos+write_len. | ||
3184 | * If this returns successfully, all clusters between cpos and | ||
3185 | * cpos+write_len are safe to modify. | ||
3186 | */ | ||
3187 | int ocfs2_refcount_cow(struct inode *inode, | ||
3188 | struct buffer_head *di_bh, | ||
3189 | u32 cpos, u32 write_len) | ||
3190 | { | ||
3191 | int ret = 0; | ||
3192 | u32 p_cluster, num_clusters; | ||
3193 | unsigned int ext_flags; | ||
3194 | |||
3195 | while (write_len) { | ||
3196 | ret = ocfs2_get_clusters(inode, cpos, &p_cluster, | ||
3197 | &num_clusters, &ext_flags); | ||
3198 | if (ret) { | ||
3199 | mlog_errno(ret); | ||
3200 | break; | ||
3201 | } | ||
3202 | |||
3203 | if (write_len < num_clusters) | ||
3204 | num_clusters = write_len; | ||
3205 | |||
3206 | if (ext_flags & OCFS2_EXT_REFCOUNTED) { | ||
3207 | ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos, | ||
3208 | num_clusters); | ||
3209 | if (ret) { | ||
3210 | mlog_errno(ret); | ||
3211 | break; | ||
3212 | } | ||
3213 | } | ||
3214 | |||
3215 | write_len -= num_clusters; | ||
3216 | cpos += num_clusters; | ||
3217 | } | ||
3218 | |||
3219 | return ret; | ||
3220 | } | ||