aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/ocfs2/alloc.c25
-rw-r--r--fs/ocfs2/alloc.h5
-rw-r--r--fs/ocfs2/aops.c4
-rw-r--r--fs/ocfs2/aops.h2
-rw-r--r--fs/ocfs2/refcounttree.c814
-rw-r--r--fs/ocfs2/refcounttree.h2
6 files changed, 841 insertions, 11 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 03438a677933..b8fc95d10630 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6998,9 +6998,9 @@ static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
6998 return 0; 6998 return 0;
6999} 6999}
7000 7000
7001static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, 7001void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
7002 unsigned int from, unsigned int to, 7002 unsigned int from, unsigned int to,
7003 struct page *page, int zero, u64 *phys) 7003 struct page *page, int zero, u64 *phys)
7004{ 7004{
7005 int ret, partial = 0; 7005 int ret, partial = 0;
7006 7006
@@ -7068,20 +7068,16 @@ out:
7068 ocfs2_unlock_and_free_pages(pages, numpages); 7068 ocfs2_unlock_and_free_pages(pages, numpages);
7069} 7069}
7070 7070
7071static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, 7071int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
7072 struct page **pages, int *num) 7072 struct page **pages, int *num)
7073{ 7073{
7074 int numpages, ret = 0; 7074 int numpages, ret = 0;
7075 struct super_block *sb = inode->i_sb;
7076 struct address_space *mapping = inode->i_mapping; 7075 struct address_space *mapping = inode->i_mapping;
7077 unsigned long index; 7076 unsigned long index;
7078 loff_t last_page_bytes; 7077 loff_t last_page_bytes;
7079 7078
7080 BUG_ON(start > end); 7079 BUG_ON(start > end);
7081 7080
7082 BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
7083 (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
7084
7085 numpages = 0; 7081 numpages = 0;
7086 last_page_bytes = PAGE_ALIGN(end); 7082 last_page_bytes = PAGE_ALIGN(end);
7087 index = start >> PAGE_CACHE_SHIFT; 7083 index = start >> PAGE_CACHE_SHIFT;
@@ -7109,6 +7105,17 @@ out:
7109 return ret; 7105 return ret;
7110} 7106}
7111 7107
7108static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
7109 struct page **pages, int *num)
7110{
7111 struct super_block *sb = inode->i_sb;
7112
7113 BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
7114 (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
7115
7116 return ocfs2_grab_pages(inode, start, end, pages, num);
7117}
7118
7112/* 7119/*
7113 * Zero the area past i_size but still within an allocated 7120 * Zero the area past i_size but still within an allocated
7114 * cluster. This avoids exposing nonzero data on subsequent file 7121 * cluster. This avoids exposing nonzero data on subsequent file
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 19d5b88a93df..9c122d574464 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -271,6 +271,11 @@ static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
271 return !rec->e_leaf_clusters; 271 return !rec->e_leaf_clusters;
272} 272}
273 273
274int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
275 struct page **pages, int *num);
276void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
277 unsigned int from, unsigned int to,
278 struct page *page, int zero, u64 *phys);
274/* 279/*
275 * Structures which describe a path through a btree, and functions to 280 * Structures which describe a path through a btree, and functions to
276 * manipulate them. 281 * manipulate them.
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 15c594dfd951..fdad075fed61 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -126,8 +126,8 @@ bail:
126 return err; 126 return err;
127} 127}
128 128
129static int ocfs2_get_block(struct inode *inode, sector_t iblock, 129int ocfs2_get_block(struct inode *inode, sector_t iblock,
130 struct buffer_head *bh_result, int create) 130 struct buffer_head *bh_result, int create)
131{ 131{
132 int err = 0; 132 int err = 0;
133 unsigned int ext_flags; 133 unsigned int ext_flags;
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 503e49232e11..c48e93ffc513 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -57,6 +57,8 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
57 struct buffer_head *di_bh); 57 struct buffer_head *di_bh);
58int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size); 58int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size);
59 59
60int ocfs2_get_block(struct inode *inode, sector_t iblock,
61 struct buffer_head *bh_result, int create);
60/* all ocfs2_dio_end_io()'s fault */ 62/* all ocfs2_dio_end_io()'s fault */
61#define ocfs2_iocb_is_rw_locked(iocb) \ 63#define ocfs2_iocb_is_rw_locked(iocb) \
62 test_bit(0, (unsigned long *)&iocb->private) 64 test_bit(0, (unsigned long *)&iocb->private)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index e72dbdd3b6e8..4e7df8b8fd4f 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -31,6 +31,27 @@
31#include "sysfile.h" 31#include "sysfile.h"
32#include "dlmglue.h" 32#include "dlmglue.h"
33#include "extent_map.h" 33#include "extent_map.h"
34#include "aops.h"
35
36#include <linux/bio.h>
37#include <linux/blkdev.h>
38#include <linux/gfp.h>
39#include <linux/slab.h>
40#include <linux/writeback.h>
41#include <linux/pagevec.h>
42#include <linux/swap.h>
43
44struct ocfs2_cow_context {
45 struct inode *inode;
46 u32 cow_start;
47 u32 cow_len;
48 struct ocfs2_extent_tree di_et;
49 struct ocfs2_caching_info *ref_ci;
50 struct buffer_head *ref_root_bh;
51 struct ocfs2_alloc_context *meta_ac;
52 struct ocfs2_alloc_context *data_ac;
53 struct ocfs2_cached_dealloc_ctxt dealloc;
54};
34 55
35static inline struct ocfs2_refcount_tree * 56static inline struct ocfs2_refcount_tree *
36cache_info_to_refcount(struct ocfs2_caching_info *ci) 57cache_info_to_refcount(struct ocfs2_caching_info *ci)
@@ -2404,3 +2425,796 @@ out:
2404 brelse(ref_root_bh); 2425 brelse(ref_root_bh);
2405 return ret; 2426 return ret;
2406} 2427}
2428
/* Byte size of the unit that CoW tries to keep contiguous. */
#define MAX_CONTIG_BYTES	1048576

/* Number of clusters needed to cover MAX_CONTIG_BYTES on this filesystem. */
static inline unsigned int ocfs2_cow_contig_clusters(struct super_block *sb)
{
	unsigned int nr_clusters = ocfs2_clusters_for_bytes(sb, MAX_CONTIG_BYTES);

	return nr_clusters;
}
2435
/*
 * Bitwise complement of (contig_clusters - 1).
 * NOTE(review): this only acts as an alignment mask if contig_clusters is
 * a power of two -- confirm against ocfs2_clusters_for_bytes().
 */
static inline unsigned int ocfs2_cow_contig_mask(struct super_block *sb)
{
	unsigned int low_bits = ocfs2_cow_contig_clusters(sb) - 1;

	return ~low_bits;
}
2440
/*
 * For an extent beginning at 'start' and an I/O beginning at 'cpos',
 * return the largest offset of the form start + (n * contig_clusters)
 * that does not exceed cpos.
 *
 * This lets the caller split the extent on a contig_clusters multiple.
 * 'start' must not be greater than 'cpos'.
 */
static inline unsigned int ocfs2_cow_align_start(struct super_block *sb,
						 unsigned int start,
						 unsigned int cpos)
{
	unsigned int distance;

	BUG_ON(start > cpos);

	/* Drop the part of the distance below one contig unit. */
	distance = cpos - start;
	distance &= ocfs2_cow_contig_mask(sb);

	return start + distance;
}
2456
/*
 * Round a cluster count up to the next multiple of contig_clusters.
 * If the rounding wraps around, UINT_MAX is returned instead.
 */
static inline unsigned int ocfs2_cow_align_length(struct super_block *sb,
						  unsigned int len)
{
	unsigned int unit = ocfs2_cow_contig_clusters(sb);
	unsigned int rounded = (len + (unit - 1)) & ocfs2_cow_contig_mask(sb);

	/* An unsigned wrap in the addition shows up as a result below len. */
	return rounded < len ? UINT_MAX : rounded;
}
2474
2475/*
2476 * Calculate out the start and number of virtual clusters we need to to CoW.
2477 *
2478 * cpos is vitual start cluster position we want to do CoW in a
2479 * file and write_len is the cluster length.
2480 *
2481 * Normal we will start CoW from the beginning of extent record cotaining cpos.
2482 * We try to break up extents on boundaries of MAX_CONTIG_BYTES so that we
2483 * get good I/O from the resulting extent tree.
2484 */
2485static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
2486 struct buffer_head *di_bh,
2487 u32 cpos,
2488 u32 write_len,
2489 u32 *cow_start,
2490 u32 *cow_len)
2491{
2492 int ret = 0;
2493 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
2494 struct ocfs2_extent_list *el = &di->id2.i_list;
2495 int tree_height = le16_to_cpu(el->l_tree_depth), i;
2496 struct buffer_head *eb_bh = NULL;
2497 struct ocfs2_extent_block *eb = NULL;
2498 struct ocfs2_extent_rec *rec;
2499 unsigned int want_clusters, rec_end = 0;
2500 int contig_clusters = ocfs2_cow_contig_clusters(inode->i_sb);
2501 int leaf_clusters;
2502
2503 if (tree_height > 0) {
2504 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, cpos, &eb_bh);
2505 if (ret) {
2506 mlog_errno(ret);
2507 goto out;
2508 }
2509
2510 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2511 el = &eb->h_list;
2512
2513 if (el->l_tree_depth) {
2514 ocfs2_error(inode->i_sb,
2515 "Inode %lu has non zero tree depth in "
2516 "leaf block %llu\n", inode->i_ino,
2517 (unsigned long long)eb_bh->b_blocknr);
2518 ret = -EROFS;
2519 goto out;
2520 }
2521 }
2522
2523 *cow_len = 0;
2524 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
2525 rec = &el->l_recs[i];
2526
2527 if (ocfs2_is_empty_extent(rec)) {
2528 mlog_bug_on_msg(i != 0, "Inode %lu has empty record in "
2529 "index %d\n", inode->i_ino, i);
2530 continue;
2531 }
2532
2533 if (le32_to_cpu(rec->e_cpos) +
2534 le16_to_cpu(rec->e_leaf_clusters) <= cpos)
2535 continue;
2536
2537 if (*cow_len == 0) {
2538 /*
2539 * We should find a refcounted record in the
2540 * first pass.
2541 */
2542 BUG_ON(!(rec->e_flags & OCFS2_EXT_REFCOUNTED));
2543 *cow_start = le32_to_cpu(rec->e_cpos);
2544 }
2545
2546 /*
2547 * If we encounter a hole or a non-refcounted record,
2548 * stop the search.
2549 */
2550 if ((!(rec->e_flags & OCFS2_EXT_REFCOUNTED)) ||
2551 (*cow_len && rec_end != le32_to_cpu(rec->e_cpos)))
2552 break;
2553
2554 leaf_clusters = le16_to_cpu(rec->e_leaf_clusters);
2555 rec_end = le32_to_cpu(rec->e_cpos) + leaf_clusters;
2556
2557 /*
2558 * How many clusters do we actually need from
2559 * this extent? First we see how many we actually
2560 * need to complete the write. If that's smaller
2561 * than contig_clusters, we try for contig_clusters.
2562 */
2563 if (!*cow_len)
2564 want_clusters = write_len;
2565 else
2566 want_clusters = (cpos + write_len) -
2567 (*cow_start + *cow_len);
2568 if (want_clusters < contig_clusters)
2569 want_clusters = contig_clusters;
2570
2571 /*
2572 * If the write does not cover the whole extent, we
2573 * need to calculate how we're going to split the extent.
2574 * We try to do it on contig_clusters boundaries.
2575 *
2576 * Any extent smaller than contig_clusters will be
2577 * CoWed in its entirety.
2578 */
2579 if (leaf_clusters <= contig_clusters)
2580 *cow_len += leaf_clusters;
2581 else if (*cow_len || (*cow_start == cpos)) {
2582 /*
2583 * This extent needs to be CoW'd from its
2584 * beginning, so all we have to do is compute
2585 * how many clusters to grab. We align
2586 * want_clusters to the edge of contig_clusters
2587 * to get better I/O.
2588 */
2589 want_clusters = ocfs2_cow_align_length(inode->i_sb,
2590 want_clusters);
2591
2592 if (leaf_clusters < want_clusters)
2593 *cow_len += leaf_clusters;
2594 else
2595 *cow_len += want_clusters;
2596 } else if ((*cow_start + contig_clusters) >=
2597 (cpos + write_len)) {
2598 /*
2599 * Breaking off contig_clusters at the front
2600 * of the extent will cover our write. That's
2601 * easy.
2602 */
2603 *cow_len = contig_clusters;
2604 } else if ((rec_end - cpos) <= contig_clusters) {
2605 /*
2606 * Breaking off contig_clusters at the tail of
2607 * this extent will cover cpos.
2608 */
2609 *cow_start = rec_end - contig_clusters;
2610 *cow_len = contig_clusters;
2611 } else if ((rec_end - cpos) <= want_clusters) {
2612 /*
2613 * While we can't fit the entire write in this
2614 * extent, we know that the write goes from cpos
2615 * to the end of the extent. Break that off.
2616 * We try to break it at some multiple of
2617 * contig_clusters from the front of the extent.
2618 * Failing that (ie, cpos is within
2619 * contig_clusters of the front), we'll CoW the
2620 * entire extent.
2621 */
2622 *cow_start = ocfs2_cow_align_start(inode->i_sb,
2623 *cow_start, cpos);
2624 *cow_len = rec_end - *cow_start;
2625 } else {
2626 /*
2627 * Ok, the entire write lives in the middle of
2628 * this extent. Let's try to slice the extent up
2629 * nicely. Optimally, our CoW region starts at
2630 * m*contig_clusters from the beginning of the
2631 * extent and goes for n*contig_clusters,
2632 * covering the entire write.
2633 */
2634 *cow_start = ocfs2_cow_align_start(inode->i_sb,
2635 *cow_start, cpos);
2636
2637 want_clusters = (cpos + write_len) - *cow_start;
2638 want_clusters = ocfs2_cow_align_length(inode->i_sb,
2639 want_clusters);
2640 if (*cow_start + want_clusters <= rec_end)
2641 *cow_len = want_clusters;
2642 else
2643 *cow_len = rec_end - *cow_start;
2644 }
2645
2646 /* Have we covered our entire write yet? */
2647 if ((*cow_start + *cow_len) >= (cpos + write_len))
2648 break;
2649
2650 /*
2651 * If we reach the end of the extent block and don't get enough
2652 * clusters, continue with the next extent block if possible.
2653 */
2654 if (i + 1 == le16_to_cpu(el->l_next_free_rec) &&
2655 eb && eb->h_next_leaf_blk) {
2656 brelse(eb_bh);
2657 eb_bh = NULL;
2658
2659 ret = ocfs2_read_extent_block(INODE_CACHE(inode),
2660 le64_to_cpu(eb->h_next_leaf_blk),
2661 &eb_bh);
2662 if (ret) {
2663 mlog_errno(ret);
2664 goto out;
2665 }
2666
2667 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2668 el = &eb->h_list;
2669 i = -1;
2670 }
2671 }
2672
2673out:
2674 brelse(eb_bh);
2675 return ret;
2676}
2677
2678/*
2679 * Prepare meta_ac, data_ac and calculate credits when we want to add some
2680 * num_clusters in data_tree "et" and change the refcount for the old
2681 * clusters(starting form p_cluster) in the refcount tree.
2682 *
2683 * Note:
2684 * 1. since we may split the old tree, so we at most will need num_clusters + 2
2685 * more new leaf records.
2686 * 2. In some case, we may not need to reserve new clusters(e.g, reflink), so
2687 * just give data_ac = NULL.
2688 */
2689static int ocfs2_lock_refcount_allocators(struct super_block *sb,
2690 u32 p_cluster, u32 num_clusters,
2691 struct ocfs2_extent_tree *et,
2692 struct ocfs2_caching_info *ref_ci,
2693 struct buffer_head *ref_root_bh,
2694 struct ocfs2_alloc_context **meta_ac,
2695 struct ocfs2_alloc_context **data_ac,
2696 int *credits)
2697{
2698 int ret = 0, meta_add = 0;
2699 int num_free_extents = ocfs2_num_free_extents(OCFS2_SB(sb), et);
2700
2701 if (num_free_extents < 0) {
2702 ret = num_free_extents;
2703 mlog_errno(ret);
2704 goto out;
2705 }
2706
2707 if (num_free_extents < num_clusters + 2)
2708 meta_add =
2709 ocfs2_extend_meta_needed(et->et_root_el);
2710
2711 *credits += ocfs2_calc_extend_credits(sb, et->et_root_el,
2712 num_clusters + 2);
2713
2714 ret = ocfs2_calc_refcount_meta_credits(sb, ref_ci, ref_root_bh,
2715 p_cluster, num_clusters,
2716 &meta_add, credits);
2717 if (ret) {
2718 mlog_errno(ret);
2719 goto out;
2720 }
2721
2722 mlog(0, "reserve new metadata %d, clusters %u, credits = %d\n",
2723 meta_add, num_clusters, *credits);
2724 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(sb), meta_add,
2725 meta_ac);
2726 if (ret) {
2727 mlog_errno(ret);
2728 goto out;
2729 }
2730
2731 if (data_ac) {
2732 ret = ocfs2_reserve_clusters(OCFS2_SB(sb), num_clusters,
2733 data_ac);
2734 if (ret)
2735 mlog_errno(ret);
2736 }
2737
2738out:
2739 if (ret) {
2740 if (*meta_ac) {
2741 ocfs2_free_alloc_context(*meta_ac);
2742 *meta_ac = NULL;
2743 }
2744 }
2745
2746 return ret;
2747}
2748
2749static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
2750{
2751 BUG_ON(buffer_dirty(bh));
2752
2753 clear_buffer_mapped(bh);
2754
2755 return 0;
2756}
2757
2758static int ocfs2_duplicate_clusters(handle_t *handle,
2759 struct ocfs2_cow_context *context,
2760 u32 cpos, u32 old_cluster,
2761 u32 new_cluster, u32 new_len)
2762{
2763 int ret = 0, partial;
2764 struct ocfs2_caching_info *ci = context->di_et.et_ci;
2765 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
2766 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
2767 struct page *page;
2768 pgoff_t page_index;
2769 unsigned int from, to;
2770 loff_t offset, end, map_end;
2771 struct address_space *mapping = context->inode->i_mapping;
2772
2773 mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster,
2774 new_cluster, new_len, cpos);
2775
2776 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
2777 end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
2778
2779 while (offset < end) {
2780 page_index = offset >> PAGE_CACHE_SHIFT;
2781 map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
2782 if (map_end > end)
2783 map_end = end;
2784
2785 /* from, to is the offset within the page. */
2786 from = offset & (PAGE_CACHE_SIZE - 1);
2787 to = PAGE_CACHE_SIZE;
2788 if (map_end & (PAGE_CACHE_SIZE - 1))
2789 to = map_end & (PAGE_CACHE_SIZE - 1);
2790
2791 page = grab_cache_page(mapping, page_index);
2792
2793 /* This page can't be dirtied before we CoW it out. */
2794 BUG_ON(PageDirty(page));
2795
2796 if (!PageUptodate(page)) {
2797 ret = block_read_full_page(page, ocfs2_get_block);
2798 if (ret) {
2799 mlog_errno(ret);
2800 goto unlock;
2801 }
2802 lock_page(page);
2803 }
2804
2805 if (page_has_buffers(page)) {
2806 ret = walk_page_buffers(handle, page_buffers(page),
2807 from, to, &partial,
2808 ocfs2_clear_cow_buffer);
2809 if (ret) {
2810 mlog_errno(ret);
2811 goto unlock;
2812 }
2813 }
2814
2815 ocfs2_map_and_dirty_page(context->inode,
2816 handle, from, to,
2817 page, 0, &new_block);
2818 mark_page_accessed(page);
2819unlock:
2820 unlock_page(page);
2821 page_cache_release(page);
2822 page = NULL;
2823 offset = map_end;
2824 if (ret)
2825 break;
2826 }
2827
2828 return ret;
2829}
2830
2831static int ocfs2_clear_ext_refcount(handle_t *handle,
2832 struct ocfs2_extent_tree *et,
2833 u32 cpos, u32 p_cluster, u32 len,
2834 unsigned int ext_flags,
2835 struct ocfs2_alloc_context *meta_ac,
2836 struct ocfs2_cached_dealloc_ctxt *dealloc)
2837{
2838 int ret, index;
2839 struct ocfs2_extent_rec replace_rec;
2840 struct ocfs2_path *path = NULL;
2841 struct ocfs2_extent_list *el;
2842 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
2843 u64 ino = ocfs2_metadata_cache_owner(et->et_ci);
2844
2845 mlog(0, "inode %llu cpos %u, len %u, p_cluster %u, ext_flags %u\n",
2846 (unsigned long long)ino, cpos, len, p_cluster, ext_flags);
2847
2848 memset(&replace_rec, 0, sizeof(replace_rec));
2849 replace_rec.e_cpos = cpu_to_le32(cpos);
2850 replace_rec.e_leaf_clusters = cpu_to_le16(len);
2851 replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(sb,
2852 p_cluster));
2853 replace_rec.e_flags = ext_flags;
2854 replace_rec.e_flags &= ~OCFS2_EXT_REFCOUNTED;
2855
2856 path = ocfs2_new_path_from_et(et);
2857 if (!path) {
2858 ret = -ENOMEM;
2859 mlog_errno(ret);
2860 goto out;
2861 }
2862
2863 ret = ocfs2_find_path(et->et_ci, path, cpos);
2864 if (ret) {
2865 mlog_errno(ret);
2866 goto out;
2867 }
2868
2869 el = path_leaf_el(path);
2870
2871 index = ocfs2_search_extent_list(el, cpos);
2872 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
2873 ocfs2_error(sb,
2874 "Inode %llu has an extent at cpos %u which can no "
2875 "longer be found.\n",
2876 (unsigned long long)ino, cpos);
2877 ret = -EROFS;
2878 goto out;
2879 }
2880
2881 ret = ocfs2_split_extent(handle, et, path, index,
2882 &replace_rec, meta_ac, dealloc);
2883 if (ret)
2884 mlog_errno(ret);
2885
2886out:
2887 ocfs2_free_path(path);
2888 return ret;
2889}
2890
2891static int ocfs2_replace_clusters(handle_t *handle,
2892 struct ocfs2_cow_context *context,
2893 u32 cpos, u32 old,
2894 u32 new, u32 len,
2895 unsigned int ext_flags)
2896{
2897 int ret;
2898 struct ocfs2_caching_info *ci = context->di_et.et_ci;
2899 u64 ino = ocfs2_metadata_cache_owner(ci);
2900
2901 mlog(0, "inode %llu, cpos %u, old %u, new %u, len %u, ext_flags %u\n",
2902 (unsigned long long)ino, cpos, old, new, len, ext_flags);
2903
2904 /*If the old clusters is unwritten, no need to duplicate. */
2905 if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
2906 ret = ocfs2_duplicate_clusters(handle, context, cpos,
2907 old, new, len);
2908 if (ret) {
2909 mlog_errno(ret);
2910 goto out;
2911 }
2912 }
2913
2914 ret = ocfs2_clear_ext_refcount(handle, &context->di_et,
2915 cpos, new, len, ext_flags,
2916 context->meta_ac, &context->dealloc);
2917 if (ret)
2918 mlog_errno(ret);
2919out:
2920 return ret;
2921}
2922
2923static int ocfs2_cow_sync_writeback(struct super_block *sb,
2924 struct ocfs2_cow_context *context,
2925 u32 cpos, u32 num_clusters)
2926{
2927 int ret = 0;
2928 loff_t offset, end, map_end;
2929 pgoff_t page_index;
2930 struct page *page;
2931
2932 if (ocfs2_should_order_data(context->inode))
2933 return 0;
2934
2935 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
2936 end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits);
2937
2938 ret = filemap_fdatawrite_range(context->inode->i_mapping,
2939 offset, end - 1);
2940 if (ret < 0) {
2941 mlog_errno(ret);
2942 return ret;
2943 }
2944
2945 while (offset < end) {
2946 page_index = offset >> PAGE_CACHE_SHIFT;
2947 map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
2948 if (map_end > end)
2949 map_end = end;
2950
2951 page = grab_cache_page(context->inode->i_mapping, page_index);
2952 BUG_ON(!page);
2953
2954 wait_on_page_writeback(page);
2955 if (PageError(page)) {
2956 ret = -EIO;
2957 mlog_errno(ret);
2958 } else
2959 mark_page_accessed(page);
2960
2961 unlock_page(page);
2962 page_cache_release(page);
2963 page = NULL;
2964 offset = map_end;
2965 if (ret)
2966 break;
2967 }
2968
2969 return ret;
2970}
2971
2972static int ocfs2_make_clusters_writable(struct super_block *sb,
2973 struct ocfs2_cow_context *context,
2974 u32 cpos, u32 p_cluster,
2975 u32 num_clusters, unsigned int e_flags)
2976{
2977 int ret, credits = 0;
2978 u32 new_bit, new_len;
2979 struct ocfs2_super *osb = OCFS2_SB(sb);
2980 handle_t *handle;
2981
2982 ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters,
2983 &context->di_et,
2984 context->ref_ci,
2985 context->ref_root_bh,
2986 &context->meta_ac,
2987 &context->data_ac, &credits);
2988 if (ret) {
2989 mlog_errno(ret);
2990 return ret;
2991 }
2992
2993 handle = ocfs2_start_trans(osb, credits);
2994 if (IS_ERR(handle)) {
2995 ret = PTR_ERR(handle);
2996 mlog_errno(ret);
2997 goto out;
2998 }
2999
3000 while (num_clusters) {
3001 ret = __ocfs2_claim_clusters(osb, handle, context->data_ac,
3002 1, num_clusters,
3003 &new_bit, &new_len);
3004 if (ret) {
3005 mlog_errno(ret);
3006 goto out_commit;
3007 }
3008
3009 ret = ocfs2_replace_clusters(handle, context,
3010 cpos, p_cluster, new_bit,
3011 new_len, e_flags);
3012 if (ret) {
3013 mlog_errno(ret);
3014 goto out_commit;
3015 }
3016
3017 cpos += new_len;
3018 p_cluster += new_len;
3019 num_clusters -= new_len;
3020 }
3021
3022 ret = __ocfs2_decrease_refcount(handle, context->ref_ci,
3023 context->ref_root_bh,
3024 p_cluster, num_clusters,
3025 context->meta_ac,
3026 &context->dealloc);
3027 if (ret) {
3028 mlog_errno(ret);
3029 goto out_commit;
3030 }
3031
3032 /*
3033 * Here we should write the new page out first if we are
3034 * in write-back mode.
3035 */
3036 ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters);
3037 if (ret)
3038 mlog_errno(ret);
3039
3040out_commit:
3041 ocfs2_commit_trans(osb, handle);
3042
3043out:
3044 if (context->data_ac) {
3045 ocfs2_free_alloc_context(context->data_ac);
3046 context->data_ac = NULL;
3047 }
3048 if (context->meta_ac) {
3049 ocfs2_free_alloc_context(context->meta_ac);
3050 context->meta_ac = NULL;
3051 }
3052
3053 return ret;
3054}
3055
3056static int ocfs2_replace_cow(struct inode *inode,
3057 struct buffer_head *di_bh,
3058 struct buffer_head *ref_root_bh,
3059 struct ocfs2_caching_info *ref_ci,
3060 u32 cow_start, u32 cow_len)
3061{
3062 int ret = 0;
3063 u32 p_cluster, num_clusters, start = cow_start;
3064 unsigned int ext_flags;
3065 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3066 struct ocfs2_cow_context *context;
3067
3068 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
3069 ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
3070 "tree, but the feature bit is not set in the "
3071 "super block.", inode->i_ino);
3072 return -EROFS;
3073 }
3074
3075 context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
3076 if (!context) {
3077 ret = -ENOMEM;
3078 mlog_errno(ret);
3079 return ret;
3080 }
3081
3082 context->inode = inode;
3083 context->cow_start = cow_start;
3084 context->cow_len = cow_len;
3085 context->ref_ci = ref_ci;
3086 context->ref_root_bh = ref_root_bh;
3087
3088 ocfs2_init_dealloc_ctxt(&context->dealloc);
3089 ocfs2_init_dinode_extent_tree(&context->di_et,
3090 INODE_CACHE(inode), di_bh);
3091
3092 while (cow_len) {
3093 ret = ocfs2_get_clusters(inode, cow_start, &p_cluster,
3094 &num_clusters, &ext_flags);
3095 if (ret) {
3096 mlog_errno(ret);
3097 break;
3098 }
3099
3100 BUG_ON(!(ext_flags & OCFS2_EXT_REFCOUNTED));
3101
3102 if (cow_len < num_clusters)
3103 num_clusters = cow_len;
3104
3105 ret = ocfs2_make_clusters_writable(inode->i_sb, context,
3106 cow_start, p_cluster,
3107 num_clusters, ext_flags);
3108 if (ret) {
3109 mlog_errno(ret);
3110 break;
3111 }
3112
3113 cow_len -= num_clusters;
3114 cow_start += num_clusters;
3115 }
3116
3117
3118 /*
3119 * truncate the extent map here since no matter whether we meet with
3120 * any error during the action, we shouldn't trust cached extent map
3121 * any more.
3122 */
3123 ocfs2_extent_map_trunc(inode, start);
3124
3125 if (ocfs2_dealloc_has_cluster(&context->dealloc)) {
3126 ocfs2_schedule_truncate_log_flush(osb, 1);
3127 ocfs2_run_deallocs(osb, &context->dealloc);
3128 }
3129
3130 kfree(context);
3131 return ret;
3132}
3133
3134/*
3135 * Starting at cpos, try to CoW write_len clusters.
3136 * This will stop when it runs into a hole or an unrefcounted extent.
3137 */
3138static int ocfs2_refcount_cow_hunk(struct inode *inode,
3139 struct buffer_head *di_bh,
3140 u32 cpos, u32 write_len)
3141{
3142 int ret;
3143 u32 cow_start = 0, cow_len = 0;
3144 struct ocfs2_inode_info *oi = OCFS2_I(inode);
3145 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3146 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
3147 struct buffer_head *ref_root_bh = NULL;
3148 struct ocfs2_refcount_tree *ref_tree;
3149
3150 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
3151
3152 ret = ocfs2_refcount_cal_cow_clusters(inode, di_bh, cpos, write_len,
3153 &cow_start, &cow_len);
3154 if (ret) {
3155 mlog_errno(ret);
3156 goto out;
3157 }
3158 mlog(0, "CoW inode %lu, cpos %u, write_len %u, cow_start %u, "
3159 "cow_len %u\n", inode->i_ino,
3160 cpos, write_len, cow_start, cow_len);
3161
3162 BUG_ON(cow_len == 0);
3163
3164 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
3165 1, &ref_tree, &ref_root_bh);
3166 if (ret) {
3167 mlog_errno(ret);
3168 goto out;
3169 }
3170
3171 ret = ocfs2_replace_cow(inode, di_bh, ref_root_bh, &ref_tree->rf_ci,
3172 cow_start, cow_len);
3173 if (ret)
3174 mlog_errno(ret);
3175
3176 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3177 brelse(ref_root_bh);
3178out:
3179 return ret;
3180}
3181
3182/*
3183 * CoW any and all clusters between cpos and cpos+write_len.
3184 * If this returns successfully, all clusters between cpos and
3185 * cpos+write_len are safe to modify.
3186 */
3187int ocfs2_refcount_cow(struct inode *inode,
3188 struct buffer_head *di_bh,
3189 u32 cpos, u32 write_len)
3190{
3191 int ret = 0;
3192 u32 p_cluster, num_clusters;
3193 unsigned int ext_flags;
3194
3195 while (write_len) {
3196 ret = ocfs2_get_clusters(inode, cpos, &p_cluster,
3197 &num_clusters, &ext_flags);
3198 if (ret) {
3199 mlog_errno(ret);
3200 break;
3201 }
3202
3203 if (write_len < num_clusters)
3204 num_clusters = write_len;
3205
3206 if (ext_flags & OCFS2_EXT_REFCOUNTED) {
3207 ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
3208 num_clusters);
3209 if (ret) {
3210 mlog_errno(ret);
3211 break;
3212 }
3213 }
3214
3215 write_len -= num_clusters;
3216 cpos += num_clusters;
3217 }
3218
3219 return ret;
3220}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index b8c9ed7dc383..9960878134df 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -51,4 +51,6 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
51 u32 clusters, 51 u32 clusters,
52 int *credits, 52 int *credits,
53 struct ocfs2_alloc_context **meta_ac); 53 struct ocfs2_alloc_context **meta_ac);
54int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
55 u32 cpos, u32 write_len);
54#endif /* OCFS2_REFCOUNTTREE_H */ 56#endif /* OCFS2_REFCOUNTTREE_H */