aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
authorTao Ma <tao.ma@oracle.com>2008-08-18 05:38:53 -0400
committerMark Fasheh <mfasheh@suse.com>2008-10-13 19:57:03 -0400
commit012255961c9ecfe22b7a1df47ac26ab37818cb1e (patch)
tree914d2a1f58734f44389bdae6e47b555ec8f81c12 /fs/ocfs2
parentca12b7c48942d21b2e7890b820db9d578bc291cd (diff)
ocfs2: Enable xattr set in index btree
Where the previous patches added the ability to list/get xattrs in buckets for ocfs2, this patch enables ocfs2 to store large numbers of EAs. The original design doc was written by Mark Fasheh, and it can be found at http://oss.oracle.com/osswiki/OCFS2/DesignDocs/IndexedEATrees. I only had to make small modifications to it. First, because the bucket size is 4K, a new field named xh_free_start is added to ocfs2_xattr_header to indicate the next valid name/value offset in a bucket. It is used when we store a new EA name/value. With this field we can find the place more quickly and, what's more, we don't need to sort the name/values every time just so that the last entry indicates the next unused space. This makes the insert operation more efficient for blocksizes smaller than 4k. Because of the new xh_free_start, another field named xh_name_value_len is also added to ocfs2_xattr_header. It records the total length of all the name/values in the bucket. We need this so that we can check it and defragment the bucket if there is not enough contiguous free space. An xattr insertion looks like this: 1. xattr_index_block_find: find the right bucket by the name_hash, say bucketA. 2. Check whether there is enough space in bucketA. If yes, insert it directly and modify xh_free_start and xh_name_value_len accordingly. If not, check xh_name_value_len to see whether we can store this by defragmenting the bucket. If yes, defragment it and continue with the insertion. 3. If defragmentation doesn't work, check whether there is a new empty bucket in the clusters within this extent record. If yes, init the new bucket and move all the buckets after bucketA one by one to the next bucket. Move half of the entries in bucketA to the next bucket and continue with the insertion. 4. If there is no new bucket, grow the extent tree. As for xattr deletion, we will delete an xattr bucket when all its xattrs are removed and move all the buckets after it to the previous one. 
When all the xattr buckets in an extent record are freed, free this extent record from ocfs2_xattr_tree. Signed-off-by: Tao Ma <tao.ma@oracle.com> Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/xattr.c2267
-rw-r--r--fs/ocfs2/xattr.h8
2 files changed, 2273 insertions, 2 deletions
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index acccdfabd2d6..5e8fae948882 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -36,6 +36,7 @@
36#include <linux/mount.h> 36#include <linux/mount.h>
37#include <linux/writeback.h> 37#include <linux/writeback.h>
38#include <linux/falloc.h> 38#include <linux/falloc.h>
39#include <linux/sort.h>
39 40
40#define MLOG_MASK_PREFIX ML_XATTR 41#define MLOG_MASK_PREFIX ML_XATTR
41#include <cluster/masklog.h> 42#include <cluster/masklog.h>
@@ -123,6 +124,13 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
123 char *buffer, 124 char *buffer,
124 size_t buffer_size); 125 size_t buffer_size);
125 126
127static int ocfs2_xattr_create_index_block(struct inode *inode,
128 struct ocfs2_xattr_search *xs);
129
130static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
131 struct ocfs2_xattr_info *xi,
132 struct ocfs2_xattr_search *xs);
133
126static inline struct xattr_handler *ocfs2_xattr_handler(int name_index) 134static inline struct xattr_handler *ocfs2_xattr_handler(int name_index)
127{ 135{
128 struct xattr_handler *handler = NULL; 136 struct xattr_handler *handler = NULL;
@@ -1768,6 +1776,52 @@ cleanup:
1768} 1776}
1769 1777
1770/* 1778/*
1779 * When all the xattrs are deleted from index btree, the ocfs2_xattr_tree
1780 * will be erased and ocfs2_xattr_block will have its ocfs2_xattr_header
1781 * re-initialized.
1782 */
1783static int ocfs2_restore_xattr_block(struct inode *inode,
1784 struct ocfs2_xattr_search *xs)
1785{
1786 int ret;
1787 handle_t *handle;
1788 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1789 struct ocfs2_xattr_block *xb =
1790 (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1791 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
1792 u16 xb_flags = le16_to_cpu(xb->xb_flags);
1793
1794 BUG_ON(!(xb_flags & OCFS2_XATTR_INDEXED) ||
1795 le16_to_cpu(el->l_next_free_rec) != 0);
1796
1797 handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1798 if (IS_ERR(handle)) {
1799 ret = PTR_ERR(handle);
1800 handle = NULL;
1801 goto out;
1802 }
1803
1804 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1805 OCFS2_JOURNAL_ACCESS_WRITE);
1806 if (ret < 0) {
1807 mlog_errno(ret);
1808 goto out_commit;
1809 }
1810
1811 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
1812 offsetof(struct ocfs2_xattr_block, xb_attrs));
1813
1814 xb->xb_flags = cpu_to_le16(xb_flags & ~OCFS2_XATTR_INDEXED);
1815
1816 ocfs2_journal_dirty(handle, xs->xattr_bh);
1817
1818out_commit:
1819 ocfs2_commit_trans(osb, handle);
1820out:
1821 return ret;
1822}
1823
1824/*
1771 * ocfs2_xattr_block_set() 1825 * ocfs2_xattr_block_set()
1772 * 1826 *
1773 * Set, replace or remove an extended attribute into external block. 1827 * Set, replace or remove an extended attribute into external block.
@@ -1862,10 +1916,25 @@ out:
1862 ocfs2_free_alloc_context(meta_ac); 1916 ocfs2_free_alloc_context(meta_ac);
1863 if (ret < 0) 1917 if (ret < 0)
1864 return ret; 1918 return ret;
1919 } else
1920 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1921
1922 if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
1923 /* Set extended attribute into external block */
1924 ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL);
1925 if (!ret || ret != -ENOSPC)
1926 goto end;
1927
1928 ret = ocfs2_xattr_create_index_block(inode, xs);
1929 if (ret)
1930 goto end;
1865 } 1931 }
1866 1932
1867 /* Set extended attribute into external block */ 1933 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs);
1868 ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL); 1934 if (!ret && xblk->xb_attrs.xb_root.xt_list.l_next_free_rec == 0)
1935 ret = ocfs2_restore_xattr_block(inode, xs);
1936
1937end:
1869 1938
1870 return ret; 1939 return ret;
1871} 1940}
@@ -1887,6 +1956,7 @@ int ocfs2_xattr_set(struct inode *inode,
1887 struct buffer_head *di_bh = NULL; 1956 struct buffer_head *di_bh = NULL;
1888 struct ocfs2_dinode *di; 1957 struct ocfs2_dinode *di;
1889 int ret; 1958 int ret;
1959 u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
1890 1960
1891 struct ocfs2_xattr_info xi = { 1961 struct ocfs2_xattr_info xi = {
1892 .name_index = name_index, 1962 .name_index = name_index,
@@ -1985,6 +2055,8 @@ cleanup:
1985 ocfs2_inode_unlock(inode, 1); 2055 ocfs2_inode_unlock(inode, 1);
1986 brelse(di_bh); 2056 brelse(di_bh);
1987 brelse(xbs.xattr_bh); 2057 brelse(xbs.xattr_bh);
2058 for (i = 0; i < blk_per_bucket; i++)
2059 brelse(xbs.bucket.bhs[i]);
1988 2060
1989 return ret; 2061 return ret;
1990} 2062}
@@ -2475,3 +2547,2194 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
2475out: 2547out:
2476 return ret; 2548 return ret;
2477} 2549}
2550
2551static int cmp_xe(const void *a, const void *b)
2552{
2553 const struct ocfs2_xattr_entry *l = a, *r = b;
2554 u32 l_hash = le32_to_cpu(l->xe_name_hash);
2555 u32 r_hash = le32_to_cpu(r->xe_name_hash);
2556
2557 if (l_hash > r_hash)
2558 return 1;
2559 if (l_hash < r_hash)
2560 return -1;
2561 return 0;
2562}
2563
2564static void swap_xe(void *a, void *b, int size)
2565{
2566 struct ocfs2_xattr_entry *l = a, *r = b, tmp;
2567
2568 tmp = *l;
2569 memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
2570 memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
2571}
2572
2573/*
2574 * When the ocfs2_xattr_block is filled up, new bucket will be created
2575 * and all the xattr entries will be moved to the new bucket.
2576 * Note: we need to sort the entries since they are not saved in order
2577 * in the ocfs2_xattr_block.
2578 */
2579static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
2580 struct buffer_head *xb_bh,
2581 struct buffer_head *xh_bh,
2582 struct buffer_head *data_bh)
2583{
2584 int i, blocksize = inode->i_sb->s_blocksize;
2585 u16 offset, size, off_change;
2586 struct ocfs2_xattr_entry *xe;
2587 struct ocfs2_xattr_block *xb =
2588 (struct ocfs2_xattr_block *)xb_bh->b_data;
2589 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
2590 struct ocfs2_xattr_header *xh =
2591 (struct ocfs2_xattr_header *)xh_bh->b_data;
2592 u16 count = le16_to_cpu(xb_xh->xh_count);
2593 char *target = xh_bh->b_data, *src = xb_bh->b_data;
2594
2595 mlog(0, "cp xattr from block %llu to bucket %llu\n",
2596 (unsigned long long)xb_bh->b_blocknr,
2597 (unsigned long long)xh_bh->b_blocknr);
2598
2599 memset(xh_bh->b_data, 0, blocksize);
2600 if (data_bh)
2601 memset(data_bh->b_data, 0, blocksize);
2602 /*
2603 * Since the xe_name_offset is based on ocfs2_xattr_header,
2604 * there is a offset change corresponding to the change of
2605 * ocfs2_xattr_header's position.
2606 */
2607 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
2608 xe = &xb_xh->xh_entries[count - 1];
2609 offset = le16_to_cpu(xe->xe_name_offset) + off_change;
2610 size = blocksize - offset;
2611
2612 /* copy all the names and values. */
2613 if (data_bh)
2614 target = data_bh->b_data;
2615 memcpy(target + offset, src + offset, size);
2616
2617 /* Init new header now. */
2618 xh->xh_count = xb_xh->xh_count;
2619 xh->xh_num_buckets = cpu_to_le16(1);
2620 xh->xh_name_value_len = cpu_to_le16(size);
2621 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
2622
2623 /* copy all the entries. */
2624 target = xh_bh->b_data;
2625 offset = offsetof(struct ocfs2_xattr_header, xh_entries);
2626 size = count * sizeof(struct ocfs2_xattr_entry);
2627 memcpy(target + offset, (char *)xb_xh + offset, size);
2628
2629 /* Change the xe offset for all the xe because of the move. */
2630 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
2631 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
2632 for (i = 0; i < count; i++)
2633 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
2634
2635 mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
2636 offset, size, off_change);
2637
2638 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
2639 cmp_xe, swap_xe);
2640}
2641
2642/*
2643 * After we move xattr from block to index btree, we have to
2644 * update ocfs2_xattr_search to the new xe and base.
2645 *
2646 * When the entry is in xattr block, xattr_bh indicates the storage place.
2647 * While if the entry is in index b-tree, "bucket" indicates the
2648 * real place of the xattr.
2649 */
2650static int ocfs2_xattr_update_xattr_search(struct inode *inode,
2651 struct ocfs2_xattr_search *xs,
2652 struct buffer_head *old_bh,
2653 struct buffer_head *new_bh)
2654{
2655 int ret = 0;
2656 char *buf = old_bh->b_data;
2657 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
2658 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
2659 int i, blocksize = inode->i_sb->s_blocksize;
2660 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2661
2662 xs->bucket.bhs[0] = new_bh;
2663 get_bh(new_bh);
2664 xs->bucket.xh = (struct ocfs2_xattr_header *)xs->bucket.bhs[0]->b_data;
2665 xs->header = xs->bucket.xh;
2666
2667 xs->base = new_bh->b_data;
2668 xs->end = xs->base + inode->i_sb->s_blocksize;
2669
2670 if (!xs->not_found) {
2671 if (OCFS2_XATTR_BUCKET_SIZE != blocksize) {
2672 ret = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
2673 xs->bucket.bhs[0]->b_blocknr + 1,
2674 blk_per_bucket - 1, &xs->bucket.bhs[1],
2675 OCFS2_BH_CACHED, inode);
2676 if (ret) {
2677 mlog_errno(ret);
2678 return ret;
2679 }
2680
2681 i = xs->here - old_xh->xh_entries;
2682 xs->here = &xs->header->xh_entries[i];
2683 }
2684 }
2685
2686 return ret;
2687}
2688
2689static int ocfs2_xattr_create_index_block(struct inode *inode,
2690 struct ocfs2_xattr_search *xs)
2691{
2692 int ret, credits = OCFS2_SUBALLOC_ALLOC;
2693 u32 bit_off, len;
2694 u64 blkno;
2695 handle_t *handle;
2696 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2697 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2698 struct ocfs2_alloc_context *data_ac;
2699 struct buffer_head *xh_bh = NULL, *data_bh = NULL;
2700 struct buffer_head *xb_bh = xs->xattr_bh;
2701 struct ocfs2_xattr_block *xb =
2702 (struct ocfs2_xattr_block *)xb_bh->b_data;
2703 struct ocfs2_xattr_tree_root *xr;
2704 u16 xb_flags = le16_to_cpu(xb->xb_flags);
2705 u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2706
2707 mlog(0, "create xattr index block for %llu\n",
2708 (unsigned long long)xb_bh->b_blocknr);
2709
2710 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
2711
2712 ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
2713 if (ret) {
2714 mlog_errno(ret);
2715 goto out;
2716 }
2717
2718 /*
2719 * XXX:
2720 * We can use this lock for now, and maybe move to a dedicated mutex
2721 * if performance becomes a problem later.
2722 */
2723 down_write(&oi->ip_alloc_sem);
2724
2725 /*
2726 * 3 more credits, one for xattr block update, one for the 1st block
2727 * of the new xattr bucket and one for the value/data.
2728 */
2729 credits += 3;
2730 handle = ocfs2_start_trans(osb, credits);
2731 if (IS_ERR(handle)) {
2732 ret = PTR_ERR(handle);
2733 mlog_errno(ret);
2734 goto out_sem;
2735 }
2736
2737 ret = ocfs2_journal_access(handle, inode, xb_bh,
2738 OCFS2_JOURNAL_ACCESS_WRITE);
2739 if (ret) {
2740 mlog_errno(ret);
2741 goto out_commit;
2742 }
2743
2744 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
2745 if (ret) {
2746 mlog_errno(ret);
2747 goto out_commit;
2748 }
2749
2750 /*
2751 * The bucket may spread in many blocks, and
2752 * we will only touch the 1st block and the last block
2753 * in the whole bucket(one for entry and one for data).
2754 */
2755 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
2756
2757 mlog(0, "allocate 1 cluster from %llu to xattr block\n", blkno);
2758
2759 xh_bh = sb_getblk(inode->i_sb, blkno);
2760 if (!xh_bh) {
2761 ret = -EIO;
2762 mlog_errno(ret);
2763 goto out_commit;
2764 }
2765
2766 ocfs2_set_new_buffer_uptodate(inode, xh_bh);
2767
2768 ret = ocfs2_journal_access(handle, inode, xh_bh,
2769 OCFS2_JOURNAL_ACCESS_CREATE);
2770 if (ret) {
2771 mlog_errno(ret);
2772 goto out_commit;
2773 }
2774
2775 if (bpb > 1) {
2776 data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1);
2777 if (!data_bh) {
2778 ret = -EIO;
2779 mlog_errno(ret);
2780 goto out_commit;
2781 }
2782
2783 ocfs2_set_new_buffer_uptodate(inode, data_bh);
2784
2785 ret = ocfs2_journal_access(handle, inode, data_bh,
2786 OCFS2_JOURNAL_ACCESS_CREATE);
2787 if (ret) {
2788 mlog_errno(ret);
2789 goto out_commit;
2790 }
2791 }
2792
2793 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh);
2794
2795 ocfs2_journal_dirty(handle, xh_bh);
2796 if (data_bh)
2797 ocfs2_journal_dirty(handle, data_bh);
2798
2799 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh);
2800
2801 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
2802 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
2803 offsetof(struct ocfs2_xattr_block, xb_attrs));
2804
2805 xr = &xb->xb_attrs.xb_root;
2806 xr->xt_clusters = cpu_to_le32(1);
2807 xr->xt_last_eb_blk = 0;
2808 xr->xt_list.l_tree_depth = 0;
2809 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
2810 xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2811
2812 xr->xt_list.l_recs[0].e_cpos = 0;
2813 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
2814 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
2815
2816 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
2817
2818 ret = ocfs2_journal_dirty(handle, xb_bh);
2819 if (ret) {
2820 mlog_errno(ret);
2821 goto out_commit;
2822 }
2823
2824out_commit:
2825 ocfs2_commit_trans(osb, handle);
2826
2827out_sem:
2828 up_write(&oi->ip_alloc_sem);
2829
2830out:
2831 if (data_ac)
2832 ocfs2_free_alloc_context(data_ac);
2833
2834 brelse(xh_bh);
2835 brelse(data_bh);
2836
2837 return ret;
2838}
2839
2840static int cmp_xe_offset(const void *a, const void *b)
2841{
2842 const struct ocfs2_xattr_entry *l = a, *r = b;
2843 u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
2844 u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
2845
2846 if (l_name_offset < r_name_offset)
2847 return 1;
2848 if (l_name_offset > r_name_offset)
2849 return -1;
2850 return 0;
2851}
2852
2853/*
2854 * defrag a xattr bucket if we find that the bucket has some
2855 * holes beteen name/value pairs.
2856 * We will move all the name/value pairs to the end of the bucket
2857 * so that we can spare some space for insertion.
2858 */
2859static int ocfs2_defrag_xattr_bucket(struct inode *inode,
2860 struct ocfs2_xattr_bucket *bucket)
2861{
2862 int ret, i;
2863 size_t end, offset, len, value_len;
2864 struct ocfs2_xattr_header *xh;
2865 char *entries, *buf, *bucket_buf = NULL;
2866 u64 blkno = bucket->bhs[0]->b_blocknr;
2867 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2868 u16 xh_free_start;
2869 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2870 size_t blocksize = inode->i_sb->s_blocksize;
2871 handle_t *handle;
2872 struct buffer_head **bhs;
2873 struct ocfs2_xattr_entry *xe;
2874
2875 bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
2876 GFP_NOFS);
2877 if (!bhs)
2878 return -ENOMEM;
2879
2880 ret = ocfs2_read_blocks(osb, blkno, blk_per_bucket, bhs,
2881 OCFS2_BH_CACHED, inode);
2882 if (ret)
2883 goto out;
2884
2885 /*
2886 * In order to make the operation more efficient and generic,
2887 * we copy all the blocks into a contiguous memory and do the
2888 * defragment there, so if anything is error, we will not touch
2889 * the real block.
2890 */
2891 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
2892 if (!bucket_buf) {
2893 ret = -EIO;
2894 goto out;
2895 }
2896
2897 buf = bucket_buf;
2898 for (i = 0; i < blk_per_bucket; i++, buf += blocksize)
2899 memcpy(buf, bhs[i]->b_data, blocksize);
2900
2901 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket);
2902 if (IS_ERR(handle)) {
2903 ret = PTR_ERR(handle);
2904 handle = NULL;
2905 mlog_errno(ret);
2906 goto out;
2907 }
2908
2909 for (i = 0; i < blk_per_bucket; i++) {
2910 ret = ocfs2_journal_access(handle, inode, bhs[i],
2911 OCFS2_JOURNAL_ACCESS_WRITE);
2912 if (ret < 0) {
2913 mlog_errno(ret);
2914 goto commit;
2915 }
2916 }
2917
2918 xh = (struct ocfs2_xattr_header *)bucket_buf;
2919 entries = (char *)xh->xh_entries;
2920 xh_free_start = le16_to_cpu(xh->xh_free_start);
2921
2922 mlog(0, "adjust xattr bucket in %llu, count = %u, "
2923 "xh_free_start = %u, xh_name_value_len = %u.\n",
2924 blkno, le16_to_cpu(xh->xh_count), xh_free_start,
2925 le16_to_cpu(xh->xh_name_value_len));
2926
2927 /*
2928 * sort all the entries by their offset.
2929 * the largest will be the first, so that we can
2930 * move them to the end one by one.
2931 */
2932 sort(entries, le16_to_cpu(xh->xh_count),
2933 sizeof(struct ocfs2_xattr_entry),
2934 cmp_xe_offset, swap_xe);
2935
2936 /* Move all name/values to the end of the bucket. */
2937 xe = xh->xh_entries;
2938 end = OCFS2_XATTR_BUCKET_SIZE;
2939 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
2940 offset = le16_to_cpu(xe->xe_name_offset);
2941 if (ocfs2_xattr_is_local(xe))
2942 value_len = OCFS2_XATTR_SIZE(
2943 le64_to_cpu(xe->xe_value_size));
2944 else
2945 value_len = OCFS2_XATTR_ROOT_SIZE;
2946 len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
2947
2948 /*
2949 * We must make sure that the name/value pair
2950 * exist in the same block. So adjust end to
2951 * the previous block end if needed.
2952 */
2953 if (((end - len) / blocksize !=
2954 (end - 1) / blocksize))
2955 end = end - end % blocksize;
2956
2957 if (end > offset + len) {
2958 memmove(bucket_buf + end - len,
2959 bucket_buf + offset, len);
2960 xe->xe_name_offset = cpu_to_le16(end - len);
2961 }
2962
2963 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
2964 "bucket %llu\n", (unsigned long long)blkno);
2965
2966 end -= len;
2967 }
2968
2969 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
2970 "bucket %llu\n", (unsigned long long)blkno);
2971
2972 if (xh_free_start == end)
2973 goto commit;
2974
2975 memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
2976 xh->xh_free_start = cpu_to_le16(end);
2977
2978 /* sort the entries by their name_hash. */
2979 sort(entries, le16_to_cpu(xh->xh_count),
2980 sizeof(struct ocfs2_xattr_entry),
2981 cmp_xe, swap_xe);
2982
2983 buf = bucket_buf;
2984 for (i = 0; i < blk_per_bucket; i++, buf += blocksize) {
2985 memcpy(bhs[i]->b_data, buf, blocksize);
2986 ocfs2_journal_dirty(handle, bhs[i]);
2987 }
2988
2989commit:
2990 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2991out:
2992
2993 if (bhs) {
2994 for (i = 0; i < blk_per_bucket; i++)
2995 brelse(bhs[i]);
2996 }
2997 kfree(bhs);
2998
2999 kfree(bucket_buf);
3000 return ret;
3001}
3002
3003/*
3004 * Move half nums of the xattr bucket in the previous cluster to this new
3005 * cluster. We only touch the last cluster of the previous extend record.
3006 *
3007 * first_bh is the first buffer_head of a series of bucket in the same
3008 * extent rec and header_bh is the header of one bucket in this cluster.
3009 * They will be updated if we move the data header_bh contains to the new
3010 * cluster. first_hash will be set as the 1st xe's name_hash of the new cluster.
3011 */
3012static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
3013 handle_t *handle,
3014 struct buffer_head **first_bh,
3015 struct buffer_head **header_bh,
3016 u64 new_blkno,
3017 u64 prev_blkno,
3018 u32 num_clusters,
3019 u32 *first_hash)
3020{
3021 int i, ret, credits;
3022 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3023 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3024 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3025 int blocksize = inode->i_sb->s_blocksize;
3026 struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL;
3027 struct ocfs2_xattr_header *new_xh;
3028 struct ocfs2_xattr_header *xh =
3029 (struct ocfs2_xattr_header *)((*first_bh)->b_data);
3030
3031 BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
3032 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);
3033
3034 prev_bh = *first_bh;
3035 get_bh(prev_bh);
3036 xh = (struct ocfs2_xattr_header *)prev_bh->b_data;
3037
3038 prev_blkno += (num_clusters - 1) * bpc + bpc / 2;
3039
3040 mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3041 prev_blkno, new_blkno);
3042
3043 /*
3044 * We need to update the 1st half of the new cluster and
3045 * 1 more for the update of the 1st bucket of the previous
3046 * extent record.
3047 */
3048 credits = bpc / 2 + 1;
3049 ret = ocfs2_extend_trans(handle, credits);
3050 if (ret) {
3051 mlog_errno(ret);
3052 goto out;
3053 }
3054
3055 ret = ocfs2_journal_access(handle, inode, prev_bh,
3056 OCFS2_JOURNAL_ACCESS_WRITE);
3057 if (ret) {
3058 mlog_errno(ret);
3059 goto out;
3060 }
3061
3062 for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) {
3063 old_bh = new_bh = NULL;
3064 new_bh = sb_getblk(inode->i_sb, new_blkno);
3065 if (!new_bh) {
3066 ret = -EIO;
3067 mlog_errno(ret);
3068 goto out;
3069 }
3070
3071 ocfs2_set_new_buffer_uptodate(inode, new_bh);
3072
3073 ret = ocfs2_journal_access(handle, inode, new_bh,
3074 OCFS2_JOURNAL_ACCESS_CREATE);
3075 if (ret < 0) {
3076 mlog_errno(ret);
3077 brelse(new_bh);
3078 goto out;
3079 }
3080
3081 ret = ocfs2_read_block(osb, prev_blkno,
3082 &old_bh, OCFS2_BH_CACHED, inode);
3083 if (ret < 0) {
3084 mlog_errno(ret);
3085 brelse(new_bh);
3086 goto out;
3087 }
3088
3089 memcpy(new_bh->b_data, old_bh->b_data, blocksize);
3090
3091 if (i == 0) {
3092 new_xh = (struct ocfs2_xattr_header *)new_bh->b_data;
3093 new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2);
3094
3095 if (first_hash)
3096 *first_hash = le32_to_cpu(
3097 new_xh->xh_entries[0].xe_name_hash);
3098 new_first_bh = new_bh;
3099 get_bh(new_first_bh);
3100 }
3101
3102 ocfs2_journal_dirty(handle, new_bh);
3103
3104 if (*header_bh == old_bh) {
3105 brelse(*header_bh);
3106 *header_bh = new_bh;
3107 get_bh(*header_bh);
3108
3109 brelse(*first_bh);
3110 *first_bh = new_first_bh;
3111 get_bh(*first_bh);
3112 }
3113 brelse(new_bh);
3114 brelse(old_bh);
3115 }
3116
3117 le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2));
3118
3119 ocfs2_journal_dirty(handle, prev_bh);
3120out:
3121 brelse(prev_bh);
3122 brelse(new_first_bh);
3123 return ret;
3124}
3125
3126static int ocfs2_read_xattr_bucket(struct inode *inode,
3127 u64 blkno,
3128 struct buffer_head **bhs,
3129 int new)
3130{
3131 int ret = 0;
3132 u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3133
3134 if (!new)
3135 return ocfs2_read_blocks(OCFS2_SB(inode->i_sb), blkno,
3136 blk_per_bucket, bhs,
3137 OCFS2_BH_CACHED, inode);
3138
3139 for (i = 0; i < blk_per_bucket; i++) {
3140 bhs[i] = sb_getblk(inode->i_sb, blkno + i);
3141 if (bhs[i] == NULL) {
3142 ret = -EIO;
3143 mlog_errno(ret);
3144 break;
3145 }
3146 ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
3147 }
3148
3149 return ret;
3150}
3151
3152/*
3153 * Move half num of the xattrs in old bucket(blk) to new bucket(new_blk).
3154 * first_hash will record the 1st hash of the new bucket.
3155 */
3156static int ocfs2_half_xattr_bucket(struct inode *inode,
3157 handle_t *handle,
3158 u64 blk,
3159 u64 new_blk,
3160 u32 *first_hash,
3161 int new_bucket_head)
3162{
3163 int ret, i;
3164 u16 count, start, len, name_value_len, xe_len, name_offset;
3165 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3166 struct buffer_head **s_bhs, **t_bhs = NULL;
3167 struct ocfs2_xattr_header *xh;
3168 struct ocfs2_xattr_entry *xe;
3169 int blocksize = inode->i_sb->s_blocksize;
3170
3171 mlog(0, "move half of xattrs from bucket %llu to %llu\n",
3172 blk, new_blk);
3173
3174 s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
3175 if (!s_bhs)
3176 return -ENOMEM;
3177
3178 ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0);
3179 if (ret) {
3180 mlog_errno(ret);
3181 goto out;
3182 }
3183
3184 ret = ocfs2_journal_access(handle, inode, s_bhs[0],
3185 OCFS2_JOURNAL_ACCESS_WRITE);
3186 if (ret) {
3187 mlog_errno(ret);
3188 goto out;
3189 }
3190
3191 t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
3192 if (!t_bhs) {
3193 ret = -ENOMEM;
3194 goto out;
3195 }
3196
3197 ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head);
3198 if (ret) {
3199 mlog_errno(ret);
3200 goto out;
3201 }
3202
3203 for (i = 0; i < blk_per_bucket; i++) {
3204 ret = ocfs2_journal_access(handle, inode, t_bhs[i],
3205 OCFS2_JOURNAL_ACCESS_CREATE);
3206 if (ret) {
3207 mlog_errno(ret);
3208 goto out;
3209 }
3210 }
3211
3212 /* copy the whole bucket to the new first. */
3213 for (i = 0; i < blk_per_bucket; i++)
3214 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3215
3216 /* update the new bucket. */
3217 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
3218 count = le16_to_cpu(xh->xh_count);
3219 start = count / 2;
3220
3221 /*
3222 * Calculate the total name/value len and xh_free_start for
3223 * the old bucket first.
3224 */
3225 name_offset = OCFS2_XATTR_BUCKET_SIZE;
3226 name_value_len = 0;
3227 for (i = 0; i < start; i++) {
3228 xe = &xh->xh_entries[i];
3229 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3230 if (ocfs2_xattr_is_local(xe))
3231 xe_len +=
3232 OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3233 else
3234 xe_len += OCFS2_XATTR_ROOT_SIZE;
3235 name_value_len += xe_len;
3236 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
3237 name_offset = le16_to_cpu(xe->xe_name_offset);
3238 }
3239
3240 /*
3241 * Now begin the modification to the new bucket.
3242 *
3243 * In the new bucket, We just move the xattr entry to the beginning
3244 * and don't touch the name/value. So there will be some holes in the
3245 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
3246 * called.
3247 */
3248 xe = &xh->xh_entries[start];
3249 len = sizeof(struct ocfs2_xattr_entry) * (count - start);
3250 mlog(0, "mv xattr entry len %d from %d to %d\n", len,
3251 (char *)xe - (char *)xh, (char *)xh->xh_entries - (char *)xh);
3252 memmove((char *)xh->xh_entries, (char *)xe, len);
3253 xe = &xh->xh_entries[count - start];
3254 len = sizeof(struct ocfs2_xattr_entry) * start;
3255 memset((char *)xe, 0, len);
3256
3257 le16_add_cpu(&xh->xh_count, -start);
3258 le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
3259
3260 /* Calculate xh_free_start for the new bucket. */
3261 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
3262 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3263 xe = &xh->xh_entries[i];
3264 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3265 if (ocfs2_xattr_is_local(xe))
3266 xe_len +=
3267 OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3268 else
3269 xe_len += OCFS2_XATTR_ROOT_SIZE;
3270 if (le16_to_cpu(xe->xe_name_offset) <
3271 le16_to_cpu(xh->xh_free_start))
3272 xh->xh_free_start = xe->xe_name_offset;
3273 }
3274
3275 /* set xh->xh_num_buckets for the new xh. */
3276 if (new_bucket_head)
3277 xh->xh_num_buckets = cpu_to_le16(1);
3278 else
3279 xh->xh_num_buckets = 0;
3280
3281 for (i = 0; i < blk_per_bucket; i++) {
3282 ocfs2_journal_dirty(handle, t_bhs[i]);
3283 if (ret)
3284 mlog_errno(ret);
3285 }
3286
3287 /* store the first_hash of the new bucket. */
3288 if (first_hash)
3289 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3290
3291 /*
3292 * Now only update the 1st block of the old bucket.
3293 * Please note that the entry has been sorted already above.
3294 */
3295 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
3296 memset(&xh->xh_entries[start], 0,
3297 sizeof(struct ocfs2_xattr_entry) * (count - start));
3298 xh->xh_count = cpu_to_le16(start);
3299 xh->xh_free_start = cpu_to_le16(name_offset);
3300 xh->xh_name_value_len = cpu_to_le16(name_value_len);
3301
3302 ocfs2_journal_dirty(handle, s_bhs[0]);
3303 if (ret)
3304 mlog_errno(ret);
3305
3306out:
3307 if (s_bhs) {
3308 for (i = 0; i < blk_per_bucket; i++)
3309 brelse(s_bhs[i]);
3310 }
3311 kfree(s_bhs);
3312
3313 if (t_bhs) {
3314 for (i = 0; i < blk_per_bucket; i++)
3315 brelse(t_bhs[i]);
3316 }
3317 kfree(t_bhs);
3318
3319 return ret;
3320}
3321
3322/*
3323 * Copy xattr from one bucket to another bucket.
3324 *
3325 * The caller must make sure that the journal transaction
3326 * has enough space for journaling.
3327 */
3328static int ocfs2_cp_xattr_bucket(struct inode *inode,
3329 handle_t *handle,
3330 u64 s_blkno,
3331 u64 t_blkno,
3332 int t_is_new)
3333{
3334 int ret, i;
3335 int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3336 int blocksize = inode->i_sb->s_blocksize;
3337 struct buffer_head **s_bhs, **t_bhs = NULL;
3338
3339 BUG_ON(s_blkno == t_blkno);
3340
3341 mlog(0, "cp bucket %llu to %llu, target is %d\n",
3342 s_blkno, t_blkno, t_is_new);
3343
3344 s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
3345 GFP_NOFS);
3346 if (!s_bhs)
3347 return -ENOMEM;
3348
3349 ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0);
3350 if (ret)
3351 goto out;
3352
3353 t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
3354 GFP_NOFS);
3355 if (!t_bhs) {
3356 ret = -ENOMEM;
3357 goto out;
3358 }
3359
3360 ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new);
3361 if (ret)
3362 goto out;
3363
3364 for (i = 0; i < blk_per_bucket; i++) {
3365 ret = ocfs2_journal_access(handle, inode, t_bhs[i],
3366 OCFS2_JOURNAL_ACCESS_WRITE);
3367 if (ret)
3368 goto out;
3369 }
3370
3371 for (i = 0; i < blk_per_bucket; i++) {
3372 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3373 ocfs2_journal_dirty(handle, t_bhs[i]);
3374 }
3375
3376out:
3377 if (s_bhs) {
3378 for (i = 0; i < blk_per_bucket; i++)
3379 brelse(s_bhs[i]);
3380 }
3381 kfree(s_bhs);
3382
3383 if (t_bhs) {
3384 for (i = 0; i < blk_per_bucket; i++)
3385 brelse(t_bhs[i]);
3386 }
3387 kfree(t_bhs);
3388
3389 return ret;
3390}
3391
3392/*
3393 * Copy one xattr cluster from src_blk to to_blk.
3394 * The to_blk will become the first bucket header of the cluster, so its
3395 * xh_num_buckets will be initialized as the bucket num in the cluster.
3396 */
3397static int ocfs2_cp_xattr_cluster(struct inode *inode,
3398 handle_t *handle,
3399 struct buffer_head *first_bh,
3400 u64 src_blk,
3401 u64 to_blk,
3402 u32 *first_hash)
3403{
3404 int i, ret, credits;
3405 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3406 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3407 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3408 struct buffer_head *bh = NULL;
3409 struct ocfs2_xattr_header *xh;
3410 u64 to_blk_start = to_blk;
3411
3412 mlog(0, "cp xattrs from cluster %llu to %llu\n", src_blk, to_blk);
3413
3414 /*
3415 * We need to update the new cluster and 1 more for the update of
3416 * the 1st bucket of the previous extent rec.
3417 */
3418 credits = bpc + 1;
3419 ret = ocfs2_extend_trans(handle, credits);
3420 if (ret) {
3421 mlog_errno(ret);
3422 goto out;
3423 }
3424
3425 ret = ocfs2_journal_access(handle, inode, first_bh,
3426 OCFS2_JOURNAL_ACCESS_WRITE);
3427 if (ret) {
3428 mlog_errno(ret);
3429 goto out;
3430 }
3431
3432 for (i = 0; i < num_buckets; i++) {
3433 ret = ocfs2_cp_xattr_bucket(inode, handle,
3434 src_blk, to_blk, 1);
3435 if (ret) {
3436 mlog_errno(ret);
3437 goto out;
3438 }
3439
3440 src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3441 to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3442 }
3443
3444 /* update the old bucket header. */
3445 xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3446 le16_add_cpu(&xh->xh_num_buckets, -num_buckets);
3447
3448 ocfs2_journal_dirty(handle, first_bh);
3449
3450 /* update the new bucket header. */
3451 ret = ocfs2_read_block(osb, to_blk_start, &bh, OCFS2_BH_CACHED, inode);
3452 if (ret < 0) {
3453 mlog_errno(ret);
3454 goto out;
3455 }
3456
3457 ret = ocfs2_journal_access(handle, inode, bh,
3458 OCFS2_JOURNAL_ACCESS_WRITE);
3459 if (ret) {
3460 mlog_errno(ret);
3461 goto out;
3462 }
3463
3464 xh = (struct ocfs2_xattr_header *)bh->b_data;
3465 xh->xh_num_buckets = cpu_to_le16(num_buckets);
3466
3467 ocfs2_journal_dirty(handle, bh);
3468
3469 if (first_hash)
3470 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3471out:
3472 brelse(bh);
3473 return ret;
3474}
3475
3476/*
3477 * Move half of the xattrs in this cluster to the new cluster.
3478 * This function should only be called when bucket size == cluster size.
3479 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
3480 */
3481static int ocfs2_half_xattr_cluster(struct inode *inode,
3482 handle_t *handle,
3483 u64 prev_blk,
3484 u64 new_blk,
3485 u32 *first_hash)
3486{
3487 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3488 int ret, credits = 2 * blk_per_bucket;
3489
3490 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
3491
3492 ret = ocfs2_extend_trans(handle, credits);
3493 if (ret) {
3494 mlog_errno(ret);
3495 return ret;
3496 }
3497
3498 /* Move half of the xattr in start_blk to the next bucket. */
3499 return ocfs2_half_xattr_bucket(inode, handle, prev_blk,
3500 new_blk, first_hash, 1);
3501}
3502
3503/*
3504 * Move some xattrs from the old cluster to the new one since they are not
3505 * contiguous in ocfs2 xattr tree.
3506 *
3507 * new_blk starts a new separate cluster, and we will move some xattrs from
3508 * prev_blk to it. v_start will be set as the first name hash value in this
3509 * new cluster so that it can be used as e_cpos during tree insertion and
3510 * don't collide with our original b-tree operations. first_bh and header_bh
3511 * will also be updated since they will be used in ocfs2_extend_xattr_bucket
3512 * to extend the insert bucket.
3513 *
3514 * The problem is how much xattr should we move to the new one and when should
3515 * we update first_bh and header_bh?
3516 * 1. If cluster size > bucket size, that means the previous cluster has more
3517 * than 1 bucket, so just move half nums of bucket into the new cluster and
3518 * update the first_bh and header_bh if the insert bucket has been moved
3519 * to the new cluster.
3520 * 2. If cluster_size == bucket_size:
3521 * a) If the previous extent rec has more than one cluster and the insert
3522 * place isn't in the last cluster, copy the entire last cluster to the
3523 * new one. This time, we don't need to upate the first_bh and header_bh
3524 * since they will not be moved into the new cluster.
3525 * b) Otherwise, move the bottom half of the xattrs in the last cluster into
3526 * the new one. And we set the extend flag to zero if the insert place is
3527 * moved into the new allocated cluster since no extend is needed.
3528 */
3529static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3530 handle_t *handle,
3531 struct buffer_head **first_bh,
3532 struct buffer_head **header_bh,
3533 u64 new_blk,
3534 u64 prev_blk,
3535 u32 prev_clusters,
3536 u32 *v_start,
3537 int *extend)
3538{
3539 int ret = 0;
3540 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3541
3542 mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
3543 prev_blk, prev_clusters, new_blk);
3544
3545 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1)
3546 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
3547 handle,
3548 first_bh,
3549 header_bh,
3550 new_blk,
3551 prev_blk,
3552 prev_clusters,
3553 v_start);
3554 else {
3555 u64 last_blk = prev_blk + bpc * (prev_clusters - 1);
3556
3557 if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk)
3558 ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh,
3559 last_blk, new_blk,
3560 v_start);
3561 else {
3562 ret = ocfs2_half_xattr_cluster(inode, handle,
3563 last_blk, new_blk,
3564 v_start);
3565
3566 if ((*header_bh)->b_blocknr == last_blk && extend)
3567 *extend = 0;
3568 }
3569 }
3570
3571 return ret;
3572}
3573
3574/*
3575 * Add a new cluster for xattr storage.
3576 *
3577 * If the new cluster is contiguous with the previous one, it will be
3578 * appended to the same extent record, and num_clusters will be updated.
3579 * If not, we will insert a new extent for it and move some xattrs in
3580 * the last cluster into the new allocated one.
3581 * We also need to limit the maximum size of a btree leaf, otherwise we'll
3582 * lose the benefits of hashing because we'll have to search large leaves.
3583 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
3584 * if it's bigger).
3585 *
3586 * first_bh is the first block of the previous extent rec and header_bh
3587 * indicates the bucket we will insert the new xattrs. They will be updated
3588 * when the header_bh is moved into the new cluster.
3589 */
3590static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3591 struct buffer_head *root_bh,
3592 struct buffer_head **first_bh,
3593 struct buffer_head **header_bh,
3594 u32 *num_clusters,
3595 u32 prev_cpos,
3596 u64 prev_blkno,
3597 int *extend)
3598{
3599 int ret, credits;
3600 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3601 u32 prev_clusters = *num_clusters;
3602 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
3603 u64 block;
3604 handle_t *handle = NULL;
3605 struct ocfs2_alloc_context *data_ac = NULL;
3606 struct ocfs2_alloc_context *meta_ac = NULL;
3607 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3608 struct ocfs2_xattr_block *xb =
3609 (struct ocfs2_xattr_block *)root_bh->b_data;
3610 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3611 struct ocfs2_extent_list *root_el = &xb_root->xt_list;
3612 enum ocfs2_extent_tree_type type = OCFS2_XATTR_TREE_EXTENT;
3613
3614 mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
3615 "previous xattr blkno = %llu\n",
3616 (unsigned long long)OCFS2_I(inode)->ip_blkno,
3617 prev_cpos, prev_blkno);
3618
3619 ret = ocfs2_lock_allocators(inode, root_bh, root_el,
3620 clusters_to_add, 0, &data_ac,
3621 &meta_ac, type, NULL);
3622 if (ret) {
3623 mlog_errno(ret);
3624 goto leave;
3625 }
3626
3627 credits = ocfs2_calc_extend_credits(osb->sb, root_el, clusters_to_add);
3628 handle = ocfs2_start_trans(osb, credits);
3629 if (IS_ERR(handle)) {
3630 ret = PTR_ERR(handle);
3631 handle = NULL;
3632 mlog_errno(ret);
3633 goto leave;
3634 }
3635
3636 ret = ocfs2_journal_access(handle, inode, root_bh,
3637 OCFS2_JOURNAL_ACCESS_WRITE);
3638 if (ret < 0) {
3639 mlog_errno(ret);
3640 goto leave;
3641 }
3642
3643 ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
3644 clusters_to_add, &bit_off, &num_bits);
3645 if (ret < 0) {
3646 if (ret != -ENOSPC)
3647 mlog_errno(ret);
3648 goto leave;
3649 }
3650
3651 BUG_ON(num_bits > clusters_to_add);
3652
3653 block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
3654 mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
3655 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
3656
3657 if (prev_blkno + prev_clusters * bpc == block &&
3658 (prev_clusters + num_bits) << osb->s_clustersize_bits <=
3659 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
3660 /*
3661 * If this cluster is contiguous with the old one and
3662 * adding this new cluster, we don't surpass the limit of
3663 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
3664 * initialized and used like other buckets in the previous
3665 * cluster.
3666 * So add it as a contiguous one. The caller will handle
3667 * its init process.
3668 */
3669 v_start = prev_cpos + prev_clusters;
3670 *num_clusters = prev_clusters + num_bits;
3671 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
3672 num_bits);
3673 } else {
3674 ret = ocfs2_adjust_xattr_cross_cluster(inode,
3675 handle,
3676 first_bh,
3677 header_bh,
3678 block,
3679 prev_blkno,
3680 prev_clusters,
3681 &v_start,
3682 extend);
3683 if (ret) {
3684 mlog_errno(ret);
3685 goto leave;
3686 }
3687 }
3688
3689 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
3690 num_bits, block, v_start);
3691 ret = ocfs2_xattr_tree_insert_extent(osb, handle, inode, root_bh,
3692 v_start, block, num_bits,
3693 0, meta_ac);
3694 if (ret < 0) {
3695 mlog_errno(ret);
3696 goto leave;
3697 }
3698
3699 ret = ocfs2_journal_dirty(handle, root_bh);
3700 if (ret < 0) {
3701 mlog_errno(ret);
3702 goto leave;
3703 }
3704
3705leave:
3706 if (handle)
3707 ocfs2_commit_trans(osb, handle);
3708 if (data_ac)
3709 ocfs2_free_alloc_context(data_ac);
3710 if (meta_ac)
3711 ocfs2_free_alloc_context(meta_ac);
3712
3713 return ret;
3714}
3715
3716/*
3717 * Extend a new xattr bucket and move xattrs to the end one by one until
3718 * We meet with start_bh. Only move half of the xattrs to the bucket after it.
3719 */
3720static int ocfs2_extend_xattr_bucket(struct inode *inode,
3721 struct buffer_head *first_bh,
3722 struct buffer_head *start_bh,
3723 u32 num_clusters)
3724{
3725 int ret, credits;
3726 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3727 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3728 u64 start_blk = start_bh->b_blocknr, end_blk;
3729 u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb);
3730 handle_t *handle;
3731 struct ocfs2_xattr_header *first_xh =
3732 (struct ocfs2_xattr_header *)first_bh->b_data;
3733 u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
3734
3735 mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
3736 "from %llu, len = %u\n", start_blk,
3737 (unsigned long long)first_bh->b_blocknr, num_clusters);
3738
3739 BUG_ON(bucket >= num_buckets);
3740
3741 end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket;
3742
3743 /*
3744 * We will touch all the buckets after the start_bh(include it).
3745 * Add one more bucket and modify the first_bh.
3746 */
3747 credits = end_blk - start_blk + 2 * blk_per_bucket + 1;
3748 handle = ocfs2_start_trans(osb, credits);
3749 if (IS_ERR(handle)) {
3750 ret = PTR_ERR(handle);
3751 handle = NULL;
3752 mlog_errno(ret);
3753 goto out;
3754 }
3755
3756 ret = ocfs2_journal_access(handle, inode, first_bh,
3757 OCFS2_JOURNAL_ACCESS_WRITE);
3758 if (ret) {
3759 mlog_errno(ret);
3760 goto commit;
3761 }
3762
3763 while (end_blk != start_blk) {
3764 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
3765 end_blk + blk_per_bucket, 0);
3766 if (ret)
3767 goto commit;
3768 end_blk -= blk_per_bucket;
3769 }
3770
3771 /* Move half of the xattr in start_blk to the next bucket. */
3772 ret = ocfs2_half_xattr_bucket(inode, handle, start_blk,
3773 start_blk + blk_per_bucket, NULL, 0);
3774
3775 le16_add_cpu(&first_xh->xh_num_buckets, 1);
3776 ocfs2_journal_dirty(handle, first_bh);
3777
3778commit:
3779 ocfs2_commit_trans(osb, handle);
3780out:
3781 return ret;
3782}
3783
3784/*
3785 * Add new xattr bucket in an extent record and adjust the buckets accordingly.
3786 * xb_bh is the ocfs2_xattr_block.
3787 * We will move all the buckets starting from header_bh to the next place. As
3788 * for this one, half num of its xattrs will be moved to the next one.
3789 *
3790 * We will allocate a new cluster if current cluster is full and adjust
3791 * header_bh and first_bh if the insert place is moved to the new cluster.
3792 */
3793static int ocfs2_add_new_xattr_bucket(struct inode *inode,
3794 struct buffer_head *xb_bh,
3795 struct buffer_head *header_bh)
3796{
3797 struct ocfs2_xattr_header *first_xh = NULL;
3798 struct buffer_head *first_bh = NULL;
3799 struct ocfs2_xattr_block *xb =
3800 (struct ocfs2_xattr_block *)xb_bh->b_data;
3801 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3802 struct ocfs2_extent_list *el = &xb_root->xt_list;
3803 struct ocfs2_xattr_header *xh =
3804 (struct ocfs2_xattr_header *)header_bh->b_data;
3805 u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3806 struct super_block *sb = inode->i_sb;
3807 struct ocfs2_super *osb = OCFS2_SB(sb);
3808 int ret, num_buckets, extend = 1;
3809 u64 p_blkno;
3810 u32 e_cpos, num_clusters;
3811
3812 mlog(0, "Add new xattr bucket starting form %llu\n",
3813 (unsigned long long)header_bh->b_blocknr);
3814
3815 /*
3816 * Add refrence for header_bh here because it may be
3817 * changed in ocfs2_add_new_xattr_cluster and we need
3818 * to free it in the end.
3819 */
3820 get_bh(header_bh);
3821
3822 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
3823 &num_clusters, el);
3824 if (ret) {
3825 mlog_errno(ret);
3826 goto out;
3827 }
3828
3829 ret = ocfs2_read_block(osb, p_blkno,
3830 &first_bh, OCFS2_BH_CACHED, inode);
3831 if (ret) {
3832 mlog_errno(ret);
3833 goto out;
3834 }
3835
3836 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
3837 first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3838
3839 if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) {
3840 ret = ocfs2_add_new_xattr_cluster(inode,
3841 xb_bh,
3842 &first_bh,
3843 &header_bh,
3844 &num_clusters,
3845 e_cpos,
3846 p_blkno,
3847 &extend);
3848 if (ret) {
3849 mlog_errno(ret);
3850 goto out;
3851 }
3852 }
3853
3854 if (extend)
3855 ret = ocfs2_extend_xattr_bucket(inode,
3856 first_bh,
3857 header_bh,
3858 num_clusters);
3859 if (ret)
3860 mlog_errno(ret);
3861out:
3862 brelse(first_bh);
3863 brelse(header_bh);
3864 return ret;
3865}
3866
3867static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
3868 struct ocfs2_xattr_bucket *bucket,
3869 int offs)
3870{
3871 int block_off = offs >> inode->i_sb->s_blocksize_bits;
3872
3873 offs = offs % inode->i_sb->s_blocksize;
3874 return bucket->bhs[block_off]->b_data + offs;
3875}
3876
3877/*
3878 * Handle the normal xattr set, including replace, delete and new.
3879 * When the bucket is empty, "is_empty" is set and the caller can
3880 * free this bucket.
3881 *
3882 * Note: "local" indicates the real data's locality. So we can't
3883 * just its bucket locality by its length.
3884 */
3885static void ocfs2_xattr_set_entry_normal(struct inode *inode,
3886 struct ocfs2_xattr_info *xi,
3887 struct ocfs2_xattr_search *xs,
3888 u32 name_hash,
3889 int local,
3890 int *is_empty)
3891{
3892 struct ocfs2_xattr_entry *last, *xe;
3893 int name_len = strlen(xi->name);
3894 struct ocfs2_xattr_header *xh = xs->header;
3895 u16 count = le16_to_cpu(xh->xh_count), start;
3896 size_t blocksize = inode->i_sb->s_blocksize;
3897 char *val;
3898 size_t offs, size, new_size;
3899
3900 last = &xh->xh_entries[count];
3901 if (!xs->not_found) {
3902 xe = xs->here;
3903 offs = le16_to_cpu(xe->xe_name_offset);
3904 if (ocfs2_xattr_is_local(xe))
3905 size = OCFS2_XATTR_SIZE(name_len) +
3906 OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3907 else
3908 size = OCFS2_XATTR_SIZE(name_len) +
3909 OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
3910
3911 /*
3912 * If the new value will be stored outside, xi->value has been
3913 * initalized as an empty ocfs2_xattr_value_root, and the same
3914 * goes with xi->value_len, so we can set new_size safely here.
3915 * See ocfs2_xattr_set_in_bucket.
3916 */
3917 new_size = OCFS2_XATTR_SIZE(name_len) +
3918 OCFS2_XATTR_SIZE(xi->value_len);
3919
3920 le16_add_cpu(&xh->xh_name_value_len, -size);
3921 if (xi->value) {
3922 if (new_size > size)
3923 goto set_new_name_value;
3924
3925 /* Now replace the old value with new one. */
3926 if (local)
3927 xe->xe_value_size = cpu_to_le64(xi->value_len);
3928 else
3929 xe->xe_value_size = 0;
3930
3931 val = ocfs2_xattr_bucket_get_val(inode,
3932 &xs->bucket, offs);
3933 memset(val + OCFS2_XATTR_SIZE(name_len), 0,
3934 size - OCFS2_XATTR_SIZE(name_len));
3935 if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
3936 memcpy(val + OCFS2_XATTR_SIZE(name_len),
3937 xi->value, xi->value_len);
3938
3939 le16_add_cpu(&xh->xh_name_value_len, new_size);
3940 ocfs2_xattr_set_local(xe, local);
3941 return;
3942 } else {
3943 /* Remove the old entry. */
3944 last -= 1;
3945 memmove(xe, xe + 1,
3946 (void *)last - (void *)xe);
3947 memset(last, 0, sizeof(struct ocfs2_xattr_entry));
3948 le16_add_cpu(&xh->xh_count, -1);
3949 if (xh->xh_count == 0 && is_empty)
3950 *is_empty = 1;
3951 return;
3952 }
3953 } else {
3954 /* find a new entry for insert. */
3955 int low = 0, high = count - 1, tmp;
3956 struct ocfs2_xattr_entry *tmp_xe;
3957
3958 while (low <= high) {
3959 tmp = (low + high) / 2;
3960 tmp_xe = &xh->xh_entries[tmp];
3961
3962 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
3963 low = tmp + 1;
3964 else if (name_hash <
3965 le32_to_cpu(tmp_xe->xe_name_hash))
3966 high = tmp - 1;
3967 else
3968 break;
3969 }
3970
3971 xe = &xh->xh_entries[low];
3972 if (low != count)
3973 memmove(xe + 1, xe, (void *)last - (void *)xe);
3974
3975 le16_add_cpu(&xh->xh_count, 1);
3976 memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
3977 xe->xe_name_hash = cpu_to_le32(name_hash);
3978 xe->xe_name_len = name_len;
3979 ocfs2_xattr_set_type(xe, xi->name_index);
3980 }
3981
3982set_new_name_value:
3983 /* Insert the new name+value. */
3984 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);
3985
3986 /*
3987 * We must make sure that the name/value pair
3988 * exists in the same block.
3989 */
3990 offs = le16_to_cpu(xh->xh_free_start);
3991 start = offs - size;
3992
3993 if (start >> inode->i_sb->s_blocksize_bits !=
3994 (offs - 1) >> inode->i_sb->s_blocksize_bits) {
3995 offs = offs - offs % blocksize;
3996 xh->xh_free_start = cpu_to_le16(offs);
3997 }
3998
3999 val = ocfs2_xattr_bucket_get_val(inode,
4000 &xs->bucket, offs - size);
4001 xe->xe_name_offset = cpu_to_le16(offs - size);
4002
4003 memset(val, 0, size);
4004 memcpy(val, xi->name, name_len);
4005 memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);
4006
4007 xe->xe_value_size = cpu_to_le64(xi->value_len);
4008 ocfs2_xattr_set_local(xe, local);
4009 xs->here = xe;
4010 le16_add_cpu(&xh->xh_free_start, -size);
4011 le16_add_cpu(&xh->xh_name_value_len, size);
4012
4013 return;
4014}
4015
4016static int ocfs2_xattr_bucket_handle_journal(struct inode *inode,
4017 handle_t *handle,
4018 struct ocfs2_xattr_search *xs,
4019 struct buffer_head **bhs,
4020 u16 bh_num)
4021{
4022 int ret = 0, off, block_off;
4023 struct ocfs2_xattr_entry *xe = xs->here;
4024
4025 /*
4026 * First calculate all the blocks we should journal_access
4027 * and journal_dirty. The first block should always be touched.
4028 */
4029 ret = ocfs2_journal_dirty(handle, bhs[0]);
4030 if (ret)
4031 mlog_errno(ret);
4032
4033 /* calc the data. */
4034 off = le16_to_cpu(xe->xe_name_offset);
4035 block_off = off >> inode->i_sb->s_blocksize_bits;
4036 ret = ocfs2_journal_dirty(handle, bhs[block_off]);
4037 if (ret)
4038 mlog_errno(ret);
4039
4040 return ret;
4041}
4042
4043/*
4044 * Set the xattr entry in the specified bucket.
4045 * The bucket is indicated by xs->bucket and it should have the enough
4046 * space for the xattr insertion.
4047 */
4048static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4049 struct ocfs2_xattr_info *xi,
4050 struct ocfs2_xattr_search *xs,
4051 u32 name_hash,
4052 int local,
4053 int *bucket_empty)
4054{
4055 int i, ret;
4056 handle_t *handle = NULL;
4057 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4058 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4059
4060 mlog(0, "Set xattr entry len = %d index = %d in bucket %llu\n",
4061 xi->value_len, xi->name_index,
4062 (unsigned long long)xs->bucket.bhs[0]->b_blocknr);
4063
4064 if (!xs->bucket.bhs[1]) {
4065 ret = ocfs2_read_blocks(osb,
4066 xs->bucket.bhs[0]->b_blocknr + 1,
4067 blk_per_bucket - 1, &xs->bucket.bhs[1],
4068 OCFS2_BH_CACHED, inode);
4069 if (ret) {
4070 mlog_errno(ret);
4071 goto out;
4072 }
4073 }
4074
4075 handle = ocfs2_start_trans(osb, blk_per_bucket);
4076 if (IS_ERR(handle)) {
4077 ret = PTR_ERR(handle);
4078 handle = NULL;
4079 mlog_errno(ret);
4080 goto out;
4081 }
4082
4083 for (i = 0; i < blk_per_bucket; i++) {
4084 ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[i],
4085 OCFS2_JOURNAL_ACCESS_WRITE);
4086 if (ret < 0) {
4087 mlog_errno(ret);
4088 goto out;
4089 }
4090 }
4091
4092 ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash,
4093 local, bucket_empty);
4094
4095 /*Only dirty the blocks we have touched in set xattr. */
4096 ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs,
4097 xs->bucket.bhs, blk_per_bucket);
4098 if (ret)
4099 mlog_errno(ret);
4100out:
4101 ocfs2_commit_trans(osb, handle);
4102
4103 return ret;
4104}
4105
4106static int ocfs2_xattr_value_update_size(struct inode *inode,
4107 struct buffer_head *xe_bh,
4108 struct ocfs2_xattr_entry *xe,
4109 u64 new_size)
4110{
4111 int ret;
4112 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4113 handle_t *handle = NULL;
4114
4115 handle = ocfs2_start_trans(osb, 1);
4116 if (handle == NULL) {
4117 ret = -ENOMEM;
4118 mlog_errno(ret);
4119 goto out;
4120 }
4121
4122 ret = ocfs2_journal_access(handle, inode, xe_bh,
4123 OCFS2_JOURNAL_ACCESS_WRITE);
4124 if (ret < 0) {
4125 mlog_errno(ret);
4126 goto out_commit;
4127 }
4128
4129 xe->xe_value_size = cpu_to_le64(new_size);
4130
4131 ret = ocfs2_journal_dirty(handle, xe_bh);
4132 if (ret < 0)
4133 mlog_errno(ret);
4134
4135out_commit:
4136 ocfs2_commit_trans(osb, handle);
4137out:
4138 return ret;
4139}
4140
4141/*
4142 * Truncate the specified xe_off entry in xattr bucket.
4143 * bucket is indicated by header_bh and len is the new length.
4144 * Both the ocfs2_xattr_value_root and the entry will be updated here.
4145 *
4146 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4147 */
4148static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4149 struct buffer_head *header_bh,
4150 int xe_off,
4151 int len)
4152{
4153 int ret, offset;
4154 u64 value_blk;
4155 struct buffer_head *value_bh = NULL;
4156 struct ocfs2_xattr_value_root *xv;
4157 struct ocfs2_xattr_entry *xe;
4158 struct ocfs2_xattr_header *xh =
4159 (struct ocfs2_xattr_header *)header_bh->b_data;
4160 size_t blocksize = inode->i_sb->s_blocksize;
4161
4162 xe = &xh->xh_entries[xe_off];
4163
4164 BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4165
4166 offset = le16_to_cpu(xe->xe_name_offset) +
4167 OCFS2_XATTR_SIZE(xe->xe_name_len);
4168
4169 value_blk = offset / blocksize;
4170
4171 /* We don't allow ocfs2_xattr_value to be stored in different block. */
4172 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4173 value_blk += header_bh->b_blocknr;
4174
4175 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), value_blk,
4176 &value_bh, OCFS2_BH_CACHED, inode);
4177 if (ret) {
4178 mlog_errno(ret);
4179 goto out;
4180 }
4181
4182 xv = (struct ocfs2_xattr_value_root *)
4183 (value_bh->b_data + offset % blocksize);
4184
4185 mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4186 xe_off, (unsigned long long)header_bh->b_blocknr, len);
4187 ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len);
4188 if (ret) {
4189 mlog_errno(ret);
4190 goto out;
4191 }
4192
4193 ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len);
4194 if (ret) {
4195 mlog_errno(ret);
4196 goto out;
4197 }
4198
4199out:
4200 brelse(value_bh);
4201 return ret;
4202}
4203
4204static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4205 struct ocfs2_xattr_search *xs,
4206 int len)
4207{
4208 int ret, offset;
4209 struct ocfs2_xattr_entry *xe = xs->here;
4210 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4211
4212 BUG_ON(!xs->bucket.bhs[0] || !xe || ocfs2_xattr_is_local(xe));
4213
4214 offset = xe - xh->xh_entries;
4215 ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bhs[0],
4216 offset, len);
4217 if (ret)
4218 mlog_errno(ret);
4219
4220 return ret;
4221}
4222
4223static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4224 struct ocfs2_xattr_search *xs,
4225 char *val,
4226 int value_len)
4227{
4228 int offset;
4229 struct ocfs2_xattr_value_root *xv;
4230 struct ocfs2_xattr_entry *xe = xs->here;
4231
4232 BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4233
4234 offset = le16_to_cpu(xe->xe_name_offset) +
4235 OCFS2_XATTR_SIZE(xe->xe_name_len);
4236
4237 xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
4238
4239 return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len);
4240}
4241
4242/*
4243 * Remove the xattr bucket pointed by bucket_bh.
4244 * All the buckets after it in the same xattr extent rec will be
4245 * move forward one by one.
4246 */
4247static int ocfs2_rm_xattr_bucket(struct inode *inode,
4248 struct buffer_head *first_bh,
4249 struct ocfs2_xattr_bucket *bucket)
4250{
4251 int ret = 0, credits;
4252 struct ocfs2_xattr_header *xh =
4253 (struct ocfs2_xattr_header *)first_bh->b_data;
4254 u16 bucket_num = le16_to_cpu(xh->xh_num_buckets);
4255 u64 end, start = bucket->bhs[0]->b_blocknr;
4256 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4257 handle_t *handle;
4258 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4259
4260 end = first_bh->b_blocknr + (bucket_num - 1) * blk_per_bucket;
4261
4262 mlog(0, "rm xattr bucket %llu\n", start);
4263 /*
4264 * We need to update the first xattr_header and all the buckets starting
4265 * from start in this xattr rec.
4266 *
4267 * XXX: Should we empty the old last bucket here?
4268 */
4269 credits = 1 + end - start;
4270 handle = ocfs2_start_trans(osb, credits);
4271 if (IS_ERR(handle)) {
4272 ret = PTR_ERR(handle);
4273 mlog_errno(ret);
4274 return ret;
4275 }
4276
4277 ret = ocfs2_journal_access(handle, inode, first_bh,
4278 OCFS2_JOURNAL_ACCESS_WRITE);
4279 if (ret) {
4280 mlog_errno(ret);
4281 goto out_commit;
4282 }
4283
4284
4285 while (start < end) {
4286 ret = ocfs2_cp_xattr_bucket(inode, handle,
4287 start + blk_per_bucket,
4288 start, 0);
4289 if (ret) {
4290 mlog_errno(ret);
4291 goto out_commit;
4292 }
4293 start += blk_per_bucket;
4294 }
4295
4296 /* update the first_bh. */
4297 xh->xh_num_buckets = cpu_to_le16(bucket_num - 1);
4298 ocfs2_journal_dirty(handle, first_bh);
4299
4300out_commit:
4301 ocfs2_commit_trans(osb, handle);
4302 return ret;
4303}
4304
4305static int ocfs2_rm_xattr_cluster(struct inode *inode,
4306 struct buffer_head *root_bh,
4307 u64 blkno,
4308 u32 cpos,
4309 u32 len)
4310{
4311 int ret;
4312 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4313 struct inode *tl_inode = osb->osb_tl_inode;
4314 handle_t *handle;
4315 struct ocfs2_xattr_block *xb =
4316 (struct ocfs2_xattr_block *)root_bh->b_data;
4317 struct ocfs2_extent_list *root_el = &xb->xb_attrs.xb_root.xt_list;
4318 struct ocfs2_alloc_context *meta_ac = NULL;
4319 struct ocfs2_cached_dealloc_ctxt dealloc;
4320
4321 ocfs2_init_dealloc_ctxt(&dealloc);
4322
4323 mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4324 cpos, len, (unsigned long long)blkno);
4325
4326 ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len);
4327
4328 ret = ocfs2_lock_allocators(inode, root_bh, root_el,
4329 0, 1, NULL, &meta_ac,
4330 OCFS2_XATTR_TREE_EXTENT, NULL);
4331 if (ret) {
4332 mlog_errno(ret);
4333 return ret;
4334 }
4335
4336 mutex_lock(&tl_inode->i_mutex);
4337
4338 if (ocfs2_truncate_log_needs_flush(osb)) {
4339 ret = __ocfs2_flush_truncate_log(osb);
4340 if (ret < 0) {
4341 mlog_errno(ret);
4342 goto out;
4343 }
4344 }
4345
4346 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
4347 if (handle == NULL) {
4348 ret = -ENOMEM;
4349 mlog_errno(ret);
4350 goto out;
4351 }
4352
4353 ret = ocfs2_journal_access(handle, inode, root_bh,
4354 OCFS2_JOURNAL_ACCESS_WRITE);
4355 if (ret) {
4356 mlog_errno(ret);
4357 goto out_commit;
4358 }
4359
4360 ret = ocfs2_remove_extent(inode, root_bh, cpos, len, handle, meta_ac,
4361 &dealloc, OCFS2_XATTR_TREE_EXTENT, NULL);
4362 if (ret) {
4363 mlog_errno(ret);
4364 goto out_commit;
4365 }
4366
4367 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
4368
4369 ret = ocfs2_journal_dirty(handle, root_bh);
4370 if (ret) {
4371 mlog_errno(ret);
4372 goto out_commit;
4373 }
4374
4375 ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
4376 if (ret)
4377 mlog_errno(ret);
4378
4379out_commit:
4380 ocfs2_commit_trans(osb, handle);
4381out:
4382 ocfs2_schedule_truncate_log_flush(osb, 1);
4383
4384 mutex_unlock(&tl_inode->i_mutex);
4385
4386 if (meta_ac)
4387 ocfs2_free_alloc_context(meta_ac);
4388
4389 ocfs2_run_deallocs(osb, &dealloc);
4390
4391 return ret;
4392}
4393
4394/*
4395 * Free the xattr bucket indicated by xs->bucket and if all the buckets
4396 * in the clusters is free, free the clusters also.
4397 */
4398static int ocfs2_xattr_bucket_shrink(struct inode *inode,
4399 struct ocfs2_xattr_info *xi,
4400 struct ocfs2_xattr_search *xs,
4401 u32 name_hash)
4402{
4403 int ret;
4404 u32 e_cpos, num_clusters;
4405 u64 p_blkno;
4406 struct buffer_head *first_bh = NULL;
4407 struct ocfs2_xattr_header *first_xh;
4408 struct ocfs2_xattr_block *xb =
4409 (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
4410
4411 BUG_ON(xs->header->xh_count != 0);
4412
4413 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4414 &e_cpos, &num_clusters,
4415 &xb->xb_attrs.xb_root.xt_list);
4416 if (ret) {
4417 mlog_errno(ret);
4418 return ret;
4419 }
4420
4421 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno,
4422 &first_bh, OCFS2_BH_CACHED, inode);
4423 if (ret) {
4424 mlog_errno(ret);
4425 return ret;
4426 }
4427
4428 ret = ocfs2_rm_xattr_bucket(inode, first_bh, &xs->bucket);
4429 if (ret) {
4430 mlog_errno(ret);
4431 goto out;
4432 }
4433
4434 first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
4435 if (first_xh->xh_num_buckets == 0)
4436 ret = ocfs2_rm_xattr_cluster(inode, xs->xattr_bh,
4437 p_blkno, e_cpos,
4438 num_clusters);
4439
4440out:
4441 brelse(first_bh);
4442 return ret;
4443}
4444
4445static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4446 struct ocfs2_xattr_search *xs)
4447{
4448 handle_t *handle = NULL;
4449 struct ocfs2_xattr_header *xh = xs->bucket.xh;
4450 struct ocfs2_xattr_entry *last = &xh->xh_entries[
4451 le16_to_cpu(xh->xh_count) - 1];
4452 int ret = 0;
4453
4454 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1);
4455 if (IS_ERR(handle)) {
4456 ret = PTR_ERR(handle);
4457 mlog_errno(ret);
4458 return;
4459 }
4460
4461 ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[0],
4462 OCFS2_JOURNAL_ACCESS_WRITE);
4463 if (ret) {
4464 mlog_errno(ret);
4465 goto out_commit;
4466 }
4467
4468 /* Remove the old entry. */
4469 memmove(xs->here, xs->here + 1,
4470 (void *)last - (void *)xs->here);
4471 memset(last, 0, sizeof(struct ocfs2_xattr_entry));
4472 le16_add_cpu(&xh->xh_count, -1);
4473
4474 ret = ocfs2_journal_dirty(handle, xs->bucket.bhs[0]);
4475 if (ret < 0)
4476 mlog_errno(ret);
4477out_commit:
4478 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
4479}
4480
4481/*
4482 * Set the xattr name/value in the bucket specified in xs.
4483 *
4484 * As the new value in xi may be stored in the bucket or in an outside cluster,
4485 * we divide the whole process into 3 steps:
4486 * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
4487 * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
4488 * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
4489 * 4. If the clusters for the new outside value can't be allocated, we need
4490 * to free the xattr we allocated in set.
4491 */
4492static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4493 struct ocfs2_xattr_info *xi,
4494 struct ocfs2_xattr_search *xs)
4495{
4496 int ret, local = 1, bucket_empty = 0;
4497 size_t value_len;
4498 char *val = (char *)xi->value;
4499 struct ocfs2_xattr_entry *xe = xs->here;
4500 u32 name_hash = ocfs2_xattr_hash_by_name(inode,
4501 xi->name_index, xi->name);
4502
4503 if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
4504 /*
4505 * We need to truncate the xattr storage first.
4506 *
4507 * If both the old and new value are stored to
4508 * outside block, we only need to truncate
4509 * the storage and then set the value outside.
4510 *
4511 * If the new value should be stored within block,
4512 * we should free all the outside block first and
4513 * the modification to the xattr block will be done
4514 * by following steps.
4515 */
4516 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4517 value_len = xi->value_len;
4518 else
4519 value_len = 0;
4520
4521 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4522 value_len);
4523 if (ret)
4524 goto out;
4525
4526 if (value_len)
4527 goto set_value_outside;
4528 }
4529
4530 value_len = xi->value_len;
4531 /* So we have to handle the inside block change now. */
4532 if (value_len > OCFS2_XATTR_INLINE_SIZE) {
4533 /*
4534 * If the new value will be stored outside of block,
4535 * initalize a new empty value root and insert it first.
4536 */
4537 local = 0;
4538 xi->value = &def_xv;
4539 xi->value_len = OCFS2_XATTR_ROOT_SIZE;
4540 }
4541
4542 ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash,
4543 local, &bucket_empty);
4544 if (ret) {
4545 mlog_errno(ret);
4546 goto out;
4547 }
4548
4549 if (value_len > OCFS2_XATTR_INLINE_SIZE) {
4550 /* allocate the space now for the outside block storage. */
4551 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4552 value_len);
4553 if (ret) {
4554 mlog_errno(ret);
4555
4556 if (xs->not_found) {
4557 /*
4558 * We can't allocate enough clusters for outside
4559 * storage and we have allocated xattr already,
4560 * so need to remove it.
4561 */
4562 ocfs2_xattr_bucket_remove_xs(inode, xs);
4563 }
4564 goto out;
4565 }
4566 } else {
4567 if (bucket_empty)
4568 ret = ocfs2_xattr_bucket_shrink(inode, xi,
4569 xs, name_hash);
4570 goto out;
4571 }
4572
4573set_value_outside:
4574 ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len);
4575out:
4576 return ret;
4577}
4578
4579/* check whether the xattr bucket is filled up with the same hash value. */
4580static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4581 struct ocfs2_xattr_bucket *bucket)
4582{
4583 struct ocfs2_xattr_header *xh = bucket->xh;
4584
4585 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
4586 xh->xh_entries[0].xe_name_hash) {
4587 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
4588 "hash = %u\n",
4589 (unsigned long long)bucket->bhs[0]->b_blocknr,
4590 le32_to_cpu(xh->xh_entries[0].xe_name_hash));
4591 return -ENOSPC;
4592 }
4593
4594 return 0;
4595}
4596
4597static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
4598 struct ocfs2_xattr_info *xi,
4599 struct ocfs2_xattr_search *xs)
4600{
4601 struct ocfs2_xattr_header *xh;
4602 struct ocfs2_xattr_entry *xe;
4603 u16 count, header_size, xh_free_start;
4604 int i, free, max_free, need, old;
4605 size_t value_size = 0, name_len = strlen(xi->name);
4606 size_t blocksize = inode->i_sb->s_blocksize;
4607 int ret, allocation = 0;
4608 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4609
4610 mlog_entry("Set xattr %s in xattr index block\n", xi->name);
4611
4612try_again:
4613 xh = xs->header;
4614 count = le16_to_cpu(xh->xh_count);
4615 xh_free_start = le16_to_cpu(xh->xh_free_start);
4616 header_size = sizeof(struct ocfs2_xattr_header) +
4617 count * sizeof(struct ocfs2_xattr_entry);
4618 max_free = OCFS2_XATTR_BUCKET_SIZE -
4619 le16_to_cpu(xh->xh_name_value_len) - header_size;
4620
4621 mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
4622 "of %u which exceed block size\n",
4623 (unsigned long long)xs->bucket.bhs[0]->b_blocknr,
4624 header_size);
4625
4626 if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4627 value_size = OCFS2_XATTR_ROOT_SIZE;
4628 else if (xi->value)
4629 value_size = OCFS2_XATTR_SIZE(xi->value_len);
4630
4631 if (xs->not_found)
4632 need = sizeof(struct ocfs2_xattr_entry) +
4633 OCFS2_XATTR_SIZE(name_len) + value_size;
4634 else {
4635 need = value_size + OCFS2_XATTR_SIZE(name_len);
4636
4637 /*
4638 * We only replace the old value if the new length is smaller
4639 * than the old one. Otherwise we will allocate new space in the
4640 * bucket to store it.
4641 */
4642 xe = xs->here;
4643 if (ocfs2_xattr_is_local(xe))
4644 old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4645 else
4646 old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4647
4648 if (old >= value_size)
4649 need = 0;
4650 }
4651
4652 free = xh_free_start - header_size;
4653 /*
4654 * We need to make sure the new name/value pair
4655 * can exist in the same block.
4656 */
4657 if (xh_free_start % blocksize < need)
4658 free -= xh_free_start % blocksize;
4659
4660 mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
4661 "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
4662 " %u\n", xs->not_found,
4663 (unsigned long long)xs->bucket.bhs[0]->b_blocknr,
4664 free, need, max_free, le16_to_cpu(xh->xh_free_start),
4665 le16_to_cpu(xh->xh_name_value_len));
4666
4667 if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4668 if (need <= max_free &&
4669 count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4670 /*
4671 * We can create the space by defragment. Since only the
4672 * name/value will be moved, the xe shouldn't be changed
4673 * in xs.
4674 */
4675 ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket);
4676 if (ret) {
4677 mlog_errno(ret);
4678 goto out;
4679 }
4680
4681 xh_free_start = le16_to_cpu(xh->xh_free_start);
4682 free = xh_free_start - header_size;
4683 if (xh_free_start % blocksize < need)
4684 free -= xh_free_start % blocksize;
4685
4686 if (free >= need)
4687 goto xattr_set;
4688
4689 mlog(0, "Can't get enough space for xattr insert by "
4690 "defragment. Need %u bytes, but we have %d, so "
4691 "allocate new bucket for it.\n", need, free);
4692 }
4693
4694 /*
4695 * We have to add new buckets or clusters and one
4696 * allocation should leave us enough space for insert.
4697 */
4698 BUG_ON(allocation);
4699
4700 /*
4701 * We do not allow for overlapping ranges between buckets. And
4702 * the maximum number of collisions we will allow for then is
4703 * one bucket's worth, so check it here whether we need to
4704 * add a new bucket for the insert.
4705 */
4706 ret = ocfs2_check_xattr_bucket_collision(inode, &xs->bucket);
4707 if (ret) {
4708 mlog_errno(ret);
4709 goto out;
4710 }
4711
4712 ret = ocfs2_add_new_xattr_bucket(inode,
4713 xs->xattr_bh,
4714 xs->bucket.bhs[0]);
4715 if (ret) {
4716 mlog_errno(ret);
4717 goto out;
4718 }
4719
4720 for (i = 0; i < blk_per_bucket; i++)
4721 brelse(xs->bucket.bhs[i]);
4722
4723 memset(&xs->bucket, 0, sizeof(xs->bucket));
4724
4725 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
4726 xi->name_index,
4727 xi->name, xs);
4728 if (ret && ret != -ENODATA)
4729 goto out;
4730 xs->not_found = ret;
4731 allocation = 1;
4732 goto try_again;
4733 }
4734
4735xattr_set:
4736 ret = ocfs2_xattr_set_in_bucket(inode, xi, xs);
4737out:
4738 mlog_exit(ret);
4739 return ret;
4740}
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 02afa87d5e69..c25c7c62a059 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -57,4 +57,12 @@ static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
57{ 57{
58 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); 58 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
59} 59}
60
61static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
62{
63 u16 len = sb->s_blocksize -
64 offsetof(struct ocfs2_xattr_header, xh_entries);
65
66 return len / sizeof(struct ocfs2_xattr_entry);
67}
60#endif /* OCFS2_XATTR_H */ 68#endif /* OCFS2_XATTR_H */