aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTao Ma <tao.ma@oracle.com>2008-08-18 05:38:50 -0400
committerMark Fasheh <mfasheh@suse.com>2008-10-13 19:57:03 -0400
commit0c044f0b24b9128ba8c297149d88bd81f2e36af3 (patch)
tree2a0d6a34fca5e5a8bdfde0d1068cb56d4252e864
parentba492615f0d32d0210b02c14b24512b4372b13d6 (diff)
ocfs2: Add xattr bucket iteration for large numbers of EAs
Ocfs2 breaks up xattr index tree leaves into 4k regions, called buckets. Attributes are stored within a given bucket, depending on hash value. After a discussion with Mark, we decided that the per-bucket index (xe_entry[]) would only exist in the 1st block of a bucket. Likewise, name/value pairs will not straddle more than one block. This allows the majority of operations to work directly on the buffer heads in a leaf block. This patch adds code to iterate the buckets in an EA. A new abstraction, ocfs2_xattr_bucket, is added. It records the bhs in this bucket and the ocfs2_xattr_header. This keeps the code neat and improves readability. Signed-off-by: Tao Ma <tao.ma@oracle.com> Signed-off-by: Mark Fasheh <mfasheh@suse.com>
-rw-r--r--fs/ocfs2/ocfs2_fs.h35
-rw-r--r--fs/ocfs2/xattr.c255
-rw-r--r--fs/ocfs2/xattr.h9
3 files changed, 293 insertions, 6 deletions
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 98e1f8bba0e1..8d5e72f2c5cf 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -755,8 +755,13 @@ struct ocfs2_xattr_header {
755 __le16 xh_count; /* contains the count of how 755 __le16 xh_count; /* contains the count of how
756 many records are in the 756 many records are in the
757 local xattr storage. */ 757 local xattr storage. */
758 __le16 xh_reserved1; 758 __le16 xh_free_start; /* current offset for storing
759 __le32 xh_reserved2; 759 xattr. */
760 __le16 xh_name_value_len; /* total length of name/value
761 length in this bucket. */
762 __le16 xh_num_buckets; /* bucket nums in one extent
763 record, only valid in the
764 first bucket. */
760 __le64 xh_csum; 765 __le64 xh_csum;
761 struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */ 766 struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */
762}; 767};
@@ -793,6 +798,10 @@ struct ocfs2_xattr_tree_root {
793#define OCFS2_XATTR_SIZE(size) (((size) + OCFS2_XATTR_ROUND) & \ 798#define OCFS2_XATTR_SIZE(size) (((size) + OCFS2_XATTR_ROUND) & \
794 ~(OCFS2_XATTR_ROUND)) 799 ~(OCFS2_XATTR_ROUND))
795 800
801#define OCFS2_XATTR_BUCKET_SIZE 4096
802#define OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET (OCFS2_XATTR_BUCKET_SIZE \
803 / OCFS2_MIN_BLOCKSIZE)
804
796/* 805/*
797 * On disk structure for xattr block. 806 * On disk structure for xattr block.
798 */ 807 */
@@ -963,6 +972,17 @@ static inline u64 ocfs2_backup_super_blkno(struct super_block *sb, int index)
963 return 0; 972 return 0;
964 973
965} 974}
975
976static inline u16 ocfs2_xattr_recs_per_xb(struct super_block *sb)
977{
978 int size;
979
980 size = sb->s_blocksize -
981 offsetof(struct ocfs2_xattr_block,
982 xb_attrs.xb_root.xt_list.l_recs);
983
984 return size / sizeof(struct ocfs2_extent_rec);
985}
966#else 986#else
967static inline int ocfs2_fast_symlink_chars(int blocksize) 987static inline int ocfs2_fast_symlink_chars(int blocksize)
968{ 988{
@@ -1046,6 +1066,17 @@ static inline uint64_t ocfs2_backup_super_blkno(int blocksize, int index)
1046 1066
1047 return 0; 1067 return 0;
1048} 1068}
1069
1070static inline int ocfs2_xattr_recs_per_xb(int blocksize)
1071{
1072 int size;
1073
1074 size = blocksize -
1075 offsetof(struct ocfs2_xattr_block,
1076 xb_attrs.xb_root.xt_list.l_recs);
1077
1078 return size / sizeof(struct ocfs2_extent_rec);
1079}
1049#endif /* __KERNEL__ */ 1080#endif /* __KERNEL__ */
1050 1081
1051 1082
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 67bebd9259e7..fb17f7fe4c66 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -52,6 +52,7 @@
52#include "suballoc.h" 52#include "suballoc.h"
53#include "uptodate.h" 53#include "uptodate.h"
54#include "buffer_head_io.h" 54#include "buffer_head_io.h"
55#include "super.h"
55#include "xattr.h" 56#include "xattr.h"
56 57
57 58
@@ -60,6 +61,11 @@ struct ocfs2_xattr_def_value_root {
60 struct ocfs2_extent_rec er; 61 struct ocfs2_extent_rec er;
61}; 62};
62 63
64struct ocfs2_xattr_bucket {
65 struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
66 struct ocfs2_xattr_header *xh;
67};
68
63#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 69#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root))
64#define OCFS2_XATTR_INLINE_SIZE 80 70#define OCFS2_XATTR_INLINE_SIZE 80
65 71
@@ -99,6 +105,11 @@ struct ocfs2_xattr_search {
99 int not_found; 105 int not_found;
100}; 106};
101 107
108static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
109 struct ocfs2_xattr_tree_root *xt,
110 char *buffer,
111 size_t buffer_size);
112
102static inline struct xattr_handler *ocfs2_xattr_handler(int name_index) 113static inline struct xattr_handler *ocfs2_xattr_handler(int name_index)
103{ 114{
104 struct xattr_handler *handler = NULL; 115 struct xattr_handler *handler = NULL;
@@ -483,7 +494,7 @@ static int ocfs2_xattr_block_list(struct inode *inode,
483 size_t buffer_size) 494 size_t buffer_size)
484{ 495{
485 struct buffer_head *blk_bh = NULL; 496 struct buffer_head *blk_bh = NULL;
486 struct ocfs2_xattr_header *header = NULL; 497 struct ocfs2_xattr_block *xb;
487 int ret = 0; 498 int ret = 0;
488 499
489 if (!di->i_xattr_loc) 500 if (!di->i_xattr_loc)
@@ -503,10 +514,17 @@ static int ocfs2_xattr_block_list(struct inode *inode,
503 goto cleanup; 514 goto cleanup;
504 } 515 }
505 516
506 header = &((struct ocfs2_xattr_block *)blk_bh->b_data)-> 517 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
507 xb_attrs.xb_header;
508 518
509 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); 519 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
520 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
521 ret = ocfs2_xattr_list_entries(inode, header,
522 buffer, buffer_size);
523 } else {
524 struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
525 ret = ocfs2_xattr_tree_list_index_block(inode, xt,
526 buffer, buffer_size);
527 }
510cleanup: 528cleanup:
511 brelse(blk_bh); 529 brelse(blk_bh);
512 530
@@ -1923,3 +1941,232 @@ cleanup:
1923 return ret; 1941 return ret;
1924} 1942}
1925 1943
1944/*
1945 * Find the xattr extent rec which may contains name_hash.
1946 * e_cpos will be the first name hash of the xattr rec.
1947 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
1948 */
1949static int ocfs2_xattr_get_rec(struct inode *inode,
1950 u32 name_hash,
1951 u64 *p_blkno,
1952 u32 *e_cpos,
1953 u32 *num_clusters,
1954 struct ocfs2_extent_list *el)
1955{
1956 int ret = 0, i;
1957 struct buffer_head *eb_bh = NULL;
1958 struct ocfs2_extent_block *eb;
1959 struct ocfs2_extent_rec *rec = NULL;
1960 u64 e_blkno = 0;
1961
1962 if (el->l_tree_depth) {
1963 ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh);
1964 if (ret) {
1965 mlog_errno(ret);
1966 goto out;
1967 }
1968
1969 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
1970 el = &eb->h_list;
1971
1972 if (el->l_tree_depth) {
1973 ocfs2_error(inode->i_sb,
1974 "Inode %lu has non zero tree depth in "
1975 "xattr tree block %llu\n", inode->i_ino,
1976 (unsigned long long)eb_bh->b_blocknr);
1977 ret = -EROFS;
1978 goto out;
1979 }
1980 }
1981
1982 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
1983 rec = &el->l_recs[i];
1984
1985 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
1986 e_blkno = le64_to_cpu(rec->e_blkno);
1987 break;
1988 }
1989 }
1990
1991 if (!e_blkno) {
1992 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
1993 "record (%u, %u, 0) in xattr", inode->i_ino,
1994 le32_to_cpu(rec->e_cpos),
1995 ocfs2_rec_clusters(el, rec));
1996 ret = -EROFS;
1997 goto out;
1998 }
1999
2000 *p_blkno = le64_to_cpu(rec->e_blkno);
2001 *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
2002 if (e_cpos)
2003 *e_cpos = le32_to_cpu(rec->e_cpos);
2004out:
2005 brelse(eb_bh);
2006 return ret;
2007}
2008
2009typedef int (xattr_bucket_func)(struct inode *inode,
2010 struct ocfs2_xattr_bucket *bucket,
2011 void *para);
2012
2013static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2014 u64 blkno,
2015 u32 clusters,
2016 xattr_bucket_func *func,
2017 void *para)
2018{
2019 int i, j, ret = 0;
2020 int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2021 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
2022 u32 num_buckets = clusters * bpc;
2023 struct ocfs2_xattr_bucket bucket;
2024
2025 memset(&bucket, 0, sizeof(bucket));
2026
2027 mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
2028 clusters, blkno);
2029
2030 for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
2031 ret = ocfs2_read_blocks(OCFS2_SB(inode->i_sb),
2032 blkno, blk_per_bucket,
2033 bucket.bhs, OCFS2_BH_CACHED, inode);
2034 if (ret) {
2035 mlog_errno(ret);
2036 goto out;
2037 }
2038
2039 bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data;
2040 /*
2041 * The real bucket num in this series of blocks is stored
2042 * in the 1st bucket.
2043 */
2044 if (i == 0)
2045 num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets);
2046
2047 mlog(0, "iterating xattr bucket %llu\n", blkno);
2048 if (func) {
2049 ret = func(inode, &bucket, para);
2050 if (ret) {
2051 mlog_errno(ret);
2052 break;
2053 }
2054 }
2055
2056 for (j = 0; j < blk_per_bucket; j++)
2057 brelse(bucket.bhs[j]);
2058 memset(&bucket, 0, sizeof(bucket));
2059 }
2060
2061out:
2062 for (j = 0; j < blk_per_bucket; j++)
2063 brelse(bucket.bhs[j]);
2064
2065 return ret;
2066}
2067
/*
 * Cursor passed to ocfs2_list_xattr_bucket() while listing xattr names
 * from an indexed xattr tree.
 */
struct ocfs2_xattr_tree_list {
	/* Next position to write to; may be NULL. */
	char *buffer;
	/* Reduced by the size of every name listed. */
	size_t buffer_size;
};
2072
2073static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
2074 struct ocfs2_xattr_header *xh,
2075 int index,
2076 int *block_off,
2077 int *new_offset)
2078{
2079 u16 name_offset;
2080
2081 if (index < 0 || index >= le16_to_cpu(xh->xh_count))
2082 return -EINVAL;
2083
2084 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
2085
2086 *block_off = name_offset >> inode->i_sb->s_blocksize_bits;
2087 *new_offset = name_offset % inode->i_sb->s_blocksize;
2088
2089 return 0;
2090}
2091
2092static int ocfs2_list_xattr_bucket(struct inode *inode,
2093 struct ocfs2_xattr_bucket *bucket,
2094 void *para)
2095{
2096 int ret = 0;
2097 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
2098 size_t size;
2099 int i, block_off, new_offset;
2100
2101 for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) {
2102 struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i];
2103 struct xattr_handler *handler =
2104 ocfs2_xattr_handler(ocfs2_xattr_get_type(entry));
2105
2106 if (handler) {
2107 ret = ocfs2_xattr_bucket_get_name_value(inode,
2108 bucket->xh,
2109 i,
2110 &block_off,
2111 &new_offset);
2112 if (ret)
2113 break;
2114 size = handler->list(inode, xl->buffer, xl->buffer_size,
2115 bucket->bhs[block_off]->b_data +
2116 new_offset,
2117 entry->xe_name_len);
2118 if (xl->buffer) {
2119 if (size > xl->buffer_size)
2120 return -ERANGE;
2121 xl->buffer += size;
2122 }
2123 xl->buffer_size -= size;
2124 }
2125 }
2126
2127 return ret;
2128}
2129
2130static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
2131 struct ocfs2_xattr_tree_root *xt,
2132 char *buffer,
2133 size_t buffer_size)
2134{
2135 struct ocfs2_extent_list *el = &xt->xt_list;
2136 int ret = 0;
2137 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
2138 u64 p_blkno = 0;
2139 struct ocfs2_xattr_tree_list xl = {
2140 .buffer = buffer,
2141 .buffer_size = buffer_size,
2142 };
2143
2144 if (le16_to_cpu(el->l_next_free_rec) == 0)
2145 return 0;
2146
2147 while (name_hash > 0) {
2148 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
2149 &e_cpos, &num_clusters, el);
2150 if (ret) {
2151 mlog_errno(ret);
2152 goto out;
2153 }
2154
2155 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
2156 ocfs2_list_xattr_bucket,
2157 &xl);
2158 if (ret) {
2159 mlog_errno(ret);
2160 goto out;
2161 }
2162
2163 if (e_cpos == 0)
2164 break;
2165
2166 name_hash = e_cpos - 1;
2167 }
2168
2169 ret = buffer_size - xl.buffer_size;
2170out:
2171 return ret;
2172}
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index ed32377be9db..02afa87d5e69 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -48,4 +48,13 @@ extern int ocfs2_xattr_set(struct inode *, int, const char *, const void *,
48extern int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh); 48extern int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh);
49extern struct xattr_handler *ocfs2_xattr_handlers[]; 49extern struct xattr_handler *ocfs2_xattr_handlers[];
50 50
51static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
52{
53 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
54}
55
56static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
57{
58 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
59}
51#endif /* OCFS2_XATTR_H */ 60#endif /* OCFS2_XATTR_H */