diff options
author | Tao Ma <tao.ma@oracle.com> | 2008-08-18 05:38:50 -0400 |
---|---|---|
committer | Mark Fasheh <mfasheh@suse.com> | 2008-10-13 19:57:03 -0400 |
commit | 0c044f0b24b9128ba8c297149d88bd81f2e36af3 (patch) | |
tree | 2a0d6a34fca5e5a8bdfde0d1068cb56d4252e864 | |
parent | ba492615f0d32d0210b02c14b24512b4372b13d6 (diff) |
ocfs2: Add xattr bucket iteration for large numbers of EAs
Ocfs2 breaks up xattr index tree leaves into 4k regions, called buckets.
Attributes are stored within a given bucket, depending on hash value.
After a discussion with Mark, we decided that the per-bucket index
(xe_entry[]) would only exist in the 1st block of a bucket. Likewise,
name/value pairs will not straddle more than one block. This allows the
majority of operations to work directly on the buffer heads in a leaf block.
This patch adds code to iterate the buckets in an EA. A new abstraction,
ocfs2_xattr_bucket, is added. It records the bhs in this bucket and the
ocfs2_xattr_header. This keeps the code neat, improving readability.
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
-rw-r--r-- | fs/ocfs2/ocfs2_fs.h | 35 | ||||
-rw-r--r-- | fs/ocfs2/xattr.c | 255 | ||||
-rw-r--r-- | fs/ocfs2/xattr.h | 9 |
3 files changed, 293 insertions, 6 deletions
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 98e1f8bba0e1..8d5e72f2c5cf 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -755,8 +755,13 @@ struct ocfs2_xattr_header { | |||
755 | __le16 xh_count; /* contains the count of how | 755 | __le16 xh_count; /* contains the count of how |
756 | many records are in the | 756 | many records are in the |
757 | local xattr storage. */ | 757 | local xattr storage. */ |
758 | __le16 xh_reserved1; | 758 | __le16 xh_free_start; /* current offset for storing |
759 | __le32 xh_reserved2; | 759 | xattr. */ |
760 | __le16 xh_name_value_len; /* total length of name/value | ||
761 | length in this bucket. */ | ||
762 | __le16 xh_num_buckets; /* bucket nums in one extent | ||
763 | record, only valid in the | ||
764 | first bucket. */ | ||
760 | __le64 xh_csum; | 765 | __le64 xh_csum; |
761 | struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */ | 766 | struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */ |
762 | }; | 767 | }; |
@@ -793,6 +798,10 @@ struct ocfs2_xattr_tree_root { | |||
793 | #define OCFS2_XATTR_SIZE(size) (((size) + OCFS2_XATTR_ROUND) & \ | 798 | #define OCFS2_XATTR_SIZE(size) (((size) + OCFS2_XATTR_ROUND) & \ |
794 | ~(OCFS2_XATTR_ROUND)) | 799 | ~(OCFS2_XATTR_ROUND)) |
795 | 800 | ||
/*
 * Size in bytes of one xattr bucket, and the largest number of blocks a
 * single bucket can span (bucket size divided by the minimum block size).
 */
#define OCFS2_XATTR_BUCKET_SIZE			4096
#define OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET \
	(OCFS2_XATTR_BUCKET_SIZE / OCFS2_MIN_BLOCKSIZE)
804 | |||
796 | /* | 805 | /* |
797 | * On disk structure for xattr block. | 806 | * On disk structure for xattr block. |
798 | */ | 807 | */ |
@@ -963,6 +972,17 @@ static inline u64 ocfs2_backup_super_blkno(struct super_block *sb, int index) | |||
963 | return 0; | 972 | return 0; |
964 | 973 | ||
965 | } | 974 | } |
975 | |||
976 | static inline u16 ocfs2_xattr_recs_per_xb(struct super_block *sb) | ||
977 | { | ||
978 | int size; | ||
979 | |||
980 | size = sb->s_blocksize - | ||
981 | offsetof(struct ocfs2_xattr_block, | ||
982 | xb_attrs.xb_root.xt_list.l_recs); | ||
983 | |||
984 | return size / sizeof(struct ocfs2_extent_rec); | ||
985 | } | ||
966 | #else | 986 | #else |
967 | static inline int ocfs2_fast_symlink_chars(int blocksize) | 987 | static inline int ocfs2_fast_symlink_chars(int blocksize) |
968 | { | 988 | { |
@@ -1046,6 +1066,17 @@ static inline uint64_t ocfs2_backup_super_blkno(int blocksize, int index) | |||
1046 | 1066 | ||
1047 | return 0; | 1067 | return 0; |
1048 | } | 1068 | } |
1069 | |||
1070 | static inline int ocfs2_xattr_recs_per_xb(int blocksize) | ||
1071 | { | ||
1072 | int size; | ||
1073 | |||
1074 | size = blocksize - | ||
1075 | offsetof(struct ocfs2_xattr_block, | ||
1076 | xb_attrs.xb_root.xt_list.l_recs); | ||
1077 | |||
1078 | return size / sizeof(struct ocfs2_extent_rec); | ||
1079 | } | ||
1049 | #endif /* __KERNEL__ */ | 1080 | #endif /* __KERNEL__ */ |
1050 | 1081 | ||
1051 | 1082 | ||
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 67bebd9259e7..fb17f7fe4c66 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -52,6 +52,7 @@ | |||
52 | #include "suballoc.h" | 52 | #include "suballoc.h" |
53 | #include "uptodate.h" | 53 | #include "uptodate.h" |
54 | #include "buffer_head_io.h" | 54 | #include "buffer_head_io.h" |
55 | #include "super.h" | ||
55 | #include "xattr.h" | 56 | #include "xattr.h" |
56 | 57 | ||
57 | 58 | ||
@@ -60,6 +61,11 @@ struct ocfs2_xattr_def_value_root { | |||
60 | struct ocfs2_extent_rec er; | 61 | struct ocfs2_extent_rec er; |
61 | }; | 62 | }; |
62 | 63 | ||
64 | struct ocfs2_xattr_bucket { | ||
65 | struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; | ||
66 | struct ocfs2_xattr_header *xh; | ||
67 | }; | ||
68 | |||
63 | #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) | 69 | #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) |
64 | #define OCFS2_XATTR_INLINE_SIZE 80 | 70 | #define OCFS2_XATTR_INLINE_SIZE 80 |
65 | 71 | ||
@@ -99,6 +105,11 @@ struct ocfs2_xattr_search { | |||
99 | int not_found; | 105 | int not_found; |
100 | }; | 106 | }; |
101 | 107 | ||
108 | static int ocfs2_xattr_tree_list_index_block(struct inode *inode, | ||
109 | struct ocfs2_xattr_tree_root *xt, | ||
110 | char *buffer, | ||
111 | size_t buffer_size); | ||
112 | |||
102 | static inline struct xattr_handler *ocfs2_xattr_handler(int name_index) | 113 | static inline struct xattr_handler *ocfs2_xattr_handler(int name_index) |
103 | { | 114 | { |
104 | struct xattr_handler *handler = NULL; | 115 | struct xattr_handler *handler = NULL; |
@@ -483,7 +494,7 @@ static int ocfs2_xattr_block_list(struct inode *inode, | |||
483 | size_t buffer_size) | 494 | size_t buffer_size) |
484 | { | 495 | { |
485 | struct buffer_head *blk_bh = NULL; | 496 | struct buffer_head *blk_bh = NULL; |
486 | struct ocfs2_xattr_header *header = NULL; | 497 | struct ocfs2_xattr_block *xb; |
487 | int ret = 0; | 498 | int ret = 0; |
488 | 499 | ||
489 | if (!di->i_xattr_loc) | 500 | if (!di->i_xattr_loc) |
@@ -503,10 +514,17 @@ static int ocfs2_xattr_block_list(struct inode *inode, | |||
503 | goto cleanup; | 514 | goto cleanup; |
504 | } | 515 | } |
505 | 516 | ||
506 | header = &((struct ocfs2_xattr_block *)blk_bh->b_data)-> | 517 | xb = (struct ocfs2_xattr_block *)blk_bh->b_data; |
507 | xb_attrs.xb_header; | ||
508 | 518 | ||
509 | ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); | 519 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { |
520 | struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; | ||
521 | ret = ocfs2_xattr_list_entries(inode, header, | ||
522 | buffer, buffer_size); | ||
523 | } else { | ||
524 | struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root; | ||
525 | ret = ocfs2_xattr_tree_list_index_block(inode, xt, | ||
526 | buffer, buffer_size); | ||
527 | } | ||
510 | cleanup: | 528 | cleanup: |
511 | brelse(blk_bh); | 529 | brelse(blk_bh); |
512 | 530 | ||
@@ -1923,3 +1941,232 @@ cleanup: | |||
1923 | return ret; | 1941 | return ret; |
1924 | } | 1942 | } |
1925 | 1943 | ||
1944 | /* | ||
1945 | * Find the xattr extent rec which may contains name_hash. | ||
1946 | * e_cpos will be the first name hash of the xattr rec. | ||
1947 | * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list. | ||
1948 | */ | ||
1949 | static int ocfs2_xattr_get_rec(struct inode *inode, | ||
1950 | u32 name_hash, | ||
1951 | u64 *p_blkno, | ||
1952 | u32 *e_cpos, | ||
1953 | u32 *num_clusters, | ||
1954 | struct ocfs2_extent_list *el) | ||
1955 | { | ||
1956 | int ret = 0, i; | ||
1957 | struct buffer_head *eb_bh = NULL; | ||
1958 | struct ocfs2_extent_block *eb; | ||
1959 | struct ocfs2_extent_rec *rec = NULL; | ||
1960 | u64 e_blkno = 0; | ||
1961 | |||
1962 | if (el->l_tree_depth) { | ||
1963 | ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh); | ||
1964 | if (ret) { | ||
1965 | mlog_errno(ret); | ||
1966 | goto out; | ||
1967 | } | ||
1968 | |||
1969 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
1970 | el = &eb->h_list; | ||
1971 | |||
1972 | if (el->l_tree_depth) { | ||
1973 | ocfs2_error(inode->i_sb, | ||
1974 | "Inode %lu has non zero tree depth in " | ||
1975 | "xattr tree block %llu\n", inode->i_ino, | ||
1976 | (unsigned long long)eb_bh->b_blocknr); | ||
1977 | ret = -EROFS; | ||
1978 | goto out; | ||
1979 | } | ||
1980 | } | ||
1981 | |||
1982 | for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { | ||
1983 | rec = &el->l_recs[i]; | ||
1984 | |||
1985 | if (le32_to_cpu(rec->e_cpos) <= name_hash) { | ||
1986 | e_blkno = le64_to_cpu(rec->e_blkno); | ||
1987 | break; | ||
1988 | } | ||
1989 | } | ||
1990 | |||
1991 | if (!e_blkno) { | ||
1992 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | ||
1993 | "record (%u, %u, 0) in xattr", inode->i_ino, | ||
1994 | le32_to_cpu(rec->e_cpos), | ||
1995 | ocfs2_rec_clusters(el, rec)); | ||
1996 | ret = -EROFS; | ||
1997 | goto out; | ||
1998 | } | ||
1999 | |||
2000 | *p_blkno = le64_to_cpu(rec->e_blkno); | ||
2001 | *num_clusters = le16_to_cpu(rec->e_leaf_clusters); | ||
2002 | if (e_cpos) | ||
2003 | *e_cpos = le32_to_cpu(rec->e_cpos); | ||
2004 | out: | ||
2005 | brelse(eb_bh); | ||
2006 | return ret; | ||
2007 | } | ||
2008 | |||
2009 | typedef int (xattr_bucket_func)(struct inode *inode, | ||
2010 | struct ocfs2_xattr_bucket *bucket, | ||
2011 | void *para); | ||
2012 | |||
2013 | static int ocfs2_iterate_xattr_buckets(struct inode *inode, | ||
2014 | u64 blkno, | ||
2015 | u32 clusters, | ||
2016 | xattr_bucket_func *func, | ||
2017 | void *para) | ||
2018 | { | ||
2019 | int i, j, ret = 0; | ||
2020 | int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
2021 | u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); | ||
2022 | u32 num_buckets = clusters * bpc; | ||
2023 | struct ocfs2_xattr_bucket bucket; | ||
2024 | |||
2025 | memset(&bucket, 0, sizeof(bucket)); | ||
2026 | |||
2027 | mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n", | ||
2028 | clusters, blkno); | ||
2029 | |||
2030 | for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) { | ||
2031 | ret = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), | ||
2032 | blkno, blk_per_bucket, | ||
2033 | bucket.bhs, OCFS2_BH_CACHED, inode); | ||
2034 | if (ret) { | ||
2035 | mlog_errno(ret); | ||
2036 | goto out; | ||
2037 | } | ||
2038 | |||
2039 | bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data; | ||
2040 | /* | ||
2041 | * The real bucket num in this series of blocks is stored | ||
2042 | * in the 1st bucket. | ||
2043 | */ | ||
2044 | if (i == 0) | ||
2045 | num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets); | ||
2046 | |||
2047 | mlog(0, "iterating xattr bucket %llu\n", blkno); | ||
2048 | if (func) { | ||
2049 | ret = func(inode, &bucket, para); | ||
2050 | if (ret) { | ||
2051 | mlog_errno(ret); | ||
2052 | break; | ||
2053 | } | ||
2054 | } | ||
2055 | |||
2056 | for (j = 0; j < blk_per_bucket; j++) | ||
2057 | brelse(bucket.bhs[j]); | ||
2058 | memset(&bucket, 0, sizeof(bucket)); | ||
2059 | } | ||
2060 | |||
2061 | out: | ||
2062 | for (j = 0; j < blk_per_bucket; j++) | ||
2063 | brelse(bucket.bhs[j]); | ||
2064 | |||
2065 | return ret; | ||
2066 | } | ||
2067 | |||
2068 | struct ocfs2_xattr_tree_list { | ||
2069 | char *buffer; | ||
2070 | size_t buffer_size; | ||
2071 | }; | ||
2072 | |||
2073 | static int ocfs2_xattr_bucket_get_name_value(struct inode *inode, | ||
2074 | struct ocfs2_xattr_header *xh, | ||
2075 | int index, | ||
2076 | int *block_off, | ||
2077 | int *new_offset) | ||
2078 | { | ||
2079 | u16 name_offset; | ||
2080 | |||
2081 | if (index < 0 || index >= le16_to_cpu(xh->xh_count)) | ||
2082 | return -EINVAL; | ||
2083 | |||
2084 | name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); | ||
2085 | |||
2086 | *block_off = name_offset >> inode->i_sb->s_blocksize_bits; | ||
2087 | *new_offset = name_offset % inode->i_sb->s_blocksize; | ||
2088 | |||
2089 | return 0; | ||
2090 | } | ||
2091 | |||
2092 | static int ocfs2_list_xattr_bucket(struct inode *inode, | ||
2093 | struct ocfs2_xattr_bucket *bucket, | ||
2094 | void *para) | ||
2095 | { | ||
2096 | int ret = 0; | ||
2097 | struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; | ||
2098 | size_t size; | ||
2099 | int i, block_off, new_offset; | ||
2100 | |||
2101 | for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) { | ||
2102 | struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i]; | ||
2103 | struct xattr_handler *handler = | ||
2104 | ocfs2_xattr_handler(ocfs2_xattr_get_type(entry)); | ||
2105 | |||
2106 | if (handler) { | ||
2107 | ret = ocfs2_xattr_bucket_get_name_value(inode, | ||
2108 | bucket->xh, | ||
2109 | i, | ||
2110 | &block_off, | ||
2111 | &new_offset); | ||
2112 | if (ret) | ||
2113 | break; | ||
2114 | size = handler->list(inode, xl->buffer, xl->buffer_size, | ||
2115 | bucket->bhs[block_off]->b_data + | ||
2116 | new_offset, | ||
2117 | entry->xe_name_len); | ||
2118 | if (xl->buffer) { | ||
2119 | if (size > xl->buffer_size) | ||
2120 | return -ERANGE; | ||
2121 | xl->buffer += size; | ||
2122 | } | ||
2123 | xl->buffer_size -= size; | ||
2124 | } | ||
2125 | } | ||
2126 | |||
2127 | return ret; | ||
2128 | } | ||
2129 | |||
2130 | static int ocfs2_xattr_tree_list_index_block(struct inode *inode, | ||
2131 | struct ocfs2_xattr_tree_root *xt, | ||
2132 | char *buffer, | ||
2133 | size_t buffer_size) | ||
2134 | { | ||
2135 | struct ocfs2_extent_list *el = &xt->xt_list; | ||
2136 | int ret = 0; | ||
2137 | u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; | ||
2138 | u64 p_blkno = 0; | ||
2139 | struct ocfs2_xattr_tree_list xl = { | ||
2140 | .buffer = buffer, | ||
2141 | .buffer_size = buffer_size, | ||
2142 | }; | ||
2143 | |||
2144 | if (le16_to_cpu(el->l_next_free_rec) == 0) | ||
2145 | return 0; | ||
2146 | |||
2147 | while (name_hash > 0) { | ||
2148 | ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, | ||
2149 | &e_cpos, &num_clusters, el); | ||
2150 | if (ret) { | ||
2151 | mlog_errno(ret); | ||
2152 | goto out; | ||
2153 | } | ||
2154 | |||
2155 | ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters, | ||
2156 | ocfs2_list_xattr_bucket, | ||
2157 | &xl); | ||
2158 | if (ret) { | ||
2159 | mlog_errno(ret); | ||
2160 | goto out; | ||
2161 | } | ||
2162 | |||
2163 | if (e_cpos == 0) | ||
2164 | break; | ||
2165 | |||
2166 | name_hash = e_cpos - 1; | ||
2167 | } | ||
2168 | |||
2169 | ret = buffer_size - xl.buffer_size; | ||
2170 | out: | ||
2171 | return ret; | ||
2172 | } | ||
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index ed32377be9db..02afa87d5e69 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h | |||
@@ -48,4 +48,13 @@ extern int ocfs2_xattr_set(struct inode *, int, const char *, const void *, | |||
48 | extern int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh); | 48 | extern int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh); |
49 | extern struct xattr_handler *ocfs2_xattr_handlers[]; | 49 | extern struct xattr_handler *ocfs2_xattr_handlers[]; |
50 | 50 | ||
51 | static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) | ||
52 | { | ||
53 | return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; | ||
54 | } | ||
55 | |||
56 | static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb) | ||
57 | { | ||
58 | return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); | ||
59 | } | ||
51 | #endif /* OCFS2_XATTR_H */ | 60 | #endif /* OCFS2_XATTR_H */ |