author     Bob Peterson <rpeterso@redhat.com>          2008-03-10 19:17:47 -0400
committer  Steven Whitehouse <swhiteho@redhat.com>     2008-03-31 05:41:39 -0400
commit     1f466a47e8a3a3e3b527b3285c7b9c8a837fb7ec (patch)
tree       355f6084118d4ee9a986e07e5154eaa0e25b834a /fs/gfs2/rgrp.c
parent     d82661d96993ac4efc1d54259ea85ffcd9b8bec6 (diff)
[GFS2] Faster gfs2_bitfit algorithm
This version of the gfs2_bitfit algorithm includes the latest
suggestions from Steve Whitehouse. It is typically eight to
ten times faster than the version we're using today. If there
is a lot of metadata mixed in (lots of small files), the
algorithm is often fifteen times faster, and given the right
conditions I've seen peaks of twenty times faster.
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
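
[Editor's note: the core of the speed-up is a word-at-a-time skip test.
GFS2 packs four 2-bit block states into each byte, so an unsigned long
covers 16 or 32 blocks at once; masking the low bit of every 2-bit field
tells you in one compare whether any block in the word could match the
state being searched for. The sketch below is illustrative only: the
can_skip() helper is hypothetical, the GFS2_BLKST_* values mirror
gfs2_ondisk.h, and 64-bit longs are assumed.]

	/* Illustrative sketch, not kernel code: the word-at-a-time skip
	   test the patch introduces. Assumes 64-bit unsigned long. */
	#include <stdio.h>

	#define GFS2_BLKST_FREE  0               /* 2-bit block states */
	#define GFS2_BLKST_USED  1
	#define LBITMASK   0x5555555555555555UL  /* low bit of each field */
	#define LBITSKIP55 0x5555555555555555UL  /* every low bit set */
	#define LBITSKIP00 0x0000000000000000UL  /* every low bit clear */

	/* Nonzero when no 2-bit field in 'word' can match 'old_state',
	   so the search may jump a whole long (8 bytes = 32 blocks). */
	static int can_skip(unsigned long word, unsigned char old_state)
	{
		unsigned long lskipval =
			(old_state & GFS2_BLKST_USED) ? LBITSKIP00
						      : LBITSKIP55;
		return (word & LBITMASK) == lskipval;
	}

	int main(void)
	{
		/* All 32 blocks USED (01 01 ...): skippable when
		   hunting a FREE block. Prints 1. */
		printf("%d\n", can_skip(0x5555555555555555UL,
					GFS2_BLKST_FREE));
		/* One FREE field (the 0x54 byte): fall back to the
		   byte-at-a-time scan. Prints 0. */
		printf("%d\n", can_skip(0x5455555555555555UL,
					GFS2_BLKST_FREE));
		return 0;
	}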
Diffstat (limited to 'fs/gfs2/rgrp.c')
 fs/gfs2/rgrp.c | 93
 1 file changed, 61 insertions(+), 32 deletions(-)
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 4291375cecc6..7e8f0b1d6c6e 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -14,6 +14,7 @@
 #include <linux/fs.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/lm_interface.h>
+#include <linux/prefetch.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -33,6 +34,16 @@
 #define BFITNOENT ((u32)~0)
 #define NO_BLOCK ((u64)~0)
 
+#if BITS_PER_LONG == 32
+#define LBITMASK   (0x55555555UL)
+#define LBITSKIP55 (0x55555555UL)
+#define LBITSKIP00 (0x00000000UL)
+#else
+#define LBITMASK   (0x5555555555555555UL)
+#define LBITSKIP55 (0x5555555555555555UL)
+#define LBITSKIP00 (0x0000000000000000UL)
+#endif
+
 /*
  * These routines are used by the resource group routines (rgrp.c)
  * to keep track of block allocation.  Each block is represented by two
@@ -138,45 +149,63 @@ static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
 static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal,
 		       u8 old_state)
 {
-	const u8 *byte;
-	u32 blk = goal;
-	unsigned int bit, bitlong;
-	const unsigned long *plong;
-#if BITS_PER_LONG == 32
-	const unsigned long plong55 = 0x55555555;
-#else
-	const unsigned long plong55 = 0x5555555555555555;
-#endif
-
-	byte = buffer + (goal / GFS2_NBBY);
-	plong = (const unsigned long *)(buffer + (goal / GFS2_NBBY));
-	bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
-	bitlong = bit;
-
-	while (byte < buffer + buflen) {
-
-		if (bitlong == 0 && old_state == 0 && *plong == plong55) {
-			plong++;
-			byte += sizeof(unsigned long);
-			blk += sizeof(unsigned long) * GFS2_NBBY;
-			continue;
+	const u8 *byte, *start, *end;
+	int bit, startbit;
+	u32 g1, g2, misaligned;
+	unsigned long *plong;
+	unsigned long lskipval;
+
+	lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55;
+	g1 = (goal / GFS2_NBBY);
+	start = buffer + g1;
+	byte = start;
+	end = buffer + buflen;
+	g2 = ALIGN(g1, sizeof(unsigned long));
+	plong = (unsigned long *)(buffer + g2);
+	startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
+	misaligned = g2 - g1;
+	if (!misaligned)
+		goto ulong_aligned;
+/* parse the bitmap a byte at a time */
+misaligned:
+	while (byte < end) {
+		if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
+			return goal +
+				(((byte - start) * GFS2_NBBY) +
+				 ((bit - startbit) >> 1));
 		}
-		if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
-			return blk;
 		bit += GFS2_BIT_SIZE;
-		if (bit >= 8) {
+		if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
 			bit = 0;
 			byte++;
+			misaligned--;
+			if (!misaligned) {
+				plong = (unsigned long *)byte;
+				goto ulong_aligned;
+			}
 		}
-		bitlong += GFS2_BIT_SIZE;
-		if (bitlong >= sizeof(unsigned long) * 8) {
-			bitlong = 0;
-			plong++;
-		}
-
-		blk++;
 	}
+	return BFITNOENT;
 
+/* parse the bitmap an unsigned long at a time */
+ulong_aligned:
+	/* Stop at "end - 1" or else prefetch can go past the end and segfault.
+	   We could "if" it but we'd lose some of the performance gained.
+	   This way will only slow down searching the very last 4/8 bytes
+	   depending on architecture.  I've experimented with several ways
+	   of writing this section such as using an else before the goto
+	   but this one seems to be the fastest. */
+	while ((unsigned char *)plong < end - 1) {
+		prefetch(plong + 1);
+		if (((*plong) & LBITMASK) != lskipval)
+			break;
+		plong++;
+	}
+	if ((unsigned char *)plong < end) {
+		byte = (const u8 *)plong;
+		misaligned += sizeof(unsigned long) - 1;
+		goto misaligned;
+	}
 	return BFITNOENT;
 }
 
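
[Editor's note: a worked example of the new return expression. bit and
startbit count in 2-bit units, so (bit - startbit) >> 1 is the number of
whole fields advanced within the current byte; after a byte wrap bit can
be smaller than startbit, which is why the patch declares bit as int and
relies on an arithmetic right shift. The hit_to_block() helper below is
hypothetical; only GFS2_NBBY and GFS2_BIT_SIZE come from gfs2_ondisk.h.]

	/* Illustrative sketch of the byte-scan return arithmetic
	   (hypothetical helper, not kernel code). */
	#include <assert.h>

	#define GFS2_NBBY     4   /* 2-bit block states per byte */
	#define GFS2_BIT_SIZE 2   /* bits per block state */

	/* Block number of a hit found 'byte_off' bytes past the start
	   byte, at bit offset 'bit', for a search that began at block
	   'goal' with bit offset 'startbit'. */
	static unsigned int hit_to_block(unsigned int goal,
					 unsigned int byte_off,
					 int bit, int startbit)
	{
		return goal + (byte_off * GFS2_NBBY) +
		       ((bit - startbit) >> 1);
	}

	int main(void)
	{
		/* goal = 5 lands in byte 1, field 1: startbit = 1*2 = 2. */
		int startbit = (5 % GFS2_NBBY) * GFS2_BIT_SIZE;
		/* A hit 2 bytes on (byte 3) at bit 6 (field 3) is block
		   3 * 4 + 3 = 15. */
		assert(hit_to_block(5, 2, 6, startbit) == 15);
		/* After a byte wrap, bit < startbit: (0 - 2) >> 1 == -1
		   compensates, giving byte 2, field 0 = block 8. */
		assert(hit_to_block(5, 1, 0, startbit) == 8);
		return 0;
	}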