| author | Bob Peterson <rpeterso@redhat.com> | 2008-03-10 19:17:47 -0400 |
|---|---|---|
| committer | Steven Whitehouse <swhiteho@redhat.com> | 2008-03-31 05:41:39 -0400 |
| commit | 1f466a47e8a3a3e3b527b3285c7b9c8a837fb7ec (patch) | |
| tree | 355f6084118d4ee9a986e07e5154eaa0e25b834a | |
| parent | d82661d96993ac4efc1d54259ea85ffcd9b8bec6 (diff) | |
[GFS2] Faster gfs2_bitfit algorithm
This version of the gfs2_bitfit algorithm includes the latest
suggestions from Steve Whitehouse. It is typically eight to
ten times faster than the version we're using today. If there
is a lot of metadata mixed in (lots of small files), the
algorithm is often 15 times faster, and given the right
conditions, I've seen peaks of 20 times faster.
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
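As background for the numbers above (an illustration added here, not part of the commit): GFS2 keeps two bits of allocation state per block, so one byte covers GFS2_NBBY (4) blocks and an unsigned long covers 16 or 32 of them. The new code gets its speed from testing a whole word at once: if the low bit of every 2-bit field shows the word cannot hold the state being searched for, the word is skipped with a single compare. The userspace sketch below models just that test; the TOY_* names are invented for the example.

```c
/*
 * Illustrative userspace sketch (not the kernel code) of the word-skip
 * test the patch relies on.  With two bits per block, the low bit of each
 * 2-bit field is 0 for FREE and 1 for the in-use states, so a word whose
 * low bits are all 1 (the 0x55... pattern) cannot contain a free block.
 * All TOY_* names are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

#define TOY_BLKST_FREE  0
#define TOY_BLKST_USED  1
#define TOY_LBITMASK    0x5555555555555555ULL  /* low bit of every 2-bit field */

/* Could this word of bitmap possibly contain a block in state 'state'? */
static int word_may_contain(uint64_t word, unsigned char state)
{
        /* Searching for a used block: skip words where every low bit is 0.
         * Searching for a free block: skip words where every low bit is 1. */
        uint64_t skipval = (state & TOY_BLKST_USED) ? 0 : TOY_LBITMASK;

        return (word & TOY_LBITMASK) != skipval;
}

int main(void)
{
        uint64_t all_used = 0x5555555555555555ULL;  /* 32 blocks, all used   */
        uint64_t one_free = 0x5555555555555455ULL;  /* one 2-bit field is 00 */

        printf("%d\n", word_may_contain(all_used, TOY_BLKST_FREE)); /* 0: skip */
        printf("%d\n", word_may_contain(one_free, TOY_BLKST_FREE)); /* 1: scan */
        return 0;
}
```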
| -rw-r--r-- | fs/gfs2/rgrp.c | 93 |
1 file changed, 61 insertions, 32 deletions
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 4291375cecc6..7e8f0b1d6c6e 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -14,6 +14,7 @@
 #include <linux/fs.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/lm_interface.h>
+#include <linux/prefetch.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -33,6 +34,16 @@
 #define BFITNOENT ((u32)~0)
 #define NO_BLOCK ((u64)~0)
 
+#if BITS_PER_LONG == 32
+#define LBITMASK   (0x55555555UL)
+#define LBITSKIP55 (0x55555555UL)
+#define LBITSKIP00 (0x00000000UL)
+#else
+#define LBITMASK   (0x5555555555555555UL)
+#define LBITSKIP55 (0x5555555555555555UL)
+#define LBITSKIP00 (0x0000000000000000UL)
+#endif
+
 /*
  * These routines are used by the resource group routines (rgrp.c)
  * to keep track of block allocation. Each block is represented by two
@@ -138,45 +149,63 @@ static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
 static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal,
                        u8 old_state)
 {
-        const u8 *byte;
-        u32 blk = goal;
-        unsigned int bit, bitlong;
-        const unsigned long *plong;
-#if BITS_PER_LONG == 32
-        const unsigned long plong55 = 0x55555555;
-#else
-        const unsigned long plong55 = 0x5555555555555555;
-#endif
-
-        byte = buffer + (goal / GFS2_NBBY);
-        plong = (const unsigned long *)(buffer + (goal / GFS2_NBBY));
-        bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
-        bitlong = bit;
-
-        while (byte < buffer + buflen) {
-
-                if (bitlong == 0 && old_state == 0 && *plong == plong55) {
-                        plong++;
-                        byte += sizeof(unsigned long);
-                        blk += sizeof(unsigned long) * GFS2_NBBY;
-                        continue;
+        const u8 *byte, *start, *end;
+        int bit, startbit;
+        u32 g1, g2, misaligned;
+        unsigned long *plong;
+        unsigned long lskipval;
+
+        lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55;
+        g1 = (goal / GFS2_NBBY);
+        start = buffer + g1;
+        byte = start;
+        end = buffer + buflen;
+        g2 = ALIGN(g1, sizeof(unsigned long));
+        plong = (unsigned long *)(buffer + g2);
+        startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
+        misaligned = g2 - g1;
+        if (!misaligned)
+                goto ulong_aligned;
+/* parse the bitmap a byte at a time */
+misaligned:
+        while (byte < end) {
+                if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
+                        return goal +
+                                (((byte - start) * GFS2_NBBY) +
+                                 ((bit - startbit) >> 1));
                 }
-                if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
-                        return blk;
                 bit += GFS2_BIT_SIZE;
-                if (bit >= 8) {
+                if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
                         bit = 0;
                         byte++;
+                        misaligned--;
+                        if (!misaligned) {
+                                plong = (unsigned long *)byte;
+                                goto ulong_aligned;
+                        }
                 }
-                bitlong += GFS2_BIT_SIZE;
-                if (bitlong >= sizeof(unsigned long) * 8) {
-                        bitlong = 0;
-                        plong++;
-                }
-
-                blk++;
         }
+        return BFITNOENT;
 
+/* parse the bitmap a unsigned long at a time */
+ulong_aligned:
+        /* Stop at "end - 1" or else prefetch can go past the end and segfault.
+           We could "if" it but we'd lose some of the performance gained.
+           This way will only slow down searching the very last 4/8 bytes
+           depending on architecture. I've experimented with several ways
+           of writing this section such as using an else before the goto
+           but this one seems to be the fastest. */
+        while ((unsigned char *)plong < end - 1) {
+                prefetch(plong + 1);
+                if (((*plong) & LBITMASK) != lskipval)
+                        break;
+                plong++;
+        }
+        if ((unsigned char *)plong < end) {
+                byte = (const u8 *)plong;
+                misaligned += sizeof(unsigned long) - 1;
+                goto misaligned;
+        }
         return BFITNOENT;
 }
 
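To see the shape of the new gfs2_bitfit outside the kernel, here is a hedged, self-contained model of the same two-phase scan: walk byte by byte until the position is aligned to a word boundary, compare whole words against the skip pattern, and fall back to the byte walk as soon as a word might contain the wanted state. It deliberately simplifies the patch: uint64_t stands in for unsigned long, prefetch() and the goto-based control flow are omitted, and toy_bitfit()/NOENT are invented names. Treat it as a sketch of the technique, not the kernel function.

```c
/* Simplified userspace model of the patch's two-phase bitmap scan. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NBBY        4                       /* blocks per byte (2 bits each) */
#define BIT_SIZE    2
#define BIT_MASK    3u
#define NOENT       ((uint32_t)~0)
#define LBITMASK    0x5555555555555555ULL
#define LBITSKIP55  0x5555555555555555ULL
#define LBITSKIP00  0x0000000000000000ULL

static uint32_t toy_bitfit(const uint8_t *buffer, unsigned int buflen,
                           uint32_t goal, uint8_t old_state)
{
        const uint8_t *byte = buffer + goal / NBBY;
        const uint8_t *end = buffer + buflen;
        unsigned int bit = (goal % NBBY) * BIT_SIZE;
        uint64_t skipval = (old_state & 1) ? LBITSKIP00 : LBITSKIP55;
        uint64_t word;

        while (byte < end) {
                /* Word phase: at a word-aligned byte with bit == 0, skip the
                 * whole word if no 2-bit field in it can match old_state. */
                if (bit == 0 && (byte - buffer) % sizeof(word) == 0 &&
                    byte + sizeof(word) <= end) {
                        memcpy(&word, byte, sizeof(word));
                        if ((word & LBITMASK) == skipval) {
                                byte += sizeof(word);
                                continue;
                        }
                }
                /* Byte phase: test each 2-bit field individually. */
                if (((*byte >> bit) & BIT_MASK) == old_state)
                        return (uint32_t)((byte - buffer) * NBBY + bit / BIT_SIZE);
                bit += BIT_SIZE;
                if (bit >= NBBY * BIT_SIZE) {
                        bit = 0;
                        byte++;
                }
        }
        return NOENT;
}

int main(void)
{
        uint8_t bitmap[32];

        memset(bitmap, 0x55, sizeof(bitmap)); /* every block marked "used"        */
        bitmap[20] = 0x45;                    /* block 82 (byte 20, field 2) free  */

        /* Search for a free block (state 0) starting from block 0; prints 82. */
        printf("first free block: %u\n", toy_bitfit(bitmap, sizeof(bitmap), 0, 0));
        return 0;
}
```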
