diff options
author | Alex Waterman <alexw@nvidia.com> | 2016-08-22 14:19:50 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-10-18 15:24:26 -0400 |
commit | 641444188f18dbf56dda980e31f1b404dbb6f166 (patch) | |
tree | e94e53459dae58168873362e6348e56121c1b8f6 /drivers/gpu | |
parent | cd452a6ed4cad388909b0372f04a0482609acb90 (diff) |
gpu: nvgpu: bitmap allocator optimization
Add an optimization to the bitmap allocator for handling sequences of
allocations. A common pattern of allocs from the priv_cmdbuf is to do
many allocs and then many frees. In such cases it makes sense to store
the last allocation offset and start searching for the next alloc from
there. For such a pattern we know that the previous bits are already
allocated so it doesn't make sense to search them unless we have to.
Obviously, if there's no space found ahead of the previous alloc's block
then we fall back to the remaining space.
In random allocation patterns this optimization should not have any
negative effects. It merely shifts the start point for searching for
allocs but assuming each bit has an equal probability of being free the
start location does not matter.
Bug 1799159
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1205958
(cherry picked from commit 759c583962d6d57cb8cb073ccdbfcfc6db4c1e18)
Change-Id: I267ef6fa155ff15d6ebfc76dc1abafd9aa1f44df
Reviewed-on: http://git-master/r/1227923
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h | 9 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c | 24 |
2 files changed, 29 insertions, 4 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h index 053a6425..a686b704 100644 --- a/drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h +++ b/drivers/gpu/nvgpu/gk20a/bitmap_allocator_priv.h | |||
@@ -31,6 +31,15 @@ struct gk20a_bitmap_allocator { | |||
31 | u64 num_bits; /* Number of allocatable bits. */ | 31 | u64 num_bits; /* Number of allocatable bits. */ |
32 | u64 bit_offs; /* Offset of bitmap. */ | 32 | u64 bit_offs; /* Offset of bitmap. */ |
33 | 33 | ||
34 | /* | ||
35 | * Optimization for making repeated allocations faster. Keep track of | ||
36 | * the next bit after the most recent allocation. This is where the next | ||
37 | * search will start from. This should make allocation faster in cases | ||
38 | * where lots of allocations get made one after another. It shouldn't | ||
39 | * have a negative impact on the case where the allocator is fragmented. | ||
40 | */ | ||
41 | u64 next_blk; | ||
42 | |||
34 | unsigned long *bitmap; /* The actual bitmap! */ | 43 | unsigned long *bitmap; /* The actual bitmap! */ |
35 | struct rb_root allocs; /* Tree of outstanding allocations. */ | 44 | struct rb_root allocs; /* Tree of outstanding allocations. */ |
36 | 45 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c index a2aa85fa..03ccbe85 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_bitmap.c | |||
@@ -211,7 +211,7 @@ static int __gk20a_bitmap_store_alloc(struct gk20a_bitmap_allocator *a, | |||
211 | static u64 gk20a_bitmap_alloc(struct gk20a_allocator *__a, u64 len) | 211 | static u64 gk20a_bitmap_alloc(struct gk20a_allocator *__a, u64 len) |
212 | { | 212 | { |
213 | u64 blks, addr; | 213 | u64 blks, addr; |
214 | unsigned long offs, adjusted_offs; | 214 | unsigned long offs, adjusted_offs, limit; |
215 | struct gk20a_bitmap_allocator *a = bitmap_allocator(__a); | 215 | struct gk20a_bitmap_allocator *a = bitmap_allocator(__a); |
216 | 216 | ||
217 | blks = len >> a->blk_shift; | 217 | blks = len >> a->blk_shift; |
@@ -221,11 +221,26 @@ static u64 gk20a_bitmap_alloc(struct gk20a_allocator *__a, u64 len) | |||
221 | 221 | ||
222 | alloc_lock(__a); | 222 | alloc_lock(__a); |
223 | 223 | ||
224 | offs = bitmap_find_next_zero_area(a->bitmap, a->num_bits, 0, blks, 0); | 224 | /* |
225 | if (offs >= a->num_bits) | 225 | * First look from next_blk and onwards... |
226 | goto fail; | 226 | */ |
227 | offs = bitmap_find_next_zero_area(a->bitmap, a->num_bits, | ||
228 | a->next_blk, blks, 0); | ||
229 | if (offs >= a->num_bits) { | ||
230 | /* | ||
231 | * If that didn't work try the remaining area. Since there can | ||
232 | * be available space that spans across a->next_blk we need to | ||
233 | * search up to the first set bit after that. | ||
234 | */ | ||
235 | limit = find_next_bit(a->bitmap, a->num_bits, a->next_blk); | ||
236 | offs = bitmap_find_next_zero_area(a->bitmap, limit, | ||
237 | 0, blks, 0); | ||
238 | if (offs >= a->next_blk) | ||
239 | goto fail; | ||
240 | } | ||
227 | 241 | ||
228 | bitmap_set(a->bitmap, offs, blks); | 242 | bitmap_set(a->bitmap, offs, blks); |
243 | a->next_blk = offs + blks; | ||
229 | 244 | ||
230 | adjusted_offs = offs + a->bit_offs; | 245 | adjusted_offs = offs + a->bit_offs; |
231 | addr = ((u64)adjusted_offs) * a->blk_size; | 246 | addr = ((u64)adjusted_offs) * a->blk_size; |
@@ -255,6 +270,7 @@ static u64 gk20a_bitmap_alloc(struct gk20a_allocator *__a, u64 len) | |||
255 | fail_reset_bitmap: | 270 | fail_reset_bitmap: |
256 | bitmap_clear(a->bitmap, offs, blks); | 271 | bitmap_clear(a->bitmap, offs, blks); |
257 | fail: | 272 | fail: |
273 | a->next_blk = 0; | ||
258 | alloc_unlock(__a); | 274 | alloc_unlock(__a); |
259 | alloc_dbg(__a, "Alloc failed!\n"); | 275 | alloc_dbg(__a, "Alloc failed!\n"); |
260 | return 0; | 276 | return 0; |