From 35b2507fe3ec6c28c27dd7fb289c003c7a0baf33 Mon Sep 17 00:00:00 2001 From: Sachit Kadle Date: Thu, 1 Sep 2016 20:50:06 -0700 Subject: gpu: nvgpu: implement lockless allocator Implement a lockless allocator for fixed-size data structures. Bug 1795076 Change-Id: I70a5f52cbdb4452cc0fd9a8edf26735be29ede57 Signed-off-by: Sachit Kadle Reviewed-on: http://git-master/r/1213211 (cherry picked from commit e4bff7da0f39c8f4b5691169c02e482bc9d4166e) Reviewed-on: http://git-master/r/1223246 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom Tested-by: Terje Bergstrom --- drivers/gpu/nvgpu/Makefile.nvgpu | 1 + drivers/gpu/nvgpu/gk20a/gk20a_allocator.h | 9 + drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c | 204 +++++++++++++++++++++ drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h | 121 ++++++++++++ 4 files changed, 335 insertions(+) create mode 100644 drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c create mode 100644 drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h (limited to 'drivers/gpu/nvgpu') diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index b8e38919..33d158bb 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu @@ -57,6 +57,7 @@ nvgpu-y := \ gk20a/gk20a_allocator_bitmap.o \ gk20a/gk20a_allocator_buddy.o \ gk20a/gk20a_allocator_page.o \ + gk20a/gk20a_allocator_lockless.o \ gk20a/cde_gk20a.o \ gk20a/platform_gk20a_generic.o \ gk20a/tsg_gk20a.o \ diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h index 93865190..0d611ba3 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h @@ -165,6 +165,15 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a, const char *name, u64 base, u64 length, u64 blk_size, u64 flags); +/* + * Lockless allocatior initializers. + * Note: This allocator can only allocate fixed-size structures of a + * pre-defined size. + */ +int gk20a_lockless_allocator_init(struct gk20a_allocator *__a, + const char *name, u64 base, u64 length, + u64 struct_size, u64 flags); + #define GPU_BALLOC_MAX_ORDER 31 /* diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c new file mode 100644 index 00000000..32455c98 --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +#include "gk20a_allocator.h" +#include "lockless_allocator_priv.h" + +static u64 gk20a_lockless_alloc_length(struct gk20a_allocator *a) +{ + struct gk20a_lockless_allocator *pa = a->priv; + + return pa->length; +} + +static u64 gk20a_lockless_alloc_base(struct gk20a_allocator *a) +{ + struct gk20a_lockless_allocator *pa = a->priv; + + return pa->base; +} + +static int gk20a_lockless_alloc_inited(struct gk20a_allocator *a) +{ + struct gk20a_lockless_allocator *pa = a->priv; + int inited = pa->inited; + + rmb(); + return inited; +} + +static u64 gk20a_lockless_alloc_end(struct gk20a_allocator *a) +{ + struct gk20a_lockless_allocator *pa = a->priv; + + return pa->base + pa->length; +} + +static u64 gk20a_lockless_alloc(struct gk20a_allocator *a, u64 len) +{ + struct gk20a_lockless_allocator *pa = a->priv; + int head, new_head, ret; + u64 addr = 0; + + if (len != pa->blk_size) + return 0; + + head = ACCESS_ONCE(pa->head); + while (head >= 0) { + new_head = ACCESS_ONCE(pa->next[head]); + ret = cmpxchg(&pa->head, head, new_head); + if (ret == head) { + addr = pa->base + head * pa->blk_size; + atomic_inc(&pa->nr_allocs); + alloc_dbg(a, "Alloc node # %d @ addr 0x%llx\n", head, + addr); + break; + } + head = ACCESS_ONCE(pa->head); + } + return addr; +} + +static void gk20a_lockless_free(struct gk20a_allocator *a, u64 addr) +{ + struct gk20a_lockless_allocator *pa = a->priv; + int head, ret; + u64 cur_idx, rem; + + cur_idx = addr - pa->base; + rem = do_div(cur_idx, pa->blk_size); + + while (1) { + head = ACCESS_ONCE(pa->head); + ACCESS_ONCE(pa->next[cur_idx]) = head; + ret = cmpxchg(&pa->head, head, cur_idx); + if (ret == head) { + atomic_dec(&pa->nr_allocs); + alloc_dbg(a, "Free node # %llu\n", cur_idx); + break; + } + } +} + +static void gk20a_lockless_alloc_destroy(struct gk20a_allocator *a) +{ + struct gk20a_lockless_allocator *pa = a->priv; + + vfree(pa->next); + kfree(pa); +} + +static void gk20a_lockless_print_stats(struct gk20a_allocator *a, + struct seq_file *s, int lock) +{ + struct gk20a_lockless_allocator *pa = a->priv; + + __alloc_pstat(s, a, "Lockless allocator params:\n"); + __alloc_pstat(s, a, " start = 0x%llx\n", pa->base); + __alloc_pstat(s, a, " end = 0x%llx\n", pa->base + pa->length); + + /* Actual stats. */ + __alloc_pstat(s, a, "Stats:\n"); + __alloc_pstat(s, a, " Number allocs = %d\n", + atomic_read(&pa->nr_allocs)); + __alloc_pstat(s, a, " Number free = %d\n", + pa->nr_nodes - atomic_read(&pa->nr_allocs)); +} + +static const struct gk20a_allocator_ops pool_ops = { + .alloc = gk20a_lockless_alloc, + .free = gk20a_lockless_free, + + .base = gk20a_lockless_alloc_base, + .length = gk20a_lockless_alloc_length, + .end = gk20a_lockless_alloc_end, + .inited = gk20a_lockless_alloc_inited, + + .fini = gk20a_lockless_alloc_destroy, + + .print_stats = gk20a_lockless_print_stats, +}; + +int gk20a_lockless_allocator_init(struct gk20a_allocator *__a, + const char *name, u64 base, u64 length, + u64 blk_size, u64 flags) +{ + int i; + int err; + int nr_nodes; + u64 count, rem; + struct gk20a_lockless_allocator *a; + + if (!blk_size) + return -EINVAL; + + /* + * Ensure we have space for atleast one node & there's no overflow. + * In order to control memory footprint, we require count < INT_MAX + */ + count = length; + rem = do_div(count, blk_size); + if (!base || !count || count > INT_MAX) + return -EINVAL; + + a = kzalloc(sizeof(struct gk20a_lockless_allocator), GFP_KERNEL); + if (!a) + return -ENOMEM; + + err = __gk20a_alloc_common_init(__a, name, a, false, &pool_ops); + if (err) + goto fail; + + a->next = vzalloc(sizeof(*a->next) * count); + if (!a->next) { + err = -ENOMEM; + goto fail; + } + + /* chain the elements together to form the initial free list */ + nr_nodes = (int)count; + for (i = 0; i < nr_nodes; i++) + a->next[i] = i + 1; + a->next[nr_nodes - 1] = -1; + + a->base = base; + a->length = length; + a->blk_size = blk_size; + a->nr_nodes = nr_nodes; + a->flags = flags; + atomic_set(&a->nr_allocs, 0); + + wmb(); + a->inited = true; + + gk20a_init_alloc_debug(__a); + alloc_dbg(__a, "New allocator: type lockless\n"); + alloc_dbg(__a, " base 0x%llx\n", a->base); + alloc_dbg(__a, " nodes %d\n", a->nr_nodes); + alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size); + alloc_dbg(__a, " flags 0x%llx\n", a->flags); + + return 0; + +fail: + kfree(a); + return err; +} diff --git a/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h new file mode 100644 index 00000000..f9b03e0e --- /dev/null +++ b/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/* + * Basics: + * + * - Lockless memory allocator for fixed-size structures, whose + * size is defined up front at init time. + * - Memory footprint scales linearly w/ the number of structures in + * the pool. It is ~= sizeof(int) * N. + * - Memory is pre-allocated by the client. The allocator itself + * only computes the addresses for allocations. + * - Limit of MAX_INT nodes that the allocator can be responsible for. + * + * Implementation details: + * + * The allocator maintains a single list of free nodes. We allocate & + * free nodes from the head of the list. We rely on the cmpxchg() operator + * to maintain atomicity on the head. + * + * So, both allocs & frees are O(1)!! + * + * -- Definitions -- + * Block Size - size of a single structure that this allocator will + * allocate. + * Node - one of the elements of size blk_size in the + * client-allocated buffer. + * Node Index - zero-based index of a node in the client-allocated + * contiguous buffer. + * + * -- Initial State -- + * We maintain the following to track the state of the free list: + * + * 1) A "head" index to track the index of the first free node in the list + * 2) A "next" array to track the index of the next free node in the list + * for every node. So next[head], will give the index to the 2nd free + * element in the list. + * + * So, to begin with, the free list consists of all node indices, and each + * position in the next array contains index N + 1: + * + * head = 0 + * next = [1, 2, 3, 4, -1] : Example for a user-allocated buffer of 5 nodes + * free_list = 0->1->2->3->4->-1 + * + * -- Allocations -- + * 1) Read the current head (aka acq_head) + * 2) Read next[acq_head], to get the 2nd free element (aka new_head) + * 3) cmp_xchg(&head, acq_head, new_head) + * 4) If it succeeds, compute the address of the node, based on + * base address, blk_size, & acq_head. + * + * head = 1; + * next = [1, 2, 3, 4, -1] : Example after allocating Node #0 + * free_list = 1->2->3->4->-1 + * + * head = 2; + * next = [1, 2, 3, 4, -1] : Example after allocating Node #1 + * free_list = 2->3->4->-1 + * + * -- Frees -- + * 1) Based on the address to be freed, calculate the index of the node + * being freed (cur_idx) + * 2) Read the current head (old_head) + * 3) So the freed node is going to go at the head of the list, and we + * want to put the old_head after it. So next[cur_idx] = old_head + * 4) cmpxchg(head, old_head, cur_idx) + * + * head = 0 + * next = [2, 2, 3, 4, -1] + * free_list = 0->2->3->4->-1 : Example after freeing Node #0 + * + * head = 1 + * next = [2, 0, 3, 4, -1] + * free_list = 1->0->2->3->4->-1 : Example after freeing Node #1 + */ + +#ifndef LOCKLESS_ALLOCATOR_PRIV_H +#define LOCKLESS_ALLOCATOR_PRIV_H + +struct gk20a_allocator; + +struct gk20a_lockless_allocator { + struct gk20a_allocator *owner; + + u64 base; /* Base address of the space. */ + u64 length; /* Length of the space. */ + u64 blk_size; /* Size of the structure being allocated */ + int nr_nodes; /* Number of nodes available for allocation */ + + int *next; /* An array holding the next indices per node */ + int head; /* Current node at the top of the stack */ + + u64 flags; + + bool inited; + + /* Statistics */ + atomic_t nr_allocs; +}; + +static inline struct gk20a_lockless_allocator *lockless_allocator( + struct gk20a_allocator *a) +{ + return (struct gk20a_lockless_allocator *)(a)->priv; +} + +#endif -- cgit v1.2.2