aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Marek Olšák <marek.olsak@amd.com> 2014-03-01 18:56:21 -0500
committer Christian König <christian.koenig@amd.com> 2014-03-03 04:57:19 -0500
commit c9b76548899cde2e729e3bca015d7e78ec5baad7 (patch)
tree e7be2b702803f238bfa5e4406fa9ed8e70f9351f
parent 4330441a745ea0f1fd881438a0bbdfedda65f74a (diff)
drm/radeon: validate relocations in the order determined by userspace v3
Userspace should set the lowest 4 bits of drm_radeon_cs_reloc::flags to a number from 0 to 15. The higher the number, the higher the priority, which means a buffer with a higher number will be validated sooner.

The old behavior is preserved: buffers used for write are prioritized over read-only buffers if userspace doesn't set the number.

v2: add buffers to buckets directly, then concatenate them
v3: use a stable sort

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
-rw-r--r-- drivers/gpu/drm/radeon/radeon.h | 1
-rw-r--r-- drivers/gpu/drm/radeon/radeon_cs.c | 64
-rw-r--r-- drivers/gpu/drm/radeon/radeon_object.c | 10
-rw-r--r-- drivers/gpu/drm/radeon/radeon_object.h | 2
4 files changed, 61 insertions, 16 deletions
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 7bb8fd96f3ce..efad56705e32 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -483,7 +483,6 @@ struct radeon_bo_list {
483 struct ttm_validate_buffer tv; 483 struct ttm_validate_buffer tv;
484 struct radeon_bo *bo; 484 struct radeon_bo *bo;
485 uint64_t gpu_offset; 485 uint64_t gpu_offset;
486 bool written;
487 unsigned domain; 486 unsigned domain;
488 unsigned alt_domain; 487 unsigned alt_domain;
489 u32 tiling_flags; 488 u32 tiling_flags;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index d49a3f705e49..07e165128dbf 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -31,10 +31,52 @@
31#include "radeon.h" 31#include "radeon.h"
32#include "radeon_trace.h" 32#include "radeon_trace.h"
33 33
34#define RADEON_CS_MAX_PRIORITY 32u /* highest priority value; also the index of the last bucket */
35#define RADEON_CS_NUM_BUCKETS (RADEON_CS_MAX_PRIORITY + 1) /* one bucket per priority 0..RADEON_CS_MAX_PRIORITY */
36
37/* This is based on the bucket sort with O(n) time complexity.
38 * An item with priority "i" is added to bucket[i]. The lists are then
39 * concatenated in descending order.
40 */
41struct radeon_cs_buckets {
42 struct list_head bucket[RADEON_CS_NUM_BUCKETS]; /* bucket[i] is the list of items with priority i */
43};
44
45static void radeon_cs_buckets_init(struct radeon_cs_buckets *b) /* Prepare all buckets of *b for use. */
46{
47 unsigned i;
48
49 for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) /* visit every bucket, priorities 0..RADEON_CS_MAX_PRIORITY */
50 INIT_LIST_HEAD(&b->bucket[i]); /* each bucket starts out as an empty list */
51}
52
53static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
54 struct list_head *item, unsigned priority)
55{
56 /* Since buffers which appear sooner in the relocation list are
57 * likely to be used more often than buffers which appear later
58 * in the list, the sort mustn't change the ordering of buffers
59 * with the same priority, i.e. it must be stable. Hence the item is
60 * appended at the tail; a priority above the maximum is clamped to the last bucket. */
61 list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
62}
63
64static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
65 struct list_head *out_list)
66{
67 unsigned i;
68
69 /* Connect the sorted buckets in the output list. list_splice() inserts at the head of out_list, so walking the buckets in ascending order leaves the highest priority first. */
70 for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
71 list_splice(&b->bucket[i], out_list); /* does not reinitialize the bucket head, so *b must not be reused without a new init */
72 }
73}
74
34static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) 75static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
35{ 76{
36 struct drm_device *ddev = p->rdev->ddev; 77 struct drm_device *ddev = p->rdev->ddev;
37 struct radeon_cs_chunk *chunk; 78 struct radeon_cs_chunk *chunk;
79 struct radeon_cs_buckets buckets;
38 unsigned i, j; 80 unsigned i, j;
39 bool duplicate; 81 bool duplicate;
40 82
@@ -53,8 +95,12 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
53 if (p->relocs == NULL) { 95 if (p->relocs == NULL) {
54 return -ENOMEM; 96 return -ENOMEM;
55 } 97 }
98
99 radeon_cs_buckets_init(&buckets);
100
56 for (i = 0; i < p->nrelocs; i++) { 101 for (i = 0; i < p->nrelocs; i++) {
57 struct drm_radeon_cs_reloc *r; 102 struct drm_radeon_cs_reloc *r;
103 unsigned priority;
58 104
59 duplicate = false; 105 duplicate = false;
60 r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4]; 106 r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
@@ -80,7 +126,14 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
80 p->relocs_ptr[i] = &p->relocs[i]; 126 p->relocs_ptr[i] = &p->relocs[i];
81 p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj); 127 p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
82 p->relocs[i].lobj.bo = p->relocs[i].robj; 128 p->relocs[i].lobj.bo = p->relocs[i].robj;
83 p->relocs[i].lobj.written = !!r->write_domain; 129
130 /* The userspace buffer priorities are from 0 to 15. A higher
131 * number means the buffer is more important.
132 * Also, the buffers used for write have a higher priority than
133 * the buffers used for read only, which doubles the range
134 * to 0 to 31. 32 is reserved for the kernel driver.
135 */
136 priority = (r->flags & 0xf) * 2 + !!r->write_domain;
84 137
85 /* the first reloc of an UVD job is the msg and that must be in 138 /* the first reloc of an UVD job is the msg and that must be in
86 VRAM, also put everything into VRAM on AGP cards to avoid 139 VRAM, also put everything into VRAM on AGP cards to avoid
@@ -94,6 +147,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
94 p->relocs[i].lobj.alt_domain = 147 p->relocs[i].lobj.alt_domain =
95 RADEON_GEM_DOMAIN_VRAM; 148 RADEON_GEM_DOMAIN_VRAM;
96 149
150 /* prioritize this over any other relocation */
151 priority = RADEON_CS_MAX_PRIORITY;
97 } else { 152 } else {
98 uint32_t domain = r->write_domain ? 153 uint32_t domain = r->write_domain ?
99 r->write_domain : r->read_domains; 154 r->write_domain : r->read_domains;
@@ -107,9 +162,12 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
107 p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo; 162 p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
108 p->relocs[i].handle = r->handle; 163 p->relocs[i].handle = r->handle;
109 164
110 radeon_bo_list_add_object(&p->relocs[i].lobj, 165 radeon_cs_buckets_add(&buckets, &p->relocs[i].lobj.tv.head,
111 &p->validated); 166 priority);
112 } 167 }
168
169 radeon_cs_buckets_get_list(&buckets, &p->validated);
170
113 return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring); 171 return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring);
114} 172}
115 173
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 282d6a248396..8399fe021769 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -366,16 +366,6 @@ void radeon_bo_fini(struct radeon_device *rdev)
366 arch_phys_wc_del(rdev->mc.vram_mtrr); 366 arch_phys_wc_del(rdev->mc.vram_mtrr);
367} 367}
368 368
369void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
370 struct list_head *head)
371{
372 if (lobj->written) { /* buffers flagged as written ... */
373 list_add(&lobj->tv.head, head); /* ... are inserted at the front of the list */
374 } else {
375 list_add_tail(&lobj->tv.head, head); /* all other buffers are appended at the back */
376 }
377}
378
379int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, 369int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
380 struct list_head *head, int ring) 370 struct list_head *head, int ring)
381{ 371{
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index a9a8c11bd80d..6c3ca9edc2f4 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -138,8 +138,6 @@ extern int radeon_bo_evict_vram(struct radeon_device *rdev);
138extern void radeon_bo_force_delete(struct radeon_device *rdev); 138extern void radeon_bo_force_delete(struct radeon_device *rdev);
139extern int radeon_bo_init(struct radeon_device *rdev); 139extern int radeon_bo_init(struct radeon_device *rdev);
140extern void radeon_bo_fini(struct radeon_device *rdev); 140extern void radeon_bo_fini(struct radeon_device *rdev);
141extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
142 struct list_head *head);
143extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, 141extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
144 struct list_head *head, int ring); 142 struct list_head *head, int ring);
145extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo, 143extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,