diff options
author | Marek Olšák <marek.olsak@amd.com> | 2014-03-01 18:56:21 -0500 |
---|---|---|
committer | Christian König <christian.koenig@amd.com> | 2014-03-03 04:57:19 -0500 |
commit | c9b76548899cde2e729e3bca015d7e78ec5baad7 (patch) | |
tree | e7be2b702803f238bfa5e4406fa9ed8e70f9351f | |
parent | 4330441a745ea0f1fd881438a0bbdfedda65f74a (diff) |
drm/radeon: validate relocations in the order determined by userspace v3
Userspace should set the lowest 4 bits of drm_radeon_cs_reloc::flags to
a number from 0 to 15. The higher the number, the higher the priority,
which means a buffer with a higher number will be validated sooner.
The old behavior is preserved: buffers used for write are prioritized over
read-only buffers if userspace doesn't set the number.
v2: add buffers to buckets directly, then concatenate them
v3: use a stable sort
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
-rw-r--r-- | drivers/gpu/drm/radeon/radeon.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_cs.c | 64 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_object.c | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_object.h | 2 |
4 files changed, 61 insertions, 16 deletions
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 7bb8fd96f3ce..efad56705e32 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h | |||
@@ -483,7 +483,6 @@ struct radeon_bo_list { | |||
483 | struct ttm_validate_buffer tv; | 483 | struct ttm_validate_buffer tv; |
484 | struct radeon_bo *bo; | 484 | struct radeon_bo *bo; |
485 | uint64_t gpu_offset; | 485 | uint64_t gpu_offset; |
486 | bool written; | ||
487 | unsigned domain; | 486 | unsigned domain; |
488 | unsigned alt_domain; | 487 | unsigned alt_domain; |
489 | u32 tiling_flags; | 488 | u32 tiling_flags; |
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index d49a3f705e49..07e165128dbf 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c | |||
@@ -31,10 +31,52 @@ | |||
31 | #include "radeon.h" | 31 | #include "radeon.h" |
32 | #include "radeon_trace.h" | 32 | #include "radeon_trace.h" |
33 | 33 | ||
34 | #define RADEON_CS_MAX_PRIORITY 32u | ||
35 | #define RADEON_CS_NUM_BUCKETS (RADEON_CS_MAX_PRIORITY + 1) | ||
36 | |||
37 | /* This is based on the bucket sort with O(n) time complexity. | ||
38 | * An item with priority "i" is added to bucket[i]. The lists are then | ||
39 | * concatenated in descending order. | ||
40 | */ | ||
41 | struct radeon_cs_buckets { | ||
42 | struct list_head bucket[RADEON_CS_NUM_BUCKETS]; | ||
43 | }; | ||
44 | |||
45 | static void radeon_cs_buckets_init(struct radeon_cs_buckets *b) | ||
46 | { | ||
47 | unsigned i; | ||
48 | |||
49 | for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) | ||
50 | INIT_LIST_HEAD(&b->bucket[i]); | ||
51 | } | ||
52 | |||
53 | static void radeon_cs_buckets_add(struct radeon_cs_buckets *b, | ||
54 | struct list_head *item, unsigned priority) | ||
55 | { | ||
56 | /* Since buffers which appear sooner in the relocation list are | ||
57 | * likely to be used more often than buffers which appear later | ||
58 | * in the list, the sort mustn't change the ordering of buffers | ||
59 | * with the same priority, i.e. it must be stable. | ||
60 | */ | ||
61 | list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]); | ||
62 | } | ||
63 | |||
64 | static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b, | ||
65 | struct list_head *out_list) | ||
66 | { | ||
67 | unsigned i; | ||
68 | |||
69 | /* Connect the sorted buckets in the output list. */ | ||
70 | for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) { | ||
71 | list_splice(&b->bucket[i], out_list); | ||
72 | } | ||
73 | } | ||
74 | |||
34 | static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) | 75 | static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) |
35 | { | 76 | { |
36 | struct drm_device *ddev = p->rdev->ddev; | 77 | struct drm_device *ddev = p->rdev->ddev; |
37 | struct radeon_cs_chunk *chunk; | 78 | struct radeon_cs_chunk *chunk; |
79 | struct radeon_cs_buckets buckets; | ||
38 | unsigned i, j; | 80 | unsigned i, j; |
39 | bool duplicate; | 81 | bool duplicate; |
40 | 82 | ||
@@ -53,8 +95,12 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) | |||
53 | if (p->relocs == NULL) { | 95 | if (p->relocs == NULL) { |
54 | return -ENOMEM; | 96 | return -ENOMEM; |
55 | } | 97 | } |
98 | |||
99 | radeon_cs_buckets_init(&buckets); | ||
100 | |||
56 | for (i = 0; i < p->nrelocs; i++) { | 101 | for (i = 0; i < p->nrelocs; i++) { |
57 | struct drm_radeon_cs_reloc *r; | 102 | struct drm_radeon_cs_reloc *r; |
103 | unsigned priority; | ||
58 | 104 | ||
59 | duplicate = false; | 105 | duplicate = false; |
60 | r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4]; | 106 | r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4]; |
@@ -80,7 +126,14 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) | |||
80 | p->relocs_ptr[i] = &p->relocs[i]; | 126 | p->relocs_ptr[i] = &p->relocs[i]; |
81 | p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj); | 127 | p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj); |
82 | p->relocs[i].lobj.bo = p->relocs[i].robj; | 128 | p->relocs[i].lobj.bo = p->relocs[i].robj; |
83 | p->relocs[i].lobj.written = !!r->write_domain; | 129 | |
130 | /* The userspace buffer priorities are from 0 to 15. A higher | ||
131 | * number means the buffer is more important. | ||
132 | * Also, the buffers used for write have a higher priority than | ||
133 | * the buffers used for read only, which doubles the range | ||
134 | * to 0 to 31. 32 is reserved for the kernel driver. | ||
135 | */ | ||
136 | priority = (r->flags & 0xf) * 2 + !!r->write_domain; | ||
84 | 137 | ||
85 | /* the first reloc of an UVD job is the msg and that must be in | 138 | /* the first reloc of an UVD job is the msg and that must be in |
86 | VRAM, also put everything into VRAM on AGP cards to avoid | 139 | VRAM, also put everything into VRAM on AGP cards to avoid |
@@ -94,6 +147,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) | |||
94 | p->relocs[i].lobj.alt_domain = | 147 | p->relocs[i].lobj.alt_domain = |
95 | RADEON_GEM_DOMAIN_VRAM; | 148 | RADEON_GEM_DOMAIN_VRAM; |
96 | 149 | ||
150 | /* prioritize this over any other relocation */ | ||
151 | priority = RADEON_CS_MAX_PRIORITY; | ||
97 | } else { | 152 | } else { |
98 | uint32_t domain = r->write_domain ? | 153 | uint32_t domain = r->write_domain ? |
99 | r->write_domain : r->read_domains; | 154 | r->write_domain : r->read_domains; |
@@ -107,9 +162,12 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) | |||
107 | p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo; | 162 | p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo; |
108 | p->relocs[i].handle = r->handle; | 163 | p->relocs[i].handle = r->handle; |
109 | 164 | ||
110 | radeon_bo_list_add_object(&p->relocs[i].lobj, | 165 | radeon_cs_buckets_add(&buckets, &p->relocs[i].lobj.tv.head, |
111 | &p->validated); | 166 | priority); |
112 | } | 167 | } |
168 | |||
169 | radeon_cs_buckets_get_list(&buckets, &p->validated); | ||
170 | |||
113 | return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring); | 171 | return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring); |
114 | } | 172 | } |
115 | 173 | ||
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 282d6a248396..8399fe021769 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c | |||
@@ -366,16 +366,6 @@ void radeon_bo_fini(struct radeon_device *rdev) | |||
366 | arch_phys_wc_del(rdev->mc.vram_mtrr); | 366 | arch_phys_wc_del(rdev->mc.vram_mtrr); |
367 | } | 367 | } |
368 | 368 | ||
369 | void radeon_bo_list_add_object(struct radeon_bo_list *lobj, | ||
370 | struct list_head *head) | ||
371 | { | ||
372 | if (lobj->written) { | ||
373 | list_add(&lobj->tv.head, head); | ||
374 | } else { | ||
375 | list_add_tail(&lobj->tv.head, head); | ||
376 | } | ||
377 | } | ||
378 | |||
379 | int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, | 369 | int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, |
380 | struct list_head *head, int ring) | 370 | struct list_head *head, int ring) |
381 | { | 371 | { |
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index a9a8c11bd80d..6c3ca9edc2f4 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h | |||
@@ -138,8 +138,6 @@ extern int radeon_bo_evict_vram(struct radeon_device *rdev); | |||
138 | extern void radeon_bo_force_delete(struct radeon_device *rdev); | 138 | extern void radeon_bo_force_delete(struct radeon_device *rdev); |
139 | extern int radeon_bo_init(struct radeon_device *rdev); | 139 | extern int radeon_bo_init(struct radeon_device *rdev); |
140 | extern void radeon_bo_fini(struct radeon_device *rdev); | 140 | extern void radeon_bo_fini(struct radeon_device *rdev); |
141 | extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj, | ||
142 | struct list_head *head); | ||
143 | extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, | 141 | extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, |
144 | struct list_head *head, int ring); | 142 | struct list_head *head, int ring); |
145 | extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo, | 143 | extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo, |