author	Chris Wilson <chris@chris-wilson.co.uk>	2014-11-04 07:51:40 -0500
committer	Daniel Vetter <daniel.vetter@ffwll.ch>	2014-11-14 04:29:18 -0500
commit	6a2c4232ece145d8b5a8f95f767bd6d0d2d2f2bb (patch)
tree	f30684b518183e8f093b3c1e124bdf495ebbd425
parent	132f3f1767dbabfb01f3c9bd63098c65d91eeac9 (diff)
drm/i915: Make the physical object coherent with GTT
Currently objects for which the hardware needs a contiguous physical address are allocated a shadow backing storage to satisfy the constraint. This shadow buffer is not wired into the normal obj->pages, and so the physical object is incoherent with accesses via the GPU, GTT and CPU. By setting up the appropriate scatter-gather table, we can allow userspace to access the physical object either through a GTT mmap or by rendering into the GEM bo. However, keeping the CPU mmap of the shmemfs backing storage coherent with the contiguous shadow is not yet possible. Fortuitously, CPU mmaps of objects requiring physical addresses are not expected to be coherent anyway.

This allows the physical constraint of the GEM object to be transparent to userspace, which can then efficiently render into or update such objects via the GTT and GPU.

v2: Fix leak of pci handle spotted by Ville
v3: Remove the now duplicate call to detach_phys_object during free.
v4: Wait for rendering before pwrite. As this patch makes it possible to
    render into the phys object, we should make it correct as well!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
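Userspace can detect the new behaviour through the existing GETPARAM ioctl. The following is a minimal, illustrative probe, not part of any library: it assumes an already-open DRM device file descriptor (e.g. for /dev/dri/card0) and an i915_drm.h header that contains the define added at the end of this patch; the helper name has_coherent_phys_gtt() is hypothetical.

#include <sys/ioctl.h>
#include <drm/i915_drm.h>	/* must include the define added below */

/* Returns non-zero if this kernel keeps physical objects coherent
 * with the GTT. "fd" is assumed to be an open DRM fd. */
static int has_coherent_phys_gtt(int fd)
{
	struct drm_i915_getparam gp;
	int value = 0;

	gp.param = I915_PARAM_HAS_COHERENT_PHYS_GTT;
	gp.value = &value;

	/* Kernels without this patch reject the unknown parameter with
	 * -EINVAL, so a failing ioctl simply means "not supported". */
	if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) != 0)
		return 0;

	return value;
}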
Diffstat:
-rw-r--r--	drivers/gpu/drm/i915/i915_dma.c	3
-rw-r--r--	drivers/gpu/drm/i915/i915_drv.h	6
-rw-r--r--	drivers/gpu/drm/i915/i915_gem.c	207
-rw-r--r--	include/uapi/drm/i915_drm.h	1
4 files changed, 150 insertions(+), 67 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 9a7353302b3f..5dc37f0233b2 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1027,6 +1027,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_CMD_PARSER_VERSION:
 		value = i915_cmd_parser_get_version();
 		break;
+	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f830596faa9e..3df9ef32d011 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1957,10 +1957,10 @@ struct drm_i915_gem_object {
 	unsigned long user_pin_count;
 	struct drm_file *pin_filp;
 
-	/** for phy allocated objects */
-	struct drm_dma_handle *phys_handle;
-
 	union {
+		/** for phy allocated objects */
+		struct drm_dma_handle *phys_handle;
+
 		struct i915_gem_userptr {
 			uintptr_t ptr;
 			unsigned read_only :1;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3e0cabe9b544..86cf428b6c4e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -208,40 +208,137 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
+static int
+i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 {
-	drm_dma_handle_t *phys = obj->phys_handle;
+	struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
+	char *vaddr = obj->phys_handle->vaddr;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	int i;
 
-	if (!phys)
-		return;
+	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
+		return -EINVAL;
+
+	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+		struct page *page;
+		char *src;
+
+		page = shmem_read_mapping_page(mapping, i);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+
+		src = kmap_atomic(page);
+		memcpy(vaddr, src, PAGE_SIZE);
+		drm_clflush_virt_range(vaddr, PAGE_SIZE);
+		kunmap_atomic(src);
+
+		page_cache_release(page);
+		vaddr += PAGE_SIZE;
+	}
+
+	i915_gem_chipset_flush(obj->base.dev);
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (st == NULL)
+		return -ENOMEM;
+
+	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
+		kfree(st);
+		return -ENOMEM;
+	}
+
+	sg = st->sgl;
+	sg->offset = 0;
+	sg->length = obj->base.size;
 
-	if (obj->madv == I915_MADV_WILLNEED) {
+	sg_dma_address(sg) = obj->phys_handle->busaddr;
+	sg_dma_len(sg) = obj->base.size;
+
+	obj->pages = st;
+	obj->has_dma_mapping = true;
+	return 0;
+}
+
+static void
+i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
+{
+	int ret;
+
+	BUG_ON(obj->madv == __I915_MADV_PURGED);
+
+	ret = i915_gem_object_set_to_cpu_domain(obj, true);
+	if (ret) {
+		/* In the event of a disaster, abandon all caches and
+		 * hope for the best.
+		 */
+		WARN_ON(ret != -EIO);
+		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+	}
+
+	if (obj->madv == I915_MADV_DONTNEED)
+		obj->dirty = 0;
+
+	if (obj->dirty) {
 		struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
-		char *vaddr = phys->vaddr;
+		char *vaddr = obj->phys_handle->vaddr;
 		int i;
 
 		for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-			struct page *page = shmem_read_mapping_page(mapping, i);
-			if (!IS_ERR(page)) {
-				char *dst = kmap_atomic(page);
-				memcpy(dst, vaddr, PAGE_SIZE);
-				drm_clflush_virt_range(dst, PAGE_SIZE);
-				kunmap_atomic(dst);
-
-				set_page_dirty(page);
+			struct page *page;
+			char *dst;
+
+			page = shmem_read_mapping_page(mapping, i);
+			if (IS_ERR(page))
+				continue;
+
+			dst = kmap_atomic(page);
+			drm_clflush_virt_range(vaddr, PAGE_SIZE);
+			memcpy(dst, vaddr, PAGE_SIZE);
+			kunmap_atomic(dst);
+
+			set_page_dirty(page);
+			if (obj->madv == I915_MADV_WILLNEED)
 				mark_page_accessed(page);
-				page_cache_release(page);
-			}
+			page_cache_release(page);
 			vaddr += PAGE_SIZE;
 		}
-		i915_gem_chipset_flush(obj->base.dev);
+		obj->dirty = 0;
 	}
 
-#ifdef CONFIG_X86
-	set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
-#endif
-	drm_pci_free(obj->base.dev, phys);
-	obj->phys_handle = NULL;
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+
+	obj->has_dma_mapping = false;
+}
+
+static void
+i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
+{
+	drm_pci_free(obj->base.dev, obj->phys_handle);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
+	.get_pages = i915_gem_object_get_pages_phys,
+	.put_pages = i915_gem_object_put_pages_phys,
+	.release = i915_gem_object_release_phys,
+};
+
+static int
+drop_pages(struct drm_i915_gem_object *obj)
+{
+	struct i915_vma *vma, *next;
+	int ret;
+
+	drm_gem_object_reference(&obj->base);
+	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
+		if (i915_vma_unbind(vma))
+			break;
+
+	ret = i915_gem_object_put_pages(obj);
+	drm_gem_object_unreference(&obj->base);
+
+	return ret;
 }
 
 int
@@ -249,9 +346,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 			    int align)
 {
 	drm_dma_handle_t *phys;
-	struct address_space *mapping;
-	char *vaddr;
-	int i;
+	int ret;
 
 	if (obj->phys_handle) {
 		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
@@ -266,41 +361,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
 	if (obj->base.filp == NULL)
 		return -EINVAL;
 
+	ret = drop_pages(obj);
+	if (ret)
+		return ret;
+
 	/* create a new object */
 	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
 	if (!phys)
 		return -ENOMEM;
 
-	vaddr = phys->vaddr;
-#ifdef CONFIG_X86
-	set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
-#endif
-	mapping = file_inode(obj->base.filp)->i_mapping;
-	for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-		struct page *page;
-		char *src;
-
-		page = shmem_read_mapping_page(mapping, i);
-		if (IS_ERR(page)) {
-#ifdef CONFIG_X86
-			set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
-#endif
-			drm_pci_free(obj->base.dev, phys);
-			return PTR_ERR(page);
-		}
-
-		src = kmap_atomic(page);
-		memcpy(vaddr, src, PAGE_SIZE);
-		kunmap_atomic(src);
-
-		mark_page_accessed(page);
-		page_cache_release(page);
-
-		vaddr += PAGE_SIZE;
-	}
-
 	obj->phys_handle = phys;
-	return 0;
+	obj->ops = &i915_gem_phys_ops;
+
+	return i915_gem_object_get_pages(obj);
 }
 
 static int
@@ -311,6 +384,14 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 	struct drm_device *dev = obj->base.dev;
 	void *vaddr = obj->phys_handle->vaddr + args->offset;
 	char __user *user_data = to_user_ptr(args->data_ptr);
+	int ret;
+
+	/* We manually control the domain here and pretend that it
+	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
+	 */
+	ret = i915_gem_object_wait_rendering(obj, false);
+	if (ret)
+		return ret;
 
 	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
 		unsigned long unwritten;
@@ -326,6 +407,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
 		return -EFAULT;
 	}
 
+	drm_clflush_virt_range(vaddr, args->size);
 	i915_gem_chipset_flush(dev);
 	return 0;
 }
@@ -1046,11 +1128,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	 * pread/pwrite currently are reading and writing from the CPU
 	 * perspective, requiring manual detiling by the client.
 	 */
-	if (obj->phys_handle) {
-		ret = i915_gem_phys_pwrite(obj, args, file);
-		goto out;
-	}
-
 	if (obj->tiling_mode == I915_TILING_NONE &&
 	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
 	    cpu_write_needs_clflush(obj)) {
@@ -1060,8 +1137,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 		 * textures). Fallback to the shmem path in that case. */
 	}
 
-	if (ret == -EFAULT || ret == -ENOSPC)
-		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+	if (ret == -EFAULT || ret == -ENOSPC) {
+		if (obj->phys_handle)
+			ret = i915_gem_phys_pwrite(obj, args, file);
+		else
+			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+	}
 
 out:
 	drm_gem_object_unreference(&obj->base);
@@ -3509,7 +3590,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	 * Stolen memory is always coherent with the GPU as it is explicitly
 	 * marked as wc by the system, or the system is cache-coherent.
 	 */
-	if (obj->stolen)
+	if (obj->stolen || obj->phys_handle)
 		return false;
 
 	/* If the GPU is snooping the contents of the CPU cache,
@@ -4471,8 +4552,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
 		}
 	}
 
-	i915_gem_object_detach_phys(obj);
-
 	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
 	 * before progressing. */
 	if (obj->stolen)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2ec0efcaa719..250262265ee3 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -340,6 +340,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT	 26
 #define I915_PARAM_HAS_WT		 27
 #define I915_PARAM_CMD_PARSER_VERSION	 28
+#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
 
 typedef struct drm_i915_getparam {
 	int param;
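For completeness, here is a sketch of how userspace would then update a physically-backed bo through the ordinary pwrite path that this patch makes coherent with the GTT. The helper name update_phys_bo() and the origin of fd/handle are illustrative assumptions; the ioctl and struct are the standard i915 uAPI, and the kernel now waits for outstanding rendering and clflushes the range itself, so no extra synchronisation is needed here.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Copy "size" bytes from "data" into a GEM bo that the kernel has
 * backed with a physical object (e.g. a legacy cursor). */
static int update_phys_bo(int fd, uint32_t handle,
			  const void *data, uint64_t size)
{
	struct drm_i915_gem_pwrite pwrite;

	memset(&pwrite, 0, sizeof(pwrite));
	pwrite.handle = handle;
	pwrite.offset = 0;
	pwrite.size = size;
	pwrite.data_ptr = (uintptr_t)data;

	return ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
}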