author     Eric Anholt <eric@anholt.net>      2008-11-14 16:35:19 -0500
committer  Dave Airlie <airlied@redhat.com>   2008-12-03 20:21:55 -0500
commit     e47c68e9c5d71e2faab8c2b82f57c6c73e6456fd (patch)
tree       5edeeaa59dbccf418b4cae659efdcaa4cf4cc496
parent     2ef7eeaa553d88e78d9a4520271f26a7bc0e2968 (diff)
drm/i915: Make a single set-to-cpu-domain path and use it wherever needed.
This fixes several domain management bugs, including potential lack of cache
invalidation for pread, potential failure to wait for set_domain(CPU, 0),
and more, along with producing more intelligible code.
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h |   4
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 363
2 files changed, 215 insertions, 152 deletions
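The consolidated path this patch introduces always performs the same ordered steps before the CPU touches an object: flush and wait on any pending GPU write domain, retire a GTT write domain, clflush if the object is not yet CPU-readable, and only then update the read/write domain tracking. The stand-alone C sketch below restates that ordering outside the kernel as a minimal model; the struct, the domain constants, and the printf stand-ins for the flush/wait steps are invented for illustration and are not the driver's types. Only the step order mirrors the new i915_gem_object_set_to_cpu_domain() added in the diff below.

/*
 * Minimal user-space model of the domain-transition ordering centralized by
 * this patch.  The types and the printf "flush"/"wait" stand-ins are invented
 * for illustration; only the domain bits and the step ordering follow the new
 * i915_gem_object_set_to_cpu_domain() in i915_gem.c.
 */
#include <stdio.h>
#include <stdint.h>

#define GEM_DOMAIN_CPU    0x1
#define GEM_DOMAIN_GTT    0x2
#define GEM_DOMAIN_RENDER 0x4	/* stands in for I915_GEM_GPU_DOMAINS */

struct model_obj {
	uint32_t read_domains;
	uint32_t write_domain;
};

/* Pending GPU writes are flushed and retired before the CPU cache is
 * touched; keeping this ordering in one place is the point of the patch.
 */
static void flush_gpu_write_domain(struct model_obj *obj)
{
	if (!(obj->write_domain & GEM_DOMAIN_RENDER))
		return;
	printf("emit GPU flush and wait for it to retire\n");
	obj->write_domain = 0;
}

static void flush_gtt_write_domain(struct model_obj *obj)
{
	if (obj->write_domain != GEM_DOMAIN_GTT)
		return;
	/* GTT writes already landed in memory; just drop the write domain. */
	obj->write_domain = 0;
}

static void set_to_cpu_domain(struct model_obj *obj, int write)
{
	flush_gpu_write_domain(obj);	/* 1. flush + wait on GPU writes  */
	flush_gtt_write_domain(obj);	/* 2. retire any GTT write domain */

	if (!(obj->read_domains & GEM_DOMAIN_CPU)) {
		printf("clflush object, chipset flush\n");	/* 3. invalidate */
		obj->read_domains |= GEM_DOMAIN_CPU;
	}
	if (write) {			/* 4. update domain tracking */
		obj->read_domains = GEM_DOMAIN_CPU;
		obj->write_domain = GEM_DOMAIN_CPU;
	}
}

int main(void)
{
	struct model_obj obj = {
		.read_domains = GEM_DOMAIN_RENDER,
		.write_domain = GEM_DOMAIN_RENDER,
	};

	set_to_cpu_domain(&obj, 1);
	printf("read %#x write %#x\n", obj.read_domains, obj.write_domain);
	return 0;
}

Routing every caller through one helper with this fixed sequence is what closes the bugs the commit message names: pread now invalidates the CPU cache for the pages it reads, and set_domain(CPU, 0) now waits for outstanding rendering, instead of each call site open-coding parts of the sequence.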
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 76d9a706d8fd..adc972cc6bfc 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -379,8 +379,8 @@ struct drm_i915_gem_object {
 	uint32_t agp_type;
 
 	/**
-	 * Flagging of which individual pages are valid in GEM_DOMAIN_CPU when
-	 * GEM_DOMAIN_CPU is not in the object's read domain.
+	 * If present, while GEM_DOMAIN_CPU is in the read domain this array
+	 * flags which individual pages are valid.
 	 */
 	uint8_t *page_cpu_valid;
 };
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 62a059dce85f..f7e9f2c2934c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -37,19 +37,17 @@ static int
 i915_gem_object_set_domain(struct drm_gem_object *obj,
 			    uint32_t read_domains,
 			    uint32_t write_domain);
-static int
-i915_gem_object_set_domain_range(struct drm_gem_object *obj,
-				 uint64_t offset,
-				 uint64_t size,
-				 uint32_t read_domains,
-				 uint32_t write_domain);
-static int
-i915_gem_set_domain(struct drm_gem_object *obj,
-		    struct drm_file *file_priv,
-		    uint32_t read_domains,
-		    uint32_t write_domain);
+static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
+static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
+static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
 static int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj,
 					     int write);
+static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
+					     int write);
+static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
+						      uint64_t offset,
+						      uint64_t size);
+static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
 static int i915_gem_object_get_page_list(struct drm_gem_object *obj);
 static void i915_gem_object_free_page_list(struct drm_gem_object *obj);
 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
@@ -164,8 +162,8 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 
 	mutex_lock(&dev->struct_mutex);
 
-	ret = i915_gem_object_set_domain_range(obj, args->offset, args->size,
-					       I915_GEM_DOMAIN_CPU, 0);
+	ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
+							args->size);
 	if (ret != 0) {
 		drm_gem_object_unreference(obj);
 		mutex_unlock(&dev->struct_mutex);
@@ -321,8 +319,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 
 	mutex_lock(&dev->struct_mutex);
 
-	ret = i915_gem_set_domain(obj, file_priv,
-				  I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
 	if (ret) {
 		mutex_unlock(&dev->struct_mutex);
 		return ret;
@@ -439,8 +436,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	if (read_domains & I915_GEM_DOMAIN_GTT) {
 		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
 	} else {
-		ret = i915_gem_set_domain(obj, file_priv,
-					  read_domains, write_domain);
+		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
 	}
 
 	drm_gem_object_unreference(obj);
@@ -477,10 +473,9 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 	obj_priv = obj->driver_private;
 
 	/* Pinned buffers may be scanout, so flush the cache */
-	if ((obj->write_domain & I915_GEM_DOMAIN_CPU) && obj_priv->pin_count) {
-		i915_gem_clflush_object(obj);
-		drm_agp_chipset_flush(dev);
-	}
+	if (obj_priv->pin_count)
+		i915_gem_object_flush_cpu_write_domain(obj);
+
 	drm_gem_object_unreference(obj);
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
@@ -925,23 +920,10 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj)
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 	int ret;
 
-	/* If there are writes queued to the buffer, flush and
-	 * create a new seqno to wait for.
+	/* This function only exists to support waiting for existing rendering,
+	 * not for emitting required flushes.
 	 */
-	if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) {
-		uint32_t seqno, write_domain = obj->write_domain;
-#if WATCH_BUF
-		DRM_INFO("%s: flushing object %p from write domain %08x\n",
-			 __func__, obj, write_domain);
-#endif
-		i915_gem_flush(dev, 0, write_domain);
-
-		seqno = i915_add_request(dev, write_domain);
-		i915_gem_object_move_to_active(obj, seqno);
-#if WATCH_LRU
-		DRM_INFO("%s: flush moves to exec list %p\n", __func__, obj);
-#endif
-	}
+	BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);
 
 	/* If there is rendering queued on the buffer being evicted, wait for
 	 * it.
@@ -981,24 +963,16 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 		return -EINVAL;
 	}
 
-	/* Wait for any rendering to complete
-	 */
-	ret = i915_gem_object_wait_rendering(obj);
-	if (ret) {
-		DRM_ERROR("wait_rendering failed: %d\n", ret);
-		return ret;
-	}
-
 	/* Move the object to the CPU domain to ensure that
 	 * any possible CPU writes while it's not in the GTT
 	 * are flushed when we go to remap it. This will
 	 * also ensure that all pending GPU writes are finished
 	 * before we unbind.
 	 */
-	ret = i915_gem_object_set_domain(obj, I915_GEM_DOMAIN_CPU,
-					 I915_GEM_DOMAIN_CPU);
+	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
 	if (ret) {
-		DRM_ERROR("set_domain failed: %d\n", ret);
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("set_domain failed: %d\n", ret);
 		return ret;
 	}
 
@@ -1259,6 +1233,51 @@ i915_gem_clflush_object(struct drm_gem_object *obj)
 	drm_clflush_pages(obj_priv->page_list, obj->size / PAGE_SIZE);
 }
 
+/** Flushes any GPU write domain for the object if it's dirty. */
+static void
+i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	uint32_t seqno;
+
+	if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
+		return;
+
+	/* Queue the GPU write cache flushing we need. */
+	i915_gem_flush(dev, 0, obj->write_domain);
+	seqno = i915_add_request(dev, obj->write_domain);
+	obj->write_domain = 0;
+	i915_gem_object_move_to_active(obj, seqno);
+}
+
+/** Flushes the GTT write domain for the object if it's dirty. */
+static void
+i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
+{
+	if (obj->write_domain != I915_GEM_DOMAIN_GTT)
+		return;
+
+	/* No actual flushing is required for the GTT write domain.  Writes
+	 * to it immediately go to main memory as far as we know, so there's
+	 * no chipset flush.  It also doesn't land in render cache.
+	 */
+	obj->write_domain = 0;
+}
+
+/** Flushes the CPU write domain for the object if it's dirty. */
+static void
+i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+
+	if (obj->write_domain != I915_GEM_DOMAIN_CPU)
+		return;
+
+	i915_gem_clflush_object(obj);
+	drm_agp_chipset_flush(dev);
+	obj->write_domain = 0;
+}
+
 /**
  * Moves a single object to the GTT read, and possibly write domain.
  *
@@ -1268,56 +1287,81 @@ i915_gem_clflush_object(struct drm_gem_object *obj)
 static int
 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
 {
-	struct drm_device *dev = obj->dev;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
-	uint32_t flush_domains;
+	int ret;
 
-	/* Figure out what GPU domains we need to flush or invalidate for
-	 * moving to GTT.
+	i915_gem_object_flush_gpu_write_domain(obj);
+	/* Wait on any GPU rendering and flushing to occur. */
+	ret = i915_gem_object_wait_rendering(obj);
+	if (ret != 0)
+		return ret;
+
+	/* If we're writing through the GTT domain, then CPU and GPU caches
+	 * will need to be invalidated at next use.
 	 */
-	flush_domains = obj->write_domain & I915_GEM_GPU_DOMAINS;
+	if (write)
+		obj->read_domains &= I915_GEM_DOMAIN_GTT;
 
-	/* Queue the GPU write cache flushing we need. */
-	if (flush_domains != 0) {
-		uint32_t seqno;
+	i915_gem_object_flush_cpu_write_domain(obj);
 
-		obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
-		i915_gem_flush(dev, 0, flush_domains);
-		seqno = i915_add_request(dev, flush_domains);
-		i915_gem_object_move_to_active(obj, seqno);
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
+	obj->read_domains |= I915_GEM_DOMAIN_GTT;
+	if (write) {
+		obj->write_domain = I915_GEM_DOMAIN_GTT;
+		obj_priv->dirty = 1;
 	}
 
+	return 0;
+}
+
+/**
+ * Moves a single object to the CPU read, and possibly write domain.
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+static int
+i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
+{
+	struct drm_device *dev = obj->dev;
+	int ret;
+
+	i915_gem_object_flush_gpu_write_domain(obj);
 	/* Wait on any GPU rendering and flushing to occur. */
-	if (obj_priv->active) {
-		int ret;
+	ret = i915_gem_object_wait_rendering(obj);
+	if (ret != 0)
+		return ret;
 
-		ret = i915_wait_request(dev, obj_priv->last_rendering_seqno);
-		if (ret != 0)
-			return ret;
-	}
+	i915_gem_object_flush_gtt_write_domain(obj);
 
-	/* If we're writing through the GTT domain, then CPU and GPU caches
-	 * will need to be invalidated at next use.
+	/* If we have a partially-valid cache of the object in the CPU,
+	 * finish invalidating it and free the per-page flags.
 	 */
-	if (write)
-		obj->read_domains &= ~(I915_GEM_GPU_DOMAINS |
-				       I915_GEM_DOMAIN_CPU);
+	i915_gem_object_set_to_full_cpu_read_domain(obj);
 
-	/* Flush the CPU domain if it's dirty. */
-	if (obj->write_domain & I915_GEM_DOMAIN_CPU) {
+	/* Flush the CPU cache if it's still invalid. */
+	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
 		i915_gem_clflush_object(obj);
 		drm_agp_chipset_flush(dev);
 
-		obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
+		obj->read_domains |= I915_GEM_DOMAIN_CPU;
 	}
 
 	/* It should now be out of any other write domains, and we can update
 	 * the domain values for our changes.
 	 */
-	BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
-	obj->read_domains |= I915_GEM_DOMAIN_GTT;
-	if (write)
-		obj->write_domain = I915_GEM_DOMAIN_GTT;
+	BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
+
+	/* If we're writing through the CPU, then the GPU read domains will
+	 * need to be invalidated at next use.
+	 */
+	if (write) {
+		obj->read_domains &= I915_GEM_DOMAIN_CPU;
+		obj->write_domain = I915_GEM_DOMAIN_CPU;
+	}
 
 	return 0;
 }
@@ -1442,7 +1486,9 @@ i915_gem_object_set_domain(struct drm_gem_object *obj,
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 	uint32_t invalidate_domains = 0;
 	uint32_t flush_domains = 0;
-	int ret;
+
+	BUG_ON(read_domains & I915_GEM_DOMAIN_CPU);
+	BUG_ON(write_domain == I915_GEM_DOMAIN_CPU);
 
 #if WATCH_BUF
 	DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
@@ -1479,34 +1525,11 @@ i915_gem_object_set_domain(struct drm_gem_object *obj,
 		DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
 			 __func__, flush_domains, invalidate_domains);
 #endif
-		/*
-		 * If we're invaliding the CPU cache and flushing a GPU cache,
-		 * then pause for rendering so that the GPU caches will be
-		 * flushed before the cpu cache is invalidated
-		 */
-		if ((invalidate_domains & I915_GEM_DOMAIN_CPU) &&
-		    (flush_domains & ~(I915_GEM_DOMAIN_CPU |
-				       I915_GEM_DOMAIN_GTT))) {
-			ret = i915_gem_object_wait_rendering(obj);
-			if (ret)
-				return ret;
-		}
 		i915_gem_clflush_object(obj);
 	}
 
 	if ((write_domain | flush_domains) != 0)
 		obj->write_domain = write_domain;
-
-	/* If we're invalidating the CPU domain, clear the per-page CPU
-	 * domain list as well.
-	 */
-	if (obj_priv->page_cpu_valid != NULL &&
-	    (write_domain != 0 ||
-	     read_domains & I915_GEM_DOMAIN_CPU)) {
-		drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE,
-			 DRM_MEM_DRIVER);
-		obj_priv->page_cpu_valid = NULL;
-	}
 	obj->read_domains = read_domains;
 
 	dev->invalidate_domains |= invalidate_domains;
@@ -1521,43 +1544,91 @@ i915_gem_object_set_domain(struct drm_gem_object *obj,
 }
 
 /**
- * Set the read/write domain on a range of the object.
+ * Moves the object from a partially CPU read to a full one.
  *
- * Currently only implemented for CPU reads, otherwise drops to normal
- * i915_gem_object_set_domain().
+ * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
+ * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
  */
-static int
-i915_gem_object_set_domain_range(struct drm_gem_object *obj,
-				 uint64_t offset,
-				 uint64_t size,
-				 uint32_t read_domains,
-				 uint32_t write_domain)
+static void
+i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
 {
+	struct drm_device *dev = obj->dev;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
-	int ret, i;
 
-	if (obj->read_domains & I915_GEM_DOMAIN_CPU)
-		return 0;
+	if (!obj_priv->page_cpu_valid)
+		return;
 
-	if (read_domains != I915_GEM_DOMAIN_CPU ||
-	    write_domain != 0)
-		return i915_gem_object_set_domain(obj,
-						  read_domains, write_domain);
+	/* If we're partially in the CPU read domain, finish moving it in.
+	 */
+	if (obj->read_domains & I915_GEM_DOMAIN_CPU) {
+		int i;
+
+		for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) {
+			if (obj_priv->page_cpu_valid[i])
+				continue;
+			drm_clflush_pages(obj_priv->page_list + i, 1);
+		}
+		drm_agp_chipset_flush(dev);
+	}
 
-	/* Wait on any GPU rendering to the object to be flushed. */
+	/* Free the page_cpu_valid mappings which are now stale, whether
+	 * or not we've got I915_GEM_DOMAIN_CPU.
+	 */
+	drm_free(obj_priv->page_cpu_valid, obj->size / PAGE_SIZE,
+		 DRM_MEM_DRIVER);
+	obj_priv->page_cpu_valid = NULL;
+}
+
+/**
+ * Set the CPU read domain on a range of the object.
+ *
+ * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
+ * not entirely valid.  The page_cpu_valid member of the object flags which
+ * pages have been flushed, and will be respected by
+ * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
+ * of the whole object.
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+static int
+i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
+					  uint64_t offset, uint64_t size)
+{
+	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	int i, ret;
+
+	if (offset == 0 && size == obj->size)
+		return i915_gem_object_set_to_cpu_domain(obj, 0);
+
+	i915_gem_object_flush_gpu_write_domain(obj);
+	/* Wait on any GPU rendering and flushing to occur. */
 	ret = i915_gem_object_wait_rendering(obj);
-	if (ret)
+	if (ret != 0)
 		return ret;
+	i915_gem_object_flush_gtt_write_domain(obj);
+
+	/* If we're already fully in the CPU read domain, we're done. */
+	if (obj_priv->page_cpu_valid == NULL &&
+	    (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0)
+		return 0;
 
+	/* Otherwise, create/clear the per-page CPU read domain flag if we're
+	 * newly adding I915_GEM_DOMAIN_CPU
+	 */
 	if (obj_priv->page_cpu_valid == NULL) {
 		obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE,
 						      DRM_MEM_DRIVER);
-	}
+		if (obj_priv->page_cpu_valid == NULL)
+			return -ENOMEM;
+	} else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
+		memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);
 
 	/* Flush the cache on any pages that are still invalid from the CPU's
 	 * perspective.
 	 */
-	for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE; i++) {
+	for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
+	     i++) {
 		if (obj_priv->page_cpu_valid[i])
 			continue;
 
@@ -1566,6 +1637,13 @@ i915_gem_object_set_domain_range(struct drm_gem_object *obj,
 		obj_priv->page_cpu_valid[i] = 1;
 	}
 
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
+
+	obj->read_domains |= I915_GEM_DOMAIN_CPU;
+
 	return 0;
 }
 
@@ -1679,6 +1757,18 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 			return -EINVAL;
 		}
 
+		if (reloc.write_domain & I915_GEM_DOMAIN_CPU ||
+		    reloc.read_domains & I915_GEM_DOMAIN_CPU) {
+			DRM_ERROR("reloc with read/write CPU domains: "
+				  "obj %p target %d offset %d "
+				  "read %08x write %08x",
+				  obj, reloc.target_handle,
+				  (int) reloc.offset,
+				  reloc.read_domains,
+				  reloc.write_domain);
+			return -EINVAL;
+		}
+
 		if (reloc.write_domain && target_obj->pending_write_domain &&
 		    reloc.write_domain != target_obj->pending_write_domain) {
 			DRM_ERROR("Write domain conflict: "
@@ -2157,11 +2247,7 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
 	/* XXX - flush the CPU caches for pinned objects
 	 * as the X server doesn't manage domains yet
 	 */
-	if (obj->write_domain & I915_GEM_DOMAIN_CPU) {
-		i915_gem_clflush_object(obj);
-		drm_agp_chipset_flush(dev);
-		obj->write_domain = 0;
-	}
+	i915_gem_object_flush_cpu_write_domain(obj);
 	args->offset = obj_priv->gtt_offset;
 	drm_gem_object_unreference(obj);
 	mutex_unlock(&dev->struct_mutex);
@@ -2263,29 +2349,6 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 	drm_free(obj->driver_private, 1, DRM_MEM_DRIVER);
 }
 
-static int
-i915_gem_set_domain(struct drm_gem_object *obj,
-		    struct drm_file *file_priv,
-		    uint32_t read_domains,
-		    uint32_t write_domain)
-{
-	struct drm_device *dev = obj->dev;
-	int ret;
-	uint32_t flush_domains;
-
-	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
-
-	ret = i915_gem_object_set_domain(obj, read_domains, write_domain);
-	if (ret)
-		return ret;
-	flush_domains = i915_gem_dev_set_domain(obj->dev);
-
-	if (flush_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT))
-		(void) i915_add_request(dev, flush_domains);
-
-	return 0;
-}
-
 /** Unbinds all objects that are on the given buffer list. */
 static int
 i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head)