-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_gk20a.c          77
-rw-r--r--  drivers/gpu/nvgpu/gk20a/platform_gk20a.h          3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c    2
-rw-r--r--  drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c     1
4 files changed, 59 insertions(+), 24 deletions(-)
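The diff below replaces the hardcoded NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT (64 channels) in channel_gk20a.c with a per-platform field, aggressive_sync_destroy_thresh, in struct gk20a_platform. The Tegra and vGPU platform definitions set the threshold to 64, while a threshold of zero disables the lazy create/destroy path entirely, in which case gk20a_alloc_channel_gpfifo() creates the channel sync up front. A minimal sketch of the policy the new field encodes (the helper below is illustrative only and does not exist in the driver):

/*
 * Illustrative helper only -- not driver code. It restates the threshold
 * test that allocate_channel() now performs inline when deciding to turn
 * aggressive sync destroy on: a threshold of 0 disables it for the platform.
 */
static bool want_aggressive_sync_destroy(unsigned int used_channels, int thresh)
{
	return thresh && used_channels > (unsigned int)thresh;
}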
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 7df794bf..79d449e6 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -43,8 +43,6 @@
 
 #define NVMAP_HANDLE_PARAM_SIZE 1
 
-#define NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT 64 /* channels */
-
 #define NVGPU_CHANNEL_MIN_TIMESLICE_US 1000
 #define NVGPU_CHANNEL_MAX_TIMESLICE_US 50000
 
@@ -91,7 +89,9 @@ static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
 	}
 	mutex_unlock(&f->free_chs_mutex);
 
-	if (f->used_channels > NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
+	if (platform->aggressive_sync_destroy_thresh &&
+	    (f->used_channels >
+	     platform->aggressive_sync_destroy_thresh))
 		platform->aggressive_sync_destroy = true;
 
 	return ch;
@@ -110,7 +110,9 @@ static void free_channel(struct fifo_gk20a *f,
 	f->used_channels--;
 	mutex_unlock(&f->free_chs_mutex);
 
-	if (f->used_channels < NVGPU_BEGIN_AGGRESSIVE_SYNC_DESTROY_LIMIT)
+	if (platform->aggressive_sync_destroy_thresh &&
+	    (f->used_channels <
+	     platform->aggressive_sync_destroy_thresh))
 		platform->aggressive_sync_destroy = false;
 }
 
@@ -1424,6 +1426,7 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 {
 	struct gk20a *g = c->g;
 	struct device *d = dev_from_gk20a(g);
+	struct gk20a_platform *platform = gk20a_get_platform(d);
 	struct vm_gk20a *ch_vm;
 	u32 gpfifo_size;
 	int err = 0;
@@ -1487,26 +1490,46 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
 
 	channel_gk20a_setup_userd(c);
 
+	if (!platform->aggressive_sync_destroy_thresh) {
+		mutex_lock(&c->sync_lock);
+		c->sync = gk20a_channel_sync_create(c);
+		if (!c->sync) {
+			err = -ENOMEM;
+			mutex_unlock(&c->sync_lock);
+			goto clean_up_unmap;
+		}
+		mutex_unlock(&c->sync_lock);
+
+		if (g->ops.fifo.resetup_ramfc) {
+			err = g->ops.fifo.resetup_ramfc(c);
+			if (err)
+				goto clean_up_sync;
+		}
+	}
+
 	err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va,
 			c->gpfifo.entry_num, args->flags);
 	if (err)
-		goto clean_up_unmap;
+		goto clean_up_sync;
 
 	/* TBD: setup engine contexts */
 
 	err = channel_gk20a_alloc_priv_cmdbuf(c);
 	if (err)
-		goto clean_up_unmap;
+		goto clean_up_sync;
 
 	err = channel_gk20a_update_runlist(c, true);
 	if (err)
-		goto clean_up_unmap;
+		goto clean_up_sync;
 
 	g->ops.fifo.bind_channel(c);
 
 	gk20a_dbg_fn("done");
 	return 0;
 
+clean_up_sync:
+	gk20a_channel_sync_destroy(c->sync);
+	c->sync = NULL;
 clean_up_unmap:
 	nvgpu_free(c->gpfifo.pipe);
 	gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
@@ -1911,18 +1934,21 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
 
 		gk20a_channel_timeout_stop(c);
 
-		mutex_lock(&c->sync_lock);
+		WARN_ON(!c->sync);
+
 		if (c->sync) {
 			c->sync->signal_timeline(c->sync);
-			if (atomic_dec_and_test(&c->sync->refcount) &&
-					platform->aggressive_sync_destroy) {
-				gk20a_channel_sync_destroy(c->sync);
-				c->sync = NULL;
+
+			if (platform->aggressive_sync_destroy_thresh) {
+				mutex_lock(&c->sync_lock);
+				if (atomic_dec_and_test(&c->sync->refcount) &&
+						platform->aggressive_sync_destroy) {
+					gk20a_channel_sync_destroy(c->sync);
+					c->sync = NULL;
+				}
+				mutex_unlock(&c->sync_lock);
 			}
-		} else {
-			WARN_ON(1);
 		}
-		mutex_unlock(&c->sync_lock);
 
 		if (job->num_mapped_buffers)
 			gk20a_vm_put_buffers(vm, job->mapped_buffers,
@@ -2099,6 +2125,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 		u32 flags)
 {
 	struct gk20a *g = c->g;
+	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
 	bool need_sync_fence = false;
 	bool new_sync_created = false;
 	int wait_fence_fd = -1;
@@ -2112,18 +2139,20 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 	if (force_need_sync_fence)
 		need_sync_fence = true;
 
-	mutex_lock(&c->sync_lock);
-	if (!c->sync) {
-		c->sync = gk20a_channel_sync_create(c);
-		if (!c->sync) {
-			err = -ENOMEM;
-			mutex_unlock(&c->sync_lock);
-			goto fail;
-		}
-		new_sync_created = true;
-	}
-	atomic_inc(&c->sync->refcount);
-	mutex_unlock(&c->sync_lock);
+	if (platform->aggressive_sync_destroy_thresh) {
+		mutex_lock(&c->sync_lock);
+		if (!c->sync) {
+			c->sync = gk20a_channel_sync_create(c);
+			if (!c->sync) {
+				err = -ENOMEM;
+				mutex_unlock(&c->sync_lock);
+				goto fail;
+			}
+			new_sync_created = true;
+		}
+		atomic_inc(&c->sync->refcount);
+		mutex_unlock(&c->sync_lock);
+	}
 
 	if (g->ops.fifo.resetup_ramfc && new_sync_created) {
 		err = g->ops.fifo.resetup_ramfc(c);
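Taken together, the channel_gk20a.c changes give the channel sync object two possible lifetimes, selected by the platform threshold. A condensed, hypothetical sketch of that decision (not an actual driver function):

/*
 * Illustrative only. With a zero threshold the sync is created once in
 * gk20a_alloc_channel_gpfifo() and survives until the channel is freed;
 * with a non-zero threshold it is created lazily in
 * gk20a_submit_prepare_syncs() and may be destroyed from the cleanup
 * worker once its refcount drops and aggressive_sync_destroy is set.
 */
static bool channel_sync_is_eager(const struct gk20a_platform *platform)
{
	return platform->aggressive_sync_destroy_thresh == 0;
}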
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
index 93158cc7..f038b072 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
@@ -50,6 +50,9 @@ struct gk20a_platform {
 	/* Should be populated at probe. */
 	bool has_syncpoints;
 
+	/* channel limit after which to start aggressive sync destroy */
+	int aggressive_sync_destroy_thresh;
+
 	/* flag to set sync destroy aggressiveness */
 	bool aggressive_sync_destroy;
 
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
index 819c50a4..90ba54ea 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
@@ -847,6 +847,7 @@ static int gk20a_clk_get_freqs(struct device *dev,
 
 struct gk20a_platform gk20a_tegra_platform = {
 	.has_syncpoints = true,
+	.aggressive_sync_destroy_thresh = 64,
 
 	/* power management configuration */
 	.railgate_delay = 500,
@@ -909,6 +910,7 @@ struct gk20a_platform gk20a_tegra_platform = {
 
 struct gk20a_platform gm20b_tegra_platform = {
 	.has_syncpoints = true,
+	.aggressive_sync_destroy_thresh = 64,
 
 	/* power management configuration */
 	.railgate_delay = 500,
diff --git a/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c
index b260b3ac..dc898226 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c
+++ b/drivers/gpu/nvgpu/gk20a/platform_vgpu_tegra.c
@@ -49,6 +49,7 @@ static int gk20a_tegra_probe(struct device *dev)
 
 struct gk20a_platform vgpu_tegra_platform = {
 	.has_syncpoints = true,
+	.aggressive_sync_destroy_thresh = 64,
 
 	/* power management configuration */
 	.can_railgate = false,
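For comparison, a hypothetical platform definition (not part of this patch) that leaves the new field unset keeps the threshold at zero and therefore opts out of aggressive sync destroy, falling back to the eager sync creation path in gk20a_alloc_channel_gpfifo():

/* Hypothetical example only -- not in this patch. */
struct gk20a_platform example_platform = {
	.has_syncpoints = true,
	/* .aggressive_sync_destroy_thresh is implicitly 0: the channel sync
	 * is created at gpfifo allocation and never destroyed early. */
};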