summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.h 9
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_gk20a.c 48
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h 23
-rw-r--r-- drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h 8
-rw-r--r-- drivers/gpu/nvgpu/gm20b/gr_gm20b.c 78
-rw-r--r-- drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h 20
6 files changed, 177 insertions, 9 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index d5310b02..275619c9 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -178,6 +178,15 @@ struct gpu_ops {
178 struct gr_zcull_info *zcull_params); 178 struct gr_zcull_info *zcull_params);
179 bool (*is_tpc_addr)(struct gk20a *g, u32 addr); 179 bool (*is_tpc_addr)(struct gk20a *g, u32 addr);
180 u32 (*get_tpc_num)(struct gk20a *g, u32 addr); 180 u32 (*get_tpc_num)(struct gk20a *g, u32 addr);
181 bool (*is_ltcs_ltss_addr)(struct gk20a *g, u32 addr);
182 bool (*is_ltcn_ltss_addr)(struct gk20a *g, u32 addr);
183 bool (*get_lts_in_ltc_shared_base)(void);
184 void (*split_lts_broadcast_addr)(struct gk20a *g, u32 addr,
185 u32 *priv_addr_table,
186 u32 *priv_addr_table_index);
187 void (*split_ltc_broadcast_addr)(struct gk20a *g, u32 addr,
188 u32 *priv_addr_table,
189 u32 *priv_addr_table_index);
181 void (*detect_sm_arch)(struct gk20a *g); 190 void (*detect_sm_arch)(struct gk20a *g);
182 int (*add_zbc_color)(struct gk20a *g, struct gr_gk20a *gr, 191 int (*add_zbc_color)(struct gk20a *g, struct gr_gk20a *gr,
183 struct zbc_entry *color_val, u32 index); 192 struct zbc_entry *color_val, u32 index);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 901fea8c..943b4085 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -6318,6 +6318,13 @@ static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
6318 } 6318 }
6319 *be_num = pri_get_be_num(g, addr); 6319 *be_num = pri_get_be_num(g, addr);
6320 return 0; 6320 return 0;
6321 } else if (pri_is_ltc_addr(addr)) {
6322 *addr_type = CTXSW_ADDR_TYPE_LTCS;
6323 if (g->ops.gr.is_ltcs_ltss_addr(g, addr))
6324 *broadcast_flags |= PRI_BROADCAST_FLAGS_LTCS;
6325 else if (g->ops.gr.is_ltcn_ltss_addr(g, addr))
6326 *broadcast_flags |= PRI_BROADCAST_FLAGS_LTSS;
6327 return 0;
6321 } else { 6328 } else {
6322 *addr_type = CTXSW_ADDR_TYPE_SYS; 6329 *addr_type = CTXSW_ADDR_TYPE_SYS;
6323 return 0; 6330 return 0;
@@ -6412,7 +6419,15 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
6412 pri_gpc_addr(g, pri_gpccs_addr_mask(addr), 6419 pri_gpc_addr(g, pri_gpccs_addr_mask(addr),
6413 gpc_num); 6420 gpc_num);
6414 } 6421 }
6415 } else { 6422 }
6423
6424 if (broadcast_flags & PRI_BROADCAST_FLAGS_LTSS) {
6425 g->ops.gr.split_lts_broadcast_addr(g, addr,
6426 priv_addr_table, &t);
6427 } else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTCS) {
6428 g->ops.gr.split_ltc_broadcast_addr(g, addr,
6429 priv_addr_table, &t);
6430 } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_GPC)) {
6416 if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) 6431 if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC)
6417 for (tpc_num = 0; 6432 for (tpc_num = 0;
6418 tpc_num < g->gr.gpc_tpc_count[gpc_num]; 6433 tpc_num < g->gr.gpc_tpc_count[gpc_num];
@@ -7296,8 +7311,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
7296 num_tpcs) << 2); 7311 num_tpcs) << 2);
7297 } 7312 }
7298 } else { 7313 } else {
7299 gk20a_err(dev_from_gk20a(g), 7314 gk20a_dbg_fn("Unknown address type.");
7300 " Unknown address type.\n");
7301 return -EINVAL; 7315 return -EINVAL;
7302 } 7316 }
7303 err = gr_gk20a_process_context_buffer_priv_segment(g, 7317 err = gr_gk20a_process_context_buffer_priv_segment(g,
@@ -8653,6 +8667,28 @@ static int gr_gk20a_get_preemption_mode_flags(struct gk20a *g,
8653 return 0; 8667 return 0;
8654} 8668}
8655 8669
8670static bool gr_gk20a_is_ltcs_ltss_addr_stub(struct gk20a *g, u32 addr)
8671{
8672 return false;
8673}
8674
8675static bool gr_gk20a_is_ltcn_ltss_addr_stub(struct gk20a *g, u32 addr)
8676{
8677 return false;
8678}
8679
8680static void gr_gk20a_split_lts_broadcast_addr_stub(struct gk20a *g, u32 addr,
8681 u32 *priv_addr_table,
8682 u32 *priv_addr_table_index)
8683{
8684}
8685
8686static void gr_gk20a_split_ltc_broadcast_addr_stub(struct gk20a *g, u32 addr,
8687 u32 *priv_addr_table,
8688 u32 *priv_addr_table_index)
8689{
8690}
8691
8656void gk20a_init_gr_ops(struct gpu_ops *gops) 8692void gk20a_init_gr_ops(struct gpu_ops *gops)
8657{ 8693{
8658 gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; 8694 gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg;
@@ -8723,4 +8759,10 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
8723 gops->gr.get_preemption_mode_flags = gr_gk20a_get_preemption_mode_flags; 8759 gops->gr.get_preemption_mode_flags = gr_gk20a_get_preemption_mode_flags;
8724 gops->gr.program_active_tpc_counts = gr_gk20a_program_active_tpc_counts; 8760 gops->gr.program_active_tpc_counts = gr_gk20a_program_active_tpc_counts;
8725 gops->gr.program_sm_id_numbering = gr_gk20a_program_sm_id_numbering; 8761 gops->gr.program_sm_id_numbering = gr_gk20a_program_sm_id_numbering;
8762 gops->gr.is_ltcs_ltss_addr = gr_gk20a_is_ltcs_ltss_addr_stub;
8763 gops->gr.is_ltcn_ltss_addr = gr_gk20a_is_ltcn_ltss_addr_stub;
8764 gops->gr.split_lts_broadcast_addr =
8765 gr_gk20a_split_lts_broadcast_addr_stub;
8766 gops->gr.split_ltc_broadcast_addr =
8767 gr_gk20a_split_ltc_broadcast_addr_stub;
8726} 8768}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
index 248fa291..62e276de 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * GK20A Graphics Context Pri Register Addressing 2 * GK20A Graphics Context Pri Register Addressing
3 * 3 *
4 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -167,12 +167,21 @@ static inline u32 pri_ppc_addr(struct gk20a *g, u32 addr, u32 gpc, u32 ppc)
167 ppc_in_gpc_base + (ppc * ppc_in_gpc_stride) + addr; 167 ppc_in_gpc_base + (ppc * ppc_in_gpc_stride) + addr;
168} 168}
169 169
170/*
171 * LTC pri addressing
172 */
173static inline bool pri_is_ltc_addr(u32 addr)
174{
175 return ((addr >= ltc_pltcg_base_v()) && (addr < ltc_pltcg_extent_v()));
176}
177
/*
 * Address classes reported through *addr_type by the priv-address decoder.
 * CTXSW_ADDR_TYPE_LTCS is the value assigned when pri_is_ltc_addr() matches.
 */
170enum ctxsw_addr_type { 178 enum ctxsw_addr_type {
171 CTXSW_ADDR_TYPE_SYS = 0, 179 CTXSW_ADDR_TYPE_SYS = 0,
172 CTXSW_ADDR_TYPE_GPC = 1, 180 CTXSW_ADDR_TYPE_GPC = 1,
173 CTXSW_ADDR_TYPE_TPC = 2, 181 CTXSW_ADDR_TYPE_TPC = 2,
174 CTXSW_ADDR_TYPE_BE = 3, 182 CTXSW_ADDR_TYPE_BE = 3,
175 CTXSW_ADDR_TYPE_PPC = 4 183 CTXSW_ADDR_TYPE_PPC = 4,
184 CTXSW_ADDR_TYPE_LTCS = 5
176}; 185};
177 186
178#define PRI_BROADCAST_FLAGS_NONE 0 187#define PRI_BROADCAST_FLAGS_NONE 0
@@ -180,5 +189,7 @@ enum ctxsw_addr_type {
180#define PRI_BROADCAST_FLAGS_TPC BIT(1) 189#define PRI_BROADCAST_FLAGS_TPC BIT(1)
181#define PRI_BROADCAST_FLAGS_BE BIT(2) 190#define PRI_BROADCAST_FLAGS_BE BIT(2)
182#define PRI_BROADCAST_FLAGS_PPC BIT(3) 191#define PRI_BROADCAST_FLAGS_PPC BIT(3)
192#define PRI_BROADCAST_FLAGS_LTCS BIT(4)
193#define PRI_BROADCAST_FLAGS_LTSS BIT(5)
183 194
184#endif /* GR_PRI_GK20A_H */ 195#endif /* GR_PRI_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h
index 94770431..84b9c9a6 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h
@@ -50,6 +50,14 @@
50#ifndef _hw_ltc_gk20a_h_ 50#ifndef _hw_ltc_gk20a_h_
51#define _hw_ltc_gk20a_h_ 51#define _hw_ltc_gk20a_h_
52 52
53static inline u32 ltc_pltcg_base_v(void)
54{
55 return 0x00140000;
56}
57static inline u32 ltc_pltcg_extent_v(void)
58{
59 return 0x0017ffff;
60}
53static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void) 61static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void)
54{ 62{
55 return 0x001410c8; 63 return 0x001410c8;
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 0659eefd..3b0a399d 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -29,6 +29,7 @@
29#include "hw_fifo_gm20b.h" 29#include "hw_fifo_gm20b.h"
30#include "hw_fb_gm20b.h" 30#include "hw_fb_gm20b.h"
31#include "hw_top_gm20b.h" 31#include "hw_top_gm20b.h"
32#include "hw_ltc_gm20b.h"
32#include "hw_ctxsw_prog_gm20b.h" 33#include "hw_ctxsw_prog_gm20b.h"
33#include "hw_fuse_gm20b.h" 34#include "hw_fuse_gm20b.h"
34#include "pmu_gm20b.h" 35#include "pmu_gm20b.h"
@@ -1402,6 +1403,79 @@ static int gm20b_gr_fuse_override(struct gk20a *g)
1402 return 0; 1403 return 0;
1403} 1404}
1404 1405
1406static bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr)
1407{
1408 u32 ltc_shared_base = ltc_ltcs_ltss_v();
1409 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
1410
1411 return (addr >= ltc_shared_base) &&
1412 (addr < (ltc_shared_base + lts_stride));
1413}
1414
1415static bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr)
1416{
1417 u32 lts_shared_base = ltc_ltc0_ltss_v();
1418 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
1419 u32 addr_mask = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE) - 1;
1420 u32 base_offset = lts_shared_base & addr_mask;
1421 u32 end_offset = base_offset + lts_stride;
1422
1423 return (!gr_gm20b_is_ltcs_ltss_addr(g, addr)) &&
1424 ((addr & addr_mask) >= base_offset) &&
1425 ((addr & addr_mask) < end_offset);
1426}
1427
1428static void gr_gm20b_update_ltc_lts_addr(struct gk20a *g, u32 addr, u32 ltc_num,
1429 u32 *priv_addr_table,
1430 u32 *priv_addr_table_index)
1431{
1432 u32 num_ltc_slices = g->ops.gr.get_max_lts_per_ltc(g);
1433 u32 index = *priv_addr_table_index;
1434 u32 lts_num;
1435 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
1436 u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
1437
1438 for (lts_num = 0; lts_num < num_ltc_slices; lts_num++)
1439 priv_addr_table[index++] = ltc_ltc0_lts0_v() +
1440 ltc_num * ltc_stride +
1441 lts_num * lts_stride +
1442 (addr & (lts_stride - 1));
1443
1444 *priv_addr_table_index = index;
1445}
1446
1447static void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr,
1448 u32 *priv_addr_table,
1449 u32 *priv_addr_table_index)
1450{
1451 u32 num_ltc = g->ltc_count;
1452 u32 i, start, ltc_num = 0;
1453 u32 pltcg_base = ltc_pltcg_base_v();
1454 u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
1455
1456 for (i = 0; i < num_ltc; i++) {
1457 start = pltcg_base + i * ltc_stride;
1458 if ((addr >= start) && (addr < (start + ltc_stride))) {
1459 ltc_num = i;
1460 break;
1461 }
1462 }
1463 gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num, priv_addr_table,
1464 priv_addr_table_index);
1465}
1466
1467static void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr,
1468 u32 *priv_addr_table,
1469 u32 *priv_addr_table_index)
1470{
1471 u32 num_ltc = g->ltc_count;
1472 u32 ltc_num;
1473
1474 for (ltc_num = 0; ltc_num < num_ltc; ltc_num++)
1475 gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num,
1476 priv_addr_table, priv_addr_table_index);
1477}
1478
1405void gm20b_init_gr(struct gpu_ops *gops) 1479void gm20b_init_gr(struct gpu_ops *gops)
1406{ 1480{
1407 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; 1481 gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -1478,4 +1552,8 @@ void gm20b_init_gr(struct gpu_ops *gops)
1478 gops->gr.fuse_override = gm20b_gr_fuse_override; 1552 gops->gr.fuse_override = gm20b_gr_fuse_override;
1479 gops->gr.load_smid_config = gr_gm20b_load_smid_config; 1553 gops->gr.load_smid_config = gr_gm20b_load_smid_config;
1480 gops->gr.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering; 1554 gops->gr.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering;
1555 gops->gr.is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr;
1556 gops->gr.is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr;
1557 gops->gr.split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr;
1558 gops->gr.split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr;
1481} 1559}
diff --git a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
index aa01e945..8c00520c 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h
@@ -50,6 +50,26 @@
50#ifndef _hw_ltc_gm20b_h_ 50#ifndef _hw_ltc_gm20b_h_
51#define _hw_ltc_gm20b_h_ 51#define _hw_ltc_gm20b_h_
52 52
53static inline u32 ltc_pltcg_base_v(void)
54{
55 return 0x00140000;
56}
57static inline u32 ltc_pltcg_extent_v(void)
58{
59 return 0x0017ffff;
60}
61static inline u32 ltc_ltc0_ltss_v(void)
62{
63 return 0x00140200;
64}
65static inline u32 ltc_ltc0_lts0_v(void)
66{
67 return 0x00140400;
68}
69static inline u32 ltc_ltcs_ltss_v(void)
70{
71 return 0x0017e200;
72}
53static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void) 73static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void)
54{ 74{
55 return 0x0014046c; 75 return 0x0014046c;