diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 9 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 48 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h | 23 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 78 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h | 20 |
6 files changed, 177 insertions, 9 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index d5310b02..275619c9 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -178,6 +178,15 @@ struct gpu_ops { | |||
178 | struct gr_zcull_info *zcull_params); | 178 | struct gr_zcull_info *zcull_params); |
179 | bool (*is_tpc_addr)(struct gk20a *g, u32 addr); | 179 | bool (*is_tpc_addr)(struct gk20a *g, u32 addr); |
180 | u32 (*get_tpc_num)(struct gk20a *g, u32 addr); | 180 | u32 (*get_tpc_num)(struct gk20a *g, u32 addr); |
181 | bool (*is_ltcs_ltss_addr)(struct gk20a *g, u32 addr); | ||
182 | bool (*is_ltcn_ltss_addr)(struct gk20a *g, u32 addr); | ||
183 | bool (*get_lts_in_ltc_shared_base)(void); | ||
184 | void (*split_lts_broadcast_addr)(struct gk20a *g, u32 addr, | ||
185 | u32 *priv_addr_table, | ||
186 | u32 *priv_addr_table_index); | ||
187 | void (*split_ltc_broadcast_addr)(struct gk20a *g, u32 addr, | ||
188 | u32 *priv_addr_table, | ||
189 | u32 *priv_addr_table_index); | ||
181 | void (*detect_sm_arch)(struct gk20a *g); | 190 | void (*detect_sm_arch)(struct gk20a *g); |
182 | int (*add_zbc_color)(struct gk20a *g, struct gr_gk20a *gr, | 191 | int (*add_zbc_color)(struct gk20a *g, struct gr_gk20a *gr, |
183 | struct zbc_entry *color_val, u32 index); | 192 | struct zbc_entry *color_val, u32 index); |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 901fea8c..943b4085 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c | |||
@@ -6318,6 +6318,13 @@ static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr, | |||
6318 | } | 6318 | } |
6319 | *be_num = pri_get_be_num(g, addr); | 6319 | *be_num = pri_get_be_num(g, addr); |
6320 | return 0; | 6320 | return 0; |
6321 | } else if (pri_is_ltc_addr(addr)) { | ||
6322 | *addr_type = CTXSW_ADDR_TYPE_LTCS; | ||
6323 | if (g->ops.gr.is_ltcs_ltss_addr(g, addr)) | ||
6324 | *broadcast_flags |= PRI_BROADCAST_FLAGS_LTCS; | ||
6325 | else if (g->ops.gr.is_ltcn_ltss_addr(g, addr)) | ||
6326 | *broadcast_flags |= PRI_BROADCAST_FLAGS_LTSS; | ||
6327 | return 0; | ||
6321 | } else { | 6328 | } else { |
6322 | *addr_type = CTXSW_ADDR_TYPE_SYS; | 6329 | *addr_type = CTXSW_ADDR_TYPE_SYS; |
6323 | return 0; | 6330 | return 0; |
@@ -6412,7 +6419,15 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g, | |||
6412 | pri_gpc_addr(g, pri_gpccs_addr_mask(addr), | 6419 | pri_gpc_addr(g, pri_gpccs_addr_mask(addr), |
6413 | gpc_num); | 6420 | gpc_num); |
6414 | } | 6421 | } |
6415 | } else { | 6422 | } |
6423 | |||
6424 | if (broadcast_flags & PRI_BROADCAST_FLAGS_LTSS) { | ||
6425 | g->ops.gr.split_lts_broadcast_addr(g, addr, | ||
6426 | priv_addr_table, &t); | ||
6427 | } else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTCS) { | ||
6428 | g->ops.gr.split_ltc_broadcast_addr(g, addr, | ||
6429 | priv_addr_table, &t); | ||
6430 | } else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_GPC)) { | ||
6416 | if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) | 6431 | if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC) |
6417 | for (tpc_num = 0; | 6432 | for (tpc_num = 0; |
6418 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; | 6433 | tpc_num < g->gr.gpc_tpc_count[gpc_num]; |
@@ -7296,8 +7311,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, | |||
7296 | num_tpcs) << 2); | 7311 | num_tpcs) << 2); |
7297 | } | 7312 | } |
7298 | } else { | 7313 | } else { |
7299 | gk20a_err(dev_from_gk20a(g), | 7314 | gk20a_dbg_fn("Unknown address type."); |
7300 | " Unknown address type.\n"); | ||
7301 | return -EINVAL; | 7315 | return -EINVAL; |
7302 | } | 7316 | } |
7303 | err = gr_gk20a_process_context_buffer_priv_segment(g, | 7317 | err = gr_gk20a_process_context_buffer_priv_segment(g, |
@@ -8653,6 +8667,28 @@ static int gr_gk20a_get_preemption_mode_flags(struct gk20a *g, | |||
8653 | return 0; | 8667 | return 0; |
8654 | } | 8668 | } |
8655 | 8669 | ||
8670 | static bool gr_gk20a_is_ltcs_ltss_addr_stub(struct gk20a *g, u32 addr) | ||
8671 | { | ||
8672 | return false; | ||
8673 | } | ||
8674 | |||
8675 | static bool gr_gk20a_is_ltcn_ltss_addr_stub(struct gk20a *g, u32 addr) | ||
8676 | { | ||
8677 | return false; | ||
8678 | } | ||
8679 | |||
8680 | static void gr_gk20a_split_lts_broadcast_addr_stub(struct gk20a *g, u32 addr, | ||
8681 | u32 *priv_addr_table, | ||
8682 | u32 *priv_addr_table_index) | ||
8683 | { | ||
8684 | } | ||
8685 | |||
8686 | static void gr_gk20a_split_ltc_broadcast_addr_stub(struct gk20a *g, u32 addr, | ||
8687 | u32 *priv_addr_table, | ||
8688 | u32 *priv_addr_table_index) | ||
8689 | { | ||
8690 | } | ||
8691 | |||
8656 | void gk20a_init_gr_ops(struct gpu_ops *gops) | 8692 | void gk20a_init_gr_ops(struct gpu_ops *gops) |
8657 | { | 8693 | { |
8658 | gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; | 8694 | gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; |
@@ -8723,4 +8759,10 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) | |||
8723 | gops->gr.get_preemption_mode_flags = gr_gk20a_get_preemption_mode_flags; | 8759 | gops->gr.get_preemption_mode_flags = gr_gk20a_get_preemption_mode_flags; |
8724 | gops->gr.program_active_tpc_counts = gr_gk20a_program_active_tpc_counts; | 8760 | gops->gr.program_active_tpc_counts = gr_gk20a_program_active_tpc_counts; |
8725 | gops->gr.program_sm_id_numbering = gr_gk20a_program_sm_id_numbering; | 8761 | gops->gr.program_sm_id_numbering = gr_gk20a_program_sm_id_numbering; |
8762 | gops->gr.is_ltcs_ltss_addr = gr_gk20a_is_ltcs_ltss_addr_stub; | ||
8763 | gops->gr.is_ltcn_ltss_addr = gr_gk20a_is_ltcn_ltss_addr_stub; | ||
8764 | gops->gr.split_lts_broadcast_addr = | ||
8765 | gr_gk20a_split_lts_broadcast_addr_stub; | ||
8766 | gops->gr.split_ltc_broadcast_addr = | ||
8767 | gr_gk20a_split_ltc_broadcast_addr_stub; | ||
8726 | } | 8768 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h index 248fa291..62e276de 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * GK20A Graphics Context Pri Register Addressing | 2 | * GK20A Graphics Context Pri Register Addressing |
3 | * | 3 | * |
4 | * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -167,12 +167,21 @@ static inline u32 pri_ppc_addr(struct gk20a *g, u32 addr, u32 gpc, u32 ppc) | |||
167 | ppc_in_gpc_base + (ppc * ppc_in_gpc_stride) + addr; | 167 | ppc_in_gpc_base + (ppc * ppc_in_gpc_stride) + addr; |
168 | } | 168 | } |
169 | 169 | ||
170 | /* | ||
171 | * LTC pri addressing | ||
172 | */ | ||
173 | static inline bool pri_is_ltc_addr(u32 addr) | ||
174 | { | ||
175 | return ((addr >= ltc_pltcg_base_v()) && (addr < ltc_pltcg_extent_v())); | ||
176 | } | ||
177 | |||
/* Classification assigned to a priv register address when it is decoded. */
enum ctxsw_addr_type {
	/* fallback used when no other class matches the address */
	CTXSW_ADDR_TYPE_SYS	= 0,
	CTXSW_ADDR_TYPE_GPC	= 1,
	CTXSW_ADDR_TYPE_TPC	= 2,
	CTXSW_ADDR_TYPE_BE	= 3,
	CTXSW_ADDR_TYPE_PPC	= 4,
	/* chosen for addresses accepted by pri_is_ltc_addr() */
	CTXSW_ADDR_TYPE_LTCS	= 5,
};
177 | 186 | ||
178 | #define PRI_BROADCAST_FLAGS_NONE 0 | 187 | #define PRI_BROADCAST_FLAGS_NONE 0 |
@@ -180,5 +189,7 @@ enum ctxsw_addr_type { | |||
180 | #define PRI_BROADCAST_FLAGS_TPC BIT(1) | 189 | #define PRI_BROADCAST_FLAGS_TPC BIT(1) |
181 | #define PRI_BROADCAST_FLAGS_BE BIT(2) | 190 | #define PRI_BROADCAST_FLAGS_BE BIT(2) |
182 | #define PRI_BROADCAST_FLAGS_PPC BIT(3) | 191 | #define PRI_BROADCAST_FLAGS_PPC BIT(3) |
192 | #define PRI_BROADCAST_FLAGS_LTCS BIT(4) | ||
193 | #define PRI_BROADCAST_FLAGS_LTSS BIT(5) | ||
183 | 194 | ||
184 | #endif /* GR_PRI_GK20A_H */ | 195 | #endif /* GR_PRI_GK20A_H */ |
diff --git a/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h index 94770431..84b9c9a6 100644 --- a/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/hw_ltc_gk20a.h | |||
@@ -50,6 +50,14 @@ | |||
50 | #ifndef _hw_ltc_gk20a_h_ | 50 | #ifndef _hw_ltc_gk20a_h_ |
51 | #define _hw_ltc_gk20a_h_ | 51 | #define _hw_ltc_gk20a_h_ |
52 | 52 | ||
53 | static inline u32 ltc_pltcg_base_v(void) | ||
54 | { | ||
55 | return 0x00140000; | ||
56 | } | ||
57 | static inline u32 ltc_pltcg_extent_v(void) | ||
58 | { | ||
59 | return 0x0017ffff; | ||
60 | } | ||
53 | static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void) | 61 | static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void) |
54 | { | 62 | { |
55 | return 0x001410c8; | 63 | return 0x001410c8; |
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 0659eefd..3b0a399d 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include "hw_fifo_gm20b.h" | 29 | #include "hw_fifo_gm20b.h" |
30 | #include "hw_fb_gm20b.h" | 30 | #include "hw_fb_gm20b.h" |
31 | #include "hw_top_gm20b.h" | 31 | #include "hw_top_gm20b.h" |
32 | #include "hw_ltc_gm20b.h" | ||
32 | #include "hw_ctxsw_prog_gm20b.h" | 33 | #include "hw_ctxsw_prog_gm20b.h" |
33 | #include "hw_fuse_gm20b.h" | 34 | #include "hw_fuse_gm20b.h" |
34 | #include "pmu_gm20b.h" | 35 | #include "pmu_gm20b.h" |
@@ -1402,6 +1403,79 @@ static int gm20b_gr_fuse_override(struct gk20a *g) | |||
1402 | return 0; | 1403 | return 0; |
1403 | } | 1404 | } |
1404 | 1405 | ||
1406 | static bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr) | ||
1407 | { | ||
1408 | u32 ltc_shared_base = ltc_ltcs_ltss_v(); | ||
1409 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
1410 | |||
1411 | return (addr >= ltc_shared_base) && | ||
1412 | (addr < (ltc_shared_base + lts_stride)); | ||
1413 | } | ||
1414 | |||
1415 | static bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr) | ||
1416 | { | ||
1417 | u32 lts_shared_base = ltc_ltc0_ltss_v(); | ||
1418 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
1419 | u32 addr_mask = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE) - 1; | ||
1420 | u32 base_offset = lts_shared_base & addr_mask; | ||
1421 | u32 end_offset = base_offset + lts_stride; | ||
1422 | |||
1423 | return (!gr_gm20b_is_ltcs_ltss_addr(g, addr)) && | ||
1424 | ((addr & addr_mask) >= base_offset) && | ||
1425 | ((addr & addr_mask) < end_offset); | ||
1426 | } | ||
1427 | |||
1428 | static void gr_gm20b_update_ltc_lts_addr(struct gk20a *g, u32 addr, u32 ltc_num, | ||
1429 | u32 *priv_addr_table, | ||
1430 | u32 *priv_addr_table_index) | ||
1431 | { | ||
1432 | u32 num_ltc_slices = g->ops.gr.get_max_lts_per_ltc(g); | ||
1433 | u32 index = *priv_addr_table_index; | ||
1434 | u32 lts_num; | ||
1435 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
1436 | u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); | ||
1437 | |||
1438 | for (lts_num = 0; lts_num < num_ltc_slices; lts_num++) | ||
1439 | priv_addr_table[index++] = ltc_ltc0_lts0_v() + | ||
1440 | ltc_num * ltc_stride + | ||
1441 | lts_num * lts_stride + | ||
1442 | (addr & (lts_stride - 1)); | ||
1443 | |||
1444 | *priv_addr_table_index = index; | ||
1445 | } | ||
1446 | |||
1447 | static void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr, | ||
1448 | u32 *priv_addr_table, | ||
1449 | u32 *priv_addr_table_index) | ||
1450 | { | ||
1451 | u32 num_ltc = g->ltc_count; | ||
1452 | u32 i, start, ltc_num = 0; | ||
1453 | u32 pltcg_base = ltc_pltcg_base_v(); | ||
1454 | u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); | ||
1455 | |||
1456 | for (i = 0; i < num_ltc; i++) { | ||
1457 | start = pltcg_base + i * ltc_stride; | ||
1458 | if ((addr >= start) && (addr < (start + ltc_stride))) { | ||
1459 | ltc_num = i; | ||
1460 | break; | ||
1461 | } | ||
1462 | } | ||
1463 | gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num, priv_addr_table, | ||
1464 | priv_addr_table_index); | ||
1465 | } | ||
1466 | |||
1467 | static void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr, | ||
1468 | u32 *priv_addr_table, | ||
1469 | u32 *priv_addr_table_index) | ||
1470 | { | ||
1471 | u32 num_ltc = g->ltc_count; | ||
1472 | u32 ltc_num; | ||
1473 | |||
1474 | for (ltc_num = 0; ltc_num < num_ltc; ltc_num++) | ||
1475 | gr_gm20b_update_ltc_lts_addr(g, addr, ltc_num, | ||
1476 | priv_addr_table, priv_addr_table_index); | ||
1477 | } | ||
1478 | |||
1405 | void gm20b_init_gr(struct gpu_ops *gops) | 1479 | void gm20b_init_gr(struct gpu_ops *gops) |
1406 | { | 1480 | { |
1407 | gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; | 1481 | gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu; |
@@ -1478,4 +1552,8 @@ void gm20b_init_gr(struct gpu_ops *gops) | |||
1478 | gops->gr.fuse_override = gm20b_gr_fuse_override; | 1552 | gops->gr.fuse_override = gm20b_gr_fuse_override; |
1479 | gops->gr.load_smid_config = gr_gm20b_load_smid_config; | 1553 | gops->gr.load_smid_config = gr_gm20b_load_smid_config; |
1480 | gops->gr.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering; | 1554 | gops->gr.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering; |
1555 | gops->gr.is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr; | ||
1556 | gops->gr.is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr; | ||
1557 | gops->gr.split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr; | ||
1558 | gops->gr.split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr; | ||
1481 | } | 1559 | } |
diff --git a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h index aa01e945..8c00520c 100644 --- a/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/hw_ltc_gm20b.h | |||
@@ -50,6 +50,26 @@ | |||
50 | #ifndef _hw_ltc_gm20b_h_ | 50 | #ifndef _hw_ltc_gm20b_h_ |
51 | #define _hw_ltc_gm20b_h_ | 51 | #define _hw_ltc_gm20b_h_ |
52 | 52 | ||
53 | static inline u32 ltc_pltcg_base_v(void) | ||
54 | { | ||
55 | return 0x00140000; | ||
56 | } | ||
57 | static inline u32 ltc_pltcg_extent_v(void) | ||
58 | { | ||
59 | return 0x0017ffff; | ||
60 | } | ||
61 | static inline u32 ltc_ltc0_ltss_v(void) | ||
62 | { | ||
63 | return 0x00140200; | ||
64 | } | ||
65 | static inline u32 ltc_ltc0_lts0_v(void) | ||
66 | { | ||
67 | return 0x00140400; | ||
68 | } | ||
69 | static inline u32 ltc_ltcs_ltss_v(void) | ||
70 | { | ||
71 | return 0x0017e200; | ||
72 | } | ||
53 | static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void) | 73 | static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void) |
54 | { | 74 | { |
55 | return 0x0014046c; | 75 | return 0x0014046c; |