drivers/gpu/nvgpu/gp10b/cde_gp10b.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149

/*
 * GP10B CDE
 *
 * Copyright (c) 2015-2016, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include "gk20a/gk20a.h"
#include "cde_gp10b.h"

#include <nvgpu/log.h>

enum gp10b_programs {
	GP10B_PROG_HPASS              = 0,
	GP10B_PROG_HPASS_4K           = 1,
	GP10B_PROG_VPASS              = 2,
	GP10B_PROG_VPASS_4K           = 3,
	GP10B_PROG_HPASS_DEBUG        = 4,
	GP10B_PROG_HPASS_4K_DEBUG     = 5,
	GP10B_PROG_VPASS_DEBUG        = 6,
	GP10B_PROG_VPASS_4K_DEBUG     = 7,
	GP10B_PROG_PASSTHROUGH        = 8,
};

static void gp10b_cde_get_program_numbers(struct gk20a *g,
					  u32 block_height_log2,
					  int *hprog_out, int *vprog_out)
{
	int hprog, vprog;

	if (g->cde_app.shader_parameter == 1) {
		hprog = GP10B_PROG_PASSTHROUGH;
		vprog = GP10B_PROG_PASSTHROUGH;
	} else {
		hprog = GP10B_PROG_HPASS;
		vprog = GP10B_PROG_VPASS;
		if (g->cde_app.shader_parameter == 2) {
			hprog = GP10B_PROG_HPASS_DEBUG;
			vprog = GP10B_PROG_VPASS_DEBUG;
		}
		if (g->mm.bypass_smmu) {
			if (!g->mm.disable_bigpage) {
				nvgpu_warn(g,
					   "when bypass_smmu is 1, disable_bigpage must be 1 too");
			}
			hprog |= 1;
			vprog |= 1;
		}
	}

	*hprog_out = hprog;
	*vprog_out = vprog;
}

static bool gp10b_need_scatter_buffer(struct gk20a *g)
{
	return g->mm.bypass_smmu;
}

static u8 parity(u32 a)
{
	a ^= a>>16u;
	a ^= a>>8u;
	a ^= a>>4u;
	a &= 0xfu;
	return (0x6996u >> a) & 1u;
}

static int gp10b_populate_scatter_buffer(struct gk20a *g,
					 struct sg_table *sgt,
					 size_t surface_size,
					 void *scatter_buffer_ptr,
					 size_t scatter_buffer_size)
{
	/* map scatter buffer to CPU VA and fill it */
	const u32 page_size_log2 = 12;
	const u32 page_size = 1 << page_size_log2;
	const u32 page_size_shift = page_size_log2 - 7u;

	/* 0011 1111 1111 1111 1111 1110 0100 1000 */
	const u32 getSliceMaskGP10B = 0x3ffffe48;
	u8 *scatter_buffer = scatter_buffer_ptr;

	size_t i;
	struct scatterlist *sg = NULL;
	u8 d = 0;
	size_t page = 0;
	size_t pages_left;

	surface_size = round_up(surface_size, page_size);

	pages_left = surface_size >> page_size_log2;
	if ((pages_left >> 3) > scatter_buffer_size)
	    return -ENOMEM;

	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
		unsigned int j;
		u64 surf_pa = sg_phys(sg);
		unsigned int n = (int)(sg->length >> page_size_log2);

		gk20a_dbg(gpu_dbg_cde, "surfPA=0x%llx + %d pages", surf_pa, n);

		for (j=0; j < n && pages_left > 0; j++, surf_pa += page_size) {
			u32 addr = (((u32)(surf_pa>>7)) & getSliceMaskGP10B) >> page_size_shift;
			u8 scatter_bit = parity(addr);
			u8 bit = page & 7;

			d |= scatter_bit << bit;
			if (bit == 7) {
				scatter_buffer[page >> 3] = d;
				d = 0;
			}

			++page;
			--pages_left;
		}

		if (pages_left == 0)
			break;
	}

	/* write the last byte in case the number of pages is not divisible by 8 */
	if ((page & 7) != 0)
		scatter_buffer[page >> 3] = d;

	if (nvgpu_log_mask_enabled(g, gpu_dbg_cde)) {
		gk20a_dbg(gpu_dbg_cde, "scatterBuffer content:");
		for (i = 0; i < page >> 3; i++) {
			gk20a_dbg(gpu_dbg_cde, " %x", scatter_buffer[i]);
		}
	}

	return 0;
}

void gp10b_init_cde_ops(struct gpu_ops *gops)
{
	gops->cde.get_program_numbers = gp10b_cde_get_program_numbers;
	gops->cde.need_scatter_buffer = gp10b_need_scatter_buffer;
	gops->cde.populate_scatter_buffer = gp10b_populate_scatter_buffer;
}