/*
 * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
c4d0f8f6 | 22 | #include "gk20a.h" |
e3c71eb2 | 23 | #include "ctxgf100.h" |
a4d4bbf1 | 24 | |
e3c71eb2 | 25 | #include <nvif/class.h> |
c4d0f8f6 | 26 | #include <subdev/timer.h> |
a4d4bbf1 | 27 | |
e3c71eb2 | 28 | static struct nvkm_oclass |
b8bf04e1 | 29 | gk20a_gr_sclass[] = { |
3740c825 BS |
30 | { FERMI_TWOD_A, &nvkm_object_ofuncs }, |
31 | { KEPLER_INLINE_TO_MEMORY_A, &nvkm_object_ofuncs }, | |
e3c71eb2 BS |
32 | { KEPLER_C, &gf100_fermi_ofuncs, gf100_gr_9097_omthds }, |
33 | { KEPLER_COMPUTE_A, &nvkm_object_ofuncs, gf100_gr_90c0_omthds }, | |
a4d4bbf1 AC |
34 | {} |
35 | }; | |
36 | ||
/*
 * Release a pack built by one of the gk20a_gr_*_to_*() converters.
 *
 * The converters report failure with ERR_PTR(), and such a value must never
 * reach vfree(), so error pointers are skipped here.  NULL needs no special
 * handling because vfree(NULL) is a no-op.
 */
static void
gk20a_gr_init_dtor(struct gf100_gr_pack *pack)
{
	if (IS_ERR(pack))
		return;

	vfree(pack);
}
42 | ||
43 | struct gk20a_fw_av | |
44 | { | |
45 | u32 addr; | |
46 | u32 data; | |
47 | }; | |
48 | ||
49 | static struct gf100_gr_pack * | |
50 | gk20a_gr_av_to_init(struct gf100_gr_fuc *fuc) | |
51 | { | |
52 | struct gf100_gr_init *init; | |
53 | struct gf100_gr_pack *pack; | |
54 | const int nent = (fuc->size / sizeof(struct gk20a_fw_av)); | |
55 | int i; | |
56 | ||
57 | pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1))); | |
58 | if (!pack) | |
59 | return ERR_PTR(-ENOMEM); | |
60 | ||
61 | init = (void *)(pack + 2); | |
62 | ||
63 | pack[0].init = init; | |
64 | ||
65 | for (i = 0; i < nent; i++) { | |
66 | struct gf100_gr_init *ent = &init[i]; | |
67 | struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i]; | |
68 | ||
69 | ent->addr = av->addr; | |
70 | ent->data = av->data; | |
71 | ent->count = 1; | |
72 | ent->pitch = 1; | |
73 | } | |
74 | ||
75 | return pack; | |
76 | } | |
77 | ||
78 | struct gk20a_fw_aiv | |
79 | { | |
80 | u32 addr; | |
81 | u32 index; | |
82 | u32 data; | |
83 | }; | |
84 | ||
85 | static struct gf100_gr_pack * | |
86 | gk20a_gr_aiv_to_init(struct gf100_gr_fuc *fuc) | |
87 | { | |
88 | struct gf100_gr_init *init; | |
89 | struct gf100_gr_pack *pack; | |
90 | const int nent = (fuc->size / sizeof(struct gk20a_fw_aiv)); | |
91 | int i; | |
92 | ||
93 | pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1))); | |
94 | if (!pack) | |
95 | return ERR_PTR(-ENOMEM); | |
96 | ||
97 | init = (void *)(pack + 2); | |
98 | ||
99 | pack[0].init = init; | |
100 | ||
101 | for (i = 0; i < nent; i++) { | |
102 | struct gf100_gr_init *ent = &init[i]; | |
103 | struct gk20a_fw_aiv *av = &((struct gk20a_fw_aiv *)fuc->data)[i]; | |
104 | ||
105 | ent->addr = av->addr; | |
106 | ent->data = av->data; | |
107 | ent->count = 1; | |
108 | ent->pitch = 1; | |
109 | } | |
110 | ||
111 | return pack; | |
112 | } | |
113 | ||
114 | static struct gf100_gr_pack * | |
115 | gk20a_gr_av_to_method(struct gf100_gr_fuc *fuc) | |
116 | { | |
117 | struct gf100_gr_init *init; | |
118 | struct gf100_gr_pack *pack; | |
119 | /* We don't suppose we will initialize more than 16 classes here... */ | |
120 | static const unsigned int max_classes = 16; | |
121 | const int nent = (fuc->size / sizeof(struct gk20a_fw_av)); | |
122 | int i, classidx = 0; | |
123 | u32 prevclass = 0; | |
124 | ||
125 | pack = vzalloc((sizeof(*pack) * max_classes) + | |
126 | (sizeof(*init) * (nent + 1))); | |
127 | if (!pack) | |
128 | return ERR_PTR(-ENOMEM); | |
129 | ||
130 | init = (void *)(pack + max_classes); | |
131 | ||
132 | for (i = 0; i < nent; i++) { | |
133 | struct gf100_gr_init *ent = &init[i]; | |
134 | struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i]; | |
135 | u32 class = av->addr & 0xffff; | |
136 | u32 addr = (av->addr & 0xffff0000) >> 14; | |
137 | ||
138 | if (prevclass != class) { | |
139 | pack[classidx].init = ent; | |
140 | pack[classidx].type = class; | |
141 | prevclass = class; | |
142 | if (++classidx >= max_classes) { | |
143 | vfree(pack); | |
144 | return ERR_PTR(-ENOSPC); | |
145 | } | |
146 | } | |
147 | ||
148 | ent->addr = addr; | |
149 | ent->data = av->data; | |
150 | ent->count = 1; | |
151 | ent->pitch = 1; | |
152 | } | |
153 | ||
154 | return pack; | |
155 | } | |
156 | ||
a032fb9d | 157 | int |
c4d0f8f6 AC |
158 | gk20a_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine, |
159 | struct nvkm_oclass *oclass, void *data, u32 size, | |
160 | struct nvkm_object **pobject) | |
161 | { | |
162 | int err; | |
bfee3f3d | 163 | struct gf100_gr *gr; |
c4d0f8f6 AC |
164 | struct gf100_gr_fuc fuc; |
165 | ||
166 | err = gf100_gr_ctor(parent, engine, oclass, data, size, pobject); | |
167 | if (err) | |
168 | return err; | |
169 | ||
bfee3f3d | 170 | gr = (void *)*pobject; |
c4d0f8f6 | 171 | |
bfee3f3d | 172 | err = gf100_gr_ctor_fw(gr, "sw_nonctx", &fuc); |
c4d0f8f6 AC |
173 | if (err) |
174 | return err; | |
bfee3f3d | 175 | gr->fuc_sw_nonctx = gk20a_gr_av_to_init(&fuc); |
c4d0f8f6 | 176 | gf100_gr_dtor_fw(&fuc); |
bfee3f3d BS |
177 | if (IS_ERR(gr->fuc_sw_nonctx)) |
178 | return PTR_ERR(gr->fuc_sw_nonctx); | |
c4d0f8f6 | 179 | |
bfee3f3d | 180 | err = gf100_gr_ctor_fw(gr, "sw_ctx", &fuc); |
c4d0f8f6 AC |
181 | if (err) |
182 | return err; | |
bfee3f3d | 183 | gr->fuc_sw_ctx = gk20a_gr_aiv_to_init(&fuc); |
c4d0f8f6 | 184 | gf100_gr_dtor_fw(&fuc); |
bfee3f3d BS |
185 | if (IS_ERR(gr->fuc_sw_ctx)) |
186 | return PTR_ERR(gr->fuc_sw_ctx); | |
c4d0f8f6 | 187 | |
bfee3f3d | 188 | err = gf100_gr_ctor_fw(gr, "sw_bundle_init", &fuc); |
c4d0f8f6 AC |
189 | if (err) |
190 | return err; | |
bfee3f3d | 191 | gr->fuc_bundle = gk20a_gr_av_to_init(&fuc); |
c4d0f8f6 | 192 | gf100_gr_dtor_fw(&fuc); |
bfee3f3d BS |
193 | if (IS_ERR(gr->fuc_bundle)) |
194 | return PTR_ERR(gr->fuc_bundle); | |
c4d0f8f6 | 195 | |
bfee3f3d | 196 | err = gf100_gr_ctor_fw(gr, "sw_method_init", &fuc); |
c4d0f8f6 AC |
197 | if (err) |
198 | return err; | |
bfee3f3d | 199 | gr->fuc_method = gk20a_gr_av_to_method(&fuc); |
c4d0f8f6 | 200 | gf100_gr_dtor_fw(&fuc); |
bfee3f3d BS |
201 | if (IS_ERR(gr->fuc_method)) |
202 | return PTR_ERR(gr->fuc_method); | |
c4d0f8f6 AC |
203 | |
204 | return 0; | |
205 | } | |
206 | ||
a032fb9d | 207 | void |
c4d0f8f6 AC |
208 | gk20a_gr_dtor(struct nvkm_object *object) |
209 | { | |
bfee3f3d | 210 | struct gf100_gr *gr = (void *)object; |
c4d0f8f6 | 211 | |
bfee3f3d BS |
212 | gk20a_gr_init_dtor(gr->fuc_method); |
213 | gk20a_gr_init_dtor(gr->fuc_bundle); | |
214 | gk20a_gr_init_dtor(gr->fuc_sw_ctx); | |
215 | gk20a_gr_init_dtor(gr->fuc_sw_nonctx); | |
c4d0f8f6 AC |
216 | |
217 | gf100_gr_dtor(object); | |
218 | } | |
219 | ||
220 | static int | |
bfee3f3d | 221 | gk20a_gr_wait_mem_scrubbing(struct gf100_gr *gr) |
c4d0f8f6 | 222 | { |
bfee3f3d BS |
223 | if (!nv_wait(gr, 0x40910c, 0x6, 0x0)) { |
224 | nv_error(gr, "FECS mem scrubbing timeout\n"); | |
c4d0f8f6 AC |
225 | return -ETIMEDOUT; |
226 | } | |
227 | ||
bfee3f3d BS |
228 | if (!nv_wait(gr, 0x41a10c, 0x6, 0x0)) { |
229 | nv_error(gr, "GPCCS mem scrubbing timeout\n"); | |
c4d0f8f6 AC |
230 | return -ETIMEDOUT; |
231 | } | |
232 | ||
233 | return 0; | |
234 | } | |
235 | ||
236 | static void | |
bfee3f3d | 237 | gk20a_gr_set_hww_esr_report_mask(struct gf100_gr *gr) |
c4d0f8f6 | 238 | { |
bfee3f3d BS |
239 | nv_wr32(gr, 0x419e44, 0x1ffffe); |
240 | nv_wr32(gr, 0x419e4c, 0x7f); | |
c4d0f8f6 AC |
241 | } |
242 | ||
a032fb9d | 243 | int |
c4d0f8f6 AC |
244 | gk20a_gr_init(struct nvkm_object *object) |
245 | { | |
246 | struct gk20a_gr_oclass *oclass = (void *)object->oclass; | |
bfee3f3d BS |
247 | struct gf100_gr *gr = (void *)object; |
248 | const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); | |
c4d0f8f6 AC |
249 | u32 data[TPC_MAX / 8] = {}; |
250 | u8 tpcnr[GPC_MAX]; | |
251 | int gpc, tpc; | |
252 | int ret, i; | |
253 | ||
bfee3f3d | 254 | ret = nvkm_gr_init(&gr->base); |
c4d0f8f6 AC |
255 | if (ret) |
256 | return ret; | |
257 | ||
258 | /* Clear SCC RAM */ | |
bfee3f3d | 259 | nv_wr32(gr, 0x40802c, 0x1); |
c4d0f8f6 | 260 | |
bfee3f3d | 261 | gf100_gr_mmio(gr, gr->fuc_sw_nonctx); |
c4d0f8f6 | 262 | |
bfee3f3d | 263 | ret = gk20a_gr_wait_mem_scrubbing(gr); |
c4d0f8f6 AC |
264 | if (ret) |
265 | return ret; | |
266 | ||
bfee3f3d | 267 | ret = gf100_gr_wait_idle(gr); |
c4d0f8f6 AC |
268 | if (ret) |
269 | return ret; | |
270 | ||
271 | /* MMU debug buffer */ | |
bfee3f3d BS |
272 | nv_wr32(gr, 0x100cc8, gr->unk4188b4->addr >> 8); |
273 | nv_wr32(gr, 0x100ccc, gr->unk4188b8->addr >> 8); | |
c4d0f8f6 AC |
274 | |
275 | if (oclass->init_gpc_mmu) | |
bfee3f3d | 276 | oclass->init_gpc_mmu(gr); |
c4d0f8f6 AC |
277 | |
278 | /* Set the PE as stream master */ | |
bfee3f3d | 279 | nv_mask(gr, 0x503018, 0x1, 0x1); |
c4d0f8f6 AC |
280 | |
281 | /* Zcull init */ | |
282 | memset(data, 0x00, sizeof(data)); | |
bfee3f3d BS |
283 | memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); |
284 | for (i = 0, gpc = -1; i < gr->tpc_total; i++) { | |
c4d0f8f6 | 285 | do { |
bfee3f3d | 286 | gpc = (gpc + 1) % gr->gpc_nr; |
c4d0f8f6 | 287 | } while (!tpcnr[gpc]); |
bfee3f3d | 288 | tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; |
c4d0f8f6 AC |
289 | |
290 | data[i / 8] |= tpc << ((i % 8) * 4); | |
291 | } | |
292 | ||
bfee3f3d BS |
293 | nv_wr32(gr, GPC_BCAST(0x0980), data[0]); |
294 | nv_wr32(gr, GPC_BCAST(0x0984), data[1]); | |
295 | nv_wr32(gr, GPC_BCAST(0x0988), data[2]); | |
296 | nv_wr32(gr, GPC_BCAST(0x098c), data[3]); | |
297 | ||
298 | for (gpc = 0; gpc < gr->gpc_nr; gpc++) { | |
299 | nv_wr32(gr, GPC_UNIT(gpc, 0x0914), | |
300 | gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]); | |
301 | nv_wr32(gr, GPC_UNIT(gpc, 0x0910), 0x00040000 | | |
302 | gr->tpc_total); | |
303 | nv_wr32(gr, GPC_UNIT(gpc, 0x0918), magicgpc918); | |
c4d0f8f6 AC |
304 | } |
305 | ||
bfee3f3d | 306 | nv_wr32(gr, GPC_BCAST(0x3fd4), magicgpc918); |
c4d0f8f6 AC |
307 | |
308 | /* Enable FIFO access */ | |
bfee3f3d | 309 | nv_wr32(gr, 0x400500, 0x00010001); |
c4d0f8f6 AC |
310 | |
311 | /* Enable interrupts */ | |
bfee3f3d BS |
312 | nv_wr32(gr, 0x400100, 0xffffffff); |
313 | nv_wr32(gr, 0x40013c, 0xffffffff); | |
c4d0f8f6 AC |
314 | |
315 | /* Enable FECS error interrupts */ | |
bfee3f3d | 316 | nv_wr32(gr, 0x409c24, 0x000f0000); |
c4d0f8f6 AC |
317 | |
318 | /* Enable hardware warning exceptions */ | |
bfee3f3d BS |
319 | nv_wr32(gr, 0x404000, 0xc0000000); |
320 | nv_wr32(gr, 0x404600, 0xc0000000); | |
c4d0f8f6 AC |
321 | |
322 | if (oclass->set_hww_esr_report_mask) | |
bfee3f3d | 323 | oclass->set_hww_esr_report_mask(gr); |
c4d0f8f6 AC |
324 | |
325 | /* Enable TPC exceptions per GPC */ | |
bfee3f3d BS |
326 | nv_wr32(gr, 0x419d0c, 0x2); |
327 | nv_wr32(gr, 0x41ac94, (((1 << gr->tpc_total) - 1) & 0xff) << 16); | |
c4d0f8f6 AC |
328 | |
329 | /* Reset and enable all exceptions */ | |
bfee3f3d BS |
330 | nv_wr32(gr, 0x400108, 0xffffffff); |
331 | nv_wr32(gr, 0x400138, 0xffffffff); | |
332 | nv_wr32(gr, 0x400118, 0xffffffff); | |
333 | nv_wr32(gr, 0x400130, 0xffffffff); | |
334 | nv_wr32(gr, 0x40011c, 0xffffffff); | |
335 | nv_wr32(gr, 0x400134, 0xffffffff); | |
c4d0f8f6 | 336 | |
bfee3f3d | 337 | gf100_gr_zbc_init(gr); |
c4d0f8f6 | 338 | |
bfee3f3d | 339 | return gf100_gr_init_ctxctl(gr); |
c4d0f8f6 AC |
340 | } |
341 | ||
e3c71eb2 | 342 | struct nvkm_oclass * |
c4d0f8f6 AC |
343 | gk20a_gr_oclass = &(struct gk20a_gr_oclass) { |
344 | .gf100 = { | |
345 | .base.handle = NV_ENGINE(GR, 0xea), | |
346 | .base.ofuncs = &(struct nvkm_ofuncs) { | |
347 | .ctor = gk20a_gr_ctor, | |
348 | .dtor = gk20a_gr_dtor, | |
349 | .init = gk20a_gr_init, | |
350 | .fini = _nvkm_gr_fini, | |
351 | }, | |
352 | .cclass = &gk20a_grctx_oclass, | |
353 | .sclass = gk20a_gr_sclass, | |
354 | .ppc_nr = 1, | |
a4d4bbf1 | 355 | }, |
c4d0f8f6 AC |
356 | .set_hww_esr_report_mask = gk20a_gr_set_hww_esr_report_mask, |
357 | }.gf100.base; |