Commit | Line | Data |
---|---|---|
fbeb661b YS |
1 | /* |
2 | * Copyright 2014 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | * | |
22 | */ | |
23 | ||
24 | #include <linux/types.h> | |
25 | #include <linux/kernel.h> | |
26 | #include <linux/log2.h> | |
27 | #include <linux/sched.h> | |
28 | #include <linux/slab.h> | |
29 | #include <linux/mutex.h> | |
30 | #include <linux/device.h> | |
31 | ||
32 | #include "kfd_pm4_headers.h" | |
33 | #include "kfd_pm4_headers_diq.h" | |
34 | #include "kfd_kernel_queue.h" | |
35 | #include "kfd_priv.h" | |
36 | #include "kfd_pm4_opcodes.h" | |
37 | #include "cik_regs.h" | |
38 | #include "kfd_dbgmgr.h" | |
39 | #include "kfd_dbgdev.h" | |
40 | #include "kfd_device_queue_manager.h" | |
41 | #include "../../radeon/cik_reg.h" | |
42 | ||
43 | static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) | |
44 | { | |
45 | BUG_ON(!dev || !dev->kfd2kgd); | |
46 | ||
47 | dev->kfd2kgd->address_watch_disable(dev->kgd); | |
48 | } | |
49 | ||
788bf83d YS |
50 | static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, |
51 | unsigned int pasid, uint64_t vmid0_address, | |
52 | uint32_t *packet_buff, size_t size_in_bytes) | |
53 | { | |
54 | struct pm4__release_mem *rm_packet; | |
55 | struct pm4__indirect_buffer_pasid *ib_packet; | |
56 | struct kfd_mem_obj *mem_obj; | |
57 | size_t pq_packets_size_in_bytes; | |
58 | union ULARGE_INTEGER *largep; | |
59 | union ULARGE_INTEGER addr; | |
60 | struct kernel_queue *kq; | |
61 | uint64_t *rm_state; | |
62 | unsigned int *ib_packet_buff; | |
63 | int status; | |
64 | ||
65 | BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes); | |
66 | ||
67 | kq = dbgdev->kq; | |
68 | ||
69 | pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) + | |
70 | sizeof(struct pm4__indirect_buffer_pasid); | |
71 | ||
72 | /* | |
73 | * We acquire a buffer from DIQ | |
74 | * The receive packet buff will be sitting on the Indirect Buffer | |
75 | * and in the PQ we put the IB packet + sync packet(s). | |
76 | */ | |
77 | status = kq->ops.acquire_packet_buffer(kq, | |
78 | pq_packets_size_in_bytes / sizeof(uint32_t), | |
79 | &ib_packet_buff); | |
80 | if (status != 0) { | |
81 | pr_err("amdkfd: acquire_packet_buffer failed\n"); | |
82 | return status; | |
83 | } | |
84 | ||
85 | memset(ib_packet_buff, 0, pq_packets_size_in_bytes); | |
86 | ||
87 | ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); | |
88 | ||
89 | ib_packet->header.count = 3; | |
90 | ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID; | |
91 | ib_packet->header.type = PM4_TYPE_3; | |
92 | ||
93 | largep = (union ULARGE_INTEGER *) &vmid0_address; | |
94 | ||
95 | ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2; | |
96 | ib_packet->bitfields3.ib_base_hi = largep->u.high_part; | |
97 | ||
98 | ib_packet->control = (1 << 23) | (1 << 31) | | |
99 | ((size_in_bytes / sizeof(uint32_t)) & 0xfffff); | |
100 | ||
101 | ib_packet->bitfields5.pasid = pasid; | |
102 | ||
103 | /* | |
104 | * for now we use release mem for GPU-CPU synchronization | |
105 | * Consider WaitRegMem + WriteData as a better alternative | |
106 | * we get a GART allocations ( gpu/cpu mapping), | |
107 | * for the sync variable, and wait until: | |
108 | * (a) Sync with HW | |
109 | * (b) Sync var is written by CP to mem. | |
110 | */ | |
111 | rm_packet = (struct pm4__release_mem *) (ib_packet_buff + | |
112 | (sizeof(struct pm4__indirect_buffer_pasid) / | |
113 | sizeof(unsigned int))); | |
114 | ||
115 | status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), | |
116 | &mem_obj); | |
117 | ||
118 | if (status != 0) { | |
119 | pr_err("amdkfd: Failed to allocate GART memory\n"); | |
120 | kq->ops.rollback_packet(kq); | |
121 | return status; | |
122 | } | |
123 | ||
124 | rm_state = (uint64_t *) mem_obj->cpu_ptr; | |
125 | ||
126 | *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING; | |
127 | ||
128 | rm_packet->header.opcode = IT_RELEASE_MEM; | |
129 | rm_packet->header.type = PM4_TYPE_3; | |
130 | rm_packet->header.count = sizeof(struct pm4__release_mem) / | |
131 | sizeof(unsigned int) - 2; | |
132 | ||
133 | rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; | |
134 | rm_packet->bitfields2.event_index = | |
135 | event_index___release_mem__end_of_pipe; | |
136 | ||
137 | rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru; | |
138 | rm_packet->bitfields2.atc = 0; | |
139 | rm_packet->bitfields2.tc_wb_action_ena = 1; | |
140 | ||
141 | addr.quad_part = mem_obj->gpu_addr; | |
142 | ||
143 | rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2; | |
144 | rm_packet->address_hi = addr.u.high_part; | |
145 | ||
146 | rm_packet->bitfields3.data_sel = | |
147 | data_sel___release_mem__send_64_bit_data; | |
148 | ||
149 | rm_packet->bitfields3.int_sel = | |
150 | int_sel___release_mem__send_data_after_write_confirm; | |
151 | ||
152 | rm_packet->bitfields3.dst_sel = | |
153 | dst_sel___release_mem__memory_controller; | |
154 | ||
155 | rm_packet->data_lo = QUEUESTATE__ACTIVE; | |
156 | ||
157 | kq->ops.submit_packet(kq); | |
158 | ||
159 | /* Wait till CP writes sync code: */ | |
160 | status = amdkfd_fence_wait_timeout( | |
161 | (unsigned int *) rm_state, | |
162 | QUEUESTATE__ACTIVE, 1500); | |
163 | ||
164 | kfd_gtt_sa_free(dbgdev->dev, mem_obj); | |
165 | ||
166 | return status; | |
167 | } | |
168 | ||
fbeb661b YS |
169 | static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) |
170 | { | |
171 | BUG_ON(!dbgdev); | |
172 | ||
173 | /* | |
174 | * no action is needed in this case, | |
175 | * just make sure diq will not be used | |
176 | */ | |
177 | ||
178 | dbgdev->kq = NULL; | |
179 | ||
180 | return 0; | |
181 | } | |
182 | ||
183 | static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) | |
184 | { | |
185 | struct queue_properties properties; | |
186 | unsigned int qid; | |
187 | struct kernel_queue *kq = NULL; | |
188 | int status; | |
189 | ||
190 | BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev); | |
191 | ||
192 | status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, | |
193 | &properties, 0, KFD_QUEUE_TYPE_DIQ, | |
194 | &qid); | |
195 | ||
196 | if (status) { | |
197 | pr_err("amdkfd: Failed to create DIQ\n"); | |
198 | return status; | |
199 | } | |
200 | ||
201 | pr_debug("DIQ Created with queue id: %d\n", qid); | |
202 | ||
203 | kq = pqm_get_kernel_queue(dbgdev->pqm, qid); | |
204 | ||
205 | if (kq == NULL) { | |
206 | pr_err("amdkfd: Error getting DIQ\n"); | |
207 | pqm_destroy_queue(dbgdev->pqm, qid); | |
208 | return -EFAULT; | |
209 | } | |
210 | ||
211 | dbgdev->kq = kq; | |
212 | ||
213 | return status; | |
214 | } | |
215 | ||
216 | static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev) | |
217 | { | |
218 | BUG_ON(!dbgdev || !dbgdev->dev); | |
219 | ||
220 | /* disable watch address */ | |
221 | dbgdev_address_watch_disable_nodiq(dbgdev->dev); | |
222 | return 0; | |
223 | } | |
224 | ||
225 | static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) | |
226 | { | |
227 | /* todo - disable address watch */ | |
228 | int status; | |
229 | ||
230 | BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq); | |
231 | ||
232 | status = pqm_destroy_queue(dbgdev->pqm, | |
233 | dbgdev->kq->queue->properties.queue_id); | |
234 | dbgdev->kq = NULL; | |
235 | ||
236 | return status; | |
237 | } | |
238 | ||
e2e9afc4 YS |
239 | static void dbgdev_address_watch_set_registers( |
240 | const struct dbg_address_watch_info *adw_info, | |
241 | union TCP_WATCH_ADDR_H_BITS *addrHi, | |
242 | union TCP_WATCH_ADDR_L_BITS *addrLo, | |
243 | union TCP_WATCH_CNTL_BITS *cntl, | |
244 | unsigned int index, unsigned int vmid) | |
245 | { | |
246 | union ULARGE_INTEGER addr; | |
247 | ||
248 | BUG_ON(!adw_info || !addrHi || !addrLo || !cntl); | |
249 | ||
250 | addr.quad_part = 0; | |
251 | addrHi->u32All = 0; | |
252 | addrLo->u32All = 0; | |
253 | cntl->u32All = 0; | |
254 | ||
255 | if (adw_info->watch_mask != NULL) | |
256 | cntl->bitfields.mask = | |
257 | (uint32_t) (adw_info->watch_mask[index] & | |
258 | ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); | |
259 | else | |
260 | cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; | |
261 | ||
262 | addr.quad_part = (unsigned long long) adw_info->watch_address[index]; | |
263 | ||
264 | addrHi->bitfields.addr = addr.u.high_part & | |
265 | ADDRESS_WATCH_REG_ADDHIGH_MASK; | |
266 | addrLo->bitfields.addr = | |
267 | (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT); | |
268 | ||
269 | cntl->bitfields.mode = adw_info->watch_mode[index]; | |
270 | cntl->bitfields.vmid = (uint32_t) vmid; | |
271 | /* for now assume it is an ATC address */ | |
272 | cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT; | |
273 | ||
274 | pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask); | |
275 | pr_debug("\t\t%20s %08x\n", "set reg add high :", | |
276 | addrHi->bitfields.addr); | |
277 | pr_debug("\t\t%20s %08x\n", "set reg add low :", | |
278 | addrLo->bitfields.addr); | |
279 | } | |
280 | ||
281 | static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, | |
282 | struct dbg_address_watch_info *adw_info) | |
283 | { | |
284 | union TCP_WATCH_ADDR_H_BITS addrHi; | |
285 | union TCP_WATCH_ADDR_L_BITS addrLo; | |
286 | union TCP_WATCH_CNTL_BITS cntl; | |
287 | struct kfd_process_device *pdd; | |
288 | unsigned int i; | |
289 | ||
290 | BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); | |
291 | ||
292 | /* taking the vmid for that process on the safe way using pdd */ | |
293 | pdd = kfd_get_process_device_data(dbgdev->dev, | |
294 | adw_info->process); | |
295 | if (!pdd) { | |
296 | pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n"); | |
297 | return -EFAULT; | |
298 | } | |
299 | ||
300 | addrHi.u32All = 0; | |
301 | addrLo.u32All = 0; | |
302 | cntl.u32All = 0; | |
303 | ||
304 | if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || | |
305 | (adw_info->num_watch_points == 0)) { | |
306 | pr_err("amdkfd: num_watch_points is invalid\n"); | |
307 | return -EINVAL; | |
308 | } | |
309 | ||
310 | if ((adw_info->watch_mode == NULL) || | |
311 | (adw_info->watch_address == NULL)) { | |
312 | pr_err("amdkfd: adw_info fields are not valid\n"); | |
313 | return -EINVAL; | |
314 | } | |
315 | ||
316 | for (i = 0 ; i < adw_info->num_watch_points ; i++) { | |
317 | dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, | |
318 | &cntl, i, pdd->qpd.vmid); | |
319 | ||
320 | pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); | |
321 | pr_debug("\t\t%20s %08x\n", "register index :", i); | |
322 | pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid); | |
323 | pr_debug("\t\t%20s %08x\n", "Address Low is :", | |
324 | addrLo.bitfields.addr); | |
325 | pr_debug("\t\t%20s %08x\n", "Address high is :", | |
326 | addrHi.bitfields.addr); | |
327 | pr_debug("\t\t%20s %08x\n", "Address high is :", | |
328 | addrHi.bitfields.addr); | |
329 | pr_debug("\t\t%20s %08x\n", "Control Mask is :", | |
330 | cntl.bitfields.mask); | |
331 | pr_debug("\t\t%20s %08x\n", "Control Mode is :", | |
332 | cntl.bitfields.mode); | |
333 | pr_debug("\t\t%20s %08x\n", "Control Vmid is :", | |
334 | cntl.bitfields.vmid); | |
335 | pr_debug("\t\t%20s %08x\n", "Control atc is :", | |
336 | cntl.bitfields.atc); | |
337 | pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); | |
338 | ||
339 | pdd->dev->kfd2kgd->address_watch_execute( | |
340 | dbgdev->dev->kgd, | |
341 | i, | |
342 | cntl.u32All, | |
343 | addrHi.u32All, | |
344 | addrLo.u32All); | |
345 | } | |
346 | ||
347 | return 0; | |
348 | } | |
349 | ||
350 | static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, | |
351 | struct dbg_address_watch_info *adw_info) | |
352 | { | |
353 | struct pm4__set_config_reg *packets_vec; | |
354 | union TCP_WATCH_ADDR_H_BITS addrHi; | |
355 | union TCP_WATCH_ADDR_L_BITS addrLo; | |
356 | union TCP_WATCH_CNTL_BITS cntl; | |
357 | struct kfd_mem_obj *mem_obj; | |
358 | unsigned int aw_reg_add_dword; | |
359 | uint32_t *packet_buff_uint; | |
360 | unsigned int i; | |
361 | int status; | |
362 | size_t ib_size = sizeof(struct pm4__set_config_reg) * 4; | |
363 | /* we do not control the vmid in DIQ mode, just a place holder */ | |
364 | unsigned int vmid = 0; | |
365 | ||
366 | BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); | |
367 | ||
368 | addrHi.u32All = 0; | |
369 | addrLo.u32All = 0; | |
370 | cntl.u32All = 0; | |
371 | ||
372 | if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || | |
373 | (adw_info->num_watch_points == 0)) { | |
374 | pr_err("amdkfd: num_watch_points is invalid\n"); | |
375 | return -EINVAL; | |
376 | } | |
377 | ||
378 | if ((NULL == adw_info->watch_mode) || | |
379 | (NULL == adw_info->watch_address)) { | |
380 | pr_err("amdkfd: adw_info fields are not valid\n"); | |
381 | return -EINVAL; | |
382 | } | |
383 | ||
384 | status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); | |
385 | ||
386 | if (status != 0) { | |
387 | pr_err("amdkfd: Failed to allocate GART memory\n"); | |
388 | return status; | |
389 | } | |
390 | ||
391 | packet_buff_uint = mem_obj->cpu_ptr; | |
392 | ||
393 | memset(packet_buff_uint, 0, ib_size); | |
394 | ||
395 | packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); | |
396 | ||
397 | packets_vec[0].header.count = 1; | |
398 | packets_vec[0].header.opcode = IT_SET_CONFIG_REG; | |
399 | packets_vec[0].header.type = PM4_TYPE_3; | |
400 | packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; | |
401 | packets_vec[0].bitfields2.insert_vmid = 1; | |
402 | packets_vec[1].ordinal1 = packets_vec[0].ordinal1; | |
403 | packets_vec[1].bitfields2.insert_vmid = 0; | |
404 | packets_vec[2].ordinal1 = packets_vec[0].ordinal1; | |
405 | packets_vec[2].bitfields2.insert_vmid = 0; | |
406 | packets_vec[3].ordinal1 = packets_vec[0].ordinal1; | |
407 | packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; | |
408 | packets_vec[3].bitfields2.insert_vmid = 1; | |
409 | ||
410 | for (i = 0; i < adw_info->num_watch_points; i++) { | |
411 | dbgdev_address_watch_set_registers(adw_info, | |
412 | &addrHi, | |
413 | &addrLo, | |
414 | &cntl, | |
415 | i, | |
416 | vmid); | |
417 | ||
418 | pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); | |
419 | pr_debug("\t\t%20s %08x\n", "register index :", i); | |
420 | pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); | |
421 | pr_debug("\t\t%20s %p\n", "Add ptr is :", | |
422 | adw_info->watch_address); | |
423 | pr_debug("\t\t%20s %08llx\n", "Add is :", | |
424 | adw_info->watch_address[i]); | |
425 | pr_debug("\t\t%20s %08x\n", "Address Low is :", | |
426 | addrLo.bitfields.addr); | |
427 | pr_debug("\t\t%20s %08x\n", "Address high is :", | |
428 | addrHi.bitfields.addr); | |
429 | pr_debug("\t\t%20s %08x\n", "Control Mask is :", | |
430 | cntl.bitfields.mask); | |
431 | pr_debug("\t\t%20s %08x\n", "Control Mode is :", | |
432 | cntl.bitfields.mode); | |
433 | pr_debug("\t\t%20s %08x\n", "Control Vmid is :", | |
434 | cntl.bitfields.vmid); | |
435 | pr_debug("\t\t%20s %08x\n", "Control atc is :", | |
436 | cntl.bitfields.atc); | |
437 | pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); | |
438 | ||
439 | aw_reg_add_dword = | |
440 | dbgdev->dev->kfd2kgd->address_watch_get_offset( | |
441 | dbgdev->dev->kgd, | |
442 | i, | |
443 | ADDRESS_WATCH_REG_CNTL); | |
444 | ||
445 | aw_reg_add_dword /= sizeof(uint32_t); | |
446 | ||
447 | packets_vec[0].bitfields2.reg_offset = | |
f4e04022 | 448 | aw_reg_add_dword - AMD_CONFIG_REG_BASE; |
e2e9afc4 YS |
449 | |
450 | packets_vec[0].reg_data[0] = cntl.u32All; | |
451 | ||
452 | aw_reg_add_dword = | |
453 | dbgdev->dev->kfd2kgd->address_watch_get_offset( | |
454 | dbgdev->dev->kgd, | |
455 | i, | |
456 | ADDRESS_WATCH_REG_ADDR_HI); | |
457 | ||
458 | aw_reg_add_dword /= sizeof(uint32_t); | |
459 | ||
460 | packets_vec[1].bitfields2.reg_offset = | |
f4e04022 | 461 | aw_reg_add_dword - AMD_CONFIG_REG_BASE; |
e2e9afc4 YS |
462 | packets_vec[1].reg_data[0] = addrHi.u32All; |
463 | ||
464 | aw_reg_add_dword = | |
465 | dbgdev->dev->kfd2kgd->address_watch_get_offset( | |
466 | dbgdev->dev->kgd, | |
467 | i, | |
468 | ADDRESS_WATCH_REG_ADDR_LO); | |
469 | ||
470 | aw_reg_add_dword /= sizeof(uint32_t); | |
471 | ||
472 | packets_vec[2].bitfields2.reg_offset = | |
f4e04022 | 473 | aw_reg_add_dword - AMD_CONFIG_REG_BASE; |
e2e9afc4 YS |
474 | packets_vec[2].reg_data[0] = addrLo.u32All; |
475 | ||
476 | /* enable watch flag if address is not zero*/ | |
477 | if (adw_info->watch_address[i] > 0) | |
478 | cntl.bitfields.valid = 1; | |
479 | else | |
480 | cntl.bitfields.valid = 0; | |
481 | ||
482 | aw_reg_add_dword = | |
483 | dbgdev->dev->kfd2kgd->address_watch_get_offset( | |
484 | dbgdev->dev->kgd, | |
485 | i, | |
486 | ADDRESS_WATCH_REG_CNTL); | |
487 | ||
488 | aw_reg_add_dword /= sizeof(uint32_t); | |
489 | ||
490 | packets_vec[3].bitfields2.reg_offset = | |
f4e04022 | 491 | aw_reg_add_dword - AMD_CONFIG_REG_BASE; |
e2e9afc4 YS |
492 | packets_vec[3].reg_data[0] = cntl.u32All; |
493 | ||
494 | status = dbgdev_diq_submit_ib( | |
495 | dbgdev, | |
496 | adw_info->process->pasid, | |
497 | mem_obj->gpu_addr, | |
498 | packet_buff_uint, | |
499 | ib_size); | |
500 | ||
501 | if (status != 0) { | |
502 | pr_err("amdkfd: Failed to submit IB to DIQ\n"); | |
503 | break; | |
504 | } | |
505 | } | |
506 | ||
507 | kfd_gtt_sa_free(dbgdev->dev, mem_obj); | |
508 | return status; | |
509 | } | |
510 | ||
788bf83d YS |
511 | static int dbgdev_wave_control_set_registers( |
512 | struct dbg_wave_control_info *wac_info, | |
513 | union SQ_CMD_BITS *in_reg_sq_cmd, | |
514 | union GRBM_GFX_INDEX_BITS *in_reg_gfx_index) | |
515 | { | |
516 | int status; | |
517 | union SQ_CMD_BITS reg_sq_cmd; | |
518 | union GRBM_GFX_INDEX_BITS reg_gfx_index; | |
519 | struct HsaDbgWaveMsgAMDGen2 *pMsg; | |
520 | ||
521 | BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index); | |
522 | ||
523 | reg_sq_cmd.u32All = 0; | |
524 | reg_gfx_index.u32All = 0; | |
525 | pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; | |
526 | ||
527 | switch (wac_info->mode) { | |
528 | /* Send command to single wave */ | |
529 | case HSA_DBG_WAVEMODE_SINGLE: | |
530 | /* | |
531 | * Limit access to the process waves only, | |
532 | * by setting vmid check | |
533 | */ | |
534 | reg_sq_cmd.bits.check_vmid = 1; | |
535 | reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD; | |
536 | reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId; | |
537 | reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE; | |
538 | ||
539 | reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; | |
540 | reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; | |
541 | reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; | |
542 | ||
543 | break; | |
544 | ||
545 | /* Send command to all waves with matching VMID */ | |
546 | case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: | |
547 | ||
548 | reg_gfx_index.bits.sh_broadcast_writes = 1; | |
549 | reg_gfx_index.bits.se_broadcast_writes = 1; | |
550 | reg_gfx_index.bits.instance_broadcast_writes = 1; | |
551 | ||
552 | reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; | |
553 | ||
554 | break; | |
555 | ||
556 | /* Send command to all CU waves with matching VMID */ | |
557 | case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU: | |
558 | ||
559 | reg_sq_cmd.bits.check_vmid = 1; | |
560 | reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; | |
561 | ||
562 | reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; | |
563 | reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; | |
564 | reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; | |
565 | ||
566 | break; | |
567 | ||
568 | default: | |
569 | return -EINVAL; | |
570 | } | |
571 | ||
572 | switch (wac_info->operand) { | |
573 | case HSA_DBG_WAVEOP_HALT: | |
574 | reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT; | |
575 | break; | |
576 | ||
577 | case HSA_DBG_WAVEOP_RESUME: | |
578 | reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME; | |
579 | break; | |
580 | ||
581 | case HSA_DBG_WAVEOP_KILL: | |
582 | reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; | |
583 | break; | |
584 | ||
585 | case HSA_DBG_WAVEOP_DEBUG: | |
586 | reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG; | |
587 | break; | |
588 | ||
589 | case HSA_DBG_WAVEOP_TRAP: | |
590 | if (wac_info->trapId < MAX_TRAPID) { | |
591 | reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP; | |
592 | reg_sq_cmd.bits.trap_id = wac_info->trapId; | |
593 | } else { | |
594 | status = -EINVAL; | |
595 | } | |
596 | break; | |
597 | ||
598 | default: | |
599 | status = -EINVAL; | |
600 | break; | |
601 | } | |
602 | ||
603 | if (status == 0) { | |
604 | *in_reg_sq_cmd = reg_sq_cmd; | |
605 | *in_reg_gfx_index = reg_gfx_index; | |
606 | } | |
607 | ||
608 | return status; | |
609 | } | |
610 | ||
611 | static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, | |
612 | struct dbg_wave_control_info *wac_info) | |
613 | { | |
614 | ||
615 | int status; | |
616 | union SQ_CMD_BITS reg_sq_cmd; | |
617 | union GRBM_GFX_INDEX_BITS reg_gfx_index; | |
618 | struct kfd_mem_obj *mem_obj; | |
619 | uint32_t *packet_buff_uint; | |
620 | struct pm4__set_config_reg *packets_vec; | |
621 | size_t ib_size = sizeof(struct pm4__set_config_reg) * 3; | |
622 | ||
623 | BUG_ON(!dbgdev || !wac_info); | |
624 | ||
625 | reg_sq_cmd.u32All = 0; | |
626 | ||
627 | status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, | |
628 | ®_gfx_index); | |
629 | if (status) { | |
630 | pr_err("amdkfd: Failed to set wave control registers\n"); | |
631 | return status; | |
632 | } | |
633 | ||
634 | /* we do not control the VMID in DIQ,so reset it to a known value */ | |
635 | reg_sq_cmd.bits.vm_id = 0; | |
636 | ||
637 | pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); | |
638 | ||
639 | pr_debug("\t\t mode is: %u\n", wac_info->mode); | |
640 | pr_debug("\t\t operand is: %u\n", wac_info->operand); | |
641 | pr_debug("\t\t trap id is: %u\n", wac_info->trapId); | |
642 | pr_debug("\t\t msg value is: %u\n", | |
643 | wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); | |
644 | pr_debug("\t\t vmid is: N/A\n"); | |
645 | ||
646 | pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); | |
647 | pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); | |
648 | pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); | |
649 | pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); | |
650 | pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); | |
651 | pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); | |
652 | pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); | |
653 | ||
654 | pr_debug("\t\t ibw is : %u\n", | |
655 | reg_gfx_index.bitfields.instance_broadcast_writes); | |
656 | pr_debug("\t\t ii is : %u\n", | |
657 | reg_gfx_index.bitfields.instance_index); | |
658 | pr_debug("\t\t sebw is : %u\n", | |
659 | reg_gfx_index.bitfields.se_broadcast_writes); | |
660 | pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); | |
661 | pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); | |
662 | pr_debug("\t\t sbw is : %u\n", | |
663 | reg_gfx_index.bitfields.sh_broadcast_writes); | |
664 | ||
665 | pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); | |
666 | ||
667 | status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); | |
668 | ||
669 | if (status != 0) { | |
670 | pr_err("amdkfd: Failed to allocate GART memory\n"); | |
671 | return status; | |
672 | } | |
673 | ||
674 | packet_buff_uint = mem_obj->cpu_ptr; | |
675 | ||
676 | memset(packet_buff_uint, 0, ib_size); | |
677 | ||
678 | packets_vec = (struct pm4__set_config_reg *) packet_buff_uint; | |
679 | packets_vec[0].header.count = 1; | |
680 | packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; | |
681 | packets_vec[0].header.type = PM4_TYPE_3; | |
682 | packets_vec[0].bitfields2.reg_offset = | |
683 | GRBM_GFX_INDEX / (sizeof(uint32_t)) - | |
684 | USERCONFIG_REG_BASE; | |
685 | ||
686 | packets_vec[0].bitfields2.insert_vmid = 0; | |
687 | packets_vec[0].reg_data[0] = reg_gfx_index.u32All; | |
688 | ||
689 | packets_vec[1].header.count = 1; | |
690 | packets_vec[1].header.opcode = IT_SET_CONFIG_REG; | |
691 | packets_vec[1].header.type = PM4_TYPE_3; | |
692 | packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) - | |
f4e04022 | 693 | AMD_CONFIG_REG_BASE; |
788bf83d YS |
694 | |
695 | packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; | |
696 | packets_vec[1].bitfields2.insert_vmid = 1; | |
697 | packets_vec[1].reg_data[0] = reg_sq_cmd.u32All; | |
698 | ||
699 | /* Restore the GRBM_GFX_INDEX register */ | |
700 | ||
701 | reg_gfx_index.u32All = 0; | |
702 | reg_gfx_index.bits.sh_broadcast_writes = 1; | |
703 | reg_gfx_index.bits.instance_broadcast_writes = 1; | |
704 | reg_gfx_index.bits.se_broadcast_writes = 1; | |
705 | ||
706 | ||
707 | packets_vec[2].ordinal1 = packets_vec[0].ordinal1; | |
708 | packets_vec[2].bitfields2.reg_offset = | |
709 | GRBM_GFX_INDEX / (sizeof(uint32_t)) - | |
710 | USERCONFIG_REG_BASE; | |
711 | ||
712 | packets_vec[2].bitfields2.insert_vmid = 0; | |
713 | packets_vec[2].reg_data[0] = reg_gfx_index.u32All; | |
714 | ||
715 | status = dbgdev_diq_submit_ib( | |
716 | dbgdev, | |
717 | wac_info->process->pasid, | |
718 | mem_obj->gpu_addr, | |
719 | packet_buff_uint, | |
720 | ib_size); | |
721 | ||
722 | if (status != 0) | |
723 | pr_err("amdkfd: Failed to submit IB to DIQ\n"); | |
724 | ||
725 | kfd_gtt_sa_free(dbgdev->dev, mem_obj); | |
726 | ||
727 | return status; | |
728 | } | |
729 | ||
730 | static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, | |
731 | struct dbg_wave_control_info *wac_info) | |
732 | { | |
733 | int status; | |
734 | union SQ_CMD_BITS reg_sq_cmd; | |
735 | union GRBM_GFX_INDEX_BITS reg_gfx_index; | |
736 | struct kfd_process_device *pdd; | |
737 | ||
738 | BUG_ON(!dbgdev || !dbgdev->dev || !wac_info); | |
739 | ||
740 | reg_sq_cmd.u32All = 0; | |
741 | ||
742 | /* taking the VMID for that process on the safe way using PDD */ | |
743 | pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process); | |
744 | ||
745 | if (!pdd) { | |
746 | pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n"); | |
747 | return -EFAULT; | |
748 | } | |
749 | status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, | |
750 | ®_gfx_index); | |
751 | if (status) { | |
752 | pr_err("amdkfd: Failed to set wave control registers\n"); | |
753 | return status; | |
754 | } | |
755 | ||
756 | /* for non DIQ we need to patch the VMID: */ | |
757 | ||
758 | reg_sq_cmd.bits.vm_id = pdd->qpd.vmid; | |
759 | ||
760 | pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); | |
761 | ||
762 | pr_debug("\t\t mode is: %u\n", wac_info->mode); | |
763 | pr_debug("\t\t operand is: %u\n", wac_info->operand); | |
764 | pr_debug("\t\t trap id is: %u\n", wac_info->trapId); | |
765 | pr_debug("\t\t msg value is: %u\n", | |
766 | wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); | |
767 | pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid); | |
768 | ||
769 | pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); | |
770 | pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); | |
771 | pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); | |
772 | pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); | |
773 | pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); | |
774 | pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); | |
775 | pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); | |
776 | ||
777 | pr_debug("\t\t ibw is : %u\n", | |
778 | reg_gfx_index.bitfields.instance_broadcast_writes); | |
779 | pr_debug("\t\t ii is : %u\n", | |
780 | reg_gfx_index.bitfields.instance_index); | |
781 | pr_debug("\t\t sebw is : %u\n", | |
782 | reg_gfx_index.bitfields.se_broadcast_writes); | |
783 | pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); | |
784 | pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); | |
785 | pr_debug("\t\t sbw is : %u\n", | |
786 | reg_gfx_index.bitfields.sh_broadcast_writes); | |
787 | ||
788 | pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); | |
789 | ||
790 | return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd, | |
791 | reg_gfx_index.u32All, | |
792 | reg_sq_cmd.u32All); | |
793 | } | |
794 | ||
c3447e81 BG |
795 | int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) |
796 | { | |
797 | int status = 0; | |
798 | unsigned int vmid; | |
799 | union SQ_CMD_BITS reg_sq_cmd; | |
800 | union GRBM_GFX_INDEX_BITS reg_gfx_index; | |
801 | struct kfd_process_device *pdd; | |
802 | struct dbg_wave_control_info wac_info; | |
803 | int temp; | |
804 | int first_vmid_to_scan = 8; | |
805 | int last_vmid_to_scan = 15; | |
806 | ||
807 | first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1; | |
808 | temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan; | |
809 | last_vmid_to_scan = first_vmid_to_scan + ffz(temp); | |
810 | ||
811 | reg_sq_cmd.u32All = 0; | |
812 | status = 0; | |
813 | ||
814 | wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS; | |
815 | wac_info.operand = HSA_DBG_WAVEOP_KILL; | |
816 | ||
817 | pr_debug("Killing all process wavefronts\n"); | |
818 | ||
819 | /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. | |
820 | * ATC_VMID15_PASID_MAPPING | |
821 | * to check which VMID the current process is mapped to. */ | |
822 | ||
823 | for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { | |
824 | if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid | |
825 | (dev->kgd, vmid)) { | |
826 | if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid | |
827 | (dev->kgd, vmid) == p->pasid) { | |
828 | pr_debug("Killing wave fronts of vmid %d and pasid %d\n", | |
829 | vmid, p->pasid); | |
830 | break; | |
831 | } | |
832 | } | |
833 | } | |
834 | ||
835 | if (vmid > last_vmid_to_scan) { | |
836 | pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid); | |
837 | return -EFAULT; | |
838 | } | |
839 | ||
840 | /* taking the VMID for that process on the safe way using PDD */ | |
841 | pdd = kfd_get_process_device_data(dev, p); | |
842 | if (!pdd) | |
843 | return -EFAULT; | |
844 | ||
845 | status = dbgdev_wave_control_set_registers(&wac_info, ®_sq_cmd, | |
846 | ®_gfx_index); | |
847 | if (status != 0) | |
848 | return -EINVAL; | |
849 | ||
850 | /* for non DIQ we need to patch the VMID: */ | |
851 | reg_sq_cmd.bits.vm_id = vmid; | |
852 | ||
853 | dev->kfd2kgd->wave_control_execute(dev->kgd, | |
854 | reg_gfx_index.u32All, | |
855 | reg_sq_cmd.u32All); | |
856 | ||
857 | return 0; | |
858 | } | |
859 | ||
fbeb661b YS |
860 | void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, |
861 | enum DBGDEV_TYPE type) | |
862 | { | |
863 | BUG_ON(!pdbgdev || !pdev); | |
864 | ||
865 | pdbgdev->dev = pdev; | |
866 | pdbgdev->kq = NULL; | |
867 | pdbgdev->type = type; | |
868 | pdbgdev->pqm = NULL; | |
869 | ||
870 | switch (type) { | |
871 | case DBGDEV_TYPE_NODIQ: | |
872 | pdbgdev->dbgdev_register = dbgdev_register_nodiq; | |
873 | pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq; | |
788bf83d | 874 | pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq; |
e2e9afc4 | 875 | pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq; |
fbeb661b YS |
876 | break; |
877 | case DBGDEV_TYPE_DIQ: | |
878 | default: | |
879 | pdbgdev->dbgdev_register = dbgdev_register_diq; | |
880 | pdbgdev->dbgdev_unregister = dbgdev_unregister_diq; | |
788bf83d | 881 | pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq; |
e2e9afc4 | 882 | pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq; |
fbeb661b YS |
883 | break; |
884 | } | |
885 | ||
886 | } |