Commit | Line | Data |
---|---|---|
2249d558 AL |
1 | /* |
2 | * Copyright 2014 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | */ | |
22 | ||
23 | /* | |
24 | * KFD Interrupts. | |
25 | * | |
26 | * AMD GPUs deliver interrupts by pushing an interrupt description onto the | |
27 | * interrupt ring and then sending an interrupt. KGD receives the interrupt | |
28 | * in ISR and sends us a pointer to each new entry on the interrupt ring. | |
29 | * | |
30 | * We generally can't process interrupt-signaled events from ISR, so we call | |
31 | * out to each interrupt client module (currently only the scheduler) to ask if | |
32 | * each interrupt is interesting. If they return true, then it requires further | |
33 | * processing so we copy it to an internal interrupt ring and call each | |
34 | * interrupt client again from a work-queue. | |
35 | * | |
36 | * There's no acknowledgment for the interrupts we use. The hardware simply | |
37 | * queues a new interrupt each time without waiting. | |
38 | * | |
39 | * The fixed-size internal queue means that it's possible for us to lose | |
40 | * interrupts because we have no back-pressure to the hardware. | |
41 | */ | |
42 | ||
43 | #include <linux/slab.h> | |
44 | #include <linux/device.h> | |
45 | #include "kfd_priv.h" | |
46 | ||
/* Capacity of the software interrupt ring, counted in IH ring entries. */
#define KFD_INTERRUPT_RING_SIZE 1024

/*
 * Work handler that drains the software interrupt ring. Declared ahead of
 * its definition because kfd_interrupt_init() registers it with INIT_WORK().
 */
static void interrupt_wq(struct work_struct *work);
50 | ||
51 | int kfd_interrupt_init(struct kfd_dev *kfd) | |
52 | { | |
53 | void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE, | |
54 | kfd->device_info->ih_ring_entry_size, | |
55 | GFP_KERNEL); | |
56 | if (!interrupt_ring) | |
57 | return -ENOMEM; | |
58 | ||
59 | kfd->interrupt_ring = interrupt_ring; | |
60 | kfd->interrupt_ring_size = | |
61 | KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size; | |
62 | atomic_set(&kfd->interrupt_ring_wptr, 0); | |
63 | atomic_set(&kfd->interrupt_ring_rptr, 0); | |
64 | ||
65 | spin_lock_init(&kfd->interrupt_lock); | |
66 | ||
67 | INIT_WORK(&kfd->interrupt_work, interrupt_wq); | |
68 | ||
69 | kfd->interrupts_active = true; | |
70 | ||
71 | /* | |
72 | * After this function returns, the interrupt will be enabled. This | |
73 | * barrier ensures that the interrupt running on a different processor | |
74 | * sees all the above writes. | |
75 | */ | |
76 | smp_wmb(); | |
77 | ||
78 | return 0; | |
79 | } | |
80 | ||
81 | void kfd_interrupt_exit(struct kfd_dev *kfd) | |
82 | { | |
83 | /* | |
84 | * Stop the interrupt handler from writing to the ring and scheduling | |
85 | * workqueue items. The spinlock ensures that any interrupt running | |
86 | * after we have unlocked sees interrupts_active = false. | |
87 | */ | |
88 | unsigned long flags; | |
89 | ||
90 | spin_lock_irqsave(&kfd->interrupt_lock, flags); | |
91 | kfd->interrupts_active = false; | |
92 | spin_unlock_irqrestore(&kfd->interrupt_lock, flags); | |
93 | ||
94 | /* | |
95 | * Flush_scheduled_work ensures that there are no outstanding | |
96 | * work-queue items that will access interrupt_ring. New work items | |
97 | * can't be created because we stopped interrupt handling above. | |
98 | */ | |
99 | flush_scheduled_work(); | |
100 | ||
101 | kfree(kfd->interrupt_ring); | |
102 | } | |
103 | ||
104 | /* | |
105 | * This assumes that it can't be called concurrently with itself | |
106 | * but only with dequeue_ih_ring_entry. | |
107 | */ | |
108 | bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry) | |
109 | { | |
110 | unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); | |
111 | unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); | |
112 | ||
113 | if ((rptr - wptr) % kfd->interrupt_ring_size == | |
114 | kfd->device_info->ih_ring_entry_size) { | |
115 | /* This is very bad, the system is likely to hang. */ | |
116 | dev_err_ratelimited(kfd_chardev(), | |
117 | "Interrupt ring overflow, dropping interrupt.\n"); | |
118 | return false; | |
119 | } | |
120 | ||
121 | memcpy(kfd->interrupt_ring + wptr, ih_ring_entry, | |
122 | kfd->device_info->ih_ring_entry_size); | |
123 | ||
124 | wptr = (wptr + kfd->device_info->ih_ring_entry_size) % | |
125 | kfd->interrupt_ring_size; | |
126 | smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */ | |
127 | atomic_set(&kfd->interrupt_ring_wptr, wptr); | |
128 | ||
129 | return true; | |
130 | } | |
131 | ||
132 | /* | |
133 | * This assumes that it can't be called concurrently with itself | |
134 | * but only with enqueue_ih_ring_entry. | |
135 | */ | |
136 | static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry) | |
137 | { | |
138 | /* | |
139 | * Assume that wait queues have an implicit barrier, i.e. anything that | |
140 | * happened in the ISR before it queued work is visible. | |
141 | */ | |
142 | ||
143 | unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); | |
144 | unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); | |
145 | ||
146 | if (rptr == wptr) | |
147 | return false; | |
148 | ||
149 | memcpy(ih_ring_entry, kfd->interrupt_ring + rptr, | |
150 | kfd->device_info->ih_ring_entry_size); | |
151 | ||
152 | rptr = (rptr + kfd->device_info->ih_ring_entry_size) % | |
153 | kfd->interrupt_ring_size; | |
154 | ||
155 | /* | |
156 | * Ensure the rptr write update is not visible until | |
157 | * memcpy has finished reading. | |
158 | */ | |
159 | smp_mb(); | |
160 | atomic_set(&kfd->interrupt_ring_rptr, rptr); | |
161 | ||
162 | return true; | |
163 | } | |
164 | ||
165 | static void interrupt_wq(struct work_struct *work) | |
166 | { | |
167 | struct kfd_dev *dev = container_of(work, struct kfd_dev, | |
168 | interrupt_work); | |
169 | ||
170 | uint32_t ih_ring_entry[DIV_ROUND_UP( | |
171 | dev->device_info->ih_ring_entry_size, | |
172 | sizeof(uint32_t))]; | |
173 | ||
174 | while (dequeue_ih_ring_entry(dev, ih_ring_entry)) | |
f3a39818 AL |
175 | dev->device_info->event_interrupt_class->interrupt_wq(dev, |
176 | ih_ring_entry); | |
2249d558 AL |
177 | } |
178 | ||
179 | bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry) | |
180 | { | |
f3a39818 AL |
181 | /* integer and bitwise OR so there is no boolean short-circuiting */ |
182 | unsigned wanted = 0; | |
183 | ||
184 | wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev, | |
185 | ih_ring_entry); | |
186 | ||
187 | return wanted != 0; | |
2249d558 | 188 | } |