Commit | Line | Data |
---|---|---|
19f6d2a6 OG |
1 | /* |
2 | * Copyright 2014 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | */ | |
22 | #include "kfd_priv.h" | |
23 | #include <linux/mm.h> | |
24 | #include <linux/mman.h> | |
25 | #include <linux/slab.h> | |
824cb7d1 | 26 | #include <linux/io.h> |
19f6d2a6 OG |
27 | |
28 | /* | |
29 | * This extension supports a kernel level doorbells management for | |
30 | * the kernel queues. | |
31 | * Basically the last doorbells page is devoted to kernel queues | |
32 | * and that assures that no user process will get access to the | |
33 | * kernel doorbells page. | |
34 | */ | |
19f6d2a6 OG |
35 | |
36 | #define KERNEL_DOORBELL_PASID 1 | |
37 | #define KFD_SIZE_OF_DOORBELL_IN_BYTES 4 | |
38 | ||
39 | /* | |
40 | * Each device exposes a doorbell aperture, a PCI MMIO aperture that | |
41 | * receives 32-bit writes that are passed to queues as wptr values. | |
42 | * The doorbells are intended to be written by applications as part | |
43 | * of queueing work on user-mode queues. | |
44 | * We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks. | |
45 | * We map the doorbell address space into user-mode when a process creates | |
46 | * its first queue on each device. | |
47 | * Although the mapping is done by KFD, it is equivalent to an mmap of | |
48 | * the /dev/kfd with the particular device encoded in the mmap offset. | |
49 | * There will be other uses for mmap of /dev/kfd, so only a range of | |
50 | * offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells. | |
51 | */ | |
52 | ||
53 | /* # of doorbell bytes allocated for each process. */ | |
54 | static inline size_t doorbell_process_allocation(void) | |
55 | { | |
56 | return roundup(KFD_SIZE_OF_DOORBELL_IN_BYTES * | |
57 | KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, | |
58 | PAGE_SIZE); | |
59 | } | |
60 | ||
61 | /* Doorbell calculations for device init. */ | |
62 | void kfd_doorbell_init(struct kfd_dev *kfd) | |
63 | { | |
64 | size_t doorbell_start_offset; | |
65 | size_t doorbell_aperture_size; | |
66 | size_t doorbell_process_limit; | |
67 | ||
68 | /* | |
69 | * We start with calculations in bytes because the input data might | |
70 | * only be byte-aligned. | |
71 | * Only after we have done the rounding can we assume any alignment. | |
72 | */ | |
73 | ||
74 | doorbell_start_offset = | |
75 | roundup(kfd->shared_resources.doorbell_start_offset, | |
76 | doorbell_process_allocation()); | |
77 | ||
78 | doorbell_aperture_size = | |
79 | rounddown(kfd->shared_resources.doorbell_aperture_size, | |
80 | doorbell_process_allocation()); | |
81 | ||
82 | if (doorbell_aperture_size > doorbell_start_offset) | |
83 | doorbell_process_limit = | |
84 | (doorbell_aperture_size - doorbell_start_offset) / | |
85 | doorbell_process_allocation(); | |
86 | else | |
87 | doorbell_process_limit = 0; | |
88 | ||
89 | kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address + | |
90 | doorbell_start_offset; | |
91 | ||
92 | kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32); | |
93 | kfd->doorbell_process_limit = doorbell_process_limit - 1; | |
94 | ||
95 | kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, | |
96 | doorbell_process_allocation()); | |
97 | ||
98 | BUG_ON(!kfd->doorbell_kernel_ptr); | |
99 | ||
100 | pr_debug("kfd: doorbell initialization:\n"); | |
101 | pr_debug("kfd: doorbell base == 0x%08lX\n", | |
102 | (uintptr_t)kfd->doorbell_base); | |
103 | ||
104 | pr_debug("kfd: doorbell_id_offset == 0x%08lX\n", | |
105 | kfd->doorbell_id_offset); | |
106 | ||
107 | pr_debug("kfd: doorbell_process_limit == 0x%08lX\n", | |
108 | doorbell_process_limit); | |
109 | ||
110 | pr_debug("kfd: doorbell_kernel_offset == 0x%08lX\n", | |
111 | (uintptr_t)kfd->doorbell_base); | |
112 | ||
113 | pr_debug("kfd: doorbell aperture size == 0x%08lX\n", | |
114 | kfd->shared_resources.doorbell_aperture_size); | |
115 | ||
116 | pr_debug("kfd: doorbell kernel address == 0x%08lX\n", | |
117 | (uintptr_t)kfd->doorbell_kernel_ptr); | |
118 | } | |
119 | ||
120 | int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) | |
121 | { | |
122 | phys_addr_t address; | |
123 | struct kfd_dev *dev; | |
124 | ||
125 | /* | |
126 | * For simplicitly we only allow mapping of the entire doorbell | |
127 | * allocation of a single device & process. | |
128 | */ | |
129 | if (vma->vm_end - vma->vm_start != doorbell_process_allocation()) | |
130 | return -EINVAL; | |
131 | ||
132 | /* Find kfd device according to gpu id */ | |
133 | dev = kfd_device_by_id(vma->vm_pgoff); | |
134 | if (dev == NULL) | |
135 | return -EINVAL; | |
136 | ||
19f6d2a6 OG |
137 | /* Calculate physical address of doorbell */ |
138 | address = kfd_get_process_doorbells(dev, process); | |
139 | ||
140 | vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | | |
141 | VM_DONTDUMP | VM_PFNMAP; | |
142 | ||
143 | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); | |
144 | ||
8856d8e0 OG |
145 | pr_debug("mapping doorbell page:\n"); |
146 | pr_debug(" target user address == 0x%08llX\n", | |
147 | (unsigned long long) vma->vm_start); | |
148 | pr_debug(" physical address == 0x%08llX\n", address); | |
149 | pr_debug(" vm_flags == 0x%04lX\n", vma->vm_flags); | |
150 | pr_debug(" size == 0x%04lX\n", | |
151 | doorbell_process_allocation()); | |
19f6d2a6 OG |
152 | |
153 | return io_remap_pfn_range(vma, | |
154 | vma->vm_start, | |
155 | address >> PAGE_SHIFT, | |
156 | doorbell_process_allocation(), | |
157 | vma->vm_page_prot); | |
158 | } | |
159 | ||
160 | ||
161 | /* get kernel iomem pointer for a doorbell */ | |
162 | u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, | |
163 | unsigned int *doorbell_off) | |
164 | { | |
165 | u32 inx; | |
166 | ||
167 | BUG_ON(!kfd || !doorbell_off); | |
168 | ||
cea405b1 XZ |
169 | mutex_lock(&kfd->doorbell_mutex); |
170 | inx = find_first_zero_bit(kfd->doorbell_available_index, | |
19f6d2a6 OG |
171 | KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); |
172 | ||
cea405b1 XZ |
173 | __set_bit(inx, kfd->doorbell_available_index); |
174 | mutex_unlock(&kfd->doorbell_mutex); | |
19f6d2a6 OG |
175 | |
176 | if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) | |
177 | return NULL; | |
178 | ||
179 | /* | |
180 | * Calculating the kernel doorbell offset using "faked" kernel | |
181 | * pasid that allocated for kernel queues only | |
182 | */ | |
183 | *doorbell_off = KERNEL_DOORBELL_PASID * (doorbell_process_allocation() / | |
184 | sizeof(u32)) + inx; | |
185 | ||
186 | pr_debug("kfd: get kernel queue doorbell\n" | |
187 | " doorbell offset == 0x%08d\n" | |
188 | " kernel address == 0x%08lX\n", | |
189 | *doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx)); | |
190 | ||
191 | return kfd->doorbell_kernel_ptr + inx; | |
192 | } | |
193 | ||
194 | void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) | |
195 | { | |
196 | unsigned int inx; | |
197 | ||
198 | BUG_ON(!kfd || !db_addr); | |
199 | ||
200 | inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr); | |
201 | ||
cea405b1 XZ |
202 | mutex_lock(&kfd->doorbell_mutex); |
203 | __clear_bit(inx, kfd->doorbell_available_index); | |
204 | mutex_unlock(&kfd->doorbell_mutex); | |
19f6d2a6 OG |
205 | } |
206 | ||
207 | inline void write_kernel_doorbell(u32 __iomem *db, u32 value) | |
208 | { | |
209 | if (db) { | |
210 | writel(value, db); | |
211 | pr_debug("writing %d to doorbell address 0x%p\n", value, db); | |
212 | } | |
213 | } | |
214 | ||
215 | /* | |
216 | * queue_ids are in the range [0,MAX_PROCESS_QUEUES) and are mapped 1:1 | |
217 | * to doorbells with the process's doorbell page | |
218 | */ | |
219 | unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, | |
220 | struct kfd_process *process, | |
221 | unsigned int queue_id) | |
222 | { | |
223 | /* | |
224 | * doorbell_id_offset accounts for doorbells taken by KGD. | |
225 | * pasid * doorbell_process_allocation/sizeof(u32) adjusts | |
226 | * to the process's doorbells | |
227 | */ | |
228 | return kfd->doorbell_id_offset + | |
229 | process->pasid * (doorbell_process_allocation()/sizeof(u32)) + | |
230 | queue_id; | |
231 | } | |
232 | ||
233 | uint64_t kfd_get_number_elems(struct kfd_dev *kfd) | |
234 | { | |
235 | uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size - | |
236 | kfd->shared_resources.doorbell_start_offset) / | |
237 | doorbell_process_allocation() + 1; | |
238 | ||
239 | return num_of_elems; | |
240 | ||
241 | } | |
242 | ||
243 | phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, | |
244 | struct kfd_process *process) | |
245 | { | |
246 | return dev->doorbell_base + | |
247 | process->pasid * doorbell_process_allocation(); | |
248 | } |