/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/topology.h>
#include <linux/cpumask.h>
#include <linux/module.h>

#include "hfi.h"
#include "affinity.h"
#include "sdma.h"
/* Name of IRQ types, indexed by enum irq_type */
static const char * const irq_type_names[] = {
	"SDMA", "RCVCTXT", "GENERAL", "OTHER",
};
static inline void init_cpu_mask_set(struct cpu_mask_set *set)
{
	cpumask_clear(&set->mask);
	cpumask_clear(&set->used);
	set->gen = 0;
}
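
/*
 * Note on cpu_mask_set semantics (inferred from the accounting code
 * below, not stated elsewhere in this file): 'mask' is the fixed pool
 * of candidate CPUs, 'used' marks the CPUs handed out in the current
 * allocation pass, and 'gen' counts how many times the whole pool has
 * been consumed and the pass restarted (i.e. CPUs are being overloaded).
 */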
/* Initialize non-HT cpu cores mask */
int init_real_cpu_mask(struct hfi1_devdata *dd)
{
	struct hfi1_affinity *info;
	int possible, curr_cpu, i, ht;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info)
		return -ENOMEM;

	cpumask_clear(&info->real_cpu_mask);
	/* Start with cpu online mask as the real cpu mask */
	cpumask_copy(&info->real_cpu_mask, cpu_online_mask);

	/*
	 * Remove HT cores from the real cpu mask. Do this in two steps below.
	 */
	possible = cpumask_weight(&info->real_cpu_mask);
	ht = cpumask_weight(topology_sibling_cpumask(
					cpumask_first(&info->real_cpu_mask)));
	/*
	 * Step 1. Skip over the first N HT siblings and use them as the
	 * "real" cores. Assumes that HT cores are not enumerated in
	 * succession (except in the single core case).
	 */
	curr_cpu = cpumask_first(&info->real_cpu_mask);
	for (i = 0; i < possible / ht; i++)
		curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
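	/*
	 * Worked example (illustrative topology, not from the driver):
	 * with 8 online CPUs and 2 HT threads per core enumerated as
	 * 0-3 (first threads) and 4-7 (their siblings), possible = 8 and
	 * ht = 2, so the loop above leaves curr_cpu at CPU 4 and step 2
	 * below clears CPUs 4-7, keeping 0-3 as the "real" cores.
	 */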
	/*
	 * Step 2. Remove the remaining HT siblings. Use cpumask_next() to
	 * skip any gaps.
	 */
	for (; i < possible; i++) {
		cpumask_clear_cpu(curr_cpu, &info->real_cpu_mask);
		curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
	}

	dd->affinity = info;
	return 0;
}
/*
 * Interrupt affinity.
 *
 * non-rcv avail gets a default mask that
 * starts as possible cpus with threads reset
 * and each rcv avail reset.
 *
 * rcv avail gets node relative 1 wrapping back
 * to the node relative 1 as necessary.
 *
 */
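/*
 * Illustrative example (assumed topology, not from the driver): on a
 * device whose NUMA node has real cores 0-7 and n_krcv_queues = 4,
 * def_intr.mask starts as CPUs 0-7; CPU 0 is retained for the control
 * context (and general/SDMA interrupts), while CPUs 1-3 are moved to
 * rcv_intr.mask for the remaining kernel receive contexts.
 */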
void hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
	int node = pcibus_to_node(dd->pcidev->bus);
	struct hfi1_affinity *info = dd->affinity;
	const struct cpumask *local_mask;
	int curr_cpu, possible, i;

	if (node < 0)
		node = numa_node_id();
	dd->node = node;

	spin_lock_init(&info->lock);
	init_cpu_mask_set(&info->def_intr);
	init_cpu_mask_set(&info->rcv_intr);
	init_cpu_mask_set(&info->proc);
	local_mask = cpumask_of_node(dd->node);
	if (cpumask_first(local_mask) >= nr_cpu_ids)
		local_mask = topology_core_cpumask(0);
	/* Use the "real" cpu mask of this node as the default */
	cpumask_and(&info->def_intr.mask, &info->real_cpu_mask, local_mask);
	/* fill in the receive list */
	possible = cpumask_weight(&info->def_intr.mask);
	curr_cpu = cpumask_first(&info->def_intr.mask);
	if (possible == 1) {
		/* only one CPU, everyone will use it */
		cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
	} else {
		/*
		 * Retain the first CPU in the default list for the control
		 * context.
		 */
		curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
		/*
		 * Remove the remaining kernel receive queues from
		 * the default list and add them to the receive list.
		 */
		for (i = 0; i < dd->n_krcv_queues - 1; i++) {
			cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
			cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
			curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
			if (curr_cpu >= nr_cpu_ids)
				break;
		}
	}

	cpumask_copy(&info->proc.mask, cpu_online_mask);
}
void hfi1_dev_affinity_free(struct hfi1_devdata *dd)
{
	kfree(dd->affinity);
}
int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
{
	int ret;
	int cpu = -1;
	cpumask_var_t diff;
	struct cpu_mask_set *set;
	struct sdma_engine *sde = NULL;
	struct hfi1_ctxtdata *rcd = NULL;
	char extra[64];

	extra[0] = '\0';
	cpumask_clear(&msix->mask);

	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
	if (!ret)
		return -ENOMEM;
	switch (msix->type) {
	case IRQ_SDMA:
		sde = (struct sdma_engine *)msix->arg;
		scnprintf(extra, 64, "engine %u", sde->this_idx);
		/* fall through */
	case IRQ_GENERAL:
		set = &dd->affinity->def_intr;
		break;
	case IRQ_RCVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		if (rcd->ctxt == HFI1_CTRL_CTXT) {
			set = &dd->affinity->def_intr;
			cpu = cpumask_first(&set->mask);
		} else {
			set = &dd->affinity->rcv_intr;
		}
		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
		break;
	default:
		dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
		free_cpumask_var(diff);
		return -EINVAL;
	}
	/*
	 * The control receive context is placed on a particular CPU, which
	 * is set above. Skip accounting for it. Everything else finds its
	 * CPU here.
	 */
	if (cpu == -1) {
		spin_lock(&dd->affinity->lock);
		if (cpumask_equal(&set->mask, &set->used)) {
			/*
			 * We've used up all the CPUs, bump up the generation
			 * and reset the 'used' map
			 */
			set->gen++;
			cpumask_clear(&set->used);
		}
		cpumask_andnot(diff, &set->mask, &set->used);
		cpu = cpumask_first(diff);
		cpumask_set_cpu(cpu, &set->used);
		spin_unlock(&dd->affinity->lock);
	}
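	/*
	 * Worked example (hypothetical numbers): with set->mask = {0,1,2},
	 * three successive vectors are assigned CPUs 0, 1 and 2 and marked
	 * in set->used. A fourth request finds mask == used, bumps
	 * set->gen, clears set->used and starts handing out CPU 0 again,
	 * overloading the pool round-robin.
	 */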
	switch (msix->type) {
	case IRQ_SDMA:
		sde->cpu = cpu;
		break;
	case IRQ_GENERAL:
	case IRQ_RCVCTXT:
	case IRQ_OTHER:
		break;
	}

	cpumask_set_cpu(cpu, &msix->mask);
	dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n",
		    msix->msix.vector, irq_type_names[msix->type],
		    extra, cpu);
	irq_set_affinity_hint(msix->msix.vector, &msix->mask);

	free_cpumask_var(diff);
	return 0;
}
void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
			   struct hfi1_msix_entry *msix)
{
	struct cpu_mask_set *set = NULL;
	struct hfi1_ctxtdata *rcd;

	switch (msix->type) {
	case IRQ_SDMA:
	case IRQ_GENERAL:
		set = &dd->affinity->def_intr;
		break;
	case IRQ_RCVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		/* only do accounting for non control contexts */
		if (rcd->ctxt != HFI1_CTRL_CTXT)
			set = &dd->affinity->rcv_intr;
		break;
	default:
		return;
	}
	if (set) {
		spin_lock(&dd->affinity->lock);
		cpumask_andnot(&set->used, &set->used, &msix->mask);
		if (cpumask_empty(&set->used) && set->gen) {
			set->gen--;
			cpumask_copy(&set->used, &set->mask);
		}
		spin_unlock(&dd->affinity->lock);
	}
	irq_set_affinity_hint(msix->msix.vector, NULL);
	cpumask_clear(&msix->mask);
}
int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
{
	int cpu = -1, ret;
	cpumask_var_t diff, mask, intrs;
	const struct cpumask *node_mask,
		*proc_mask = tsk_cpus_allowed(current);
	struct cpu_mask_set *set = &dd->affinity->proc;
	char buf[1024];
	/*
	 * check whether process/context affinity has already
	 * been set
	 */
	if (cpumask_weight(proc_mask) == 1) {
		scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask));
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %s",
			  current->pid, current->comm, buf);
		/*
		 * Mark the pre-set CPU as used. This is atomic so we don't
		 * need the lock
		 */
		cpu = cpumask_first(proc_mask);
		cpumask_set_cpu(cpu, &set->used);
		goto done;
	} else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
		scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask));
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %s",
			  current->pid, current->comm, buf);
		goto done;
	}
	/*
	 * The process does not have a preset CPU affinity so find one to
	 * recommend. We prefer CPUs on the same NUMA as the device.
	 */

	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
	if (!ret)
		goto done;
	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
	if (!ret)
		goto free_diff;
	ret = zalloc_cpumask_var(&intrs, GFP_KERNEL);
	if (!ret)
		goto free_mask;
	spin_lock(&dd->affinity->lock);
	/*
	 * If we've used all available CPUs, clear the mask and start
	 * overloading.
	 */
	if (cpumask_equal(&set->mask, &set->used)) {
		set->gen++;
		cpumask_clear(&set->used);
	}
	/* CPUs used by interrupt handlers */
	cpumask_copy(intrs, (dd->affinity->def_intr.gen ?
			     &dd->affinity->def_intr.mask :
			     &dd->affinity->def_intr.used));
	cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ?
				  &dd->affinity->rcv_intr.mask :
				  &dd->affinity->rcv_intr.used));
	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(intrs));
	hfi1_cdbg(PROC, "CPUs used by interrupts: %s", buf);
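	/*
	 * Note (inferred from the cpu_mask_set accounting above): once an
	 * interrupt set has wrapped (gen != 0), every CPU in its mask has
	 * been handed out at least once, so the whole mask is treated as
	 * busy; otherwise only the CPUs actually marked 'used' are avoided.
	 */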
	/*
	 * If we don't have a NUMA node requested, preference is towards
	 * device NUMA node
	 */
	if (node == -1)
		node = dd->node;
	node_mask = cpumask_of_node(node);
	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(node_mask));
	hfi1_cdbg(PROC, "device on NUMA %u, CPUs %s", node, buf);
	/* diff will hold all unused cpus */
	cpumask_andnot(diff, &set->mask, &set->used);
	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(diff));
	hfi1_cdbg(PROC, "unused CPUs (all) %s", buf);

	/* get cpumask of available CPUs on preferred NUMA */
	cpumask_and(mask, diff, node_mask);
	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask));
	hfi1_cdbg(PROC, "available cpus on NUMA %s", buf);
	/*
	 * At first, we don't want to place processes on the same
	 * CPUs as interrupt handlers.
	 */
	cpumask_andnot(diff, mask, intrs);
	if (!cpumask_empty(diff))
		cpumask_copy(mask, diff);
	/*
	 * if we don't have a cpu on the preferred NUMA, get
	 * the list of the remaining available CPUs
	 */
	if (cpumask_empty(mask)) {
		cpumask_andnot(diff, &set->mask, &set->used);
		cpumask_andnot(mask, diff, node_mask);
	}
	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask));
	hfi1_cdbg(PROC, "possible CPUs for process %s", buf);
	cpu = cpumask_first(mask);
	if (cpu >= nr_cpu_ids) /* empty */
		cpu = -1;
	else
		cpumask_set_cpu(cpu, &set->used);
	spin_unlock(&dd->affinity->lock);

	free_cpumask_var(intrs);
free_mask:
	free_cpumask_var(mask);
free_diff:
	free_cpumask_var(diff);
done:
	return cpu;
}
void hfi1_put_proc_affinity(struct hfi1_devdata *dd, int cpu)
{
	struct cpu_mask_set *set = &dd->affinity->proc;

	if (cpu < 0)
		return;
	spin_lock(&dd->affinity->lock);
	cpumask_clear_cpu(cpu, &set->used);
	if (cpumask_empty(&set->used) && set->gen) {
		set->gen--;
		cpumask_copy(&set->used, &set->mask);
	}
	spin_unlock(&dd->affinity->lock);
}
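
/*
 * Usage sketch (illustrative, not part of this file): a caller opening a
 * user context would typically pair the two proc-affinity helpers as
 *
 *	cpu = hfi1_get_proc_affinity(dd, numa_node);
 *	if (cpu >= 0)
 *		... recommend/bind the process to 'cpu' ...
 *
 * and release the accounting on teardown with
 * hfi1_put_proc_affinity(dd, cpu).
 */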