/*
 *  IBM eServer eHCA Infiniband device driver for Linux on POWER
 *
 *  Functions for EQs, NEQs and interrupts
 *
 *  Authors: Heiko J Schick <schickhj@de.ibm.com>
 *           Khadija Souissi <souissi@de.ibm.com>
 *
 *  Copyright (c) 2005 IBM Corporation
 *
 *  All rights reserved.
 *
 *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
 *  BSD.
 *
 * OpenIB BSD License
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include "ehca_classes.h"
#include "ehca_irq.h"
#include "ehca_iverbs.h"
#include "ehca_tools.h"
#include "hcp_if.h"
#include "hipz_fns.h"
#include "ipz_pt_fn.h"
#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM(1,1)
#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM(8,31)
#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM(2,7)
#define EQE_CQ_NUMBER          EHCA_BMASK_IBM(8,31)
#define EQE_QP_NUMBER          EHCA_BMASK_IBM(8,31)
#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32,63)
#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32,63)

#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM(1,1)
#define NEQE_EVENT_CODE        EHCA_BMASK_IBM(2,7)
#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM(8,15)
#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16)

#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52,63)
#define ERROR_DATA_TYPE        EHCA_BMASK_IBM(0,7)
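
/*
 * Note on bit numbering: EHCA_BMASK_IBM(from,to) uses IBM big-endian bit
 * order, i.e. bit 0 is the most significant bit of the 64-bit doubleword
 * and bit 63 the least significant.  EHCA_BMASK_GET(mask, value) extracts
 * the field and right-justifies it.  For example (illustrative only):
 *
 *   u64 eqe_value = eqe->entry;                            // raw EQE word
 *   u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);   // bits 32..63
 *
 * so EQE_CQ_TOKEN selects the token stored in the low half of the event
 * queue entry, which is then used to look the CQ up in ehca_cq_idr.
 */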
#ifdef CONFIG_INFINIBAND_EHCA_SCALING

static void queue_comp_task(struct ehca_cq *__cq);

static struct ehca_comp_pool *pool;
static struct notifier_block comp_pool_callback_nb;

#endif
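
/*
 * When CONFIG_INFINIBAND_EHCA_SCALING is set, completion events are not
 * dispatched directly from the EQ tasklet; they are queued to a pool of
 * per-CPU kernel threads (the comp_task machinery at the end of this
 * file) so completion handling can be spread across CPUs.
 */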
static inline void comp_event_callback(struct ehca_cq *cq)
{
	if (!cq->ib_cq.comp_handler)
		return;

	spin_lock(&cq->cb_lock);
	cq->ib_cq.comp_handler(&cq->ib_cq, cq->ib_cq.cq_context);
	spin_unlock(&cq->cb_lock);
}
static void print_error_data(struct ehca_shca *shca, void *data,
			     u64 *rblock, int length)
{
	u64 type = EHCA_BMASK_GET(ERROR_DATA_TYPE, rblock[2]);
	u64 resource = rblock[1];

	switch (type) {
	case 0x1: /* Queue Pair */
	{
		struct ehca_qp *qp = (struct ehca_qp *)data;

		/* only print error data if AER is set */
		if (rblock[6] == 0)
			return;

		ehca_err(&shca->ib_device,
			 "QP 0x%x (resource=%lx) has errors.",
			 qp->ib_qp.qp_num, resource);
		break;
	}
	case 0x4: /* Completion Queue */
	{
		struct ehca_cq *cq = (struct ehca_cq *)data;

		ehca_err(&shca->ib_device,
			 "CQ 0x%x (resource=%lx) has errors.",
			 cq->cq_number, resource);
		break;
	}
	default:
		ehca_err(&shca->ib_device,
			 "Unknown error type: %lx on %s.",
			 type, shca->ib_device.name);
		break;
	}

	ehca_err(&shca->ib_device, "Error data is available: %lx.", resource);
	ehca_err(&shca->ib_device, "EHCA ----- error data begin "
		 "---------------------------------------------------");
	ehca_dmp(rblock, length, "resource=%lx", resource);
	ehca_err(&shca->ib_device, "EHCA ----- error data end "
		 "----------------------------------------------------");
}
int ehca_error_data(struct ehca_shca *shca, void *data,
		    u64 resource)
{
	unsigned long ret;
	u64 *rblock;
	unsigned long block_count;

	rblock = ehca_alloc_fw_ctrlblock();
	if (!rblock) {
		ehca_err(&shca->ib_device, "Cannot allocate rblock memory.");
		ret = -ENOMEM;
		goto error_data1;
	}

	/* rblock must be 4K aligned and should be 4K large */
	ret = hipz_h_error_data(shca->ipz_hca_handle,
				resource,
				rblock,
				&block_count);

	if (ret == H_R_STATE)
		ehca_err(&shca->ib_device,
			 "No error data is available: %lx.", resource);
	else if (ret == H_SUCCESS) {
		int length;

		length = EHCA_BMASK_GET(ERROR_DATA_LENGTH, rblock[0]);

		if (length > EHCA_PAGESIZE)
			length = EHCA_PAGESIZE;

		print_error_data(shca, data, rblock, length);
	} else
		ehca_err(&shca->ib_device,
			 "Error data could not be fetched: %lx", resource);

	ehca_free_fw_ctrlblock(rblock);

error_data1:
	return ret;
}
static void qp_event_callback(struct ehca_shca *shca,
			      u64 eqe,
			      enum ib_event_type event_type)
{
	struct ib_event event;
	struct ehca_qp *qp;
	unsigned long flags;
	u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);

	spin_lock_irqsave(&ehca_qp_idr_lock, flags);
	qp = idr_find(&ehca_qp_idr, token);
	spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);

	if (!qp)
		return;

	ehca_error_data(shca, qp, qp->ipz_qp_handle.handle);

	if (!qp->ib_qp.event_handler)
		return;

	event.device     = &shca->ib_device;
	event.event      = event_type;
	event.element.qp = &qp->ib_qp;

	qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
}
static void cq_event_callback(struct ehca_shca *shca,
			      u64 eqe)
{
	struct ehca_cq *cq;
	unsigned long flags;
	u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);

	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
	cq = idr_find(&ehca_cq_idr, token);
	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);

	if (!cq)
		return;

	ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
}
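
/*
 * parse_identifier() decodes the EE identifier field of an affiliated
 * (non-completion) EQE and either maps it to an ib_event delivered to the
 * owning QP or CQ, or just logs the condition.  The token in the low half
 * of the EQE identifies the resource in the driver's idr tables.
 */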
static void parse_identifier(struct ehca_shca *shca, u64 eqe)
{
	u8 identifier = EHCA_BMASK_GET(EQE_EE_IDENTIFIER, eqe);

	switch (identifier) {
	case 0x02: /* path migrated */
		qp_event_callback(shca, eqe, IB_EVENT_PATH_MIG);
		break;
	case 0x03: /* communication established */
		qp_event_callback(shca, eqe, IB_EVENT_COMM_EST);
		break;
	case 0x04: /* send queue drained */
		qp_event_callback(shca, eqe, IB_EVENT_SQ_DRAINED);
		break;
	case 0x05: /* QP error */
	case 0x06: /* QP error */
		qp_event_callback(shca, eqe, IB_EVENT_QP_FATAL);
		break;
	case 0x07: /* CQ error */
	case 0x08: /* CQ error */
		cq_event_callback(shca, eqe);
		break;
	case 0x09: /* MRMWPTE error */
		ehca_err(&shca->ib_device, "MRMWPTE error.");
		break;
	case 0x0A: /* port event */
		ehca_err(&shca->ib_device, "Port event.");
		break;
	case 0x0B: /* MR access error */
		ehca_err(&shca->ib_device, "MR access error.");
		break;
	case 0x0C: /* EQ error */
		ehca_err(&shca->ib_device, "EQ error.");
		break;
	case 0x0D: /* P/Q_Key mismatch */
		ehca_err(&shca->ib_device, "P/Q_Key mismatch.");
		break;
	case 0x10: /* sampling complete */
		ehca_err(&shca->ib_device, "Sampling complete.");
		break;
	case 0x11: /* unaffiliated access error */
		ehca_err(&shca->ib_device, "Unaffiliated access error.");
		break;
	case 0x12: /* path migrating error */
		ehca_err(&shca->ib_device, "Path migration error.");
		break;
	case 0x13: /* interface trace stopped */
		ehca_err(&shca->ib_device, "Interface trace stopped.");
		break;
	case 0x14: /* first error capture info available */
	default:
		ehca_err(&shca->ib_device, "Unknown identifier: %x on %s.",
			 identifier, shca->ib_device.name);
		break;
	}
}
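
/*
 * parse_ec() decodes notification event queue (NEQ) entries.  These carry
 * asynchronous port events rather than completions: availability changes
 * (event code 0x30) and disruptive configuration changes caused by a LID,
 * PKEY or SM change (0x31), which are reported to the IB core as a
 * PORT_ERR followed by a PORT_ACTIVE transition.
 */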
static void parse_ec(struct ehca_shca *shca, u64 eqe)
{
	struct ib_event event;
	u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
	u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);

	switch (ec) {
	case 0x30: /* port availability change */
		if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
			ehca_info(&shca->ib_device,
				  "port %x is active.", port);
			event.device = &shca->ib_device;
			event.event = IB_EVENT_PORT_ACTIVE;
			event.element.port_num = port;
			shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
			ib_dispatch_event(&event);
		} else {
			ehca_info(&shca->ib_device,
				  "port %x is inactive.", port);
			event.device = &shca->ib_device;
			event.event = IB_EVENT_PORT_ERR;
			event.element.port_num = port;
			shca->sport[port - 1].port_state = IB_PORT_DOWN;
			ib_dispatch_event(&event);
		}
		break;
	case 0x31:
		/* port configuration change
		 * disruptive change is caused by
		 * LID, PKEY or SM change
		 */
		ehca_warn(&shca->ib_device,
			  "disruptive port %x configuration change", port);

		ehca_info(&shca->ib_device,
			  "port %x is inactive.", port);
		event.device = &shca->ib_device;
		event.event = IB_EVENT_PORT_ERR;
		event.element.port_num = port;
		shca->sport[port - 1].port_state = IB_PORT_DOWN;
		ib_dispatch_event(&event);

		ehca_info(&shca->ib_device,
			  "port %x is active.", port);
		event.device = &shca->ib_device;
		event.event = IB_EVENT_PORT_ACTIVE;
		event.element.port_num = port;
		shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
		ib_dispatch_event(&event);
		break;
	case 0x32: /* adapter malfunction */
		ehca_err(&shca->ib_device, "Adapter malfunction.");
		break;
	case 0x33: /* trace stopped */
		ehca_err(&shca->ib_device, "Trace stopped.");
		break;
	default:
		ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
			 ec, shca->ib_device.name);
		break;
	}
}
static inline void reset_eq_pending(struct ehca_cq *cq)
{
	u64 CQx_EP;
	struct h_galpa gal = cq->galpas.kernel;

	hipz_galpa_store_cq(gal, cqx_ep, 0x0);
	CQx_EP = hipz_galpa_load(gal, CQTEMM_OFFSET(cqx_ep));
}
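
/*
 * Interrupt entry points: the hard IRQ handlers below only schedule the
 * corresponding tasklet; all EQE processing happens in tasklet (softirq)
 * context.  Elsewhere in the driver the pairs are wired up roughly like
 * this (a sketch only; the exact calls live in the EQ setup code, not in
 * this file):
 *
 *   tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
 *   ibmebus_request_irq(NULL, eq->ist, ehca_interrupt_eq,
 *                       IRQF_DISABLED, "ehca_eq", (void *)shca);
 */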
irqreturn_t ehca_interrupt_neq(int irq, void *dev_id)
{
	struct ehca_shca *shca = (struct ehca_shca *)dev_id;

	tasklet_hi_schedule(&shca->neq.interrupt_task);

	return IRQ_HANDLED;
}
void ehca_tasklet_neq(unsigned long data)
{
	struct ehca_shca *shca = (struct ehca_shca *)data;
	struct ehca_eqe *eqe;
	u64 ret;

	eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);

	while (eqe) {
		if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT, eqe->entry))
			parse_ec(shca, eqe->entry);

		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->neq);
	}

	ret = hipz_h_reset_event(shca->ipz_hca_handle,
				 shca->neq.ipz_eq_handle, 0xFFFFFFFFFFFFFFFFL);

	if (ret != H_SUCCESS)
		ehca_err(&shca->ib_device, "Can't clear notification events.");
}
irqreturn_t ehca_interrupt_eq(int irq, void *dev_id)
{
	struct ehca_shca *shca = (struct ehca_shca *)dev_id;

	tasklet_hi_schedule(&shca->eq.interrupt_task);

	return IRQ_HANDLED;
}
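
/*
 * ehca_tasklet_eq() drains the event queue.  On hw_level >= 2 the adapter
 * exposes an interrupt state bit that is polled down to zero before the
 * tasklet exits, so events arriving between the last poll and the end of
 * the handler are not lost; the query_cnt cap bounds that polling loop so
 * the tasklet cannot spin indefinitely.
 */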
void ehca_tasklet_eq(unsigned long data)
{
	struct ehca_shca *shca = (struct ehca_shca *)data;
	struct ehca_eqe *eqe;
	int int_state;
	int query_cnt = 0;

	do {
		eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);

		if ((shca->hw_level >= 2) && eqe)
			int_state = 1;
		else
			int_state = 0;

		while ((int_state == 1) || eqe) {
			while (eqe) {
				u64 eqe_value = eqe->entry;

				ehca_dbg(&shca->ib_device,
					 "eqe_value=%lx", eqe_value);

				/* TODO: better structure */
				if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT,
						   eqe_value)) {
					unsigned long flags;
					u32 token;
					struct ehca_cq *cq;

					ehca_dbg(&shca->ib_device,
						 "... completion event");
					token = EHCA_BMASK_GET(EQE_CQ_TOKEN,
							       eqe_value);
					spin_lock_irqsave(&ehca_cq_idr_lock,
							  flags);
					cq = idr_find(&ehca_cq_idr, token);

					if (cq == NULL) {
						spin_unlock_irqrestore(
							&ehca_cq_idr_lock,
							flags);
						break;
					}

					reset_eq_pending(cq);
#ifdef CONFIG_INFINIBAND_EHCA_SCALING
					queue_comp_task(cq);
					spin_unlock_irqrestore(&ehca_cq_idr_lock,
							       flags);
#else
					spin_unlock_irqrestore(&ehca_cq_idr_lock,
							       flags);
					comp_event_callback(cq);
#endif
				} else {
					ehca_dbg(&shca->ib_device,
						 "... non completion event");
					parse_identifier(shca, eqe_value);
				}

				eqe = (struct ehca_eqe *)
					ehca_poll_eq(shca, &shca->eq);
			}

			if (shca->hw_level >= 2) {
				int_state = hipz_h_query_int_state(
					shca->ipz_hca_handle, shca->eq.ist);
				query_cnt++;
				iosync();
				if (query_cnt >= 100) {
					query_cnt = 0;
					int_state = 0;
				}
			}
			eqe = (struct ehca_eqe *)ehca_poll_eq(shca, &shca->eq);
		}
	} while (int_state != 0);
}
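
/*
 * Completion task pool (CONFIG_INFINIBAND_EHCA_SCALING): one kernel
 * thread per online CPU consumes queued CQs and invokes their completion
 * handlers, with a round-robin choice of target CPU and CPU hotplug
 * support via a notifier block.
 */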
#ifdef CONFIG_INFINIBAND_EHCA_SCALING

static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
{
	unsigned long flags_last_cpu;

	if (ehca_debug_level)
		ehca_dmp(&cpu_online_map, sizeof(cpumask_t), "");

	spin_lock_irqsave(&pool->last_cpu_lock, flags_last_cpu);
	pool->last_cpu = next_cpu(pool->last_cpu, cpu_online_map);
	if (pool->last_cpu == NR_CPUS)
		pool->last_cpu = first_cpu(cpu_online_map);
	spin_unlock_irqrestore(&pool->last_cpu_lock, flags_last_cpu);

	return pool->last_cpu;
}
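
/*
 * __queue_comp_task() takes cct->task_lock before __cq->task_lock; the
 * same ordering is used in run_comp_task(), avoiding lock inversion.  A
 * CQ is only added to the thread's cq_list on its first outstanding
 * callback (nr_callbacks 0 -> 1); further events just bump the count.
 */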
static void __queue_comp_task(struct ehca_cq *__cq,
			      struct ehca_cpu_comp_task *cct)
{
	unsigned long flags_cct;
	unsigned long flags_cq;

	spin_lock_irqsave(&cct->task_lock, flags_cct);
	spin_lock_irqsave(&__cq->task_lock, flags_cq);

	if (__cq->nr_callbacks == 0) {
		__cq->nr_callbacks++;
		list_add_tail(&__cq->entry, &cct->cq_list);
		cct->cq_jobs++;
		wake_up(&cct->wait_queue);
	} else
		__cq->nr_callbacks++;

	spin_unlock_irqrestore(&__cq->task_lock, flags_cq);
	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
}
static void queue_comp_task(struct ehca_cq *__cq)
{
	int cpu;
	int cpu_id;
	struct ehca_cpu_comp_task *cct;

	cpu = get_cpu();
	cpu_id = find_next_online_cpu(pool);

	BUG_ON(!cpu_online(cpu_id));

	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);

	if (cct->cq_jobs > 0) {
		cpu_id = find_next_online_cpu(pool);
		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
	}

	__queue_comp_task(__cq, cct);

	put_cpu();
}
static void run_comp_task(struct ehca_cpu_comp_task *cct)
{
	struct ehca_cq *cq;
	unsigned long flags_cct;
	unsigned long flags_cq;

	spin_lock_irqsave(&cct->task_lock, flags_cct);

	while (!list_empty(&cct->cq_list)) {
		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
		spin_unlock_irqrestore(&cct->task_lock, flags_cct);
		comp_event_callback(cq);
		spin_lock_irqsave(&cct->task_lock, flags_cct);

		spin_lock_irqsave(&cq->task_lock, flags_cq);
		cq->nr_callbacks--;
		if (cq->nr_callbacks == 0) {
			list_del_init(cct->cq_list.next);
			cct->cq_jobs--;
		}
		spin_unlock_irqrestore(&cq->task_lock, flags_cq);
	}

	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
}
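
/*
 * comp_task() is the per-CPU completion thread.  It follows the classic
 * add_wait_queue/check/schedule pattern: the task state is set to
 * TASK_INTERRUPTIBLE before the list is checked, so a wake_up() from
 * __queue_comp_task() that races with the check cannot be missed.
 */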
static int comp_task(void *__cct)
{
	struct ehca_cpu_comp_task *cct = __cct;
	DECLARE_WAITQUEUE(wait, current);

	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		add_wait_queue(&cct->wait_queue, &wait);

		if (list_empty(&cct->cq_list))
			schedule();
		else
			__set_current_state(TASK_RUNNING);

		remove_wait_queue(&cct->wait_queue, &wait);

		if (!list_empty(&cct->cq_list))
			run_comp_task(__cct);

		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);

	return 0;
}
static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
					    int cpu)
{
	struct ehca_cpu_comp_task *cct;

	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
	spin_lock_init(&cct->task_lock);
	INIT_LIST_HEAD(&cct->cq_list);
	init_waitqueue_head(&cct->wait_queue);
	cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);

	return cct->task;
}
static void destroy_comp_task(struct ehca_comp_pool *pool,
			      int cpu)
{
	struct ehca_cpu_comp_task *cct;
	struct task_struct *task;
	unsigned long flags_cct;

	cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);

	spin_lock_irqsave(&cct->task_lock, flags_cct);

	task = cct->task;
	cct->task = NULL;
	cct->cq_jobs = 0;

	spin_unlock_irqrestore(&cct->task_lock, flags_cct);

	if (task)
		kthread_stop(task);
}
static void take_over_work(struct ehca_comp_pool *pool,
			   int cpu)
{
	struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
	LIST_HEAD(list);
	struct ehca_cq *cq;
	unsigned long flags_cct;

	spin_lock_irqsave(&cct->task_lock, flags_cct);

	list_splice_init(&cct->cq_list, &list);

	while (!list_empty(&list)) {
		/* iterate over the spliced-off list; cct->cq_list is
		 * empty after list_splice_init() */
		cq = list_entry(list.next, struct ehca_cq, entry);

		list_del(&cq->entry);
		__queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
						  smp_processor_id()));
	}

	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
}
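
/*
 * CPU hotplug notifier: creates and binds a completion thread when a CPU
 * comes up, and on CPU_DEAD destroys that CPU's thread and requeues its
 * pending CQs onto the current CPU via take_over_work().
 */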
static int comp_pool_callback(struct notifier_block *nfb,
			      unsigned long action,
			      void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct ehca_cpu_comp_task *cct;

	switch (action) {
	case CPU_UP_PREPARE:
		ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
		if (!create_comp_task(pool, cpu)) {
			ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
			return NOTIFY_BAD;
		}
		break;
	case CPU_UP_CANCELED:
		ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
		kthread_bind(cct->task, any_online_cpu(cpu_online_map));
		destroy_comp_task(pool, cpu);
		break;
	case CPU_ONLINE:
		ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
		cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
		kthread_bind(cct->task, cpu);
		wake_up_process(cct->task);
		break;
	case CPU_DOWN_PREPARE:
		ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
		break;
	case CPU_DOWN_FAILED:
		ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
		break;
	case CPU_DEAD:
		ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
		destroy_comp_task(pool, cpu);
		take_over_work(pool, cpu);
		break;
	}

	return NOTIFY_OK;
}

#endif
int ehca_create_comp_pool(void)
{
#ifdef CONFIG_INFINIBAND_EHCA_SCALING
	int cpu;
	struct task_struct *task;

	pool = kzalloc(sizeof(struct ehca_comp_pool), GFP_KERNEL);
	if (pool == NULL)
		return -ENOMEM;

	spin_lock_init(&pool->last_cpu_lock);
	pool->last_cpu = any_online_cpu(cpu_online_map);

	pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
	if (pool->cpu_comp_tasks == NULL) {
		kfree(pool);
		return -EINVAL;
	}

	for_each_online_cpu(cpu) {
		task = create_comp_task(pool, cpu);
		if (task) {
			kthread_bind(task, cpu);
			wake_up_process(task);
		}
	}

	comp_pool_callback_nb.notifier_call = comp_pool_callback;
	comp_pool_callback_nb.priority = 0;
	register_cpu_notifier(&comp_pool_callback_nb);
#endif

	return 0;
}
void ehca_destroy_comp_pool(void)
{
#ifdef CONFIG_INFINIBAND_EHCA_SCALING
	int i;

	unregister_cpu_notifier(&comp_pool_callback_nb);

	for (i = 0; i < NR_CPUS; i++) {
		if (cpu_online(i))
			destroy_comp_task(pool, i);
	}
#endif
}