2 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
3 * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #include <linux/init.h>
35 #include <linux/interrupt.h>
36 #include <linux/dma-mapping.h>
38 #include <linux/mlx4/cmd.h>
44 MLX4_NUM_ASYNC_EQE
= 0x100,
45 MLX4_NUM_SPARE_EQE
= 0x80,
46 MLX4_EQ_ENTRY_SIZE
= 0x20
50 * Must be packed because start is 64 bits but only aligned to 32 bits.
52 struct mlx4_eq_context
{
66 __be32 mtt_base_addr_l
;
68 __be32 consumer_index
;
69 __be32 producer_index
;
73 #define MLX4_EQ_STATUS_OK ( 0 << 28)
74 #define MLX4_EQ_STATUS_WRITE_FAIL (10 << 28)
75 #define MLX4_EQ_OWNER_SW ( 0 << 24)
76 #define MLX4_EQ_OWNER_HW ( 1 << 24)
77 #define MLX4_EQ_FLAG_EC ( 1 << 18)
78 #define MLX4_EQ_FLAG_OI ( 1 << 17)
79 #define MLX4_EQ_STATE_ARMED ( 9 << 8)
80 #define MLX4_EQ_STATE_FIRED (10 << 8)
81 #define MLX4_EQ_STATE_ALWAYS_ARMED (11 << 8)
83 #define MLX4_ASYNC_EVENT_MASK ((1ull << MLX4_EVENT_TYPE_PATH_MIG) | \
84 (1ull << MLX4_EVENT_TYPE_COMM_EST) | \
85 (1ull << MLX4_EVENT_TYPE_SQ_DRAINED) | \
86 (1ull << MLX4_EVENT_TYPE_CQ_ERROR) | \
87 (1ull << MLX4_EVENT_TYPE_WQ_CATAS_ERROR) | \
88 (1ull << MLX4_EVENT_TYPE_EEC_CATAS_ERROR) | \
89 (1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED) | \
90 (1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
91 (1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR) | \
92 (1ull << MLX4_EVENT_TYPE_PORT_CHANGE) | \
93 (1ull << MLX4_EVENT_TYPE_ECC_DETECT) | \
94 (1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR) | \
95 (1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE) | \
96 (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT) | \
97 (1ull << MLX4_EVENT_TYPE_CMD))
108 } __attribute__((packed
)) comp
;
116 } __attribute__((packed
)) cmd
;
119 } __attribute__((packed
)) qp
;
122 } __attribute__((packed
)) srq
;
128 } __attribute__((packed
)) cq_err
;
132 } __attribute__((packed
)) port_change
;
136 } __attribute__((packed
));
138 static void eq_set_ci(struct mlx4_eq
*eq
, int req_not
)
140 __raw_writel((__force u32
) cpu_to_be32((eq
->cons_index
& 0xffffff) |
143 /* We still want ordering, just not swabbing, so add a barrier */
147 static struct mlx4_eqe
*get_eqe(struct mlx4_eq
*eq
, u32 entry
)
149 unsigned long off
= (entry
& (eq
->nent
- 1)) * MLX4_EQ_ENTRY_SIZE
;
150 return eq
->page_list
[off
/ PAGE_SIZE
].buf
+ off
% PAGE_SIZE
;
153 static struct mlx4_eqe
*next_eqe_sw(struct mlx4_eq
*eq
)
155 struct mlx4_eqe
*eqe
= get_eqe(eq
, eq
->cons_index
);
156 return !!(eqe
->owner
& 0x80) ^ !!(eq
->cons_index
& eq
->nent
) ? NULL
: eqe
;
159 static int mlx4_eq_int(struct mlx4_dev
*dev
, struct mlx4_eq
*eq
)
161 struct mlx4_eqe
*eqe
;
166 while ((eqe
= next_eqe_sw(eq
))) {
168 * Make sure we read EQ entry contents after we've
169 * checked the ownership bit.
174 case MLX4_EVENT_TYPE_COMP
:
175 cqn
= be32_to_cpu(eqe
->event
.comp
.cqn
) & 0xffffff;
176 mlx4_cq_completion(dev
, cqn
);
179 case MLX4_EVENT_TYPE_PATH_MIG
:
180 case MLX4_EVENT_TYPE_COMM_EST
:
181 case MLX4_EVENT_TYPE_SQ_DRAINED
:
182 case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE
:
183 case MLX4_EVENT_TYPE_WQ_CATAS_ERROR
:
184 case MLX4_EVENT_TYPE_PATH_MIG_FAILED
:
185 case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR
:
186 case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR
:
187 mlx4_qp_event(dev
, be32_to_cpu(eqe
->event
.qp
.qpn
) & 0xffffff,
191 case MLX4_EVENT_TYPE_SRQ_LIMIT
:
192 case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR
:
193 mlx4_srq_event(dev
, be32_to_cpu(eqe
->event
.srq
.srqn
) & 0xffffff,
197 case MLX4_EVENT_TYPE_CMD
:
199 be16_to_cpu(eqe
->event
.cmd
.token
),
200 eqe
->event
.cmd
.status
,
201 be64_to_cpu(eqe
->event
.cmd
.out_param
));
204 case MLX4_EVENT_TYPE_PORT_CHANGE
:
205 mlx4_dispatch_event(dev
,
206 eqe
->subtype
== MLX4_PORT_CHANGE_SUBTYPE_ACTIVE
?
207 MLX4_DEV_EVENT_PORT_UP
:
208 MLX4_DEV_EVENT_PORT_DOWN
,
209 be32_to_cpu(eqe
->event
.port_change
.port
) >> 28);
212 case MLX4_EVENT_TYPE_CQ_ERROR
:
213 mlx4_warn(dev
, "CQ %s on CQN %06x\n",
214 eqe
->event
.cq_err
.syndrome
== 1 ?
215 "overrun" : "access violation",
216 be32_to_cpu(eqe
->event
.cq_err
.cqn
) & 0xffffff);
217 mlx4_cq_event(dev
, be32_to_cpu(eqe
->event
.cq_err
.cqn
),
221 case MLX4_EVENT_TYPE_EQ_OVERFLOW
:
222 mlx4_warn(dev
, "EQ overrun on EQN %d\n", eq
->eqn
);
225 case MLX4_EVENT_TYPE_EEC_CATAS_ERROR
:
226 case MLX4_EVENT_TYPE_ECC_DETECT
:
228 mlx4_warn(dev
, "Unhandled event %02x(%02x) on EQ %d at index %u\n",
229 eqe
->type
, eqe
->subtype
, eq
->eqn
, eq
->cons_index
);
238 * The HCA will think the queue has overflowed if we
239 * don't tell it we've been processing events. We
240 * create our EQs with MLX4_NUM_SPARE_EQE extra
241 * entries, so we must update our consumer index at
244 if (unlikely(set_ci
>= MLX4_NUM_SPARE_EQE
)) {
246 * Conditional on hca_type is OK here because
247 * this is a rare case, not the fast path.
259 static irqreturn_t
mlx4_interrupt(int irq
, void *dev_ptr
)
261 struct mlx4_dev
*dev
= dev_ptr
;
262 struct mlx4_priv
*priv
= mlx4_priv(dev
);
266 writel(priv
->eq_table
.clr_mask
, priv
->eq_table
.clr_int
);
268 for (i
= 0; i
< MLX4_NUM_EQ
; ++i
)
269 work
|= mlx4_eq_int(dev
, &priv
->eq_table
.eq
[i
]);
271 return IRQ_RETVAL(work
);
274 static irqreturn_t
mlx4_msi_x_interrupt(int irq
, void *eq_ptr
)
276 struct mlx4_eq
*eq
= eq_ptr
;
277 struct mlx4_dev
*dev
= eq
->dev
;
279 mlx4_eq_int(dev
, eq
);
281 /* MSI-X vectors always belong to us */
285 static int mlx4_MAP_EQ(struct mlx4_dev
*dev
, u64 event_mask
, int unmap
,
288 return mlx4_cmd(dev
, event_mask
, (unmap
<< 31) | eq_num
,
289 0, MLX4_CMD_MAP_EQ
, MLX4_CMD_TIME_CLASS_B
);
292 static int mlx4_SW2HW_EQ(struct mlx4_dev
*dev
, struct mlx4_cmd_mailbox
*mailbox
,
295 return mlx4_cmd(dev
, mailbox
->dma
, eq_num
, 0, MLX4_CMD_SW2HW_EQ
,
296 MLX4_CMD_TIME_CLASS_A
);
299 static int mlx4_HW2SW_EQ(struct mlx4_dev
*dev
, struct mlx4_cmd_mailbox
*mailbox
,
302 return mlx4_cmd_box(dev
, 0, mailbox
->dma
, eq_num
, 0, MLX4_CMD_HW2SW_EQ
,
303 MLX4_CMD_TIME_CLASS_A
);
306 static void __iomem
*mlx4_get_eq_uar(struct mlx4_dev
*dev
, struct mlx4_eq
*eq
)
308 struct mlx4_priv
*priv
= mlx4_priv(dev
);
311 index
= eq
->eqn
/ 4 - dev
->caps
.reserved_eqs
/ 4;
313 if (!priv
->eq_table
.uar_map
[index
]) {
314 priv
->eq_table
.uar_map
[index
] =
315 ioremap(pci_resource_start(dev
->pdev
, 2) +
316 ((eq
->eqn
/ 4) << PAGE_SHIFT
),
318 if (!priv
->eq_table
.uar_map
[index
]) {
319 mlx4_err(dev
, "Couldn't map EQ doorbell for EQN 0x%06x\n",
325 return priv
->eq_table
.uar_map
[index
] + 0x800 + 8 * (eq
->eqn
% 4);
328 static int mlx4_create_eq(struct mlx4_dev
*dev
, int nent
,
329 u8 intr
, struct mlx4_eq
*eq
)
331 struct mlx4_priv
*priv
= mlx4_priv(dev
);
332 struct mlx4_cmd_mailbox
*mailbox
;
333 struct mlx4_eq_context
*eq_context
;
335 u64
*dma_list
= NULL
;
342 eq
->nent
= roundup_pow_of_two(max(nent
, 2));
343 npages
= PAGE_ALIGN(eq
->nent
* MLX4_EQ_ENTRY_SIZE
) / PAGE_SIZE
;
345 eq
->page_list
= kmalloc(npages
* sizeof *eq
->page_list
,
350 for (i
= 0; i
< npages
; ++i
)
351 eq
->page_list
[i
].buf
= NULL
;
353 dma_list
= kmalloc(npages
* sizeof *dma_list
, GFP_KERNEL
);
357 mailbox
= mlx4_alloc_cmd_mailbox(dev
);
360 eq_context
= mailbox
->buf
;
362 for (i
= 0; i
< npages
; ++i
) {
363 eq
->page_list
[i
].buf
= dma_alloc_coherent(&dev
->pdev
->dev
,
364 PAGE_SIZE
, &t
, GFP_KERNEL
);
365 if (!eq
->page_list
[i
].buf
)
366 goto err_out_free_pages
;
369 eq
->page_list
[i
].map
= t
;
371 memset(eq
->page_list
[i
].buf
, 0, PAGE_SIZE
);
374 eq
->eqn
= mlx4_bitmap_alloc(&priv
->eq_table
.bitmap
);
376 goto err_out_free_pages
;
378 eq
->doorbell
= mlx4_get_eq_uar(dev
, eq
);
381 goto err_out_free_eq
;
384 err
= mlx4_mtt_init(dev
, npages
, PAGE_SHIFT
, &eq
->mtt
);
386 goto err_out_free_eq
;
388 err
= mlx4_write_mtt(dev
, &eq
->mtt
, 0, npages
, dma_list
);
390 goto err_out_free_mtt
;
392 memset(eq_context
, 0, sizeof *eq_context
);
393 eq_context
->flags
= cpu_to_be32(MLX4_EQ_STATUS_OK
|
394 MLX4_EQ_STATE_ARMED
);
395 eq_context
->log_eq_size
= ilog2(eq
->nent
);
396 eq_context
->intr
= intr
;
397 eq_context
->log_page_size
= PAGE_SHIFT
- MLX4_ICM_PAGE_SHIFT
;
399 mtt_addr
= mlx4_mtt_addr(dev
, &eq
->mtt
);
400 eq_context
->mtt_base_addr_h
= mtt_addr
>> 32;
401 eq_context
->mtt_base_addr_l
= cpu_to_be32(mtt_addr
& 0xffffffff);
403 err
= mlx4_SW2HW_EQ(dev
, mailbox
, eq
->eqn
);
405 mlx4_warn(dev
, "SW2HW_EQ failed (%d)\n", err
);
406 goto err_out_free_mtt
;
410 mlx4_free_cmd_mailbox(dev
, mailbox
);
417 mlx4_mtt_cleanup(dev
, &eq
->mtt
);
420 mlx4_bitmap_free(&priv
->eq_table
.bitmap
, eq
->eqn
);
423 for (i
= 0; i
< npages
; ++i
)
424 if (eq
->page_list
[i
].buf
)
425 dma_free_coherent(&dev
->pdev
->dev
, PAGE_SIZE
,
426 eq
->page_list
[i
].buf
,
427 eq
->page_list
[i
].map
);
429 mlx4_free_cmd_mailbox(dev
, mailbox
);
432 kfree(eq
->page_list
);
439 static void mlx4_free_eq(struct mlx4_dev
*dev
,
442 struct mlx4_priv
*priv
= mlx4_priv(dev
);
443 struct mlx4_cmd_mailbox
*mailbox
;
445 int npages
= PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE
* eq
->nent
) / PAGE_SIZE
;
448 mailbox
= mlx4_alloc_cmd_mailbox(dev
);
452 err
= mlx4_HW2SW_EQ(dev
, mailbox
, eq
->eqn
);
454 mlx4_warn(dev
, "HW2SW_EQ failed (%d)\n", err
);
457 mlx4_dbg(dev
, "Dumping EQ context %02x:\n", eq
->eqn
);
458 for (i
= 0; i
< sizeof (struct mlx4_eq_context
) / 4; ++i
) {
460 printk("[%02x] ", i
* 4);
461 printk(" %08x", be32_to_cpup(mailbox
->buf
+ i
* 4));
462 if ((i
+ 1) % 4 == 0)
467 mlx4_mtt_cleanup(dev
, &eq
->mtt
);
468 for (i
= 0; i
< npages
; ++i
)
469 pci_free_consistent(dev
->pdev
, PAGE_SIZE
,
470 eq
->page_list
[i
].buf
,
471 eq
->page_list
[i
].map
);
473 kfree(eq
->page_list
);
474 mlx4_bitmap_free(&priv
->eq_table
.bitmap
, eq
->eqn
);
475 mlx4_free_cmd_mailbox(dev
, mailbox
);
478 static void mlx4_free_irqs(struct mlx4_dev
*dev
)
480 struct mlx4_eq_table
*eq_table
= &mlx4_priv(dev
)->eq_table
;
483 if (eq_table
->have_irq
)
484 free_irq(dev
->pdev
->irq
, dev
);
485 for (i
= 0; i
< MLX4_NUM_EQ
; ++i
)
486 if (eq_table
->eq
[i
].have_irq
)
487 free_irq(eq_table
->eq
[i
].irq
, eq_table
->eq
+ i
);
490 static int mlx4_map_clr_int(struct mlx4_dev
*dev
)
492 struct mlx4_priv
*priv
= mlx4_priv(dev
);
494 priv
->clr_base
= ioremap(pci_resource_start(dev
->pdev
, priv
->fw
.clr_int_bar
) +
495 priv
->fw
.clr_int_base
, MLX4_CLR_INT_SIZE
);
496 if (!priv
->clr_base
) {
497 mlx4_err(dev
, "Couldn't map interrupt clear register, aborting.\n");
504 static void mlx4_unmap_clr_int(struct mlx4_dev
*dev
)
506 struct mlx4_priv
*priv
= mlx4_priv(dev
);
508 iounmap(priv
->clr_base
);
511 int mlx4_map_eq_icm(struct mlx4_dev
*dev
, u64 icm_virt
)
513 struct mlx4_priv
*priv
= mlx4_priv(dev
);
517 * We assume that mapping one page is enough for the whole EQ
518 * context table. This is fine with all current HCAs, because
519 * we only use 32 EQs and each EQ uses 64 bytes of context
520 * memory, or 2 KB total.
522 priv
->eq_table
.icm_virt
= icm_virt
;
523 priv
->eq_table
.icm_page
= alloc_page(GFP_HIGHUSER
);
524 if (!priv
->eq_table
.icm_page
)
526 priv
->eq_table
.icm_dma
= pci_map_page(dev
->pdev
, priv
->eq_table
.icm_page
, 0,
527 PAGE_SIZE
, PCI_DMA_BIDIRECTIONAL
);
528 if (pci_dma_mapping_error(priv
->eq_table
.icm_dma
)) {
529 __free_page(priv
->eq_table
.icm_page
);
533 ret
= mlx4_MAP_ICM_page(dev
, priv
->eq_table
.icm_dma
, icm_virt
);
535 pci_unmap_page(dev
->pdev
, priv
->eq_table
.icm_dma
, PAGE_SIZE
,
536 PCI_DMA_BIDIRECTIONAL
);
537 __free_page(priv
->eq_table
.icm_page
);
543 void mlx4_unmap_eq_icm(struct mlx4_dev
*dev
)
545 struct mlx4_priv
*priv
= mlx4_priv(dev
);
547 mlx4_UNMAP_ICM(dev
, priv
->eq_table
.icm_virt
, 1);
548 pci_unmap_page(dev
->pdev
, priv
->eq_table
.icm_dma
, PAGE_SIZE
,
549 PCI_DMA_BIDIRECTIONAL
);
550 __free_page(priv
->eq_table
.icm_page
);
553 int mlx4_init_eq_table(struct mlx4_dev
*dev
)
555 struct mlx4_priv
*priv
= mlx4_priv(dev
);
559 err
= mlx4_bitmap_init(&priv
->eq_table
.bitmap
, dev
->caps
.num_eqs
,
560 dev
->caps
.num_eqs
- 1, dev
->caps
.reserved_eqs
);
564 for (i
= 0; i
< ARRAY_SIZE(priv
->eq_table
.uar_map
); ++i
)
565 priv
->eq_table
.uar_map
[i
] = NULL
;
567 err
= mlx4_map_clr_int(dev
);
571 priv
->eq_table
.clr_mask
=
572 swab32(1 << (priv
->eq_table
.inta_pin
& 31));
573 priv
->eq_table
.clr_int
= priv
->clr_base
+
574 (priv
->eq_table
.inta_pin
< 32 ? 4 : 0);
576 err
= mlx4_create_eq(dev
, dev
->caps
.num_cqs
+ MLX4_NUM_SPARE_EQE
,
577 (dev
->flags
& MLX4_FLAG_MSI_X
) ? MLX4_EQ_COMP
: 0,
578 &priv
->eq_table
.eq
[MLX4_EQ_COMP
]);
582 err
= mlx4_create_eq(dev
, MLX4_NUM_ASYNC_EQE
+ MLX4_NUM_SPARE_EQE
,
583 (dev
->flags
& MLX4_FLAG_MSI_X
) ? MLX4_EQ_ASYNC
: 0,
584 &priv
->eq_table
.eq
[MLX4_EQ_ASYNC
]);
588 if (dev
->flags
& MLX4_FLAG_MSI_X
) {
589 static const char *eq_name
[] = {
590 [MLX4_EQ_COMP
] = DRV_NAME
" (comp)",
591 [MLX4_EQ_ASYNC
] = DRV_NAME
" (async)"
594 for (i
= 0; i
< MLX4_NUM_EQ
; ++i
) {
595 err
= request_irq(priv
->eq_table
.eq
[i
].irq
,
596 mlx4_msi_x_interrupt
,
597 0, eq_name
[i
], priv
->eq_table
.eq
+ i
);
601 priv
->eq_table
.eq
[i
].have_irq
= 1;
605 err
= request_irq(dev
->pdev
->irq
, mlx4_interrupt
,
606 IRQF_SHARED
, DRV_NAME
, dev
);
610 priv
->eq_table
.have_irq
= 1;
613 err
= mlx4_MAP_EQ(dev
, MLX4_ASYNC_EVENT_MASK
, 0,
614 priv
->eq_table
.eq
[MLX4_EQ_ASYNC
].eqn
);
616 mlx4_warn(dev
, "MAP_EQ for async EQ %d failed (%d)\n",
617 priv
->eq_table
.eq
[MLX4_EQ_ASYNC
].eqn
, err
);
619 for (i
= 0; i
< MLX4_NUM_EQ
; ++i
)
620 eq_set_ci(&priv
->eq_table
.eq
[i
], 1);
625 mlx4_free_eq(dev
, &priv
->eq_table
.eq
[MLX4_EQ_ASYNC
]);
628 mlx4_free_eq(dev
, &priv
->eq_table
.eq
[MLX4_EQ_COMP
]);
631 mlx4_unmap_clr_int(dev
);
635 mlx4_bitmap_cleanup(&priv
->eq_table
.bitmap
);
639 void mlx4_cleanup_eq_table(struct mlx4_dev
*dev
)
641 struct mlx4_priv
*priv
= mlx4_priv(dev
);
644 mlx4_MAP_EQ(dev
, MLX4_ASYNC_EVENT_MASK
, 1,
645 priv
->eq_table
.eq
[MLX4_EQ_ASYNC
].eqn
);
649 for (i
= 0; i
< MLX4_NUM_EQ
; ++i
)
650 mlx4_free_eq(dev
, &priv
->eq_table
.eq
[i
]);
652 mlx4_unmap_clr_int(dev
);
654 for (i
= 0; i
< ARRAY_SIZE(priv
->eq_table
.uar_map
); ++i
)
655 if (priv
->eq_table
.uar_map
[i
])
656 iounmap(priv
->eq_table
.uar_map
[i
]);
658 mlx4_bitmap_cleanup(&priv
->eq_table
.bitmap
);