2 * Copyright (c) 2006 QLogic, Inc. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #include <rdma/ib_mad.h>
35 #include <rdma/ib_user_verbs.h>
36 #include <linux/utsname.h>
38 #include "ipath_kernel.h"
39 #include "ipath_verbs.h"
40 #include "ipath_common.h"
42 /* Not static, because we don't want the compiler removing it */
43 const char ipath_verbs_version
[] = "ipath_verbs " IPATH_IDSTR
;
45 static unsigned int ib_ipath_qp_table_size
= 251;
46 module_param_named(qp_table_size
, ib_ipath_qp_table_size
, uint
, S_IRUGO
);
47 MODULE_PARM_DESC(qp_table_size
, "QP table size");
49 unsigned int ib_ipath_lkey_table_size
= 12;
50 module_param_named(lkey_table_size
, ib_ipath_lkey_table_size
, uint
,
52 MODULE_PARM_DESC(lkey_table_size
,
53 "LKEY table size in bits (2^n, 1 <= n <= 23)");
55 unsigned int ib_ipath_debug
; /* debug mask */
56 module_param_named(debug
, ib_ipath_debug
, uint
, S_IWUSR
| S_IRUGO
);
57 MODULE_PARM_DESC(debug
, "Verbs debug mask");
59 static unsigned int ib_ipath_max_pds
= 0xFFFF;
60 module_param_named(max_pds
, ib_ipath_max_pds
, uint
, S_IWUSR
| S_IRUGO
);
61 MODULE_PARM_DESC(max_pds
,
62 "Maximum number of protection domains to support");
64 static unsigned int ib_ipath_max_ahs
= 0xFFFF;
65 module_param_named(max_ahs
, ib_ipath_max_ahs
, uint
, S_IWUSR
| S_IRUGO
);
66 MODULE_PARM_DESC(max_ahs
, "Maximum number of address handles to support");
68 unsigned int ib_ipath_max_cqes
= 0x2FFFF;
69 module_param_named(max_cqes
, ib_ipath_max_cqes
, uint
, S_IWUSR
| S_IRUGO
);
70 MODULE_PARM_DESC(max_cqes
,
71 "Maximum number of completion queue entries to support");
73 unsigned int ib_ipath_max_cqs
= 0x1FFFF;
74 module_param_named(max_cqs
, ib_ipath_max_cqs
, uint
, S_IWUSR
| S_IRUGO
);
75 MODULE_PARM_DESC(max_cqs
, "Maximum number of completion queues to support");
77 unsigned int ib_ipath_max_qp_wrs
= 0x3FFF;
78 module_param_named(max_qp_wrs
, ib_ipath_max_qp_wrs
, uint
,
80 MODULE_PARM_DESC(max_qp_wrs
, "Maximum number of QP WRs to support");
82 unsigned int ib_ipath_max_sges
= 0x60;
83 module_param_named(max_sges
, ib_ipath_max_sges
, uint
, S_IWUSR
| S_IRUGO
);
84 MODULE_PARM_DESC(max_sges
, "Maximum number of SGEs to support");
86 unsigned int ib_ipath_max_mcast_grps
= 16384;
87 module_param_named(max_mcast_grps
, ib_ipath_max_mcast_grps
, uint
,
89 MODULE_PARM_DESC(max_mcast_grps
,
90 "Maximum number of multicast groups to support");
92 unsigned int ib_ipath_max_mcast_qp_attached
= 16;
93 module_param_named(max_mcast_qp_attached
, ib_ipath_max_mcast_qp_attached
,
94 uint
, S_IWUSR
| S_IRUGO
);
95 MODULE_PARM_DESC(max_mcast_qp_attached
,
96 "Maximum number of attached QPs to support");
98 unsigned int ib_ipath_max_srqs
= 1024;
99 module_param_named(max_srqs
, ib_ipath_max_srqs
, uint
, S_IWUSR
| S_IRUGO
);
100 MODULE_PARM_DESC(max_srqs
, "Maximum number of SRQs to support");
102 unsigned int ib_ipath_max_srq_sges
= 128;
103 module_param_named(max_srq_sges
, ib_ipath_max_srq_sges
,
104 uint
, S_IWUSR
| S_IRUGO
);
105 MODULE_PARM_DESC(max_srq_sges
, "Maximum number of SRQ SGEs to support");
107 unsigned int ib_ipath_max_srq_wrs
= 0x1FFFF;
108 module_param_named(max_srq_wrs
, ib_ipath_max_srq_wrs
,
109 uint
, S_IWUSR
| S_IRUGO
);
110 MODULE_PARM_DESC(max_srq_wrs
, "Maximum number of SRQ WRs support");
112 MODULE_LICENSE("GPL");
113 MODULE_AUTHOR("QLogic <support@pathscale.com>");
114 MODULE_DESCRIPTION("QLogic InfiniPath driver");
116 const int ib_ipath_state_ops
[IB_QPS_ERR
+ 1] = {
118 [IB_QPS_INIT
] = IPATH_POST_RECV_OK
,
119 [IB_QPS_RTR
] = IPATH_POST_RECV_OK
| IPATH_PROCESS_RECV_OK
,
120 [IB_QPS_RTS
] = IPATH_POST_RECV_OK
| IPATH_PROCESS_RECV_OK
|
121 IPATH_POST_SEND_OK
| IPATH_PROCESS_SEND_OK
,
122 [IB_QPS_SQD
] = IPATH_POST_RECV_OK
| IPATH_PROCESS_RECV_OK
|
124 [IB_QPS_SQE
] = IPATH_POST_RECV_OK
| IPATH_PROCESS_RECV_OK
,
129 * Translate ib_wr_opcode into ib_wc_opcode.
131 const enum ib_wc_opcode ib_ipath_wc_opcode
[] = {
132 [IB_WR_RDMA_WRITE
] = IB_WC_RDMA_WRITE
,
133 [IB_WR_RDMA_WRITE_WITH_IMM
] = IB_WC_RDMA_WRITE
,
134 [IB_WR_SEND
] = IB_WC_SEND
,
135 [IB_WR_SEND_WITH_IMM
] = IB_WC_SEND
,
136 [IB_WR_RDMA_READ
] = IB_WC_RDMA_READ
,
137 [IB_WR_ATOMIC_CMP_AND_SWP
] = IB_WC_COMP_SWAP
,
138 [IB_WR_ATOMIC_FETCH_AND_ADD
] = IB_WC_FETCH_ADD
144 static __be64 sys_image_guid
;
147 * ipath_copy_sge - copy data to SGE memory
149 * @data: the data to copy
150 * @length: the length of the data
152 void ipath_copy_sge(struct ipath_sge_state
*ss
, void *data
, u32 length
)
154 struct ipath_sge
*sge
= &ss
->sge
;
157 u32 len
= sge
->length
;
162 memcpy(sge
->vaddr
, data
, len
);
165 sge
->sge_length
-= len
;
166 if (sge
->sge_length
== 0) {
168 *sge
= *ss
->sg_list
++;
169 } else if (sge
->length
== 0 && sge
->mr
!= NULL
) {
170 if (++sge
->n
>= IPATH_SEGSZ
) {
171 if (++sge
->m
>= sge
->mr
->mapsz
)
176 sge
->mr
->map
[sge
->m
]->segs
[sge
->n
].vaddr
;
178 sge
->mr
->map
[sge
->m
]->segs
[sge
->n
].length
;
186 * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
188 * @length: the number of bytes to skip
190 void ipath_skip_sge(struct ipath_sge_state
*ss
, u32 length
)
192 struct ipath_sge
*sge
= &ss
->sge
;
195 u32 len
= sge
->length
;
202 sge
->sge_length
-= len
;
203 if (sge
->sge_length
== 0) {
205 *sge
= *ss
->sg_list
++;
206 } else if (sge
->length
== 0 && sge
->mr
!= NULL
) {
207 if (++sge
->n
>= IPATH_SEGSZ
) {
208 if (++sge
->m
>= sge
->mr
->mapsz
)
213 sge
->mr
->map
[sge
->m
]->segs
[sge
->n
].vaddr
;
215 sge
->mr
->map
[sge
->m
]->segs
[sge
->n
].length
;
222 * ipath_post_send - post a send on a QP
223 * @ibqp: the QP to post the send on
224 * @wr: the list of work requests to post
225 * @bad_wr: the first bad WR is put here
227 * This may be called from interrupt context.
229 static int ipath_post_send(struct ib_qp
*ibqp
, struct ib_send_wr
*wr
,
230 struct ib_send_wr
**bad_wr
)
232 struct ipath_qp
*qp
= to_iqp(ibqp
);
235 /* Check that state is OK to post send. */
236 if (!(ib_ipath_state_ops
[qp
->state
] & IPATH_POST_SEND_OK
)) {
242 for (; wr
; wr
= wr
->next
) {
243 switch (qp
->ibqp
.qp_type
) {
246 err
= ipath_post_ruc_send(qp
, wr
);
252 err
= ipath_post_ud_send(qp
, wr
);
269 * ipath_post_receive - post a receive on a QP
270 * @ibqp: the QP to post the receive on
271 * @wr: the WR to post
272 * @bad_wr: the first bad WR is put here
274 * This may be called from interrupt context.
276 static int ipath_post_receive(struct ib_qp
*ibqp
, struct ib_recv_wr
*wr
,
277 struct ib_recv_wr
**bad_wr
)
279 struct ipath_qp
*qp
= to_iqp(ibqp
);
280 struct ipath_rwq
*wq
= qp
->r_rq
.wq
;
284 /* Check that state is OK to post receive. */
285 if (!(ib_ipath_state_ops
[qp
->state
] & IPATH_POST_RECV_OK
) || !wq
) {
291 for (; wr
; wr
= wr
->next
) {
292 struct ipath_rwqe
*wqe
;
296 if ((unsigned) wr
->num_sge
> qp
->r_rq
.max_sge
) {
302 spin_lock_irqsave(&qp
->r_rq
.lock
, flags
);
304 if (next
>= qp
->r_rq
.size
)
306 if (next
== wq
->tail
) {
307 spin_unlock_irqrestore(&qp
->r_rq
.lock
, flags
);
313 wqe
= get_rwqe_ptr(&qp
->r_rq
, wq
->head
);
314 wqe
->wr_id
= wr
->wr_id
;
315 wqe
->num_sge
= wr
->num_sge
;
316 for (i
= 0; i
< wr
->num_sge
; i
++)
317 wqe
->sg_list
[i
] = wr
->sg_list
[i
];
319 spin_unlock_irqrestore(&qp
->r_rq
.lock
, flags
);
328 * ipath_qp_rcv - processing an incoming packet on a QP
329 * @dev: the device the packet came on
330 * @hdr: the packet header
331 * @has_grh: true if the packet has a GRH
332 * @data: the packet data
333 * @tlen: the packet length
334 * @qp: the QP the packet came on
336 * This is called from ipath_ib_rcv() to process an incoming packet
338 * Called at interrupt level.
340 static void ipath_qp_rcv(struct ipath_ibdev
*dev
,
341 struct ipath_ib_header
*hdr
, int has_grh
,
342 void *data
, u32 tlen
, struct ipath_qp
*qp
)
344 /* Check for valid receive state. */
345 if (!(ib_ipath_state_ops
[qp
->state
] & IPATH_PROCESS_RECV_OK
)) {
350 switch (qp
->ibqp
.qp_type
) {
354 ipath_ud_rcv(dev
, hdr
, has_grh
, data
, tlen
, qp
);
358 ipath_rc_rcv(dev
, hdr
, has_grh
, data
, tlen
, qp
);
362 ipath_uc_rcv(dev
, hdr
, has_grh
, data
, tlen
, qp
);
371 * ipath_ib_rcv - process an incoming packet
372 * @arg: the device pointer
373 * @rhdr: the header of the packet
374 * @data: the packet data
375 * @tlen: the packet length
377 * This is called from ipath_kreceive() to process an incoming packet at
378 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
380 static void ipath_ib_rcv(void *arg
, void *rhdr
, void *data
, u32 tlen
)
382 struct ipath_ibdev
*dev
= (struct ipath_ibdev
*) arg
;
383 struct ipath_ib_header
*hdr
= rhdr
;
384 struct ipath_other_headers
*ohdr
;
391 if (unlikely(dev
== NULL
))
394 if (unlikely(tlen
< 24)) { /* LRH+BTH+CRC */
399 /* Check for a valid destination LID (see ch. 7.11.1). */
400 lid
= be16_to_cpu(hdr
->lrh
[1]);
401 if (lid
< IPATH_MULTICAST_LID_BASE
) {
402 lid
&= ~((1 << (dev
->mkeyprot_resv_lmc
& 7)) - 1);
403 if (unlikely(lid
!= ipath_layer_get_lid(dev
->dd
))) {
410 lnh
= be16_to_cpu(hdr
->lrh
[0]) & 3;
411 if (lnh
== IPATH_LRH_BTH
)
413 else if (lnh
== IPATH_LRH_GRH
)
414 ohdr
= &hdr
->u
.l
.oth
;
420 opcode
= be32_to_cpu(ohdr
->bth
[0]) >> 24;
421 dev
->opstats
[opcode
].n_bytes
+= tlen
;
422 dev
->opstats
[opcode
].n_packets
++;
424 /* Get the destination QP number. */
425 qp_num
= be32_to_cpu(ohdr
->bth
[1]) & IPATH_QPN_MASK
;
426 if (qp_num
== IPATH_MULTICAST_QPN
) {
427 struct ipath_mcast
*mcast
;
428 struct ipath_mcast_qp
*p
;
430 mcast
= ipath_mcast_find(&hdr
->u
.l
.grh
.dgid
);
435 dev
->n_multicast_rcv
++;
436 list_for_each_entry_rcu(p
, &mcast
->qp_list
, list
)
437 ipath_qp_rcv(dev
, hdr
, lnh
== IPATH_LRH_GRH
, data
,
440 * Notify ipath_multicast_detach() if it is waiting for us
443 if (atomic_dec_return(&mcast
->refcount
) <= 1)
444 wake_up(&mcast
->wait
);
446 qp
= ipath_lookup_qpn(&dev
->qp_table
, qp_num
);
448 dev
->n_unicast_rcv
++;
449 ipath_qp_rcv(dev
, hdr
, lnh
== IPATH_LRH_GRH
, data
,
452 * Notify ipath_destroy_qp() if it is waiting
455 if (atomic_dec_and_test(&qp
->refcount
))
465 * ipath_ib_timer - verbs timer
466 * @arg: the device pointer
468 * This is called from ipath_do_rcv_timer() at interrupt level to check for
469 * QPs which need retransmits and to collect performance numbers.
471 static void ipath_ib_timer(void *arg
)
473 struct ipath_ibdev
*dev
= (struct ipath_ibdev
*) arg
;
474 struct ipath_qp
*resend
= NULL
;
475 struct list_head
*last
;
482 spin_lock_irqsave(&dev
->pending_lock
, flags
);
483 /* Start filling the next pending queue. */
484 if (++dev
->pending_index
>= ARRAY_SIZE(dev
->pending
))
485 dev
->pending_index
= 0;
486 /* Save any requests still in the new queue, they have timed out. */
487 last
= &dev
->pending
[dev
->pending_index
];
488 while (!list_empty(last
)) {
489 qp
= list_entry(last
->next
, struct ipath_qp
, timerwait
);
490 list_del_init(&qp
->timerwait
);
491 qp
->timer_next
= resend
;
493 atomic_inc(&qp
->refcount
);
495 last
= &dev
->rnrwait
;
496 if (!list_empty(last
)) {
497 qp
= list_entry(last
->next
, struct ipath_qp
, timerwait
);
498 if (--qp
->s_rnr_timeout
== 0) {
500 list_del_init(&qp
->timerwait
);
501 tasklet_hi_schedule(&qp
->s_task
);
502 if (list_empty(last
))
504 qp
= list_entry(last
->next
, struct ipath_qp
,
506 } while (qp
->s_rnr_timeout
== 0);
510 * We should only be in the started state if pma_sample_start != 0
512 if (dev
->pma_sample_status
== IB_PMA_SAMPLE_STATUS_STARTED
&&
513 --dev
->pma_sample_start
== 0) {
514 dev
->pma_sample_status
= IB_PMA_SAMPLE_STATUS_RUNNING
;
515 ipath_layer_snapshot_counters(dev
->dd
, &dev
->ipath_sword
,
519 &dev
->ipath_xmit_wait
);
521 if (dev
->pma_sample_status
== IB_PMA_SAMPLE_STATUS_RUNNING
) {
522 if (dev
->pma_sample_interval
== 0) {
523 u64 ta
, tb
, tc
, td
, te
;
525 dev
->pma_sample_status
= IB_PMA_SAMPLE_STATUS_DONE
;
526 ipath_layer_snapshot_counters(dev
->dd
, &ta
, &tb
,
529 dev
->ipath_sword
= ta
- dev
->ipath_sword
;
530 dev
->ipath_rword
= tb
- dev
->ipath_rword
;
531 dev
->ipath_spkts
= tc
- dev
->ipath_spkts
;
532 dev
->ipath_rpkts
= td
- dev
->ipath_rpkts
;
533 dev
->ipath_xmit_wait
= te
- dev
->ipath_xmit_wait
;
536 dev
->pma_sample_interval
--;
538 spin_unlock_irqrestore(&dev
->pending_lock
, flags
);
540 /* XXX What if timer fires again while this is running? */
541 for (qp
= resend
; qp
!= NULL
; qp
= qp
->timer_next
) {
544 spin_lock_irqsave(&qp
->s_lock
, flags
);
545 if (qp
->s_last
!= qp
->s_tail
&& qp
->state
== IB_QPS_RTS
) {
547 ipath_restart_rc(qp
, qp
->s_last_psn
+ 1, &wc
);
549 spin_unlock_irqrestore(&qp
->s_lock
, flags
);
551 /* Notify ipath_destroy_qp() if it is waiting. */
552 if (atomic_dec_and_test(&qp
->refcount
))
558 * ipath_ib_piobufavail - callback when a PIO buffer is available
559 * @arg: the device pointer
561 * This is called from ipath_intr() at interrupt level when a PIO buffer is
562 * available after ipath_verbs_send() returned an error that no buffers were
563 * available. Return 1 if we consumed all the PIO buffers and we still have
564 * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and
567 static int ipath_ib_piobufavail(void *arg
)
569 struct ipath_ibdev
*dev
= (struct ipath_ibdev
*) arg
;
576 spin_lock_irqsave(&dev
->pending_lock
, flags
);
577 while (!list_empty(&dev
->piowait
)) {
578 qp
= list_entry(dev
->piowait
.next
, struct ipath_qp
,
580 list_del_init(&qp
->piowait
);
581 tasklet_hi_schedule(&qp
->s_task
);
583 spin_unlock_irqrestore(&dev
->pending_lock
, flags
);
589 static int ipath_query_device(struct ib_device
*ibdev
,
590 struct ib_device_attr
*props
)
592 struct ipath_ibdev
*dev
= to_idev(ibdev
);
594 memset(props
, 0, sizeof(*props
));
596 props
->device_cap_flags
= IB_DEVICE_BAD_PKEY_CNTR
|
597 IB_DEVICE_BAD_QKEY_CNTR
| IB_DEVICE_SHUTDOWN_PORT
|
598 IB_DEVICE_SYS_IMAGE_GUID
;
599 props
->page_size_cap
= PAGE_SIZE
;
600 props
->vendor_id
= ipath_layer_get_vendorid(dev
->dd
);
601 props
->vendor_part_id
= ipath_layer_get_deviceid(dev
->dd
);
602 props
->hw_ver
= ipath_layer_get_pcirev(dev
->dd
);
604 props
->sys_image_guid
= dev
->sys_image_guid
;
606 props
->max_mr_size
= ~0ull;
607 props
->max_qp
= dev
->qp_table
.max
;
608 props
->max_qp_wr
= ib_ipath_max_qp_wrs
;
609 props
->max_sge
= ib_ipath_max_sges
;
610 props
->max_cq
= ib_ipath_max_cqs
;
611 props
->max_ah
= ib_ipath_max_ahs
;
612 props
->max_cqe
= ib_ipath_max_cqes
;
613 props
->max_mr
= dev
->lk_table
.max
;
614 props
->max_pd
= ib_ipath_max_pds
;
615 props
->max_qp_rd_atom
= 1;
616 props
->max_qp_init_rd_atom
= 1;
617 /* props->max_res_rd_atom */
618 props
->max_srq
= ib_ipath_max_srqs
;
619 props
->max_srq_wr
= ib_ipath_max_srq_wrs
;
620 props
->max_srq_sge
= ib_ipath_max_srq_sges
;
621 /* props->local_ca_ack_delay */
622 props
->atomic_cap
= IB_ATOMIC_HCA
;
623 props
->max_pkeys
= ipath_layer_get_npkeys(dev
->dd
);
624 props
->max_mcast_grp
= ib_ipath_max_mcast_grps
;
625 props
->max_mcast_qp_attach
= ib_ipath_max_mcast_qp_attached
;
626 props
->max_total_mcast_qp_attach
= props
->max_mcast_qp_attach
*
627 props
->max_mcast_grp
;
632 const u8 ipath_cvt_physportstate
[16] = {
633 [INFINIPATH_IBCS_LT_STATE_DISABLED
] = 3,
634 [INFINIPATH_IBCS_LT_STATE_LINKUP
] = 5,
635 [INFINIPATH_IBCS_LT_STATE_POLLACTIVE
] = 2,
636 [INFINIPATH_IBCS_LT_STATE_POLLQUIET
] = 2,
637 [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY
] = 1,
638 [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET
] = 1,
639 [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE
] = 4,
640 [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG
] = 4,
641 [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT
] = 4,
642 [INFINIPATH_IBCS_LT_STATE_CFGIDLE
] = 4,
643 [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN
] = 6,
644 [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT
] = 6,
645 [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE
] = 6,
648 static int ipath_query_port(struct ib_device
*ibdev
,
649 u8 port
, struct ib_port_attr
*props
)
651 struct ipath_ibdev
*dev
= to_idev(ibdev
);
653 u16 lid
= ipath_layer_get_lid(dev
->dd
);
656 memset(props
, 0, sizeof(*props
));
657 props
->lid
= lid
? lid
: __constant_be16_to_cpu(IB_LID_PERMISSIVE
);
658 props
->lmc
= dev
->mkeyprot_resv_lmc
& 7;
659 props
->sm_lid
= dev
->sm_lid
;
660 props
->sm_sl
= dev
->sm_sl
;
661 ibcstat
= ipath_layer_get_lastibcstat(dev
->dd
);
662 props
->state
= ((ibcstat
>> 4) & 0x3) + 1;
663 /* See phys_state_show() */
664 props
->phys_state
= ipath_cvt_physportstate
[
665 ipath_layer_get_lastibcstat(dev
->dd
) & 0xf];
666 props
->port_cap_flags
= dev
->port_cap_flags
;
667 props
->gid_tbl_len
= 1;
668 props
->max_msg_sz
= 0x80000000;
669 props
->pkey_tbl_len
= ipath_layer_get_npkeys(dev
->dd
);
670 props
->bad_pkey_cntr
= ipath_layer_get_cr_errpkey(dev
->dd
) -
671 dev
->z_pkey_violations
;
672 props
->qkey_viol_cntr
= dev
->qkey_violations
;
673 props
->active_width
= IB_WIDTH_4X
;
674 /* See rate_show() */
675 props
->active_speed
= 1; /* Regular 10Mbs speed. */
676 props
->max_vl_num
= 1; /* VLCap = VL0 */
677 props
->init_type_reply
= 0;
679 props
->max_mtu
= IB_MTU_4096
;
680 switch (ipath_layer_get_ibmtu(dev
->dd
)) {
699 props
->active_mtu
= mtu
;
700 props
->subnet_timeout
= dev
->subnet_timeout
;
705 static int ipath_modify_device(struct ib_device
*device
,
706 int device_modify_mask
,
707 struct ib_device_modify
*device_modify
)
711 if (device_modify_mask
& ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID
|
712 IB_DEVICE_MODIFY_NODE_DESC
)) {
717 if (device_modify_mask
& IB_DEVICE_MODIFY_NODE_DESC
)
718 memcpy(device
->node_desc
, device_modify
->node_desc
, 64);
720 if (device_modify_mask
& IB_DEVICE_MODIFY_SYS_IMAGE_GUID
)
721 to_idev(device
)->sys_image_guid
=
722 cpu_to_be64(device_modify
->sys_image_guid
);
730 static int ipath_modify_port(struct ib_device
*ibdev
,
731 u8 port
, int port_modify_mask
,
732 struct ib_port_modify
*props
)
734 struct ipath_ibdev
*dev
= to_idev(ibdev
);
736 dev
->port_cap_flags
|= props
->set_port_cap_mask
;
737 dev
->port_cap_flags
&= ~props
->clr_port_cap_mask
;
738 if (port_modify_mask
& IB_PORT_SHUTDOWN
)
739 ipath_layer_set_linkstate(dev
->dd
, IPATH_IB_LINKDOWN
);
740 if (port_modify_mask
& IB_PORT_RESET_QKEY_CNTR
)
741 dev
->qkey_violations
= 0;
745 static int ipath_query_gid(struct ib_device
*ibdev
, u8 port
,
746 int index
, union ib_gid
*gid
)
748 struct ipath_ibdev
*dev
= to_idev(ibdev
);
755 gid
->global
.subnet_prefix
= dev
->gid_prefix
;
756 gid
->global
.interface_id
= ipath_layer_get_guid(dev
->dd
);
764 static struct ib_pd
*ipath_alloc_pd(struct ib_device
*ibdev
,
765 struct ib_ucontext
*context
,
766 struct ib_udata
*udata
)
768 struct ipath_ibdev
*dev
= to_idev(ibdev
);
773 * This is actually totally arbitrary. Some correctness tests
774 * assume there's a maximum number of PDs that can be allocated.
775 * We don't actually have this limit, but we fail the test if
776 * we allow allocations of more than we report for this value.
779 if (dev
->n_pds_allocated
== ib_ipath_max_pds
) {
780 ret
= ERR_PTR(-ENOMEM
);
784 pd
= kmalloc(sizeof *pd
, GFP_KERNEL
);
786 ret
= ERR_PTR(-ENOMEM
);
790 dev
->n_pds_allocated
++;
792 /* ib_alloc_pd() will initialize pd->ibpd. */
793 pd
->user
= udata
!= NULL
;
801 static int ipath_dealloc_pd(struct ib_pd
*ibpd
)
803 struct ipath_pd
*pd
= to_ipd(ibpd
);
804 struct ipath_ibdev
*dev
= to_idev(ibpd
->device
);
806 dev
->n_pds_allocated
--;
814 * ipath_create_ah - create an address handle
815 * @pd: the protection domain
816 * @ah_attr: the attributes of the AH
818 * This may be called from interrupt context.
820 static struct ib_ah
*ipath_create_ah(struct ib_pd
*pd
,
821 struct ib_ah_attr
*ah_attr
)
825 struct ipath_ibdev
*dev
= to_idev(pd
->device
);
827 if (dev
->n_ahs_allocated
== ib_ipath_max_ahs
) {
828 ret
= ERR_PTR(-ENOMEM
);
832 /* A multicast address requires a GRH (see ch. 8.4.1). */
833 if (ah_attr
->dlid
>= IPATH_MULTICAST_LID_BASE
&&
834 ah_attr
->dlid
!= IPATH_PERMISSIVE_LID
&&
835 !(ah_attr
->ah_flags
& IB_AH_GRH
)) {
836 ret
= ERR_PTR(-EINVAL
);
840 if (ah_attr
->dlid
== 0) {
841 ret
= ERR_PTR(-EINVAL
);
845 if (ah_attr
->port_num
< 1 ||
846 ah_attr
->port_num
> pd
->device
->phys_port_cnt
) {
847 ret
= ERR_PTR(-EINVAL
);
851 ah
= kmalloc(sizeof *ah
, GFP_ATOMIC
);
853 ret
= ERR_PTR(-ENOMEM
);
857 dev
->n_ahs_allocated
++;
859 /* ib_create_ah() will initialize ah->ibah. */
869 * ipath_destroy_ah - destroy an address handle
870 * @ibah: the AH to destroy
872 * This may be called from interrupt context.
874 static int ipath_destroy_ah(struct ib_ah
*ibah
)
876 struct ipath_ibdev
*dev
= to_idev(ibah
->device
);
877 struct ipath_ah
*ah
= to_iah(ibah
);
879 dev
->n_ahs_allocated
--;
886 static int ipath_query_ah(struct ib_ah
*ibah
, struct ib_ah_attr
*ah_attr
)
888 struct ipath_ah
*ah
= to_iah(ibah
);
895 static int ipath_query_pkey(struct ib_device
*ibdev
, u8 port
, u16 index
,
898 struct ipath_ibdev
*dev
= to_idev(ibdev
);
901 if (index
>= ipath_layer_get_npkeys(dev
->dd
)) {
906 *pkey
= ipath_layer_get_pkey(dev
->dd
, index
);
915 * ipath_alloc_ucontext - allocate a ucontext
916 * @ibdev: the infiniband device
917 * @udata: not used by the InfiniPath driver
920 static struct ib_ucontext
*ipath_alloc_ucontext(struct ib_device
*ibdev
,
921 struct ib_udata
*udata
)
923 struct ipath_ucontext
*context
;
924 struct ib_ucontext
*ret
;
926 context
= kmalloc(sizeof *context
, GFP_KERNEL
);
928 ret
= ERR_PTR(-ENOMEM
);
932 ret
= &context
->ibucontext
;
938 static int ipath_dealloc_ucontext(struct ib_ucontext
*context
)
940 kfree(to_iucontext(context
));
/*
 * Forward declaration: ipath_register_ib_device() calls this, but the
 * definition appears further down in this file.
 */
static int ipath_verbs_register_sysfs(struct ib_device *dev);
947 * ipath_register_ib_device - register our device with the infiniband core
948 * @unit: the device number to register
949 * @dd: the device data structure
950 * Return the allocated ipath_ibdev pointer or NULL on error.
952 static void *ipath_register_ib_device(int unit
, struct ipath_devdata
*dd
)
954 struct ipath_layer_counters cntrs
;
955 struct ipath_ibdev
*idev
;
956 struct ib_device
*dev
;
959 idev
= (struct ipath_ibdev
*)ib_alloc_device(sizeof *idev
);
965 /* Only need to initialize non-zero fields. */
966 spin_lock_init(&idev
->qp_table
.lock
);
967 spin_lock_init(&idev
->lk_table
.lock
);
968 idev
->sm_lid
= __constant_be16_to_cpu(IB_LID_PERMISSIVE
);
969 /* Set the prefix to the default value (see ch. 4.1.1) */
970 idev
->gid_prefix
= __constant_cpu_to_be64(0xfe80000000000000ULL
);
972 ret
= ipath_init_qp_table(idev
, ib_ipath_qp_table_size
);
977 * The top ib_ipath_lkey_table_size bits are used to index the
978 * table. The lower 8 bits can be owned by the user (copied from
979 * the LKEY). The remaining bits act as a generation number or tag.
981 idev
->lk_table
.max
= 1 << ib_ipath_lkey_table_size
;
982 idev
->lk_table
.table
= kzalloc(idev
->lk_table
.max
*
983 sizeof(*idev
->lk_table
.table
),
985 if (idev
->lk_table
.table
== NULL
) {
989 spin_lock_init(&idev
->pending_lock
);
990 INIT_LIST_HEAD(&idev
->pending
[0]);
991 INIT_LIST_HEAD(&idev
->pending
[1]);
992 INIT_LIST_HEAD(&idev
->pending
[2]);
993 INIT_LIST_HEAD(&idev
->piowait
);
994 INIT_LIST_HEAD(&idev
->rnrwait
);
995 idev
->pending_index
= 0;
996 idev
->port_cap_flags
=
997 IB_PORT_SYS_IMAGE_GUID_SUP
| IB_PORT_CLIENT_REG_SUP
;
998 idev
->pma_counter_select
[0] = IB_PMA_PORT_XMIT_DATA
;
999 idev
->pma_counter_select
[1] = IB_PMA_PORT_RCV_DATA
;
1000 idev
->pma_counter_select
[2] = IB_PMA_PORT_XMIT_PKTS
;
1001 idev
->pma_counter_select
[3] = IB_PMA_PORT_RCV_PKTS
;
1002 idev
->pma_counter_select
[5] = IB_PMA_PORT_XMIT_WAIT
;
1003 idev
->link_width_enabled
= 3; /* 1x or 4x */
1005 /* Snapshot current HW counters to "clear" them. */
1006 ipath_layer_get_counters(dd
, &cntrs
);
1007 idev
->z_symbol_error_counter
= cntrs
.symbol_error_counter
;
1008 idev
->z_link_error_recovery_counter
=
1009 cntrs
.link_error_recovery_counter
;
1010 idev
->z_link_downed_counter
= cntrs
.link_downed_counter
;
1011 idev
->z_port_rcv_errors
= cntrs
.port_rcv_errors
;
1012 idev
->z_port_rcv_remphys_errors
=
1013 cntrs
.port_rcv_remphys_errors
;
1014 idev
->z_port_xmit_discards
= cntrs
.port_xmit_discards
;
1015 idev
->z_port_xmit_data
= cntrs
.port_xmit_data
;
1016 idev
->z_port_rcv_data
= cntrs
.port_rcv_data
;
1017 idev
->z_port_xmit_packets
= cntrs
.port_xmit_packets
;
1018 idev
->z_port_rcv_packets
= cntrs
.port_rcv_packets
;
1019 idev
->z_local_link_integrity_errors
=
1020 cntrs
.local_link_integrity_errors
;
1021 idev
->z_excessive_buffer_overrun_errors
=
1022 cntrs
.excessive_buffer_overrun_errors
;
1025 * The system image GUID is supposed to be the same for all
1026 * IB HCAs in a single system but since there can be other
1027 * device types in the system, we can't be sure this is unique.
1029 if (!sys_image_guid
)
1030 sys_image_guid
= ipath_layer_get_guid(dd
);
1031 idev
->sys_image_guid
= sys_image_guid
;
1032 idev
->ib_unit
= unit
;
1035 strlcpy(dev
->name
, "ipath%d", IB_DEVICE_NAME_MAX
);
1036 dev
->owner
= THIS_MODULE
;
1037 dev
->node_guid
= ipath_layer_get_guid(dd
);
1038 dev
->uverbs_abi_ver
= IPATH_UVERBS_ABI_VERSION
;
1039 dev
->uverbs_cmd_mask
=
1040 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT
) |
1041 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE
) |
1042 (1ull << IB_USER_VERBS_CMD_QUERY_PORT
) |
1043 (1ull << IB_USER_VERBS_CMD_ALLOC_PD
) |
1044 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD
) |
1045 (1ull << IB_USER_VERBS_CMD_CREATE_AH
) |
1046 (1ull << IB_USER_VERBS_CMD_DESTROY_AH
) |
1047 (1ull << IB_USER_VERBS_CMD_QUERY_AH
) |
1048 (1ull << IB_USER_VERBS_CMD_REG_MR
) |
1049 (1ull << IB_USER_VERBS_CMD_DEREG_MR
) |
1050 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL
) |
1051 (1ull << IB_USER_VERBS_CMD_CREATE_CQ
) |
1052 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ
) |
1053 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ
) |
1054 (1ull << IB_USER_VERBS_CMD_POLL_CQ
) |
1055 (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ
) |
1056 (1ull << IB_USER_VERBS_CMD_CREATE_QP
) |
1057 (1ull << IB_USER_VERBS_CMD_QUERY_QP
) |
1058 (1ull << IB_USER_VERBS_CMD_MODIFY_QP
) |
1059 (1ull << IB_USER_VERBS_CMD_DESTROY_QP
) |
1060 (1ull << IB_USER_VERBS_CMD_POST_SEND
) |
1061 (1ull << IB_USER_VERBS_CMD_POST_RECV
) |
1062 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST
) |
1063 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST
) |
1064 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ
) |
1065 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ
) |
1066 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ
) |
1067 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ
) |
1068 (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV
);
1069 dev
->node_type
= IB_NODE_CA
;
1070 dev
->phys_port_cnt
= 1;
1071 dev
->dma_device
= ipath_layer_get_device(dd
);
1072 dev
->class_dev
.dev
= dev
->dma_device
;
1073 dev
->query_device
= ipath_query_device
;
1074 dev
->modify_device
= ipath_modify_device
;
1075 dev
->query_port
= ipath_query_port
;
1076 dev
->modify_port
= ipath_modify_port
;
1077 dev
->query_pkey
= ipath_query_pkey
;
1078 dev
->query_gid
= ipath_query_gid
;
1079 dev
->alloc_ucontext
= ipath_alloc_ucontext
;
1080 dev
->dealloc_ucontext
= ipath_dealloc_ucontext
;
1081 dev
->alloc_pd
= ipath_alloc_pd
;
1082 dev
->dealloc_pd
= ipath_dealloc_pd
;
1083 dev
->create_ah
= ipath_create_ah
;
1084 dev
->destroy_ah
= ipath_destroy_ah
;
1085 dev
->query_ah
= ipath_query_ah
;
1086 dev
->create_srq
= ipath_create_srq
;
1087 dev
->modify_srq
= ipath_modify_srq
;
1088 dev
->query_srq
= ipath_query_srq
;
1089 dev
->destroy_srq
= ipath_destroy_srq
;
1090 dev
->create_qp
= ipath_create_qp
;
1091 dev
->modify_qp
= ipath_modify_qp
;
1092 dev
->query_qp
= ipath_query_qp
;
1093 dev
->destroy_qp
= ipath_destroy_qp
;
1094 dev
->post_send
= ipath_post_send
;
1095 dev
->post_recv
= ipath_post_receive
;
1096 dev
->post_srq_recv
= ipath_post_srq_receive
;
1097 dev
->create_cq
= ipath_create_cq
;
1098 dev
->destroy_cq
= ipath_destroy_cq
;
1099 dev
->resize_cq
= ipath_resize_cq
;
1100 dev
->poll_cq
= ipath_poll_cq
;
1101 dev
->req_notify_cq
= ipath_req_notify_cq
;
1102 dev
->get_dma_mr
= ipath_get_dma_mr
;
1103 dev
->reg_phys_mr
= ipath_reg_phys_mr
;
1104 dev
->reg_user_mr
= ipath_reg_user_mr
;
1105 dev
->dereg_mr
= ipath_dereg_mr
;
1106 dev
->alloc_fmr
= ipath_alloc_fmr
;
1107 dev
->map_phys_fmr
= ipath_map_phys_fmr
;
1108 dev
->unmap_fmr
= ipath_unmap_fmr
;
1109 dev
->dealloc_fmr
= ipath_dealloc_fmr
;
1110 dev
->attach_mcast
= ipath_multicast_attach
;
1111 dev
->detach_mcast
= ipath_multicast_detach
;
1112 dev
->process_mad
= ipath_process_mad
;
1113 dev
->mmap
= ipath_mmap
;
1115 snprintf(dev
->node_desc
, sizeof(dev
->node_desc
),
1116 IPATH_IDSTR
" %s kernel_SMA", system_utsname
.nodename
);
1118 ret
= ib_register_device(dev
);
1122 if (ipath_verbs_register_sysfs(dev
))
1125 ipath_layer_enable_timer(dd
);
1130 ib_unregister_device(dev
);
1132 kfree(idev
->lk_table
.table
);
1134 kfree(idev
->qp_table
.table
);
1136 ib_dealloc_device(dev
);
1137 _VERBS_ERROR("ib_ipath%d cannot register verbs (%d)!\n",
1145 static void ipath_unregister_ib_device(void *arg
)
1147 struct ipath_ibdev
*dev
= (struct ipath_ibdev
*) arg
;
1148 struct ib_device
*ibdev
= &dev
->ibdev
;
1150 ipath_layer_disable_timer(dev
->dd
);
1152 ib_unregister_device(ibdev
);
1154 if (!list_empty(&dev
->pending
[0]) ||
1155 !list_empty(&dev
->pending
[1]) ||
1156 !list_empty(&dev
->pending
[2]))
1157 _VERBS_ERROR("ipath%d pending list not empty!\n",
1159 if (!list_empty(&dev
->piowait
))
1160 _VERBS_ERROR("ipath%d piowait list not empty!\n",
1162 if (!list_empty(&dev
->rnrwait
))
1163 _VERBS_ERROR("ipath%d rnrwait list not empty!\n",
1165 if (!ipath_mcast_tree_empty())
1166 _VERBS_ERROR("ipath%d multicast table memory leak!\n",
1169 * Note that ipath_unregister_ib_device() can be called before all
1170 * the QPs are destroyed!
1172 ipath_free_all_qps(&dev
->qp_table
);
1173 kfree(dev
->qp_table
.table
);
1174 kfree(dev
->lk_table
.table
);
1175 ib_dealloc_device(ibdev
);
1178 static int __init
ipath_verbs_init(void)
1180 return ipath_verbs_register(ipath_register_ib_device
,
1181 ipath_unregister_ib_device
,
1182 ipath_ib_piobufavail
, ipath_ib_rcv
,
1186 static void __exit
ipath_verbs_cleanup(void)
1188 ipath_verbs_unregister();
1191 static ssize_t
show_rev(struct class_device
*cdev
, char *buf
)
1193 struct ipath_ibdev
*dev
=
1194 container_of(cdev
, struct ipath_ibdev
, ibdev
.class_dev
);
1196 return sprintf(buf
, "%x\n", ipath_layer_get_pcirev(dev
->dd
));
1199 static ssize_t
show_hca(struct class_device
*cdev
, char *buf
)
1201 struct ipath_ibdev
*dev
=
1202 container_of(cdev
, struct ipath_ibdev
, ibdev
.class_dev
);
1205 ret
= ipath_layer_get_boardname(dev
->dd
, buf
, 128);
1215 static ssize_t
show_stats(struct class_device
*cdev
, char *buf
)
1217 struct ipath_ibdev
*dev
=
1218 container_of(cdev
, struct ipath_ibdev
, ibdev
.class_dev
);
1236 dev
->n_rc_resends
, dev
->n_rc_qacks
, dev
->n_rc_acks
,
1237 dev
->n_seq_naks
, dev
->n_rdma_seq
, dev
->n_rnr_naks
,
1238 dev
->n_other_naks
, dev
->n_timeouts
,
1239 dev
->n_rdma_dup_busy
, dev
->n_piowait
,
1240 dev
->n_no_piobuf
, dev
->n_pkt_drops
, dev
->n_wqe_errs
);
1241 for (i
= 0; i
< ARRAY_SIZE(dev
->opstats
); i
++) {
1242 const struct ipath_opcode_stats
*si
= &dev
->opstats
[i
];
1244 if (!si
->n_packets
&& !si
->n_bytes
)
1246 len
+= sprintf(buf
+ len
, "%02x %llu/%llu\n", i
,
1247 (unsigned long long) si
->n_packets
,
1248 (unsigned long long) si
->n_bytes
);
/*
 * Read-only (S_IRUGO) class device attributes with no store handler.
 * Note that "hca_type" and "board_id" are both served by show_hca().
 */
static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
static CLASS_DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
1258 static struct class_device_attribute
*ipath_class_attributes
[] = {
1259 &class_device_attr_hw_rev
,
1260 &class_device_attr_hca_type
,
1261 &class_device_attr_board_id
,
1262 &class_device_attr_stats
1265 static int ipath_verbs_register_sysfs(struct ib_device
*dev
)
1270 for (i
= 0; i
< ARRAY_SIZE(ipath_class_attributes
); ++i
)
1271 if (class_device_create_file(&dev
->class_dev
,
1272 ipath_class_attributes
[i
])) {
/* Module entry and exit points. */
module_init(ipath_verbs_init);
module_exit(ipath_verbs_cleanup);