/*
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/utsname.h>
#include <linux/rculist.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/vmalloc.h>

#include "hfi.h"
#include "common.h"
#include "device.h"
#include "trace.h"
#include "qp.h"
#include "sdma.h"

unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
		   S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
		 "LKEY table size in bits (2^n, 1 <= n <= 23)");

static unsigned int hfi1_max_pds = 0xFFFF;
module_param_named(max_pds, hfi1_max_pds, uint, S_IRUGO);
MODULE_PARM_DESC(max_pds,
		 "Maximum number of protection domains to support");

static unsigned int hfi1_max_ahs = 0xFFFF;
module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");

unsigned int hfi1_max_cqes = 0x2FFFF;
module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqes,
		 "Maximum number of completion queue entries to support");

unsigned int hfi1_max_cqs = 0x1FFFF;
module_param_named(max_cqs, hfi1_max_cqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");

unsigned int hfi1_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");

unsigned int hfi1_max_qps = 16384;
module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");

unsigned int hfi1_max_sges = 0x60;
module_param_named(max_sges, hfi1_max_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");

unsigned int hfi1_max_mcast_grps = 16384;
module_param_named(max_mcast_grps, hfi1_max_mcast_grps, uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_grps,
		 "Maximum number of multicast groups to support");

unsigned int hfi1_max_mcast_qp_attached = 16;
module_param_named(max_mcast_qp_attached, hfi1_max_mcast_qp_attached,
		   uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_qp_attached,
		 "Maximum number of attached QPs to support");

unsigned int hfi1_max_srqs = 1024;
module_param_named(max_srqs, hfi1_max_srqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");

unsigned int hfi1_max_srq_sges = 128;
module_param_named(max_srq_sges, hfi1_max_srq_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");

unsigned int hfi1_max_srq_wrs = 0x1FFFF;
module_param_named(max_srq_wrs, hfi1_max_srq_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");

static void verbs_sdma_complete(
	struct sdma_txreq *cookie,
	int status,
	int drained);

/*
 * Note that it is OK to post send work requests in the SQE and ERR
 * states; hfi1_do_send() will process them and generate error
 * completions as per IB 1.2 C10-96.
 */
const int ib_hfi1_state_ops[IB_QPS_ERR + 1] = {
	[IB_QPS_RESET] = 0,
	[IB_QPS_INIT] = HFI1_POST_RECV_OK,
	[IB_QPS_RTR] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK,
	[IB_QPS_RTS] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK |
	    HFI1_POST_SEND_OK | HFI1_PROCESS_SEND_OK |
	    HFI1_PROCESS_NEXT_SEND_OK,
	[IB_QPS_SQD] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK |
	    HFI1_POST_SEND_OK | HFI1_PROCESS_SEND_OK,
	[IB_QPS_SQE] = HFI1_POST_RECV_OK | HFI1_PROCESS_RECV_OK |
	    HFI1_POST_SEND_OK | HFI1_FLUSH_SEND,
	[IB_QPS_ERR] = HFI1_POST_RECV_OK | HFI1_FLUSH_RECV |
	    HFI1_POST_SEND_OK | HFI1_FLUSH_SEND,
};

struct hfi1_ucontext {
	struct ib_ucontext ibucontext;
};

static inline struct hfi1_ucontext *to_iucontext(struct ib_ucontext
						 *ibucontext)
{
	return container_of(ibucontext, struct hfi1_ucontext, ibucontext);
}

/*
 * Translate ib_wr_opcode into ib_wc_opcode.
 */
const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
	[IB_WR_SEND] = IB_WC_SEND,
	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
};

/*
 * Length of header by opcode, 0 --> not supported
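 *
 * Each nonzero entry below is LRH (8 bytes) + BTH (12 bytes) plus the
 * per-opcode IBTA extension headers, e.g. RETH (16 bytes), AETH (4),
 * immediate data (4), DETH (8) or AtomicETH (28).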
 */
const u8 hdr_len_by_opcode[256] = {
	/* RC */
	[IB_OPCODE_RC_SEND_FIRST] = 12 + 8,
	[IB_OPCODE_RC_SEND_MIDDLE] = 12 + 8,
	[IB_OPCODE_RC_SEND_LAST] = 12 + 8,
	[IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_SEND_ONLY] = 12 + 8,
	[IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_WRITE_FIRST] = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = 12 + 8,
	[IB_OPCODE_RC_RDMA_WRITE_LAST] = 12 + 8,
	[IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY] = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = 12 + 8 + 20,
	[IB_OPCODE_RC_RDMA_READ_REQUEST] = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = 12 + 8,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = 12 + 8 + 4,
	[IB_OPCODE_RC_ACKNOWLEDGE] = 12 + 8 + 4,
	[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4,
	[IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28,
	[IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28,
	/* UC */
	[IB_OPCODE_UC_SEND_FIRST] = 12 + 8,
	[IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8,
	[IB_OPCODE_UC_SEND_LAST] = 12 + 8,
	[IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_UC_SEND_ONLY] = 12 + 8,
	[IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_UC_RDMA_WRITE_FIRST] = 12 + 8 + 16,
	[IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = 12 + 8,
	[IB_OPCODE_UC_RDMA_WRITE_LAST] = 12 + 8,
	[IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY] = 12 + 8 + 16,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = 12 + 8 + 20,
	/* UD */
	[IB_OPCODE_UD_SEND_ONLY] = 12 + 8 + 8,
	[IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 12
};

static const opcode_handler opcode_handler_tbl[256] = {
	/* RC */
	[IB_OPCODE_RC_SEND_FIRST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_MIDDLE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_LAST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_FIRST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_LAST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_REQUEST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_ACKNOWLEDGE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_COMPARE_SWAP] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv,
	/* UC */
	[IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_LAST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_ONLY] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_FIRST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_LAST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	/* UD */
	[IB_OPCODE_UD_SEND_ONLY] = &hfi1_ud_rcv,
	[IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_ud_rcv,
	/* CNP */
	[IB_OPCODE_CNP] = &hfi1_cnp_rcv
};

/*
 * System image GUID.
 */
__be64 ib_hfi1_sys_image_guid;

/**
 * hfi1_copy_sge - copy data to SGE memory
 * @ss: the SGE state
 * @data: the data to copy
 * @length: the length of the data
 * @release: if non-zero, drop the MR reference when an SGE is consumed
 */
void hfi1_copy_sge(
	struct hfi1_sge_state *ss,
	void *data, u32 length,
	int release)
{
	struct hfi1_sge *sge = &ss->sge;

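	/*
	 * Walk the SGE list, copying into each segment in turn.  When an
	 * lkey-backed SGE crosses an MR map boundary, sge->m/sge->n are
	 * advanced to the next segment of the region's map table.
	 */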
	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		WARN_ON_ONCE(len == 0);
		memcpy(sge->vaddr, data, len);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (release)
				hfi1_put_mr(sge->mr);
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr->lkey) {
			if (++sge->n >= HFI1_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		data += len;
		length -= len;
	}
}

/**
 * hfi1_skip_sge - skip over SGE memory
 * @ss: the SGE state
 * @length: the number of bytes to skip
 * @release: if non-zero, drop the MR reference when an SGE is consumed
 */
void hfi1_skip_sge(struct hfi1_sge_state *ss, u32 length, int release)
{
	struct hfi1_sge *sge = &ss->sge;

	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		WARN_ON_ONCE(len == 0);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (release)
				hfi1_put_mr(sge->mr);
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr->lkey) {
			if (++sge->n >= HFI1_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		length -= len;
	}
}

/**
 * post_one_send - post one RC, UC, or UD send work request
 * @qp: the QP to post on
 * @wr: the work request to send
 */
static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
{
	struct hfi1_swqe *wqe;
	u32 next;
	int i;
	int j;
	int acc;
	struct hfi1_lkey_table *rkt;
	struct hfi1_pd *pd;
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct hfi1_pportdata *ppd;
	struct hfi1_ibport *ibp;

	/* IB spec says that num_sge == 0 is OK. */
	if (unlikely(wr->num_sge > qp->s_max_sge))
		return -EINVAL;

	ppd = &dd->pport[qp->port_num - 1];
	ibp = &ppd->ibport_data;

	/*
	 * Don't allow RDMA reads or atomic operations on UC or
	 * undefined operations.
	 * Make sure buffer is large enough to hold the result for atomics.
	 */
	if (wr->opcode == IB_WR_FAST_REG_MR) {
		return -EINVAL;
	} else if (qp->ibqp.qp_type == IB_QPT_UC) {
		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
			return -EINVAL;
	} else if (qp->ibqp.qp_type != IB_QPT_RC) {
		/* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
		if (wr->opcode != IB_WR_SEND &&
		    wr->opcode != IB_WR_SEND_WITH_IMM)
			return -EINVAL;
		/* Check UD destination address PD */
		if (qp->ibqp.pd != wr->wr.ud.ah->pd)
			return -EINVAL;
	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
		return -EINVAL;
	else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
		 (wr->num_sge == 0 ||
		  wr->sg_list[0].length < sizeof(u64) ||
		  wr->sg_list[0].addr & (sizeof(u64) - 1)))
		return -EINVAL;
	else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
		return -EINVAL;

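	/*
	 * The send queue is a ring with one slot deliberately left
	 * unused, so a full queue (next == s_last) can be distinguished
	 * from an empty one (s_head == s_last).
	 */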
	next = qp->s_head + 1;
	if (next >= qp->s_size)
		next = 0;
	if (next == qp->s_last)
		return -ENOMEM;

	rkt = &to_idev(qp->ibqp.device)->lk_table;
	pd = to_ipd(qp->ibqp.pd);
	wqe = get_swqe_ptr(qp, qp->s_head);
	wqe->wr = *wr;
	wqe->length = 0;
	j = 0;
	if (wr->num_sge) {
		acc = wr->opcode >= IB_WR_RDMA_READ ?
			IB_ACCESS_LOCAL_WRITE : 0;
		for (i = 0; i < wr->num_sge; i++) {
			u32 length = wr->sg_list[i].length;
			int ok;

			if (length == 0)
				continue;
			ok = hfi1_lkey_ok(rkt, pd, &wqe->sg_list[j],
					  &wr->sg_list[i], acc);
			if (!ok)
				goto bail_inval_free;
			wqe->length += length;
			j++;
		}
		wqe->wr.num_sge = j;
	}
	if (qp->ibqp.qp_type == IB_QPT_UC ||
	    qp->ibqp.qp_type == IB_QPT_RC) {
		if (wqe->length > 0x80000000U)
			goto bail_inval_free;
	} else {
		struct hfi1_ah *ah = to_iah(wr->wr.ud.ah);

		atomic_inc(&ah->refcount);
	}
	wqe->ssn = qp->s_ssn++;
	qp->s_head = next;

	return 0;

bail_inval_free:
	/* release mr holds */
	while (j) {
		struct hfi1_sge *sge = &wqe->sg_list[--j];

		hfi1_put_mr(sge->mr);
	}
	return -EINVAL;
}

/**
 * post_send - post a send on a QP
 * @ibqp: the QP to post the send on
 * @wr: the list of work requests to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 */
static int post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
		     struct ib_send_wr **bad_wr)
{
	struct hfi1_qp *qp = to_iqp(ibqp);
	int err = 0;
	int call_send;
	unsigned long flags;
	unsigned nreq = 0;

	spin_lock_irqsave(&qp->s_lock, flags);

	/* Check that state is OK to post send. */
	if (unlikely(!(ib_hfi1_state_ops[qp->state] & HFI1_POST_SEND_OK))) {
		spin_unlock_irqrestore(&qp->s_lock, flags);
		return -EINVAL;
	}

	/* SQ empty and a single request -> call the send engine directly */
	call_send = qp->s_head == qp->s_last && !wr->next;

	for (; wr; wr = wr->next) {
		err = post_one_send(qp, wr);
		if (unlikely(err)) {
			*bad_wr = wr;
			goto bail;
		}
		nreq++;
	}
bail:
	if (nreq && !call_send)
		hfi1_schedule_send(qp);
	spin_unlock_irqrestore(&qp->s_lock, flags);
	if (nreq && call_send)
		hfi1_do_send(&qp->s_iowait.iowork);
	return err;
}

/**
 * post_receive - post a receive on a QP
 * @ibqp: the QP to post the receive on
 * @wr: the WR to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 */
static int post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
			struct ib_recv_wr **bad_wr)
{
	struct hfi1_qp *qp = to_iqp(ibqp);
	struct hfi1_rwq *wq = qp->r_rq.wq;
	unsigned long flags;
	int ret;

	/* Check that state is OK to post receive. */
	if (!(ib_hfi1_state_ops[qp->state] & HFI1_POST_RECV_OK) || !wq) {
		*bad_wr = wr;
		ret = -EINVAL;
		goto bail;
	}

	for (; wr; wr = wr->next) {
		struct hfi1_rwqe *wqe;
		u32 next;
		int i;

		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
			*bad_wr = wr;
			ret = -EINVAL;
			goto bail;
		}

		spin_lock_irqsave(&qp->r_rq.lock, flags);
		next = wq->head + 1;
		if (next >= qp->r_rq.size)
			next = 0;
		if (next == wq->tail) {
			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
			*bad_wr = wr;
			ret = -ENOMEM;
			goto bail;
		}

		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
		wqe->wr_id = wr->wr_id;
		wqe->num_sge = wr->num_sge;
		for (i = 0; i < wr->num_sge; i++)
			wqe->sg_list[i] = wr->sg_list[i];
		/* Make sure queue entry is written before the head index. */
		smp_wmb();
		wq->head = next;
		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
	}
	ret = 0;

bail:
	return ret;
}

/*
 * Make sure the QP is ready and able to accept the given opcode.
 */
static inline int qp_ok(int opcode, struct hfi1_packet *packet)
{
	struct hfi1_ibport *ibp;

	if (!(ib_hfi1_state_ops[packet->qp->state] & HFI1_PROCESS_RECV_OK))
		goto dropit;
	if (((opcode & OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
	    (opcode == IB_OPCODE_CNP))
		return 1;
dropit:
	ibp = &packet->rcd->ppd->ibport_data;
	ibp->n_pkt_drops++;
	return 0;
}

/**
 * hfi1_ib_rcv - process an incoming packet
 * @packet: data packet information
 *
 * This is called to process an incoming packet at interrupt level.
 *
 * Tlen is the length of the header + data + CRC in bytes.
 */
void hfi1_ib_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ctxtdata *rcd = packet->rcd;
	struct hfi1_ib_header *hdr = packet->hdr;
	u32 tlen = packet->tlen;
	struct hfi1_pportdata *ppd = rcd->ppd;
	struct hfi1_ibport *ibp = &ppd->ibport_data;
	u32 qp_num;
	int lnh;
	u8 opcode;
	u16 lid;

	/* Check for GRH */
	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
	if (lnh == HFI1_LRH_BTH)
		packet->ohdr = &hdr->u.oth;
	else if (lnh == HFI1_LRH_GRH) {
		u32 vtf;

		packet->ohdr = &hdr->u.l.oth;
		if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
			goto drop;
		vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
		if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
			goto drop;
		packet->rcv_flags |= HFI1_HAS_GRH;
	} else
		goto drop;

	trace_input_ibhdr(rcd->dd, hdr);

	opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24);
	inc_opstats(tlen, &rcd->opstats->stats[opcode]);

	/* Get the destination QP number. */
	qp_num = be32_to_cpu(packet->ohdr->bth[1]) & HFI1_QPN_MASK;
	lid = be16_to_cpu(hdr->lrh[1]);
	if (unlikely((lid >= HFI1_MULTICAST_LID_BASE) &&
		     (lid != HFI1_PERMISSIVE_LID))) {
		struct hfi1_mcast *mcast;
		struct hfi1_mcast_qp *p;

		if (lnh != HFI1_LRH_GRH)
			goto drop;
		mcast = hfi1_mcast_find(ibp, &hdr->u.l.grh.dgid);
		if (mcast == NULL)
			goto drop;
		list_for_each_entry_rcu(p, &mcast->qp_list, list) {
			packet->qp = p->qp;
			spin_lock(&packet->qp->r_lock);
			if (likely((qp_ok(opcode, packet))))
				opcode_handler_tbl[opcode](packet);
			spin_unlock(&packet->qp->r_lock);
		}
		/*
		 * Notify hfi1_multicast_detach() if it is waiting for us
		 * to finish.
		 */
		if (atomic_dec_return(&mcast->refcount) <= 1)
			wake_up(&mcast->wait);
	} else {
		rcu_read_lock();
		packet->qp = hfi1_lookup_qpn(ibp, qp_num);
		if (!packet->qp) {
			rcu_read_unlock();
			goto drop;
		}
		spin_lock(&packet->qp->r_lock);
		if (likely((qp_ok(opcode, packet))))
			opcode_handler_tbl[opcode](packet);
		spin_unlock(&packet->qp->r_lock);
		rcu_read_unlock();
	}
	return;

drop:
	ibp->n_pkt_drops++;
}

/*
 * This is called from a timer to check for QPs
 * which need kernel memory in order to send a packet.
 */
static void mem_timer(unsigned long data)
{
	struct hfi1_ibdev *dev = (struct hfi1_ibdev *)data;
	struct list_head *list = &dev->memwait;
	struct hfi1_qp *qp = NULL;
	struct iowait *wait;
	unsigned long flags;

	write_seqlock_irqsave(&dev->iowait_lock, flags);
	if (!list_empty(list)) {
		wait = list_first_entry(list, struct iowait, list);
		qp = container_of(wait, struct hfi1_qp, s_iowait);
		list_del_init(&qp->s_iowait.list);
		/* refcount held until actual wake up */
		if (!list_empty(list))
			mod_timer(&dev->mem_timer, jiffies + 1);
	}
	write_sequnlock_irqrestore(&dev->iowait_lock, flags);

	if (qp)
		hfi1_qp_wakeup(qp, HFI1_S_WAIT_KMEM);
}

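/*
 * update_sge - advance the SGE state by @length bytes after a copy,
 * stepping to the next SGE or MR map segment as needed.
 */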
void update_sge(struct hfi1_sge_state *ss, u32 length)
{
	struct hfi1_sge *sge = &ss->sge;

	sge->vaddr += length;
	sge->length -= length;
	sge->sge_length -= length;
	if (sge->sge_length == 0) {
		if (--ss->num_sge)
			*sge = *ss->sg_list++;
	} else if (sge->length == 0 && sge->mr->lkey) {
		if (++sge->n >= HFI1_SEGSZ) {
			if (++sge->m >= sge->mr->mapsz)
				return;
			sge->n = 0;
		}
		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
	}
}

static noinline struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
						struct hfi1_qp *qp)
{
	struct verbs_txreq *tx;
	unsigned long flags;

	tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
	if (!tx) {
		spin_lock_irqsave(&qp->s_lock, flags);
		write_seqlock(&dev->iowait_lock);
		if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK &&
		    list_empty(&qp->s_iowait.list)) {
			dev->n_txwait++;
			qp->s_flags |= HFI1_S_WAIT_TX;
			list_add_tail(&qp->s_iowait.list, &dev->txwait);
			trace_hfi1_qpsleep(qp, HFI1_S_WAIT_TX);
			atomic_inc(&qp->refcount);
		}
		qp->s_flags &= ~HFI1_S_BUSY;
		write_sequnlock(&dev->iowait_lock);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		tx = ERR_PTR(-EBUSY);
	}
	return tx;
}

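/*
 * Fast path: allocate from the slab cache without taking any locks.
 * Only on failure does __get_txreq() retry under the QP and iowait
 * locks and, if memory is still unavailable, queue the QP on the
 * txwait list.
 */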
static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
					    struct hfi1_qp *qp)
{
	struct verbs_txreq *tx;

	tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
	if (!tx) {
		/* call slow path to get the lock */
		tx = __get_txreq(dev, qp);
		if (IS_ERR(tx))
			return tx;
	}
	tx->qp = qp;
	return tx;
}

void hfi1_put_txreq(struct verbs_txreq *tx)
{
	struct hfi1_ibdev *dev;
	struct hfi1_qp *qp;
	unsigned long flags;
	unsigned int seq;

	qp = tx->qp;
	dev = to_idev(qp->ibqp.device);

	if (tx->mr) {
		hfi1_put_mr(tx->mr);
		tx->mr = NULL;
	}
	sdma_txclean(dd_from_dev(dev), &tx->txreq);

	/* Free verbs_txreq and return to slab cache */
	kmem_cache_free(dev->verbs_txreq_cache, tx);

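	/*
	 * Check for waiters with a lockless seqlock read; only take the
	 * write side when there is actually a QP to wake up.
	 */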
	do {
		seq = read_seqbegin(&dev->iowait_lock);
		if (!list_empty(&dev->txwait)) {
			struct iowait *wait;

			write_seqlock_irqsave(&dev->iowait_lock, flags);
			/* Wake up first QP wanting a free struct */
			wait = list_first_entry(&dev->txwait, struct iowait,
						list);
			qp = container_of(wait, struct hfi1_qp, s_iowait);
			list_del_init(&qp->s_iowait.list);
			/* refcount held until actual wake up */
			write_sequnlock_irqrestore(&dev->iowait_lock, flags);
			hfi1_qp_wakeup(qp, HFI1_S_WAIT_TX);
			break;
		}
	} while (read_seqretry(&dev->iowait_lock, seq));
}

/*
 * This is called with progress side lock held.
 */
/* New API */
static void verbs_sdma_complete(
	struct sdma_txreq *cookie,
	int status,
	int drained)
{
	struct verbs_txreq *tx =
		container_of(cookie, struct verbs_txreq, txreq);
	struct hfi1_qp *qp = tx->qp;

	spin_lock(&qp->s_lock);
	if (tx->wqe)
		hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
	else if (qp->ibqp.qp_type == IB_QPT_RC) {
		struct hfi1_ib_header *hdr;

		hdr = &tx->phdr.hdr;
		hfi1_rc_send_complete(qp, hdr);
	}
	if (drained) {
		/*
		 * This happens when the send engine notes
		 * a QP in the error state and cannot
		 * do the flush work until that QP's
		 * sdma work has finished.
		 */
		if (qp->s_flags & HFI1_S_WAIT_DMA) {
			qp->s_flags &= ~HFI1_S_WAIT_DMA;
			hfi1_schedule_send(qp);
		}
	}
	spin_unlock(&qp->s_lock);

	hfi1_put_txreq(tx);
}

static int wait_kmem(struct hfi1_ibdev *dev, struct hfi1_qp *qp)
{
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&qp->s_lock, flags);
	if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) {
		write_seqlock(&dev->iowait_lock);
		if (list_empty(&qp->s_iowait.list)) {
			if (list_empty(&dev->memwait))
				mod_timer(&dev->mem_timer, jiffies + 1);
			qp->s_flags |= HFI1_S_WAIT_KMEM;
			list_add_tail(&qp->s_iowait.list, &dev->memwait);
			trace_hfi1_qpsleep(qp, HFI1_S_WAIT_KMEM);
			atomic_inc(&qp->refcount);
		}
		write_sequnlock(&dev->iowait_lock);
		qp->s_flags &= ~HFI1_S_BUSY;
		ret = -EBUSY;
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);

	return ret;
}

/*
 * This routine calls txadds for each sg entry.
 *
 * Add failures will revert the sge cursor
 */
static int build_verbs_ulp_payload(
	struct sdma_engine *sde,
	struct hfi1_sge_state *ss,
	u32 length,
	struct verbs_txreq *tx)
{
	struct hfi1_sge *sg_list = ss->sg_list;
	struct hfi1_sge sge = ss->sge;
	u8 num_sge = ss->num_sge;
	u32 len;
	int ret = 0;

	while (length) {
		len = ss->sge.length;
		if (len > length)
			len = length;
		if (len > ss->sge.sge_length)
			len = ss->sge.sge_length;
		WARN_ON_ONCE(len == 0);
		ret = sdma_txadd_kvaddr(
			sde->dd,
			&tx->txreq,
			ss->sge.vaddr,
			len);
		if (ret)
			goto bail_txadd;
		update_sge(ss, len);
		length -= len;
	}
	return ret;
bail_txadd:
	/* unwind cursor */
	ss->sge = sge;
	ss->num_sge = num_sge;
	ss->sg_list = sg_list;
	return ret;
}

/*
 * Build the number of DMA descriptors needed to send length bytes of data.
 *
 * NOTE: DMA mapping is held in the tx until completed in the ring or
 *       the tx desc is freed without having been submitted to the ring
 *
 * This routine ensures that all of the helper routine calls succeed.
 */
/* New API */
static int build_verbs_tx_desc(
	struct sdma_engine *sde,
	struct hfi1_sge_state *ss,
	u32 length,
	struct verbs_txreq *tx,
	struct ahg_ib_header *ahdr,
	u64 pbc)
{
	int ret = 0;
	struct hfi1_pio_header *phdr;
	u16 hdrbytes = tx->hdr_dwords << 2;

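	/*
	 * Two cases follow: with no AHG entries the full header is
	 * added to the descriptor list; with AHG the header is largely
	 * rebuilt from the AHG descriptors, so only the LRH, opcode and
	 * PSN words needed by send-complete processing are copied here.
	 */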
	phdr = &tx->phdr;
	if (!ahdr->ahgcount) {
		ret = sdma_txinit_ahg(
			&tx->txreq,
			ahdr->tx_flags,
			hdrbytes + length,
			ahdr->ahgidx,
			0,
			NULL,
			0,
			verbs_sdma_complete);
		if (ret)
			goto bail_txadd;
		phdr->pbc = cpu_to_le64(pbc);
		memcpy(&phdr->hdr, &ahdr->ibh, hdrbytes - sizeof(phdr->pbc));
		/* add the header */
		ret = sdma_txadd_kvaddr(
			sde->dd,
			&tx->txreq,
			&tx->phdr,
			tx->hdr_dwords << 2);
		if (ret)
			goto bail_txadd;
	} else {
		struct hfi1_other_headers *sohdr = &ahdr->ibh.u.oth;
		struct hfi1_other_headers *dohdr = &phdr->hdr.u.oth;

		/* needed in rc_send_complete() */
		phdr->hdr.lrh[0] = ahdr->ibh.lrh[0];
		if ((be16_to_cpu(phdr->hdr.lrh[0]) & 3) == HFI1_LRH_GRH) {
			sohdr = &ahdr->ibh.u.l.oth;
			dohdr = &phdr->hdr.u.l.oth;
		}
		/* opcode */
		dohdr->bth[0] = sohdr->bth[0];
		/* PSN/ACK */
		dohdr->bth[2] = sohdr->bth[2];
		ret = sdma_txinit_ahg(
			&tx->txreq,
			ahdr->tx_flags,
			length,
			ahdr->ahgidx,
			ahdr->ahgcount,
			ahdr->ahgdesc,
			hdrbytes,
			verbs_sdma_complete);
		if (ret)
			goto bail_txadd;
	}

	/* add the ulp payload - if any.  ss can be NULL for acks */
	if (ss)
		ret = build_verbs_ulp_payload(sde, ss, length, tx);
bail_txadd:
	return ret;
}

int hfi1_verbs_send_dma(struct hfi1_qp *qp, struct ahg_ib_header *ahdr,
			u32 hdrwords, struct hfi1_sge_state *ss, u32 len,
			u32 plen, u32 dwords, u64 pbc)
{
	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	struct verbs_txreq *tx;
	struct sdma_txreq *stx;
	u64 pbc_flags = 0;
	struct sdma_engine *sde;
	u8 sc5 = qp->s_sc;
	int ret;

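	/*
	 * If a previously built txreq is still queued on the iowait
	 * list (e.g. the SDMA ring was full), resubmit it before
	 * building a new request.
	 */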
	if (!list_empty(&qp->s_iowait.tx_head)) {
		stx = list_first_entry(
			&qp->s_iowait.tx_head,
			struct sdma_txreq,
			list);
		list_del_init(&stx->list);
		tx = container_of(stx, struct verbs_txreq, txreq);
		ret = sdma_send_txreq(tx->sde, &qp->s_iowait, stx);
		if (unlikely(ret == -ECOMM))
			goto bail_ecomm;
		return ret;
	}

	tx = get_txreq(dev, qp);
	if (IS_ERR(tx))
		goto bail_tx;

	if (!qp->s_hdr->sde) {
		tx->sde = sde = qp_to_sdma_engine(qp, sc5);
		if (!sde)
			goto bail_no_sde;
	} else
		tx->sde = sde = qp->s_hdr->sde;

	if (likely(pbc == 0)) {
		u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
		/* No vl15 here */
		/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
		pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;

		pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
	}
	tx->wqe = qp->s_wqe;
	tx->mr = qp->s_rdma_mr;
	if (qp->s_rdma_mr)
		qp->s_rdma_mr = NULL;
	tx->hdr_dwords = hdrwords + 2;
	ret = build_verbs_tx_desc(sde, ss, len, tx, ahdr, pbc);
	if (unlikely(ret))
		goto bail_build;
	trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ahdr->ibh);
	ret = sdma_send_txreq(sde, &qp->s_iowait, &tx->txreq);
	if (unlikely(ret == -ECOMM))
		goto bail_ecomm;
	return ret;

bail_no_sde:
	hfi1_put_txreq(tx);
bail_ecomm:
	/* The current one got "sent" */
	return 0;
bail_build:
	/* kmalloc or mapping fail */
	hfi1_put_txreq(tx);
	return wait_kmem(dev, qp);
bail_tx:
	return PTR_ERR(tx);
}

/*
 * If we are now in the error state, return zero to flush the
 * send work request.
 */
static int no_bufs_available(struct hfi1_qp *qp, struct send_context *sc)
{
	struct hfi1_devdata *dd = sc->dd;
	struct hfi1_ibdev *dev = &dd->verbs_dev;
	unsigned long flags;
	int ret = 0;

	/*
	 * Note that as soon as want_buffer() is called and
	 * possibly before it returns, sc_piobufavail()
	 * could be called.  Therefore, put QP on the I/O wait list before
	 * enabling the PIO avail interrupt.
	 */
	spin_lock_irqsave(&qp->s_lock, flags);
	if (ib_hfi1_state_ops[qp->state] & HFI1_PROCESS_RECV_OK) {
		write_seqlock(&dev->iowait_lock);
		if (list_empty(&qp->s_iowait.list)) {
			struct hfi1_ibdev *dev = &dd->verbs_dev;
			int was_empty;

			dev->n_piowait++;
			qp->s_flags |= HFI1_S_WAIT_PIO;
			was_empty = list_empty(&sc->piowait);
			list_add_tail(&qp->s_iowait.list, &sc->piowait);
			trace_hfi1_qpsleep(qp, HFI1_S_WAIT_PIO);
			atomic_inc(&qp->refcount);
			/* counting: only call wantpiobuf_intr if first user */
			if (was_empty)
				hfi1_sc_wantpiobuf_intr(sc, 1);
		}
		write_sequnlock(&dev->iowait_lock);
		qp->s_flags &= ~HFI1_S_BUSY;
		ret = -EBUSY;
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);
	return ret;
}

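/*
 * Map the QP's SC to a VL and return that VL's send context; VL15
 * (management) bypasses the vls_supported check.
 */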
struct send_context *qp_to_send_context(struct hfi1_qp *qp, u8 sc5)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	struct hfi1_pportdata *ppd = dd->pport + (qp->port_num - 1);
	u8 vl;

	vl = sc_to_vlt(dd, sc5);
	if (vl >= ppd->vls_supported && vl != 15)
		return NULL;
	return dd->vld[vl].sc;
}

int hfi1_verbs_send_pio(struct hfi1_qp *qp, struct ahg_ib_header *ahdr,
			u32 hdrwords, struct hfi1_sge_state *ss, u32 len,
			u32 plen, u32 dwords, u64 pbc)
{
	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	u32 *hdr = (u32 *)&ahdr->ibh;
	u64 pbc_flags = 0;
	u32 sc5;
	unsigned long flags = 0;
	struct send_context *sc;
	struct pio_buf *pbuf;
	int wc_status = IB_WC_SUCCESS;

	/* vl15 special case taken care of in ud.c */
	sc5 = qp->s_sc;
	sc = qp_to_send_context(qp, sc5);

	if (!sc)
		return -EINVAL;
	if (likely(pbc == 0)) {
		u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
		/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
		pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
		pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
	}
	pbuf = sc_buffer_alloc(sc, plen, NULL, NULL);
	if (unlikely(pbuf == NULL)) {
		if (ppd->host_link_state != HLS_UP_ACTIVE) {
			/*
			 * If we have filled the PIO buffers to capacity
			 * and are not in an active state, this request
			 * is not going to go out, so just complete it
			 * with an error or else a ULP or the core may
			 * be stuck waiting.
			 */
			hfi1_cdbg(
				PIO,
				"alloc failed. state not active, completing");
			wc_status = IB_WC_GENERAL_ERR;
			goto pio_bail;
		} else {
			/*
			 * This is a normal occurrence.  The PIO buffers
			 * are full, but we are still happily sending;
			 * let's continue to queue the request.
			 */
			hfi1_cdbg(PIO, "alloc failed. state active, queuing");
			return no_bufs_available(qp, sc);
		}
	}

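	/*
	 * Header-only packets go out with a single pio_copy(); packets
	 * with payload stream the SGE data through the segmented copy
	 * helpers after the PBC and header words are staged.
	 */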
	if (len == 0) {
		pio_copy(ppd->dd, pbuf, pbc, hdr, hdrwords);
	} else {
		if (ss) {
			seg_pio_copy_start(pbuf, pbc, hdr, hdrwords * 4);
			while (len) {
				void *addr = ss->sge.vaddr;
				u32 slen = ss->sge.length;

				if (slen > len)
					slen = len;
				update_sge(ss, slen);
				seg_pio_copy_mid(pbuf, addr, slen);
				len -= slen;
			}
			seg_pio_copy_end(pbuf);
		}
	}

	trace_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ahdr->ibh);

	if (qp->s_rdma_mr) {
		hfi1_put_mr(qp->s_rdma_mr);
		qp->s_rdma_mr = NULL;
	}

pio_bail:
	if (qp->s_wqe) {
		spin_lock_irqsave(&qp->s_lock, flags);
		hfi1_send_complete(qp, qp->s_wqe, wc_status);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
		spin_lock_irqsave(&qp->s_lock, flags);
		hfi1_rc_send_complete(qp, &ahdr->ibh);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	}
	return 0;
}

/*
 * egress_pkey_matches_entry - return 1 if the pkey matches ent (ent
 * being an entry from the ingress partition key table), return 0
 * otherwise.  Use the matching criteria for egress partition keys
 * specified in the OPAv1 spec., section 9.11.7.
 */
static inline int egress_pkey_matches_entry(u16 pkey, u16 ent)
{
	u16 mkey = pkey & PKEY_LOW_15_MASK;
	u16 ment = ent & PKEY_LOW_15_MASK;

	if (mkey == ment) {
		/*
		 * If pkey[15] is set (full partition member),
		 * is bit 15 in the corresponding table element
		 * clear (limited member)?
		 */
		if (pkey & PKEY_MEMBER_MASK)
			return !!(ent & PKEY_MEMBER_MASK);
		return 1;
	}
	return 0;
}

/*
 * egress_pkey_check - return 0 if hdr's pkey matches according to the
 * criteria in the OPAv1 spec., section 9.11.7.
 */
static inline int egress_pkey_check(struct hfi1_pportdata *ppd,
				    struct hfi1_ib_header *hdr,
				    struct hfi1_qp *qp)
{
	struct hfi1_other_headers *ohdr;
	struct hfi1_devdata *dd;
	int i = 0;
	u16 pkey;
	u8 lnh, sc5 = qp->s_sc;

	if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT))
		return 0;

	/* locate the pkey within the headers */
	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
	if (lnh == HFI1_LRH_GRH)
		ohdr = &hdr->u.l.oth;
	else
		ohdr = &hdr->u.oth;

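	/* the P_Key is carried in the low 16 bits of BTH dword 0 */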
	pkey = (u16)be32_to_cpu(ohdr->bth[0]);

	/* If SC15, pkey[0:14] must be 0x7fff */
	if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK))
		goto bad;

	/* Is the pkey = 0x0, or 0x8000? */
	if ((pkey & PKEY_LOW_15_MASK) == 0)
		goto bad;

	/* The most likely matching pkey has index qp->s_pkey_index */
	if (unlikely(!egress_pkey_matches_entry(pkey,
					ppd->pkeys[qp->s_pkey_index]))) {
		/* no match - try the entire table */
		for (; i < MAX_PKEY_VALUES; i++) {
			if (egress_pkey_matches_entry(pkey, ppd->pkeys[i]))
				break;
		}
	}

	if (i < MAX_PKEY_VALUES)
		return 0;
bad:
	incr_cntr64(&ppd->port_xmit_constraint_errors);
	dd = ppd->dd;
	if (!(dd->err_info_xmit_constraint.status & OPA_EI_STATUS_SMASK)) {
		u16 slid = be16_to_cpu(hdr->lrh[3]);

		dd->err_info_xmit_constraint.status |= OPA_EI_STATUS_SMASK;
		dd->err_info_xmit_constraint.slid = slid;
		dd->err_info_xmit_constraint.pkey = pkey;
	}
	return 1;
}

/**
 * hfi1_verbs_send - send a packet
 * @qp: the QP to send on
 * @ahdr: the packet header
 * @hdrwords: the number of 32-bit words in the header
 * @ss: the SGE to send
 * @len: the length of the packet in bytes
 *
 * Return zero if packet is sent or queued OK.
 * Return non-zero and clear qp->s_flags HFI1_S_BUSY otherwise.
 */
int hfi1_verbs_send(struct hfi1_qp *qp, struct ahg_ib_header *ahdr,
		    u32 hdrwords, struct hfi1_sge_state *ss, u32 len)
{
	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
	u32 plen;
	int ret;
	int pio = 0;
	unsigned long flags = 0;
	u32 dwords = (len + 3) >> 2;

	/*
	 * VL15 packets (IB_QPT_SMI) will always use PIO, so we
	 * can defer SDMA restart until link goes ACTIVE without
	 * worrying about just how we got there.
	 */
	if ((qp->ibqp.qp_type == IB_QPT_SMI) ||
	    !(dd->flags & HFI1_HAS_SEND_DMA))
		pio = 1;

	ret = egress_pkey_check(dd->pport, &ahdr->ibh, qp);
	if (unlikely(ret)) {
		/*
		 * The value we are returning here does not get propagated to
		 * the verbs caller.  Thus we need to complete the request with
		 * error otherwise the caller could be sitting waiting on the
		 * completion event.  Only do this for PIO.  SDMA has its own
		 * mechanism for handling the errors.  So for SDMA we can just
		 * return.
		 */
		if (pio) {
			hfi1_cdbg(PIO, "%s() Failed. Completing with err",
				  __func__);
			spin_lock_irqsave(&qp->s_lock, flags);
			hfi1_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
			spin_unlock_irqrestore(&qp->s_lock, flags);
		}
		return -EINVAL;
	}

	/*
	 * Calculate the send buffer trigger address.
	 * The +2 counts for the pbc control qword
	 */
	plen = hdrwords + dwords + 2;

	if (pio) {
		ret = dd->process_pio_send(
			qp, ahdr, hdrwords, ss, len, plen, dwords, 0);
	} else {
#ifdef CONFIG_SDMA_VERBOSITY
		dd_dev_err(dd, "CONFIG SDMA %s:%d %s()\n",
			   slashstrip(__FILE__), __LINE__, __func__);
		dd_dev_err(dd, "SDMA hdrwords = %u, len = %u\n", hdrwords, len);
#endif
		ret = dd->process_dma_send(
			qp, ahdr, hdrwords, ss, len, plen, dwords, 0);
	}

	return ret;
}

static int query_device(struct ib_device *ibdev,
			struct ib_device_attr *props,
			struct ib_udata *uhw)
{
	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
	struct hfi1_ibdev *dev = to_idev(ibdev);

	if (uhw->inlen || uhw->outlen)
		return -EINVAL;
	memset(props, 0, sizeof(*props));

	props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;

	props->page_size_cap = PAGE_SIZE;
	props->vendor_id =
		dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
	props->vendor_part_id = dd->pcidev->device;
	props->hw_ver = dd->minrev;
	props->sys_image_guid = ib_hfi1_sys_image_guid;
	props->max_mr_size = ~0ULL;
	props->max_qp = hfi1_max_qps;
	props->max_qp_wr = hfi1_max_qp_wrs;
	props->max_sge = hfi1_max_sges;
	props->max_sge_rd = hfi1_max_sges;
	props->max_cq = hfi1_max_cqs;
	props->max_ah = hfi1_max_ahs;
	props->max_cqe = hfi1_max_cqes;
	props->max_mr = dev->lk_table.max;
	props->max_fmr = dev->lk_table.max;
	props->max_map_per_fmr = 32767;
	props->max_pd = hfi1_max_pds;
	props->max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC;
	props->max_qp_init_rd_atom = 255;
	/* props->max_res_rd_atom */
	props->max_srq = hfi1_max_srqs;
	props->max_srq_wr = hfi1_max_srq_wrs;
	props->max_srq_sge = hfi1_max_srq_sges;
	/* props->local_ca_ack_delay */
	props->atomic_cap = IB_ATOMIC_GLOB;
	props->max_pkeys = hfi1_get_npkeys(dd);
	props->max_mcast_grp = hfi1_max_mcast_grps;
	props->max_mcast_qp_attach = hfi1_max_mcast_qp_attached;
	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
		props->max_mcast_grp;

	return 0;
}

static inline u16 opa_speed_to_ib(u16 in)
{
	u16 out = 0;

	if (in & OPA_LINK_SPEED_25G)
		out |= IB_SPEED_EDR;
	if (in & OPA_LINK_SPEED_12_5G)
		out |= IB_SPEED_FDR;

	return out;
}

/*
 * Convert a single OPA link width (no multiple flags) to an IB value.
 * A zero OPA link width means link down, which means the IB width value
 * is a don't care.
 */
static inline u16 opa_width_to_ib(u16 in)
{
	switch (in) {
	case OPA_LINK_WIDTH_1X:
	/* map 2x and 3x to 1x as they don't exist in IB */
	case OPA_LINK_WIDTH_2X:
	case OPA_LINK_WIDTH_3X:
		return IB_WIDTH_1X;
	default: /* link down or unknown, return our largest width */
	case OPA_LINK_WIDTH_4X:
		return IB_WIDTH_4X;
	}
}

static int query_port(struct ib_device *ibdev, u8 port,
		      struct ib_port_attr *props)
{
	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
	struct hfi1_ibport *ibp = to_iport(ibdev, port);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
	u16 lid = ppd->lid;

	memset(props, 0, sizeof(*props));
	props->lid = lid ? lid : 0;
	props->lmc = ppd->lmc;
	props->sm_lid = ibp->sm_lid;
	props->sm_sl = ibp->sm_sl;
	/* OPA logical states match IB logical states */
	props->state = driver_lstate(ppd);
	props->phys_state = hfi1_ibphys_portstate(ppd);
	props->port_cap_flags = ibp->port_cap_flags;
	props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
	props->max_msg_sz = 0x80000000;
	props->pkey_tbl_len = hfi1_get_npkeys(dd);
	props->bad_pkey_cntr = ibp->pkey_violations;
	props->qkey_viol_cntr = ibp->qkey_violations;
	props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
	/* see rate_show() in ib core/sysfs.c */
	props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active);
	props->max_vl_num = ppd->vls_supported;
	props->init_type_reply = 0;

	/*
	 * Once we are a "first class" citizen and have added the OPA MTUs to
	 * the core we can advertise the larger MTU enum to the ULPs, for now
	 * advertise only 4K.
	 *
	 * Those applications which are either OPA aware or pass the MTU enum
	 * from the Path Records to us will get the new 8k MTU.  Those that
	 * attempt to process the MTU enum may fail in various ways.
	 */
	props->max_mtu = mtu_to_enum((!valid_ib_mtu(hfi1_max_mtu) ?
				      4096 : hfi1_max_mtu), IB_MTU_4096);
	props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
		mtu_to_enum(ppd->ibmtu, IB_MTU_2048);
	props->subnet_timeout = ibp->subnet_timeout;

	return 0;
}

static int port_immutable(struct ib_device *ibdev, u8 port_num,
			  struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int err;

	err = query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	memset(immutable, 0, sizeof(*immutable));

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->core_cap_flags = RDMA_CORE_PORT_INTEL_OPA;
	immutable->max_mad_size = OPA_MGMT_MAD_SIZE;

	return 0;
}

static int modify_device(struct ib_device *device,
			 int device_modify_mask,
			 struct ib_device_modify *device_modify)
{
	struct hfi1_devdata *dd = dd_from_ibdev(device);
	unsigned i;
	int ret;

	if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
				   IB_DEVICE_MODIFY_NODE_DESC)) {
		ret = -EOPNOTSUPP;
		goto bail;
	}

	if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
		memcpy(device->node_desc, device_modify->node_desc, 64);
		for (i = 0; i < dd->num_pports; i++) {
			struct hfi1_ibport *ibp = &dd->pport[i].ibport_data;

			hfi1_node_desc_chg(ibp);
		}
	}

	if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
		ib_hfi1_sys_image_guid =
			cpu_to_be64(device_modify->sys_image_guid);
		for (i = 0; i < dd->num_pports; i++) {
			struct hfi1_ibport *ibp = &dd->pport[i].ibport_data;

			hfi1_sys_guid_chg(ibp);
		}
	}

	ret = 0;

bail:
	return ret;
}

1544 | static int modify_port(struct ib_device *ibdev, u8 port, | |
1545 | int port_modify_mask, struct ib_port_modify *props) | |
1546 | { | |
1547 | struct hfi1_ibport *ibp = to_iport(ibdev, port); | |
1548 | struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); | |
1549 | int ret = 0; | |
1550 | ||
1551 | ibp->port_cap_flags |= props->set_port_cap_mask; | |
1552 | ibp->port_cap_flags &= ~props->clr_port_cap_mask; | |
1553 | if (props->set_port_cap_mask || props->clr_port_cap_mask) | |
1554 | hfi1_cap_mask_chg(ibp); | |
1555 | if (port_modify_mask & IB_PORT_SHUTDOWN) { | |
1556 | set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0, | |
1557 | OPA_LINKDOWN_REASON_UNKNOWN); | |
1558 | ret = set_link_state(ppd, HLS_DN_DOWNDEF); | |
1559 | } | |
1560 | if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) | |
1561 | ibp->qkey_violations = 0; | |
1562 | return ret; | |
1563 | } | |
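
/*
 * Illustrative sketch (hypothetical, not part of this driver): other
 * kernel code reaches modify_port() above through the core helper
 * ib_modify_port(), e.g. to clear the QKey violation counter handled
 * by the IB_PORT_RESET_QKEY_CNTR branch:
 *
 *	struct ib_port_modify pm = { 0 };
 *	int err = ib_modify_port(ibdev, 1, IB_PORT_RESET_QKEY_CNTR, &pm);
 *
 *	if (err)
 *		pr_err("modify_port failed: %d\n", err);
 */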

static int query_gid(struct ib_device *ibdev, u8 port,
		     int index, union ib_gid *gid)
{
	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
	int ret = 0;

	if (!port || port > dd->num_pports)
		ret = -EINVAL;
	else {
		struct hfi1_ibport *ibp = to_iport(ibdev, port);
		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

		gid->global.subnet_prefix = ibp->gid_prefix;
		if (index == 0)
			gid->global.interface_id = cpu_to_be64(ppd->guid);
		else if (index < HFI1_GUIDS_PER_PORT)
			gid->global.interface_id = ibp->guids[index - 1];
		else
			ret = -EINVAL;
	}

	return ret;
}
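
/*
 * Illustrative user-space sketch (hypothetical): the GID table built
 * by query_gid() above is visible via ibv_query_gid().  Index 0 is the
 * port GUID; indexes 1..HFI1_GUIDS_PER_PORT-1 come from the alternate
 * GUID table.
 *
 *	#include <stdio.h>
 *	#include <infiniband/verbs.h>
 *
 *	static void show_gid0(struct ibv_context *ctx)
 *	{
 *		union ibv_gid gid;
 *
 *		if (!ibv_query_gid(ctx, 1, 0, &gid))
 *			printf("prefix %llx id %llx\n",
 *			       (unsigned long long)gid.global.subnet_prefix,
 *			       (unsigned long long)gid.global.interface_id);
 *	}
 */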

static struct ib_pd *alloc_pd(struct ib_device *ibdev,
			      struct ib_ucontext *context,
			      struct ib_udata *udata)
{
	struct hfi1_ibdev *dev = to_idev(ibdev);
	struct hfi1_pd *pd;
	struct ib_pd *ret;

	/*
	 * This is actually totally arbitrary.  Some correctness tests
	 * assume there's a maximum number of PDs that can be allocated.
	 * We don't actually have this limit, but we fail the test if
	 * we allow allocations of more than we report for this value.
	 */

	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
	if (!pd) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	spin_lock(&dev->n_pds_lock);
	if (dev->n_pds_allocated == hfi1_max_pds) {
		spin_unlock(&dev->n_pds_lock);
		kfree(pd);
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dev->n_pds_allocated++;
	spin_unlock(&dev->n_pds_lock);

	/* ib_alloc_pd() will initialize pd->ibpd. */
	pd->user = udata != NULL;

	ret = &pd->ibpd;

bail:
	return ret;
}

static int dealloc_pd(struct ib_pd *ibpd)
{
	struct hfi1_pd *pd = to_ipd(ibpd);
	struct hfi1_ibdev *dev = to_idev(ibpd->device);

	spin_lock(&dev->n_pds_lock);
	dev->n_pds_allocated--;
	spin_unlock(&dev->n_pds_lock);

	kfree(pd);

	return 0;
}
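
/*
 * Illustrative user-space sketch (hypothetical): alloc_pd() and
 * dealloc_pd() above back the libibverbs PD lifecycle.  Once max_pds
 * (a module parameter) PDs are outstanding, the driver returns
 * -ENOMEM and ibv_alloc_pd() yields NULL.
 *
 *	#include <infiniband/verbs.h>
 *
 *	struct ibv_pd *pd = ibv_alloc_pd(ctx);
 *
 *	if (pd)
 *		ibv_dealloc_pd(pd);
 */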

/*
 * Convert the AH's port number and SL to an SC.
 */
u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah)
{
	struct hfi1_ibport *ibp = to_iport(ibdev, ah->port_num);

	return ibp->sl_to_sc[ah->sl];
}

int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
{
	struct hfi1_ibport *ibp;
	struct hfi1_pportdata *ppd;
	struct hfi1_devdata *dd;
	u8 sc5;

	/* A multicast address requires a GRH (see ch. 8.4.1). */
	if (ah_attr->dlid >= HFI1_MULTICAST_LID_BASE &&
	    ah_attr->dlid != HFI1_PERMISSIVE_LID &&
	    !(ah_attr->ah_flags & IB_AH_GRH))
		goto bail;
	if ((ah_attr->ah_flags & IB_AH_GRH) &&
	    ah_attr->grh.sgid_index >= HFI1_GUIDS_PER_PORT)
		goto bail;
	if (ah_attr->dlid == 0)
		goto bail;
	if (ah_attr->port_num < 1 ||
	    ah_attr->port_num > ibdev->phys_port_cnt)
		goto bail;
	if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
	    ib_rate_to_mbps(ah_attr->static_rate) < 0)
		goto bail;
	if (ah_attr->sl >= OPA_MAX_SLS)
		goto bail;
	/* test the SL-to-SC-to-VLT mapping for validity */
	ibp = to_iport(ibdev, ah_attr->port_num);
	ppd = ppd_from_ibp(ibp);
	sc5 = ibp->sl_to_sc[ah_attr->sl];
	dd = dd_from_ppd(ppd);
	if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
		goto bail;
	return 0;
bail:
	return -EINVAL;
}
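
/*
 * Illustrative sketch (hypothetical values): a minimal unicast
 * ib_ah_attr that satisfies every check in hfi1_check_ah() above --
 * a non-zero DLID below HFI1_MULTICAST_LID_BASE, a valid port number,
 * an SL below OPA_MAX_SLS, and the port's current static rate:
 *
 *	struct ib_ah_attr attr = {
 *		.dlid        = 1,
 *		.sl          = 0,
 *		.port_num    = 1,
 *		.static_rate = IB_RATE_PORT_CURRENT,
 *	};
 *
 *	if (hfi1_check_ah(ibdev, &attr))
 *		pr_warn("AH attributes rejected\n");
 */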

/**
 * create_ah - create an address handle
 * @pd: the protection domain
 * @ah_attr: the attributes of the AH
 *
 * This may be called from interrupt context.
 */
static struct ib_ah *create_ah(struct ib_pd *pd,
			       struct ib_ah_attr *ah_attr)
{
	struct hfi1_ah *ah;
	struct ib_ah *ret;
	struct hfi1_ibdev *dev = to_idev(pd->device);
	unsigned long flags;

	if (hfi1_check_ah(pd->device, ah_attr)) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	ah = kmalloc(sizeof(*ah), GFP_ATOMIC);
	if (!ah) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	spin_lock_irqsave(&dev->n_ahs_lock, flags);
	if (dev->n_ahs_allocated == hfi1_max_ahs) {
		spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
		kfree(ah);
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dev->n_ahs_allocated++;
	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);

	/* ib_create_ah() will initialize ah->ibah. */
	ah->attr = *ah_attr;
	atomic_set(&ah->refcount, 0);

	ret = &ah->ibah;

bail:
	return ret;
}

struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid)
{
	struct ib_ah_attr attr;
	struct ib_ah *ah = ERR_PTR(-EINVAL);
	struct hfi1_qp *qp0;

	memset(&attr, 0, sizeof(attr));
	attr.dlid = dlid;
	attr.port_num = ppd_from_ibp(ibp)->port;
	rcu_read_lock();
	qp0 = rcu_dereference(ibp->qp[0]);
	if (qp0)
		ah = ib_create_ah(qp0->ibqp.pd, &attr);
	rcu_read_unlock();
	return ah;
}

/**
 * destroy_ah - destroy an address handle
 * @ibah: the AH to destroy
 *
 * This may be called from interrupt context.
 */
static int destroy_ah(struct ib_ah *ibah)
{
	struct hfi1_ibdev *dev = to_idev(ibah->device);
	struct hfi1_ah *ah = to_iah(ibah);
	unsigned long flags;

	if (atomic_read(&ah->refcount) != 0)
		return -EBUSY;

	spin_lock_irqsave(&dev->n_ahs_lock, flags);
	dev->n_ahs_allocated--;
	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);

	kfree(ah);

	return 0;
}
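
/*
 * Illustrative user-space sketch (hypothetical): create_ah() and
 * destroy_ah() above implement the libibverbs AH lifecycle.  Note that
 * destroy_ah() refuses with -EBUSY while the AH is still referenced.
 *
 *	#include <infiniband/verbs.h>
 *
 *	struct ibv_ah_attr attr = { .dlid = 1, .port_num = 1 };
 *	struct ibv_ah *ah = ibv_create_ah(pd, &attr);
 *
 *	if (ah)
 *		ibv_destroy_ah(ah);
 */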

static int modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
	struct hfi1_ah *ah = to_iah(ibah);

	if (hfi1_check_ah(ibah->device, ah_attr))
		return -EINVAL;

	ah->attr = *ah_attr;

	return 0;
}

static int query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
	struct hfi1_ah *ah = to_iah(ibah);

	*ah_attr = ah->attr;

	return 0;
}

/**
 * hfi1_get_npkeys - return the size of the PKEY table for context 0
 * @dd: the hfi1_ib device
 */
unsigned hfi1_get_npkeys(struct hfi1_devdata *dd)
{
	return ARRAY_SIZE(dd->pport[0].pkeys);
}

static int query_pkey(struct ib_device *ibdev, u8 port, u16 index,
		      u16 *pkey)
{
	struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
	int ret;

	if (index >= hfi1_get_npkeys(dd)) {
		ret = -EINVAL;
		goto bail;
	}

	*pkey = hfi1_get_pkey(to_iport(ibdev, port), index);
	ret = 0;

bail:
	return ret;
}
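
/*
 * Illustrative user-space sketch (hypothetical): query_pkey() above is
 * reached via ibv_query_pkey(); an index at or beyond the table size
 * reported by hfi1_get_npkeys() fails with -EINVAL.
 *
 *	#include <stdio.h>
 *	#include <infiniband/verbs.h>
 *
 *	__be16 pkey;
 *
 *	if (!ibv_query_pkey(ctx, 1, 0, &pkey))
 *		printf("pkey[0] = 0x%x\n", pkey);
 */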

/**
 * alloc_ucontext - allocate a ucontext
 * @ibdev: the infiniband device
 * @udata: not used by the driver
 */
static struct ib_ucontext *alloc_ucontext(struct ib_device *ibdev,
					  struct ib_udata *udata)
{
	struct hfi1_ucontext *context;
	struct ib_ucontext *ret;

	context = kmalloc(sizeof(*context), GFP_KERNEL);
	if (!context) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	ret = &context->ibucontext;

bail:
	return ret;
}

static int dealloc_ucontext(struct ib_ucontext *context)
{
	kfree(to_iucontext(context));
	return 0;
}

static void init_ibport(struct hfi1_pportdata *ppd)
{
	struct hfi1_ibport *ibp = &ppd->ibport_data;
	size_t sz = ARRAY_SIZE(ibp->sl_to_sc);
	int i;

	for (i = 0; i < sz; i++) {
		ibp->sl_to_sc[i] = i;
		ibp->sc_to_sl[i] = i;
	}

	spin_lock_init(&ibp->lock);
	/* Set the prefix to the default value (see ch. 4.1.1) */
	ibp->gid_prefix = IB_DEFAULT_GID_PREFIX;
	ibp->sm_lid = 0;
	/* Below should only set bits defined in OPA PortInfo.CapabilityMask */
	ibp->port_cap_flags = IB_PORT_AUTO_MIGR_SUP |
		IB_PORT_CAP_MASK_NOTICE_SUP;
	ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
	ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
	ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
	ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
	ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;

	RCU_INIT_POINTER(ibp->qp[0], NULL);
	RCU_INIT_POINTER(ibp->qp[1], NULL);
}

static void verbs_txreq_kmem_cache_ctor(void *obj)
{
	struct verbs_txreq *tx = (struct verbs_txreq *)obj;

	memset(tx, 0, sizeof(*tx));
}
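
/*
 * Illustrative sketch (not driver code): a kmem_cache constructor runs
 * once per underlying slab object allocation, not on every
 * kmem_cache_alloc(), so a txreq recycled through kmem_cache_free() is
 * not re-zeroed by the ctor above -- callers must reinitialize any
 * fields they care about.
 *
 *	struct verbs_txreq *tx;
 *
 *	tx = kmem_cache_alloc(dev->verbs_txreq_cache, GFP_ATOMIC);
 *	if (tx)
 *		kmem_cache_free(dev->verbs_txreq_cache, tx);
 */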

/**
 * hfi1_register_ib_device - register our device with the infiniband core
 * @dd: the device data structure
 * Return 0 if successful, errno if unsuccessful.
 */
int hfi1_register_ib_device(struct hfi1_devdata *dd)
{
	struct hfi1_ibdev *dev = &dd->verbs_dev;
	struct ib_device *ibdev = &dev->ibdev;
	struct hfi1_pportdata *ppd = dd->pport;
	unsigned i, lk_tab_size;
	int ret;
	size_t lcpysz = IB_DEVICE_NAME_MAX;
	u16 descq_cnt;

	ret = hfi1_qp_init(dev);
	if (ret)
		goto err_qp_init;

	for (i = 0; i < dd->num_pports; i++)
		init_ibport(ppd + i);

	/* Only need to initialize non-zero fields. */
	spin_lock_init(&dev->n_pds_lock);
	spin_lock_init(&dev->n_ahs_lock);
	spin_lock_init(&dev->n_cqs_lock);
	spin_lock_init(&dev->n_qps_lock);
	spin_lock_init(&dev->n_srqs_lock);
	spin_lock_init(&dev->n_mcast_grps_lock);
	init_timer(&dev->mem_timer);
	dev->mem_timer.function = mem_timer;
	dev->mem_timer.data = (unsigned long)dev;

	/*
	 * The top hfi1_lkey_table_size bits are used to index the
	 * table.  The lower 8 bits can be owned by the user (copied from
	 * the LKEY).  The remaining bits act as a generation number or tag.
	 */
	spin_lock_init(&dev->lk_table.lock);
	/* ensure generation is at least 4 bits (keys.c) */
	if (hfi1_lkey_table_size > MAX_LKEY_TABLE_BITS) {
		dd_dev_warn(dd, "lkey bits %u too large, reduced to %u\n",
			    hfi1_lkey_table_size, MAX_LKEY_TABLE_BITS);
		hfi1_lkey_table_size = MAX_LKEY_TABLE_BITS;
	}
	/* size the table only after the clamp above */
	dev->lk_table.max = 1 << hfi1_lkey_table_size;
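
	/*
	 * Illustrative layout (hypothetical example, derived from the
	 * comment above) for the default n = hfi1_lkey_table_size = 16:
	 *
	 *	bits 31..16  table index (top n bits)
	 *	bits 15..8   generation tag
	 *	bits  7..0   user-owned byte
	 *
	 * so an LKEY decomposes roughly as:
	 *
	 *	index = lkey >> (32 - n);
	 *	user  = lkey & 0xff;
	 */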
	lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
	dev->lk_table.table = (struct hfi1_mregion __rcu **)
		vmalloc(lk_tab_size);
	if (dev->lk_table.table == NULL) {
		ret = -ENOMEM;
		goto err_lk;
	}
	RCU_INIT_POINTER(dev->dma_mr, NULL);
	for (i = 0; i < dev->lk_table.max; i++)
		RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
	INIT_LIST_HEAD(&dev->pending_mmaps);
	spin_lock_init(&dev->pending_lock);
	seqlock_init(&dev->iowait_lock);
	dev->mmap_offset = PAGE_SIZE;
	spin_lock_init(&dev->mmap_offset_lock);
	INIT_LIST_HEAD(&dev->txwait);
	INIT_LIST_HEAD(&dev->memwait);

	descq_cnt = sdma_get_descq_cnt();

	/* SLAB_HWCACHE_ALIGN for AHG */
	dev->verbs_txreq_cache = kmem_cache_create("hfi1_vtxreq_cache",
						   sizeof(struct verbs_txreq),
						   0, SLAB_HWCACHE_ALIGN,
						   verbs_txreq_kmem_cache_ctor);
	if (!dev->verbs_txreq_cache) {
		ret = -ENOMEM;
		goto err_verbs_txreq;
	}

	/*
	 * The system image GUID is supposed to be the same for all
	 * HFIs in a single system but since there can be other
	 * device types in the system, we can't be sure this is unique.
	 */
	if (!ib_hfi1_sys_image_guid)
		ib_hfi1_sys_image_guid = cpu_to_be64(ppd->guid);
	lcpysz = strlcpy(ibdev->name, class_name(), lcpysz);
	strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
	ibdev->owner = THIS_MODULE;
	ibdev->node_guid = cpu_to_be64(ppd->guid);
	ibdev->uverbs_abi_ver = HFI1_UVERBS_ABI_VERSION;
	ibdev->uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_CREATE_AH) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_AH) |
		(1ull << IB_USER_VERBS_CMD_QUERY_AH) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
		(1ull << IB_USER_VERBS_CMD_REG_MR) |
		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
		(1ull << IB_USER_VERBS_CMD_POLL_CQ) |
		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
		(1ull << IB_USER_VERBS_CMD_POST_SEND) |
		(1ull << IB_USER_VERBS_CMD_POST_RECV) |
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
	ibdev->node_type = RDMA_NODE_IB_CA;
	ibdev->phys_port_cnt = dd->num_pports;
	ibdev->num_comp_vectors = 1;
	ibdev->dma_device = &dd->pcidev->dev;
	ibdev->query_device = query_device;
	ibdev->modify_device = modify_device;
	ibdev->query_port = query_port;
	ibdev->modify_port = modify_port;
	ibdev->query_pkey = query_pkey;
	ibdev->query_gid = query_gid;
	ibdev->alloc_ucontext = alloc_ucontext;
	ibdev->dealloc_ucontext = dealloc_ucontext;
	ibdev->alloc_pd = alloc_pd;
	ibdev->dealloc_pd = dealloc_pd;
	ibdev->create_ah = create_ah;
	ibdev->destroy_ah = destroy_ah;
	ibdev->modify_ah = modify_ah;
	ibdev->query_ah = query_ah;
	ibdev->create_srq = hfi1_create_srq;
	ibdev->modify_srq = hfi1_modify_srq;
	ibdev->query_srq = hfi1_query_srq;
	ibdev->destroy_srq = hfi1_destroy_srq;
	ibdev->create_qp = hfi1_create_qp;
	ibdev->modify_qp = hfi1_modify_qp;
	ibdev->query_qp = hfi1_query_qp;
	ibdev->destroy_qp = hfi1_destroy_qp;
	ibdev->post_send = post_send;
	ibdev->post_recv = post_receive;
	ibdev->post_srq_recv = hfi1_post_srq_receive;
	ibdev->create_cq = hfi1_create_cq;
	ibdev->destroy_cq = hfi1_destroy_cq;
	ibdev->resize_cq = hfi1_resize_cq;
	ibdev->poll_cq = hfi1_poll_cq;
	ibdev->req_notify_cq = hfi1_req_notify_cq;
	ibdev->get_dma_mr = hfi1_get_dma_mr;
	ibdev->reg_phys_mr = hfi1_reg_phys_mr;
	ibdev->reg_user_mr = hfi1_reg_user_mr;
	ibdev->dereg_mr = hfi1_dereg_mr;
	ibdev->alloc_mr = hfi1_alloc_mr;
	ibdev->alloc_fast_reg_page_list = hfi1_alloc_fast_reg_page_list;
	ibdev->free_fast_reg_page_list = hfi1_free_fast_reg_page_list;
	ibdev->alloc_fmr = hfi1_alloc_fmr;
	ibdev->map_phys_fmr = hfi1_map_phys_fmr;
	ibdev->unmap_fmr = hfi1_unmap_fmr;
	ibdev->dealloc_fmr = hfi1_dealloc_fmr;
	ibdev->attach_mcast = hfi1_multicast_attach;
	ibdev->detach_mcast = hfi1_multicast_detach;
	ibdev->process_mad = hfi1_process_mad;
	ibdev->mmap = hfi1_mmap;
	ibdev->dma_ops = &hfi1_dma_mapping_ops;
	ibdev->get_port_immutable = port_immutable;

	strncpy(ibdev->node_desc, init_utsname()->nodename,
		sizeof(ibdev->node_desc));

	ret = ib_register_device(ibdev, hfi1_create_port_files);
	if (ret)
		goto err_reg;

	ret = hfi1_create_agents(dev);
	if (ret)
		goto err_agents;

	ret = hfi1_verbs_register_sysfs(dd);
	if (ret)
		goto err_class;

	goto bail;

err_class:
	hfi1_free_agents(dev);
err_agents:
	ib_unregister_device(ibdev);
err_reg:
err_verbs_txreq:
	kmem_cache_destroy(dev->verbs_txreq_cache);
	vfree(dev->lk_table.table);
err_lk:
	hfi1_qp_exit(dev);
err_qp_init:
	dd_dev_err(dd, "cannot register verbs: %d!\n", -ret);
bail:
	return ret;
}

void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
{
	struct hfi1_ibdev *dev = &dd->verbs_dev;
	struct ib_device *ibdev = &dev->ibdev;

	hfi1_verbs_unregister_sysfs(dd);

	hfi1_free_agents(dev);

	ib_unregister_device(ibdev);

	if (!list_empty(&dev->txwait))
		dd_dev_err(dd, "txwait list not empty!\n");
	if (!list_empty(&dev->memwait))
		dd_dev_err(dd, "memwait list not empty!\n");
	if (dev->dma_mr)
		dd_dev_err(dd, "DMA MR not NULL!\n");

	hfi1_qp_exit(dev);
	del_timer_sync(&dev->mem_timer);
	kmem_cache_destroy(dev->verbs_txreq_cache);
	vfree(dev->lk_table.table);
}

/*
 * This must be called with s_lock held.
 */
void hfi1_schedule_send(struct hfi1_qp *qp)
{
	if (hfi1_send_ok(qp)) {
		struct hfi1_ibport *ibp =
			to_iport(qp->ibqp.device, qp->port_num);
		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

		iowait_schedule(&qp->s_iowait, ppd->hfi1_wq);
	}
}
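
/*
 * Illustrative caller pattern (hypothetical): per the comment above,
 * the QP's send lock must be held around the call:
 *
 *	spin_lock_irqsave(&qp->s_lock, flags);
 *	hfi1_schedule_send(qp);
 *	spin_unlock_irqrestore(&qp->s_lock, flags);
 */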

void hfi1_cnp_rcv(struct hfi1_packet *packet)
{
	struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;

	if (packet->qp->ibqp.qp_type == IB_QPT_UC)
		hfi1_uc_rcv(packet);
	else if (packet->qp->ibqp.qp_type == IB_QPT_UD)
		hfi1_ud_rcv(packet);
	else
		ibp->n_pkt_drops++;
}