drivers/staging/rdma/hfi1/chip.c
1/*
2 *
3 * This file is provided under a dual BSD/GPLv2 license. When using or
4 * redistributing this file, you may do so under either license.
5 *
6 * GPL LICENSE SUMMARY
7 *
8 * Copyright(c) 2015 Intel Corporation.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of version 2 of the GNU General Public License as
12 * published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 *
19 * BSD LICENSE
20 *
21 * Copyright(c) 2015 Intel Corporation.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 *
27 * - Redistributions of source code must retain the above copyright
28 * notice, this list of conditions and the following disclaimer.
29 * - Redistributions in binary form must reproduce the above copyright
30 * notice, this list of conditions and the following disclaimer in
31 * the documentation and/or other materials provided with the
32 * distribution.
33 * - Neither the name of Intel Corporation nor the names of its
34 * contributors may be used to endorse or promote products derived
35 * from this software without specific prior written permission.
36 *
37 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
38 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 *
49 */
50
51/*
52 * This file contains all of the code that is specific to the HFI chip
53 */
54
55#include <linux/pci.h>
56#include <linux/delay.h>
57#include <linux/interrupt.h>
58#include <linux/module.h>
59
60#include "hfi.h"
61#include "trace.h"
62#include "mad.h"
63#include "pio.h"
64#include "sdma.h"
65#include "eprom.h"
66
67#define NUM_IB_PORTS 1
68
69uint kdeth_qp;
70module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
71MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
72
73uint num_vls = HFI1_MAX_VLS_SUPPORTED;
74module_param(num_vls, uint, S_IRUGO);
75MODULE_PARM_DESC(num_vls, "Set number of Virtual Lanes to use (1-8)");
76
/*
 * Default time to aggregate two 10K packets from the idle state
 * (timer not running). The timer starts at the end of the first packet,
 * so only the time for one 10K packet and header plus a bit extra is needed.
 * 10 * 1024 + 64 header bytes = 10304 bytes
 * 10304 bytes / 12.5 GB/s = 824.32 ns
 */
84uint rcv_intr_timeout = (824 + 16); /* 16 is for coalescing interrupt */
85module_param(rcv_intr_timeout, uint, S_IRUGO);
86MODULE_PARM_DESC(rcv_intr_timeout, "Receive interrupt mitigation timeout in ns");
87
88uint rcv_intr_count = 16; /* same as qib */
89module_param(rcv_intr_count, uint, S_IRUGO);
90MODULE_PARM_DESC(rcv_intr_count, "Receive interrupt mitigation count");
91
92ushort link_crc_mask = SUPPORTED_CRCS;
93module_param(link_crc_mask, ushort, S_IRUGO);
94MODULE_PARM_DESC(link_crc_mask, "CRCs to use on the link");
95
uint loopback;
module_param_named(loopback, loopback, uint, S_IRUGO);
MODULE_PARM_DESC(loopback, "Put into loopback mode (1 = serdes, 3 = external cable)");
99
/* Other driver tunables */
uint rcv_intr_dynamic = 1; /* enable dynamic mode for rcv int mitigation */
static ushort crc_14b_sideband = 1;
static uint use_flr = 1;
uint quick_linkup; /* skip LNI */
105
106struct flag_table {
107 u64 flag; /* the flag */
108 char *str; /* description string */
109 u16 extra; /* extra information */
110 u16 unused0;
111 u32 unused1;
112};
113
114/* str must be a string constant */
115#define FLAG_ENTRY(str, extra, flag) {flag, str, extra}
116#define FLAG_ENTRY0(str, flag) {flag, str, 0}
117
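/*
 * Illustrative sketch (not part of the driver): how a flag_table built
 * with FLAG_ENTRY()/FLAG_ENTRY0() is typically consumed -- walk the table
 * and append the name of every bit that is set in a status value.  The
 * helper name and the caller-supplied buffer handling are hypothetical.
 */
static int __maybe_unused sketch_decode_flags(char *buf, int len,
					      const struct flag_table *table,
					      int nentries, u64 status)
{
	int used = 0;
	int i;

	for (i = 0; i < nentries; i++) {
		if (status & table[i].flag)
			used += scnprintf(buf + used, len - used, "%s%s",
					  used ? "," : "", table[i].str);
	}
	return used;	/* number of characters written */
}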
118/* Send Error Consequences */
119#define SEC_WRITE_DROPPED 0x1
120#define SEC_PACKET_DROPPED 0x2
121#define SEC_SC_HALTED 0x4 /* per-context only */
122#define SEC_SPC_FREEZE 0x8 /* per-HFI only */
123
124#define VL15CTXT 1
125#define MIN_KERNEL_KCTXTS 2
126#define NUM_MAP_REGS 32
127
128/* Bit offset into the GUID which carries HFI id information */
129#define GUID_HFI_INDEX_SHIFT 39
130
131/* extract the emulation revision */
132#define emulator_rev(dd) ((dd)->irev >> 8)
133/* parallel and serial emulation versions are 3 and 4 respectively */
134#define is_emulator_p(dd) ((((dd)->irev) & 0xf) == 3)
135#define is_emulator_s(dd) ((((dd)->irev) & 0xf) == 4)
136
137/* RSM fields */
138
139/* packet type */
140#define IB_PACKET_TYPE 2ull
141#define QW_SHIFT 6ull
142/* QPN[7..1] */
143#define QPN_WIDTH 7ull
144
145/* LRH.BTH: QW 0, OFFSET 48 - for match */
146#define LRH_BTH_QW 0ull
147#define LRH_BTH_BIT_OFFSET 48ull
148#define LRH_BTH_OFFSET(off) ((LRH_BTH_QW << QW_SHIFT) | (off))
149#define LRH_BTH_MATCH_OFFSET LRH_BTH_OFFSET(LRH_BTH_BIT_OFFSET)
150#define LRH_BTH_SELECT
151#define LRH_BTH_MASK 3ull
152#define LRH_BTH_VALUE 2ull
153
154/* LRH.SC[3..0] QW 0, OFFSET 56 - for match */
155#define LRH_SC_QW 0ull
156#define LRH_SC_BIT_OFFSET 56ull
157#define LRH_SC_OFFSET(off) ((LRH_SC_QW << QW_SHIFT) | (off))
158#define LRH_SC_MATCH_OFFSET LRH_SC_OFFSET(LRH_SC_BIT_OFFSET)
159#define LRH_SC_MASK 128ull
160#define LRH_SC_VALUE 0ull
161
162/* SC[n..0] QW 0, OFFSET 60 - for select */
163#define LRH_SC_SELECT_OFFSET ((LRH_SC_QW << QW_SHIFT) | (60ull))
164
165/* QPN[m+n:1] QW 1, OFFSET 1 */
166#define QPN_SELECT_OFFSET ((1ull << QW_SHIFT) | (1ull))
167
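/*
 * Worked example of the encoding above (illustration only): with
 * QW_SHIFT = 6, an RSM offset packs the quad-word index into the upper
 * bits and the bit offset within that quad word into the lower bits:
 *	LRH_BTH_MATCH_OFFSET = (0 << 6) | 48 = 48
 *	LRH_SC_MATCH_OFFSET  = (0 << 6) | 56 = 56
 *	LRH_SC_SELECT_OFFSET = (0 << 6) | 60 = 60
 *	QPN_SELECT_OFFSET    = (1 << 6) |  1 = 65
 */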
168/* defines to build power on SC2VL table */
169#define SC2VL_VAL( \
170 num, \
171 sc0, sc0val, \
172 sc1, sc1val, \
173 sc2, sc2val, \
174 sc3, sc3val, \
175 sc4, sc4val, \
176 sc5, sc5val, \
177 sc6, sc6val, \
178 sc7, sc7val) \
179( \
180 ((u64)(sc0val) << SEND_SC2VLT##num##_SC##sc0##_SHIFT) | \
181 ((u64)(sc1val) << SEND_SC2VLT##num##_SC##sc1##_SHIFT) | \
182 ((u64)(sc2val) << SEND_SC2VLT##num##_SC##sc2##_SHIFT) | \
183 ((u64)(sc3val) << SEND_SC2VLT##num##_SC##sc3##_SHIFT) | \
184 ((u64)(sc4val) << SEND_SC2VLT##num##_SC##sc4##_SHIFT) | \
185 ((u64)(sc5val) << SEND_SC2VLT##num##_SC##sc5##_SHIFT) | \
186 ((u64)(sc6val) << SEND_SC2VLT##num##_SC##sc6##_SHIFT) | \
187 ((u64)(sc7val) << SEND_SC2VLT##num##_SC##sc7##_SHIFT) \
188)
189
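/*
 * Illustrative (hypothetical) use of SC2VL_VAL: build the value for one
 * SendSC2VLt CSR with an identity SC-to-VL mapping, i.e. SC0->VL0 ...
 * SC7->VL7.  Only the macro expansion is being shown; the register this
 * value would be written to is deliberately not named here.
 *
 *	u64 sc2vl0 = SC2VL_VAL(0,
 *			       0, 0, 1, 1, 2, 2, 3, 3,
 *			       4, 4, 5, 5, 6, 6, 7, 7);
 */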
190#define DC_SC_VL_VAL( \
191 range, \
192 e0, e0val, \
193 e1, e1val, \
194 e2, e2val, \
195 e3, e3val, \
196 e4, e4val, \
197 e5, e5val, \
198 e6, e6val, \
199 e7, e7val, \
200 e8, e8val, \
201 e9, e9val, \
202 e10, e10val, \
203 e11, e11val, \
204 e12, e12val, \
205 e13, e13val, \
206 e14, e14val, \
207 e15, e15val) \
208( \
209 ((u64)(e0val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e0##_SHIFT) | \
210 ((u64)(e1val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e1##_SHIFT) | \
211 ((u64)(e2val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e2##_SHIFT) | \
212 ((u64)(e3val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e3##_SHIFT) | \
213 ((u64)(e4val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e4##_SHIFT) | \
214 ((u64)(e5val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e5##_SHIFT) | \
215 ((u64)(e6val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e6##_SHIFT) | \
216 ((u64)(e7val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e7##_SHIFT) | \
217 ((u64)(e8val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e8##_SHIFT) | \
218 ((u64)(e9val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e9##_SHIFT) | \
219 ((u64)(e10val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e10##_SHIFT) | \
220 ((u64)(e11val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e11##_SHIFT) | \
221 ((u64)(e12val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e12##_SHIFT) | \
222 ((u64)(e13val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e13##_SHIFT) | \
223 ((u64)(e14val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e14##_SHIFT) | \
224 ((u64)(e15val) << DCC_CFG_SC_VL_TABLE_##range##_ENTRY##e15##_SHIFT) \
225)
226
227/* all CceStatus sub-block freeze bits */
228#define ALL_FROZE (CCE_STATUS_SDMA_FROZE_SMASK \
229 | CCE_STATUS_RXE_FROZE_SMASK \
230 | CCE_STATUS_TXE_FROZE_SMASK \
231 | CCE_STATUS_TXE_PIO_FROZE_SMASK)
232/* all CceStatus sub-block TXE pause bits */
233#define ALL_TXE_PAUSE (CCE_STATUS_TXE_PIO_PAUSED_SMASK \
234 | CCE_STATUS_TXE_PAUSED_SMASK \
235 | CCE_STATUS_SDMA_PAUSED_SMASK)
236/* all CceStatus sub-block RXE pause bits */
237#define ALL_RXE_PAUSE CCE_STATUS_RXE_PAUSED_SMASK
238
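/*
 * Illustrative sketch (not the driver's routine): after an SPC freeze is
 * requested, software typically polls CceStatus until every sub-block
 * reports frozen, i.e. until all ALL_FROZE bits are set.  The CSR offset
 * is taken as a parameter rather than assuming its symbolic name; the
 * function name and timeout handling are hypothetical.
 */
static int __maybe_unused sketch_wait_all_frozen(struct hfi1_devdata *dd,
						 u32 cce_status_csr,
						 int timeout_us)
{
	while (timeout_us-- > 0) {
		if ((read_csr(dd, cce_status_csr) & ALL_FROZE) == ALL_FROZE)
			return 0;	/* all sub-blocks report frozen */
		udelay(1);
	}
	return -ETIMEDOUT;
}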
239/*
240 * CCE Error flags.
241 */
242static struct flag_table cce_err_status_flags[] = {
243/* 0*/ FLAG_ENTRY0("CceCsrParityErr",
244 CCE_ERR_STATUS_CCE_CSR_PARITY_ERR_SMASK),
245/* 1*/ FLAG_ENTRY0("CceCsrReadBadAddrErr",
246 CCE_ERR_STATUS_CCE_CSR_READ_BAD_ADDR_ERR_SMASK),
247/* 2*/ FLAG_ENTRY0("CceCsrWriteBadAddrErr",
248 CCE_ERR_STATUS_CCE_CSR_WRITE_BAD_ADDR_ERR_SMASK),
249/* 3*/ FLAG_ENTRY0("CceTrgtAsyncFifoParityErr",
250 CCE_ERR_STATUS_CCE_TRGT_ASYNC_FIFO_PARITY_ERR_SMASK),
251/* 4*/ FLAG_ENTRY0("CceTrgtAccessErr",
252 CCE_ERR_STATUS_CCE_TRGT_ACCESS_ERR_SMASK),
253/* 5*/ FLAG_ENTRY0("CceRspdDataParityErr",
254 CCE_ERR_STATUS_CCE_RSPD_DATA_PARITY_ERR_SMASK),
255/* 6*/ FLAG_ENTRY0("CceCli0AsyncFifoParityErr",
256 CCE_ERR_STATUS_CCE_CLI0_ASYNC_FIFO_PARITY_ERR_SMASK),
257/* 7*/ FLAG_ENTRY0("CceCsrCfgBusParityErr",
258 CCE_ERR_STATUS_CCE_CSR_CFG_BUS_PARITY_ERR_SMASK),
259/* 8*/ FLAG_ENTRY0("CceCli2AsyncFifoParityErr",
260 CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK),
261/* 9*/ FLAG_ENTRY0("CceCli1AsyncFifoPioCrdtParityErr",
262 CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_PIO_CRDT_PARITY_ERR_SMASK),
/*10*/ FLAG_ENTRY0("CceCli1AsyncFifoSdmaHdParityErr",
 CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_SDMA_HD_PARITY_ERR_SMASK),
265/*11*/ FLAG_ENTRY0("CceCli1AsyncFifoRxdmaParityError",
266 CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_RXDMA_PARITY_ERROR_SMASK),
267/*12*/ FLAG_ENTRY0("CceCli1AsyncFifoDbgParityError",
268 CCE_ERR_STATUS_CCE_CLI1_ASYNC_FIFO_DBG_PARITY_ERROR_SMASK),
269/*13*/ FLAG_ENTRY0("PcicRetryMemCorErr",
270 CCE_ERR_STATUS_PCIC_RETRY_MEM_COR_ERR_SMASK),
/*14*/ FLAG_ENTRY0("PcicRetrySotMemCorErr",
 CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_COR_ERR_SMASK),
/*15*/ FLAG_ENTRY0("PcicPostHdQCorErr",
 CCE_ERR_STATUS_PCIC_POST_HD_QCOR_ERR_SMASK),
/*16*/ FLAG_ENTRY0("PcicPostDatQCorErr",
 CCE_ERR_STATUS_PCIC_POST_DAT_QCOR_ERR_SMASK),
/*17*/ FLAG_ENTRY0("PcicCplHdQCorErr",
 CCE_ERR_STATUS_PCIC_CPL_HD_QCOR_ERR_SMASK),
279/*18*/ FLAG_ENTRY0("PcicCplDatQCorErr",
280 CCE_ERR_STATUS_PCIC_CPL_DAT_QCOR_ERR_SMASK),
281/*19*/ FLAG_ENTRY0("PcicNPostHQParityErr",
282 CCE_ERR_STATUS_PCIC_NPOST_HQ_PARITY_ERR_SMASK),
283/*20*/ FLAG_ENTRY0("PcicNPostDatQParityErr",
284 CCE_ERR_STATUS_PCIC_NPOST_DAT_QPARITY_ERR_SMASK),
285/*21*/ FLAG_ENTRY0("PcicRetryMemUncErr",
286 CCE_ERR_STATUS_PCIC_RETRY_MEM_UNC_ERR_SMASK),
287/*22*/ FLAG_ENTRY0("PcicRetrySotMemUncErr",
288 CCE_ERR_STATUS_PCIC_RETRY_SOT_MEM_UNC_ERR_SMASK),
289/*23*/ FLAG_ENTRY0("PcicPostHdQUncErr",
290 CCE_ERR_STATUS_PCIC_POST_HD_QUNC_ERR_SMASK),
291/*24*/ FLAG_ENTRY0("PcicPostDatQUncErr",
292 CCE_ERR_STATUS_PCIC_POST_DAT_QUNC_ERR_SMASK),
293/*25*/ FLAG_ENTRY0("PcicCplHdQUncErr",
294 CCE_ERR_STATUS_PCIC_CPL_HD_QUNC_ERR_SMASK),
295/*26*/ FLAG_ENTRY0("PcicCplDatQUncErr",
296 CCE_ERR_STATUS_PCIC_CPL_DAT_QUNC_ERR_SMASK),
297/*27*/ FLAG_ENTRY0("PcicTransmitFrontParityErr",
298 CCE_ERR_STATUS_PCIC_TRANSMIT_FRONT_PARITY_ERR_SMASK),
299/*28*/ FLAG_ENTRY0("PcicTransmitBackParityErr",
300 CCE_ERR_STATUS_PCIC_TRANSMIT_BACK_PARITY_ERR_SMASK),
301/*29*/ FLAG_ENTRY0("PcicReceiveParityErr",
302 CCE_ERR_STATUS_PCIC_RECEIVE_PARITY_ERR_SMASK),
303/*30*/ FLAG_ENTRY0("CceTrgtCplTimeoutErr",
304 CCE_ERR_STATUS_CCE_TRGT_CPL_TIMEOUT_ERR_SMASK),
305/*31*/ FLAG_ENTRY0("LATriggered",
306 CCE_ERR_STATUS_LA_TRIGGERED_SMASK),
307/*32*/ FLAG_ENTRY0("CceSegReadBadAddrErr",
308 CCE_ERR_STATUS_CCE_SEG_READ_BAD_ADDR_ERR_SMASK),
309/*33*/ FLAG_ENTRY0("CceSegWriteBadAddrErr",
310 CCE_ERR_STATUS_CCE_SEG_WRITE_BAD_ADDR_ERR_SMASK),
311/*34*/ FLAG_ENTRY0("CceRcplAsyncFifoParityErr",
312 CCE_ERR_STATUS_CCE_RCPL_ASYNC_FIFO_PARITY_ERR_SMASK),
313/*35*/ FLAG_ENTRY0("CceRxdmaConvFifoParityErr",
314 CCE_ERR_STATUS_CCE_RXDMA_CONV_FIFO_PARITY_ERR_SMASK),
315/*36*/ FLAG_ENTRY0("CceMsixTableCorErr",
316 CCE_ERR_STATUS_CCE_MSIX_TABLE_COR_ERR_SMASK),
317/*37*/ FLAG_ENTRY0("CceMsixTableUncErr",
318 CCE_ERR_STATUS_CCE_MSIX_TABLE_UNC_ERR_SMASK),
319/*38*/ FLAG_ENTRY0("CceIntMapCorErr",
320 CCE_ERR_STATUS_CCE_INT_MAP_COR_ERR_SMASK),
321/*39*/ FLAG_ENTRY0("CceIntMapUncErr",
322 CCE_ERR_STATUS_CCE_INT_MAP_UNC_ERR_SMASK),
323/*40*/ FLAG_ENTRY0("CceMsixCsrParityErr",
324 CCE_ERR_STATUS_CCE_MSIX_CSR_PARITY_ERR_SMASK),
325/*41-63 reserved*/
326};
327
328/*
329 * Misc Error flags
330 */
331#define MES(text) MISC_ERR_STATUS_MISC_##text##_ERR_SMASK
332static struct flag_table misc_err_status_flags[] = {
333/* 0*/ FLAG_ENTRY0("CSR_PARITY", MES(CSR_PARITY)),
334/* 1*/ FLAG_ENTRY0("CSR_READ_BAD_ADDR", MES(CSR_READ_BAD_ADDR)),
335/* 2*/ FLAG_ENTRY0("CSR_WRITE_BAD_ADDR", MES(CSR_WRITE_BAD_ADDR)),
336/* 3*/ FLAG_ENTRY0("SBUS_WRITE_FAILED", MES(SBUS_WRITE_FAILED)),
337/* 4*/ FLAG_ENTRY0("KEY_MISMATCH", MES(KEY_MISMATCH)),
338/* 5*/ FLAG_ENTRY0("FW_AUTH_FAILED", MES(FW_AUTH_FAILED)),
339/* 6*/ FLAG_ENTRY0("EFUSE_CSR_PARITY", MES(EFUSE_CSR_PARITY)),
340/* 7*/ FLAG_ENTRY0("EFUSE_READ_BAD_ADDR", MES(EFUSE_READ_BAD_ADDR)),
341/* 8*/ FLAG_ENTRY0("EFUSE_WRITE", MES(EFUSE_WRITE)),
342/* 9*/ FLAG_ENTRY0("EFUSE_DONE_PARITY", MES(EFUSE_DONE_PARITY)),
343/*10*/ FLAG_ENTRY0("INVALID_EEP_CMD", MES(INVALID_EEP_CMD)),
344/*11*/ FLAG_ENTRY0("MBIST_FAIL", MES(MBIST_FAIL)),
345/*12*/ FLAG_ENTRY0("PLL_LOCK_FAIL", MES(PLL_LOCK_FAIL))
346};
347
348/*
349 * TXE PIO Error flags and consequences
350 */
351static struct flag_table pio_err_status_flags[] = {
352/* 0*/ FLAG_ENTRY("PioWriteBadCtxt",
353 SEC_WRITE_DROPPED,
354 SEND_PIO_ERR_STATUS_PIO_WRITE_BAD_CTXT_ERR_SMASK),
355/* 1*/ FLAG_ENTRY("PioWriteAddrParity",
356 SEC_SPC_FREEZE,
357 SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK),
358/* 2*/ FLAG_ENTRY("PioCsrParity",
359 SEC_SPC_FREEZE,
360 SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK),
361/* 3*/ FLAG_ENTRY("PioSbMemFifo0",
362 SEC_SPC_FREEZE,
363 SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK),
364/* 4*/ FLAG_ENTRY("PioSbMemFifo1",
365 SEC_SPC_FREEZE,
366 SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK),
367/* 5*/ FLAG_ENTRY("PioPccFifoParity",
368 SEC_SPC_FREEZE,
369 SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK),
370/* 6*/ FLAG_ENTRY("PioPecFifoParity",
371 SEC_SPC_FREEZE,
372 SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK),
373/* 7*/ FLAG_ENTRY("PioSbrdctlCrrelParity",
374 SEC_SPC_FREEZE,
375 SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK),
376/* 8*/ FLAG_ENTRY("PioSbrdctrlCrrelFifoParity",
377 SEC_SPC_FREEZE,
378 SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK),
379/* 9*/ FLAG_ENTRY("PioPktEvictFifoParityErr",
380 SEC_SPC_FREEZE,
381 SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK),
382/*10*/ FLAG_ENTRY("PioSmPktResetParity",
383 SEC_SPC_FREEZE,
384 SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK),
385/*11*/ FLAG_ENTRY("PioVlLenMemBank0Unc",
386 SEC_SPC_FREEZE,
387 SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK),
388/*12*/ FLAG_ENTRY("PioVlLenMemBank1Unc",
389 SEC_SPC_FREEZE,
390 SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK),
391/*13*/ FLAG_ENTRY("PioVlLenMemBank0Cor",
392 0,
393 SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_COR_ERR_SMASK),
394/*14*/ FLAG_ENTRY("PioVlLenMemBank1Cor",
395 0,
396 SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_COR_ERR_SMASK),
397/*15*/ FLAG_ENTRY("PioCreditRetFifoParity",
398 SEC_SPC_FREEZE,
399 SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK),
400/*16*/ FLAG_ENTRY("PioPpmcPblFifo",
401 SEC_SPC_FREEZE,
402 SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK),
403/*17*/ FLAG_ENTRY("PioInitSmIn",
404 0,
405 SEND_PIO_ERR_STATUS_PIO_INIT_SM_IN_ERR_SMASK),
406/*18*/ FLAG_ENTRY("PioPktEvictSmOrArbSm",
407 SEC_SPC_FREEZE,
408 SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK),
409/*19*/ FLAG_ENTRY("PioHostAddrMemUnc",
410 SEC_SPC_FREEZE,
411 SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK),
412/*20*/ FLAG_ENTRY("PioHostAddrMemCor",
413 0,
414 SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_COR_ERR_SMASK),
415/*21*/ FLAG_ENTRY("PioWriteDataParity",
416 SEC_SPC_FREEZE,
417 SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK),
418/*22*/ FLAG_ENTRY("PioStateMachine",
419 SEC_SPC_FREEZE,
420 SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK),
421/*23*/ FLAG_ENTRY("PioWriteQwValidParity",
422 SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
423 SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK),
424/*24*/ FLAG_ENTRY("PioBlockQwCountParity",
425 SEC_WRITE_DROPPED|SEC_SPC_FREEZE,
426 SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK),
427/*25*/ FLAG_ENTRY("PioVlfVlLenParity",
428 SEC_SPC_FREEZE,
429 SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK),
430/*26*/ FLAG_ENTRY("PioVlfSopParity",
431 SEC_SPC_FREEZE,
432 SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK),
433/*27*/ FLAG_ENTRY("PioVlFifoParity",
434 SEC_SPC_FREEZE,
435 SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK),
436/*28*/ FLAG_ENTRY("PioPpmcBqcMemParity",
437 SEC_SPC_FREEZE,
438 SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK),
439/*29*/ FLAG_ENTRY("PioPpmcSopLen",
440 SEC_SPC_FREEZE,
441 SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK),
442/*30-31 reserved*/
443/*32*/ FLAG_ENTRY("PioCurrentFreeCntParity",
444 SEC_SPC_FREEZE,
445 SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK),
446/*33*/ FLAG_ENTRY("PioLastReturnedCntParity",
447 SEC_SPC_FREEZE,
448 SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK),
449/*34*/ FLAG_ENTRY("PioPccSopHeadParity",
450 SEC_SPC_FREEZE,
451 SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK),
452/*35*/ FLAG_ENTRY("PioPecSopHeadParityErr",
453 SEC_SPC_FREEZE,
454 SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK),
455/*36-63 reserved*/
456};
457
458/* TXE PIO errors that cause an SPC freeze */
459#define ALL_PIO_FREEZE_ERR \
460 (SEND_PIO_ERR_STATUS_PIO_WRITE_ADDR_PARITY_ERR_SMASK \
461 | SEND_PIO_ERR_STATUS_PIO_CSR_PARITY_ERR_SMASK \
462 | SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO0_ERR_SMASK \
463 | SEND_PIO_ERR_STATUS_PIO_SB_MEM_FIFO1_ERR_SMASK \
464 | SEND_PIO_ERR_STATUS_PIO_PCC_FIFO_PARITY_ERR_SMASK \
465 | SEND_PIO_ERR_STATUS_PIO_PEC_FIFO_PARITY_ERR_SMASK \
466 | SEND_PIO_ERR_STATUS_PIO_SBRDCTL_CRREL_PARITY_ERR_SMASK \
467 | SEND_PIO_ERR_STATUS_PIO_SBRDCTRL_CRREL_FIFO_PARITY_ERR_SMASK \
468 | SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_FIFO_PARITY_ERR_SMASK \
469 | SEND_PIO_ERR_STATUS_PIO_SM_PKT_RESET_PARITY_ERR_SMASK \
470 | SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK0_UNC_ERR_SMASK \
471 | SEND_PIO_ERR_STATUS_PIO_VL_LEN_MEM_BANK1_UNC_ERR_SMASK \
472 | SEND_PIO_ERR_STATUS_PIO_CREDIT_RET_FIFO_PARITY_ERR_SMASK \
473 | SEND_PIO_ERR_STATUS_PIO_PPMC_PBL_FIFO_ERR_SMASK \
474 | SEND_PIO_ERR_STATUS_PIO_PKT_EVICT_SM_OR_ARB_SM_ERR_SMASK \
475 | SEND_PIO_ERR_STATUS_PIO_HOST_ADDR_MEM_UNC_ERR_SMASK \
476 | SEND_PIO_ERR_STATUS_PIO_WRITE_DATA_PARITY_ERR_SMASK \
477 | SEND_PIO_ERR_STATUS_PIO_STATE_MACHINE_ERR_SMASK \
478 | SEND_PIO_ERR_STATUS_PIO_WRITE_QW_VALID_PARITY_ERR_SMASK \
479 | SEND_PIO_ERR_STATUS_PIO_BLOCK_QW_COUNT_PARITY_ERR_SMASK \
480 | SEND_PIO_ERR_STATUS_PIO_VLF_VL_LEN_PARITY_ERR_SMASK \
481 | SEND_PIO_ERR_STATUS_PIO_VLF_SOP_PARITY_ERR_SMASK \
482 | SEND_PIO_ERR_STATUS_PIO_VL_FIFO_PARITY_ERR_SMASK \
483 | SEND_PIO_ERR_STATUS_PIO_PPMC_BQC_MEM_PARITY_ERR_SMASK \
484 | SEND_PIO_ERR_STATUS_PIO_PPMC_SOP_LEN_ERR_SMASK \
485 | SEND_PIO_ERR_STATUS_PIO_CURRENT_FREE_CNT_PARITY_ERR_SMASK \
486 | SEND_PIO_ERR_STATUS_PIO_LAST_RETURNED_CNT_PARITY_ERR_SMASK \
487 | SEND_PIO_ERR_STATUS_PIO_PCC_SOP_HEAD_PARITY_ERR_SMASK \
488 | SEND_PIO_ERR_STATUS_PIO_PEC_SOP_HEAD_PARITY_ERR_SMASK)
489
490/*
491 * TXE SDMA Error flags
492 */
493static struct flag_table sdma_err_status_flags[] = {
494/* 0*/ FLAG_ENTRY0("SDmaRpyTagErr",
495 SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK),
496/* 1*/ FLAG_ENTRY0("SDmaCsrParityErr",
497 SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK),
498/* 2*/ FLAG_ENTRY0("SDmaPcieReqTrackingUncErr",
499 SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK),
500/* 3*/ FLAG_ENTRY0("SDmaPcieReqTrackingCorErr",
501 SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_COR_ERR_SMASK),
502/*04-63 reserved*/
503};
504
505/* TXE SDMA errors that cause an SPC freeze */
506#define ALL_SDMA_FREEZE_ERR \
507 (SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK \
508 | SEND_DMA_ERR_STATUS_SDMA_CSR_PARITY_ERR_SMASK \
509 | SEND_DMA_ERR_STATUS_SDMA_PCIE_REQ_TRACKING_UNC_ERR_SMASK)
510
511/*
512 * TXE Egress Error flags
513 */
514#define SEES(text) SEND_EGRESS_ERR_STATUS_##text##_ERR_SMASK
515static struct flag_table egress_err_status_flags[] = {
516/* 0*/ FLAG_ENTRY0("TxPktIntegrityMemCorErr", SEES(TX_PKT_INTEGRITY_MEM_COR)),
517/* 1*/ FLAG_ENTRY0("TxPktIntegrityMemUncErr", SEES(TX_PKT_INTEGRITY_MEM_UNC)),
518/* 2 reserved */
519/* 3*/ FLAG_ENTRY0("TxEgressFifoUnderrunOrParityErr",
520 SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY)),
521/* 4*/ FLAG_ENTRY0("TxLinkdownErr", SEES(TX_LINKDOWN)),
522/* 5*/ FLAG_ENTRY0("TxIncorrectLinkStateErr", SEES(TX_INCORRECT_LINK_STATE)),
523/* 6 reserved */
524/* 7*/ FLAG_ENTRY0("TxPioLaunchIntfParityErr",
525 SEES(TX_PIO_LAUNCH_INTF_PARITY)),
526/* 8*/ FLAG_ENTRY0("TxSdmaLaunchIntfParityErr",
527 SEES(TX_SDMA_LAUNCH_INTF_PARITY)),
528/* 9-10 reserved */
529/*11*/ FLAG_ENTRY0("TxSbrdCtlStateMachineParityErr",
530 SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY)),
531/*12*/ FLAG_ENTRY0("TxIllegalVLErr", SEES(TX_ILLEGAL_VL)),
532/*13*/ FLAG_ENTRY0("TxLaunchCsrParityErr", SEES(TX_LAUNCH_CSR_PARITY)),
533/*14*/ FLAG_ENTRY0("TxSbrdCtlCsrParityErr", SEES(TX_SBRD_CTL_CSR_PARITY)),
534/*15*/ FLAG_ENTRY0("TxConfigParityErr", SEES(TX_CONFIG_PARITY)),
535/*16*/ FLAG_ENTRY0("TxSdma0DisallowedPacketErr",
536 SEES(TX_SDMA0_DISALLOWED_PACKET)),
537/*17*/ FLAG_ENTRY0("TxSdma1DisallowedPacketErr",
538 SEES(TX_SDMA1_DISALLOWED_PACKET)),
539/*18*/ FLAG_ENTRY0("TxSdma2DisallowedPacketErr",
540 SEES(TX_SDMA2_DISALLOWED_PACKET)),
541/*19*/ FLAG_ENTRY0("TxSdma3DisallowedPacketErr",
542 SEES(TX_SDMA3_DISALLOWED_PACKET)),
543/*20*/ FLAG_ENTRY0("TxSdma4DisallowedPacketErr",
544 SEES(TX_SDMA4_DISALLOWED_PACKET)),
545/*21*/ FLAG_ENTRY0("TxSdma5DisallowedPacketErr",
546 SEES(TX_SDMA5_DISALLOWED_PACKET)),
547/*22*/ FLAG_ENTRY0("TxSdma6DisallowedPacketErr",
548 SEES(TX_SDMA6_DISALLOWED_PACKET)),
549/*23*/ FLAG_ENTRY0("TxSdma7DisallowedPacketErr",
550 SEES(TX_SDMA7_DISALLOWED_PACKET)),
551/*24*/ FLAG_ENTRY0("TxSdma8DisallowedPacketErr",
552 SEES(TX_SDMA8_DISALLOWED_PACKET)),
553/*25*/ FLAG_ENTRY0("TxSdma9DisallowedPacketErr",
554 SEES(TX_SDMA9_DISALLOWED_PACKET)),
555/*26*/ FLAG_ENTRY0("TxSdma10DisallowedPacketErr",
556 SEES(TX_SDMA10_DISALLOWED_PACKET)),
557/*27*/ FLAG_ENTRY0("TxSdma11DisallowedPacketErr",
558 SEES(TX_SDMA11_DISALLOWED_PACKET)),
559/*28*/ FLAG_ENTRY0("TxSdma12DisallowedPacketErr",
560 SEES(TX_SDMA12_DISALLOWED_PACKET)),
561/*29*/ FLAG_ENTRY0("TxSdma13DisallowedPacketErr",
562 SEES(TX_SDMA13_DISALLOWED_PACKET)),
563/*30*/ FLAG_ENTRY0("TxSdma14DisallowedPacketErr",
564 SEES(TX_SDMA14_DISALLOWED_PACKET)),
565/*31*/ FLAG_ENTRY0("TxSdma15DisallowedPacketErr",
566 SEES(TX_SDMA15_DISALLOWED_PACKET)),
567/*32*/ FLAG_ENTRY0("TxLaunchFifo0UncOrParityErr",
568 SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY)),
569/*33*/ FLAG_ENTRY0("TxLaunchFifo1UncOrParityErr",
570 SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY)),
571/*34*/ FLAG_ENTRY0("TxLaunchFifo2UncOrParityErr",
572 SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY)),
573/*35*/ FLAG_ENTRY0("TxLaunchFifo3UncOrParityErr",
574 SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY)),
575/*36*/ FLAG_ENTRY0("TxLaunchFifo4UncOrParityErr",
576 SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY)),
577/*37*/ FLAG_ENTRY0("TxLaunchFifo5UncOrParityErr",
578 SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY)),
579/*38*/ FLAG_ENTRY0("TxLaunchFifo6UncOrParityErr",
580 SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY)),
581/*39*/ FLAG_ENTRY0("TxLaunchFifo7UncOrParityErr",
582 SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY)),
583/*40*/ FLAG_ENTRY0("TxLaunchFifo8UncOrParityErr",
584 SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY)),
585/*41*/ FLAG_ENTRY0("TxCreditReturnParityErr", SEES(TX_CREDIT_RETURN_PARITY)),
586/*42*/ FLAG_ENTRY0("TxSbHdrUncErr", SEES(TX_SB_HDR_UNC)),
587/*43*/ FLAG_ENTRY0("TxReadSdmaMemoryUncErr", SEES(TX_READ_SDMA_MEMORY_UNC)),
588/*44*/ FLAG_ENTRY0("TxReadPioMemoryUncErr", SEES(TX_READ_PIO_MEMORY_UNC)),
589/*45*/ FLAG_ENTRY0("TxEgressFifoUncErr", SEES(TX_EGRESS_FIFO_UNC)),
590/*46*/ FLAG_ENTRY0("TxHcrcInsertionErr", SEES(TX_HCRC_INSERTION)),
591/*47*/ FLAG_ENTRY0("TxCreditReturnVLErr", SEES(TX_CREDIT_RETURN_VL)),
592/*48*/ FLAG_ENTRY0("TxLaunchFifo0CorErr", SEES(TX_LAUNCH_FIFO0_COR)),
593/*49*/ FLAG_ENTRY0("TxLaunchFifo1CorErr", SEES(TX_LAUNCH_FIFO1_COR)),
594/*50*/ FLAG_ENTRY0("TxLaunchFifo2CorErr", SEES(TX_LAUNCH_FIFO2_COR)),
595/*51*/ FLAG_ENTRY0("TxLaunchFifo3CorErr", SEES(TX_LAUNCH_FIFO3_COR)),
596/*52*/ FLAG_ENTRY0("TxLaunchFifo4CorErr", SEES(TX_LAUNCH_FIFO4_COR)),
597/*53*/ FLAG_ENTRY0("TxLaunchFifo5CorErr", SEES(TX_LAUNCH_FIFO5_COR)),
598/*54*/ FLAG_ENTRY0("TxLaunchFifo6CorErr", SEES(TX_LAUNCH_FIFO6_COR)),
599/*55*/ FLAG_ENTRY0("TxLaunchFifo7CorErr", SEES(TX_LAUNCH_FIFO7_COR)),
600/*56*/ FLAG_ENTRY0("TxLaunchFifo8CorErr", SEES(TX_LAUNCH_FIFO8_COR)),
601/*57*/ FLAG_ENTRY0("TxCreditOverrunErr", SEES(TX_CREDIT_OVERRUN)),
602/*58*/ FLAG_ENTRY0("TxSbHdrCorErr", SEES(TX_SB_HDR_COR)),
603/*59*/ FLAG_ENTRY0("TxReadSdmaMemoryCorErr", SEES(TX_READ_SDMA_MEMORY_COR)),
604/*60*/ FLAG_ENTRY0("TxReadPioMemoryCorErr", SEES(TX_READ_PIO_MEMORY_COR)),
605/*61*/ FLAG_ENTRY0("TxEgressFifoCorErr", SEES(TX_EGRESS_FIFO_COR)),
606/*62*/ FLAG_ENTRY0("TxReadSdmaMemoryCsrUncErr",
607 SEES(TX_READ_SDMA_MEMORY_CSR_UNC)),
608/*63*/ FLAG_ENTRY0("TxReadPioMemoryCsrUncErr",
609 SEES(TX_READ_PIO_MEMORY_CSR_UNC)),
610};
611
612/*
613 * TXE Egress Error Info flags
614 */
615#define SEEI(text) SEND_EGRESS_ERR_INFO_##text##_ERR_SMASK
616static struct flag_table egress_err_info_flags[] = {
617/* 0*/ FLAG_ENTRY0("Reserved", 0ull),
618/* 1*/ FLAG_ENTRY0("VLErr", SEEI(VL)),
619/* 2*/ FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
620/* 3*/ FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
621/* 4*/ FLAG_ENTRY0("PartitionKeyErr", SEEI(PARTITION_KEY)),
622/* 5*/ FLAG_ENTRY0("SLIDErr", SEEI(SLID)),
623/* 6*/ FLAG_ENTRY0("OpcodeErr", SEEI(OPCODE)),
624/* 7*/ FLAG_ENTRY0("VLMappingErr", SEEI(VL_MAPPING)),
625/* 8*/ FLAG_ENTRY0("RawErr", SEEI(RAW)),
626/* 9*/ FLAG_ENTRY0("RawIPv6Err", SEEI(RAW_IPV6)),
627/*10*/ FLAG_ENTRY0("GRHErr", SEEI(GRH)),
628/*11*/ FLAG_ENTRY0("BypassErr", SEEI(BYPASS)),
629/*12*/ FLAG_ENTRY0("KDETHPacketsErr", SEEI(KDETH_PACKETS)),
630/*13*/ FLAG_ENTRY0("NonKDETHPacketsErr", SEEI(NON_KDETH_PACKETS)),
631/*14*/ FLAG_ENTRY0("TooSmallIBPacketsErr", SEEI(TOO_SMALL_IB_PACKETS)),
632/*15*/ FLAG_ENTRY0("TooSmallBypassPacketsErr", SEEI(TOO_SMALL_BYPASS_PACKETS)),
633/*16*/ FLAG_ENTRY0("PbcTestErr", SEEI(PBC_TEST)),
634/*17*/ FLAG_ENTRY0("BadPktLenErr", SEEI(BAD_PKT_LEN)),
635/*18*/ FLAG_ENTRY0("TooLongIBPacketErr", SEEI(TOO_LONG_IB_PACKET)),
636/*19*/ FLAG_ENTRY0("TooLongBypassPacketsErr", SEEI(TOO_LONG_BYPASS_PACKETS)),
637/*20*/ FLAG_ENTRY0("PbcStaticRateControlErr", SEEI(PBC_STATIC_RATE_CONTROL)),
638/*21*/ FLAG_ENTRY0("BypassBadPktLenErr", SEEI(BAD_PKT_LEN)),
639};
640
641/* TXE Egress errors that cause an SPC freeze */
642#define ALL_TXE_EGRESS_FREEZE_ERR \
643 (SEES(TX_EGRESS_FIFO_UNDERRUN_OR_PARITY) \
644 | SEES(TX_PIO_LAUNCH_INTF_PARITY) \
645 | SEES(TX_SDMA_LAUNCH_INTF_PARITY) \
646 | SEES(TX_SBRD_CTL_STATE_MACHINE_PARITY) \
647 | SEES(TX_LAUNCH_CSR_PARITY) \
648 | SEES(TX_SBRD_CTL_CSR_PARITY) \
649 | SEES(TX_CONFIG_PARITY) \
650 | SEES(TX_LAUNCH_FIFO0_UNC_OR_PARITY) \
651 | SEES(TX_LAUNCH_FIFO1_UNC_OR_PARITY) \
652 | SEES(TX_LAUNCH_FIFO2_UNC_OR_PARITY) \
653 | SEES(TX_LAUNCH_FIFO3_UNC_OR_PARITY) \
654 | SEES(TX_LAUNCH_FIFO4_UNC_OR_PARITY) \
655 | SEES(TX_LAUNCH_FIFO5_UNC_OR_PARITY) \
656 | SEES(TX_LAUNCH_FIFO6_UNC_OR_PARITY) \
657 | SEES(TX_LAUNCH_FIFO7_UNC_OR_PARITY) \
658 | SEES(TX_LAUNCH_FIFO8_UNC_OR_PARITY) \
659 | SEES(TX_CREDIT_RETURN_PARITY))
660
661/*
662 * TXE Send error flags
663 */
664#define SES(name) SEND_ERR_STATUS_SEND_##name##_ERR_SMASK
665static struct flag_table send_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("SendCsrParityErr", SES(CSR_PARITY)),
667/* 1*/ FLAG_ENTRY0("SendCsrReadBadAddrErr", SES(CSR_READ_BAD_ADDR)),
668/* 2*/ FLAG_ENTRY0("SendCsrWriteBadAddrErr", SES(CSR_WRITE_BAD_ADDR))
669};
670
671/*
672 * TXE Send Context Error flags and consequences
673 */
674static struct flag_table sc_err_status_flags[] = {
675/* 0*/ FLAG_ENTRY("InconsistentSop",
676 SEC_PACKET_DROPPED | SEC_SC_HALTED,
677 SEND_CTXT_ERR_STATUS_PIO_INCONSISTENT_SOP_ERR_SMASK),
678/* 1*/ FLAG_ENTRY("DisallowedPacket",
679 SEC_PACKET_DROPPED | SEC_SC_HALTED,
680 SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK),
681/* 2*/ FLAG_ENTRY("WriteCrossesBoundary",
682 SEC_WRITE_DROPPED | SEC_SC_HALTED,
683 SEND_CTXT_ERR_STATUS_PIO_WRITE_CROSSES_BOUNDARY_ERR_SMASK),
684/* 3*/ FLAG_ENTRY("WriteOverflow",
685 SEC_WRITE_DROPPED | SEC_SC_HALTED,
686 SEND_CTXT_ERR_STATUS_PIO_WRITE_OVERFLOW_ERR_SMASK),
687/* 4*/ FLAG_ENTRY("WriteOutOfBounds",
688 SEC_WRITE_DROPPED | SEC_SC_HALTED,
689 SEND_CTXT_ERR_STATUS_PIO_WRITE_OUT_OF_BOUNDS_ERR_SMASK),
690/* 5-63 reserved*/
691};
692
693/*
694 * RXE Receive Error flags
695 */
696#define RXES(name) RCV_ERR_STATUS_RX_##name##_ERR_SMASK
697static struct flag_table rxe_err_status_flags[] = {
698/* 0*/ FLAG_ENTRY0("RxDmaCsrCorErr", RXES(DMA_CSR_COR)),
699/* 1*/ FLAG_ENTRY0("RxDcIntfParityErr", RXES(DC_INTF_PARITY)),
700/* 2*/ FLAG_ENTRY0("RxRcvHdrUncErr", RXES(RCV_HDR_UNC)),
701/* 3*/ FLAG_ENTRY0("RxRcvHdrCorErr", RXES(RCV_HDR_COR)),
702/* 4*/ FLAG_ENTRY0("RxRcvDataUncErr", RXES(RCV_DATA_UNC)),
703/* 5*/ FLAG_ENTRY0("RxRcvDataCorErr", RXES(RCV_DATA_COR)),
704/* 6*/ FLAG_ENTRY0("RxRcvQpMapTableUncErr", RXES(RCV_QP_MAP_TABLE_UNC)),
705/* 7*/ FLAG_ENTRY0("RxRcvQpMapTableCorErr", RXES(RCV_QP_MAP_TABLE_COR)),
706/* 8*/ FLAG_ENTRY0("RxRcvCsrParityErr", RXES(RCV_CSR_PARITY)),
707/* 9*/ FLAG_ENTRY0("RxDcSopEopParityErr", RXES(DC_SOP_EOP_PARITY)),
708/*10*/ FLAG_ENTRY0("RxDmaFlagUncErr", RXES(DMA_FLAG_UNC)),
709/*11*/ FLAG_ENTRY0("RxDmaFlagCorErr", RXES(DMA_FLAG_COR)),
710/*12*/ FLAG_ENTRY0("RxRcvFsmEncodingErr", RXES(RCV_FSM_ENCODING)),
711/*13*/ FLAG_ENTRY0("RxRbufFreeListUncErr", RXES(RBUF_FREE_LIST_UNC)),
712/*14*/ FLAG_ENTRY0("RxRbufFreeListCorErr", RXES(RBUF_FREE_LIST_COR)),
713/*15*/ FLAG_ENTRY0("RxRbufLookupDesRegUncErr", RXES(RBUF_LOOKUP_DES_REG_UNC)),
714/*16*/ FLAG_ENTRY0("RxRbufLookupDesRegUncCorErr",
715 RXES(RBUF_LOOKUP_DES_REG_UNC_COR)),
716/*17*/ FLAG_ENTRY0("RxRbufLookupDesUncErr", RXES(RBUF_LOOKUP_DES_UNC)),
717/*18*/ FLAG_ENTRY0("RxRbufLookupDesCorErr", RXES(RBUF_LOOKUP_DES_COR)),
718/*19*/ FLAG_ENTRY0("RxRbufBlockListReadUncErr",
719 RXES(RBUF_BLOCK_LIST_READ_UNC)),
720/*20*/ FLAG_ENTRY0("RxRbufBlockListReadCorErr",
721 RXES(RBUF_BLOCK_LIST_READ_COR)),
722/*21*/ FLAG_ENTRY0("RxRbufCsrQHeadBufNumParityErr",
723 RXES(RBUF_CSR_QHEAD_BUF_NUM_PARITY)),
724/*22*/ FLAG_ENTRY0("RxRbufCsrQEntCntParityErr",
725 RXES(RBUF_CSR_QENT_CNT_PARITY)),
726/*23*/ FLAG_ENTRY0("RxRbufCsrQNextBufParityErr",
727 RXES(RBUF_CSR_QNEXT_BUF_PARITY)),
728/*24*/ FLAG_ENTRY0("RxRbufCsrQVldBitParityErr",
729 RXES(RBUF_CSR_QVLD_BIT_PARITY)),
730/*25*/ FLAG_ENTRY0("RxRbufCsrQHdPtrParityErr", RXES(RBUF_CSR_QHD_PTR_PARITY)),
731/*26*/ FLAG_ENTRY0("RxRbufCsrQTlPtrParityErr", RXES(RBUF_CSR_QTL_PTR_PARITY)),
732/*27*/ FLAG_ENTRY0("RxRbufCsrQNumOfPktParityErr",
733 RXES(RBUF_CSR_QNUM_OF_PKT_PARITY)),
734/*28*/ FLAG_ENTRY0("RxRbufCsrQEOPDWParityErr", RXES(RBUF_CSR_QEOPDW_PARITY)),
735/*29*/ FLAG_ENTRY0("RxRbufCtxIdParityErr", RXES(RBUF_CTX_ID_PARITY)),
736/*30*/ FLAG_ENTRY0("RxRBufBadLookupErr", RXES(RBUF_BAD_LOOKUP)),
737/*31*/ FLAG_ENTRY0("RxRbufFullErr", RXES(RBUF_FULL)),
738/*32*/ FLAG_ENTRY0("RxRbufEmptyErr", RXES(RBUF_EMPTY)),
739/*33*/ FLAG_ENTRY0("RxRbufFlRdAddrParityErr", RXES(RBUF_FL_RD_ADDR_PARITY)),
740/*34*/ FLAG_ENTRY0("RxRbufFlWrAddrParityErr", RXES(RBUF_FL_WR_ADDR_PARITY)),
741/*35*/ FLAG_ENTRY0("RxRbufFlInitdoneParityErr",
742 RXES(RBUF_FL_INITDONE_PARITY)),
743/*36*/ FLAG_ENTRY0("RxRbufFlInitWrAddrParityErr",
744 RXES(RBUF_FL_INIT_WR_ADDR_PARITY)),
745/*37*/ FLAG_ENTRY0("RxRbufNextFreeBufUncErr", RXES(RBUF_NEXT_FREE_BUF_UNC)),
746/*38*/ FLAG_ENTRY0("RxRbufNextFreeBufCorErr", RXES(RBUF_NEXT_FREE_BUF_COR)),
747/*39*/ FLAG_ENTRY0("RxLookupDesPart1UncErr", RXES(LOOKUP_DES_PART1_UNC)),
748/*40*/ FLAG_ENTRY0("RxLookupDesPart1UncCorErr",
749 RXES(LOOKUP_DES_PART1_UNC_COR)),
750/*41*/ FLAG_ENTRY0("RxLookupDesPart2ParityErr",
751 RXES(LOOKUP_DES_PART2_PARITY)),
752/*42*/ FLAG_ENTRY0("RxLookupRcvArrayUncErr", RXES(LOOKUP_RCV_ARRAY_UNC)),
753/*43*/ FLAG_ENTRY0("RxLookupRcvArrayCorErr", RXES(LOOKUP_RCV_ARRAY_COR)),
754/*44*/ FLAG_ENTRY0("RxLookupCsrParityErr", RXES(LOOKUP_CSR_PARITY)),
755/*45*/ FLAG_ENTRY0("RxHqIntrCsrParityErr", RXES(HQ_INTR_CSR_PARITY)),
756/*46*/ FLAG_ENTRY0("RxHqIntrFsmErr", RXES(HQ_INTR_FSM)),
757/*47*/ FLAG_ENTRY0("RxRbufDescPart1UncErr", RXES(RBUF_DESC_PART1_UNC)),
758/*48*/ FLAG_ENTRY0("RxRbufDescPart1CorErr", RXES(RBUF_DESC_PART1_COR)),
759/*49*/ FLAG_ENTRY0("RxRbufDescPart2UncErr", RXES(RBUF_DESC_PART2_UNC)),
760/*50*/ FLAG_ENTRY0("RxRbufDescPart2CorErr", RXES(RBUF_DESC_PART2_COR)),
761/*51*/ FLAG_ENTRY0("RxDmaHdrFifoRdUncErr", RXES(DMA_HDR_FIFO_RD_UNC)),
762/*52*/ FLAG_ENTRY0("RxDmaHdrFifoRdCorErr", RXES(DMA_HDR_FIFO_RD_COR)),
763/*53*/ FLAG_ENTRY0("RxDmaDataFifoRdUncErr", RXES(DMA_DATA_FIFO_RD_UNC)),
764/*54*/ FLAG_ENTRY0("RxDmaDataFifoRdCorErr", RXES(DMA_DATA_FIFO_RD_COR)),
765/*55*/ FLAG_ENTRY0("RxRbufDataUncErr", RXES(RBUF_DATA_UNC)),
766/*56*/ FLAG_ENTRY0("RxRbufDataCorErr", RXES(RBUF_DATA_COR)),
767/*57*/ FLAG_ENTRY0("RxDmaCsrParityErr", RXES(DMA_CSR_PARITY)),
768/*58*/ FLAG_ENTRY0("RxDmaEqFsmEncodingErr", RXES(DMA_EQ_FSM_ENCODING)),
769/*59*/ FLAG_ENTRY0("RxDmaDqFsmEncodingErr", RXES(DMA_DQ_FSM_ENCODING)),
770/*60*/ FLAG_ENTRY0("RxDmaCsrUncErr", RXES(DMA_CSR_UNC)),
771/*61*/ FLAG_ENTRY0("RxCsrReadBadAddrErr", RXES(CSR_READ_BAD_ADDR)),
772/*62*/ FLAG_ENTRY0("RxCsrWriteBadAddrErr", RXES(CSR_WRITE_BAD_ADDR)),
773/*63*/ FLAG_ENTRY0("RxCsrParityErr", RXES(CSR_PARITY))
774};
775
776/* RXE errors that will trigger an SPC freeze */
777#define ALL_RXE_FREEZE_ERR \
778 (RCV_ERR_STATUS_RX_RCV_QP_MAP_TABLE_UNC_ERR_SMASK \
779 | RCV_ERR_STATUS_RX_RCV_CSR_PARITY_ERR_SMASK \
780 | RCV_ERR_STATUS_RX_DMA_FLAG_UNC_ERR_SMASK \
781 | RCV_ERR_STATUS_RX_RCV_FSM_ENCODING_ERR_SMASK \
782 | RCV_ERR_STATUS_RX_RBUF_FREE_LIST_UNC_ERR_SMASK \
783 | RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_ERR_SMASK \
784 | RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_REG_UNC_COR_ERR_SMASK \
785 | RCV_ERR_STATUS_RX_RBUF_LOOKUP_DES_UNC_ERR_SMASK \
786 | RCV_ERR_STATUS_RX_RBUF_BLOCK_LIST_READ_UNC_ERR_SMASK \
787 | RCV_ERR_STATUS_RX_RBUF_CSR_QHEAD_BUF_NUM_PARITY_ERR_SMASK \
788 | RCV_ERR_STATUS_RX_RBUF_CSR_QENT_CNT_PARITY_ERR_SMASK \
789 | RCV_ERR_STATUS_RX_RBUF_CSR_QNEXT_BUF_PARITY_ERR_SMASK \
790 | RCV_ERR_STATUS_RX_RBUF_CSR_QVLD_BIT_PARITY_ERR_SMASK \
791 | RCV_ERR_STATUS_RX_RBUF_CSR_QHD_PTR_PARITY_ERR_SMASK \
792 | RCV_ERR_STATUS_RX_RBUF_CSR_QTL_PTR_PARITY_ERR_SMASK \
793 | RCV_ERR_STATUS_RX_RBUF_CSR_QNUM_OF_PKT_PARITY_ERR_SMASK \
794 | RCV_ERR_STATUS_RX_RBUF_CSR_QEOPDW_PARITY_ERR_SMASK \
795 | RCV_ERR_STATUS_RX_RBUF_CTX_ID_PARITY_ERR_SMASK \
796 | RCV_ERR_STATUS_RX_RBUF_BAD_LOOKUP_ERR_SMASK \
797 | RCV_ERR_STATUS_RX_RBUF_FULL_ERR_SMASK \
798 | RCV_ERR_STATUS_RX_RBUF_EMPTY_ERR_SMASK \
799 | RCV_ERR_STATUS_RX_RBUF_FL_RD_ADDR_PARITY_ERR_SMASK \
800 | RCV_ERR_STATUS_RX_RBUF_FL_WR_ADDR_PARITY_ERR_SMASK \
801 | RCV_ERR_STATUS_RX_RBUF_FL_INITDONE_PARITY_ERR_SMASK \
802 | RCV_ERR_STATUS_RX_RBUF_FL_INIT_WR_ADDR_PARITY_ERR_SMASK \
803 | RCV_ERR_STATUS_RX_RBUF_NEXT_FREE_BUF_UNC_ERR_SMASK \
804 | RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_ERR_SMASK \
805 | RCV_ERR_STATUS_RX_LOOKUP_DES_PART1_UNC_COR_ERR_SMASK \
806 | RCV_ERR_STATUS_RX_LOOKUP_DES_PART2_PARITY_ERR_SMASK \
807 | RCV_ERR_STATUS_RX_LOOKUP_RCV_ARRAY_UNC_ERR_SMASK \
808 | RCV_ERR_STATUS_RX_LOOKUP_CSR_PARITY_ERR_SMASK \
809 | RCV_ERR_STATUS_RX_HQ_INTR_CSR_PARITY_ERR_SMASK \
810 | RCV_ERR_STATUS_RX_HQ_INTR_FSM_ERR_SMASK \
811 | RCV_ERR_STATUS_RX_RBUF_DESC_PART1_UNC_ERR_SMASK \
812 | RCV_ERR_STATUS_RX_RBUF_DESC_PART1_COR_ERR_SMASK \
813 | RCV_ERR_STATUS_RX_RBUF_DESC_PART2_UNC_ERR_SMASK \
814 | RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK \
815 | RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK \
816 | RCV_ERR_STATUS_RX_RBUF_DATA_UNC_ERR_SMASK \
817 | RCV_ERR_STATUS_RX_DMA_CSR_PARITY_ERR_SMASK \
818 | RCV_ERR_STATUS_RX_DMA_EQ_FSM_ENCODING_ERR_SMASK \
819 | RCV_ERR_STATUS_RX_DMA_DQ_FSM_ENCODING_ERR_SMASK \
820 | RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK \
821 | RCV_ERR_STATUS_RX_CSR_PARITY_ERR_SMASK)
822
823#define RXE_FREEZE_ABORT_MASK \
824 (RCV_ERR_STATUS_RX_DMA_CSR_UNC_ERR_SMASK | \
825 RCV_ERR_STATUS_RX_DMA_HDR_FIFO_RD_UNC_ERR_SMASK | \
826 RCV_ERR_STATUS_RX_DMA_DATA_FIFO_RD_UNC_ERR_SMASK)
827
828/*
829 * DCC Error Flags
830 */
831#define DCCE(name) DCC_ERR_FLG_##name##_SMASK
832static struct flag_table dcc_err_flags[] = {
833 FLAG_ENTRY0("bad_l2_err", DCCE(BAD_L2_ERR)),
834 FLAG_ENTRY0("bad_sc_err", DCCE(BAD_SC_ERR)),
835 FLAG_ENTRY0("bad_mid_tail_err", DCCE(BAD_MID_TAIL_ERR)),
836 FLAG_ENTRY0("bad_preemption_err", DCCE(BAD_PREEMPTION_ERR)),
837 FLAG_ENTRY0("preemption_err", DCCE(PREEMPTION_ERR)),
838 FLAG_ENTRY0("preemptionvl15_err", DCCE(PREEMPTIONVL15_ERR)),
839 FLAG_ENTRY0("bad_vl_marker_err", DCCE(BAD_VL_MARKER_ERR)),
840 FLAG_ENTRY0("bad_dlid_target_err", DCCE(BAD_DLID_TARGET_ERR)),
841 FLAG_ENTRY0("bad_lver_err", DCCE(BAD_LVER_ERR)),
842 FLAG_ENTRY0("uncorrectable_err", DCCE(UNCORRECTABLE_ERR)),
843 FLAG_ENTRY0("bad_crdt_ack_err", DCCE(BAD_CRDT_ACK_ERR)),
844 FLAG_ENTRY0("unsup_pkt_type", DCCE(UNSUP_PKT_TYPE)),
845 FLAG_ENTRY0("bad_ctrl_flit_err", DCCE(BAD_CTRL_FLIT_ERR)),
846 FLAG_ENTRY0("event_cntr_parity_err", DCCE(EVENT_CNTR_PARITY_ERR)),
847 FLAG_ENTRY0("event_cntr_rollover_err", DCCE(EVENT_CNTR_ROLLOVER_ERR)),
848 FLAG_ENTRY0("link_err", DCCE(LINK_ERR)),
849 FLAG_ENTRY0("misc_cntr_rollover_err", DCCE(MISC_CNTR_ROLLOVER_ERR)),
850 FLAG_ENTRY0("bad_ctrl_dist_err", DCCE(BAD_CTRL_DIST_ERR)),
851 FLAG_ENTRY0("bad_tail_dist_err", DCCE(BAD_TAIL_DIST_ERR)),
852 FLAG_ENTRY0("bad_head_dist_err", DCCE(BAD_HEAD_DIST_ERR)),
853 FLAG_ENTRY0("nonvl15_state_err", DCCE(NONVL15_STATE_ERR)),
854 FLAG_ENTRY0("vl15_multi_err", DCCE(VL15_MULTI_ERR)),
855 FLAG_ENTRY0("bad_pkt_length_err", DCCE(BAD_PKT_LENGTH_ERR)),
856 FLAG_ENTRY0("unsup_vl_err", DCCE(UNSUP_VL_ERR)),
857 FLAG_ENTRY0("perm_nvl15_err", DCCE(PERM_NVL15_ERR)),
858 FLAG_ENTRY0("slid_zero_err", DCCE(SLID_ZERO_ERR)),
859 FLAG_ENTRY0("dlid_zero_err", DCCE(DLID_ZERO_ERR)),
860 FLAG_ENTRY0("length_mtu_err", DCCE(LENGTH_MTU_ERR)),
861 FLAG_ENTRY0("rx_early_drop_err", DCCE(RX_EARLY_DROP_ERR)),
862 FLAG_ENTRY0("late_short_err", DCCE(LATE_SHORT_ERR)),
863 FLAG_ENTRY0("late_long_err", DCCE(LATE_LONG_ERR)),
864 FLAG_ENTRY0("late_ebp_err", DCCE(LATE_EBP_ERR)),
865 FLAG_ENTRY0("fpe_tx_fifo_ovflw_err", DCCE(FPE_TX_FIFO_OVFLW_ERR)),
866 FLAG_ENTRY0("fpe_tx_fifo_unflw_err", DCCE(FPE_TX_FIFO_UNFLW_ERR)),
867 FLAG_ENTRY0("csr_access_blocked_host", DCCE(CSR_ACCESS_BLOCKED_HOST)),
868 FLAG_ENTRY0("csr_access_blocked_uc", DCCE(CSR_ACCESS_BLOCKED_UC)),
869 FLAG_ENTRY0("tx_ctrl_parity_err", DCCE(TX_CTRL_PARITY_ERR)),
870 FLAG_ENTRY0("tx_ctrl_parity_mbe_err", DCCE(TX_CTRL_PARITY_MBE_ERR)),
871 FLAG_ENTRY0("tx_sc_parity_err", DCCE(TX_SC_PARITY_ERR)),
872 FLAG_ENTRY0("rx_ctrl_parity_mbe_err", DCCE(RX_CTRL_PARITY_MBE_ERR)),
873 FLAG_ENTRY0("csr_parity_err", DCCE(CSR_PARITY_ERR)),
874 FLAG_ENTRY0("csr_inval_addr", DCCE(CSR_INVAL_ADDR)),
875 FLAG_ENTRY0("tx_byte_shft_parity_err", DCCE(TX_BYTE_SHFT_PARITY_ERR)),
876 FLAG_ENTRY0("rx_byte_shft_parity_err", DCCE(RX_BYTE_SHFT_PARITY_ERR)),
877 FLAG_ENTRY0("fmconfig_err", DCCE(FMCONFIG_ERR)),
878 FLAG_ENTRY0("rcvport_err", DCCE(RCVPORT_ERR)),
879};
880
881/*
882 * LCB error flags
883 */
884#define LCBE(name) DC_LCB_ERR_FLG_##name##_SMASK
885static struct flag_table lcb_err_flags[] = {
886/* 0*/ FLAG_ENTRY0("CSR_PARITY_ERR", LCBE(CSR_PARITY_ERR)),
887/* 1*/ FLAG_ENTRY0("INVALID_CSR_ADDR", LCBE(INVALID_CSR_ADDR)),
888/* 2*/ FLAG_ENTRY0("RST_FOR_FAILED_DESKEW", LCBE(RST_FOR_FAILED_DESKEW)),
889/* 3*/ FLAG_ENTRY0("ALL_LNS_FAILED_REINIT_TEST",
890 LCBE(ALL_LNS_FAILED_REINIT_TEST)),
891/* 4*/ FLAG_ENTRY0("LOST_REINIT_STALL_OR_TOS", LCBE(LOST_REINIT_STALL_OR_TOS)),
892/* 5*/ FLAG_ENTRY0("TX_LESS_THAN_FOUR_LNS", LCBE(TX_LESS_THAN_FOUR_LNS)),
893/* 6*/ FLAG_ENTRY0("RX_LESS_THAN_FOUR_LNS", LCBE(RX_LESS_THAN_FOUR_LNS)),
894/* 7*/ FLAG_ENTRY0("SEQ_CRC_ERR", LCBE(SEQ_CRC_ERR)),
895/* 8*/ FLAG_ENTRY0("REINIT_FROM_PEER", LCBE(REINIT_FROM_PEER)),
896/* 9*/ FLAG_ENTRY0("REINIT_FOR_LN_DEGRADE", LCBE(REINIT_FOR_LN_DEGRADE)),
897/*10*/ FLAG_ENTRY0("CRC_ERR_CNT_HIT_LIMIT", LCBE(CRC_ERR_CNT_HIT_LIMIT)),
898/*11*/ FLAG_ENTRY0("RCLK_STOPPED", LCBE(RCLK_STOPPED)),
899/*12*/ FLAG_ENTRY0("UNEXPECTED_REPLAY_MARKER", LCBE(UNEXPECTED_REPLAY_MARKER)),
900/*13*/ FLAG_ENTRY0("UNEXPECTED_ROUND_TRIP_MARKER",
901 LCBE(UNEXPECTED_ROUND_TRIP_MARKER)),
902/*14*/ FLAG_ENTRY0("ILLEGAL_NULL_LTP", LCBE(ILLEGAL_NULL_LTP)),
903/*15*/ FLAG_ENTRY0("ILLEGAL_FLIT_ENCODING", LCBE(ILLEGAL_FLIT_ENCODING)),
904/*16*/ FLAG_ENTRY0("FLIT_INPUT_BUF_OFLW", LCBE(FLIT_INPUT_BUF_OFLW)),
905/*17*/ FLAG_ENTRY0("VL_ACK_INPUT_BUF_OFLW", LCBE(VL_ACK_INPUT_BUF_OFLW)),
906/*18*/ FLAG_ENTRY0("VL_ACK_INPUT_PARITY_ERR", LCBE(VL_ACK_INPUT_PARITY_ERR)),
907/*19*/ FLAG_ENTRY0("VL_ACK_INPUT_WRONG_CRC_MODE",
908 LCBE(VL_ACK_INPUT_WRONG_CRC_MODE)),
909/*20*/ FLAG_ENTRY0("FLIT_INPUT_BUF_MBE", LCBE(FLIT_INPUT_BUF_MBE)),
910/*21*/ FLAG_ENTRY0("FLIT_INPUT_BUF_SBE", LCBE(FLIT_INPUT_BUF_SBE)),
911/*22*/ FLAG_ENTRY0("REPLAY_BUF_MBE", LCBE(REPLAY_BUF_MBE)),
912/*23*/ FLAG_ENTRY0("REPLAY_BUF_SBE", LCBE(REPLAY_BUF_SBE)),
913/*24*/ FLAG_ENTRY0("CREDIT_RETURN_FLIT_MBE", LCBE(CREDIT_RETURN_FLIT_MBE)),
914/*25*/ FLAG_ENTRY0("RST_FOR_LINK_TIMEOUT", LCBE(RST_FOR_LINK_TIMEOUT)),
915/*26*/ FLAG_ENTRY0("RST_FOR_INCOMPLT_RND_TRIP",
916 LCBE(RST_FOR_INCOMPLT_RND_TRIP)),
917/*27*/ FLAG_ENTRY0("HOLD_REINIT", LCBE(HOLD_REINIT)),
918/*28*/ FLAG_ENTRY0("NEG_EDGE_LINK_TRANSFER_ACTIVE",
919 LCBE(NEG_EDGE_LINK_TRANSFER_ACTIVE)),
920/*29*/ FLAG_ENTRY0("REDUNDANT_FLIT_PARITY_ERR",
921 LCBE(REDUNDANT_FLIT_PARITY_ERR))
922};
923
924/*
925 * DC8051 Error Flags
926 */
927#define D8E(name) DC_DC8051_ERR_FLG_##name##_SMASK
928static struct flag_table dc8051_err_flags[] = {
929 FLAG_ENTRY0("SET_BY_8051", D8E(SET_BY_8051)),
930 FLAG_ENTRY0("LOST_8051_HEART_BEAT", D8E(LOST_8051_HEART_BEAT)),
931 FLAG_ENTRY0("CRAM_MBE", D8E(CRAM_MBE)),
932 FLAG_ENTRY0("CRAM_SBE", D8E(CRAM_SBE)),
933 FLAG_ENTRY0("DRAM_MBE", D8E(DRAM_MBE)),
934 FLAG_ENTRY0("DRAM_SBE", D8E(DRAM_SBE)),
935 FLAG_ENTRY0("IRAM_MBE", D8E(IRAM_MBE)),
936 FLAG_ENTRY0("IRAM_SBE", D8E(IRAM_SBE)),
937 FLAG_ENTRY0("UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES",
938 D8E(UNMATCHED_SECURE_MSG_ACROSS_BCC_LANES)),
939 FLAG_ENTRY0("INVALID_CSR_ADDR", D8E(INVALID_CSR_ADDR)),
940};
941
942/*
943 * DC8051 Information Error flags
944 *
945 * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR field.
946 */
947static struct flag_table dc8051_info_err_flags[] = {
948 FLAG_ENTRY0("Spico ROM check failed", SPICO_ROM_FAILED),
949 FLAG_ENTRY0("Unknown frame received", UNKNOWN_FRAME),
950 FLAG_ENTRY0("Target BER not met", TARGET_BER_NOT_MET),
951 FLAG_ENTRY0("Serdes internal loopback failure",
952 FAILED_SERDES_INTERNAL_LOOPBACK),
953 FLAG_ENTRY0("Failed SerDes init", FAILED_SERDES_INIT),
954 FLAG_ENTRY0("Failed LNI(Polling)", FAILED_LNI_POLLING),
955 FLAG_ENTRY0("Failed LNI(Debounce)", FAILED_LNI_DEBOUNCE),
956 FLAG_ENTRY0("Failed LNI(EstbComm)", FAILED_LNI_ESTBCOMM),
957 FLAG_ENTRY0("Failed LNI(OptEq)", FAILED_LNI_OPTEQ),
958 FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
959 FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
960 FLAG_ENTRY0("Failed LNI(ConfigLT)", FAILED_LNI_CONFIGLT)
961};
962
963/*
964 * DC8051 Information Host Information flags
965 *
966 * Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG field.
967 */
968static struct flag_table dc8051_info_host_msg_flags[] = {
969 FLAG_ENTRY0("Host request done", 0x0001),
970 FLAG_ENTRY0("BC SMA message", 0x0002),
971 FLAG_ENTRY0("BC PWR_MGM message", 0x0004),
972 FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008),
973 FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010),
974 FLAG_ENTRY0("External device config request", 0x0020),
975 FLAG_ENTRY0("VerifyCap all frames received", 0x0040),
976 FLAG_ENTRY0("LinkUp achieved", 0x0080),
977 FLAG_ENTRY0("Link going down", 0x0100),
978};
979
980
981static u32 encoded_size(u32 size);
982static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate);
983static int set_physical_link_state(struct hfi1_devdata *dd, u64 state);
984static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
985 u8 *continuous);
986static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
987 u8 *vcu, u16 *vl15buf, u8 *crc_sizes);
988static void read_vc_remote_link_width(struct hfi1_devdata *dd,
989 u8 *remote_tx_rate, u16 *link_widths);
990static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
991 u8 *flag_bits, u16 *link_widths);
992static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
993 u8 *device_rev);
994static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed);
995static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx);
996static int read_tx_settings(struct hfi1_devdata *dd, u8 *enable_lane_tx,
997 u8 *tx_polarity_inversion,
998 u8 *rx_polarity_inversion, u8 *max_rate);
999static void handle_sdma_eng_err(struct hfi1_devdata *dd,
1000 unsigned int context, u64 err_status);
1001static void handle_qsfp_int(struct hfi1_devdata *dd, u32 source, u64 reg);
1002static void handle_dcc_err(struct hfi1_devdata *dd,
1003 unsigned int context, u64 err_status);
1004static void handle_lcb_err(struct hfi1_devdata *dd,
1005 unsigned int context, u64 err_status);
1006static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg);
1007static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1008static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1009static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1010static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1011static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1012static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1013static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg);
1014static void set_partition_keys(struct hfi1_pportdata *);
1015static const char *link_state_name(u32 state);
1016static const char *link_state_reason_name(struct hfi1_pportdata *ppd,
1017 u32 state);
1018static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data,
1019 u64 *out_data);
1020static int read_idle_sma(struct hfi1_devdata *dd, u64 *data);
1021static int thermal_init(struct hfi1_devdata *dd);
1022
1023static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
1024 int msecs);
1025static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
1026static void handle_temp_err(struct hfi1_devdata *);
1027static void dc_shutdown(struct hfi1_devdata *);
1028static void dc_start(struct hfi1_devdata *);
1029
/*
 * Error interrupt table entry. This is used as input to the interrupt
 * "clear down" routine used for all second tier error interrupt registers.
 * Second tier interrupt registers have a single bit representing them
 * in the top-level CceIntStatus.
 */
1036struct err_reg_info {
1037 u32 status; /* status CSR offset */
1038 u32 clear; /* clear CSR offset */
1039 u32 mask; /* mask CSR offset */
1040 void (*handler)(struct hfi1_devdata *dd, u32 source, u64 reg);
1041 const char *desc;
1042};
1043
1044#define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
1045#define NUM_DC_ERRS (IS_DC_END - IS_DC_START)
1046#define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START)
1047
1048/*
1049 * Helpers for building HFI and DC error interrupt table entries. Different
1050 * helpers are needed because of inconsistent register names.
1051 */
1052#define EE(reg, handler, desc) \
1053 { reg##_STATUS, reg##_CLEAR, reg##_MASK, \
1054 handler, desc }
1055#define DC_EE1(reg, handler, desc) \
1056 { reg##_FLG, reg##_FLG_CLR, reg##_FLG_EN, handler, desc }
1057#define DC_EE2(reg, handler, desc) \
1058 { reg##_FLG, reg##_CLR, reg##_EN, handler, desc }
1059
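/*
 * Illustrative sketch of the "clear down" pattern the err_reg_info
 * entries feed: read the second-tier status CSR, write the value back to
 * the clear CSR to acknowledge it, then hand it to the per-block handler.
 * The function name is hypothetical and the driver's actual routine may
 * differ in ordering and masking.
 */
static void __maybe_unused sketch_clear_down(struct hfi1_devdata *dd,
					     const struct err_reg_info *eri,
					     u32 source)
{
	u64 reg = read_csr(dd, eri->status);

	if (!reg)
		return;			/* nothing pending in this block */
	write_csr(dd, eri->clear, reg);	/* acknowledge what we saw */
	if (eri->handler)
		eri->handler(dd, source, reg);
}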
1060/*
1061 * Table of the "misc" grouping of error interrupts. Each entry refers to
1062 * another register containing more information.
1063 */
1064static const struct err_reg_info misc_errs[NUM_MISC_ERRS] = {
1065/* 0*/ EE(CCE_ERR, handle_cce_err, "CceErr"),
1066/* 1*/ EE(RCV_ERR, handle_rxe_err, "RxeErr"),
1067/* 2*/ EE(MISC_ERR, handle_misc_err, "MiscErr"),
1068/* 3*/ { 0, 0, 0, NULL }, /* reserved */
1069/* 4*/ EE(SEND_PIO_ERR, handle_pio_err, "PioErr"),
1070/* 5*/ EE(SEND_DMA_ERR, handle_sdma_err, "SDmaErr"),
1071/* 6*/ EE(SEND_EGRESS_ERR, handle_egress_err, "EgressErr"),
1072/* 7*/ EE(SEND_ERR, handle_txe_err, "TxeErr")
1073 /* the rest are reserved */
1074};
1075
1076/*
1077 * Index into the Various section of the interrupt sources
1078 * corresponding to the Critical Temperature interrupt.
1079 */
1080#define TCRIT_INT_SOURCE 4
1081
1082/*
1083 * SDMA error interrupt entry - refers to another register containing more
1084 * information.
1085 */
1086static const struct err_reg_info sdma_eng_err =
1087 EE(SEND_DMA_ENG_ERR, handle_sdma_eng_err, "SDmaEngErr");
1088
1089static const struct err_reg_info various_err[NUM_VARIOUS] = {
1090/* 0*/ { 0, 0, 0, NULL }, /* PbcInt */
1091/* 1*/ { 0, 0, 0, NULL }, /* GpioAssertInt */
1092/* 2*/ EE(ASIC_QSFP1, handle_qsfp_int, "QSFP1"),
1093/* 3*/ EE(ASIC_QSFP2, handle_qsfp_int, "QSFP2"),
1094/* 4*/ { 0, 0, 0, NULL }, /* TCritInt */
1095 /* rest are reserved */
1096};
1097
/*
 * The DC encoding of mtu_cap for 10K MTU in the DCC_CFG_PORT_CONFIG
 * register cannot be derived from the MTU value because 10K is not
 * a power of 2. Therefore, we need a constant. Everything else can
 * be calculated.
 */
1104#define DCC_CFG_PORT_MTU_CAP_10240 7
1105
1106/*
1107 * Table of the DC grouping of error interrupts. Each entry refers to
1108 * another register containing more information.
1109 */
1110static const struct err_reg_info dc_errs[NUM_DC_ERRS] = {
1111/* 0*/ DC_EE1(DCC_ERR, handle_dcc_err, "DCC Err"),
1112/* 1*/ DC_EE2(DC_LCB_ERR, handle_lcb_err, "LCB Err"),
1113/* 2*/ DC_EE2(DC_DC8051_ERR, handle_8051_interrupt, "DC8051 Interrupt"),
1114/* 3*/ /* dc_lbm_int - special, see is_dc_int() */
1115 /* the rest are reserved */
1116};
1117
1118struct cntr_entry {
1119 /*
1120 * counter name
1121 */
1122 char *name;
1123
1124 /*
1125 * csr to read for name (if applicable)
1126 */
1127 u64 csr;
1128
1129 /*
1130 * offset into dd or ppd to store the counter's value
1131 */
1132 int offset;
1133
1134 /*
1135 * flags
1136 */
1137 u8 flags;
1138
1139 /*
1140 * accessor for stat element, context either dd or ppd
1141 */
1142 u64 (*rw_cntr)(const struct cntr_entry *,
1143 void *context,
1144 int vl,
1145 int mode,
1146 u64 data);
1147};
1148
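/*
 * Illustrative sketch: how a cntr_entry accessor is typically invoked for
 * a plain (non-per-VL) device counter.  CNTR_MODE_R and CNTR_INVALID_VL
 * come from the counter infrastructure used throughout this file; the
 * wrapper name itself is hypothetical.
 */
static u64 __maybe_unused sketch_read_dev_cntr(const struct cntr_entry *entry,
					       struct hfi1_devdata *dd)
{
	/* read mode, no VL index, no write data */
	return entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
}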
1149#define C_RCV_HDR_OVF_FIRST C_RCV_HDR_OVF_0
1150#define C_RCV_HDR_OVF_LAST C_RCV_HDR_OVF_159
1151
1152#define CNTR_ELEM(name, csr, offset, flags, accessor) \
1153{ \
1154 name, \
1155 csr, \
1156 offset, \
1157 flags, \
1158 accessor \
1159}
1160
1161/* 32bit RXE */
1162#define RXE32_PORT_CNTR_ELEM(name, counter, flags) \
1163CNTR_ELEM(#name, \
1164 (counter * 8 + RCV_COUNTER_ARRAY32), \
1165 0, flags | CNTR_32BIT, \
1166 port_access_u32_csr)
1167
1168#define RXE32_DEV_CNTR_ELEM(name, counter, flags) \
1169CNTR_ELEM(#name, \
1170 (counter * 8 + RCV_COUNTER_ARRAY32), \
1171 0, flags | CNTR_32BIT, \
1172 dev_access_u32_csr)
1173
1174/* 64bit RXE */
1175#define RXE64_PORT_CNTR_ELEM(name, counter, flags) \
1176CNTR_ELEM(#name, \
1177 (counter * 8 + RCV_COUNTER_ARRAY64), \
1178 0, flags, \
1179 port_access_u64_csr)
1180
1181#define RXE64_DEV_CNTR_ELEM(name, counter, flags) \
1182CNTR_ELEM(#name, \
1183 (counter * 8 + RCV_COUNTER_ARRAY64), \
1184 0, flags, \
1185 dev_access_u64_csr)
1186
1187#define OVR_LBL(ctx) C_RCV_HDR_OVF_ ## ctx
1188#define OVR_ELM(ctx) \
1189CNTR_ELEM("RcvHdrOvr" #ctx, \
1190 (RCV_HDR_OVFL_CNT + ctx*0x100), \
1191 0, CNTR_NORMAL, port_access_u64_csr)
1192
1193/* 32bit TXE */
1194#define TXE32_PORT_CNTR_ELEM(name, counter, flags) \
1195CNTR_ELEM(#name, \
1196 (counter * 8 + SEND_COUNTER_ARRAY32), \
1197 0, flags | CNTR_32BIT, \
1198 port_access_u32_csr)
1199
1200/* 64bit TXE */
1201#define TXE64_PORT_CNTR_ELEM(name, counter, flags) \
1202CNTR_ELEM(#name, \
1203 (counter * 8 + SEND_COUNTER_ARRAY64), \
1204 0, flags, \
1205 port_access_u64_csr)
1206
1207# define TX64_DEV_CNTR_ELEM(name, counter, flags) \
1208CNTR_ELEM(#name,\
1209 counter * 8 + SEND_COUNTER_ARRAY64, \
1210 0, \
1211 flags, \
1212 dev_access_u64_csr)
1213
1214/* CCE */
1215#define CCE_PERF_DEV_CNTR_ELEM(name, counter, flags) \
1216CNTR_ELEM(#name, \
1217 (counter * 8 + CCE_COUNTER_ARRAY32), \
1218 0, flags | CNTR_32BIT, \
1219 dev_access_u32_csr)
1220
1221#define CCE_INT_DEV_CNTR_ELEM(name, counter, flags) \
1222CNTR_ELEM(#name, \
1223 (counter * 8 + CCE_INT_COUNTER_ARRAY32), \
1224 0, flags | CNTR_32BIT, \
1225 dev_access_u32_csr)
1226
1227/* DC */
1228#define DC_PERF_CNTR(name, counter, flags) \
1229CNTR_ELEM(#name, \
1230 counter, \
1231 0, \
1232 flags, \
1233 dev_access_u64_csr)
1234
1235#define DC_PERF_CNTR_LCB(name, counter, flags) \
1236CNTR_ELEM(#name, \
1237 counter, \
1238 0, \
1239 flags, \
1240 dc_access_lcb_cntr)
1241
1242/* ibp counters */
1243#define SW_IBP_CNTR(name, cntr) \
1244CNTR_ELEM(#name, \
1245 0, \
1246 0, \
1247 CNTR_SYNTH, \
1248 access_ibp_##cntr)
1249
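/*
 * Expansion example (illustration only): a hypothetical entry
 * RXE32_DEV_CNTR_ELEM(RcvFoo, 5, CNTR_NORMAL) expands to
 *	CNTR_ELEM("RcvFoo", (5 * 8 + RCV_COUNTER_ARRAY32), 0,
 *		  CNTR_NORMAL | CNTR_32BIT, dev_access_u32_csr)
 * i.e. a 32-bit device counter read from the CSR five 8-byte entries into
 * the RXE 32-bit counter array.  "RcvFoo" and index 5 are made up.
 */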
1250u64 read_csr(const struct hfi1_devdata *dd, u32 offset)
1251{
1252 u64 val;
1253
1254 if (dd->flags & HFI1_PRESENT) {
1255 val = readq((void __iomem *)dd->kregbase + offset);
1256 return val;
1257 }
1258 return -1;
1259}
1260
1261void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value)
1262{
1263 if (dd->flags & HFI1_PRESENT)
1264 writeq(value, (void __iomem *)dd->kregbase + offset);
1265}
1266
1267void __iomem *get_csr_addr(
1268 struct hfi1_devdata *dd,
1269 u32 offset)
1270{
1271 return (void __iomem *)dd->kregbase + offset;
1272}
1273
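/*
 * Illustrative sketch: a read-modify-write on an arbitrary CSR using the
 * accessors above.  The offset is whatever register the caller names;
 * the helper itself is hypothetical and not used by the driver.
 */
static void __maybe_unused sketch_set_csr_bits(struct hfi1_devdata *dd,
					       u32 offset, u64 set_bits)
{
	u64 reg = read_csr(dd, offset);

	write_csr(dd, offset, reg | set_bits);
}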
1274static inline u64 read_write_csr(const struct hfi1_devdata *dd, u32 csr,
1275 int mode, u64 value)
1276{
1277 u64 ret;
1278
1279
1280 if (mode == CNTR_MODE_R) {
1281 ret = read_csr(dd, csr);
1282 } else if (mode == CNTR_MODE_W) {
1283 write_csr(dd, csr, value);
1284 ret = value;
1285 } else {
1286 dd_dev_err(dd, "Invalid cntr register access mode");
1287 return 0;
1288 }
1289
1290 hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, ret, mode);
1291 return ret;
1292}
1293
1294/* Dev Access */
1295static u64 dev_access_u32_csr(const struct cntr_entry *entry,
1296 void *context, int vl, int mode, u64 data)
1297{
1298 struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1299
1300 if (vl != CNTR_INVALID_VL)
1301 return 0;
1302 return read_write_csr(dd, entry->csr, mode, data);
1303}
1304
1305static u64 dev_access_u64_csr(const struct cntr_entry *entry, void *context,
1306 int vl, int mode, u64 data)
1307{
1308 struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1309
1310 u64 val = 0;
1311 u64 csr = entry->csr;
1312
1313 if (entry->flags & CNTR_VL) {
1314 if (vl == CNTR_INVALID_VL)
1315 return 0;
1316 csr += 8 * vl;
1317 } else {
1318 if (vl != CNTR_INVALID_VL)
1319 return 0;
1320 }
1321
1322 val = read_write_csr(dd, csr, mode, data);
1323 return val;
1324}
1325
1326static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
1327 int vl, int mode, u64 data)
1328{
1329 struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1330 u32 csr = entry->csr;
1331 int ret = 0;
1332
1333 if (vl != CNTR_INVALID_VL)
1334 return 0;
1335 if (mode == CNTR_MODE_R)
1336 ret = read_lcb_csr(dd, csr, &data);
1337 else if (mode == CNTR_MODE_W)
1338 ret = write_lcb_csr(dd, csr, data);
1339
1340 if (ret) {
1341 dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr);
1342 return 0;
1343 }
1344
1345 hfi1_cdbg(CNTR, "csr 0x%x val 0x%llx mode %d", csr, data, mode);
1346 return data;
1347}
1348
1349/* Port Access */
1350static u64 port_access_u32_csr(const struct cntr_entry *entry, void *context,
1351 int vl, int mode, u64 data)
1352{
1353 struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
1354
1355 if (vl != CNTR_INVALID_VL)
1356 return 0;
1357 return read_write_csr(ppd->dd, entry->csr, mode, data);
1358}
1359
1360static u64 port_access_u64_csr(const struct cntr_entry *entry,
1361 void *context, int vl, int mode, u64 data)
1362{
1363 struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
1364 u64 val;
1365 u64 csr = entry->csr;
1366
1367 if (entry->flags & CNTR_VL) {
1368 if (vl == CNTR_INVALID_VL)
1369 return 0;
1370 csr += 8 * vl;
1371 } else {
1372 if (vl != CNTR_INVALID_VL)
1373 return 0;
1374 }
1375 val = read_write_csr(ppd->dd, csr, mode, data);
1376 return val;
1377}
1378
1379/* Software defined */
1380static inline u64 read_write_sw(struct hfi1_devdata *dd, u64 *cntr, int mode,
1381 u64 data)
1382{
1383 u64 ret;
1384
1385 if (mode == CNTR_MODE_R) {
1386 ret = *cntr;
1387 } else if (mode == CNTR_MODE_W) {
1388 *cntr = data;
1389 ret = data;
1390 } else {
1391 dd_dev_err(dd, "Invalid cntr sw access mode");
1392 return 0;
1393 }
1394
1395 hfi1_cdbg(CNTR, "val 0x%llx mode %d", ret, mode);
1396
1397 return ret;
1398}
1399
1400static u64 access_sw_link_dn_cnt(const struct cntr_entry *entry, void *context,
1401 int vl, int mode, u64 data)
1402{
1403 struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
1404
1405 if (vl != CNTR_INVALID_VL)
1406 return 0;
1407 return read_write_sw(ppd->dd, &ppd->link_downed, mode, data);
1408}
1409
1410static u64 access_sw_link_up_cnt(const struct cntr_entry *entry, void *context,
1411 int vl, int mode, u64 data)
1412{
1413 struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
1414
1415 if (vl != CNTR_INVALID_VL)
1416 return 0;
1417 return read_write_sw(ppd->dd, &ppd->link_up, mode, data);
1418}
1419
1420static u64 access_sw_xmit_discards(const struct cntr_entry *entry,
1421 void *context, int vl, int mode, u64 data)
1422{
1423 struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
1424
1425 if (vl != CNTR_INVALID_VL)
1426 return 0;
1427
1428 return read_write_sw(ppd->dd, &ppd->port_xmit_discards, mode, data);
1429}
1430
1431static u64 access_xmit_constraint_errs(const struct cntr_entry *entry,
1432 void *context, int vl, int mode, u64 data)
1433{
1434 struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
1435
1436 if (vl != CNTR_INVALID_VL)
1437 return 0;
1438
1439 return read_write_sw(ppd->dd, &ppd->port_xmit_constraint_errors,
1440 mode, data);
1441}
1442
1443static u64 access_rcv_constraint_errs(const struct cntr_entry *entry,
1444 void *context, int vl, int mode, u64 data)
1445{
1446 struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context;
1447
1448 if (vl != CNTR_INVALID_VL)
1449 return 0;
1450
1451 return read_write_sw(ppd->dd, &ppd->port_rcv_constraint_errors,
1452 mode, data);
1453}
1454
1455u64 get_all_cpu_total(u64 __percpu *cntr)
1456{
1457 int cpu;
1458 u64 counter = 0;
1459
1460 for_each_possible_cpu(cpu)
1461 counter += *per_cpu_ptr(cntr, cpu);
1462 return counter;
1463}
1464
1465static u64 read_write_cpu(struct hfi1_devdata *dd, u64 *z_val,
1466 u64 __percpu *cntr,
1467 int vl, int mode, u64 data)
1468{
1469
1470 u64 ret = 0;
1471
1472 if (vl != CNTR_INVALID_VL)
1473 return 0;
1474
1475 if (mode == CNTR_MODE_R) {
1476 ret = get_all_cpu_total(cntr) - *z_val;
1477 } else if (mode == CNTR_MODE_W) {
1478 /* A write can only zero the counter */
1479 if (data == 0)
1480 *z_val = get_all_cpu_total(cntr);
1481 else
1482 dd_dev_err(dd, "Per CPU cntrs can only be zeroed");
1483 } else {
1484 dd_dev_err(dd, "Invalid cntr sw cpu access mode");
1485 return 0;
1486 }
1487
1488 return ret;
1489}
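/*
 * Illustration of the zero-baseline scheme above: if the per-CPU totals
 * sum to 100 and *z_val is 0, a read returns 100; writing 0 snapshots
 * *z_val = 100, so after the totals advance to 130 the next read returns
 * 30. Any non-zero write is rejected because per-CPU counters can only
 * be zeroed.
 */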
1490
1491static u64 access_sw_cpu_intr(const struct cntr_entry *entry,
1492 void *context, int vl, int mode, u64 data)
1493{
1494 struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1495
1496 return read_write_cpu(dd, &dd->z_int_counter, dd->int_counter, vl,
1497 mode, data);
1498}
1499
1500static u64 access_sw_cpu_rcv_limit(const struct cntr_entry *entry,
1501 void *context, int vl, int mode, u64 data)
1502{
1503 struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1504
1505 return read_write_cpu(dd, &dd->z_rcv_limit, dd->rcv_limit, vl,
1506 mode, data);
1507}
1508
1509static u64 access_sw_pio_wait(const struct cntr_entry *entry,
1510 void *context, int vl, int mode, u64 data)
1511{
1512 struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1513
1514 return dd->verbs_dev.n_piowait;
1515}
1516
1517static u64 access_sw_vtx_wait(const struct cntr_entry *entry,
1518 void *context, int vl, int mode, u64 data)
1519{
1520 struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1521
1522 return dd->verbs_dev.n_txwait;
1523}
1524
1525static u64 access_sw_kmem_wait(const struct cntr_entry *entry,
1526 void *context, int vl, int mode, u64 data)
1527{
1528 struct hfi1_devdata *dd = (struct hfi1_devdata *)context;
1529
1530 return dd->verbs_dev.n_kmem_wait;
1531}
1532
1533#define def_access_sw_cpu(cntr) \
1534static u64 access_sw_cpu_##cntr(const struct cntr_entry *entry, \
1535 void *context, int vl, int mode, u64 data) \
1536{ \
1537 struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; \
1538 return read_write_cpu(ppd->dd, &ppd->ibport_data.z_ ##cntr, \
1539 ppd->ibport_data.cntr, vl, \
1540 mode, data); \
1541}
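/*
 * For reference, def_access_sw_cpu(rc_acks) below expands to an
 * access_sw_cpu_rc_acks() helper that hands read_write_cpu() the per-CPU
 * counter ppd->ibport_data.rc_acks and its zero baseline
 * ppd->ibport_data.z_rc_acks.
 */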
1542
1543def_access_sw_cpu(rc_acks);
1544def_access_sw_cpu(rc_qacks);
1545def_access_sw_cpu(rc_delayed_comp);
1546
1547#define def_access_ibp_counter(cntr) \
1548static u64 access_ibp_##cntr(const struct cntr_entry *entry, \
1549 void *context, int vl, int mode, u64 data) \
1550{ \
1551 struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)context; \
1552 \
1553 if (vl != CNTR_INVALID_VL) \
1554 return 0; \
1555 \
1556 return read_write_sw(ppd->dd, &ppd->ibport_data.n_ ##cntr, \
1557 mode, data); \
1558}
1559
1560def_access_ibp_counter(loop_pkts);
1561def_access_ibp_counter(rc_resends);
1562def_access_ibp_counter(rnr_naks);
1563def_access_ibp_counter(other_naks);
1564def_access_ibp_counter(rc_timeouts);
1565def_access_ibp_counter(pkt_drops);
1566def_access_ibp_counter(dmawait);
1567def_access_ibp_counter(rc_seqnak);
1568def_access_ibp_counter(rc_dupreq);
1569def_access_ibp_counter(rdma_seq);
1570def_access_ibp_counter(unaligned);
1571def_access_ibp_counter(seq_naks);
1572
1573static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
1574[C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
1575[C_RX_TID_FULL] = RXE32_DEV_CNTR_ELEM(RxTIDFullEr, RCV_TID_FULL_ERR_CNT,
1576 CNTR_NORMAL),
1577[C_RX_TID_INVALID] = RXE32_DEV_CNTR_ELEM(RxTIDInvalid, RCV_TID_VALID_ERR_CNT,
1578 CNTR_NORMAL),
1579[C_RX_TID_FLGMS] = RXE32_DEV_CNTR_ELEM(RxTidFLGMs,
1580 RCV_TID_FLOW_GEN_MISMATCH_CNT,
1581 CNTR_NORMAL),
1582[C_RX_CTX_RHQS] = RXE32_DEV_CNTR_ELEM(RxCtxRHQS, RCV_CONTEXT_RHQ_STALL,
1583 CNTR_NORMAL),
1584[C_RX_CTX_EGRS] = RXE32_DEV_CNTR_ELEM(RxCtxEgrS, RCV_CONTEXT_EGR_STALL,
1585 CNTR_NORMAL),
1586[C_RCV_TID_FLSMS] = RXE32_DEV_CNTR_ELEM(RxTidFLSMs,
1587 RCV_TID_FLOW_SEQ_MISMATCH_CNT, CNTR_NORMAL),
1588[C_CCE_PCI_CR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciCrSt,
1589 CCE_PCIE_POSTED_CRDT_STALL_CNT, CNTR_NORMAL),
1590[C_CCE_PCI_TR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePciTrSt, CCE_PCIE_TRGT_STALL_CNT,
1591 CNTR_NORMAL),
1592[C_CCE_PIO_WR_ST] = CCE_PERF_DEV_CNTR_ELEM(CcePioWrSt, CCE_PIO_WR_STALL_CNT,
1593 CNTR_NORMAL),
1594[C_CCE_ERR_INT] = CCE_INT_DEV_CNTR_ELEM(CceErrInt, CCE_ERR_INT_CNT,
1595 CNTR_NORMAL),
1596[C_CCE_SDMA_INT] = CCE_INT_DEV_CNTR_ELEM(CceSdmaInt, CCE_SDMA_INT_CNT,
1597 CNTR_NORMAL),
1598[C_CCE_MISC_INT] = CCE_INT_DEV_CNTR_ELEM(CceMiscInt, CCE_MISC_INT_CNT,
1599 CNTR_NORMAL),
1600[C_CCE_RCV_AV_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvAvInt, CCE_RCV_AVAIL_INT_CNT,
1601 CNTR_NORMAL),
1602[C_CCE_RCV_URG_INT] = CCE_INT_DEV_CNTR_ELEM(CceRcvUrgInt,
1603 CCE_RCV_URGENT_INT_CNT, CNTR_NORMAL),
1604[C_CCE_SEND_CR_INT] = CCE_INT_DEV_CNTR_ELEM(CceSndCrInt,
1605 CCE_SEND_CREDIT_INT_CNT, CNTR_NORMAL),
1606[C_DC_UNC_ERR] = DC_PERF_CNTR(DcUnctblErr, DCC_ERR_UNCORRECTABLE_CNT,
1607 CNTR_SYNTH),
1608[C_DC_RCV_ERR] = DC_PERF_CNTR(DcRecvErr, DCC_ERR_PORTRCV_ERR_CNT, CNTR_SYNTH),
1609[C_DC_FM_CFG_ERR] = DC_PERF_CNTR(DcFmCfgErr, DCC_ERR_FMCONFIG_ERR_CNT,
1610 CNTR_SYNTH),
1611[C_DC_RMT_PHY_ERR] = DC_PERF_CNTR(DcRmtPhyErr, DCC_ERR_RCVREMOTE_PHY_ERR_CNT,
1612 CNTR_SYNTH),
1613[C_DC_DROPPED_PKT] = DC_PERF_CNTR(DcDroppedPkt, DCC_ERR_DROPPED_PKT_CNT,
1614 CNTR_SYNTH),
1615[C_DC_MC_XMIT_PKTS] = DC_PERF_CNTR(DcMcXmitPkts,
1616 DCC_PRF_PORT_XMIT_MULTICAST_CNT, CNTR_SYNTH),
1617[C_DC_MC_RCV_PKTS] = DC_PERF_CNTR(DcMcRcvPkts,
1618 DCC_PRF_PORT_RCV_MULTICAST_PKT_CNT,
1619 CNTR_SYNTH),
1620[C_DC_XMIT_CERR] = DC_PERF_CNTR(DcXmitCorr,
1621 DCC_PRF_PORT_XMIT_CORRECTABLE_CNT, CNTR_SYNTH),
1622[C_DC_RCV_CERR] = DC_PERF_CNTR(DcRcvCorrCnt, DCC_PRF_PORT_RCV_CORRECTABLE_CNT,
1623 CNTR_SYNTH),
1624[C_DC_RCV_FCC] = DC_PERF_CNTR(DcRxFCntl, DCC_PRF_RX_FLOW_CRTL_CNT,
1625 CNTR_SYNTH),
1626[C_DC_XMIT_FCC] = DC_PERF_CNTR(DcXmitFCntl, DCC_PRF_TX_FLOW_CRTL_CNT,
1627 CNTR_SYNTH),
1628[C_DC_XMIT_FLITS] = DC_PERF_CNTR(DcXmitFlits, DCC_PRF_PORT_XMIT_DATA_CNT,
1629 CNTR_SYNTH),
1630[C_DC_RCV_FLITS] = DC_PERF_CNTR(DcRcvFlits, DCC_PRF_PORT_RCV_DATA_CNT,
1631 CNTR_SYNTH),
1632[C_DC_XMIT_PKTS] = DC_PERF_CNTR(DcXmitPkts, DCC_PRF_PORT_XMIT_PKTS_CNT,
1633 CNTR_SYNTH),
1634[C_DC_RCV_PKTS] = DC_PERF_CNTR(DcRcvPkts, DCC_PRF_PORT_RCV_PKTS_CNT,
1635 CNTR_SYNTH),
1636[C_DC_RX_FLIT_VL] = DC_PERF_CNTR(DcRxFlitVl, DCC_PRF_PORT_VL_RCV_DATA_CNT,
1637 CNTR_SYNTH | CNTR_VL),
1638[C_DC_RX_PKT_VL] = DC_PERF_CNTR(DcRxPktVl, DCC_PRF_PORT_VL_RCV_PKTS_CNT,
1639 CNTR_SYNTH | CNTR_VL),
1640[C_DC_RCV_FCN] = DC_PERF_CNTR(DcRcvFcn, DCC_PRF_PORT_RCV_FECN_CNT, CNTR_SYNTH),
1641[C_DC_RCV_FCN_VL] = DC_PERF_CNTR(DcRcvFcnVl, DCC_PRF_PORT_VL_RCV_FECN_CNT,
1642 CNTR_SYNTH | CNTR_VL),
1643[C_DC_RCV_BCN] = DC_PERF_CNTR(DcRcvBcn, DCC_PRF_PORT_RCV_BECN_CNT, CNTR_SYNTH),
1644[C_DC_RCV_BCN_VL] = DC_PERF_CNTR(DcRcvBcnVl, DCC_PRF_PORT_VL_RCV_BECN_CNT,
1645 CNTR_SYNTH | CNTR_VL),
1646[C_DC_RCV_BBL] = DC_PERF_CNTR(DcRcvBbl, DCC_PRF_PORT_RCV_BUBBLE_CNT,
1647 CNTR_SYNTH),
1648[C_DC_RCV_BBL_VL] = DC_PERF_CNTR(DcRcvBblVl, DCC_PRF_PORT_VL_RCV_BUBBLE_CNT,
1649 CNTR_SYNTH | CNTR_VL),
1650[C_DC_MARK_FECN] = DC_PERF_CNTR(DcMarkFcn, DCC_PRF_PORT_MARK_FECN_CNT,
1651 CNTR_SYNTH),
1652[C_DC_MARK_FECN_VL] = DC_PERF_CNTR(DcMarkFcnVl, DCC_PRF_PORT_VL_MARK_FECN_CNT,
1653 CNTR_SYNTH | CNTR_VL),
1654[C_DC_TOTAL_CRC] =
1655 DC_PERF_CNTR_LCB(DcTotCrc, DC_LCB_ERR_INFO_TOTAL_CRC_ERR,
1656 CNTR_SYNTH),
1657[C_DC_CRC_LN0] = DC_PERF_CNTR_LCB(DcCrcLn0, DC_LCB_ERR_INFO_CRC_ERR_LN0,
1658 CNTR_SYNTH),
1659[C_DC_CRC_LN1] = DC_PERF_CNTR_LCB(DcCrcLn1, DC_LCB_ERR_INFO_CRC_ERR_LN1,
1660 CNTR_SYNTH),
1661[C_DC_CRC_LN2] = DC_PERF_CNTR_LCB(DcCrcLn2, DC_LCB_ERR_INFO_CRC_ERR_LN2,
1662 CNTR_SYNTH),
1663[C_DC_CRC_LN3] = DC_PERF_CNTR_LCB(DcCrcLn3, DC_LCB_ERR_INFO_CRC_ERR_LN3,
1664 CNTR_SYNTH),
1665[C_DC_CRC_MULT_LN] =
1666 DC_PERF_CNTR_LCB(DcMultLn, DC_LCB_ERR_INFO_CRC_ERR_MULTI_LN,
1667 CNTR_SYNTH),
1668[C_DC_TX_REPLAY] = DC_PERF_CNTR_LCB(DcTxReplay, DC_LCB_ERR_INFO_TX_REPLAY_CNT,
1669 CNTR_SYNTH),
1670[C_DC_RX_REPLAY] = DC_PERF_CNTR_LCB(DcRxReplay, DC_LCB_ERR_INFO_RX_REPLAY_CNT,
1671 CNTR_SYNTH),
1672[C_DC_SEQ_CRC_CNT] =
1673 DC_PERF_CNTR_LCB(DcLinkSeqCrc, DC_LCB_ERR_INFO_SEQ_CRC_CNT,
1674 CNTR_SYNTH),
1675[C_DC_ESC0_ONLY_CNT] =
1676 DC_PERF_CNTR_LCB(DcEsc0, DC_LCB_ERR_INFO_ESCAPE_0_ONLY_CNT,
1677 CNTR_SYNTH),
1678[C_DC_ESC0_PLUS1_CNT] =
1679 DC_PERF_CNTR_LCB(DcEsc1, DC_LCB_ERR_INFO_ESCAPE_0_PLUS1_CNT,
1680 CNTR_SYNTH),
1681[C_DC_ESC0_PLUS2_CNT] =
1682 DC_PERF_CNTR_LCB(DcEsc0Plus2, DC_LCB_ERR_INFO_ESCAPE_0_PLUS2_CNT,
1683 CNTR_SYNTH),
1684[C_DC_REINIT_FROM_PEER_CNT] =
1685 DC_PERF_CNTR_LCB(DcReinitPeer, DC_LCB_ERR_INFO_REINIT_FROM_PEER_CNT,
1686 CNTR_SYNTH),
1687[C_DC_SBE_CNT] = DC_PERF_CNTR_LCB(DcSbe, DC_LCB_ERR_INFO_SBE_CNT,
1688 CNTR_SYNTH),
1689[C_DC_MISC_FLG_CNT] =
1690 DC_PERF_CNTR_LCB(DcMiscFlg, DC_LCB_ERR_INFO_MISC_FLG_CNT,
1691 CNTR_SYNTH),
1692[C_DC_PRF_GOOD_LTP_CNT] =
1693 DC_PERF_CNTR_LCB(DcGoodLTP, DC_LCB_PRF_GOOD_LTP_CNT, CNTR_SYNTH),
1694[C_DC_PRF_ACCEPTED_LTP_CNT] =
1695 DC_PERF_CNTR_LCB(DcAccLTP, DC_LCB_PRF_ACCEPTED_LTP_CNT,
1696 CNTR_SYNTH),
1697[C_DC_PRF_RX_FLIT_CNT] =
1698 DC_PERF_CNTR_LCB(DcPrfRxFlit, DC_LCB_PRF_RX_FLIT_CNT, CNTR_SYNTH),
1699[C_DC_PRF_TX_FLIT_CNT] =
1700 DC_PERF_CNTR_LCB(DcPrfTxFlit, DC_LCB_PRF_TX_FLIT_CNT, CNTR_SYNTH),
1701[C_DC_PRF_CLK_CNTR] =
1702 DC_PERF_CNTR_LCB(DcPrfClk, DC_LCB_PRF_CLK_CNTR, CNTR_SYNTH),
1703[C_DC_PG_DBG_FLIT_CRDTS_CNT] =
1704 DC_PERF_CNTR_LCB(DcFltCrdts, DC_LCB_PG_DBG_FLIT_CRDTS_CNT, CNTR_SYNTH),
1705[C_DC_PG_STS_PAUSE_COMPLETE_CNT] =
1706 DC_PERF_CNTR_LCB(DcPauseComp, DC_LCB_PG_STS_PAUSE_COMPLETE_CNT,
1707 CNTR_SYNTH),
1708[C_DC_PG_STS_TX_SBE_CNT] =
1709 DC_PERF_CNTR_LCB(DcStsTxSbe, DC_LCB_PG_STS_TX_SBE_CNT, CNTR_SYNTH),
1710[C_DC_PG_STS_TX_MBE_CNT] =
1711 DC_PERF_CNTR_LCB(DcStsTxMbe, DC_LCB_PG_STS_TX_MBE_CNT,
1712 CNTR_SYNTH),
1713[C_SW_CPU_INTR] = CNTR_ELEM("Intr", 0, 0, CNTR_NORMAL,
1714 access_sw_cpu_intr),
1715[C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL,
1716 access_sw_cpu_rcv_limit),
1717[C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL,
1718 access_sw_vtx_wait),
1719[C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL,
1720 access_sw_pio_wait),
1721[C_SW_KMEM_WAIT] = CNTR_ELEM("KmemWait", 0, 0, CNTR_NORMAL,
1722 access_sw_kmem_wait),
1723};
1724
1725static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
1726[C_TX_UNSUP_VL] = TXE32_PORT_CNTR_ELEM(TxUnVLErr, SEND_UNSUP_VL_ERR_CNT,
1727 CNTR_NORMAL),
1728[C_TX_INVAL_LEN] = TXE32_PORT_CNTR_ELEM(TxInvalLen, SEND_LEN_ERR_CNT,
1729 CNTR_NORMAL),
1730[C_TX_MM_LEN_ERR] = TXE32_PORT_CNTR_ELEM(TxMMLenErr, SEND_MAX_MIN_LEN_ERR_CNT,
1731 CNTR_NORMAL),
1732[C_TX_UNDERRUN] = TXE32_PORT_CNTR_ELEM(TxUnderrun, SEND_UNDERRUN_CNT,
1733 CNTR_NORMAL),
1734[C_TX_FLOW_STALL] = TXE32_PORT_CNTR_ELEM(TxFlowStall, SEND_FLOW_STALL_CNT,
1735 CNTR_NORMAL),
1736[C_TX_DROPPED] = TXE32_PORT_CNTR_ELEM(TxDropped, SEND_DROPPED_PKT_CNT,
1737 CNTR_NORMAL),
1738[C_TX_HDR_ERR] = TXE32_PORT_CNTR_ELEM(TxHdrErr, SEND_HEADERS_ERR_CNT,
1739 CNTR_NORMAL),
1740[C_TX_PKT] = TXE64_PORT_CNTR_ELEM(TxPkt, SEND_DATA_PKT_CNT, CNTR_NORMAL),
1741[C_TX_WORDS] = TXE64_PORT_CNTR_ELEM(TxWords, SEND_DWORD_CNT, CNTR_NORMAL),
1742[C_TX_WAIT] = TXE64_PORT_CNTR_ELEM(TxWait, SEND_WAIT_CNT, CNTR_SYNTH),
1743[C_TX_FLIT_VL] = TXE64_PORT_CNTR_ELEM(TxFlitVL, SEND_DATA_VL0_CNT,
1744 CNTR_SYNTH | CNTR_VL),
1745[C_TX_PKT_VL] = TXE64_PORT_CNTR_ELEM(TxPktVL, SEND_DATA_PKT_VL0_CNT,
1746 CNTR_SYNTH | CNTR_VL),
1747[C_TX_WAIT_VL] = TXE64_PORT_CNTR_ELEM(TxWaitVL, SEND_WAIT_VL0_CNT,
1748 CNTR_SYNTH | CNTR_VL),
1749[C_RX_PKT] = RXE64_PORT_CNTR_ELEM(RxPkt, RCV_DATA_PKT_CNT, CNTR_NORMAL),
1750[C_RX_WORDS] = RXE64_PORT_CNTR_ELEM(RxWords, RCV_DWORD_CNT, CNTR_NORMAL),
1751[C_SW_LINK_DOWN] = CNTR_ELEM("SwLinkDown", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1752 access_sw_link_dn_cnt),
1753[C_SW_LINK_UP] = CNTR_ELEM("SwLinkUp", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1754 access_sw_link_up_cnt),
1755[C_SW_XMIT_DSCD] = CNTR_ELEM("XmitDscd", 0, 0, CNTR_SYNTH | CNTR_32BIT,
1756 access_sw_xmit_discards),
1757[C_SW_XMIT_DSCD_VL] = CNTR_ELEM("XmitDscdVl", 0, 0,
1758 CNTR_SYNTH | CNTR_32BIT | CNTR_VL,
1759 access_sw_xmit_discards),
1760[C_SW_XMIT_CSTR_ERR] = CNTR_ELEM("XmitCstrErr", 0, 0, CNTR_SYNTH,
1761 access_xmit_constraint_errs),
1762[C_SW_RCV_CSTR_ERR] = CNTR_ELEM("RcvCstrErr", 0, 0, CNTR_SYNTH,
1763 access_rcv_constraint_errs),
1764[C_SW_IBP_LOOP_PKTS] = SW_IBP_CNTR(LoopPkts, loop_pkts),
1765[C_SW_IBP_RC_RESENDS] = SW_IBP_CNTR(RcResend, rc_resends),
1766[C_SW_IBP_RNR_NAKS] = SW_IBP_CNTR(RnrNak, rnr_naks),
1767[C_SW_IBP_OTHER_NAKS] = SW_IBP_CNTR(OtherNak, other_naks),
1768[C_SW_IBP_RC_TIMEOUTS] = SW_IBP_CNTR(RcTimeOut, rc_timeouts),
1769[C_SW_IBP_PKT_DROPS] = SW_IBP_CNTR(PktDrop, pkt_drops),
1770[C_SW_IBP_DMA_WAIT] = SW_IBP_CNTR(DmaWait, dmawait),
1771[C_SW_IBP_RC_SEQNAK] = SW_IBP_CNTR(RcSeqNak, rc_seqnak),
1772[C_SW_IBP_RC_DUPREQ] = SW_IBP_CNTR(RcDupRew, rc_dupreq),
1773[C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq),
1774[C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned),
1775[C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks),
1776[C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
1777 access_sw_cpu_rc_acks),
1778[C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
1779 access_sw_cpu_rc_qacks),
1780[C_SW_CPU_RC_DELAYED_COMP] = CNTR_ELEM("RcDelayComp", 0, 0, CNTR_NORMAL,
1781 access_sw_cpu_rc_delayed_comp),
1782[OVR_LBL(0)] = OVR_ELM(0), [OVR_LBL(1)] = OVR_ELM(1),
1783[OVR_LBL(2)] = OVR_ELM(2), [OVR_LBL(3)] = OVR_ELM(3),
1784[OVR_LBL(4)] = OVR_ELM(4), [OVR_LBL(5)] = OVR_ELM(5),
1785[OVR_LBL(6)] = OVR_ELM(6), [OVR_LBL(7)] = OVR_ELM(7),
1786[OVR_LBL(8)] = OVR_ELM(8), [OVR_LBL(9)] = OVR_ELM(9),
1787[OVR_LBL(10)] = OVR_ELM(10), [OVR_LBL(11)] = OVR_ELM(11),
1788[OVR_LBL(12)] = OVR_ELM(12), [OVR_LBL(13)] = OVR_ELM(13),
1789[OVR_LBL(14)] = OVR_ELM(14), [OVR_LBL(15)] = OVR_ELM(15),
1790[OVR_LBL(16)] = OVR_ELM(16), [OVR_LBL(17)] = OVR_ELM(17),
1791[OVR_LBL(18)] = OVR_ELM(18), [OVR_LBL(19)] = OVR_ELM(19),
1792[OVR_LBL(20)] = OVR_ELM(20), [OVR_LBL(21)] = OVR_ELM(21),
1793[OVR_LBL(22)] = OVR_ELM(22), [OVR_LBL(23)] = OVR_ELM(23),
1794[OVR_LBL(24)] = OVR_ELM(24), [OVR_LBL(25)] = OVR_ELM(25),
1795[OVR_LBL(26)] = OVR_ELM(26), [OVR_LBL(27)] = OVR_ELM(27),
1796[OVR_LBL(28)] = OVR_ELM(28), [OVR_LBL(29)] = OVR_ELM(29),
1797[OVR_LBL(30)] = OVR_ELM(30), [OVR_LBL(31)] = OVR_ELM(31),
1798[OVR_LBL(32)] = OVR_ELM(32), [OVR_LBL(33)] = OVR_ELM(33),
1799[OVR_LBL(34)] = OVR_ELM(34), [OVR_LBL(35)] = OVR_ELM(35),
1800[OVR_LBL(36)] = OVR_ELM(36), [OVR_LBL(37)] = OVR_ELM(37),
1801[OVR_LBL(38)] = OVR_ELM(38), [OVR_LBL(39)] = OVR_ELM(39),
1802[OVR_LBL(40)] = OVR_ELM(40), [OVR_LBL(41)] = OVR_ELM(41),
1803[OVR_LBL(42)] = OVR_ELM(42), [OVR_LBL(43)] = OVR_ELM(43),
1804[OVR_LBL(44)] = OVR_ELM(44), [OVR_LBL(45)] = OVR_ELM(45),
1805[OVR_LBL(46)] = OVR_ELM(46), [OVR_LBL(47)] = OVR_ELM(47),
1806[OVR_LBL(48)] = OVR_ELM(48), [OVR_LBL(49)] = OVR_ELM(49),
1807[OVR_LBL(50)] = OVR_ELM(50), [OVR_LBL(51)] = OVR_ELM(51),
1808[OVR_LBL(52)] = OVR_ELM(52), [OVR_LBL(53)] = OVR_ELM(53),
1809[OVR_LBL(54)] = OVR_ELM(54), [OVR_LBL(55)] = OVR_ELM(55),
1810[OVR_LBL(56)] = OVR_ELM(56), [OVR_LBL(57)] = OVR_ELM(57),
1811[OVR_LBL(58)] = OVR_ELM(58), [OVR_LBL(59)] = OVR_ELM(59),
1812[OVR_LBL(60)] = OVR_ELM(60), [OVR_LBL(61)] = OVR_ELM(61),
1813[OVR_LBL(62)] = OVR_ELM(62), [OVR_LBL(63)] = OVR_ELM(63),
1814[OVR_LBL(64)] = OVR_ELM(64), [OVR_LBL(65)] = OVR_ELM(65),
1815[OVR_LBL(66)] = OVR_ELM(66), [OVR_LBL(67)] = OVR_ELM(67),
1816[OVR_LBL(68)] = OVR_ELM(68), [OVR_LBL(69)] = OVR_ELM(69),
1817[OVR_LBL(70)] = OVR_ELM(70), [OVR_LBL(71)] = OVR_ELM(71),
1818[OVR_LBL(72)] = OVR_ELM(72), [OVR_LBL(73)] = OVR_ELM(73),
1819[OVR_LBL(74)] = OVR_ELM(74), [OVR_LBL(75)] = OVR_ELM(75),
1820[OVR_LBL(76)] = OVR_ELM(76), [OVR_LBL(77)] = OVR_ELM(77),
1821[OVR_LBL(78)] = OVR_ELM(78), [OVR_LBL(79)] = OVR_ELM(79),
1822[OVR_LBL(80)] = OVR_ELM(80), [OVR_LBL(81)] = OVR_ELM(81),
1823[OVR_LBL(82)] = OVR_ELM(82), [OVR_LBL(83)] = OVR_ELM(83),
1824[OVR_LBL(84)] = OVR_ELM(84), [OVR_LBL(85)] = OVR_ELM(85),
1825[OVR_LBL(86)] = OVR_ELM(86), [OVR_LBL(87)] = OVR_ELM(87),
1826[OVR_LBL(88)] = OVR_ELM(88), [OVR_LBL(89)] = OVR_ELM(89),
1827[OVR_LBL(90)] = OVR_ELM(90), [OVR_LBL(91)] = OVR_ELM(91),
1828[OVR_LBL(92)] = OVR_ELM(92), [OVR_LBL(93)] = OVR_ELM(93),
1829[OVR_LBL(94)] = OVR_ELM(94), [OVR_LBL(95)] = OVR_ELM(95),
1830[OVR_LBL(96)] = OVR_ELM(96), [OVR_LBL(97)] = OVR_ELM(97),
1831[OVR_LBL(98)] = OVR_ELM(98), [OVR_LBL(99)] = OVR_ELM(99),
1832[OVR_LBL(100)] = OVR_ELM(100), [OVR_LBL(101)] = OVR_ELM(101),
1833[OVR_LBL(102)] = OVR_ELM(102), [OVR_LBL(103)] = OVR_ELM(103),
1834[OVR_LBL(104)] = OVR_ELM(104), [OVR_LBL(105)] = OVR_ELM(105),
1835[OVR_LBL(106)] = OVR_ELM(106), [OVR_LBL(107)] = OVR_ELM(107),
1836[OVR_LBL(108)] = OVR_ELM(108), [OVR_LBL(109)] = OVR_ELM(109),
1837[OVR_LBL(110)] = OVR_ELM(110), [OVR_LBL(111)] = OVR_ELM(111),
1838[OVR_LBL(112)] = OVR_ELM(112), [OVR_LBL(113)] = OVR_ELM(113),
1839[OVR_LBL(114)] = OVR_ELM(114), [OVR_LBL(115)] = OVR_ELM(115),
1840[OVR_LBL(116)] = OVR_ELM(116), [OVR_LBL(117)] = OVR_ELM(117),
1841[OVR_LBL(118)] = OVR_ELM(118), [OVR_LBL(119)] = OVR_ELM(119),
1842[OVR_LBL(120)] = OVR_ELM(120), [OVR_LBL(121)] = OVR_ELM(121),
1843[OVR_LBL(122)] = OVR_ELM(122), [OVR_LBL(123)] = OVR_ELM(123),
1844[OVR_LBL(124)] = OVR_ELM(124), [OVR_LBL(125)] = OVR_ELM(125),
1845[OVR_LBL(126)] = OVR_ELM(126), [OVR_LBL(127)] = OVR_ELM(127),
1846[OVR_LBL(128)] = OVR_ELM(128), [OVR_LBL(129)] = OVR_ELM(129),
1847[OVR_LBL(130)] = OVR_ELM(130), [OVR_LBL(131)] = OVR_ELM(131),
1848[OVR_LBL(132)] = OVR_ELM(132), [OVR_LBL(133)] = OVR_ELM(133),
1849[OVR_LBL(134)] = OVR_ELM(134), [OVR_LBL(135)] = OVR_ELM(135),
1850[OVR_LBL(136)] = OVR_ELM(136), [OVR_LBL(137)] = OVR_ELM(137),
1851[OVR_LBL(138)] = OVR_ELM(138), [OVR_LBL(139)] = OVR_ELM(139),
1852[OVR_LBL(140)] = OVR_ELM(140), [OVR_LBL(141)] = OVR_ELM(141),
1853[OVR_LBL(142)] = OVR_ELM(142), [OVR_LBL(143)] = OVR_ELM(143),
1854[OVR_LBL(144)] = OVR_ELM(144), [OVR_LBL(145)] = OVR_ELM(145),
1855[OVR_LBL(146)] = OVR_ELM(146), [OVR_LBL(147)] = OVR_ELM(147),
1856[OVR_LBL(148)] = OVR_ELM(148), [OVR_LBL(149)] = OVR_ELM(149),
1857[OVR_LBL(150)] = OVR_ELM(150), [OVR_LBL(151)] = OVR_ELM(151),
1858[OVR_LBL(152)] = OVR_ELM(152), [OVR_LBL(153)] = OVR_ELM(153),
1859[OVR_LBL(154)] = OVR_ELM(154), [OVR_LBL(155)] = OVR_ELM(155),
1860[OVR_LBL(156)] = OVR_ELM(156), [OVR_LBL(157)] = OVR_ELM(157),
1861[OVR_LBL(158)] = OVR_ELM(158), [OVR_LBL(159)] = OVR_ELM(159),
1862};
1863
1864/* ======================================================================== */
1865
1866/* return true if this is chip revision a0 */
1867int is_a0(struct hfi1_devdata *dd)
1868{
1869 return ((dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
1870 & CCE_REVISION_CHIP_REV_MINOR_MASK) == 0;
1871}
1872
1873/* return true if this is chip revision a */
1874int is_ax(struct hfi1_devdata *dd)
1875{
1876 u8 chip_rev_minor =
1877 dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT
1878 & CCE_REVISION_CHIP_REV_MINOR_MASK;
1879 return (chip_rev_minor & 0xf0) == 0;
1880}
1881
1882/* return true if this is chip revision b */
1883int is_bx(struct hfi1_devdata *dd)
1884{
1885 u8 chip_rev_minor =
1886 dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT
1887 & CCE_REVISION_CHIP_REV_MINOR_MASK;
1888 return !!(chip_rev_minor & 0x10);
1889}
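/*
 * Examples of the decode above, using the minor revision value:
 * minor 0x00 -> A0: is_a0() and is_ax() are true; minor 0x01 -> A1: only
 * is_ax() is true; minor 0x10 -> B0: is_bx() is true.
 */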
1890
1891/*
1892 * Append string s to buffer buf. Arguments curp and len are the current
1893 * position and remaining length, respectively.
1894 *
1895 * return 0 on success, 1 on out of room
1896 */
1897static int append_str(char *buf, char **curp, int *lenp, const char *s)
1898{
1899 char *p = *curp;
1900 int len = *lenp;
1901 int result = 0; /* success */
1902 char c;
1903
1904 /* add a comma, if not first in the buffer */
1905 if (p != buf) {
1906 if (len == 0) {
1907 result = 1; /* out of room */
1908 goto done;
1909 }
1910 *p++ = ',';
1911 len--;
1912 }
1913
1914 /* copy the string */
1915 while ((c = *s++) != 0) {
1916 if (len == 0) {
1917 result = 1; /* out of room */
1918 goto done;
1919 }
1920 *p++ = c;
1921 len--;
1922 }
1923
1924done:
1925 /* write return values */
1926 *curp = p;
1927 *lenp = len;
1928
1929 return result;
1930}
1931
1932/*
1933 * Using the given flag table, print a comma separated string into
1934 * the buffer. End in '*' if the buffer is too short.
1935 */
1936static char *flag_string(char *buf, int buf_len, u64 flags,
1937 struct flag_table *table, int table_size)
1938{
1939 char extra[32];
1940 char *p = buf;
1941 int len = buf_len;
1942 int no_room = 0;
1943 int i;
1944
1945 /* make sure there is room for at least 2 characters so we can form "*" */
1946 if (len < 2)
1947 return "";
1948
1949 len--; /* leave room for a nul */
1950 for (i = 0; i < table_size; i++) {
1951 if (flags & table[i].flag) {
1952 no_room = append_str(buf, &p, &len, table[i].str);
1953 if (no_room)
1954 break;
1955 flags &= ~table[i].flag;
1956 }
1957 }
1958
1959 /* any undocumented bits left? */
1960 if (!no_room && flags) {
1961 snprintf(extra, sizeof(extra), "bits 0x%llx", flags);
1962 no_room = append_str(buf, &p, &len, extra);
1963 }
1964
1965 /* add * if ran out of room */
1966 if (no_room) {
1967 /* may need to back up to add space for a '*' */
1968 if (len == 0)
1969 --p;
1970 *p++ = '*';
1971 }
1972
1973 /* add final nul - space already allocated above */
1974 *p = 0;
1975 return buf;
1976}
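/*
 * Illustrative output only (the flag names here are hypothetical): with two
 * table entries matching and one undocumented bit left over, the result
 * looks like "FlagA,FlagB,bits 0x40"; if the buffer is too small the string
 * is truncated and ends in '*'.
 */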
1977
1978/* first 8 CCE error interrupt source names */
1979static const char * const cce_misc_names[] = {
1980 "CceErrInt", /* 0 */
1981 "RxeErrInt", /* 1 */
1982 "MiscErrInt", /* 2 */
1983 "Reserved3", /* 3 */
1984 "PioErrInt", /* 4 */
1985 "SDmaErrInt", /* 5 */
1986 "EgressErrInt", /* 6 */
1987 "TxeErrInt" /* 7 */
1988};
1989
1990/*
1991 * Return the miscellaneous error interrupt name.
1992 */
1993static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source)
1994{
1995 if (source < ARRAY_SIZE(cce_misc_names))
1996 strncpy(buf, cce_misc_names[source], bsize);
1997 else
1998 snprintf(buf,
1999 bsize,
2000 "Reserved%u",
2001 source + IS_GENERAL_ERR_START);
2002
2003 return buf;
2004}
2005
2006/*
2007 * Return the SDMA engine error interrupt name.
2008 */
2009static char *is_sdma_eng_err_name(char *buf, size_t bsize, unsigned int source)
2010{
2011 snprintf(buf, bsize, "SDmaEngErrInt%u", source);
2012 return buf;
2013}
2014
2015/*
2016 * Return the send context error interrupt name.
2017 */
2018static char *is_sendctxt_err_name(char *buf, size_t bsize, unsigned int source)
2019{
2020 snprintf(buf, bsize, "SendCtxtErrInt%u", source);
2021 return buf;
2022}
2023
2024static const char * const various_names[] = {
2025 "PbcInt",
2026 "GpioAssertInt",
2027 "Qsfp1Int",
2028 "Qsfp2Int",
2029 "TCritInt"
2030};
2031
2032/*
2033 * Return the various interrupt name.
2034 */
2035static char *is_various_name(char *buf, size_t bsize, unsigned int source)
2036{
2037 if (source < ARRAY_SIZE(various_names))
2038 strncpy(buf, various_names[source], bsize);
2039 else
2040 snprintf(buf, bsize, "Reserved%u", source+IS_VARIOUS_START);
2041 return buf;
2042}
2043
2044/*
2045 * Return the DC interrupt name.
2046 */
2047static char *is_dc_name(char *buf, size_t bsize, unsigned int source)
2048{
2049 static const char * const dc_int_names[] = {
2050 "common",
2051 "lcb",
2052 "8051",
2053 "lbm" /* local block merge */
2054 };
2055
2056 if (source < ARRAY_SIZE(dc_int_names))
2057 snprintf(buf, bsize, "dc_%s_int", dc_int_names[source]);
2058 else
2059 snprintf(buf, bsize, "DCInt%u", source);
2060 return buf;
2061}
2062
2063static const char * const sdma_int_names[] = {
2064 "SDmaInt",
2065 "SdmaIdleInt",
2066 "SdmaProgressInt",
2067};
2068
2069/*
2070 * Return the SDMA engine interrupt name.
2071 */
2072static char *is_sdma_eng_name(char *buf, size_t bsize, unsigned int source)
2073{
2074 /* what interrupt */
2075 unsigned int what = source / TXE_NUM_SDMA_ENGINES;
2076 /* which engine */
2077 unsigned int which = source % TXE_NUM_SDMA_ENGINES;
2078
2079 if (likely(what < 3))
2080 snprintf(buf, bsize, "%s%u", sdma_int_names[what], which);
2081 else
2082 snprintf(buf, bsize, "Invalid SDMA interrupt %u", source);
2083 return buf;
2084}
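/*
 * Worked example (assuming TXE_NUM_SDMA_ENGINES is 16; see its definition
 * for the real value): source 17 gives what = 1 and which = 1, so the name
 * is "SdmaIdleInt1".
 */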
2085
2086/*
2087 * Return the receive available interrupt name.
2088 */
2089static char *is_rcv_avail_name(char *buf, size_t bsize, unsigned int source)
2090{
2091 snprintf(buf, bsize, "RcvAvailInt%u", source);
2092 return buf;
2093}
2094
2095/*
2096 * Return the receive urgent interrupt name.
2097 */
2098static char *is_rcv_urgent_name(char *buf, size_t bsize, unsigned int source)
2099{
2100 snprintf(buf, bsize, "RcvUrgentInt%u", source);
2101 return buf;
2102}
2103
2104/*
2105 * Return the send credit interrupt name.
2106 */
2107static char *is_send_credit_name(char *buf, size_t bsize, unsigned int source)
2108{
2109 snprintf(buf, bsize, "SendCreditInt%u", source);
2110 return buf;
2111}
2112
2113/*
2114 * Return the reserved interrupt name.
2115 */
2116static char *is_reserved_name(char *buf, size_t bsize, unsigned int source)
2117{
2118 snprintf(buf, bsize, "Reserved%u", source + IS_RESERVED_START);
2119 return buf;
2120}
2121
2122static char *cce_err_status_string(char *buf, int buf_len, u64 flags)
2123{
2124 return flag_string(buf, buf_len, flags,
2125 cce_err_status_flags, ARRAY_SIZE(cce_err_status_flags));
2126}
2127
2128static char *rxe_err_status_string(char *buf, int buf_len, u64 flags)
2129{
2130 return flag_string(buf, buf_len, flags,
2131 rxe_err_status_flags, ARRAY_SIZE(rxe_err_status_flags));
2132}
2133
2134static char *misc_err_status_string(char *buf, int buf_len, u64 flags)
2135{
2136 return flag_string(buf, buf_len, flags, misc_err_status_flags,
2137 ARRAY_SIZE(misc_err_status_flags));
2138}
2139
2140static char *pio_err_status_string(char *buf, int buf_len, u64 flags)
2141{
2142 return flag_string(buf, buf_len, flags,
2143 pio_err_status_flags, ARRAY_SIZE(pio_err_status_flags));
2144}
2145
2146static char *sdma_err_status_string(char *buf, int buf_len, u64 flags)
2147{
2148 return flag_string(buf, buf_len, flags,
2149 sdma_err_status_flags,
2150 ARRAY_SIZE(sdma_err_status_flags));
2151}
2152
2153static char *egress_err_status_string(char *buf, int buf_len, u64 flags)
2154{
2155 return flag_string(buf, buf_len, flags,
2156 egress_err_status_flags, ARRAY_SIZE(egress_err_status_flags));
2157}
2158
2159static char *egress_err_info_string(char *buf, int buf_len, u64 flags)
2160{
2161 return flag_string(buf, buf_len, flags,
2162 egress_err_info_flags, ARRAY_SIZE(egress_err_info_flags));
2163}
2164
2165static char *send_err_status_string(char *buf, int buf_len, u64 flags)
2166{
2167 return flag_string(buf, buf_len, flags,
2168 send_err_status_flags,
2169 ARRAY_SIZE(send_err_status_flags));
2170}
2171
2172static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2173{
2174 char buf[96];
2175
2176 /*
2177 * For most of these errors, there is nothing that can be done except
2178 * report or record it.
2179 */
2180 dd_dev_info(dd, "CCE Error: %s\n",
2181 cce_err_status_string(buf, sizeof(buf), reg));
2182
2183 if ((reg & CCE_ERR_STATUS_CCE_CLI2_ASYNC_FIFO_PARITY_ERR_SMASK)
2184 && is_a0(dd)
2185 && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)) {
2186 /* this error requires a manual drop into SPC freeze mode */
2187 /* then a fix up */
2188 start_freeze_handling(dd->pport, FREEZE_SELF);
2189 }
2190}
2191
2192/*
2193 * Check counters for receive errors that do not have an interrupt
2194 * associated with them.
2195 */
2196#define RCVERR_CHECK_TIME 10
2197static void update_rcverr_timer(unsigned long opaque)
2198{
2199 struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
2200 struct hfi1_pportdata *ppd = dd->pport;
2201 u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
2202
2203 if (dd->rcv_ovfl_cnt < cur_ovfl_cnt &&
2204 ppd->port_error_action & OPA_PI_MASK_EX_BUFFER_OVERRUN) {
2205 dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
2206 set_link_down_reason(ppd,
2207 OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN, 0,
2208 OPA_LINKDOWN_REASON_EXCESSIVE_BUFFER_OVERRUN);
2209 queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
2210 }
2211 dd->rcv_ovfl_cnt = (u32) cur_ovfl_cnt;
2212
2213 mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2214}
2215
2216static int init_rcverr(struct hfi1_devdata *dd)
2217{
2218 init_timer(&dd->rcverr_timer);
2219 dd->rcverr_timer.function = update_rcverr_timer;
2220 dd->rcverr_timer.data = (unsigned long) dd;
2221 /* Assume the hardware counter has been reset */
2222 dd->rcv_ovfl_cnt = 0;
2223 return mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME);
2224}
2225
2226static void free_rcverr(struct hfi1_devdata *dd)
2227{
2228 if (dd->rcverr_timer.data)
2229 del_timer_sync(&dd->rcverr_timer);
2230 dd->rcverr_timer.data = 0;
2231}
2232
2233static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2234{
2235 char buf[96];
2236
2237 dd_dev_info(dd, "Receive Error: %s\n",
2238 rxe_err_status_string(buf, sizeof(buf), reg));
2239
2240 if (reg & ALL_RXE_FREEZE_ERR) {
2241 int flags = 0;
2242
2243 /*
2244 * Freeze mode recovery is disabled for the errors
2245 * in RXE_FREEZE_ABORT_MASK
2246 */
2247 if (is_a0(dd) && (reg & RXE_FREEZE_ABORT_MASK))
2248 flags = FREEZE_ABORT;
2249
2250 start_freeze_handling(dd->pport, flags);
2251 }
2252}
2253
2254static void handle_misc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2255{
2256 char buf[96];
2257
2258 dd_dev_info(dd, "Misc Error: %s",
2259 misc_err_status_string(buf, sizeof(buf), reg));
2260}
2261
2262static void handle_pio_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2263{
2264 char buf[96];
2265
2266 dd_dev_info(dd, "PIO Error: %s\n",
2267 pio_err_status_string(buf, sizeof(buf), reg));
2268
2269 if (reg & ALL_PIO_FREEZE_ERR)
2270 start_freeze_handling(dd->pport, 0);
2271}
2272
2273static void handle_sdma_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2274{
2275 char buf[96];
2276
2277 dd_dev_info(dd, "SDMA Error: %s\n",
2278 sdma_err_status_string(buf, sizeof(buf), reg));
2279
2280 if (reg & ALL_SDMA_FREEZE_ERR)
2281 start_freeze_handling(dd->pport, 0);
2282}
2283
2284static void count_port_inactive(struct hfi1_devdata *dd)
2285{
2286 struct hfi1_pportdata *ppd = dd->pport;
2287
2288 if (ppd->port_xmit_discards < ~(u64)0)
2289 ppd->port_xmit_discards++;
2290}
2291
2292/*
2293 * We have had a "disallowed packet" error during egress. Determine the
2294 * integrity check which failed, and update relevant error counter, etc.
2295 *
2296 * Note that the SEND_EGRESS_ERR_INFO register has only a single
2297 * bit of state per integrity check, and so we can miss the reason for an
2298 * egress error if more than one packet fails the same integrity check
2299 * since we cleared the corresponding bit in SEND_EGRESS_ERR_INFO.
2300 */
2301static void handle_send_egress_err_info(struct hfi1_devdata *dd)
2302{
2303 struct hfi1_pportdata *ppd = dd->pport;
2304 u64 src = read_csr(dd, SEND_EGRESS_ERR_SOURCE); /* read first */
2305 u64 info = read_csr(dd, SEND_EGRESS_ERR_INFO);
2306 char buf[96];
2307
2308 /* clear down all observed info as quickly as possible after read */
2309 write_csr(dd, SEND_EGRESS_ERR_INFO, info);
2310
2311 dd_dev_info(dd,
2312 "Egress Error Info: 0x%llx, %s Egress Error Src 0x%llx\n",
2313 info, egress_err_info_string(buf, sizeof(buf), info), src);
2314
2315 /* Eventually add other counters for each bit */
2316
2317 if (info & SEND_EGRESS_ERR_INFO_TOO_LONG_IB_PACKET_ERR_SMASK) {
2318 if (ppd->port_xmit_discards < ~(u64)0)
2319 ppd->port_xmit_discards++;
2320 }
2321}
2322
2323/*
2324 * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2325 * register. Does it represent a 'port inactive' error?
2326 */
2327static inline int port_inactive_err(u64 posn)
2328{
2329 return (posn >= SEES(TX_LINKDOWN) &&
2330 posn <= SEES(TX_INCORRECT_LINK_STATE));
2331}
2332
2333/*
2334 * Input value is a bit position within the SEND_EGRESS_ERR_STATUS
2335 * register. Does it represent a 'disallowed packet' error?
2336 */
2337static inline int disallowed_pkt_err(u64 posn)
2338{
2339 return (posn >= SEES(TX_SDMA0_DISALLOWED_PACKET) &&
2340 posn <= SEES(TX_SDMA15_DISALLOWED_PACKET));
2341}
2342
2343static void handle_egress_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2344{
2345 u64 reg_copy = reg, handled = 0;
2346 char buf[96];
2347
2348 if (reg & ALL_TXE_EGRESS_FREEZE_ERR)
2349 start_freeze_handling(dd->pport, 0);
2350 if (is_a0(dd) && (reg &
2351 SEND_EGRESS_ERR_STATUS_TX_CREDIT_RETURN_VL_ERR_SMASK)
2352 && (dd->icode != ICODE_FUNCTIONAL_SIMULATOR))
2353 start_freeze_handling(dd->pport, 0);
2354
2355 while (reg_copy) {
2356 int posn = fls64(reg_copy);
2357 /*
2358 * fls64() returns a 1-based offset, but we generally
2359 * want 0-based offsets.
2360 */
2361 int shift = posn - 1;
2362
2363 if (port_inactive_err(shift)) {
2364 count_port_inactive(dd);
2365 handled |= (1ULL << shift);
2366 } else if (disallowed_pkt_err(shift)) {
2367 handle_send_egress_err_info(dd);
2368 handled |= (1ULL << shift);
2369 }
2370 clear_bit(shift, (unsigned long *)&reg_copy);
2371 }
2372
2373 reg &= ~handled;
2374
2375 if (reg)
2376 dd_dev_info(dd, "Egress Error: %s\n",
2377 egress_err_status_string(buf, sizeof(buf), reg));
2378}
2379
2380static void handle_txe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
2381{
2382 char buf[96];
2383
2384 dd_dev_info(dd, "Send Error: %s\n",
2385 send_err_status_string(buf, sizeof(buf), reg));
2387}
2388
2389/*
2390 * The maximum number of times the error clear down will loop before
2391 * blocking a repeating error. This value is arbitrary.
2392 */
2393#define MAX_CLEAR_COUNT 20
2394
2395/*
2396 * Clear and handle an error register. All error interrupts are funneled
2397 * through here to have a central location to correctly handle single-
2398 * or multi-shot errors.
2399 *
2400 * For non per-context registers, call this routine with a context value
2401 * of 0 so the per-context offset is zero.
2402 *
2403 * If the handler loops too many times, assume that something is wrong
2404 * and can't be fixed, so mask the error bits.
2405 */
2406static void interrupt_clear_down(struct hfi1_devdata *dd,
2407 u32 context,
2408 const struct err_reg_info *eri)
2409{
2410 u64 reg;
2411 u32 count;
2412
2413 /* read in a loop until no more errors are seen */
2414 count = 0;
2415 while (1) {
2416 reg = read_kctxt_csr(dd, context, eri->status);
2417 if (reg == 0)
2418 break;
2419 write_kctxt_csr(dd, context, eri->clear, reg);
2420 if (likely(eri->handler))
2421 eri->handler(dd, context, reg);
2422 count++;
2423 if (count > MAX_CLEAR_COUNT) {
2424 u64 mask;
2425
2426 dd_dev_err(dd, "Repeating %s bits 0x%llx - masking\n",
2427 eri->desc, reg);
2428 /*
2429 * Read-modify-write so any other masked bits
2430 * remain masked.
2431 */
2432 mask = read_kctxt_csr(dd, context, eri->mask);
2433 mask &= ~reg;
2434 write_kctxt_csr(dd, context, eri->mask, mask);
2435 break;
2436 }
2437 }
2438}
2439
2440/*
2441 * CCE block "misc" interrupt. Source is < 16.
2442 */
2443static void is_misc_err_int(struct hfi1_devdata *dd, unsigned int source)
2444{
2445 const struct err_reg_info *eri = &misc_errs[source];
2446
2447 if (eri->handler) {
2448 interrupt_clear_down(dd, 0, eri);
2449 } else {
2450 dd_dev_err(dd, "Unexpected misc interrupt (%u) - reserved\n",
2451 source);
2452 }
2453}
2454
2455static char *send_context_err_status_string(char *buf, int buf_len, u64 flags)
2456{
2457 return flag_string(buf, buf_len, flags,
2458 sc_err_status_flags, ARRAY_SIZE(sc_err_status_flags));
2459}
2460
2461/*
2462 * Send context error interrupt. Source (hw_context) is < 160.
2463 *
2464 * All send context errors cause the send context to halt. The normal
2465 * clear-down mechanism cannot be used because we cannot clear the
2466 * error bits until several other long-running items are done first.
2467 * This is OK because with the context halted, nothing else is going
2468 * to happen on it anyway.
2469 */
2470static void is_sendctxt_err_int(struct hfi1_devdata *dd,
2471 unsigned int hw_context)
2472{
2473 struct send_context_info *sci;
2474 struct send_context *sc;
2475 char flags[96];
2476 u64 status;
2477 u32 sw_index;
2478
2479 sw_index = dd->hw_to_sw[hw_context];
2480 if (sw_index >= dd->num_send_contexts) {
2481 dd_dev_err(dd,
2482 "out of range sw index %u for send context %u\n",
2483 sw_index, hw_context);
2484 return;
2485 }
2486 sci = &dd->send_contexts[sw_index];
2487 sc = sci->sc;
2488 if (!sc) {
2489 dd_dev_err(dd, "%s: context %u(%u): no sc?\n", __func__,
2490 sw_index, hw_context);
2491 return;
2492 }
2493
2494 /* tell the software that a halt has begun */
2495 sc_stop(sc, SCF_HALTED);
2496
2497 status = read_kctxt_csr(dd, hw_context, SEND_CTXT_ERR_STATUS);
2498
2499 dd_dev_info(dd, "Send Context %u(%u) Error: %s\n", sw_index, hw_context,
2500 send_context_err_status_string(flags, sizeof(flags), status));
2501
2502 if (status & SEND_CTXT_ERR_STATUS_PIO_DISALLOWED_PACKET_ERR_SMASK)
2503 handle_send_egress_err_info(dd);
2504
2505 /*
2506 * Automatically restart halted kernel contexts out of interrupt
2507 * context. User contexts must ask the driver to restart the context.
2508 */
2509 if (sc->type != SC_USER)
2510 queue_work(dd->pport->hfi1_wq, &sc->halt_work);
2511}
2512
2513static void handle_sdma_eng_err(struct hfi1_devdata *dd,
2514 unsigned int source, u64 status)
2515{
2516 struct sdma_engine *sde;
2517
2518 sde = &dd->per_sdma[source];
2519#ifdef CONFIG_SDMA_VERBOSITY
2520 dd_dev_err(sde->dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2521 slashstrip(__FILE__), __LINE__, __func__);
2522 dd_dev_err(sde->dd, "CONFIG SDMA(%u) source: %u status 0x%llx\n",
2523 sde->this_idx, source, (unsigned long long)status);
2524#endif
2525 sdma_engine_error(sde, status);
2526}
2527
2528/*
2529 * CCE block SDMA error interrupt. Source is < 16.
2530 */
2531static void is_sdma_eng_err_int(struct hfi1_devdata *dd, unsigned int source)
2532{
2533#ifdef CONFIG_SDMA_VERBOSITY
2534 struct sdma_engine *sde = &dd->per_sdma[source];
2535
2536 dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
2537 slashstrip(__FILE__), __LINE__, __func__);
2538 dd_dev_err(dd, "CONFIG SDMA(%u) source: %u\n", sde->this_idx,
2539 source);
2540 sdma_dumpstate(sde);
2541#endif
2542 interrupt_clear_down(dd, source, &sdma_eng_err);
2543}
2544
2545/*
2546 * CCE block "various" interrupt. Source is < 8.
2547 */
2548static void is_various_int(struct hfi1_devdata *dd, unsigned int source)
2549{
2550 const struct err_reg_info *eri = &various_err[source];
2551
2552 /*
2553 * TCritInt cannot go through interrupt_clear_down()
2554 * because it is not a second tier interrupt. The handler
2555 * should be called directly.
2556 */
2557 if (source == TCRIT_INT_SOURCE)
2558 handle_temp_err(dd);
2559 else if (eri->handler)
2560 interrupt_clear_down(dd, 0, eri);
2561 else
2562 dd_dev_info(dd,
2563 "%s: Unimplemented/reserved interrupt %d\n",
2564 __func__, source);
2565}
2566
2567static void handle_qsfp_int(struct hfi1_devdata *dd, u32 src_ctx, u64 reg)
2568{
2569 /* source is always zero */
2570 struct hfi1_pportdata *ppd = dd->pport;
2571 unsigned long flags;
2572 u64 qsfp_int_mgmt = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
2573
2574 if (reg & QSFP_HFI0_MODPRST_N) {
2575
2576 dd_dev_info(dd, "%s: ModPresent triggered QSFP interrupt\n",
2577 __func__);
2578
2579 if (!qsfp_mod_present(ppd)) {
2580 ppd->driver_link_ready = 0;
2581 /*
2582 * Cable removed, reset all our information about the
2583 * cache and cable capabilities
2584 */
2585
2586 spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2587 /*
2588 * We don't set cache_refresh_required here as we expect
2589 * an interrupt when a cable is inserted
2590 */
2591 ppd->qsfp_info.cache_valid = 0;
2592 ppd->qsfp_info.qsfp_interrupt_functional = 0;
2593 spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2594 flags);
2595 write_csr(dd,
2596 dd->hfi1_id ?
2597 ASIC_QSFP2_INVERT :
2598 ASIC_QSFP1_INVERT,
2599 qsfp_int_mgmt);
2600 if (ppd->host_link_state == HLS_DN_POLL) {
2601 /*
2602 * The link is still in POLL. This means
2603 * that the normal link down processing
2604 * will not happen. We have to do it here
2605 * before turning the DC off.
2606 */
2607 queue_work(ppd->hfi1_wq, &ppd->link_down_work);
2608 }
2609 } else {
2610 spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2611 ppd->qsfp_info.cache_valid = 0;
2612 ppd->qsfp_info.cache_refresh_required = 1;
2613 spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
2614 flags);
2615
2616 qsfp_int_mgmt &= ~(u64)QSFP_HFI0_MODPRST_N;
2617 write_csr(dd,
2618 dd->hfi1_id ?
2619 ASIC_QSFP2_INVERT :
2620 ASIC_QSFP1_INVERT,
2621 qsfp_int_mgmt);
2622 }
2623 }
2624
2625 if (reg & QSFP_HFI0_INT_N) {
2626
2627 dd_dev_info(dd, "%s: IntN triggered QSFP interrupt\n",
2628 __func__);
2629 spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
2630 ppd->qsfp_info.check_interrupt_flags = 1;
2631 ppd->qsfp_info.qsfp_interrupt_functional = 1;
2632 spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock, flags);
2633 }
2634
2635 /* Schedule the QSFP work only if there is a cable attached. */
2636 if (qsfp_mod_present(ppd))
2637 queue_work(ppd->hfi1_wq, &ppd->qsfp_info.qsfp_work);
2638}
2639
2640static int request_host_lcb_access(struct hfi1_devdata *dd)
2641{
2642 int ret;
2643
2644 ret = do_8051_command(dd, HCMD_MISC,
2645 (u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2646 NULL);
2647 if (ret != HCMD_SUCCESS) {
2648 dd_dev_err(dd, "%s: command failed with error %d\n",
2649 __func__, ret);
2650 }
2651 return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2652}
2653
2654static int request_8051_lcb_access(struct hfi1_devdata *dd)
2655{
2656 int ret;
2657
2658 ret = do_8051_command(dd, HCMD_MISC,
2659 (u64)HCMD_MISC_GRANT_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT,
2660 NULL);
2661 if (ret != HCMD_SUCCESS) {
2662 dd_dev_err(dd, "%s: command failed with error %d\n",
2663 __func__, ret);
2664 }
2665 return ret == HCMD_SUCCESS ? 0 : -EBUSY;
2666}
2667
2668/*
2669 * Set the LCB selector - allow host access. The DCC selector always
2670 * points to the host.
2671 */
2672static inline void set_host_lcb_access(struct hfi1_devdata *dd)
2673{
2674 write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2675 DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK
2676 | DC_DC8051_CFG_CSR_ACCESS_SEL_LCB_SMASK);
2677}
2678
2679/*
2680 * Clear the LCB selector - allow 8051 access. The DCC selector always
2681 * points to the host.
2682 */
2683static inline void set_8051_lcb_access(struct hfi1_devdata *dd)
2684{
2685 write_csr(dd, DC_DC8051_CFG_CSR_ACCESS_SEL,
2686 DC_DC8051_CFG_CSR_ACCESS_SEL_DCC_SMASK);
2687}
2688
2689/*
2690 * Acquire LCB access from the 8051. If the host already has access,
2691 * just increment a counter. Otherwise, inform the 8051 that the
2692 * host is taking access.
2693 *
2694 * Returns:
2695 * 0 on success
2696 * -EBUSY if the 8051 has control and cannot be disturbed
2697 * -errno if unable to acquire access from the 8051
2698 */
2699int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2700{
2701 struct hfi1_pportdata *ppd = dd->pport;
2702 int ret = 0;
2703
2704 /*
2705 * Use the host link state lock so the operation of this routine
2706 * { link state check, selector change, count increment } can occur
2707 * as a unit against a link state change. Otherwise there is a
2708 * race between the state change and the count increment.
2709 */
2710 if (sleep_ok) {
2711 mutex_lock(&ppd->hls_lock);
2712 } else {
2713 while (!mutex_trylock(&ppd->hls_lock))
2714 udelay(1);
2715 }
2716
2717 /* this access is valid only when the link is up */
2718 if ((ppd->host_link_state & HLS_UP) == 0) {
2719 dd_dev_info(dd, "%s: link state %s not up\n",
2720 __func__, link_state_name(ppd->host_link_state));
2721 ret = -EBUSY;
2722 goto done;
2723 }
2724
2725 if (dd->lcb_access_count == 0) {
2726 ret = request_host_lcb_access(dd);
2727 if (ret) {
2728 dd_dev_err(dd,
2729 "%s: unable to acquire LCB access, err %d\n",
2730 __func__, ret);
2731 goto done;
2732 }
2733 set_host_lcb_access(dd);
2734 }
2735 dd->lcb_access_count++;
2736done:
2737 mutex_unlock(&ppd->hls_lock);
2738 return ret;
2739}
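/*
 * Typical pairing with release_lcb_access() below (illustrative sketch,
 * not a specific caller in this file):
 *
 *	if (acquire_lcb_access(dd, 1) == 0) {
 *		... read or write LCB CSRs ...
 *		release_lcb_access(dd, 1);
 *	}
 */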
2740
2741/*
2742 * Release LCB access by decrementing the use count. If the count is moving
2743 * from 1 to 0, inform the 8051 that it has control back.
2744 *
2745 * Returns:
2746 * 0 on success
2747 * -errno if unable to release access to the 8051
2748 */
2749int release_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
2750{
2751 int ret = 0;
2752
2753 /*
2754 * Use the host link state lock because the acquire needed it.
2755 * Here, we only need to keep { selector change, count decrement }
2756 * as a unit.
2757 */
2758 if (sleep_ok) {
2759 mutex_lock(&dd->pport->hls_lock);
2760 } else {
2761 while (!mutex_trylock(&dd->pport->hls_lock))
2762 udelay(1);
2763 }
2764
2765 if (dd->lcb_access_count == 0) {
2766 dd_dev_err(dd, "%s: LCB access count is zero. Skipping.\n",
2767 __func__);
2768 goto done;
2769 }
2770
2771 if (dd->lcb_access_count == 1) {
2772 set_8051_lcb_access(dd);
2773 ret = request_8051_lcb_access(dd);
2774 if (ret) {
2775 dd_dev_err(dd,
2776 "%s: unable to release LCB access, err %d\n",
2777 __func__, ret);
2778 /* restore host access if the grant didn't work */
2779 set_host_lcb_access(dd);
2780 goto done;
2781 }
2782 }
2783 dd->lcb_access_count--;
2784done:
2785 mutex_unlock(&dd->pport->hls_lock);
2786 return ret;
2787}
2788
2789/*
2790 * Initialize LCB access variables and state. Called during driver load,
2791 * after most of the initialization is finished.
2792 *
2793 * The DC default is LCB access on for the host. The driver defaults to
2794 * leaving access to the 8051. Assign access now - this constrains the call
2795 * to this routine to be after all LCB set-up is done. In particular, after
2796 * hfi1_init_dd() -> set_up_interrupts() -> clear_all_interrupts()
2797 */
2798static void init_lcb_access(struct hfi1_devdata *dd)
2799{
2800 dd->lcb_access_count = 0;
2801}
2802
2803/*
2804 * Write a response back to an 8051 request.
2805 */
2806static void hreq_response(struct hfi1_devdata *dd, u8 return_code, u16 rsp_data)
2807{
2808 write_csr(dd, DC_DC8051_CFG_EXT_DEV_0,
2809 DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK
2810 | (u64)return_code << DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT
2811 | (u64)rsp_data << DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
2812}
2813
2814/*
2815 * Handle requests from the 8051.
2816 */
2817static void handle_8051_request(struct hfi1_devdata *dd)
2818{
2819 u64 reg;
2820 u16 data;
2821 u8 type;
2822
2823 reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_1);
2824 if ((reg & DC_DC8051_CFG_EXT_DEV_1_REQ_NEW_SMASK) == 0)
2825 return; /* no request */
2826
2827 /* zero out COMPLETED so the response is seen */
2828 write_csr(dd, DC_DC8051_CFG_EXT_DEV_0, 0);
2829
2830 /* extract request details */
2831 type = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_SHIFT)
2832 & DC_DC8051_CFG_EXT_DEV_1_REQ_TYPE_MASK;
2833 data = (reg >> DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT)
2834 & DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_MASK;
2835
2836 switch (type) {
2837 case HREQ_LOAD_CONFIG:
2838 case HREQ_SAVE_CONFIG:
2839 case HREQ_READ_CONFIG:
2840 case HREQ_SET_TX_EQ_ABS:
2841 case HREQ_SET_TX_EQ_REL:
2842 case HREQ_ENABLE:
2843 dd_dev_info(dd, "8051 request: request 0x%x not supported\n",
2844 type);
2845 hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2846 break;
2847
2848 case HREQ_CONFIG_DONE:
2849 hreq_response(dd, HREQ_SUCCESS, 0);
2850 break;
2851
2852 case HREQ_INTERFACE_TEST:
2853 hreq_response(dd, HREQ_SUCCESS, data);
2854 break;
2855
2856 default:
2857 dd_dev_err(dd, "8051 request: unknown request 0x%x\n", type);
2858 hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
2859 break;
2860 }
2861}
2862
2863static void write_global_credit(struct hfi1_devdata *dd,
2864 u8 vau, u16 total, u16 shared)
2865{
2866 write_csr(dd, SEND_CM_GLOBAL_CREDIT,
2867 ((u64)total
2868 << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
2869 | ((u64)shared
2870 << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
2871 | ((u64)vau << SEND_CM_GLOBAL_CREDIT_AU_SHIFT));
2872}
2873
2874/*
2875 * Set up initial VL15 credits of the remote. Assumes the rest of
2876 * the CM credit registers are zero from a previous global or credit reset.
2877 */
2878void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf)
2879{
2880 /* leave shared count at zero for both global and VL15 */
2881 write_global_credit(dd, vau, vl15buf, 0);
2882
2883 /* We may need some credits for another VL when sending packets
2884 * with the snoop interface. Dividing it down the middle for VL15
2885 * and VL0 should suffice.
2886 */
2887 if (unlikely(dd->hfi1_snoop.mode_flag == HFI1_PORT_SNOOP_MODE)) {
2888 write_csr(dd, SEND_CM_CREDIT_VL15, (u64)(vl15buf >> 1)
2889 << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2890 write_csr(dd, SEND_CM_CREDIT_VL, (u64)(vl15buf >> 1)
2891 << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT);
2892 } else {
2893 write_csr(dd, SEND_CM_CREDIT_VL15, (u64)vl15buf
2894 << SEND_CM_CREDIT_VL15_DEDICATED_LIMIT_VL_SHIFT);
2895 }
2896}
2897
2898/*
2899 * Zero all credit details from the previous connection and
2900 * reset the CM manager's internal counters.
2901 */
2902void reset_link_credits(struct hfi1_devdata *dd)
2903{
2904 int i;
2905
2906 /* remove all previous VL credit limits */
2907 for (i = 0; i < TXE_NUM_DATA_VL; i++)
2908 write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
2909 write_csr(dd, SEND_CM_CREDIT_VL15, 0);
2910 write_global_credit(dd, 0, 0, 0);
2911 /* reset the CM block */
2912 pio_send_control(dd, PSC_CM_RESET);
2913}
2914
2915/* convert a vCU to a CU */
2916static u32 vcu_to_cu(u8 vcu)
2917{
2918 return 1 << vcu;
2919}
2920
2921/* convert a CU to a vCU */
2922static u8 cu_to_vcu(u32 cu)
2923{
2924 return ilog2(cu);
2925}
2926
2927/* convert a vAU to an AU */
2928static u32 vau_to_au(u8 vau)
2929{
2930 return 8 * (1 << vau);
2931}
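/*
 * Worked examples of the conversions above: vcu_to_cu(2) == 4 and
 * cu_to_vcu(4) == 2; vau_to_au(3) == 8 * (1 << 3) == 64.
 */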
2932
2933static void set_linkup_defaults(struct hfi1_pportdata *ppd)
2934{
2935 ppd->sm_trap_qp = 0x0;
2936 ppd->sa_qp = 0x1;
2937}
2938
2939/*
2940 * Graceful LCB shutdown. This leaves the LCB FIFOs in reset.
2941 */
2942static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
2943{
2944 u64 reg;
2945
2946 /* clear lcb run: LCB_CFG_RUN.EN = 0 */
2947 write_csr(dd, DC_LCB_CFG_RUN, 0);
2948 /* set tx fifo reset: LCB_CFG_TX_FIFOS_RESET.VAL = 1 */
2949 write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET,
2950 1ull << DC_LCB_CFG_TX_FIFOS_RESET_VAL_SHIFT);
2951 /* set dcc reset csr: DCC_CFG_RESET.{reset_lcb,reset_rx_fpe} = 1 */
2952 dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
2953 reg = read_csr(dd, DCC_CFG_RESET);
2954 write_csr(dd, DCC_CFG_RESET,
2955 reg
2956 | (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT)
2957 | (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
2958 (void) read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
2959 if (!abort) {
2960 udelay(1); /* must hold for the longer of 16cclks or 20ns */
2961 write_csr(dd, DCC_CFG_RESET, reg);
2962 write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
2963 }
2964}
2965
2966/*
2967 * This routine should be called after the link has been transitioned to
2968 * OFFLINE (OFFLINE state has the side effect of putting the SerDes into
2969 * reset).
2970 *
2971 * The expectation is that the caller of this routine would have taken
2972 * care of properly transitioning the link into the correct state.
2973 */
2974static void dc_shutdown(struct hfi1_devdata *dd)
2975{
2976 unsigned long flags;
2977
2978 spin_lock_irqsave(&dd->dc8051_lock, flags);
2979 if (dd->dc_shutdown) {
2980 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2981 return;
2982 }
2983 dd->dc_shutdown = 1;
2984 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
2985 /* Shutdown the LCB */
2986 lcb_shutdown(dd, 1);
2987 /* Going to OFFLINE would have caused the 8051 to put the
2988 * SerDes into reset already. Just need to shut down the 8051
2989 * itself. */
2990 write_csr(dd, DC_DC8051_CFG_RST, 0x1);
2991}
2992
2993/* Calling this after the DC has been brought out of reset should not
2994 * do any damage. */
2995static void dc_start(struct hfi1_devdata *dd)
2996{
2997 unsigned long flags;
2998 int ret;
2999
3000 spin_lock_irqsave(&dd->dc8051_lock, flags);
3001 if (!dd->dc_shutdown)
3002 goto done;
3003 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
3004 /* Take the 8051 out of reset */
3005 write_csr(dd, DC_DC8051_CFG_RST, 0ull);
3006 /* Wait until 8051 is ready */
3007 ret = wait_fm_ready(dd, TIMEOUT_8051_START);
3008 if (ret) {
3009 dd_dev_err(dd, "%s: timeout starting 8051 firmware\n",
3010 __func__);
3011 }
3012 /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
3013 write_csr(dd, DCC_CFG_RESET, 0x10);
3014 /* lcb_shutdown() with abort=1 does not restore these */
3015 write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
3016 spin_lock_irqsave(&dd->dc8051_lock, flags);
3017 dd->dc_shutdown = 0;
3018done:
3019 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
3020}
3021
3022/*
3023 * These LCB adjustments are for the Aurora SerDes core in the FPGA.
3024 */
3025static void adjust_lcb_for_fpga_serdes(struct hfi1_devdata *dd)
3026{
3027 u64 rx_radr, tx_radr;
3028 u32 version;
3029
3030 if (dd->icode != ICODE_FPGA_EMULATION)
3031 return;
3032
3033 /*
3034 * These LCB defaults on emulator _s are good, nothing to do here:
3035 * LCB_CFG_TX_FIFOS_RADR
3036 * LCB_CFG_RX_FIFOS_RADR
3037 * LCB_CFG_LN_DCLK
3038 * LCB_CFG_IGNORE_LOST_RCLK
3039 */
3040 if (is_emulator_s(dd))
3041 return;
3042 /* else this is _p */
3043
3044 version = emulator_rev(dd);
3045 if (!is_a0(dd))
3046 version = 0x2d; /* all B0 use 0x2d or higher settings */
3047
3048 if (version <= 0x12) {
3049 /* release 0x12 and below */
3050
3051 /*
3052 * LCB_CFG_RX_FIFOS_RADR.RST_VAL = 0x9
3053 * LCB_CFG_RX_FIFOS_RADR.OK_TO_JUMP_VAL = 0x9
3054 * LCB_CFG_RX_FIFOS_RADR.DO_NOT_JUMP_VAL = 0xa
3055 */
3056 rx_radr =
3057 0xaull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3058 | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3059 | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3060 /*
3061 * LCB_CFG_TX_FIFOS_RADR.ON_REINIT = 0 (default)
3062 * LCB_CFG_TX_FIFOS_RADR.RST_VAL = 6
3063 */
3064 tx_radr = 6ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3065 } else if (version <= 0x18) {
3066 /* release 0x13 up to 0x18 */
3067 /* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3068 rx_radr =
3069 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3070 | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3071 | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3072 tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3073 } else if (version == 0x19) {
3074 /* release 0x19 */
3075 /* LCB_CFG_RX_FIFOS_RADR = 0xa99 */
3076 rx_radr =
3077 0xAull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3078 | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3079 | 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3080 tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3081 } else if (version == 0x1a) {
3082 /* release 0x1a */
3083 /* LCB_CFG_RX_FIFOS_RADR = 0x988 */
3084 rx_radr =
3085 0x9ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3086 | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3087 | 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3088 tx_radr = 7ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3089 write_csr(dd, DC_LCB_CFG_LN_DCLK, 1ull);
3090 } else {
3091 /* release 0x1b and higher */
3092 /* LCB_CFG_RX_FIFOS_RADR = 0x877 */
3093 rx_radr =
3094 0x8ull << DC_LCB_CFG_RX_FIFOS_RADR_DO_NOT_JUMP_VAL_SHIFT
3095 | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_OK_TO_JUMP_VAL_SHIFT
3096 | 0x7ull << DC_LCB_CFG_RX_FIFOS_RADR_RST_VAL_SHIFT;
3097 tx_radr = 3ull << DC_LCB_CFG_TX_FIFOS_RADR_RST_VAL_SHIFT;
3098 }
3099
3100 write_csr(dd, DC_LCB_CFG_RX_FIFOS_RADR, rx_radr);
3101 /* LCB_CFG_IGNORE_LOST_RCLK.EN = 1 */
3102 write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK,
3103 DC_LCB_CFG_IGNORE_LOST_RCLK_EN_SMASK);
3104 write_csr(dd, DC_LCB_CFG_TX_FIFOS_RADR, tx_radr);
3105}
3106
3107/*
3108 * Handle a SMA idle message
3109 *
3110 * This is a work-queue function outside of the interrupt.
3111 */
3112void handle_sma_message(struct work_struct *work)
3113{
3114 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3115 sma_message_work);
3116 struct hfi1_devdata *dd = ppd->dd;
3117 u64 msg;
3118 int ret;
3119
3120 /* msg is bytes 1-4 of the 40-bit idle message - the command code
3121 * is stripped off */
3122 ret = read_idle_sma(dd, &msg);
3123 if (ret)
3124 return;
3125 dd_dev_info(dd, "%s: SMA message 0x%llx\n", __func__, msg);
3126 /*
3127 * React to the SMA message. Byte[1] (0 for us) is the command.
3128 */
3129 switch (msg & 0xff) {
3130 case SMA_IDLE_ARM:
3131 /*
3132 * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3133 * State Transitions
3134 *
3135 * Only expected in INIT or ARMED, discard otherwise.
3136 */
3137 if (ppd->host_link_state & (HLS_UP_INIT | HLS_UP_ARMED))
3138 ppd->neighbor_normal = 1;
3139 break;
3140 case SMA_IDLE_ACTIVE:
3141 /*
3142 * See OPAv1 table 9-14 - HFI and External Switch Ports Key
3143 * State Transitions
3144 *
3145 * Can activate the node. Discard otherwise.
3146 */
3147 if (ppd->host_link_state == HLS_UP_ARMED
3148 && ppd->is_active_optimize_enabled) {
3149 ppd->neighbor_normal = 1;
3150 ret = set_link_state(ppd, HLS_UP_ACTIVE);
3151 if (ret)
3152 dd_dev_err(
3153 dd,
3154 "%s: received Active SMA idle message, couldn't set link to Active\n",
3155 __func__);
3156 }
3157 break;
3158 default:
3159 dd_dev_err(dd,
3160 "%s: received unexpected SMA idle message 0x%llx\n",
3161 __func__, msg);
3162 break;
3163 }
3164}
3165
3166static void adjust_rcvctrl(struct hfi1_devdata *dd, u64 add, u64 clear)
3167{
3168 u64 rcvctrl;
3169 unsigned long flags;
3170
3171 spin_lock_irqsave(&dd->rcvctrl_lock, flags);
3172 rcvctrl = read_csr(dd, RCV_CTRL);
3173 rcvctrl |= add;
3174 rcvctrl &= ~clear;
3175 write_csr(dd, RCV_CTRL, rcvctrl);
3176 spin_unlock_irqrestore(&dd->rcvctrl_lock, flags);
3177}
3178
3179static inline void add_rcvctrl(struct hfi1_devdata *dd, u64 add)
3180{
3181 adjust_rcvctrl(dd, add, 0);
3182}
3183
3184static inline void clear_rcvctrl(struct hfi1_devdata *dd, u64 clear)
3185{
3186 adjust_rcvctrl(dd, 0, clear);
3187}
3188
3189/*
3190 * Called from all interrupt handlers to start handling an SPC freeze.
3191 */
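/*
 * 'flags' is a bitmask: FREEZE_SELF forces the freeze by writing CCE_CTRL,
 * FREEZE_LINK_DOWN is passed through to the SDMA engines, and FREEZE_ABORT
 * indicates that freeze recovery is not possible.
 */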
3192void start_freeze_handling(struct hfi1_pportdata *ppd, int flags)
3193{
3194 struct hfi1_devdata *dd = ppd->dd;
3195 struct send_context *sc;
3196 int i;
3197
3198 if (flags & FREEZE_SELF)
3199 write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3200
3201 /* enter frozen mode */
3202 dd->flags |= HFI1_FROZEN;
3203
3204 /* notify all SDMA engines that they are going into a freeze */
3205 sdma_freeze_notify(dd, !!(flags & FREEZE_LINK_DOWN));
3206
3207 /* do halt pre-handling on all enabled send contexts */
3208 for (i = 0; i < dd->num_send_contexts; i++) {
3209 sc = dd->send_contexts[i].sc;
3210 if (sc && (sc->flags & SCF_ENABLED))
3211 sc_stop(sc, SCF_FROZEN | SCF_HALTED);
3212 }
3213
3214 /* Send contexts are frozen. Notify user space */
3215 hfi1_set_uevent_bits(ppd, _HFI1_EVENT_FROZEN_BIT);
3216
3217 if (flags & FREEZE_ABORT) {
3218 dd_dev_err(dd,
3219 "Aborted freeze recovery. Please REBOOT system\n");
3220 return;
3221 }
3222 /* queue non-interrupt handler */
3223 queue_work(ppd->hfi1_wq, &ppd->freeze_work);
3224}
3225
3226/*
3227 * Wait until all 4 sub-blocks indicate that they have frozen or unfrozen,
3228 * depending on the "freeze" parameter.
3229 *
3230 * No need to return an error if it times out, our only option
3231 * is to proceed anyway.
3232 */
3233static void wait_for_freeze_status(struct hfi1_devdata *dd, int freeze)
3234{
3235 unsigned long timeout;
3236 u64 reg;
3237
3238 timeout = jiffies + msecs_to_jiffies(FREEZE_STATUS_TIMEOUT);
3239 while (1) {
3240 reg = read_csr(dd, CCE_STATUS);
3241 if (freeze) {
3242 /* waiting until all indicators are set */
3243 if ((reg & ALL_FROZE) == ALL_FROZE)
3244 return; /* all done */
3245 } else {
3246 /* waiting until all indicators are clear */
3247 if ((reg & ALL_FROZE) == 0)
3248 return; /* all done */
3249 }
3250
3251 if (time_after(jiffies, timeout)) {
3252 dd_dev_err(dd,
3253 "Time out waiting for SPC %sfreeze, bits 0x%llx, expecting 0x%llx, continuing\n",
3254 freeze ? "" : "un",
3255 reg & ALL_FROZE,
3256 freeze ? ALL_FROZE : 0ull);
3257 return;
3258 }
3259 usleep_range(80, 120);
3260 }
3261}
3262
3263/*
3264 * Do all freeze handling for the RXE block.
3265 */
3266static void rxe_freeze(struct hfi1_devdata *dd)
3267{
3268 int i;
3269
3270 /* disable port */
3271 clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3272
3273 /* disable all receive contexts */
3274 for (i = 0; i < dd->num_rcv_contexts; i++)
3275 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS, i);
3276}
3277
3278/*
3279 * Unfreeze handling for the RXE block - kernel contexts only.
3280 * This will also enable the port. User contexts will do unfreeze
3281 * handling on a per-context basis as they call into the driver.
3282 *
3283 */
3284static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
3285{
3286 int i;
3287
3288 /* enable all kernel contexts */
3289 for (i = 0; i < dd->n_krcv_queues; i++)
3290 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, i);
3291
3292 /* enable port */
3293 add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3294}
3295
3296/*
3297 * Non-interrupt SPC freeze handling.
3298 *
3299 * This is a work-queue function outside of the triggering interrupt.
3300 */
3301void handle_freeze(struct work_struct *work)
3302{
3303 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3304 freeze_work);
3305 struct hfi1_devdata *dd = ppd->dd;
3306
3307 /* wait for freeze indicators on all affected blocks */
3308 dd_dev_info(dd, "Entering SPC freeze\n");
3309 wait_for_freeze_status(dd, 1);
3310
3311 /* SPC is now frozen */
3312
3313 /* do send PIO freeze steps */
3314 pio_freeze(dd);
3315
3316 /* do send DMA freeze steps */
3317 sdma_freeze(dd);
3318
3319 /* do send egress freeze steps - nothing to do */
3320
3321 /* do receive freeze steps */
3322 rxe_freeze(dd);
3323
3324 /*
3325 * Unfreeze the hardware - clear the freeze, wait for each
3326 * block's frozen bit to clear, then clear the frozen flag.
3327 */
3328 write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3329 wait_for_freeze_status(dd, 0);
3330
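	/*
	 * A0 parts get an extra freeze/unfreeze cycle here; the is_a0()
	 * guard suggests this is a first-silicon workaround.
	 */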
3331 if (is_a0(dd)) {
3332 write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_FREEZE_SMASK);
3333 wait_for_freeze_status(dd, 1);
3334 write_csr(dd, CCE_CTRL, CCE_CTRL_SPC_UNFREEZE_SMASK);
3335 wait_for_freeze_status(dd, 0);
3336 }
3337
3338 /* do send PIO unfreeze steps for kernel contexts */
3339 pio_kernel_unfreeze(dd);
3340
3341 /* do send DMA unfreeze steps */
3342 sdma_unfreeze(dd);
3343
3344 /* do send egress unfreeze steps - nothing to do */
3345
3346 /* do receive unfreeze steps for kernel contexts */
3347 rxe_kernel_unfreeze(dd);
3348
3349 /*
3350 * The unfreeze procedure touches global device registers when
3351 * it disables and re-enables RXE. Mark the device unfrozen
3352 * after all that is done so other parts of the driver waiting
3353 * for the device to unfreeze don't do things out of order.
3354 *
3355 * The above implies that the meaning of the HFI1_FROZEN flag is
3356 * "Device has gone into freeze mode and freeze mode handling
3357 * is still in progress."
3358 *
3359 * The flag will be removed when freeze mode processing has
3360 * completed.
3361 */
3362 dd->flags &= ~HFI1_FROZEN;
3363 wake_up(&dd->event_queue);
3364
3365 /* no longer frozen */
3366 dd_dev_info(dd, "Exiting SPC freeze\n");
3367}
3368
3369/*
3370 * Handle a link up interrupt from the 8051.
3371 *
3372 * This is a work-queue function outside of the interrupt.
3373 */
3374void handle_link_up(struct work_struct *work)
3375{
3376 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3377 link_up_work);
3378 set_link_state(ppd, HLS_UP_INIT);
3379
3380 /* cache the read of DC_LCB_STS_ROUND_TRIP_LTP_CNT */
3381 read_ltp_rtt(ppd->dd);
3382 /*
3383 * OPA specifies that certain counters are cleared on a transition
3384 * to link up, so do that.
3385 */
3386 clear_linkup_counters(ppd->dd);
3387 /*
3388 * And (re)set link up default values.
3389 */
3390 set_linkup_defaults(ppd);
3391
3392 /* enforce link speed enabled */
3393 if ((ppd->link_speed_active & ppd->link_speed_enabled) == 0) {
3394 /* oops - current speed is not enabled, bounce */
3395 dd_dev_err(ppd->dd,
3396 "Link speed active 0x%x is outside enabled 0x%x, downing link\n",
3397 ppd->link_speed_active, ppd->link_speed_enabled);
3398 set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
3399 OPA_LINKDOWN_REASON_SPEED_POLICY);
3400 set_link_state(ppd, HLS_DN_OFFLINE);
3401 start_link(ppd);
3402 }
3403}
3404
3405/* Several pieces of LNI information were cached for SMA in ppd.
3406 * Reset these on link down */
3407static void reset_neighbor_info(struct hfi1_pportdata *ppd)
3408{
3409 ppd->neighbor_guid = 0;
3410 ppd->neighbor_port_number = 0;
3411 ppd->neighbor_type = 0;
3412 ppd->neighbor_fm_security = 0;
3413}
3414
3415/*
3416 * Handle a link down interrupt from the 8051.
3417 *
3418 * This is a work-queue function outside of the interrupt.
3419 */
3420void handle_link_down(struct work_struct *work)
3421{
3422 u8 lcl_reason, neigh_reason = 0;
3423 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3424 link_down_work);
3425
3426 /* go offline first, then deal with reasons */
3427 set_link_state(ppd, HLS_DN_OFFLINE);
3428
3429 lcl_reason = 0;
3430 read_planned_down_reason_code(ppd->dd, &neigh_reason);
3431
3432 /*
3433 * If no reason, assume peer-initiated but missed
3434 * LinkGoingDown idle flits.
3435 */
3436 if (neigh_reason == 0)
3437 lcl_reason = OPA_LINKDOWN_REASON_NEIGHBOR_UNKNOWN;
3438
3439 set_link_down_reason(ppd, lcl_reason, neigh_reason, 0);
3440
3441 reset_neighbor_info(ppd);
3442
3443 /* disable the port */
3444 clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
3445
3446 /* If there is no cable attached, turn the DC off. Otherwise,
3447 * start the link bring up. */
3448 if (!qsfp_mod_present(ppd))
3449 dc_shutdown(ppd->dd);
3450 else
3451 start_link(ppd);
3452}
3453
3454void handle_link_bounce(struct work_struct *work)
3455{
3456 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3457 link_bounce_work);
3458
3459 /*
3460 * Only do something if the link is currently up.
3461 */
3462 if (ppd->host_link_state & HLS_UP) {
3463 set_link_state(ppd, HLS_DN_OFFLINE);
3464 start_link(ppd);
3465 } else {
3466 dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
3467 __func__, link_state_name(ppd->host_link_state));
3468 }
3469}
3470
3471/*
3472 * Mask conversion: Capability exchange to Port LTP. The capability
3473 * exchange has an implicit 16b CRC that is mandatory.
3474 */
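/*
 * For example, a capability mask of just CAP_CRC_14B converts to
 * PORT_LTP_CRC_MODE_16 | PORT_LTP_CRC_MODE_14.
 */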
3475static int cap_to_port_ltp(int cap)
3476{
3477 int port_ltp = PORT_LTP_CRC_MODE_16; /* this mode is mandatory */
3478
3479 if (cap & CAP_CRC_14B)
3480 port_ltp |= PORT_LTP_CRC_MODE_14;
3481 if (cap & CAP_CRC_48B)
3482 port_ltp |= PORT_LTP_CRC_MODE_48;
3483 if (cap & CAP_CRC_12B_16B_PER_LANE)
3484 port_ltp |= PORT_LTP_CRC_MODE_PER_LANE;
3485
3486 return port_ltp;
3487}
3488
3489/*
3490 * Convert an OPA Port LTP mask to capability mask
3491 */
3492int port_ltp_to_cap(int port_ltp)
3493{
3494 int cap_mask = 0;
3495
3496 if (port_ltp & PORT_LTP_CRC_MODE_14)
3497 cap_mask |= CAP_CRC_14B;
3498 if (port_ltp & PORT_LTP_CRC_MODE_48)
3499 cap_mask |= CAP_CRC_48B;
3500 if (port_ltp & PORT_LTP_CRC_MODE_PER_LANE)
3501 cap_mask |= CAP_CRC_12B_16B_PER_LANE;
3502
3503 return cap_mask;
3504}
3505
3506/*
3507 * Convert a single DC LCB CRC mode to an OPA Port LTP mask.
3508 */
3509static int lcb_to_port_ltp(int lcb_crc)
3510{
3511 int port_ltp = 0;
3512
3513 if (lcb_crc == LCB_CRC_12B_16B_PER_LANE)
3514 port_ltp = PORT_LTP_CRC_MODE_PER_LANE;
3515 else if (lcb_crc == LCB_CRC_48B)
3516 port_ltp = PORT_LTP_CRC_MODE_48;
3517 else if (lcb_crc == LCB_CRC_14B)
3518 port_ltp = PORT_LTP_CRC_MODE_14;
3519 else
3520 port_ltp = PORT_LTP_CRC_MODE_16;
3521
3522 return port_ltp;
3523}
3524
3525/*
3526 * Our neighbor has indicated that we are allowed to act as a fabric
3527 * manager, so place the full management partition key in the second
3528 * (0-based) pkey array position (see OPAv1, section 20.2.2.6.8). Note
3529 * that we should already have the limited management partition key in
3530 * array element 1, and also that the port is not yet up when
3531 * add_full_mgmt_pkey() is invoked.
3532 */
3533static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
3534{
3535 struct hfi1_devdata *dd = ppd->dd;
3536
3537 /* Sanity check - ppd->pkeys[2] should be 0 */
3538 if (ppd->pkeys[2] != 0)
3539 dd_dev_err(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n",
3540 __func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
3541 ppd->pkeys[2] = FULL_MGMT_P_KEY;
3542 (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
3543}
3544
3545/*
3546 * Convert the given link width to the OPA link width bitmask.
3547 */
3548static u16 link_width_to_bits(struct hfi1_devdata *dd, u16 width)
3549{
3550 switch (width) {
3551 case 0:
3552 /*
3553 * Simulator and quick linkup do not set the width.
3554 * Just set it to 4x without complaint.
3555 */
3556 if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR || quick_linkup)
3557 return OPA_LINK_WIDTH_4X;
3558 return 0; /* no lanes up */
3559 case 1: return OPA_LINK_WIDTH_1X;
3560 case 2: return OPA_LINK_WIDTH_2X;
3561 case 3: return OPA_LINK_WIDTH_3X;
3562 default:
3563 dd_dev_info(dd, "%s: invalid width %d, using 4\n",
3564 __func__, width);
3565 /* fall through */
3566 case 4: return OPA_LINK_WIDTH_4X;
3567 }
3568}
3569
3570/*
3571 * Do a population count on the bottom nibble.
3572 */
3573static const u8 bit_counts[16] = {
3574 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4
3575};
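/* e.g. nibble_to_count(0xb) is 3: 0xb = 0b1011 has three bits set */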
3576static inline u8 nibble_to_count(u8 nibble)
3577{
3578 return bit_counts[nibble & 0xf];
3579}
3580
3581/*
3582 * Read the active lane information from the 8051 registers and return
3583 * their widths.
3584 *
3585 * Active lane information is found in these 8051 registers:
3586 * enable_lane_tx
3587 * enable_lane_rx
3588 */
3589static void get_link_widths(struct hfi1_devdata *dd, u16 *tx_width,
3590 u16 *rx_width)
3591{
3592 u16 tx, rx;
3593 u8 enable_lane_rx;
3594 u8 enable_lane_tx;
3595 u8 tx_polarity_inversion;
3596 u8 rx_polarity_inversion;
3597 u8 max_rate;
3598
3599 /* read the active lanes */
3600 read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
3601 &rx_polarity_inversion, &max_rate);
3602 read_local_lni(dd, &enable_lane_rx);
3603
3604 /* convert to counts */
3605 tx = nibble_to_count(enable_lane_tx);
3606 rx = nibble_to_count(enable_lane_rx);
3607
3608 /*
3609 * Set link_speed_active here, overriding what was set in
3610 * handle_verify_cap(). The ASIC 8051 firmware does not correctly
3611 * set the max_rate field in handle_verify_cap until v0.19.
3612 */
3613 if ((dd->icode == ICODE_RTL_SILICON)
3614 && (dd->dc8051_ver < dc8051_ver(0, 19))) {
3615 /* max_rate: 0 = 12.5G, 1 = 25G */
3616 switch (max_rate) {
3617 case 0:
3618 dd->pport[0].link_speed_active = OPA_LINK_SPEED_12_5G;
3619 break;
3620 default:
3621 dd_dev_err(dd,
3622 "%s: unexpected max rate %d, using 25Gb\n",
3623 __func__, (int)max_rate);
3624 /* fall through */
3625 case 1:
3626 dd->pport[0].link_speed_active = OPA_LINK_SPEED_25G;
3627 break;
3628 }
3629 }
3630
3631 dd_dev_info(dd,
3632 "Fabric active lanes (width): tx 0x%x (%d), rx 0x%x (%d)\n",
3633 enable_lane_tx, tx, enable_lane_rx, rx);
3634 *tx_width = link_width_to_bits(dd, tx);
3635 *rx_width = link_width_to_bits(dd, rx);
3636}
3637
3638/*
3639 * Read verify_cap_local_fm_link_width[1] to obtain the link widths.
3640 * Valid after the end of VerifyCap and during LinkUp. Does not change
3641 * after link up. I.e. look elsewhere for downgrade information.
3642 *
3643 * Bits are:
3644 * + bits [7:4] contain the number of active transmitters
3645 * + bits [3:0] contain the number of active receivers
3646 * These are numbers 1 through 4 and can be different values if the
3647 * link is asymmetric.
3648 *
3649 * verify_cap_local_fm_link_width[0] retains its original value.
3650 */
3651static void get_linkup_widths(struct hfi1_devdata *dd, u16 *tx_width,
3652 u16 *rx_width)
3653{
3654 u16 widths, tx, rx;
3655 u8 misc_bits, local_flags;
3656 u16 active_tx, active_rx;
3657
3658 read_vc_local_link_width(dd, &misc_bits, &local_flags, &widths);
3659 tx = widths >> 12;
3660 rx = (widths >> 8) & 0xf;
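	/*
	 * e.g. widths == 0x4400 (byte [1] == 0x44) decodes to tx = 4 and
	 * rx = 4, i.e. a symmetric 4X link.
	 */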
3661
3662 *tx_width = link_width_to_bits(dd, tx);
3663 *rx_width = link_width_to_bits(dd, rx);
3664
3665 /* print the active widths */
3666 get_link_widths(dd, &active_tx, &active_rx);
3667}
3668
3669/*
3670 * Set ppd->link_width_active and ppd->link_width_downgrade_active using
3671 * hardware information when the link first comes up.
3672 *
3673 * The link width is not available until after VerifyCap.AllFramesReceived
3674 * (the trigger for handle_verify_cap), so this is outside that routine
3675 * and should be called when the 8051 signals linkup.
3676 */
3677void get_linkup_link_widths(struct hfi1_pportdata *ppd)
3678{
3679 u16 tx_width, rx_width;
3680
3681 /* get end-of-LNI link widths */
3682 get_linkup_widths(ppd->dd, &tx_width, &rx_width);
3683
3684 /* use tx_width as the link is supposed to be symmetric on link up */
3685 ppd->link_width_active = tx_width;
3686 /* link width downgrade active (LWD.A) starts out matching LW.A */
3687 ppd->link_width_downgrade_tx_active = ppd->link_width_active;
3688 ppd->link_width_downgrade_rx_active = ppd->link_width_active;
3689 /* per OPA spec, on link up LWD.E resets to LWD.S */
3690 ppd->link_width_downgrade_enabled = ppd->link_width_downgrade_supported;
3691 /* cache the active egress rate (units of 10^6 bits/sec) */
3692 ppd->current_egress_rate = active_egress_rate(ppd);
3693}
3694
3695/*
3696 * Handle a verify capabilities interrupt from the 8051.
3697 *
3698 * This is a work-queue function outside of the interrupt.
3699 */
3700void handle_verify_cap(struct work_struct *work)
3701{
3702 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3703 link_vc_work);
3704 struct hfi1_devdata *dd = ppd->dd;
3705 u64 reg;
3706 u8 power_management;
3707 u8 continuous;
3708 u8 vcu;
3709 u8 vau;
3710 u8 z;
3711 u16 vl15buf;
3712 u16 link_widths;
3713 u16 crc_mask;
3714 u16 crc_val;
3715 u16 device_id;
3716 u16 active_tx, active_rx;
3717 u8 partner_supported_crc;
3718 u8 remote_tx_rate;
3719 u8 device_rev;
3720
3721 set_link_state(ppd, HLS_VERIFY_CAP);
3722
3723 lcb_shutdown(dd, 0);
3724 adjust_lcb_for_fpga_serdes(dd);
3725
3726 /*
3727 * These are now valid:
3728 * remote VerifyCap fields in the general LNI config
3729 * CSR DC8051_STS_REMOTE_GUID
3730 * CSR DC8051_STS_REMOTE_NODE_TYPE
3731 * CSR DC8051_STS_REMOTE_FM_SECURITY
3732 * CSR DC8051_STS_REMOTE_PORT_NO
3733 */
3734
3735 read_vc_remote_phy(dd, &power_management, &continuous);
3736 read_vc_remote_fabric(
3737 dd,
3738 &vau,
3739 &z,
3740 &vcu,
3741 &vl15buf,
3742 &partner_supported_crc);
3743 read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths);
3744 read_remote_device_id(dd, &device_id, &device_rev);
3745 /*
3746 * And the 'MgmtAllowed' information, which is exchanged during
3747 * LNI, is also available at this point.
3748 */
3749 read_mgmt_allowed(dd, &ppd->mgmt_allowed);
3750 /* print the active widths */
3751 get_link_widths(dd, &active_tx, &active_rx);
3752 dd_dev_info(dd,
3753 "Peer PHY: power management 0x%x, continuous updates 0x%x\n",
3754 (int)power_management, (int)continuous);
3755 dd_dev_info(dd,
3756 "Peer Fabric: vAU %d, Z %d, vCU %d, vl15 credits 0x%x, CRC sizes 0x%x\n",
3757 (int)vau,
3758 (int)z,
3759 (int)vcu,
3760 (int)vl15buf,
3761 (int)partner_supported_crc);
3762 dd_dev_info(dd, "Peer Link Width: tx rate 0x%x, widths 0x%x\n",
3763 (u32)remote_tx_rate, (u32)link_widths);
3764 dd_dev_info(dd, "Peer Device ID: 0x%04x, Revision 0x%02x\n",
3765 (u32)device_id, (u32)device_rev);
3766 /*
3767 * The peer vAU value just read is the peer receiver value. HFI does
3768 * not support a transmit vAU of 0 (AU == 8). We advertised that
3769 * with Z=1 in the fabric capabilities sent to the peer. The peer
3770 * will see our Z=1, and, if it advertised a vAU of 0, will move its
3771 * receive to vAU of 1 (AU == 16). Do the same here. We do not care
3772 * about the peer Z value - our sent vAU is 3 (hardwired) and is not
3773 * subject to the Z value exception.
3774 */
3775 if (vau == 0)
3776 vau = 1;
3777 set_up_vl15(dd, vau, vl15buf);
3778
3779 /* set up the LCB CRC mode */
3780 crc_mask = ppd->port_crc_mode_enabled & partner_supported_crc;
3781
3782 /* order is important: use the lowest bit in common */
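	/* e.g. if both 14b and 48b are in common, LCB_CRC_14B is chosen */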
3783 if (crc_mask & CAP_CRC_14B)
3784 crc_val = LCB_CRC_14B;
3785 else if (crc_mask & CAP_CRC_48B)
3786 crc_val = LCB_CRC_48B;
3787 else if (crc_mask & CAP_CRC_12B_16B_PER_LANE)
3788 crc_val = LCB_CRC_12B_16B_PER_LANE;
3789 else
3790 crc_val = LCB_CRC_16B;
3791
3792 dd_dev_info(dd, "Final LCB CRC mode: %d\n", (int)crc_val);
3793 write_csr(dd, DC_LCB_CFG_CRC_MODE,
3794 (u64)crc_val << DC_LCB_CFG_CRC_MODE_TX_VAL_SHIFT);
3795
3796 /* set (14b only) or clear sideband credit */
3797 reg = read_csr(dd, SEND_CM_CTRL);
3798 if (crc_val == LCB_CRC_14B && crc_14b_sideband) {
3799 write_csr(dd, SEND_CM_CTRL,
3800 reg | SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3801 } else {
3802 write_csr(dd, SEND_CM_CTRL,
3803 reg & ~SEND_CM_CTRL_FORCE_CREDIT_MODE_SMASK);
3804 }
3805
3806 ppd->link_speed_active = 0; /* invalid value */
3807 if (dd->dc8051_ver < dc8051_ver(0, 20)) {
3808 /* remote_tx_rate: 0 = 12.5G, 1 = 25G */
3809 switch (remote_tx_rate) {
3810 case 0:
3811 ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3812 break;
3813 case 1:
3814 ppd->link_speed_active = OPA_LINK_SPEED_25G;
3815 break;
3816 }
3817 } else {
3818 /* actual rate is highest bit of the ANDed rates */
3819 u8 rate = remote_tx_rate & ppd->local_tx_rate;
3820
3821 if (rate & 2)
3822 ppd->link_speed_active = OPA_LINK_SPEED_25G;
3823 else if (rate & 1)
3824 ppd->link_speed_active = OPA_LINK_SPEED_12_5G;
3825 }
3826 if (ppd->link_speed_active == 0) {
3827 dd_dev_err(dd, "%s: unexpected remote tx rate %d, using 25Gb\n",
3828 __func__, (int)remote_tx_rate);
3829 ppd->link_speed_active = OPA_LINK_SPEED_25G;
3830 }
3831
3832 /*
3833 * Cache the values of the supported, enabled, and active
3834 * LTP CRC modes to return in 'portinfo' queries. But the bit
3835 * flags that are returned in the portinfo query differ from
3836 * what's in the link_crc_mask, crc_sizes, and crc_val
3837 * variables. Convert these here.
3838 */
3839 ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
3840 /* supported crc modes */
3841 ppd->port_ltp_crc_mode |=
3842 cap_to_port_ltp(ppd->port_crc_mode_enabled) << 4;
3843 /* enabled crc modes */
3844 ppd->port_ltp_crc_mode |= lcb_to_port_ltp(crc_val);
3845 /* active crc mode */
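	/*
	 * Resulting layout: bits [11:8] supported, [7:4] enabled,
	 * [3:0] active - one nibble of PORT_LTP_CRC_MODE_* bits each.
	 */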
3846
3847 /* set up the remote credit return table */
3848 assign_remote_cm_au_table(dd, vcu);
3849
3850 /*
3851 * The LCB is reset on entry to handle_verify_cap(), so this must
3852 * be applied on every link up.
3853 *
3854 * Adjust LCB error kill enable to kill the link if
3855 * these RBUF errors are seen:
3856 * REPLAY_BUF_MBE_SMASK
3857 * FLIT_INPUT_BUF_MBE_SMASK
3858 */
3859 if (is_a0(dd)) { /* fixed in B0 */
3860 reg = read_csr(dd, DC_LCB_CFG_LINK_KILL_EN);
3861 reg |= DC_LCB_CFG_LINK_KILL_EN_REPLAY_BUF_MBE_SMASK
3862 | DC_LCB_CFG_LINK_KILL_EN_FLIT_INPUT_BUF_MBE_SMASK;
3863 write_csr(dd, DC_LCB_CFG_LINK_KILL_EN, reg);
3864 }
3865
3866 /* pull LCB fifos out of reset - all fifo clocks must be stable */
3867 write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
3868
3869 /* give 8051 access to the LCB CSRs */
3870 write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
3871 set_8051_lcb_access(dd);
3872
3873 ppd->neighbor_guid =
3874 read_csr(dd, DC_DC8051_STS_REMOTE_GUID);
3875 ppd->neighbor_port_number = read_csr(dd, DC_DC8051_STS_REMOTE_PORT_NO) &
3876 DC_DC8051_STS_REMOTE_PORT_NO_VAL_SMASK;
3877 ppd->neighbor_type =
3878 read_csr(dd, DC_DC8051_STS_REMOTE_NODE_TYPE) &
3879 DC_DC8051_STS_REMOTE_NODE_TYPE_VAL_MASK;
3880 ppd->neighbor_fm_security =
3881 read_csr(dd, DC_DC8051_STS_REMOTE_FM_SECURITY) &
3882 DC_DC8051_STS_LOCAL_FM_SECURITY_DISABLED_MASK;
3883 dd_dev_info(dd,
3884 "Neighbor Guid: %llx Neighbor type %d MgmtAllowed %d FM security bypass %d\n",
3885 ppd->neighbor_guid, ppd->neighbor_type,
3886 ppd->mgmt_allowed, ppd->neighbor_fm_security);
3887 if (ppd->mgmt_allowed)
3888 add_full_mgmt_pkey(ppd);
3889
3890 /* tell the 8051 to go to LinkUp */
3891 set_link_state(ppd, HLS_GOING_UP);
3892}
3893
3894/*
3895 * Apply the link width downgrade enabled policy against the current active
3896 * link widths.
3897 *
3898 * Called when the enabled policy changes or the active link widths change.
3899 */
3900void apply_link_downgrade_policy(struct hfi1_pportdata *ppd, int refresh_widths)
3901{
3902 int skip = 1;
3903 int do_bounce = 0;
3904 u16 lwde = ppd->link_width_downgrade_enabled;
3905 u16 tx, rx;
3906
3907 mutex_lock(&ppd->hls_lock);
3908 /* only apply if the link is up */
3909 if (ppd->host_link_state & HLS_UP)
3910 skip = 0;
3911 mutex_unlock(&ppd->hls_lock);
3912 if (skip)
3913 return;
3914
3915 if (refresh_widths) {
3916 get_link_widths(ppd->dd, &tx, &rx);
3917 ppd->link_width_downgrade_tx_active = tx;
3918 ppd->link_width_downgrade_rx_active = rx;
3919 }
3920
3921 if (lwde == 0) {
3922 /* downgrade is disabled */
3923
3924 /* bounce if not at starting active width */
3925 if ((ppd->link_width_active !=
3926 ppd->link_width_downgrade_tx_active)
3927 || (ppd->link_width_active !=
3928 ppd->link_width_downgrade_rx_active)) {
3929 dd_dev_err(ppd->dd,
3930 "Link downgrade is disabled and link has downgraded, downing link\n");
3931 dd_dev_err(ppd->dd,
3932 " original 0x%x, tx active 0x%x, rx active 0x%x\n",
3933 ppd->link_width_active,
3934 ppd->link_width_downgrade_tx_active,
3935 ppd->link_width_downgrade_rx_active);
3936 do_bounce = 1;
3937 }
3938 } else if ((lwde & ppd->link_width_downgrade_tx_active) == 0
3939 || (lwde & ppd->link_width_downgrade_rx_active) == 0) {
3940 /* Tx or Rx is outside the enabled policy */
3941 dd_dev_err(ppd->dd,
3942 "Link is outside of downgrade allowed, downing link\n");
3943 dd_dev_err(ppd->dd,
3944 " enabled 0x%x, tx active 0x%x, rx active 0x%x\n",
3945 lwde,
3946 ppd->link_width_downgrade_tx_active,
3947 ppd->link_width_downgrade_rx_active);
3948 do_bounce = 1;
3949 }
3950
3951 if (do_bounce) {
3952 set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
3953 OPA_LINKDOWN_REASON_WIDTH_POLICY);
3954 set_link_state(ppd, HLS_DN_OFFLINE);
3955 start_link(ppd);
3956 }
3957}
3958
3959/*
3960 * Handle a link downgrade interrupt from the 8051.
3961 *
3962 * This is a work-queue function outside of the interrupt.
3963 */
3964void handle_link_downgrade(struct work_struct *work)
3965{
3966 struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
3967 link_downgrade_work);
3968
3969 dd_dev_info(ppd->dd, "8051: Link width downgrade\n");
3970 apply_link_downgrade_policy(ppd, 1);
3971}
3972
3973static char *dcc_err_string(char *buf, int buf_len, u64 flags)
3974{
3975 return flag_string(buf, buf_len, flags, dcc_err_flags,
3976 ARRAY_SIZE(dcc_err_flags));
3977}
3978
3979static char *lcb_err_string(char *buf, int buf_len, u64 flags)
3980{
3981 return flag_string(buf, buf_len, flags, lcb_err_flags,
3982 ARRAY_SIZE(lcb_err_flags));
3983}
3984
3985static char *dc8051_err_string(char *buf, int buf_len, u64 flags)
3986{
3987 return flag_string(buf, buf_len, flags, dc8051_err_flags,
3988 ARRAY_SIZE(dc8051_err_flags));
3989}
3990
3991static char *dc8051_info_err_string(char *buf, int buf_len, u64 flags)
3992{
3993 return flag_string(buf, buf_len, flags, dc8051_info_err_flags,
3994 ARRAY_SIZE(dc8051_info_err_flags));
3995}
3996
3997static char *dc8051_info_host_msg_string(char *buf, int buf_len, u64 flags)
3998{
3999 return flag_string(buf, buf_len, flags, dc8051_info_host_msg_flags,
4000 ARRAY_SIZE(dc8051_info_host_msg_flags));
4001}
4002
4003static void handle_8051_interrupt(struct hfi1_devdata *dd, u32 unused, u64 reg)
4004{
4005 struct hfi1_pportdata *ppd = dd->pport;
4006 u64 info, err, host_msg;
4007 int queue_link_down = 0;
4008 char buf[96];
4009
4010 /* look at the flags */
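	/*
	 * Each recognized condition below is handled and then cleared from
	 * reg/err/host_msg, so anything still set at the end is only
	 * reported, not acted on.
	 */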
4011 if (reg & DC_DC8051_ERR_FLG_SET_BY_8051_SMASK) {
4012 /* 8051 information set by firmware */
4013 /* read DC8051_DBG_ERR_INFO_SET_BY_8051 for details */
4014 info = read_csr(dd, DC_DC8051_DBG_ERR_INFO_SET_BY_8051);
4015 err = (info >> DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_SHIFT)
4016 & DC_DC8051_DBG_ERR_INFO_SET_BY_8051_ERROR_MASK;
4017 host_msg = (info >>
4018 DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_SHIFT)
4019 & DC_DC8051_DBG_ERR_INFO_SET_BY_8051_HOST_MSG_MASK;
4020
4021 /*
4022 * Handle error flags.
4023 */
4024 if (err & FAILED_LNI) {
4025 /*
4026 * LNI error indications are cleared by the 8051
4027 * only when starting polling. Only pay attention
4028 * to them when in the states that occur during
4029 * LNI.
4030 */
4031 if (ppd->host_link_state
4032 & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
4033 queue_link_down = 1;
4034 dd_dev_info(dd, "Link error: %s\n",
4035 dc8051_info_err_string(buf,
4036 sizeof(buf),
4037 err & FAILED_LNI));
4038 }
4039 err &= ~(u64)FAILED_LNI;
4040 }
4041 if (err) {
4042 /* report remaining errors, but do not do anything */
4043 dd_dev_err(dd, "8051 info error: %s\n",
4044 dc8051_info_err_string(buf, sizeof(buf), err));
4045 }
4046
4047 /*
4048 * Handle host message flags.
4049 */
4050 if (host_msg & HOST_REQ_DONE) {
4051 /*
4052 * Presently, the driver does a busy wait for
4053 * host requests to complete. This is only an
4054 * informational message.
4055 * NOTE: The 8051 clears the host message
4056 * information *on the next 8051 command*.
4057 * Therefore, when linkup is achieved,
4058 * this flag will still be set.
4059 */
4060 host_msg &= ~(u64)HOST_REQ_DONE;
4061 }
4062 if (host_msg & BC_SMA_MSG) {
4063 queue_work(ppd->hfi1_wq, &ppd->sma_message_work);
4064 host_msg &= ~(u64)BC_SMA_MSG;
4065 }
4066 if (host_msg & LINKUP_ACHIEVED) {
4067 dd_dev_info(dd, "8051: Link up\n");
4068 queue_work(ppd->hfi1_wq, &ppd->link_up_work);
4069 host_msg &= ~(u64)LINKUP_ACHIEVED;
4070 }
4071 if (host_msg & EXT_DEVICE_CFG_REQ) {
4072 handle_8051_request(dd);
4073 host_msg &= ~(u64)EXT_DEVICE_CFG_REQ;
4074 }
4075 if (host_msg & VERIFY_CAP_FRAME) {
4076 queue_work(ppd->hfi1_wq, &ppd->link_vc_work);
4077 host_msg &= ~(u64)VERIFY_CAP_FRAME;
4078 }
4079 if (host_msg & LINK_GOING_DOWN) {
4080 const char *extra = "";
4081 /* no downgrade action needed if going down */
4082 if (host_msg & LINK_WIDTH_DOWNGRADED) {
4083 host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4084 extra = " (ignoring downgrade)";
4085 }
4086 dd_dev_info(dd, "8051: Link down%s\n", extra);
4087 queue_link_down = 1;
4088 host_msg &= ~(u64)LINK_GOING_DOWN;
4089 }
4090 if (host_msg & LINK_WIDTH_DOWNGRADED) {
4091 queue_work(ppd->hfi1_wq, &ppd->link_downgrade_work);
4092 host_msg &= ~(u64)LINK_WIDTH_DOWNGRADED;
4093 }
4094 if (host_msg) {
4095 /* report remaining messages, but do not do anything */
4096 dd_dev_info(dd, "8051 info host message: %s\n",
4097 dc8051_info_host_msg_string(buf, sizeof(buf),
4098 host_msg));
4099 }
4100
4101 reg &= ~DC_DC8051_ERR_FLG_SET_BY_8051_SMASK;
4102 }
4103 if (reg & DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK) {
4104 /*
4105 * Lost the 8051 heartbeat. If this happens, we
4106 * receive constant interrupts about it. Disable
4107 * the interrupt after the first.
4108 */
4109 dd_dev_err(dd, "Lost 8051 heartbeat\n");
4110 write_csr(dd, DC_DC8051_ERR_EN,
4111 read_csr(dd, DC_DC8051_ERR_EN)
4112 & ~DC_DC8051_ERR_EN_LOST_8051_HEART_BEAT_SMASK);
4113
4114 reg &= ~DC_DC8051_ERR_FLG_LOST_8051_HEART_BEAT_SMASK;
4115 }
4116 if (reg) {
4117 /* report the error, but do not do anything */
4118 dd_dev_err(dd, "8051 error: %s\n",
4119 dc8051_err_string(buf, sizeof(buf), reg));
4120 }
4121
4122 if (queue_link_down) {
4123 /* if the link is already going down or disabled, do not
4124 * queue another */
4125 if ((ppd->host_link_state
4126 & (HLS_GOING_OFFLINE|HLS_LINK_COOLDOWN))
4127 || ppd->link_enabled == 0) {
4128 dd_dev_info(dd, "%s: not queuing link down\n",
4129 __func__);
4130 } else {
4131 queue_work(ppd->hfi1_wq, &ppd->link_down_work);
4132 }
4133 }
4134}
4135
4136static const char * const fm_config_txt[] = {
4137[0] =
4138 "BadHeadDist: Distance violation between two head flits",
4139[1] =
4140 "BadTailDist: Distance violation between two tail flits",
4141[2] =
4142 "BadCtrlDist: Distance violation between two credit control flits",
4143[3] =
4144 "BadCrdAck: Credit return for unsupported VL",
4145[4] =
4146 "UnsupportedVLMarker: Received VL Marker",
4147[5] =
4148 "BadPreempt: Exceeded the preemption nesting level",
4149[6] =
4150 "BadControlFlit: Received unsupported control flit",
4151/* no 7 */
4152[8] =
4153 "UnsupportedVLMarker: Received VL Marker for unconfigured or disabled VL",
4154};
4155
4156static const char * const port_rcv_txt[] = {
4157[1] =
4158 "BadPktLen: Illegal PktLen",
4159[2] =
4160 "PktLenTooLong: Packet longer than PktLen",
4161[3] =
4162 "PktLenTooShort: Packet shorter than PktLen",
4163[4] =
4164 "BadSLID: Illegal SLID (0, using multicast as SLID, does not include security validation of SLID)",
4165[5] =
4166 "BadDLID: Illegal DLID (0, doesn't match HFI)",
4167[6] =
4168 "BadL2: Illegal L2 opcode",
4169[7] =
4170 "BadSC: Unsupported SC",
4171[9] =
4172 "BadRC: Illegal RC",
4173[11] =
4174 "PreemptError: Preempting with same VL",
4175[12] =
4176 "PreemptVL15: Preempting a VL15 packet",
4177};
4178
4179#define OPA_LDR_FMCONFIG_OFFSET 16
4180#define OPA_LDR_PORTRCV_OFFSET 0
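/*
 * Bits in the FM-programmed PortErrorAction mask are grouped by error
 * class: PortRcv causes start at bit 0 and FMConfig causes at bit 16,
 * indexed by the error info code read from the matching DCC_ERR_INFO_*
 * register (see the "1 << (OFFSET + info)" checks below).
 */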
4181static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4182{
4183 u64 info, hdr0, hdr1;
4184 const char *extra;
4185 char buf[96];
4186 struct hfi1_pportdata *ppd = dd->pport;
4187 u8 lcl_reason = 0;
4188 int do_bounce = 0;
4189
4190 if (reg & DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK) {
4191 if (!(dd->err_info_uncorrectable & OPA_EI_STATUS_SMASK)) {
4192 info = read_csr(dd, DCC_ERR_INFO_UNCORRECTABLE);
4193 dd->err_info_uncorrectable = info & OPA_EI_CODE_SMASK;
4194 /* set status bit */
4195 dd->err_info_uncorrectable |= OPA_EI_STATUS_SMASK;
4196 }
4197 reg &= ~DCC_ERR_FLG_UNCORRECTABLE_ERR_SMASK;
4198 }
4199
4200 if (reg & DCC_ERR_FLG_LINK_ERR_SMASK) {
4201 struct hfi1_pportdata *ppd = dd->pport;
4202 /* this counter saturates at (2^32) - 1 */
4203 if (ppd->link_downed < (u32)UINT_MAX)
4204 ppd->link_downed++;
4205 reg &= ~DCC_ERR_FLG_LINK_ERR_SMASK;
4206 }
4207
4208 if (reg & DCC_ERR_FLG_FMCONFIG_ERR_SMASK) {
4209 u8 reason_valid = 1;
4210
4211 info = read_csr(dd, DCC_ERR_INFO_FMCONFIG);
4212 if (!(dd->err_info_fmconfig & OPA_EI_STATUS_SMASK)) {
4213 dd->err_info_fmconfig = info & OPA_EI_CODE_SMASK;
4214 /* set status bit */
4215 dd->err_info_fmconfig |= OPA_EI_STATUS_SMASK;
4216 }
4217 switch (info) {
4218 case 0:
4219 case 1:
4220 case 2:
4221 case 3:
4222 case 4:
4223 case 5:
4224 case 6:
4225 extra = fm_config_txt[info];
4226 break;
4227 case 8:
4228 extra = fm_config_txt[info];
4229 if (ppd->port_error_action &
4230 OPA_PI_MASK_FM_CFG_UNSUPPORTED_VL_MARKER) {
4231 do_bounce = 1;
4232 /*
4233 * lcl_reason cannot be derived from info
4234 * for this error
4235 */
4236 lcl_reason =
4237 OPA_LINKDOWN_REASON_UNSUPPORTED_VL_MARKER;
4238 }
4239 break;
4240 default:
4241 reason_valid = 0;
4242 snprintf(buf, sizeof(buf), "reserved%lld", info);
4243 extra = buf;
4244 break;
4245 }
4246
4247 if (reason_valid && !do_bounce) {
4248 do_bounce = ppd->port_error_action &
4249 (1 << (OPA_LDR_FMCONFIG_OFFSET + info));
4250 lcl_reason = info + OPA_LINKDOWN_REASON_BAD_HEAD_DIST;
4251 }
4252
4253 /* just report this */
4254 dd_dev_info(dd, "DCC Error: fmconfig error: %s\n", extra);
4255 reg &= ~DCC_ERR_FLG_FMCONFIG_ERR_SMASK;
4256 }
4257
4258 if (reg & DCC_ERR_FLG_RCVPORT_ERR_SMASK) {
4259 u8 reason_valid = 1;
4260
4261 info = read_csr(dd, DCC_ERR_INFO_PORTRCV);
4262 hdr0 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR0);
4263 hdr1 = read_csr(dd, DCC_ERR_INFO_PORTRCV_HDR1);
4264 if (!(dd->err_info_rcvport.status_and_code &
4265 OPA_EI_STATUS_SMASK)) {
4266 dd->err_info_rcvport.status_and_code =
4267 info & OPA_EI_CODE_SMASK;
4268 /* set status bit */
4269 dd->err_info_rcvport.status_and_code |=
4270 OPA_EI_STATUS_SMASK;
4271 /* save first 2 flits in the packet that caused
4272 * the error */
4273 dd->err_info_rcvport.packet_flit1 = hdr0;
4274 dd->err_info_rcvport.packet_flit2 = hdr1;
4275 }
4276 switch (info) {
4277 case 1:
4278 case 2:
4279 case 3:
4280 case 4:
4281 case 5:
4282 case 6:
4283 case 7:
4284 case 9:
4285 case 11:
4286 case 12:
4287 extra = port_rcv_txt[info];
4288 break;
4289 default:
4290 reason_valid = 0;
4291 snprintf(buf, sizeof(buf), "reserved%lld", info);
4292 extra = buf;
4293 break;
4294 }
4295
4296 if (reason_valid && !do_bounce) {
4297 do_bounce = ppd->port_error_action &
4298 (1 << (OPA_LDR_PORTRCV_OFFSET + info));
4299 lcl_reason = info + OPA_LINKDOWN_REASON_RCV_ERROR_0;
4300 }
4301
4302 /* just report this */
4303 dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra);
4304 dd_dev_info(dd, " hdr0 0x%llx, hdr1 0x%llx\n",
4305 hdr0, hdr1);
4306
4307 reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK;
4308 }
4309
4310 if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK) {
4311 /* informative only */
4312 dd_dev_info(dd, "8051 access to LCB blocked\n");
4313 reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK;
4314 }
4315 if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK) {
4316 /* informative only */
4317 dd_dev_info(dd, "host access to LCB blocked\n");
4318 reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
4319 }
4320
4321 /* report any remaining errors */
4322 if (reg)
4323 dd_dev_info(dd, "DCC Error: %s\n",
4324 dcc_err_string(buf, sizeof(buf), reg));
4325
4326 if (lcl_reason == 0)
4327 lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN;
4328
4329 if (do_bounce) {
4330 dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
4331 set_link_down_reason(ppd, lcl_reason, 0, lcl_reason);
4332 queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
4333 }
4334}
4335
4336static void handle_lcb_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
4337{
4338 char buf[96];
4339
4340 dd_dev_info(dd, "LCB Error: %s\n",
4341 lcb_err_string(buf, sizeof(buf), reg));
4342}
4343
4344/*
4345 * CCE block DC interrupt. Source is < 8.
4346 */
4347static void is_dc_int(struct hfi1_devdata *dd, unsigned int source)
4348{
4349 const struct err_reg_info *eri = &dc_errs[source];
4350
4351 if (eri->handler) {
4352 interrupt_clear_down(dd, 0, eri);
4353 } else if (source == 3 /* dc_lbm_int */) {
4354 /*
4355 * This indicates that a parity error has occurred on the
4356 * address/control lines presented to the LBM. The error
4357 * is a single pulse, there is no associated error flag,
4358 * and it is non-maskable. This is because if a parity
4359 * error occurs on the request the request is dropped.
4360 * This should never occur, but it is nice to know if it
4361 * ever does.
4362 */
4363 dd_dev_err(dd, "Parity error in DC LBM block\n");
4364 } else {
4365 dd_dev_err(dd, "Invalid DC interrupt %u\n", source);
4366 }
4367}
4368
4369/*
4370 * TX block send credit interrupt. Source is < 160.
4371 */
4372static void is_send_credit_int(struct hfi1_devdata *dd, unsigned int source)
4373{
4374 sc_group_release_update(dd, source);
4375}
4376
4377/*
4378 * TX block SDMA interrupt. Source is < 48.
4379 *
4380 * SDMA interrupts are grouped by type:
4381 *
4382 * 0 - N-1 = SDma
4383 * N - 2N-1 = SDmaProgress
4384 * 2N - 3N-1 = SDmaIdle
4385 */
4386static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source)
4387{
4388 /* what interrupt */
4389 unsigned int what = source / TXE_NUM_SDMA_ENGINES;
4390 /* which engine */
4391 unsigned int which = source % TXE_NUM_SDMA_ENGINES;
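	/*
	 * Example (assuming 16 SDMA engines): source 20 decodes to
	 * what == 1 (SDmaProgress) on engine 4.
	 */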
4392
4393#ifdef CONFIG_SDMA_VERBOSITY
4394 dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", which,
4395 slashstrip(__FILE__), __LINE__, __func__);
4396 sdma_dumpstate(&dd->per_sdma[which]);
4397#endif
4398
4399 if (likely(what < 3 && which < dd->num_sdma)) {
4400 sdma_engine_interrupt(&dd->per_sdma[which], 1ull << source);
4401 } else {
4402 /* should not happen */
4403 dd_dev_err(dd, "Invalid SDMA interrupt 0x%x\n", source);
4404 }
4405}
4406
4407/*
4408 * RX block receive available interrupt. Source is < 160.
4409 */
4410static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
4411{
4412 struct hfi1_ctxtdata *rcd;
4413 char *err_detail;
4414
4415 if (likely(source < dd->num_rcv_contexts)) {
4416 rcd = dd->rcd[source];
4417 if (rcd) {
4418 if (source < dd->first_user_ctxt)
4419 rcd->do_interrupt(rcd);
4420 else
4421 handle_user_interrupt(rcd);
4422 return; /* OK */
4423 }
4424 /* received an interrupt, but no rcd */
4425 err_detail = "dataless";
4426 } else {
4427 /* received an interrupt, but are not using that context */
4428 err_detail = "out of range";
4429 }
4430 dd_dev_err(dd, "unexpected %s receive available context interrupt %u\n",
4431 err_detail, source);
4432}
4433
4434/*
4435 * RX block receive urgent interrupt. Source is < 160.
4436 */
4437static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
4438{
4439 struct hfi1_ctxtdata *rcd;
4440 char *err_detail;
4441
4442 if (likely(source < dd->num_rcv_contexts)) {
4443 rcd = dd->rcd[source];
4444 if (rcd) {
4445 /* only pay attention to user urgent interrupts */
4446 if (source >= dd->first_user_ctxt)
4447 handle_user_interrupt(rcd);
4448 return; /* OK */
4449 }
4450 /* received an interrupt, but no rcd */
4451 err_detail = "dataless";
4452 } else {
4453 /* received an interrupt, but are not using that context */
4454 err_detail = "out of range";
4455 }
4456 dd_dev_err(dd, "unexpected %s receive urgent context interrupt %u\n",
4457 err_detail, source);
4458}
4459
4460/*
4461 * Reserved range interrupt. Should not be called in normal operation.
4462 */
4463static void is_reserved_int(struct hfi1_devdata *dd, unsigned int source)
4464{
4465 char name[64];
4466
4467 dd_dev_err(dd, "unexpected %s interrupt\n",
4468 is_reserved_name(name, sizeof(name), source));
4469}
4470
4471static const struct is_table is_table[] = {
4472/* start end
4473 name func interrupt func */
4474{ IS_GENERAL_ERR_START, IS_GENERAL_ERR_END,
4475 is_misc_err_name, is_misc_err_int },
4476{ IS_SDMAENG_ERR_START, IS_SDMAENG_ERR_END,
4477 is_sdma_eng_err_name, is_sdma_eng_err_int },
4478{ IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
4479 is_sendctxt_err_name, is_sendctxt_err_int },
4480{ IS_SDMA_START, IS_SDMA_END,
4481 is_sdma_eng_name, is_sdma_eng_int },
4482{ IS_VARIOUS_START, IS_VARIOUS_END,
4483 is_various_name, is_various_int },
4484{ IS_DC_START, IS_DC_END,
4485 is_dc_name, is_dc_int },
4486{ IS_RCVAVAIL_START, IS_RCVAVAIL_END,
4487 is_rcv_avail_name, is_rcv_avail_int },
4488{ IS_RCVURGENT_START, IS_RCVURGENT_END,
4489 is_rcv_urgent_name, is_rcv_urgent_int },
4490{ IS_SENDCREDIT_START, IS_SENDCREDIT_END,
4491 is_send_credit_name, is_send_credit_int},
4492{ IS_RESERVED_START, IS_RESERVED_END,
4493 is_reserved_name, is_reserved_int},
4494};
4495
4496/*
4497 * Interrupt source interrupt - called when the given source has an interrupt.
4498 * Source is a bit index into an array of 64-bit integers.
4499 */
4500static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
4501{
4502 const struct is_table *entry;
4503
4504 /* avoids a double compare by walking the table in-order */
4505 for (entry = &is_table[0]; entry->is_name; entry++) {
4506 if (source < entry->end) {
4507 trace_hfi1_interrupt(dd, entry, source);
4508 entry->is_int(dd, source - entry->start);
4509 return;
4510 }
4511 }
4512 /* fell off the end */
4513 dd_dev_err(dd, "invalid interrupt source %u\n", source);
4514}
4515
4516/*
4517 * General interrupt handler. This is able to correctly handle
4518 * all interrupts in case INTx is used.
4519 */
4520static irqreturn_t general_interrupt(int irq, void *data)
4521{
4522 struct hfi1_devdata *dd = data;
4523 u64 regs[CCE_NUM_INT_CSRS];
4524 u32 bit;
4525 int i;
4526
4527 this_cpu_inc(*dd->int_counter);
4528
4529 /* phase 1: scan and clear all handled interrupts */
4530 for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
4531 if (dd->gi_mask[i] == 0) {
4532 regs[i] = 0; /* used later */
4533 continue;
4534 }
4535 regs[i] = read_csr(dd, CCE_INT_STATUS + (8 * i)) &
4536 dd->gi_mask[i];
4537 /* only clear if anything is set */
4538 if (regs[i])
4539 write_csr(dd, CCE_INT_CLEAR + (8 * i), regs[i]);
4540 }
4541
4542 /* phase 2: call the appropriate handler */
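	/*
	 * regs[] is walked as one long bitmap, so the bit number handed to
	 * is_interrupt() is the global source index (CSR index * 64 + bit
	 * within that CSR), matching the IS_*_START/END ranges in is_table[].
	 */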
4543 for_each_set_bit(bit, (unsigned long *)&regs[0],
4544 CCE_NUM_INT_CSRS*64) {
4545 is_interrupt(dd, bit);
4546 }
4547
4548 return IRQ_HANDLED;
4549}
4550
4551static irqreturn_t sdma_interrupt(int irq, void *data)
4552{
4553 struct sdma_engine *sde = data;
4554 struct hfi1_devdata *dd = sde->dd;
4555 u64 status;
4556
4557#ifdef CONFIG_SDMA_VERBOSITY
4558 dd_dev_err(dd, "CONFIG SDMA(%u) %s:%d %s()\n", sde->this_idx,
4559 slashstrip(__FILE__), __LINE__, __func__);
4560 sdma_dumpstate(sde);
4561#endif
4562
4563 this_cpu_inc(*dd->int_counter);
4564
4565 /* This read_csr is really bad in the hot path */
4566 status = read_csr(dd,
4567 CCE_INT_STATUS + (8*(IS_SDMA_START/64)))
4568 & sde->imask;
4569 if (likely(status)) {
4570 /* clear the interrupt(s) */
4571 write_csr(dd,
4572 CCE_INT_CLEAR + (8*(IS_SDMA_START/64)),
4573 status);
4574
4575 /* handle the interrupt(s) */
4576 sdma_engine_interrupt(sde, status);
4577 } else
4578 dd_dev_err(dd, "SDMA engine %u interrupt, but no status bits set\n",
4579 sde->this_idx);
4580
4581 return IRQ_HANDLED;
4582}
4583
4584/*
4585 * NOTE: this routine expects to be on its own MSI-X interrupt. If
4586 * multiple receive contexts share the same MSI-X interrupt, then this
4587 * routine must check for who received it.
4588 */
4589static irqreturn_t receive_context_interrupt(int irq, void *data)
4590{
4591 struct hfi1_ctxtdata *rcd = data;
4592 struct hfi1_devdata *dd = rcd->dd;
4593
4594 trace_hfi1_receive_interrupt(dd, rcd->ctxt);
4595 this_cpu_inc(*dd->int_counter);
4596
4597 /* clear the interrupt */
4598 write_csr(rcd->dd, CCE_INT_CLEAR + (8*rcd->ireg), rcd->imask);
4599
4600 /* handle the interrupt */
4601 rcd->do_interrupt(rcd);
4602
4603 return IRQ_HANDLED;
4604}
4605
4606/* ========================================================================= */
4607
4608u32 read_physical_state(struct hfi1_devdata *dd)
4609{
4610 u64 reg;
4611
4612 reg = read_csr(dd, DC_DC8051_STS_CUR_STATE);
4613 return (reg >> DC_DC8051_STS_CUR_STATE_PORT_SHIFT)
4614 & DC_DC8051_STS_CUR_STATE_PORT_MASK;
4615}
4616
4617static u32 read_logical_state(struct hfi1_devdata *dd)
4618{
4619 u64 reg;
4620
4621 reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4622 return (reg >> DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT)
4623 & DCC_CFG_PORT_CONFIG_LINK_STATE_MASK;
4624}
4625
4626static void set_logical_state(struct hfi1_devdata *dd, u32 chip_lstate)
4627{
4628 u64 reg;
4629
4630 reg = read_csr(dd, DCC_CFG_PORT_CONFIG);
4631 /* clear current state, set new state */
4632 reg &= ~DCC_CFG_PORT_CONFIG_LINK_STATE_SMASK;
4633 reg |= (u64)chip_lstate << DCC_CFG_PORT_CONFIG_LINK_STATE_SHIFT;
4634 write_csr(dd, DCC_CFG_PORT_CONFIG, reg);
4635}
4636
4637/*
4638 * Use the 8051 to read a LCB CSR.
4639 */
4640static int read_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 *data)
4641{
4642 u32 regno;
4643 int ret;
4644
4645 if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
4646 if (acquire_lcb_access(dd, 0) == 0) {
4647 *data = read_csr(dd, addr);
4648 release_lcb_access(dd, 0);
4649 return 0;
4650 }
4651 return -EBUSY;
4652 }
4653
4654 /* register is an index of LCB registers: (offset - base) / 8 */
4655 regno = (addr - DC_LCB_CFG_RUN) >> 3;
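	/* e.g. addr == DC_LCB_CFG_RUN maps to regno 0 */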
4656 ret = do_8051_command(dd, HCMD_READ_LCB_CSR, regno, data);
4657 if (ret != HCMD_SUCCESS)
4658 return -EBUSY;
4659 return 0;
4660}
4661
4662/*
4663 * Read an LCB CSR. Access may not be in host control, so check.
4664 * Return 0 on success, -EBUSY on failure.
4665 */
4666int read_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 *data)
4667{
4668 struct hfi1_pportdata *ppd = dd->pport;
4669
4670 /* if up, go through the 8051 for the value */
4671 if (ppd->host_link_state & HLS_UP)
4672 return read_lcb_via_8051(dd, addr, data);
4673 /* if going up or down, no access */
4674 if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4675 return -EBUSY;
4676 /* otherwise, host has access */
4677 *data = read_csr(dd, addr);
4678 return 0;
4679}
4680
4681/*
4682 * Use the 8051 to write a LCB CSR.
4683 */
4684static int write_lcb_via_8051(struct hfi1_devdata *dd, u32 addr, u64 data)
4685{
4686
4687 if (acquire_lcb_access(dd, 0) == 0) {
4688 write_csr(dd, addr, data);
4689 release_lcb_access(dd, 0);
4690 return 0;
4691 }
4692 return -EBUSY;
4693}
4694
4695/*
4696 * Write an LCB CSR. Access may not be in host control, so check.
4697 * Return 0 on success, -EBUSY on failure.
4698 */
4699int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data)
4700{
4701 struct hfi1_pportdata *ppd = dd->pport;
4702
4703 /* if up, go through the 8051 for the value */
4704 if (ppd->host_link_state & HLS_UP)
4705 return write_lcb_via_8051(dd, addr, data);
4706 /* if going up or down, no access */
4707 if (ppd->host_link_state & (HLS_GOING_UP | HLS_GOING_OFFLINE))
4708 return -EBUSY;
4709 /* otherwise, host has access */
4710 write_csr(dd, addr, data);
4711 return 0;
4712}
4713
4714/*
4715 * Returns:
4716 * < 0 = Linux error, not able to get access
4717 * > 0 = 8051 command RETURN_CODE
4718 */
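/*
 * Success is signalled by the 8051's own RETURN_CODE, so callers (e.g.
 * load_8051_config() below) compare the result against HCMD_SUCCESS
 * rather than against 0.
 */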
4719static int do_8051_command(
4720 struct hfi1_devdata *dd,
4721 u32 type,
4722 u64 in_data,
4723 u64 *out_data)
4724{
4725 u64 reg, completed;
4726 int return_code;
4727 unsigned long flags;
4728 unsigned long timeout;
4729
4730 hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data);
4731
4732 /*
4733 * Alternative to holding the lock for a long time:
4734 * - keep busy wait - have other users bounce off
4735 */
4736 spin_lock_irqsave(&dd->dc8051_lock, flags);
4737
4738 /* We can't send any commands to the 8051 if it's in reset */
4739 if (dd->dc_shutdown) {
4740 return_code = -ENODEV;
4741 goto fail;
4742 }
4743
4744 /*
4745 * If an 8051 host command timed out previously, then the 8051 is
4746 * stuck.
4747 *
4748 * On first timeout, attempt to reset and restart the entire DC
4749 * block (including 8051). (Is this too big of a hammer?)
4750 *
4751 * If the 8051 times out a second time, the reset did not bring it
4752 * back to healthy life. In that case, fail any subsequent commands.
4753 */
4754 if (dd->dc8051_timed_out) {
4755 if (dd->dc8051_timed_out > 1) {
4756 dd_dev_err(dd,
4757 "Previous 8051 host command timed out, skipping command %u\n",
4758 type);
4759 return_code = -ENXIO;
4760 goto fail;
4761 }
4762 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4763 dc_shutdown(dd);
4764 dc_start(dd);
4765 spin_lock_irqsave(&dd->dc8051_lock, flags);
4766 }
4767
4768 /*
4769 * If there is no timeout, then the 8051 command interface is
4770 * waiting for a command.
4771 */
4772
4773 /*
4774 * Do two writes: the first to stabilize the type and req_data, the
4775 * second to activate.
4776 */
4777 reg = ((u64)type & DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_MASK)
4778 << DC_DC8051_CFG_HOST_CMD_0_REQ_TYPE_SHIFT
4779 | (in_data & DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_MASK)
4780 << DC_DC8051_CFG_HOST_CMD_0_REQ_DATA_SHIFT;
4781 write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4782 reg |= DC_DC8051_CFG_HOST_CMD_0_REQ_NEW_SMASK;
4783 write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, reg);
4784
4785 /* wait for completion, alternate: interrupt */
4786 timeout = jiffies + msecs_to_jiffies(DC8051_COMMAND_TIMEOUT);
4787 while (1) {
4788 reg = read_csr(dd, DC_DC8051_CFG_HOST_CMD_1);
4789 completed = reg & DC_DC8051_CFG_HOST_CMD_1_COMPLETED_SMASK;
4790 if (completed)
4791 break;
4792 if (time_after(jiffies, timeout)) {
4793 dd->dc8051_timed_out++;
4794 dd_dev_err(dd, "8051 host command %u timeout\n", type);
4795 if (out_data)
4796 *out_data = 0;
4797 return_code = -ETIMEDOUT;
4798 goto fail;
4799 }
4800 udelay(2);
4801 }
4802
4803 if (out_data) {
4804 *out_data = (reg >> DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_SHIFT)
4805 & DC_DC8051_CFG_HOST_CMD_1_RSP_DATA_MASK;
4806 if (type == HCMD_READ_LCB_CSR) {
4807 /* top 16 bits are in a different register */
4808 *out_data |= (read_csr(dd, DC_DC8051_CFG_EXT_DEV_1)
4809 & DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SMASK)
4810 << (48
4811 - DC_DC8051_CFG_EXT_DEV_1_REQ_DATA_SHIFT);
4812 }
4813 }
4814 return_code = (reg >> DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_SHIFT)
4815 & DC_DC8051_CFG_HOST_CMD_1_RETURN_CODE_MASK;
4816 dd->dc8051_timed_out = 0;
4817 /*
4818 * Clear command for next user.
4819 */
4820 write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0);
4821
4822fail:
4823 spin_unlock_irqrestore(&dd->dc8051_lock, flags);
4824
4825 return return_code;
4826}
4827
4828static int set_physical_link_state(struct hfi1_devdata *dd, u64 state)
4829{
4830 return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL);
4831}
4832
4833static int load_8051_config(struct hfi1_devdata *dd, u8 field_id,
4834 u8 lane_id, u32 config_data)
4835{
4836 u64 data;
4837 int ret;
4838
4839 data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT
4840 | (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT
4841 | (u64)config_data << LOAD_DATA_DATA_SHIFT;
4842 ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL);
4843 if (ret != HCMD_SUCCESS) {
4844 dd_dev_err(dd,
4845 "load 8051 config: field id %d, lane %d, err %d\n",
4846 (int)field_id, (int)lane_id, ret);
4847 }
4848 return ret;
4849}
4850
4851/*
4852 * Read the 8051 firmware "registers". Use the RAM directly. Always
4853 * set the result, even on error.
4854 * Return 0 on success, -errno on failure
4855 */
4856static int read_8051_config(struct hfi1_devdata *dd, u8 field_id, u8 lane_id,
4857 u32 *result)
4858{
4859 u64 big_data;
4860 u32 addr;
4861 int ret;
4862
4863 /* address start depends on the lane_id */
4864 if (lane_id < 4)
4865 addr = (4 * NUM_GENERAL_FIELDS)
4866 + (lane_id * 4 * NUM_LANE_FIELDS);
4867 else
4868 addr = 0;
4869 addr += field_id * 4;
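	/*
	 * The 8051 RAM is an array of 4-byte fields: general fields first,
	 * then 4 lanes' worth of per-lane fields. A GENERAL_CONFIG access
	 * (lane_id >= 4) therefore indexes from offset 0, while a per-lane
	 * access first skips past the general block.
	 */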
4870
4871 /* read is in 8-byte chunks, hardware will truncate the address down */
4872 ret = read_8051_data(dd, addr, 8, &big_data);
4873
4874 if (ret == 0) {
4875 /* extract the 4 bytes we want */
4876 if (addr & 0x4)
4877 *result = (u32)(big_data >> 32);
4878 else
4879 *result = (u32)big_data;
4880 } else {
4881 *result = 0;
4882 dd_dev_err(dd, "%s: direct read failed, lane %d, field %d!\n",
4883 __func__, lane_id, field_id);
4884 }
4885
4886 return ret;
4887}
4888
4889static int write_vc_local_phy(struct hfi1_devdata *dd, u8 power_management,
4890 u8 continuous)
4891{
4892 u32 frame;
4893
4894 frame = continuous << CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT
4895 | power_management << POWER_MANAGEMENT_SHIFT;
4896 return load_8051_config(dd, VERIFY_CAP_LOCAL_PHY,
4897 GENERAL_CONFIG, frame);
4898}
4899
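/*
 * Pack the local fabric verify-capability parameters (vau, z, vcu,
 * vl15buf, crc_sizes) into a single 32-bit frame and hand it to the
 * 8051 as GENERAL_CONFIG data for VERIFY_CAP_LOCAL_FABRIC.
 */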
4900static int write_vc_local_fabric(struct hfi1_devdata *dd, u8 vau, u8 z, u8 vcu,
4901 u16 vl15buf, u8 crc_sizes)
4902{
4903 u32 frame;
4904
4905 frame = (u32)vau << VAU_SHIFT
4906 | (u32)z << Z_SHIFT
4907 | (u32)vcu << VCU_SHIFT
4908 | (u32)vl15buf << VL15BUF_SHIFT
4909 | (u32)crc_sizes << CRC_SIZES_SHIFT;
4910 return load_8051_config(dd, VERIFY_CAP_LOCAL_FABRIC,
4911 GENERAL_CONFIG, frame);
4912}
4913
4914static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
4915 u8 *flag_bits, u16 *link_widths)
4916{
4917 u32 frame;
4918
4919 read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
4920 &frame);
4921 *misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
4922 *flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
4923 *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
4924}
4925
4926static int write_vc_local_link_width(struct hfi1_devdata *dd,
4927 u8 misc_bits,
4928 u8 flag_bits,
4929 u16 link_widths)
4930{
4931 u32 frame;
4932
4933 frame = (u32)misc_bits << MISC_CONFIG_BITS_SHIFT
4934 | (u32)flag_bits << LOCAL_FLAG_BITS_SHIFT
4935 | (u32)link_widths << LINK_WIDTH_SHIFT;
4936 return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
4937 frame);
4938}
4939
4940static int write_local_device_id(struct hfi1_devdata *dd, u16 device_id,
4941 u8 device_rev)
4942{
4943 u32 frame;
4944
4945 frame = ((u32)device_id << LOCAL_DEVICE_ID_SHIFT)
4946 | ((u32)device_rev << LOCAL_DEVICE_REV_SHIFT);
4947 return load_8051_config(dd, LOCAL_DEVICE_ID, GENERAL_CONFIG, frame);
4948}
4949
4950static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
4951 u8 *device_rev)
4952{
4953 u32 frame;
4954
4955 read_8051_config(dd, REMOTE_DEVICE_ID, GENERAL_CONFIG, &frame);
4956 *device_id = (frame >> REMOTE_DEVICE_ID_SHIFT) & REMOTE_DEVICE_ID_MASK;
4957 *device_rev = (frame >> REMOTE_DEVICE_REV_SHIFT)
4958 & REMOTE_DEVICE_REV_MASK;
4959}
4960
4961void read_misc_status(struct hfi1_devdata *dd, u8 *ver_a, u8 *ver_b)
4962{
4963 u32 frame;
4964
4965 read_8051_config(dd, MISC_STATUS, GENERAL_CONFIG, &frame);
4966 *ver_a = (frame >> STS_FM_VERSION_A_SHIFT) & STS_FM_VERSION_A_MASK;
4967 *ver_b = (frame >> STS_FM_VERSION_B_SHIFT) & STS_FM_VERSION_B_MASK;
4968}
4969
4970static void read_vc_remote_phy(struct hfi1_devdata *dd, u8 *power_management,
4971 u8 *continuous)
4972{
4973 u32 frame;
4974
4975 read_8051_config(dd, VERIFY_CAP_REMOTE_PHY, GENERAL_CONFIG, &frame);
4976 *power_management = (frame >> POWER_MANAGEMENT_SHIFT)
4977 & POWER_MANAGEMENT_MASK;
4978 *continuous = (frame >> CONTINIOUS_REMOTE_UPDATE_SUPPORT_SHIFT)
4979 & CONTINIOUS_REMOTE_UPDATE_SUPPORT_MASK;
4980}
4981
4982static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
4983 u8 *vcu, u16 *vl15buf, u8 *crc_sizes)
4984{
4985 u32 frame;
4986
4987 read_8051_config(dd, VERIFY_CAP_REMOTE_FABRIC, GENERAL_CONFIG, &frame);
4988 *vau = (frame >> VAU_SHIFT) & VAU_MASK;
4989 *z = (frame >> Z_SHIFT) & Z_MASK;
4990 *vcu = (frame >> VCU_SHIFT) & VCU_MASK;
4991 *vl15buf = (frame >> VL15BUF_SHIFT) & VL15BUF_MASK;
4992 *crc_sizes = (frame >> CRC_SIZES_SHIFT) & CRC_SIZES_MASK;
4993}
4994
4995static void read_vc_remote_link_width(struct hfi1_devdata *dd,
4996 u8 *remote_tx_rate,
4997 u16 *link_widths)
4998{
4999 u32 frame;
5000
5001 read_8051_config(dd, VERIFY_CAP_REMOTE_LINK_WIDTH, GENERAL_CONFIG,
5002 &frame);
5003 *remote_tx_rate = (frame >> REMOTE_TX_RATE_SHIFT)
5004 & REMOTE_TX_RATE_MASK;
5005 *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
5006}
5007
5008static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx)
5009{
5010 u32 frame;
5011
5012 read_8051_config(dd, LOCAL_LNI_INFO, GENERAL_CONFIG, &frame);
5013 *enable_lane_rx = (frame >> ENABLE_LANE_RX_SHIFT) & ENABLE_LANE_RX_MASK;
5014}
5015
5016static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed)
5017{
5018 u32 frame;
5019
5020 read_8051_config(dd, REMOTE_LNI_INFO, GENERAL_CONFIG, &frame);
5021 *mgmt_allowed = (frame >> MGMT_ALLOWED_SHIFT) & MGMT_ALLOWED_MASK;
5022}
5023
5024static void read_last_local_state(struct hfi1_devdata *dd, u32 *lls)
5025{
5026 read_8051_config(dd, LAST_LOCAL_STATE_COMPLETE, GENERAL_CONFIG, lls);
5027}
5028
5029static void read_last_remote_state(struct hfi1_devdata *dd, u32 *lrs)
5030{
5031 read_8051_config(dd, LAST_REMOTE_STATE_COMPLETE, GENERAL_CONFIG, lrs);
5032}
5033
5034void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality)
5035{
5036 u32 frame;
5037 int ret;
5038
5039 *link_quality = 0;
5040 if (dd->pport->host_link_state & HLS_UP) {
5041 ret = read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG,
5042 &frame);
5043 if (ret == 0)
5044 *link_quality = (frame >> LINK_QUALITY_SHIFT)
5045 & LINK_QUALITY_MASK;
5046 }
5047}
5048
5049static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc)
5050{
5051 u32 frame;
5052
5053 read_8051_config(dd, LINK_QUALITY_INFO, GENERAL_CONFIG, &frame);
5054 *pdrrc = (frame >> DOWN_REMOTE_REASON_SHIFT) & DOWN_REMOTE_REASON_MASK;
5055}
5056
5057static int read_tx_settings(struct hfi1_devdata *dd,
5058 u8 *enable_lane_tx,
5059 u8 *tx_polarity_inversion,
5060 u8 *rx_polarity_inversion,
5061 u8 *max_rate)
5062{
5063 u32 frame;
5064 int ret;
5065
5066 ret = read_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, &frame);
5067 *enable_lane_tx = (frame >> ENABLE_LANE_TX_SHIFT)
5068 & ENABLE_LANE_TX_MASK;
5069 *tx_polarity_inversion = (frame >> TX_POLARITY_INVERSION_SHIFT)
5070 & TX_POLARITY_INVERSION_MASK;
5071 *rx_polarity_inversion = (frame >> RX_POLARITY_INVERSION_SHIFT)
5072 & RX_POLARITY_INVERSION_MASK;
5073 *max_rate = (frame >> MAX_RATE_SHIFT) & MAX_RATE_MASK;
5074 return ret;
5075}
5076
5077static int write_tx_settings(struct hfi1_devdata *dd,
5078 u8 enable_lane_tx,
5079 u8 tx_polarity_inversion,
5080 u8 rx_polarity_inversion,
5081 u8 max_rate)
5082{
5083 u32 frame;
5084
5085 /* no need to mask, all variable sizes match field widths */
5086 frame = enable_lane_tx << ENABLE_LANE_TX_SHIFT
5087 | tx_polarity_inversion << TX_POLARITY_INVERSION_SHIFT
5088 | rx_polarity_inversion << RX_POLARITY_INVERSION_SHIFT
5089 | max_rate << MAX_RATE_SHIFT;
5090 return load_8051_config(dd, TX_SETTINGS, GENERAL_CONFIG, frame);
5091}
5092
5093static void check_fabric_firmware_versions(struct hfi1_devdata *dd)
5094{
5095 u32 frame, version, prod_id;
5096 int ret, lane;
5097
5098 /* 4 lanes */
5099 for (lane = 0; lane < 4; lane++) {
5100 ret = read_8051_config(dd, SPICO_FW_VERSION, lane, &frame);
5101 if (ret) {
5102 dd_dev_err(
5103 dd,
5104 "Unable to read lane %d firmware details\n",
5105 lane);
5106 continue;
5107 }
5108 version = (frame >> SPICO_ROM_VERSION_SHIFT)
5109 & SPICO_ROM_VERSION_MASK;
5110 prod_id = (frame >> SPICO_ROM_PROD_ID_SHIFT)
5111 & SPICO_ROM_PROD_ID_MASK;
5112 dd_dev_info(dd,
5113 "Lane %d firmware: version 0x%04x, prod_id 0x%04x\n",
5114 lane, version, prod_id);
5115 }
5116}
5117
5118/*
5119 * Read an idle LCB message.
5120 *
5121 * Returns 0 on success, -EINVAL on error
5122 */
5123static int read_idle_message(struct hfi1_devdata *dd, u64 type, u64 *data_out)
5124{
5125 int ret;
5126
5127 ret = do_8051_command(dd, HCMD_READ_LCB_IDLE_MSG,
5128 type, data_out);
5129 if (ret != HCMD_SUCCESS) {
5130 dd_dev_err(dd, "read idle message: type %d, err %d\n",
5131 (u32)type, ret);
5132 return -EINVAL;
5133 }
5134 dd_dev_info(dd, "%s: read idle message 0x%llx\n", __func__, *data_out);
5135 /* return only the payload as we already know the type */
5136 *data_out >>= IDLE_PAYLOAD_SHIFT;
5137 return 0;
5138}
5139
5140/*
5141 * Read an idle SMA message. To be done in response to a notification from
5142 * the 8051.
5143 *
5144 * Returns 0 on success, -EINVAL on error
5145 */
5146static int read_idle_sma(struct hfi1_devdata *dd, u64 *data)
5147{
5148 return read_idle_message(dd,
5149 (u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT, data);
5150}
5151
5152/*
5153 * Send an idle LCB message.
5154 *
5155 * Returns 0 on success, -EINVAL on error
5156 */
5157static int send_idle_message(struct hfi1_devdata *dd, u64 data)
5158{
5159 int ret;
5160
5161 dd_dev_info(dd, "%s: sending idle message 0x%llx\n", __func__, data);
5162 ret = do_8051_command(dd, HCMD_SEND_LCB_IDLE_MSG, data, NULL);
5163 if (ret != HCMD_SUCCESS) {
5164 dd_dev_err(dd, "send idle message: data 0x%llx, err %d\n",
5165 data, ret);
5166 return -EINVAL;
5167 }
5168 return 0;
5169}
5170
5171/*
5172 * Send an idle SMA message.
5173 *
5174 * Returns 0 on success, -EINVAL on error
5175 */
5176int send_idle_sma(struct hfi1_devdata *dd, u64 message)
5177{
5178 u64 data;
5179
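	/* shift the payload and the SMA type into their idle message fields */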
5180 data = ((message & IDLE_PAYLOAD_MASK) << IDLE_PAYLOAD_SHIFT)
5181 | ((u64)IDLE_SMA << IDLE_MSG_TYPE_SHIFT);
5182 return send_idle_message(dd, data);
5183}
5184
5185/*
5186 * Initialize the LCB then do a quick link up. This may or may not be
5187 * in loopback.
5188 *
5189 * return 0 on success, -errno on error
5190 */
5191static int do_quick_linkup(struct hfi1_devdata *dd)
5192{
5193 u64 reg;
5194 unsigned long timeout;
5195 int ret;
5196
5197 lcb_shutdown(dd, 0);
5198
5199 if (loopback) {
5200 /* LCB_CFG_LOOPBACK.VAL = 2 */
5201 /* LCB_CFG_LANE_WIDTH.VAL = 0 */
5202 write_csr(dd, DC_LCB_CFG_LOOPBACK,
5203 IB_PACKET_TYPE << DC_LCB_CFG_LOOPBACK_VAL_SHIFT);
5204 write_csr(dd, DC_LCB_CFG_LANE_WIDTH, 0);
5205 }
5206
5207 /* start the LCBs */
5208 /* LCB_CFG_TX_FIFOS_RESET.VAL = 0 */
5209 write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0);
5210
5211 /* simulator only loopback steps */
5212 if (loopback && dd->icode == ICODE_FUNCTIONAL_SIMULATOR) {
5213 /* LCB_CFG_RUN.EN = 1 */
5214 write_csr(dd, DC_LCB_CFG_RUN,
5215 1ull << DC_LCB_CFG_RUN_EN_SHIFT);
5216
5217 /* watch LCB_STS_LINK_TRANSFER_ACTIVE */
5218 timeout = jiffies + msecs_to_jiffies(10);
5219 while (1) {
5220 reg = read_csr(dd,
5221 DC_LCB_STS_LINK_TRANSFER_ACTIVE);
5222 if (reg)
5223 break;
5224 if (time_after(jiffies, timeout)) {
5225 dd_dev_err(dd,
5226 "timeout waiting for LINK_TRANSFER_ACTIVE\n");
5227 return -ETIMEDOUT;
5228 }
5229 udelay(2);
5230 }
5231
5232 write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP,
5233 1ull << DC_LCB_CFG_ALLOW_LINK_UP_VAL_SHIFT);
5234 }
5235
5236 if (!loopback) {
5237 /*
5238 * When doing quick linkup and not in loopback, both
5239 * sides must be done with LCB set-up before either
5240 * starts the quick linkup. Put a delay here so that
5241 * both sides can be started and have a chance to be
5242 * done with LCB set up before resuming.
5243 */
5244 dd_dev_err(dd,
5245 "Pausing for peer to be finished with LCB set up\n");
5246 msleep(5000);
5247 dd_dev_err(dd,
5248 "Continuing with quick linkup\n");
5249 }
5250
5251 write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */
5252 set_8051_lcb_access(dd);
5253
5254 /*
5255 * State "quick" LinkUp request sets the physical link state to
5256 * LinkUp without a verify capability sequence.
5257 * This state is in simulator v37 and later.
5258 */
5259 ret = set_physical_link_state(dd, PLS_QUICK_LINKUP);
5260 if (ret != HCMD_SUCCESS) {
5261 dd_dev_err(dd,
5262 "%s: set physical link state to quick LinkUp failed with return %d\n",
5263 __func__, ret);
5264
5265 set_host_lcb_access(dd);
5266 write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
5267
5268 if (ret >= 0)
5269 ret = -EINVAL;
5270 return ret;
5271 }
5272
5273 return 0; /* success */
5274}
5275
5276/*
5277 * Set the SerDes to internal loopback mode.
5278 * Returns 0 on success, -errno on error.
5279 */
5280static int set_serdes_loopback_mode(struct hfi1_devdata *dd)
5281{
5282 int ret;
5283
5284 ret = set_physical_link_state(dd, PLS_INTERNAL_SERDES_LOOPBACK);
5285 if (ret == HCMD_SUCCESS)
5286 return 0;
5287 dd_dev_err(dd,
5288 "Set physical link state to SerDes Loopback failed with return %d\n",
5289 ret);
5290 if (ret >= 0)
5291 ret = -EINVAL;
5292 return ret;
5293}
5294
5295/*
5296 * Do all special steps to set up loopback.
5297 */
5298static int init_loopback(struct hfi1_devdata *dd)
5299{
5300 dd_dev_info(dd, "Entering loopback mode\n");
5301
5302 /* all loopbacks should disable self GUID check */
5303 write_csr(dd, DC_DC8051_CFG_MODE,
5304 (read_csr(dd, DC_DC8051_CFG_MODE) | DISABLE_SELF_GUID_CHECK));
5305
5306 /*
5307 * The simulator has only one loopback option - LCB. Switch
5308 * to that option, which includes quick link up.
5309 *
5310 * Accept all valid loopback values.
5311 */
5312 if ((dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5313 && (loopback == LOOPBACK_SERDES
5314 || loopback == LOOPBACK_LCB
5315 || loopback == LOOPBACK_CABLE)) {
5316 loopback = LOOPBACK_LCB;
5317 quick_linkup = 1;
5318 return 0;
5319 }
5320
5321 /* handle serdes loopback */
5322 if (loopback == LOOPBACK_SERDES) {
 5323 		/* internal serdes loopback needs quick linkup on RTL */
5324 if (dd->icode == ICODE_RTL_SILICON)
5325 quick_linkup = 1;
5326 return set_serdes_loopback_mode(dd);
5327 }
5328
5329 /* LCB loopback - handled at poll time */
5330 if (loopback == LOOPBACK_LCB) {
5331 quick_linkup = 1; /* LCB is always quick linkup */
5332
5333 /* not supported in emulation due to emulation RTL changes */
5334 if (dd->icode == ICODE_FPGA_EMULATION) {
5335 dd_dev_err(dd,
5336 "LCB loopback not supported in emulation\n");
5337 return -EINVAL;
5338 }
5339 return 0;
5340 }
5341
5342 /* external cable loopback requires no extra steps */
5343 if (loopback == LOOPBACK_CABLE)
5344 return 0;
5345
5346 dd_dev_err(dd, "Invalid loopback mode %d\n", loopback);
5347 return -EINVAL;
5348}
5349
5350/*
5351 * Translate from the OPA_LINK_WIDTH handed to us by the FM to bits
5352 * used in the Verify Capability link width attribute.
5353 */
5354static u16 opa_to_vc_link_widths(u16 opa_widths)
5355{
5356 int i;
5357 u16 result = 0;
5358
5359 static const struct link_bits {
5360 u16 from;
5361 u16 to;
5362 } opa_link_xlate[] = {
5363 { OPA_LINK_WIDTH_1X, 1 << (1-1) },
5364 { OPA_LINK_WIDTH_2X, 1 << (2-1) },
5365 { OPA_LINK_WIDTH_3X, 1 << (3-1) },
5366 { OPA_LINK_WIDTH_4X, 1 << (4-1) },
5367 };
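	/* e.g. an enabled 1X|2X OPA mask maps to VC bits 0x3 */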
5368
5369 for (i = 0; i < ARRAY_SIZE(opa_link_xlate); i++) {
5370 if (opa_widths & opa_link_xlate[i].from)
5371 result |= opa_link_xlate[i].to;
5372 }
5373 return result;
5374}
5375
5376/*
5377 * Set link attributes before moving to polling.
5378 */
5379static int set_local_link_attributes(struct hfi1_pportdata *ppd)
5380{
5381 struct hfi1_devdata *dd = ppd->dd;
5382 u8 enable_lane_tx;
5383 u8 tx_polarity_inversion;
5384 u8 rx_polarity_inversion;
5385 int ret;
5386
5387 /* reset our fabric serdes to clear any lingering problems */
5388 fabric_serdes_reset(dd);
5389
5390 /* set the local tx rate - need to read-modify-write */
5391 ret = read_tx_settings(dd, &enable_lane_tx, &tx_polarity_inversion,
5392 &rx_polarity_inversion, &ppd->local_tx_rate);
5393 if (ret)
5394 goto set_local_link_attributes_fail;
5395
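	/*
	 * The tx rate encoding depends on the 8051 firmware version: before
	 * version 0.20 a single fastest-rate value is written (1 when 25G is
	 * enabled, 0 otherwise); from 0.20 on, a bit mask of enabled rates
	 * is written (bit 0 = 12.5G, bit 1 = 25G).
	 */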
5396 if (dd->dc8051_ver < dc8051_ver(0, 20)) {
5397 /* set the tx rate to the fastest enabled */
5398 if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5399 ppd->local_tx_rate = 1;
5400 else
5401 ppd->local_tx_rate = 0;
5402 } else {
5403 /* set the tx rate to all enabled */
5404 ppd->local_tx_rate = 0;
5405 if (ppd->link_speed_enabled & OPA_LINK_SPEED_25G)
5406 ppd->local_tx_rate |= 2;
5407 if (ppd->link_speed_enabled & OPA_LINK_SPEED_12_5G)
5408 ppd->local_tx_rate |= 1;
5409 }
5410 ret = write_tx_settings(dd, enable_lane_tx, tx_polarity_inversion,
5411 rx_polarity_inversion, ppd->local_tx_rate);
5412 if (ret != HCMD_SUCCESS)
5413 goto set_local_link_attributes_fail;
5414
5415 /*
5416 * DC supports continuous updates.
5417 */
5418 ret = write_vc_local_phy(dd, 0 /* no power management */,
5419 1 /* continuous updates */);
5420 if (ret != HCMD_SUCCESS)
5421 goto set_local_link_attributes_fail;
5422
5423 /* z=1 in the next call: AU of 0 is not supported by the hardware */
5424 ret = write_vc_local_fabric(dd, dd->vau, 1, dd->vcu, dd->vl15_init,
5425 ppd->port_crc_mode_enabled);
5426 if (ret != HCMD_SUCCESS)
5427 goto set_local_link_attributes_fail;
5428
5429 ret = write_vc_local_link_width(dd, 0, 0,
5430 opa_to_vc_link_widths(ppd->link_width_enabled));
5431 if (ret != HCMD_SUCCESS)
5432 goto set_local_link_attributes_fail;
5433
5434 /* let peer know who we are */
5435 ret = write_local_device_id(dd, dd->pcidev->device, dd->minrev);
5436 if (ret == HCMD_SUCCESS)
5437 return 0;
5438
5439set_local_link_attributes_fail:
5440 dd_dev_err(dd,
5441 "Failed to set local link attributes, return 0x%x\n",
5442 ret);
5443 return ret;
5444}
5445
5446/*
5447 * Call this to start the link. Schedule a retry if the cable is not
5448 * present or if unable to start polling. Do not do anything if the
5449 * link is disabled. Returns 0 if link is disabled or moved to polling
5450 */
5451int start_link(struct hfi1_pportdata *ppd)
5452{
5453 if (!ppd->link_enabled) {
5454 dd_dev_info(ppd->dd,
5455 "%s: stopping link start because link is disabled\n",
5456 __func__);
5457 return 0;
5458 }
5459 if (!ppd->driver_link_ready) {
5460 dd_dev_info(ppd->dd,
5461 "%s: stopping link start because driver is not ready\n",
5462 __func__);
5463 return 0;
5464 }
5465
5466 if (qsfp_mod_present(ppd) || loopback == LOOPBACK_SERDES ||
5467 loopback == LOOPBACK_LCB ||
5468 ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
5469 return set_link_state(ppd, HLS_DN_POLL);
5470
5471 dd_dev_info(ppd->dd,
5472 "%s: stopping link start because no cable is present\n",
5473 __func__);
5474 return -EAGAIN;
5475}
5476
5477static void reset_qsfp(struct hfi1_pportdata *ppd)
5478{
5479 struct hfi1_devdata *dd = ppd->dd;
5480 u64 mask, qsfp_mask;
5481
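	/*
	 * Reset sequence: enable the RESET_N pin as an output, drive it low
	 * for ~10us, then drive it high again to release the module from
	 * reset.
	 */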
5482 mask = (u64)QSFP_HFI0_RESET_N;
5483 qsfp_mask = read_csr(dd,
5484 dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE);
5485 qsfp_mask |= mask;
5486 write_csr(dd,
5487 dd->hfi1_id ? ASIC_QSFP2_OE : ASIC_QSFP1_OE,
5488 qsfp_mask);
5489
5490 qsfp_mask = read_csr(dd,
5491 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT);
5492 qsfp_mask &= ~mask;
5493 write_csr(dd,
5494 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
5495 qsfp_mask);
5496
5497 udelay(10);
5498
5499 qsfp_mask |= mask;
5500 write_csr(dd,
5501 dd->hfi1_id ? ASIC_QSFP2_OUT : ASIC_QSFP1_OUT,
5502 qsfp_mask);
5503}
5504
5505static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
5506 u8 *qsfp_interrupt_status)
5507{
5508 struct hfi1_devdata *dd = ppd->dd;
5509
5510 if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
5511 (qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
5512 dd_dev_info(dd,
 5513 			"%s: QSFP cable temperature too high\n",
5514 __func__);
5515
5516 if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
5517 (qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
5518 dd_dev_info(dd,
5519 "%s: QSFP cable temperature too low\n",
5520 __func__);
5521
5522 if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
5523 (qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
5524 dd_dev_info(dd,
5525 "%s: QSFP supply voltage too high\n",
5526 __func__);
5527
5528 if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
5529 (qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
5530 dd_dev_info(dd,
5531 "%s: QSFP supply voltage too low\n",
5532 __func__);
5533
5534 /* Byte 2 is vendor specific */
5535
5536 if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
5537 (qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
5538 dd_dev_info(dd,
5539 "%s: Cable RX channel 1/2 power too high\n",
5540 __func__);
5541
5542 if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
5543 (qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
5544 dd_dev_info(dd,
5545 "%s: Cable RX channel 1/2 power too low\n",
5546 __func__);
5547
5548 if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
5549 (qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
5550 dd_dev_info(dd,
5551 "%s: Cable RX channel 3/4 power too high\n",
5552 __func__);
5553
5554 if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
5555 (qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
5556 dd_dev_info(dd,
5557 "%s: Cable RX channel 3/4 power too low\n",
5558 __func__);
5559
5560 if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
5561 (qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
5562 dd_dev_info(dd,
5563 "%s: Cable TX channel 1/2 bias too high\n",
5564 __func__);
5565
5566 if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
5567 (qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
5568 dd_dev_info(dd,
5569 "%s: Cable TX channel 1/2 bias too low\n",
5570 __func__);
5571
5572 if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
5573 (qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
5574 dd_dev_info(dd,
5575 "%s: Cable TX channel 3/4 bias too high\n",
5576 __func__);
5577
5578 if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
5579 (qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
5580 dd_dev_info(dd,
5581 "%s: Cable TX channel 3/4 bias too low\n",
5582 __func__);
5583
5584 if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
5585 (qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
5586 dd_dev_info(dd,
5587 "%s: Cable TX channel 1/2 power too high\n",
5588 __func__);
5589
5590 if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
5591 (qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
5592 dd_dev_info(dd,
5593 "%s: Cable TX channel 1/2 power too low\n",
5594 __func__);
5595
5596 if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
5597 (qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
5598 dd_dev_info(dd,
5599 "%s: Cable TX channel 3/4 power too high\n",
5600 __func__);
5601
5602 if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
5603 (qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
5604 dd_dev_info(dd,
5605 "%s: Cable TX channel 3/4 power too low\n",
5606 __func__);
5607
5608 /* Bytes 9-10 and 11-12 are reserved */
5609 /* Bytes 13-15 are vendor specific */
5610
5611 return 0;
5612}
5613
5614static int do_pre_lni_host_behaviors(struct hfi1_pportdata *ppd)
5615{
5616 refresh_qsfp_cache(ppd, &ppd->qsfp_info);
5617
5618 return 0;
5619}
5620
5621static int do_qsfp_intr_fallback(struct hfi1_pportdata *ppd)
5622{
5623 struct hfi1_devdata *dd = ppd->dd;
5624 u8 qsfp_interrupt_status = 0;
5625
5626 if (qsfp_read(ppd, dd->hfi1_id, 2, &qsfp_interrupt_status, 1)
5627 != 1) {
5628 dd_dev_info(dd,
5629 "%s: Failed to read status of QSFP module\n",
5630 __func__);
5631 return -EIO;
5632 }
5633
5634 /* We don't care about alarms & warnings with a non-functional INT_N */
5635 if (!(qsfp_interrupt_status & QSFP_DATA_NOT_READY))
5636 do_pre_lni_host_behaviors(ppd);
5637
5638 return 0;
5639}
5640
5641/* This routine will only be scheduled if the QSFP module is present */
5642static void qsfp_event(struct work_struct *work)
5643{
5644 struct qsfp_data *qd;
5645 struct hfi1_pportdata *ppd;
5646 struct hfi1_devdata *dd;
5647
5648 qd = container_of(work, struct qsfp_data, qsfp_work);
5649 ppd = qd->ppd;
5650 dd = ppd->dd;
5651
5652 /* Sanity check */
5653 if (!qsfp_mod_present(ppd))
5654 return;
5655
5656 /*
 5657 	 * Turn the DC back on after the cable has been
 5658 	 * re-inserted. Up until now, the DC has been in
5659 * reset to save power.
5660 */
5661 dc_start(dd);
5662
5663 if (qd->cache_refresh_required) {
5664 msleep(3000);
5665 reset_qsfp(ppd);
5666
5667 /* Check for QSFP interrupt after t_init (SFF 8679)
5668 * + extra
5669 */
5670 msleep(3000);
5671 if (!qd->qsfp_interrupt_functional) {
5672 if (do_qsfp_intr_fallback(ppd) < 0)
5673 dd_dev_info(dd, "%s: QSFP fallback failed\n",
5674 __func__);
5675 ppd->driver_link_ready = 1;
5676 start_link(ppd);
5677 }
5678 }
5679
5680 if (qd->check_interrupt_flags) {
5681 u8 qsfp_interrupt_status[16] = {0,};
5682
5683 if (qsfp_read(ppd, dd->hfi1_id, 6,
5684 &qsfp_interrupt_status[0], 16) != 16) {
5685 dd_dev_info(dd,
5686 "%s: Failed to read status of QSFP module\n",
5687 __func__);
5688 } else {
5689 unsigned long flags;
5690 u8 data_status;
5691
5692 spin_lock_irqsave(&ppd->qsfp_info.qsfp_lock, flags);
5693 ppd->qsfp_info.check_interrupt_flags = 0;
5694 spin_unlock_irqrestore(&ppd->qsfp_info.qsfp_lock,
5695 flags);
5696
 5697 			if (qsfp_read(ppd, dd->hfi1_id, 2, &data_status, 1)
 5698 					!= 1) {
 5699 				dd_dev_info(dd,
 5700 					"%s: Failed to read status of QSFP module\n",
 5701 					__func__);
 5702 			} else if (!(data_status & QSFP_DATA_NOT_READY)) {
 5703 				do_pre_lni_host_behaviors(ppd);
 5704 				start_link(ppd);
 5705 			} else {
 5706 				handle_qsfp_error_conditions(ppd,
 5707 						qsfp_interrupt_status);
 5708 			}
5709 }
5710 }
5711}
5712
5713void init_qsfp(struct hfi1_pportdata *ppd)
5714{
5715 struct hfi1_devdata *dd = ppd->dd;
5716 u64 qsfp_mask;
5717
5718 if (loopback == LOOPBACK_SERDES || loopback == LOOPBACK_LCB ||
5719 ppd->dd->icode == ICODE_FUNCTIONAL_SIMULATOR ||
5720 !HFI1_CAP_IS_KSET(QSFP_ENABLED)) {
5721 ppd->driver_link_ready = 1;
5722 return;
5723 }
5724
5725 ppd->qsfp_info.ppd = ppd;
5726 INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
5727
5728 qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
5729 /* Clear current status to avoid spurious interrupts */
5730 write_csr(dd,
5731 dd->hfi1_id ?
5732 ASIC_QSFP2_CLEAR :
5733 ASIC_QSFP1_CLEAR,
5734 qsfp_mask);
5735
5736 /* Handle active low nature of INT_N and MODPRST_N pins */
5737 if (qsfp_mod_present(ppd))
5738 qsfp_mask &= ~(u64)QSFP_HFI0_MODPRST_N;
5739 write_csr(dd,
5740 dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
5741 qsfp_mask);
5742
5743 /* Allow only INT_N and MODPRST_N to trigger QSFP interrupts */
5744 qsfp_mask |= (u64)QSFP_HFI0_MODPRST_N;
5745 write_csr(dd,
5746 dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK,
5747 qsfp_mask);
5748
5749 if (qsfp_mod_present(ppd)) {
5750 msleep(3000);
5751 reset_qsfp(ppd);
5752
5753 /* Check for QSFP interrupt after t_init (SFF 8679)
5754 * + extra
5755 */
5756 msleep(3000);
5757 if (!ppd->qsfp_info.qsfp_interrupt_functional) {
5758 if (do_qsfp_intr_fallback(ppd) < 0)
5759 dd_dev_info(dd,
5760 "%s: QSFP fallback failed\n",
5761 __func__);
5762 ppd->driver_link_ready = 1;
5763 }
5764 }
5765}
5766
5767int bringup_serdes(struct hfi1_pportdata *ppd)
5768{
5769 struct hfi1_devdata *dd = ppd->dd;
5770 u64 guid;
5771 int ret;
5772
5773 if (HFI1_CAP_IS_KSET(EXTENDED_PSN))
5774 add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK);
5775
5776 guid = ppd->guid;
5777 if (!guid) {
5778 if (dd->base_guid)
5779 guid = dd->base_guid + ppd->port - 1;
5780 ppd->guid = guid;
5781 }
5782
5783 /* the link defaults to enabled */
5784 ppd->link_enabled = 1;
5785 /* Set linkinit_reason on power up per OPA spec */
5786 ppd->linkinit_reason = OPA_LINKINIT_REASON_LINKUP;
5787
5788 if (loopback) {
5789 ret = init_loopback(dd);
5790 if (ret < 0)
5791 return ret;
5792 }
5793
5794 return start_link(ppd);
5795}
5796
5797void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
5798{
5799 struct hfi1_devdata *dd = ppd->dd;
5800
5801 /*
 5802 	 * Shut down the link and keep it down.  First clear the flag that
 5803 	 * indicates the driver wants to allow the link to be up
 5804 	 * (driver_link_ready).  Then make sure the link is not automatically
 5805 	 * restarted (link_enabled).  Cancel any pending restart, and
 5806 	 * finally go offline.
5807 */
5808 ppd->driver_link_ready = 0;
5809 ppd->link_enabled = 0;
5810
5811 set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
5812 OPA_LINKDOWN_REASON_SMA_DISABLED);
5813 set_link_state(ppd, HLS_DN_OFFLINE);
5814
5815 /* disable the port */
5816 clear_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
5817}
5818
5819static inline int init_cpu_counters(struct hfi1_devdata *dd)
5820{
5821 struct hfi1_pportdata *ppd;
5822 int i;
5823
5824 ppd = (struct hfi1_pportdata *)(dd + 1);
5825 for (i = 0; i < dd->num_pports; i++, ppd++) {
 5828 		ppd->ibport_data.rc_acks = alloc_percpu(u64);
 5829 		ppd->ibport_data.rc_qacks = alloc_percpu(u64);
5830 ppd->ibport_data.rc_delayed_comp = alloc_percpu(u64);
5831 if ((ppd->ibport_data.rc_acks == NULL) ||
5832 (ppd->ibport_data.rc_delayed_comp == NULL) ||
5833 (ppd->ibport_data.rc_qacks == NULL))
5834 return -ENOMEM;
5835 }
5836
5837 return 0;
5838}
5839
5840static const char * const pt_names[] = {
5841 "expected",
5842 "eager",
5843 "invalid"
5844};
5845
5846static const char *pt_name(u32 type)
5847{
5848 return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type];
5849}
5850
5851/*
5852 * index is the index into the receive array
5853 */
5854void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
5855 u32 type, unsigned long pa, u16 order)
5856{
5857 u64 reg;
5858 void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc :
5859 (dd->kregbase + RCV_ARRAY));
5860
5861 if (!(dd->flags & HFI1_PRESENT))
5862 goto done;
5863
5864 if (type == PT_INVALID) {
5865 pa = 0;
5866 } else if (type > PT_INVALID) {
5867 dd_dev_err(dd,
5868 "unexpected receive array type %u for index %u, not handled\n",
5869 type, index);
5870 goto done;
5871 }
5872
5873 hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx",
5874 pt_name(type), index, pa, (unsigned long)order);
5875
5876#define RT_ADDR_SHIFT 12 /* 4KB kernel address boundary */
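	/* entry = write enable + buffer size order + PA in 4KB units */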
5877 reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK
5878 | (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
5879 | ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
5880 << RCV_ARRAY_RT_ADDR_SHIFT;
5881 writeq(reg, base + (index * 8));
5882
5883 if (type == PT_EAGER)
5884 /*
5885 * Eager entries are written one-by-one so we have to push them
5886 * after we write the entry.
5887 */
5888 flush_wc();
5889done:
5890 return;
5891}
5892
5893void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
5894{
5895 struct hfi1_devdata *dd = rcd->dd;
5896 u32 i;
5897
5898 /* this could be optimized */
5899 for (i = rcd->eager_base; i < rcd->eager_base +
5900 rcd->egrbufs.alloced; i++)
5901 hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
5902
5903 for (i = rcd->expected_base;
5904 i < rcd->expected_base + rcd->expected_count; i++)
5905 hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
5906}
5907
5908int hfi1_get_base_kinfo(struct hfi1_ctxtdata *rcd,
5909 struct hfi1_ctxt_info *kinfo)
5910{
5911 kinfo->runtime_flags = (HFI1_MISC_GET() << HFI1_CAP_USER_SHIFT) |
5912 HFI1_CAP_UGET(MASK) | HFI1_CAP_KGET(K2U);
5913 return 0;
5914}
5915
5916struct hfi1_message_header *hfi1_get_msgheader(
5917 struct hfi1_devdata *dd, __le32 *rhf_addr)
5918{
5919 u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
5920
5921 return (struct hfi1_message_header *)
5922 (rhf_addr - dd->rhf_offset + offset);
5923}
5924
5925static const char * const ib_cfg_name_strings[] = {
5926 "HFI1_IB_CFG_LIDLMC",
5927 "HFI1_IB_CFG_LWID_DG_ENB",
5928 "HFI1_IB_CFG_LWID_ENB",
5929 "HFI1_IB_CFG_LWID",
5930 "HFI1_IB_CFG_SPD_ENB",
5931 "HFI1_IB_CFG_SPD",
5932 "HFI1_IB_CFG_RXPOL_ENB",
5933 "HFI1_IB_CFG_LREV_ENB",
5934 "HFI1_IB_CFG_LINKLATENCY",
5935 "HFI1_IB_CFG_HRTBT",
5936 "HFI1_IB_CFG_OP_VLS",
5937 "HFI1_IB_CFG_VL_HIGH_CAP",
5938 "HFI1_IB_CFG_VL_LOW_CAP",
5939 "HFI1_IB_CFG_OVERRUN_THRESH",
5940 "HFI1_IB_CFG_PHYERR_THRESH",
5941 "HFI1_IB_CFG_LINKDEFAULT",
5942 "HFI1_IB_CFG_PKEYS",
5943 "HFI1_IB_CFG_MTU",
5944 "HFI1_IB_CFG_LSTATE",
5945 "HFI1_IB_CFG_VL_HIGH_LIMIT",
5946 "HFI1_IB_CFG_PMA_TICKS",
5947 "HFI1_IB_CFG_PORT"
5948};
5949
5950static const char *ib_cfg_name(int which)
5951{
5952 if (which < 0 || which >= ARRAY_SIZE(ib_cfg_name_strings))
5953 return "invalid";
5954 return ib_cfg_name_strings[which];
5955}
5956
5957int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which)
5958{
5959 struct hfi1_devdata *dd = ppd->dd;
5960 int val = 0;
5961
5962 switch (which) {
5963 case HFI1_IB_CFG_LWID_ENB: /* allowed Link-width */
5964 val = ppd->link_width_enabled;
5965 break;
5966 case HFI1_IB_CFG_LWID: /* currently active Link-width */
5967 val = ppd->link_width_active;
5968 break;
5969 case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
5970 val = ppd->link_speed_enabled;
5971 break;
5972 case HFI1_IB_CFG_SPD: /* current Link speed */
5973 val = ppd->link_speed_active;
5974 break;
5975
5976 case HFI1_IB_CFG_RXPOL_ENB: /* Auto-RX-polarity enable */
5977 case HFI1_IB_CFG_LREV_ENB: /* Auto-Lane-reversal enable */
5978 case HFI1_IB_CFG_LINKLATENCY:
5979 goto unimplemented;
5980
5981 case HFI1_IB_CFG_OP_VLS:
5982 val = ppd->vls_operational;
5983 break;
5984 case HFI1_IB_CFG_VL_HIGH_CAP: /* VL arb high priority table size */
5985 val = VL_ARB_HIGH_PRIO_TABLE_SIZE;
5986 break;
5987 case HFI1_IB_CFG_VL_LOW_CAP: /* VL arb low priority table size */
5988 val = VL_ARB_LOW_PRIO_TABLE_SIZE;
5989 break;
5990 case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
5991 val = ppd->overrun_threshold;
5992 break;
5993 case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
5994 val = ppd->phy_error_threshold;
5995 break;
5996 case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
5997 val = dd->link_default;
5998 break;
5999
6000 case HFI1_IB_CFG_HRTBT: /* Heartbeat off/enable/auto */
6001 case HFI1_IB_CFG_PMA_TICKS:
6002 default:
6003unimplemented:
6004 if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6005 dd_dev_info(
6006 dd,
6007 "%s: which %s: not implemented\n",
6008 __func__,
6009 ib_cfg_name(which));
6010 break;
6011 }
6012
6013 return val;
6014}
6015
6016/*
6017 * The largest MAD packet size.
6018 */
6019#define MAX_MAD_PACKET 2048
6020
6021/*
6022 * Return the maximum header bytes that can go on the _wire_
 6023 * for this device. This count includes the ICRC, which is
 6024 * not part of the packet held in memory but is appended
 6025 * by the HW.
6026 * This is dependent on the device's receive header entry size.
6027 * HFI allows this to be set per-receive context, but the
6028 * driver presently enforces a global value.
6029 */
6030u32 lrh_max_header_bytes(struct hfi1_devdata *dd)
6031{
6032 /*
 6033 	 * The maximum non-payload (header) bytes in LRH.PktLen are
6034 * the Receive Header Entry Size minus the PBC (or RHF) size
6035 * plus one DW for the ICRC appended by HW.
6036 *
6037 * dd->rcd[0].rcvhdrqentsize is in DW.
 6038 	 * We use rcd[0] as all contexts will have the same value. Also,
6039 * the first kernel context would have been allocated by now so
6040 * we are guaranteed a valid value.
6041 */
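	/* e.g. a 32 DW header entry size gives (32 - 2 + 1) * 4 = 124 bytes */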
6042 return (dd->rcd[0]->rcvhdrqentsize - 2/*PBC/RHF*/ + 1/*ICRC*/) << 2;
6043}
6044
6045/*
6046 * Set Send Length
6047 * @ppd - per port data
6048 *
6049 * Set the MTU by limiting how many DWs may be sent. The SendLenCheck*
6050 * registers compare against LRH.PktLen, so use the max bytes included
6051 * in the LRH.
6052 *
6053 * This routine changes all VL values except VL15, which it maintains at
6054 * the same value.
6055 */
6056static void set_send_length(struct hfi1_pportdata *ppd)
6057{
6058 struct hfi1_devdata *dd = ppd->dd;
6059 u32 max_hb = lrh_max_header_bytes(dd), maxvlmtu = 0, dcmtu;
6060 u64 len1 = 0, len2 = (((dd->vld[15].mtu + max_hb) >> 2)
6061 & SEND_LEN_CHECK1_LEN_VL15_MASK) <<
6062 SEND_LEN_CHECK1_LEN_VL15_SHIFT;
6063 int i;
6064
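	/*
	 * VLs 0-3 are packed into SEND_LEN_CHECK0; VLs 4-7 join VL15 in
	 * SEND_LEN_CHECK1.
	 */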
6065 for (i = 0; i < ppd->vls_supported; i++) {
6066 if (dd->vld[i].mtu > maxvlmtu)
6067 maxvlmtu = dd->vld[i].mtu;
6068 if (i <= 3)
6069 len1 |= (((dd->vld[i].mtu + max_hb) >> 2)
6070 & SEND_LEN_CHECK0_LEN_VL0_MASK) <<
6071 ((i % 4) * SEND_LEN_CHECK0_LEN_VL1_SHIFT);
6072 else
6073 len2 |= (((dd->vld[i].mtu + max_hb) >> 2)
6074 & SEND_LEN_CHECK1_LEN_VL4_MASK) <<
6075 ((i % 4) * SEND_LEN_CHECK1_LEN_VL5_SHIFT);
6076 }
6077 write_csr(dd, SEND_LEN_CHECK0, len1);
6078 write_csr(dd, SEND_LEN_CHECK1, len2);
6079 /* adjust kernel credit return thresholds based on new MTUs */
6080 /* all kernel receive contexts have the same hdrqentsize */
6081 for (i = 0; i < ppd->vls_supported; i++) {
6082 sc_set_cr_threshold(dd->vld[i].sc,
6083 sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu,
6084 dd->rcd[0]->rcvhdrqentsize));
6085 }
6086 sc_set_cr_threshold(dd->vld[15].sc,
6087 sc_mtu_to_threshold(dd->vld[15].sc, dd->vld[15].mtu,
6088 dd->rcd[0]->rcvhdrqentsize));
6089
6090 /* Adjust maximum MTU for the port in DC */
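	/*
	 * Illustrative encoding from the expression below: an 8192-byte
	 * maximum VL MTU gives ilog2(8192 >> 8) + 1 = 6, while 10240 uses
	 * the dedicated 10KB capability value.
	 */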
6091 dcmtu = maxvlmtu == 10240 ? DCC_CFG_PORT_MTU_CAP_10240 :
6092 (ilog2(maxvlmtu >> 8) + 1);
6093 len1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG);
6094 len1 &= ~DCC_CFG_PORT_CONFIG_MTU_CAP_SMASK;
6095 len1 |= ((u64)dcmtu & DCC_CFG_PORT_CONFIG_MTU_CAP_MASK) <<
6096 DCC_CFG_PORT_CONFIG_MTU_CAP_SHIFT;
6097 write_csr(ppd->dd, DCC_CFG_PORT_CONFIG, len1);
6098}
6099
6100static void set_lidlmc(struct hfi1_pportdata *ppd)
6101{
6102 int i;
6103 u64 sreg = 0;
6104 struct hfi1_devdata *dd = ppd->dd;
6105 u32 mask = ~((1U << ppd->lmc) - 1);
6106 u64 c1 = read_csr(ppd->dd, DCC_CFG_PORT_CONFIG1);
6107
6108 if (dd->hfi1_snoop.mode_flag)
6109 dd_dev_info(dd, "Set lid/lmc while snooping");
6110
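	/*
	 * mask clears the low LMC bits of the LID: e.g. an LMC of 2 yields
	 * a mask of ~0x3.  It is written as the DLID mask below and into
	 * each send context's SLID check.
	 */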
6111 c1 &= ~(DCC_CFG_PORT_CONFIG1_TARGET_DLID_SMASK
6112 | DCC_CFG_PORT_CONFIG1_DLID_MASK_SMASK);
6113 c1 |= ((ppd->lid & DCC_CFG_PORT_CONFIG1_TARGET_DLID_MASK)
 6114 		<< DCC_CFG_PORT_CONFIG1_TARGET_DLID_SHIFT) |
6115 ((mask & DCC_CFG_PORT_CONFIG1_DLID_MASK_MASK)
6116 << DCC_CFG_PORT_CONFIG1_DLID_MASK_SHIFT);
6117 write_csr(ppd->dd, DCC_CFG_PORT_CONFIG1, c1);
6118
6119 /*
6120 * Iterate over all the send contexts and set their SLID check
6121 */
6122 sreg = ((mask & SEND_CTXT_CHECK_SLID_MASK_MASK) <<
6123 SEND_CTXT_CHECK_SLID_MASK_SHIFT) |
6124 (((ppd->lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) <<
6125 SEND_CTXT_CHECK_SLID_VALUE_SHIFT);
6126
6127 for (i = 0; i < dd->chip_send_contexts; i++) {
6128 hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x",
6129 i, (u32)sreg);
6130 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg);
6131 }
6132
6133 /* Now we have to do the same thing for the sdma engines */
6134 sdma_update_lmc(dd, mask, ppd->lid);
6135}
6136
6137static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
6138{
6139 unsigned long timeout;
6140 u32 curr_state;
6141
6142 timeout = jiffies + msecs_to_jiffies(msecs);
6143 while (1) {
6144 curr_state = read_physical_state(dd);
6145 if (curr_state == state)
6146 break;
6147 if (time_after(jiffies, timeout)) {
6148 dd_dev_err(dd,
6149 "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
6150 state, curr_state);
6151 return -ETIMEDOUT;
6152 }
6153 usleep_range(1950, 2050); /* sleep 2ms-ish */
6154 }
6155
6156 return 0;
6157}
6158
6159/*
6160 * Helper for set_link_state(). Do not call except from that routine.
6161 * Expects ppd->hls_mutex to be held.
6162 *
6163 * @rem_reason value to be sent to the neighbor
6164 *
6165 * LinkDownReasons only set if transition succeeds.
6166 */
6167static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
6168{
6169 struct hfi1_devdata *dd = ppd->dd;
6170 u32 pstate, previous_state;
6171 u32 last_local_state;
6172 u32 last_remote_state;
6173 int ret;
6174 int do_transition;
6175 int do_wait;
6176
6177 previous_state = ppd->host_link_state;
6178 ppd->host_link_state = HLS_GOING_OFFLINE;
6179 pstate = read_physical_state(dd);
6180 if (pstate == PLS_OFFLINE) {
6181 do_transition = 0; /* in right state */
6182 do_wait = 0; /* ...no need to wait */
6183 } else if ((pstate & 0xff) == PLS_OFFLINE) {
6184 do_transition = 0; /* in an offline transient state */
6185 do_wait = 1; /* ...wait for it to settle */
6186 } else {
6187 do_transition = 1; /* need to move to offline */
6188 do_wait = 1; /* ...will need to wait */
6189 }
6190
6191 if (do_transition) {
6192 ret = set_physical_link_state(dd,
6193 PLS_OFFLINE | (rem_reason << 8));
6194
6195 if (ret != HCMD_SUCCESS) {
6196 dd_dev_err(dd,
6197 "Failed to transition to Offline link state, return %d\n",
6198 ret);
6199 return -EINVAL;
6200 }
6201 if (ppd->offline_disabled_reason == OPA_LINKDOWN_REASON_NONE)
6202 ppd->offline_disabled_reason =
6203 OPA_LINKDOWN_REASON_TRANSIENT;
6204 }
6205
6206 if (do_wait) {
6207 /* it can take a while for the link to go down */
6208 ret = wait_phy_linkstate(dd, PLS_OFFLINE, 5000);
6209 if (ret < 0)
6210 return ret;
6211 }
6212
6213 /* make sure the logical state is also down */
6214 wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
6215
6216 /*
6217 * Now in charge of LCB - must be after the physical state is
6218 * offline.quiet and before host_link_state is changed.
6219 */
6220 set_host_lcb_access(dd);
6221 write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */
6222 ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
6223
6224 /*
6225 * The LNI has a mandatory wait time after the physical state
6226 * moves to Offline.Quiet. The wait time may be different
6227 * depending on how the link went down. The 8051 firmware
6228 * will observe the needed wait time and only move to ready
6229 * when that is completed. The largest of the quiet timeouts
6230 * is 2.5s, so wait that long and then a bit more.
6231 */
6232 ret = wait_fm_ready(dd, 3000);
6233 if (ret) {
6234 dd_dev_err(dd,
6235 "After going offline, timed out waiting for the 8051 to become ready to accept host requests\n");
6236 /* state is really offline, so make it so */
6237 ppd->host_link_state = HLS_DN_OFFLINE;
6238 return ret;
6239 }
6240
6241 /*
6242 * The state is now offline and the 8051 is ready to accept host
6243 * requests.
6244 * - change our state
6245 * - notify others if we were previously in a linkup state
6246 */
6247 ppd->host_link_state = HLS_DN_OFFLINE;
6248 if (previous_state & HLS_UP) {
6249 /* went down while link was up */
6250 handle_linkup_change(dd, 0);
6251 } else if (previous_state
6252 & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
6253 /* went down while attempting link up */
6254 /* byte 1 of last_*_state is the failure reason */
6255 read_last_local_state(dd, &last_local_state);
6256 read_last_remote_state(dd, &last_remote_state);
6257 dd_dev_err(dd,
6258 "LNI failure last states: local 0x%08x, remote 0x%08x\n",
6259 last_local_state, last_remote_state);
6260 }
6261
6262 /* the active link width (downgrade) is 0 on link down */
6263 ppd->link_width_active = 0;
6264 ppd->link_width_downgrade_tx_active = 0;
6265 ppd->link_width_downgrade_rx_active = 0;
6266 ppd->current_egress_rate = 0;
6267 return 0;
6268}
6269
6270/* return the link state name */
6271static const char *link_state_name(u32 state)
6272{
6273 const char *name;
6274 int n = ilog2(state);
6275 static const char * const names[] = {
6276 [__HLS_UP_INIT_BP] = "INIT",
6277 [__HLS_UP_ARMED_BP] = "ARMED",
6278 [__HLS_UP_ACTIVE_BP] = "ACTIVE",
6279 [__HLS_DN_DOWNDEF_BP] = "DOWNDEF",
6280 [__HLS_DN_POLL_BP] = "POLL",
6281 [__HLS_DN_DISABLE_BP] = "DISABLE",
6282 [__HLS_DN_OFFLINE_BP] = "OFFLINE",
6283 [__HLS_VERIFY_CAP_BP] = "VERIFY_CAP",
6284 [__HLS_GOING_UP_BP] = "GOING_UP",
6285 [__HLS_GOING_OFFLINE_BP] = "GOING_OFFLINE",
6286 [__HLS_LINK_COOLDOWN_BP] = "LINK_COOLDOWN"
6287 };
6288
6289 name = n < ARRAY_SIZE(names) ? names[n] : NULL;
6290 return name ? name : "unknown";
6291}
6292
6293/* return the link state reason name */
6294static const char *link_state_reason_name(struct hfi1_pportdata *ppd, u32 state)
6295{
6296 if (state == HLS_UP_INIT) {
6297 switch (ppd->linkinit_reason) {
6298 case OPA_LINKINIT_REASON_LINKUP:
6299 return "(LINKUP)";
6300 case OPA_LINKINIT_REASON_FLAPPING:
6301 return "(FLAPPING)";
6302 case OPA_LINKINIT_OUTSIDE_POLICY:
6303 return "(OUTSIDE_POLICY)";
6304 case OPA_LINKINIT_QUARANTINED:
6305 return "(QUARANTINED)";
6306 case OPA_LINKINIT_INSUFIC_CAPABILITY:
6307 return "(INSUFIC_CAPABILITY)";
6308 default:
6309 break;
6310 }
6311 }
6312 return "";
6313}
6314
6315/*
6316 * driver_physical_state - convert the driver's notion of a port's
6317 * state (an HLS_*) into a physical state (a {IB,OPA}_PORTPHYSSTATE_*).
6318 * Return -1 (converted to a u32) to indicate error.
6319 */
6320u32 driver_physical_state(struct hfi1_pportdata *ppd)
6321{
6322 switch (ppd->host_link_state) {
6323 case HLS_UP_INIT:
6324 case HLS_UP_ARMED:
6325 case HLS_UP_ACTIVE:
6326 return IB_PORTPHYSSTATE_LINKUP;
6327 case HLS_DN_POLL:
6328 return IB_PORTPHYSSTATE_POLLING;
6329 case HLS_DN_DISABLE:
6330 return IB_PORTPHYSSTATE_DISABLED;
6331 case HLS_DN_OFFLINE:
6332 return OPA_PORTPHYSSTATE_OFFLINE;
6333 case HLS_VERIFY_CAP:
6334 return IB_PORTPHYSSTATE_POLLING;
6335 case HLS_GOING_UP:
6336 return IB_PORTPHYSSTATE_POLLING;
6337 case HLS_GOING_OFFLINE:
6338 return OPA_PORTPHYSSTATE_OFFLINE;
6339 case HLS_LINK_COOLDOWN:
6340 return OPA_PORTPHYSSTATE_OFFLINE;
6341 case HLS_DN_DOWNDEF:
6342 default:
6343 dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6344 ppd->host_link_state);
6345 return -1;
6346 }
6347}
6348
6349/*
6350 * driver_logical_state - convert the driver's notion of a port's
6351 * state (an HLS_*) into a logical state (a IB_PORT_*). Return -1
6352 * (converted to a u32) to indicate error.
6353 */
6354u32 driver_logical_state(struct hfi1_pportdata *ppd)
6355{
6356 if (ppd->host_link_state && !(ppd->host_link_state & HLS_UP))
6357 return IB_PORT_DOWN;
6358
6359 switch (ppd->host_link_state & HLS_UP) {
6360 case HLS_UP_INIT:
6361 return IB_PORT_INIT;
6362 case HLS_UP_ARMED:
6363 return IB_PORT_ARMED;
6364 case HLS_UP_ACTIVE:
6365 return IB_PORT_ACTIVE;
6366 default:
6367 dd_dev_err(ppd->dd, "invalid host_link_state 0x%x\n",
6368 ppd->host_link_state);
6369 return -1;
6370 }
6371}
6372
6373void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
6374 u8 neigh_reason, u8 rem_reason)
6375{
6376 if (ppd->local_link_down_reason.latest == 0 &&
6377 ppd->neigh_link_down_reason.latest == 0) {
6378 ppd->local_link_down_reason.latest = lcl_reason;
6379 ppd->neigh_link_down_reason.latest = neigh_reason;
6380 ppd->remote_link_down_reason = rem_reason;
6381 }
6382}
6383
6384/*
6385 * Change the physical and/or logical link state.
6386 *
6387 * Do not call this routine while inside an interrupt. It contains
6388 * calls to routines that can take multiple seconds to finish.
6389 *
6390 * Returns 0 on success, -errno on failure.
6391 */
6392int set_link_state(struct hfi1_pportdata *ppd, u32 state)
6393{
6394 struct hfi1_devdata *dd = ppd->dd;
6395 struct ib_event event = {.device = NULL};
6396 int ret1, ret = 0;
6397 int was_up, is_down;
6398 int orig_new_state, poll_bounce;
6399
6400 mutex_lock(&ppd->hls_lock);
6401
6402 orig_new_state = state;
6403 if (state == HLS_DN_DOWNDEF)
6404 state = dd->link_default;
6405
6406 /* interpret poll -> poll as a link bounce */
6407 poll_bounce = ppd->host_link_state == HLS_DN_POLL
6408 && state == HLS_DN_POLL;
6409
6410 dd_dev_info(dd, "%s: current %s, new %s %s%s\n", __func__,
6411 link_state_name(ppd->host_link_state),
6412 link_state_name(orig_new_state),
6413 poll_bounce ? "(bounce) " : "",
6414 link_state_reason_name(ppd, state));
6415
6416 was_up = !!(ppd->host_link_state & HLS_UP);
6417
6418 /*
6419 * If we're going to a (HLS_*) link state that implies the logical
6420 * link state is neither of (IB_PORT_ARMED, IB_PORT_ACTIVE), then
6421 * reset is_sm_config_started to 0.
6422 */
6423 if (!(state & (HLS_UP_ARMED | HLS_UP_ACTIVE)))
6424 ppd->is_sm_config_started = 0;
6425
6426 /*
6427 * Do nothing if the states match. Let a poll to poll link bounce
6428 * go through.
6429 */
6430 if (ppd->host_link_state == state && !poll_bounce)
6431 goto done;
6432
6433 switch (state) {
6434 case HLS_UP_INIT:
6435 if (ppd->host_link_state == HLS_DN_POLL && (quick_linkup
6436 || dd->icode == ICODE_FUNCTIONAL_SIMULATOR)) {
6437 /*
6438 * Quick link up jumps from polling to here.
6439 *
6440 * Whether in normal or loopback mode, the
6441 * simulator jumps from polling to link up.
6442 * Accept that here.
6443 */
6444 /* OK */;
6445 } else if (ppd->host_link_state != HLS_GOING_UP) {
6446 goto unexpected;
6447 }
6448
6449 ppd->host_link_state = HLS_UP_INIT;
6450 ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
6451 if (ret) {
6452 /* logical state didn't change, stay at going_up */
6453 ppd->host_link_state = HLS_GOING_UP;
6454 dd_dev_err(dd,
6455 "%s: logical state did not change to INIT\n",
6456 __func__);
6457 } else {
6458 /* clear old transient LINKINIT_REASON code */
6459 if (ppd->linkinit_reason >= OPA_LINKINIT_REASON_CLEAR)
6460 ppd->linkinit_reason =
6461 OPA_LINKINIT_REASON_LINKUP;
6462
6463 /* enable the port */
6464 add_rcvctrl(dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
6465
6466 handle_linkup_change(dd, 1);
6467 }
6468 break;
6469 case HLS_UP_ARMED:
6470 if (ppd->host_link_state != HLS_UP_INIT)
6471 goto unexpected;
6472
6473 ppd->host_link_state = HLS_UP_ARMED;
6474 set_logical_state(dd, LSTATE_ARMED);
6475 ret = wait_logical_linkstate(ppd, IB_PORT_ARMED, 1000);
6476 if (ret) {
6477 /* logical state didn't change, stay at init */
6478 ppd->host_link_state = HLS_UP_INIT;
6479 dd_dev_err(dd,
6480 "%s: logical state did not change to ARMED\n",
6481 __func__);
6482 }
6483 /*
6484 * The simulator does not currently implement SMA messages,
6485 * so neighbor_normal is not set. Set it here when we first
6486 * move to Armed.
6487 */
6488 if (dd->icode == ICODE_FUNCTIONAL_SIMULATOR)
6489 ppd->neighbor_normal = 1;
6490 break;
6491 case HLS_UP_ACTIVE:
6492 if (ppd->host_link_state != HLS_UP_ARMED)
6493 goto unexpected;
6494
6495 ppd->host_link_state = HLS_UP_ACTIVE;
6496 set_logical_state(dd, LSTATE_ACTIVE);
6497 ret = wait_logical_linkstate(ppd, IB_PORT_ACTIVE, 1000);
6498 if (ret) {
6499 /* logical state didn't change, stay at armed */
6500 ppd->host_link_state = HLS_UP_ARMED;
6501 dd_dev_err(dd,
6502 "%s: logical state did not change to ACTIVE\n",
6503 __func__);
6504 } else {
6506 /* tell all engines to go running */
6507 sdma_all_running(dd);
6508
 6509 			/* Signal the IB layer that the port has gone active */
6510 event.device = &dd->verbs_dev.ibdev;
6511 event.element.port_num = ppd->port;
6512 event.event = IB_EVENT_PORT_ACTIVE;
6513 }
6514 break;
6515 case HLS_DN_POLL:
6516 if ((ppd->host_link_state == HLS_DN_DISABLE ||
6517 ppd->host_link_state == HLS_DN_OFFLINE) &&
6518 dd->dc_shutdown)
6519 dc_start(dd);
6520 /* Hand LED control to the DC */
6521 write_csr(dd, DCC_CFG_LED_CNTRL, 0);
6522
6523 if (ppd->host_link_state != HLS_DN_OFFLINE) {
6524 u8 tmp = ppd->link_enabled;
6525
6526 ret = goto_offline(ppd, ppd->remote_link_down_reason);
6527 if (ret) {
6528 ppd->link_enabled = tmp;
6529 break;
6530 }
6531 ppd->remote_link_down_reason = 0;
6532
6533 if (ppd->driver_link_ready)
6534 ppd->link_enabled = 1;
6535 }
6536
6537 ret = set_local_link_attributes(ppd);
6538 if (ret)
6539 break;
6540
6541 ppd->port_error_action = 0;
6542 ppd->host_link_state = HLS_DN_POLL;
6543
6544 if (quick_linkup) {
6545 /* quick linkup does not go into polling */
6546 ret = do_quick_linkup(dd);
6547 } else {
6548 ret1 = set_physical_link_state(dd, PLS_POLLING);
6549 if (ret1 != HCMD_SUCCESS) {
6550 dd_dev_err(dd,
6551 "Failed to transition to Polling link state, return 0x%x\n",
6552 ret1);
6553 ret = -EINVAL;
6554 }
6555 }
6556 ppd->offline_disabled_reason = OPA_LINKDOWN_REASON_NONE;
6557 /*
6558 * If an error occurred above, go back to offline. The
6559 * caller may reschedule another attempt.
6560 */
6561 if (ret)
6562 goto_offline(ppd, 0);
6563 break;
6564 case HLS_DN_DISABLE:
6565 /* link is disabled */
6566 ppd->link_enabled = 0;
6567
6568 /* allow any state to transition to disabled */
6569
6570 /* must transition to offline first */
6571 if (ppd->host_link_state != HLS_DN_OFFLINE) {
6572 ret = goto_offline(ppd, ppd->remote_link_down_reason);
6573 if (ret)
6574 break;
6575 ppd->remote_link_down_reason = 0;
6576 }
6577
6578 ret1 = set_physical_link_state(dd, PLS_DISABLED);
6579 if (ret1 != HCMD_SUCCESS) {
6580 dd_dev_err(dd,
6581 "Failed to transition to Disabled link state, return 0x%x\n",
6582 ret1);
6583 ret = -EINVAL;
6584 break;
6585 }
6586 ppd->host_link_state = HLS_DN_DISABLE;
6587 dc_shutdown(dd);
6588 break;
6589 case HLS_DN_OFFLINE:
6590 if (ppd->host_link_state == HLS_DN_DISABLE)
6591 dc_start(dd);
6592
6593 /* allow any state to transition to offline */
6594 ret = goto_offline(ppd, ppd->remote_link_down_reason);
6595 if (!ret)
6596 ppd->remote_link_down_reason = 0;
6597 break;
6598 case HLS_VERIFY_CAP:
6599 if (ppd->host_link_state != HLS_DN_POLL)
6600 goto unexpected;
6601 ppd->host_link_state = HLS_VERIFY_CAP;
6602 break;
6603 case HLS_GOING_UP:
6604 if (ppd->host_link_state != HLS_VERIFY_CAP)
6605 goto unexpected;
6606
6607 ret1 = set_physical_link_state(dd, PLS_LINKUP);
6608 if (ret1 != HCMD_SUCCESS) {
6609 dd_dev_err(dd,
6610 "Failed to transition to link up state, return 0x%x\n",
6611 ret1);
6612 ret = -EINVAL;
6613 break;
6614 }
6615 ppd->host_link_state = HLS_GOING_UP;
6616 break;
6617
6618 case HLS_GOING_OFFLINE: /* transient within goto_offline() */
6619 case HLS_LINK_COOLDOWN: /* transient within goto_offline() */
6620 default:
6621 dd_dev_info(dd, "%s: state 0x%x: not supported\n",
6622 __func__, state);
6623 ret = -EINVAL;
6624 break;
6625 }
6626
6627 is_down = !!(ppd->host_link_state & (HLS_DN_POLL |
6628 HLS_DN_DISABLE | HLS_DN_OFFLINE));
6629
6630 if (was_up && is_down && ppd->local_link_down_reason.sma == 0 &&
6631 ppd->neigh_link_down_reason.sma == 0) {
6632 ppd->local_link_down_reason.sma =
6633 ppd->local_link_down_reason.latest;
6634 ppd->neigh_link_down_reason.sma =
6635 ppd->neigh_link_down_reason.latest;
6636 }
6637
6638 goto done;
6639
6640unexpected:
6641 dd_dev_err(dd, "%s: unexpected state transition from %s to %s\n",
6642 __func__, link_state_name(ppd->host_link_state),
6643 link_state_name(state));
6644 ret = -EINVAL;
6645
6646done:
6647 mutex_unlock(&ppd->hls_lock);
6648
6649 if (event.device)
6650 ib_dispatch_event(&event);
6651
6652 return ret;
6653}
6654
6655int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val)
6656{
6657 u64 reg;
6658 int ret = 0;
6659
6660 switch (which) {
6661 case HFI1_IB_CFG_LIDLMC:
6662 set_lidlmc(ppd);
6663 break;
6664 case HFI1_IB_CFG_VL_HIGH_LIMIT:
6665 /*
6666 * The VL Arbitrator high limit is sent in units of 4k
6667 * bytes, while HFI stores it in units of 64 bytes.
6668 */
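		/*
		 * e.g. a limit of 1 (4 KB) from the FM becomes 64 in the
		 * CSR's 64-byte units.
		 */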
 6669 		val *= 4096 / 64;
6670 reg = ((u64)val & SEND_HIGH_PRIORITY_LIMIT_LIMIT_MASK)
6671 << SEND_HIGH_PRIORITY_LIMIT_LIMIT_SHIFT;
6672 write_csr(ppd->dd, SEND_HIGH_PRIORITY_LIMIT, reg);
6673 break;
6674 case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */
6675 /* HFI only supports POLL as the default link down state */
6676 if (val != HLS_DN_POLL)
6677 ret = -EINVAL;
6678 break;
6679 case HFI1_IB_CFG_OP_VLS:
6680 if (ppd->vls_operational != val) {
6681 ppd->vls_operational = val;
6682 if (!ppd->port)
6683 ret = -EINVAL;
6684 else
6685 ret = sdma_map_init(
6686 ppd->dd,
6687 ppd->port - 1,
6688 val,
6689 NULL);
6690 }
6691 break;
6692 /*
6693 * For link width, link width downgrade, and speed enable, always AND
6694 * the setting with what is actually supported. This has two benefits.
6695 * First, enabled can't have unsupported values, no matter what the
6696 * SM or FM might want. Second, the ALL_SUPPORTED wildcards that mean
6697 * "fill in with your supported value" have all the bits in the
6698 * field set, so simply ANDing with supported has the desired result.
6699 */
6700 case HFI1_IB_CFG_LWID_ENB: /* set allowed Link-width */
6701 ppd->link_width_enabled = val & ppd->link_width_supported;
6702 break;
6703 case HFI1_IB_CFG_LWID_DG_ENB: /* set allowed link width downgrade */
6704 ppd->link_width_downgrade_enabled =
6705 val & ppd->link_width_downgrade_supported;
6706 break;
6707 case HFI1_IB_CFG_SPD_ENB: /* allowed Link speeds */
6708 ppd->link_speed_enabled = val & ppd->link_speed_supported;
6709 break;
6710 case HFI1_IB_CFG_OVERRUN_THRESH: /* IB overrun threshold */
6711 /*
6712 * HFI does not follow IB specs, save this value
6713 * so we can report it, if asked.
6714 */
6715 ppd->overrun_threshold = val;
6716 break;
6717 case HFI1_IB_CFG_PHYERR_THRESH: /* IB PHY error threshold */
6718 /*
6719 * HFI does not follow IB specs, save this value
6720 * so we can report it, if asked.
6721 */
6722 ppd->phy_error_threshold = val;
6723 break;
6724
6725 case HFI1_IB_CFG_MTU:
6726 set_send_length(ppd);
6727 break;
6728
6729 case HFI1_IB_CFG_PKEYS:
6730 if (HFI1_CAP_IS_KSET(PKEY_CHECK))
6731 set_partition_keys(ppd);
6732 break;
6733
6734 default:
6735 if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
6736 dd_dev_info(ppd->dd,
6737 "%s: which %s, val 0x%x: not implemented\n",
6738 __func__, ib_cfg_name(which), val);
6739 break;
6740 }
6741 return ret;
6742}
6743
6744/* begin functions related to vl arbitration table caching */
6745static void init_vl_arb_caches(struct hfi1_pportdata *ppd)
6746{
6747 int i;
6748
6749 BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6750 VL_ARB_LOW_PRIO_TABLE_SIZE);
6751 BUILD_BUG_ON(VL_ARB_TABLE_SIZE !=
6752 VL_ARB_HIGH_PRIO_TABLE_SIZE);
6753
6754 /*
6755 * Note that we always return values directly from the
6756 * 'vl_arb_cache' (and do no CSR reads) in response to a
6757 * 'Get(VLArbTable)'. This is obviously correct after a
6758 * 'Set(VLArbTable)', since the cache will then be up to
6759 * date. But it's also correct prior to any 'Set(VLArbTable)'
 6760 	 * since then both the cache and the relevant h/w registers
6761 * will be zeroed.
6762 */
6763
6764 for (i = 0; i < MAX_PRIO_TABLE; i++)
6765 spin_lock_init(&ppd->vl_arb_cache[i].lock);
6766}
6767
6768/*
6769 * vl_arb_lock_cache
6770 *
6771 * All other vl_arb_* functions should be called only after locking
6772 * the cache.
6773 */
6774static inline struct vl_arb_cache *
6775vl_arb_lock_cache(struct hfi1_pportdata *ppd, int idx)
6776{
6777 if (idx != LO_PRIO_TABLE && idx != HI_PRIO_TABLE)
6778 return NULL;
6779 spin_lock(&ppd->vl_arb_cache[idx].lock);
6780 return &ppd->vl_arb_cache[idx];
6781}
6782
6783static inline void vl_arb_unlock_cache(struct hfi1_pportdata *ppd, int idx)
6784{
6785 spin_unlock(&ppd->vl_arb_cache[idx].lock);
6786}
6787
6788static void vl_arb_get_cache(struct vl_arb_cache *cache,
6789 struct ib_vl_weight_elem *vl)
6790{
6791 memcpy(vl, cache->table, VL_ARB_TABLE_SIZE * sizeof(*vl));
6792}
6793
6794static void vl_arb_set_cache(struct vl_arb_cache *cache,
6795 struct ib_vl_weight_elem *vl)
6796{
6797 memcpy(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
6798}
6799
6800static int vl_arb_match_cache(struct vl_arb_cache *cache,
6801 struct ib_vl_weight_elem *vl)
6802{
6803 return !memcmp(cache->table, vl, VL_ARB_TABLE_SIZE * sizeof(*vl));
6804}
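
/*
 * Illustrative sketch only (hypothetical helper, not called by the driver):
 * the lock/copy/unlock pattern the cache helpers above are meant to be used
 * with. A NULL return from vl_arb_lock_cache() means the table index was
 * invalid and nothing was locked.
 */
static inline void example_snapshot_low_prio_table(struct hfi1_pportdata *ppd,
						   struct ib_vl_weight_elem *vl)
{
	struct vl_arb_cache *vlc;

	vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
	if (!vlc)
		return;			/* invalid table index */
	vl_arb_get_cache(vlc, vl);	/* copies VL_ARB_TABLE_SIZE elements */
	vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
}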
6805/* end functions related to vl arbitration table caching */
6806
6807static int set_vl_weights(struct hfi1_pportdata *ppd, u32 target,
6808 u32 size, struct ib_vl_weight_elem *vl)
6809{
6810 struct hfi1_devdata *dd = ppd->dd;
6811 u64 reg;
6812 unsigned int i, is_up = 0;
6813 int drain, ret = 0;
6814
6815 mutex_lock(&ppd->hls_lock);
6816
6817 if (ppd->host_link_state & HLS_UP)
6818 is_up = 1;
6819
6820 drain = !is_ax(dd) && is_up;
6821
6822 if (drain)
6823 /*
6824 * Before adjusting VL arbitration weights, empty per-VL
6825 * FIFOs, otherwise a packet whose VL weight is being
6826 * set to 0 could get stuck in a FIFO with no chance to
6827 * egress.
6828 */
6829 ret = stop_drain_data_vls(dd);
6830
6831 if (ret) {
6832 dd_dev_err(
6833 dd,
6834 "%s: cannot stop/drain VLs - refusing to change VL arbitration weights\n",
6835 __func__);
6836 goto err;
6837 }
6838
6839 for (i = 0; i < size; i++, vl++) {
6840 /*
6841 * NOTE: The low priority shift and mask are used here, but
6842 * they are the same for both the low and high registers.
6843 */
6844 reg = (((u64)vl->vl & SEND_LOW_PRIORITY_LIST_VL_MASK)
6845 << SEND_LOW_PRIORITY_LIST_VL_SHIFT)
6846 | (((u64)vl->weight
6847 & SEND_LOW_PRIORITY_LIST_WEIGHT_MASK)
6848 << SEND_LOW_PRIORITY_LIST_WEIGHT_SHIFT);
6849 write_csr(dd, target + (i * 8), reg);
6850 }
6851 pio_send_control(dd, PSC_GLOBAL_VLARB_ENABLE);
6852
6853 if (drain)
6854 open_fill_data_vls(dd); /* reopen all VLs */
6855
6856err:
6857 mutex_unlock(&ppd->hls_lock);
6858
6859 return ret;
6860}
6861
6862/*
6863 * Read one credit merge VL register.
6864 */
6865static void read_one_cm_vl(struct hfi1_devdata *dd, u32 csr,
6866 struct vl_limit *vll)
6867{
6868 u64 reg = read_csr(dd, csr);
6869
6870 vll->dedicated = cpu_to_be16(
6871 (reg >> SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT)
6872 & SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_MASK);
6873 vll->shared = cpu_to_be16(
6874 (reg >> SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT)
6875 & SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_MASK);
6876}
6877
6878/*
6879 * Read the current credit merge limits.
6880 */
6881static int get_buffer_control(struct hfi1_devdata *dd,
6882 struct buffer_control *bc, u16 *overall_limit)
6883{
6884 u64 reg;
6885 int i;
6886
6887 /* not all entries are filled in */
6888 memset(bc, 0, sizeof(*bc));
6889
6890 /* OPA and HFI have a 1-1 mapping */
6891 for (i = 0; i < TXE_NUM_DATA_VL; i++)
6892 read_one_cm_vl(dd, SEND_CM_CREDIT_VL + (8*i), &bc->vl[i]);
6893
6894 /* NOTE: assumes that VL* and VL15 CSRs are bit-wise identical */
6895 read_one_cm_vl(dd, SEND_CM_CREDIT_VL15, &bc->vl[15]);
6896
6897 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
6898 bc->overall_shared_limit = cpu_to_be16(
6899 (reg >> SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT)
6900 & SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_MASK);
6901 if (overall_limit)
6902 *overall_limit = (reg
6903 >> SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT)
6904 & SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_MASK;
6905 return sizeof(struct buffer_control);
6906}
6907
6908static int get_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
6909{
6910 u64 reg;
6911 int i;
6912
6913 /* each register contains 16 SC->VLnt mappings, 4 bits each */
6914 reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_15_0);
6915 for (i = 0; i < sizeof(u64); i++) {
6916 u8 byte = *(((u8 *)&reg) + i);
6917
6918 dp->vlnt[2 * i] = byte & 0xf;
6919 dp->vlnt[(2 * i) + 1] = (byte & 0xf0) >> 4;
6920 }
6921
6922 reg = read_csr(dd, DCC_CFG_SC_VL_TABLE_31_16);
6923 for (i = 0; i < sizeof(u64); i++) {
6924 u8 byte = *(((u8 *)&reg) + i);
6925
6926 dp->vlnt[16 + (2 * i)] = byte & 0xf;
6927 dp->vlnt[16 + (2 * i) + 1] = (byte & 0xf0) >> 4;
6928 }
6929 return sizeof(struct sc2vlnt);
6930}
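
/*
 * Worked example (illustrative only) of the nibble unpacking above: if
 * byte 0 of DCC_CFG_SC_VL_TABLE_15_0 reads 0x21, the low nibble gives
 * dp->vlnt[0] = 1 and the high nibble gives dp->vlnt[1] = 2, i.e. SC0
 * maps to VLnt 1 and SC1 maps to VLnt 2.
 */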
6931
6932static void get_vlarb_preempt(struct hfi1_devdata *dd, u32 nelems,
6933 struct ib_vl_weight_elem *vl)
6934{
6935 unsigned int i;
6936
6937 for (i = 0; i < nelems; i++, vl++) {
6938 vl->vl = 0xf;
6939 vl->weight = 0;
6940 }
6941}
6942
6943static void set_sc2vlnt(struct hfi1_devdata *dd, struct sc2vlnt *dp)
6944{
6945 write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0,
6946 DC_SC_VL_VAL(15_0,
6947 0, dp->vlnt[0] & 0xf,
6948 1, dp->vlnt[1] & 0xf,
6949 2, dp->vlnt[2] & 0xf,
6950 3, dp->vlnt[3] & 0xf,
6951 4, dp->vlnt[4] & 0xf,
6952 5, dp->vlnt[5] & 0xf,
6953 6, dp->vlnt[6] & 0xf,
6954 7, dp->vlnt[7] & 0xf,
6955 8, dp->vlnt[8] & 0xf,
6956 9, dp->vlnt[9] & 0xf,
6957 10, dp->vlnt[10] & 0xf,
6958 11, dp->vlnt[11] & 0xf,
6959 12, dp->vlnt[12] & 0xf,
6960 13, dp->vlnt[13] & 0xf,
6961 14, dp->vlnt[14] & 0xf,
6962 15, dp->vlnt[15] & 0xf));
6963 write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16,
6964 DC_SC_VL_VAL(31_16,
6965 16, dp->vlnt[16] & 0xf,
6966 17, dp->vlnt[17] & 0xf,
6967 18, dp->vlnt[18] & 0xf,
6968 19, dp->vlnt[19] & 0xf,
6969 20, dp->vlnt[20] & 0xf,
6970 21, dp->vlnt[21] & 0xf,
6971 22, dp->vlnt[22] & 0xf,
6972 23, dp->vlnt[23] & 0xf,
6973 24, dp->vlnt[24] & 0xf,
6974 25, dp->vlnt[25] & 0xf,
6975 26, dp->vlnt[26] & 0xf,
6976 27, dp->vlnt[27] & 0xf,
6977 28, dp->vlnt[28] & 0xf,
6978 29, dp->vlnt[29] & 0xf,
6979 30, dp->vlnt[30] & 0xf,
6980 31, dp->vlnt[31] & 0xf));
6981}
6982
6983static void nonzero_msg(struct hfi1_devdata *dd, int idx, const char *what,
6984 u16 limit)
6985{
6986 if (limit != 0)
6987 dd_dev_info(dd, "Invalid %s limit %d on VL %d, ignoring\n",
6988 what, (int)limit, idx);
6989}
6990
6991/* change only the shared limit portion of SendCmGlobalCredit */
6992static void set_global_shared(struct hfi1_devdata *dd, u16 limit)
6993{
6994 u64 reg;
6995
6996 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
6997 reg &= ~SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SMASK;
6998 reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_SHARED_LIMIT_SHIFT;
6999 write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7000}
7001
7002/* change only the total credit limit portion of SendCmGlobalCredit */
7003static void set_global_limit(struct hfi1_devdata *dd, u16 limit)
7004{
7005 u64 reg;
7006
7007 reg = read_csr(dd, SEND_CM_GLOBAL_CREDIT);
7008 reg &= ~SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SMASK;
7009 reg |= (u64)limit << SEND_CM_GLOBAL_CREDIT_TOTAL_CREDIT_LIMIT_SHIFT;
7010 write_csr(dd, SEND_CM_GLOBAL_CREDIT, reg);
7011}
7012
7013/* set the given per-VL shared limit */
7014static void set_vl_shared(struct hfi1_devdata *dd, int vl, u16 limit)
7015{
7016 u64 reg;
7017 u32 addr;
7018
7019 if (vl < TXE_NUM_DATA_VL)
7020 addr = SEND_CM_CREDIT_VL + (8 * vl);
7021 else
7022 addr = SEND_CM_CREDIT_VL15;
7023
7024 reg = read_csr(dd, addr);
7025 reg &= ~SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SMASK;
7026 reg |= (u64)limit << SEND_CM_CREDIT_VL_SHARED_LIMIT_VL_SHIFT;
7027 write_csr(dd, addr, reg);
7028}
7029
7030/* set the given per-VL dedicated limit */
7031static void set_vl_dedicated(struct hfi1_devdata *dd, int vl, u16 limit)
7032{
7033 u64 reg;
7034 u32 addr;
7035
7036 if (vl < TXE_NUM_DATA_VL)
7037 addr = SEND_CM_CREDIT_VL + (8 * vl);
7038 else
7039 addr = SEND_CM_CREDIT_VL15;
7040
7041 reg = read_csr(dd, addr);
7042 reg &= ~SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SMASK;
7043 reg |= (u64)limit << SEND_CM_CREDIT_VL_DEDICATED_LIMIT_VL_SHIFT;
7044 write_csr(dd, addr, reg);
7045}
7046
7047/* spin until the given per-VL status mask bits clear */
7048static void wait_for_vl_status_clear(struct hfi1_devdata *dd, u64 mask,
7049 const char *which)
7050{
7051 unsigned long timeout;
7052 u64 reg;
7053
7054 timeout = jiffies + msecs_to_jiffies(VL_STATUS_CLEAR_TIMEOUT);
7055 while (1) {
7056 reg = read_csr(dd, SEND_CM_CREDIT_USED_STATUS) & mask;
7057
7058 if (reg == 0)
7059 return; /* success */
7060 if (time_after(jiffies, timeout))
7061 break; /* timed out */
7062 udelay(1);
7063 }
7064
7065 dd_dev_err(dd,
7066 "%s credit change status not clearing after %dms, mask 0x%llx, not clear 0x%llx\n",
7067 which, VL_STATUS_CLEAR_TIMEOUT, mask, reg);
7068 /*
7069 * If this occurs, it is likely there was a credit loss on the link.
7070 * The only recovery from that is a link bounce.
7071 */
7072 dd_dev_err(dd,
7073 "Continuing anyway. A credit loss may occur. Suggest a link bounce\n");
7074}
7075
7076/*
7077 * The number of credits on the VLs may be changed while everything
7078 * is "live", but the following algorithm must be followed due to
7079 * how the hardware is actually implemented. In particular,
7080 * Return_Credit_Status[] is the only correct status check.
7081 *
7082 * if (reducing Global_Shared_Credit_Limit or any shared limit changing)
7083 * set Global_Shared_Credit_Limit = 0
7084 * use_all_vl = 1
7085 * mask0 = all VLs that are changing either dedicated or shared limits
7086 * set Shared_Limit[mask0] = 0
7087 * spin until Return_Credit_Status[use_all_vl ? all VL : mask0] == 0
7088 * if (changing any dedicated limit)
7089 * mask1 = all VLs that are lowering dedicated limits
7090 * lower Dedicated_Limit[mask1]
7091 * spin until Return_Credit_Status[mask1] == 0
7092 * raise Dedicated_Limits
7093 * raise Shared_Limits
7094 * raise Global_Shared_Credit_Limit
7095 *
7096 * lower = if the new limit is lower, set the limit to the new value
7097 * raise = if the new limit is higher than the current value (may be changed
7098 * earlier in the algorithm), set the limit to the new value
7099 */
7100static int set_buffer_control(struct hfi1_devdata *dd,
7101 struct buffer_control *new_bc)
7102{
7103 u64 changing_mask, ld_mask, stat_mask;
7104 int change_count;
7105 int i, use_all_mask;
7106 int this_shared_changing;
7107 /*
7108 * A0: the variable any_shared_limit_changing below (and its use in the
7109 * algorithm above) is an A0 workaround; remove it along with A0 support.
7110 */
7111 int any_shared_limit_changing;
7112 struct buffer_control cur_bc;
7113 u8 changing[OPA_MAX_VLS];
7114 u8 lowering_dedicated[OPA_MAX_VLS];
7115 u16 cur_total;
7116 u32 new_total = 0;
7117 const u64 all_mask =
7118 SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK
7119 | SEND_CM_CREDIT_USED_STATUS_VL1_RETURN_CREDIT_STATUS_SMASK
7120 | SEND_CM_CREDIT_USED_STATUS_VL2_RETURN_CREDIT_STATUS_SMASK
7121 | SEND_CM_CREDIT_USED_STATUS_VL3_RETURN_CREDIT_STATUS_SMASK
7122 | SEND_CM_CREDIT_USED_STATUS_VL4_RETURN_CREDIT_STATUS_SMASK
7123 | SEND_CM_CREDIT_USED_STATUS_VL5_RETURN_CREDIT_STATUS_SMASK
7124 | SEND_CM_CREDIT_USED_STATUS_VL6_RETURN_CREDIT_STATUS_SMASK
7125 | SEND_CM_CREDIT_USED_STATUS_VL7_RETURN_CREDIT_STATUS_SMASK
7126 | SEND_CM_CREDIT_USED_STATUS_VL15_RETURN_CREDIT_STATUS_SMASK;
7127
7128#define valid_vl(idx) ((idx) < TXE_NUM_DATA_VL || (idx) == 15)
7129#define NUM_USABLE_VLS 16 /* look at VL15 and less */
7130
7132 /* find the new total credits, do sanity check on unused VLs */
7133 for (i = 0; i < OPA_MAX_VLS; i++) {
7134 if (valid_vl(i)) {
7135 new_total += be16_to_cpu(new_bc->vl[i].dedicated);
7136 continue;
7137 }
7138 nonzero_msg(dd, i, "dedicated",
7139 be16_to_cpu(new_bc->vl[i].dedicated));
7140 nonzero_msg(dd, i, "shared",
7141 be16_to_cpu(new_bc->vl[i].shared));
7142 new_bc->vl[i].dedicated = 0;
7143 new_bc->vl[i].shared = 0;
7144 }
7145 new_total += be16_to_cpu(new_bc->overall_shared_limit);
7146 if (new_total > (u32)dd->link_credits)
7147 return -EINVAL;
7148 /* fetch the current values */
7149 get_buffer_control(dd, &cur_bc, &cur_total);
7150
7151 /*
7152 * Create the masks we will use.
7153 */
7154 memset(changing, 0, sizeof(changing));
7155 memset(lowering_dedicated, 0, sizeof(lowering_dedicated));
7156 /* NOTE: Assumes that the individual VL bits are adjacent and in
7157 * increasing order */
7158 stat_mask =
7159 SEND_CM_CREDIT_USED_STATUS_VL0_RETURN_CREDIT_STATUS_SMASK;
7160 changing_mask = 0;
7161 ld_mask = 0;
7162 change_count = 0;
7163 any_shared_limit_changing = 0;
7164 for (i = 0; i < NUM_USABLE_VLS; i++, stat_mask <<= 1) {
7165 if (!valid_vl(i))
7166 continue;
7167 this_shared_changing = new_bc->vl[i].shared
7168 != cur_bc.vl[i].shared;
7169 if (this_shared_changing)
7170 any_shared_limit_changing = 1;
7171 if (new_bc->vl[i].dedicated != cur_bc.vl[i].dedicated
7172 || this_shared_changing) {
7173 changing[i] = 1;
7174 changing_mask |= stat_mask;
7175 change_count++;
7176 }
7177 if (be16_to_cpu(new_bc->vl[i].dedicated) <
7178 be16_to_cpu(cur_bc.vl[i].dedicated)) {
7179 lowering_dedicated[i] = 1;
7180 ld_mask |= stat_mask;
7181 }
7182 }
7183
7184 /* bracket the credit change with a total adjustment */
7185 if (new_total > cur_total)
7186 set_global_limit(dd, new_total);
7187
7188 /*
7189 * Start the credit change algorithm.
7190 */
7191 use_all_mask = 0;
7192 if ((be16_to_cpu(new_bc->overall_shared_limit) <
7193 be16_to_cpu(cur_bc.overall_shared_limit))
7194 || (is_a0(dd) && any_shared_limit_changing)) {
7195 set_global_shared(dd, 0);
7196 cur_bc.overall_shared_limit = 0;
7197 use_all_mask = 1;
7198 }
7199
7200 for (i = 0; i < NUM_USABLE_VLS; i++) {
7201 if (!valid_vl(i))
7202 continue;
7203
7204 if (changing[i]) {
7205 set_vl_shared(dd, i, 0);
7206 cur_bc.vl[i].shared = 0;
7207 }
7208 }
7209
7210 wait_for_vl_status_clear(dd, use_all_mask ? all_mask : changing_mask,
7211 "shared");
7212
7213 if (change_count > 0) {
7214 for (i = 0; i < NUM_USABLE_VLS; i++) {
7215 if (!valid_vl(i))
7216 continue;
7217
7218 if (lowering_dedicated[i]) {
7219 set_vl_dedicated(dd, i,
7220 be16_to_cpu(new_bc->vl[i].dedicated));
7221 cur_bc.vl[i].dedicated =
7222 new_bc->vl[i].dedicated;
7223 }
7224 }
7225
7226 wait_for_vl_status_clear(dd, ld_mask, "dedicated");
7227
7228 /* now raise all dedicated that are going up */
7229 for (i = 0; i < NUM_USABLE_VLS; i++) {
7230 if (!valid_vl(i))
7231 continue;
7232
7233 if (be16_to_cpu(new_bc->vl[i].dedicated) >
7234 be16_to_cpu(cur_bc.vl[i].dedicated))
7235 set_vl_dedicated(dd, i,
7236 be16_to_cpu(new_bc->vl[i].dedicated));
7237 }
7238 }
7239
7240 /* next raise all shared that are going up */
7241 for (i = 0; i < NUM_USABLE_VLS; i++) {
7242 if (!valid_vl(i))
7243 continue;
7244
7245 if (be16_to_cpu(new_bc->vl[i].shared) >
7246 be16_to_cpu(cur_bc.vl[i].shared))
7247 set_vl_shared(dd, i, be16_to_cpu(new_bc->vl[i].shared));
7248 }
7249
7250 /* finally raise the global shared */
7251 if (be16_to_cpu(new_bc->overall_shared_limit) >
7252 be16_to_cpu(cur_bc.overall_shared_limit))
7253 set_global_shared(dd,
7254 be16_to_cpu(new_bc->overall_shared_limit));
7255
7256 /* bracket the credit change with a total adjustment */
7257 if (new_total < cur_total)
7258 set_global_limit(dd, new_total);
7259 return 0;
7260}
7261
7262/*
7263 * Read the given fabric manager table. Return the size of the
7264 * table (in bytes) on success, and a negative error code on
7265 * failure.
7266 */
7267int fm_get_table(struct hfi1_pportdata *ppd, int which, void *t)
7269{
7270 int size;
7271 struct vl_arb_cache *vlc;
7272
7273 switch (which) {
7274 case FM_TBL_VL_HIGH_ARB:
7275 size = 256;
7276 /*
7277 * OPA specifies 128 elements (of 2 bytes each), though
7278 * HFI supports only 16 elements in h/w.
7279 */
7280 vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7281 vl_arb_get_cache(vlc, t);
7282 vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7283 break;
7284 case FM_TBL_VL_LOW_ARB:
7285 size = 256;
7286 /*
7287 * OPA specifies 128 elements (of 2 bytes each), though
7288 * HFI supports only 16 elements in h/w.
7289 */
7290 vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7291 vl_arb_get_cache(vlc, t);
7292 vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7293 break;
7294 case FM_TBL_BUFFER_CONTROL:
7295 size = get_buffer_control(ppd->dd, t, NULL);
7296 break;
7297 case FM_TBL_SC2VLNT:
7298 size = get_sc2vlnt(ppd->dd, t);
7299 break;
7300 case FM_TBL_VL_PREEMPT_ELEMS:
7301 size = 256;
7302 /* OPA specifies 128 elements, of 2 bytes each */
7303 get_vlarb_preempt(ppd->dd, OPA_MAX_VLS, t);
7304 break;
7305 case FM_TBL_VL_PREEMPT_MATRIX:
7306 size = 256;
7307 /*
7308 * OPA specifies that this is the same size as the VL
7309 * arbitration tables (i.e., 256 bytes).
7310 */
7311 break;
7312 default:
7313 return -EINVAL;
7314 }
7315 return size;
7316}
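
/*
 * Illustrative sketch only (hypothetical caller, not part of the driver):
 * fetching the low priority VL arbitration table through fm_get_table().
 * On success the returned size is 256 bytes (128 two-byte elements per
 * OPA), even though only 16 elements are cached and backed by hardware.
 */
static inline int example_fetch_low_prio_arb(struct hfi1_pportdata *ppd,
					     struct ib_vl_weight_elem *buf)
{
	int size = fm_get_table(ppd, FM_TBL_VL_LOW_ARB, buf);

	return size < 0 ? size : 0;	/* negative return is an error code */
}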
7317
7318/*
7319 * Write the given fabric manager table.
7320 */
7321int fm_set_table(struct hfi1_pportdata *ppd, int which, void *t)
7322{
7323 int ret = 0;
7324 struct vl_arb_cache *vlc;
7325
7326 switch (which) {
7327 case FM_TBL_VL_HIGH_ARB:
7328 vlc = vl_arb_lock_cache(ppd, HI_PRIO_TABLE);
7329 if (vl_arb_match_cache(vlc, t)) {
7330 vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7331 break;
7332 }
7333 vl_arb_set_cache(vlc, t);
7334 vl_arb_unlock_cache(ppd, HI_PRIO_TABLE);
7335 ret = set_vl_weights(ppd, SEND_HIGH_PRIORITY_LIST,
7336 VL_ARB_HIGH_PRIO_TABLE_SIZE, t);
7337 break;
7338 case FM_TBL_VL_LOW_ARB:
7339 vlc = vl_arb_lock_cache(ppd, LO_PRIO_TABLE);
7340 if (vl_arb_match_cache(vlc, t)) {
7341 vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7342 break;
7343 }
7344 vl_arb_set_cache(vlc, t);
7345 vl_arb_unlock_cache(ppd, LO_PRIO_TABLE);
7346 ret = set_vl_weights(ppd, SEND_LOW_PRIORITY_LIST,
7347 VL_ARB_LOW_PRIO_TABLE_SIZE, t);
7348 break;
7349 case FM_TBL_BUFFER_CONTROL:
7350 ret = set_buffer_control(ppd->dd, t);
7351 break;
7352 case FM_TBL_SC2VLNT:
7353 set_sc2vlnt(ppd->dd, t);
7354 break;
7355 default:
7356 ret = -EINVAL;
7357 }
7358 return ret;
7359}
7360
7361/*
7362 * Disable all data VLs.
7363 *
7364 * Return 0 if disabled, non-zero if the VLs cannot be disabled.
7365 */
7366static int disable_data_vls(struct hfi1_devdata *dd)
7367{
7368 if (is_a0(dd))
7369 return 1;
7370
7371 pio_send_control(dd, PSC_DATA_VL_DISABLE);
7372
7373 return 0;
7374}
7375
7376/*
7377 * open_fill_data_vls() - the counterpart to stop_drain_data_vls().
7378 * Just re-enables all data VLs (the "fill" part happens
7379 * automatically - the name was chosen for symmetry with
7380 * stop_drain_data_vls()).
7381 *
7382 * Return 0 if successful, non-zero if the VLs cannot be enabled.
7383 */
7384int open_fill_data_vls(struct hfi1_devdata *dd)
7385{
7386 if (is_a0(dd))
7387 return 1;
7388
7389 pio_send_control(dd, PSC_DATA_VL_ENABLE);
7390
7391 return 0;
7392}
7393
7394/*
7395 * drain_data_vls() - assumes that disable_data_vls() has been called;
7396 * waits for the occupancy of the per-VL FIFOs (for all contexts) and the
7397 * SDMA engines to drop to 0.
7398 */
7399static void drain_data_vls(struct hfi1_devdata *dd)
7400{
7401 sc_wait(dd);
7402 sdma_wait(dd);
7403 pause_for_credit_return(dd);
7404}
7405
7406/*
7407 * stop_drain_data_vls() - disable, then drain all per-VL fifos.
7408 *
7409 * Use open_fill_data_vls() to resume using data VLs. This pair is
7410 * meant to be used like this:
7411 *
7412 * stop_drain_data_vls(dd);
7413 * // do things with per-VL resources
7414 * open_fill_data_vls(dd);
7415 */
7416int stop_drain_data_vls(struct hfi1_devdata *dd)
7417{
7418 int ret;
7419
7420 ret = disable_data_vls(dd);
7421 if (ret == 0)
7422 drain_data_vls(dd);
7423
7424 return ret;
7425}
7426
7427/*
7428 * Convert a nanosecond time to a cclock count. No matter how slow
7429 * the cclock, a non-zero ns will always have a non-zero result.
7430 */
7431u32 ns_to_cclock(struct hfi1_devdata *dd, u32 ns)
7432{
7433 u32 cclocks;
7434
7435 if (dd->icode == ICODE_FPGA_EMULATION)
7436 cclocks = (ns * 1000) / FPGA_CCLOCK_PS;
7437 else /* simulation pretends to be ASIC */
7438 cclocks = (ns * 1000) / ASIC_CCLOCK_PS;
7439 if (ns && !cclocks) /* if ns nonzero, must be at least 1 */
7440 cclocks = 1;
7441 return cclocks;
7442}
7443
7444/*
7445 * Convert a cclock count to nanoseconds. No matter how slow
7446 * the cclock, a non-zero cclocks count will always have a non-zero result.
7447 */
7448u32 cclock_to_ns(struct hfi1_devdata *dd, u32 cclocks)
7449{
7450 u32 ns;
7451
7452 if (dd->icode == ICODE_FPGA_EMULATION)
7453 ns = (cclocks * FPGA_CCLOCK_PS) / 1000;
7454 else /* simulation pretends to be ASIC */
7455 ns = (cclocks * ASIC_CCLOCK_PS) / 1000;
7456 if (cclocks && !ns)
7457 ns = 1;
7458 return ns;
7459}
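
/*
 * Illustrative sketch only (hypothetical helper, not used by the driver):
 * round-trip a duration through the two conversions above. Because each
 * direction clamps a non-zero input to at least 1, a non-zero ns value
 * never collapses to 0, although rounding through the cclock period may
 * change it slightly.
 */
static inline u32 example_cclock_round_trip(struct hfi1_devdata *dd, u32 ns)
{
	return cclock_to_ns(dd, ns_to_cclock(dd, ns));
}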
7460
7461/*
7462 * Dynamically adjust the receive interrupt timeout for a context based on
7463 * incoming packet rate.
7464 *
7465 * NOTE: Dynamic adjustment does not allow rcv_intr_count to be zero.
7466 */
7467static void adjust_rcv_timeout(struct hfi1_ctxtdata *rcd, u32 npkts)
7468{
7469 struct hfi1_devdata *dd = rcd->dd;
7470 u32 timeout = rcd->rcvavail_timeout;
7471
7472 /*
7473 * This algorithm doubles or halves the timeout depending on whether
7474 * the number of packets received in this interrupt was less than or
7475 * greater than or equal to the interrupt count.
7476 *
7477 * The calculations below do not allow a steady state to be achieved.
7478 * Only at the endpoints is it possible to have an unchanging
7479 * timeout.
7480 */
7481 if (npkts < rcv_intr_count) {
7482 /*
7483 * Not enough packets arrived before the timeout, adjust
7484 * timeout downward.
7485 */
7486 if (timeout < 2) /* already at minimum? */
7487 return;
7488 timeout >>= 1;
7489 } else {
7490 /*
7491 * More than enough packets arrived before the timeout, adjust
7492 * timeout upward.
7493 */
7494 if (timeout >= dd->rcv_intr_timeout_csr) /* already at max? */
7495 return;
7496 timeout = min(timeout << 1, dd->rcv_intr_timeout_csr);
7497 }
7498
7499 rcd->rcvavail_timeout = timeout;
7500 /* timeout cannot be larger than rcv_intr_timeout_csr, which has already
7501 * been verified to be in range */
7502 write_kctxt_csr(dd, rcd->ctxt, RCV_AVAIL_TIME_OUT,
7503 (u64)timeout << RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
7504}
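
/*
 * Worked example (illustrative only) of the adjustment above: with
 * rcv_intr_count = 16 and a current timeout of 8, an interrupt that saw
 * only 4 packets halves the timeout to 4, while one that saw 32 packets
 * doubles it to 16, capped at dd->rcv_intr_timeout_csr.
 */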
7505
7506void update_usrhead(struct hfi1_ctxtdata *rcd, u32 hd, u32 updegr, u32 egrhd,
7507 u32 intr_adjust, u32 npkts)
7508{
7509 struct hfi1_devdata *dd = rcd->dd;
7510 u64 reg;
7511 u32 ctxt = rcd->ctxt;
7512
7513 /*
7514 * Need to write timeout register before updating RcvHdrHead to ensure
7515 * that a new value is used when the HW decides to restart counting.
7516 */
7517 if (intr_adjust)
7518 adjust_rcv_timeout(rcd, npkts);
7519 if (updegr) {
7520 reg = (egrhd & RCV_EGR_INDEX_HEAD_HEAD_MASK)
7521 << RCV_EGR_INDEX_HEAD_HEAD_SHIFT;
7522 write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, reg);
7523 }
7524 mmiowb();
7525 reg = ((u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT) |
7526 (((u64)hd & RCV_HDR_HEAD_HEAD_MASK)
7527 << RCV_HDR_HEAD_HEAD_SHIFT);
7528 write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
7529 mmiowb();
7530}
7531
7532u32 hdrqempty(struct hfi1_ctxtdata *rcd)
7533{
7534 u32 head, tail;
7535
7536 head = (read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_HEAD)
7537 & RCV_HDR_HEAD_HEAD_SMASK) >> RCV_HDR_HEAD_HEAD_SHIFT;
7538
7539 if (rcd->rcvhdrtail_kvaddr)
7540 tail = get_rcvhdrtail(rcd);
7541 else
7542 tail = read_uctxt_csr(rcd->dd, rcd->ctxt, RCV_HDR_TAIL);
7543
7544 return head == tail;
7545}
7546
7547/*
7548 * Context Control and Receive Array encoding for buffer size:
7549 * 0x0 invalid
7550 * 0x1 4 KB
7551 * 0x2 8 KB
7552 * 0x3 16 KB
7553 * 0x4 32 KB
7554 * 0x5 64 KB
7555 * 0x6 128 KB
7556 * 0x7 256 KB
7557 * 0x8 512 KB (Receive Array only)
7558 * 0x9 1 MB (Receive Array only)
7559 * 0xa 2 MB (Receive Array only)
7560 *
7561 * 0xB-0xF - reserved (Receive Array only)
7562 *
7563 *
7564 * This routine assumes that the value has already been sanity checked.
7565 */
7566static u32 encoded_size(u32 size)
7567{
7568 switch (size) {
7569 case 4*1024: return 0x1;
7570 case 8*1024: return 0x2;
7571 case 16*1024: return 0x3;
7572 case 32*1024: return 0x4;
7573 case 64*1024: return 0x5;
7574 case 128*1024: return 0x6;
7575 case 256*1024: return 0x7;
7576 case 512*1024: return 0x8;
7577 case 1*1024*1024: return 0x9;
7578 case 2*1024*1024: return 0xa;
7579 }
7580 return 0x1; /* if invalid, go with the minimum size */
7581}
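
/*
 * Illustrative sketch only (hypothetical helper, not used by the driver):
 * the inverse of encoded_size() for the valid encodings listed above.
 * Returns 0 for the invalid/reserved encodings.
 */
static inline u32 example_decoded_size(u32 encoded)
{
	if (encoded >= 0x1 && encoded <= 0xa)
		return (4 * 1024) << (encoded - 1); /* 0x1 -> 4 KB ... 0xa -> 2 MB */
	return 0;
}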
7582
7583void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
7584{
7585 struct hfi1_ctxtdata *rcd;
7586 u64 rcvctrl, reg;
7587 int did_enable = 0;
7588
7589 rcd = dd->rcd[ctxt];
7590 if (!rcd)
7591 return;
7592
7593 hfi1_cdbg(RCVCTRL, "ctxt %d op 0x%x", ctxt, op);
7594
7595 rcvctrl = read_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL);
7596 /* if the context is already enabled, don't do the extra steps */
7597 if ((op & HFI1_RCVCTRL_CTXT_ENB)
7598 && !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
7599 /* reset the tail and hdr addresses, and sequence count */
7600 write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
7601 rcd->rcvhdrq_phys);
7602 if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
7603 write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
7604 rcd->rcvhdrqtailaddr_phys);
7605 rcd->seq_cnt = 1;
7606
7607 /* reset the cached receive header queue head value */
7608 rcd->head = 0;
7609
7610 /*
7611 * Zero the receive header queue so we don't get false
7612 * positives when checking the sequence number. The
7613 * sequence numbers could land exactly on the same spot.
7614 * E.g., a context restart before the receive header queue wrapped.
7615 */
7616 memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);
7617
7618 /* starting timeout */
7619 rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr;
7620
7621 /* enable the context */
7622 rcvctrl |= RCV_CTXT_CTRL_ENABLE_SMASK;
7623
7624 /* clear the egr buffer size first */
7625 rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7626 rcvctrl |= ((u64)encoded_size(rcd->egrbufs.rcvtid_size)
7627 & RCV_CTXT_CTRL_EGR_BUF_SIZE_MASK)
7628 << RCV_CTXT_CTRL_EGR_BUF_SIZE_SHIFT;
7629
7630 /* zero RcvHdrHead - set RcvHdrHead.Counter after enable */
7631 write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0);
7632 did_enable = 1;
7633
7634 /* zero RcvEgrIndexHead */
7635 write_uctxt_csr(dd, ctxt, RCV_EGR_INDEX_HEAD, 0);
7636
7637 /* set eager count and base index */
7638 reg = (((u64)(rcd->egrbufs.alloced >> RCV_SHIFT)
7639 & RCV_EGR_CTRL_EGR_CNT_MASK)
7640 << RCV_EGR_CTRL_EGR_CNT_SHIFT) |
7641 (((rcd->eager_base >> RCV_SHIFT)
7642 & RCV_EGR_CTRL_EGR_BASE_INDEX_MASK)
7643 << RCV_EGR_CTRL_EGR_BASE_INDEX_SHIFT);
7644 write_kctxt_csr(dd, ctxt, RCV_EGR_CTRL, reg);
7645
7646 /*
7647 * Set TID (expected) count and base index.
7648 * rcd->expected_count is set to individual RcvArray entries,
7649 * not pairs, and the CSR takes a pair-count in groups of
7650 * four, so divide by 8.
7651 */
7652 reg = (((rcd->expected_count >> RCV_SHIFT)
7653 & RCV_TID_CTRL_TID_PAIR_CNT_MASK)
7654 << RCV_TID_CTRL_TID_PAIR_CNT_SHIFT) |
7655 (((rcd->expected_base >> RCV_SHIFT)
7656 & RCV_TID_CTRL_TID_BASE_INDEX_MASK)
7657 << RCV_TID_CTRL_TID_BASE_INDEX_SHIFT);
7658 write_kctxt_csr(dd, ctxt, RCV_TID_CTRL, reg);
7659 if (ctxt == VL15CTXT)
7660 write_csr(dd, RCV_VL15, VL15CTXT);
7661 }
7662 if (op & HFI1_RCVCTRL_CTXT_DIS) {
7663 write_csr(dd, RCV_VL15, 0);
7664 rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
7665 }
7666 if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
7667 rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7668 if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
7669 rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
7670 if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
7671 rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7672 if (op & HFI1_RCVCTRL_TAILUPD_DIS)
7673 rcvctrl &= ~RCV_CTXT_CTRL_TAIL_UPD_SMASK;
7674 if (op & HFI1_RCVCTRL_TIDFLOW_ENB)
7675 rcvctrl |= RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7676 if (op & HFI1_RCVCTRL_TIDFLOW_DIS)
7677 rcvctrl &= ~RCV_CTXT_CTRL_TID_FLOW_ENABLE_SMASK;
7678 if (op & HFI1_RCVCTRL_ONE_PKT_EGR_ENB) {
7679 /* In one-packet-per-eager mode, the size comes from
7680 * the RcvArray entry. */
7681 rcvctrl &= ~RCV_CTXT_CTRL_EGR_BUF_SIZE_SMASK;
7682 rcvctrl |= RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7683 }
7684 if (op & HFI1_RCVCTRL_ONE_PKT_EGR_DIS)
7685 rcvctrl &= ~RCV_CTXT_CTRL_ONE_PACKET_PER_EGR_BUFFER_SMASK;
7686 if (op & HFI1_RCVCTRL_NO_RHQ_DROP_ENB)
7687 rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7688 if (op & HFI1_RCVCTRL_NO_RHQ_DROP_DIS)
7689 rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK;
7690 if (op & HFI1_RCVCTRL_NO_EGR_DROP_ENB)
7691 rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7692 if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
7693 rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
7694 rcd->rcvctrl = rcvctrl;
7695 hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
7696 write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);
7697
7698 /* work around sticky RcvCtxtStatus.BlockedRHQFull */
7699 if (did_enable
7700 && (rcvctrl & RCV_CTXT_CTRL_DONT_DROP_RHQ_FULL_SMASK)) {
7701 reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7702 if (reg != 0) {
7703 dd_dev_info(dd, "ctxt %d status %lld (blocked)\n",
7704 ctxt, reg);
7705 read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7706 write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x10);
7707 write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, 0x00);
7708 read_uctxt_csr(dd, ctxt, RCV_HDR_HEAD);
7709 reg = read_kctxt_csr(dd, ctxt, RCV_CTXT_STATUS);
7710 dd_dev_info(dd, "ctxt %d status %lld (%s blocked)\n",
7711 ctxt, reg, reg == 0 ? "not" : "still");
7712 }
7713 }
7714
7715 if (did_enable) {
7716 /*
7717 * The interrupt timeout and count must be set after
7718 * the context is enabled to take effect.
7719 */
7720 /* set interrupt timeout */
7721 write_kctxt_csr(dd, ctxt, RCV_AVAIL_TIME_OUT,
7722 (u64)rcd->rcvavail_timeout <<
7723 RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_SHIFT);
7724
7725 /* set RcvHdrHead.Counter, zero RcvHdrHead.Head (again) */
7726 reg = (u64)rcv_intr_count << RCV_HDR_HEAD_COUNTER_SHIFT;
7727 write_uctxt_csr(dd, ctxt, RCV_HDR_HEAD, reg);
7728 }
7729
7730 if (op & (HFI1_RCVCTRL_TAILUPD_DIS | HFI1_RCVCTRL_CTXT_DIS))
7731 /*
7732 * If the context has been disabled and the Tail Update has
7733 * been cleared, clear the RCV_HDR_TAIL_ADDR CSR so
7734 * it doesn't contain an address that is invalid.
7735 */
7736 write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, 0);
7737}
7738
7739u32 hfi1_read_cntrs(struct hfi1_devdata *dd, loff_t pos, char **namep,
7740 u64 **cntrp)
7741{
7742 int ret;
7743 u64 val = 0;
7744
7745 if (namep) {
7746 ret = dd->cntrnameslen;
7747 if (pos != 0) {
7748 dd_dev_err(dd, "read_cntrs does not support indexing");
7749 return 0;
7750 }
7751 *namep = dd->cntrnames;
7752 } else {
7753 const struct cntr_entry *entry;
7754 int i, j;
7755
7756 ret = (dd->ndevcntrs) * sizeof(u64);
7757 if (pos != 0) {
7758 dd_dev_err(dd, "read_cntrs does not support indexing");
7759 return 0;
7760 }
7761
7762 /* Get the start of the block of counters */
7763 *cntrp = dd->cntrs;
7764
7765 /*
7766 * Now go and fill in each counter in the block.
7767 */
7768 for (i = 0; i < DEV_CNTR_LAST; i++) {
7769 entry = &dev_cntrs[i];
7770 hfi1_cdbg(CNTR, "reading %s", entry->name);
7771 if (entry->flags & CNTR_DISABLED) {
7772 /* Nothing */
7773 hfi1_cdbg(CNTR, "\tDisabled\n");
7774 } else {
7775 if (entry->flags & CNTR_VL) {
7776 hfi1_cdbg(CNTR, "\tPer VL\n");
7777 for (j = 0; j < C_VL_COUNT; j++) {
7778 val = entry->rw_cntr(entry,
7779 dd, j,
7780 CNTR_MODE_R,
7781 0);
7782 hfi1_cdbg(
7783 CNTR,
7784 "\t\tRead 0x%llx for %d\n",
7785 val, j);
7786 dd->cntrs[entry->offset + j] =
7787 val;
7788 }
7789 } else {
7790 val = entry->rw_cntr(entry, dd,
7791 CNTR_INVALID_VL,
7792 CNTR_MODE_R, 0);
7793 dd->cntrs[entry->offset] = val;
7794 hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
7795 }
7796 }
7797 }
7798 }
7799 return ret;
7800}
7801
7802/*
7803 * Used by sysfs to create files for hfi stats to read
7804 */
7805u32 hfi1_read_portcntrs(struct hfi1_devdata *dd, loff_t pos, u32 port,
7806 char **namep, u64 **cntrp)
7807{
7808 int ret;
7809 u64 val = 0;
7810
7811 if (namep) {
7812 ret = dd->portcntrnameslen;
7813 if (pos != 0) {
7814 dd_dev_err(dd, "index not supported");
7815 return 0;
7816 }
7817 *namep = dd->portcntrnames;
7818 } else {
7819 const struct cntr_entry *entry;
7820 struct hfi1_pportdata *ppd;
7821 int i, j;
7822
7823 ret = (dd->nportcntrs) * sizeof(u64);
7824 if (pos != 0) {
7825 dd_dev_err(dd, "indexing not supported");
7826 return 0;
7827 }
7828 ppd = (struct hfi1_pportdata *)(dd + 1 + port);
7829 *cntrp = ppd->cntrs;
7830
7831 for (i = 0; i < PORT_CNTR_LAST; i++) {
7832 entry = &port_cntrs[i];
7833 hfi1_cdbg(CNTR, "reading %s", entry->name);
7834 if (entry->flags & CNTR_DISABLED) {
7835 /* Nothing */
7836 hfi1_cdbg(CNTR, "\tDisabled\n");
7837 continue;
7838 }
7839
7840 if (entry->flags & CNTR_VL) {
7841 hfi1_cdbg(CNTR, "\tPer VL");
7842 for (j = 0; j < C_VL_COUNT; j++) {
7843 val = entry->rw_cntr(entry, ppd, j,
7844 CNTR_MODE_R,
7845 0);
7846 hfi1_cdbg(
7847 CNTR,
7848 "\t\tRead 0x%llx for %d",
7849 val, j);
7850 ppd->cntrs[entry->offset + j] = val;
7851 }
7852 } else {
7853 val = entry->rw_cntr(entry, ppd,
7854 CNTR_INVALID_VL,
7855 CNTR_MODE_R,
7856 0);
7857 ppd->cntrs[entry->offset] = val;
7858 hfi1_cdbg(CNTR, "\tRead 0x%llx", val);
7859 }
7860 }
7861 }
7862 return ret;
7863}
7864
7865static void free_cntrs(struct hfi1_devdata *dd)
7866{
7867 struct hfi1_pportdata *ppd;
7868 int i;
7869
7870 if (dd->synth_stats_timer.data)
7871 del_timer_sync(&dd->synth_stats_timer);
7872 dd->synth_stats_timer.data = 0;
7873 ppd = (struct hfi1_pportdata *)(dd + 1);
7874 for (i = 0; i < dd->num_pports; i++, ppd++) {
7875 kfree(ppd->cntrs);
7876 kfree(ppd->scntrs);
7877 free_percpu(ppd->ibport_data.rc_acks);
7878 free_percpu(ppd->ibport_data.rc_qacks);
7879 free_percpu(ppd->ibport_data.rc_delayed_comp);
7880 ppd->cntrs = NULL;
7881 ppd->scntrs = NULL;
7882 ppd->ibport_data.rc_acks = NULL;
7883 ppd->ibport_data.rc_qacks = NULL;
7884 ppd->ibport_data.rc_delayed_comp = NULL;
7885 }
7886 kfree(dd->portcntrnames);
7887 dd->portcntrnames = NULL;
7888 kfree(dd->cntrs);
7889 dd->cntrs = NULL;
7890 kfree(dd->scntrs);
7891 dd->scntrs = NULL;
7892 kfree(dd->cntrnames);
7893 dd->cntrnames = NULL;
7894}
7895
7896#define CNTR_MAX 0xFFFFFFFFFFFFFFFFULL
7897#define CNTR_32BIT_MAX 0x00000000FFFFFFFF
7898
7899static u64 read_dev_port_cntr(struct hfi1_devdata *dd, struct cntr_entry *entry,
7900 u64 *psval, void *context, int vl)
7901{
7902 u64 val;
7903 u64 sval = *psval;
7904
7905 if (entry->flags & CNTR_DISABLED) {
7906 dd_dev_err(dd, "Counter %s not enabled", entry->name);
7907 return 0;
7908 }
7909
7910 hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
7911
7912 val = entry->rw_cntr(entry, context, vl, CNTR_MODE_R, 0);
7913
7914 /* If it's a synthetic counter there is more work we need to do */
7915 if (entry->flags & CNTR_SYNTH) {
7916 if (sval == CNTR_MAX) {
7917 /* No need to read; already saturated */
7918 return CNTR_MAX;
7919 }
7920
7921 if (entry->flags & CNTR_32BIT) {
7922 /* 32bit counters can wrap multiple times */
7923 u64 upper = sval >> 32;
7924 u64 lower = (sval << 32) >> 32;
7925
7926 if (lower > val) { /* hw wrapped */
7927 if (upper == CNTR_32BIT_MAX)
7928 val = CNTR_MAX;
7929 else
7930 upper++;
7931 }
7932
7933 if (val != CNTR_MAX)
7934 val = (upper << 32) | val;
7935
7936 } else {
7937 /* If we rolled, we are saturated */
7938 if ((val < sval) || (val > CNTR_MAX))
7939 val = CNTR_MAX;
7940 }
7941 }
7942
7943 *psval = val;
7944
7945 hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
7946
7947 return val;
7948}
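
/*
 * Illustrative sketch only (hypothetical helper, not used by the driver):
 * the core of the 32-bit wrap handling above, with the CNTR_MAX saturation
 * checks omitted. For example, a saved value of 0x1fffffff0 (upper = 1,
 * lower = 0xfffffff0) and a new hardware reading of 0x10 indicates a wrap,
 * so the result becomes 0x200000010.
 */
static inline u64 example_unwrap_32bit(u64 saved, u64 hw_val)
{
	u64 upper = saved >> 32;
	u64 lower = (saved << 32) >> 32;

	if (lower > hw_val)	/* hardware wrapped since the last read */
		upper++;
	return (upper << 32) | hw_val;
}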
7949
7950static u64 write_dev_port_cntr(struct hfi1_devdata *dd,
7951 struct cntr_entry *entry,
7952 u64 *psval, void *context, int vl, u64 data)
7953{
7954 u64 val;
7955
7956 if (entry->flags & CNTR_DISABLED) {
7957 dd_dev_err(dd, "Counter %s not enabled", entry->name);
7958 return 0;
7959 }
7960
7961 hfi1_cdbg(CNTR, "cntr: %s vl %d psval 0x%llx", entry->name, vl, *psval);
7962
7963 if (entry->flags & CNTR_SYNTH) {
7964 *psval = data;
7965 if (entry->flags & CNTR_32BIT) {
7966 val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
7967 (data << 32) >> 32);
7968 val = data; /* return the full 64bit value */
7969 } else {
7970 val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W,
7971 data);
7972 }
7973 } else {
7974 val = entry->rw_cntr(entry, context, vl, CNTR_MODE_W, data);
7975 }
7976
7977 *psval = val;
7978
7979 hfi1_cdbg(CNTR, "\tNew val=0x%llx", val);
7980
7981 return val;
7982}
7983
7984u64 read_dev_cntr(struct hfi1_devdata *dd, int index, int vl)
7985{
7986 struct cntr_entry *entry;
7987 u64 *sval;
7988
7989 entry = &dev_cntrs[index];
7990 sval = dd->scntrs + entry->offset;
7991
7992 if (vl != CNTR_INVALID_VL)
7993 sval += vl;
7994
7995 return read_dev_port_cntr(dd, entry, sval, dd, vl);
7996}
7997
7998u64 write_dev_cntr(struct hfi1_devdata *dd, int index, int vl, u64 data)
7999{
8000 struct cntr_entry *entry;
8001 u64 *sval;
8002
8003 entry = &dev_cntrs[index];
8004 sval = dd->scntrs + entry->offset;
8005
8006 if (vl != CNTR_INVALID_VL)
8007 sval += vl;
8008
8009 return write_dev_port_cntr(dd, entry, sval, dd, vl, data);
8010}
8011
8012u64 read_port_cntr(struct hfi1_pportdata *ppd, int index, int vl)
8013{
8014 struct cntr_entry *entry;
8015 u64 *sval;
8016
8017 entry = &port_cntrs[index];
8018 sval = ppd->scntrs + entry->offset;
8019
8020 if (vl != CNTR_INVALID_VL)
8021 sval += vl;
8022
8023 if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8024 (index <= C_RCV_HDR_OVF_LAST)) {
8025 /* We do not want to bother for disabled contexts */
8026 return 0;
8027 }
8028
8029 return read_dev_port_cntr(ppd->dd, entry, sval, ppd, vl);
8030}
8031
8032u64 write_port_cntr(struct hfi1_pportdata *ppd, int index, int vl, u64 data)
8033{
8034 struct cntr_entry *entry;
8035 u64 *sval;
8036
8037 entry = &port_cntrs[index];
8038 sval = ppd->scntrs + entry->offset;
8039
8040 if (vl != CNTR_INVALID_VL)
8041 sval += vl;
8042
8043 if ((index >= C_RCV_HDR_OVF_FIRST + ppd->dd->num_rcv_contexts) &&
8044 (index <= C_RCV_HDR_OVF_LAST)) {
8045 /* We do not want to bother for disabled contexts */
8046 return 0;
8047 }
8048
8049 return write_dev_port_cntr(ppd->dd, entry, sval, ppd, vl, data);
8050}
8051
8052static void update_synth_timer(unsigned long opaque)
8053{
8054 u64 cur_tx;
8055 u64 cur_rx;
8056 u64 total_flits;
8057 u8 update = 0;
8058 int i, j, vl;
8059 struct hfi1_pportdata *ppd;
8060 struct cntr_entry *entry;
8061
8062 struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque;
8063
8064 /*
8065 * Rather than keep beating on the CSRs, pick a minimal set that we can
8066 * check to watch for a potential rollover. We can do this by looking at
8067 * the number of flits sent/received. If the total flits exceed 32 bits,
8068 * then we have to iterate over all the counters and update them.
8069 */
8070 entry = &dev_cntrs[C_DC_RCV_FLITS];
8071 cur_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8072
8073 entry = &dev_cntrs[C_DC_XMIT_FLITS];
8074 cur_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL, CNTR_MODE_R, 0);
8075
8076 hfi1_cdbg(
8077 CNTR,
8078 "[%d] curr tx=0x%llx rx=0x%llx :: last tx=0x%llx rx=0x%llx\n",
8079 dd->unit, cur_tx, cur_rx, dd->last_tx, dd->last_rx);
8080
8081 if ((cur_tx < dd->last_tx) || (cur_rx < dd->last_rx)) {
8082 /*
8083 * May not be strictly necessary to update but it won't hurt and
8084 * simplifies the logic here.
8085 */
8086 update = 1;
8087 hfi1_cdbg(CNTR, "[%d] Tripwire counter rolled, updating",
8088 dd->unit);
8089 } else {
8090 total_flits = (cur_tx - dd->last_tx) + (cur_rx - dd->last_rx);
8091 hfi1_cdbg(CNTR,
8092 "[%d] total flits 0x%llx limit 0x%llx\n", dd->unit,
8093 total_flits, (u64)CNTR_32BIT_MAX);
8094 if (total_flits >= CNTR_32BIT_MAX) {
8095 hfi1_cdbg(CNTR, "[%d] 32bit limit hit, updating",
8096 dd->unit);
8097 update = 1;
8098 }
8099 }
8100
8101 if (update) {
8102 hfi1_cdbg(CNTR, "[%d] Updating dd and ppd counters", dd->unit);
8103 for (i = 0; i < DEV_CNTR_LAST; i++) {
8104 entry = &dev_cntrs[i];
8105 if (entry->flags & CNTR_VL) {
8106 for (vl = 0; vl < C_VL_COUNT; vl++)
8107 read_dev_cntr(dd, i, vl);
8108 } else {
8109 read_dev_cntr(dd, i, CNTR_INVALID_VL);
8110 }
8111 }
8112 ppd = (struct hfi1_pportdata *)(dd + 1);
8113 for (i = 0; i < dd->num_pports; i++, ppd++) {
8114 for (j = 0; j < PORT_CNTR_LAST; j++) {
8115 entry = &port_cntrs[j];
8116 if (entry->flags & CNTR_VL) {
8117 for (vl = 0; vl < C_VL_COUNT; vl++)
8118 read_port_cntr(ppd, j, vl);
8119 } else {
8120 read_port_cntr(ppd, j, CNTR_INVALID_VL);
8121 }
8122 }
8123 }
8124
8125 /*
8126 * We want the value in the register. The goal is to keep track
8127 * of the number of "ticks", not the counter value. In other
8128 * words, if the register rolls, we want to notice it and go ahead
8129 * and force an update.
8130 */
8131 entry = &dev_cntrs[C_DC_XMIT_FLITS];
8132 dd->last_tx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8133 CNTR_MODE_R, 0);
8134
8135 entry = &dev_cntrs[C_DC_RCV_FLITS];
8136 dd->last_rx = entry->rw_cntr(entry, dd, CNTR_INVALID_VL,
8137 CNTR_MODE_R, 0);
8138
8139 hfi1_cdbg(CNTR, "[%d] setting last tx/rx to 0x%llx 0x%llx",
8140 dd->unit, dd->last_tx, dd->last_rx);
8141
8142 } else {
8143 hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
8144 }
8145
8146 mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
8147}
8148
8149#define C_MAX_NAME 13 /* 12 chars + one for \0 */
8150static int init_cntrs(struct hfi1_devdata *dd)
8151{
8152 int i, rcv_ctxts, index, j;
8153 size_t sz;
8154 char *p;
8155 char name[C_MAX_NAME];
8156 struct hfi1_pportdata *ppd;
8157
8158 /* set up the stats timer; the add_timer is done at the end */
8159 init_timer(&dd->synth_stats_timer);
8160 dd->synth_stats_timer.function = update_synth_timer;
8161 dd->synth_stats_timer.data = (unsigned long) dd;
8162
8163 /***********************/
8164 /* per device counters */
8165 /***********************/
8166
8167 /* size names and determine how many we have */
8168 dd->ndevcntrs = 0;
8169 sz = 0;
8170 index = 0;
8171
8172 for (i = 0; i < DEV_CNTR_LAST; i++) {
8173 hfi1_dbg_early("Init cntr %s\n", dev_cntrs[i].name);
8174 if (dev_cntrs[i].flags & CNTR_DISABLED) {
8175 hfi1_dbg_early("\tSkipping %s\n", dev_cntrs[i].name);
8176 continue;
8177 }
8178
8179 if (dev_cntrs[i].flags & CNTR_VL) {
8180 hfi1_dbg_early("\tProcessing VL cntr\n");
8181 dev_cntrs[i].offset = index;
8182 for (j = 0; j < C_VL_COUNT; j++) {
8183 memset(name, '\0', C_MAX_NAME);
8184 snprintf(name, C_MAX_NAME, "%s%d",
8185 dev_cntrs[i].name,
8186 vl_from_idx(j));
8187 sz += strlen(name);
8188 sz++;
8189 hfi1_dbg_early("\t\t%s\n", name);
8190 dd->ndevcntrs++;
8191 index++;
8192 }
8193 } else {
8194 /* +1 for newline */
8195 sz += strlen(dev_cntrs[i].name) + 1;
8196 dd->ndevcntrs++;
8197 dev_cntrs[i].offset = index;
8198 index++;
8199 hfi1_dbg_early("\tAdding %s\n", dev_cntrs[i].name);
8200 }
8201 }
8202
8203 /* allocate space for the counter values */
8204 dd->cntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8205 if (!dd->cntrs)
8206 goto bail;
8207
8208 dd->scntrs = kcalloc(index, sizeof(u64), GFP_KERNEL);
8209 if (!dd->scntrs)
8210 goto bail;
8211
8213 /* allocate space for the counter names */
8214 dd->cntrnameslen = sz;
8215 dd->cntrnames = kmalloc(sz, GFP_KERNEL);
8216 if (!dd->cntrnames)
8217 goto bail;
8218
8219 /* fill in the names */
8220 for (p = dd->cntrnames, i = 0, index = 0; i < DEV_CNTR_LAST; i++) {
8221 if (dev_cntrs[i].flags & CNTR_DISABLED) {
8222 /* Nothing */
8223 } else {
8224 if (dev_cntrs[i].flags & CNTR_VL) {
8225 for (j = 0; j < C_VL_COUNT; j++) {
8226 memset(name, '\0', C_MAX_NAME);
8227 snprintf(name, C_MAX_NAME, "%s%d",
8228 dev_cntrs[i].name,
8229 vl_from_idx(j));
8230 memcpy(p, name, strlen(name));
8231 p += strlen(name);
8232 *p++ = '\n';
8233 }
8234 } else {
8235 memcpy(p, dev_cntrs[i].name,
8236 strlen(dev_cntrs[i].name));
8237 p += strlen(dev_cntrs[i].name);
8238 *p++ = '\n';
8239 }
8240 index++;
8241 }
8242 }
8243
8244 /*********************/
8245 /* per port counters */
8246 /*********************/
8247
8248 /*
8249 * Go through the counters for the overflows and disable the ones we
8250 * don't need. This varies based on platform so we need to do it
8251 * dynamically here.
8252 */
8253 rcv_ctxts = dd->num_rcv_contexts;
8254 for (i = C_RCV_HDR_OVF_FIRST + rcv_ctxts;
8255 i <= C_RCV_HDR_OVF_LAST; i++) {
8256 port_cntrs[i].flags |= CNTR_DISABLED;
8257 }
8258
8259 /* size port counter names and determine how many we have */
8260 sz = 0;
8261 dd->nportcntrs = 0;
8262 for (i = 0; i < PORT_CNTR_LAST; i++) {
8263 hfi1_dbg_early("Init pcntr %s\n", port_cntrs[i].name);
8264 if (port_cntrs[i].flags & CNTR_DISABLED) {
8265 hfi1_dbg_early("\tSkipping %s\n", port_cntrs[i].name);
8266 continue;
8267 }
8268
8269 if (port_cntrs[i].flags & CNTR_VL) {
8270 hfi1_dbg_early("\tProcessing VL cntr\n");
8271 port_cntrs[i].offset = dd->nportcntrs;
8272 for (j = 0; j < C_VL_COUNT; j++) {
8273 memset(name, '\0', C_MAX_NAME);
8274 snprintf(name, C_MAX_NAME, "%s%d",
8275 port_cntrs[i].name,
8276 vl_from_idx(j));
8277 sz += strlen(name);
8278 sz++;
8279 hfi1_dbg_early("\t\t%s\n", name);
8280 dd->nportcntrs++;
8281 }
8282 } else {
8283 /* +1 for newline */
8284 sz += strlen(port_cntrs[i].name) + 1;
8285 port_cntrs[i].offset = dd->nportcntrs;
8286 dd->nportcntrs++;
8287 hfi1_dbg_early("\tAdding %s\n", port_cntrs[i].name);
8288 }
8289 }
8290
8291 /* allocate space for the counter names */
8292 dd->portcntrnameslen = sz;
8293 dd->portcntrnames = kmalloc(sz, GFP_KERNEL);
8294 if (!dd->portcntrnames)
8295 goto bail;
8296
8297 /* fill in port cntr names */
8298 for (p = dd->portcntrnames, i = 0; i < PORT_CNTR_LAST; i++) {
8299 if (port_cntrs[i].flags & CNTR_DISABLED)
8300 continue;
8301
8302 if (port_cntrs[i].flags & CNTR_VL) {
8303 for (j = 0; j < C_VL_COUNT; j++) {
8304 memset(name, '\0', C_MAX_NAME);
8305 snprintf(name, C_MAX_NAME, "%s%d",
8306 port_cntrs[i].name,
8307 vl_from_idx(j));
8308 memcpy(p, name, strlen(name));
8309 p += strlen(name);
8310 *p++ = '\n';
8311 }
8312 } else {
8313 memcpy(p, port_cntrs[i].name,
8314 strlen(port_cntrs[i].name));
8315 p += strlen(port_cntrs[i].name);
8316 *p++ = '\n';
8317 }
8318 }
8319
8320 /* allocate per port storage for counter values */
8321 ppd = (struct hfi1_pportdata *)(dd + 1);
8322 for (i = 0; i < dd->num_pports; i++, ppd++) {
8323 ppd->cntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8324 if (!ppd->cntrs)
8325 goto bail;
8326
8327 ppd->scntrs = kcalloc(dd->nportcntrs, sizeof(u64), GFP_KERNEL);
8328 if (!ppd->scntrs)
8329 goto bail;
8330 }
8331
8332 /* CPU counters need to be allocated and zeroed */
8333 if (init_cpu_counters(dd))
8334 goto bail;
8335
8336 mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
8337 return 0;
8338bail:
8339 free_cntrs(dd);
8340 return -ENOMEM;
8341}
8342
8344static u32 chip_to_opa_lstate(struct hfi1_devdata *dd, u32 chip_lstate)
8345{
8346 switch (chip_lstate) {
8347 default:
8348 dd_dev_err(dd,
8349 "Unknown logical state 0x%x, reporting IB_PORT_DOWN\n",
8350 chip_lstate);
8351 /* fall through */
8352 case LSTATE_DOWN:
8353 return IB_PORT_DOWN;
8354 case LSTATE_INIT:
8355 return IB_PORT_INIT;
8356 case LSTATE_ARMED:
8357 return IB_PORT_ARMED;
8358 case LSTATE_ACTIVE:
8359 return IB_PORT_ACTIVE;
8360 }
8361}
8362
8363u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
8364{
8365 /* look at the HFI meta-states only */
8366 switch (chip_pstate & 0xf0) {
8367 default:
8368 dd_dev_err(dd, "Unexpected chip physical state of 0x%x\n",
8369 chip_pstate);
8370 /* fall through */
8371 case PLS_DISABLED:
8372 return IB_PORTPHYSSTATE_DISABLED;
8373 case PLS_OFFLINE:
8374 return OPA_PORTPHYSSTATE_OFFLINE;
8375 case PLS_POLLING:
8376 return IB_PORTPHYSSTATE_POLLING;
8377 case PLS_CONFIGPHY:
8378 return IB_PORTPHYSSTATE_TRAINING;
8379 case PLS_LINKUP:
8380 return IB_PORTPHYSSTATE_LINKUP;
8381 case PLS_PHYTEST:
8382 return IB_PORTPHYSSTATE_PHY_TEST;
8383 }
8384}
8385
8386/* return the OPA port logical state name */
8387const char *opa_lstate_name(u32 lstate)
8388{
8389 static const char * const port_logical_names[] = {
8390 "PORT_NOP",
8391 "PORT_DOWN",
8392 "PORT_INIT",
8393 "PORT_ARMED",
8394 "PORT_ACTIVE",
8395 "PORT_ACTIVE_DEFER",
8396 };
8397 if (lstate < ARRAY_SIZE(port_logical_names))
8398 return port_logical_names[lstate];
8399 return "unknown";
8400}
8401
8402/* return the OPA port physical state name */
8403const char *opa_pstate_name(u32 pstate)
8404{
8405 static const char * const port_physical_names[] = {
8406 "PHYS_NOP",
8407 "reserved1",
8408 "PHYS_POLL",
8409 "PHYS_DISABLED",
8410 "PHYS_TRAINING",
8411 "PHYS_LINKUP",
8412 "PHYS_LINK_ERR_RECOVER",
8413 "PHYS_PHY_TEST",
8414 "reserved8",
8415 "PHYS_OFFLINE",
8416 "PHYS_GANGED",
8417 "PHYS_TEST",
8418 };
8419 if (pstate < ARRAY_SIZE(port_physical_names))
8420 return port_physical_names[pstate];
8421 return "unknown";
8422}
8423
8424/*
8425 * Read the hardware link state and set the driver's cached value of it.
8426 * Return the (new) current value.
8427 */
8428u32 get_logical_state(struct hfi1_pportdata *ppd)
8429{
8430 u32 new_state;
8431
8432 new_state = chip_to_opa_lstate(ppd->dd, read_logical_state(ppd->dd));
8433 if (new_state != ppd->lstate) {
8434 dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
8435 opa_lstate_name(new_state), new_state);
8436 ppd->lstate = new_state;
8437 }
8438 /*
8439 * Set port status flags in the page mapped into userspace
8440 * memory. Do it here to ensure a reliable state - this is
8441 * the only function called by all state handling code.
8442 * Always set the flags because the cache value
8443 * might have been changed explicitly outside of this
8444 * function.
8445 */
8446 if (ppd->statusp) {
8447 switch (ppd->lstate) {
8448 case IB_PORT_DOWN:
8449 case IB_PORT_INIT:
8450 *ppd->statusp &= ~(HFI1_STATUS_IB_CONF |
8451 HFI1_STATUS_IB_READY);
8452 break;
8453 case IB_PORT_ARMED:
8454 *ppd->statusp |= HFI1_STATUS_IB_CONF;
8455 break;
8456 case IB_PORT_ACTIVE:
8457 *ppd->statusp |= HFI1_STATUS_IB_READY;
8458 break;
8459 }
8460 }
8461 return ppd->lstate;
8462}
8463
8464/**
8465 * wait_logical_linkstate - wait for an IB link state change to occur
8466 * @ppd: port device
8467 * @state: the state to wait for
8468 * @msecs: the number of milliseconds to wait
8469 *
8470 * Wait up to msecs milliseconds for IB link state change to occur.
8471 * For now, take the easy polling route.
8472 * Returns 0 if state reached, otherwise -ETIMEDOUT.
8473 */
8474static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
8475 int msecs)
8476{
8477 unsigned long timeout;
8478
8479 timeout = jiffies + msecs_to_jiffies(msecs);
8480 while (1) {
8481 if (get_logical_state(ppd) == state)
8482 return 0;
8483 if (time_after(jiffies, timeout))
8484 break;
8485 msleep(20);
8486 }
8487 dd_dev_err(ppd->dd, "timeout waiting for link state 0x%x\n", state);
8488
8489 return -ETIMEDOUT;
8490}
8491
8492u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
8493{
8494 static u32 remembered_state = 0xff;
8495 u32 pstate;
8496 u32 ib_pstate;
8497
8498 pstate = read_physical_state(ppd->dd);
8499 ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
8500 if (remembered_state != ib_pstate) {
8501 dd_dev_info(ppd->dd,
8502 "%s: physical state changed to %s (0x%x), phy 0x%x\n",
8503 __func__, opa_pstate_name(ib_pstate), ib_pstate,
8504 pstate);
8505 remembered_state = ib_pstate;
8506 }
8507 return ib_pstate;
8508}
8509
8510/*
8511 * Read/modify/write ASIC_QSFP register bits as selected by mask
8512 * data: 0 or 1 in the positions depending on what needs to be written
8513 * dir: 0 for read, 1 for write
8514 * mask: select by setting
8515 * I2CCLK (bit 0)
8516 * I2CDATA (bit 1)
8517 */
8518u64 hfi1_gpio_mod(struct hfi1_devdata *dd, u32 target, u32 data, u32 dir,
8519 u32 mask)
8520{
8521 u64 qsfp_oe, target_oe;
8522
8523 target_oe = target ? ASIC_QSFP2_OE : ASIC_QSFP1_OE;
8524 if (mask) {
8525 /* We are writing register bits, so lock access */
8526 dir &= mask;
8527 data &= mask;
8528
8529 qsfp_oe = read_csr(dd, target_oe);
8530 qsfp_oe = (qsfp_oe & ~(u64)mask) | (u64)dir;
8531 write_csr(dd, target_oe, qsfp_oe);
8532 }
8533 /* We are exclusively reading bits here, but it is unlikely
8534 * we'll get valid data when we set the direction of the pin
8535 * in the same call, so the caller should invoke this function again
8536 * to get valid data.
8537 */
8538 return read_csr(dd, target ? ASIC_QSFP2_IN : ASIC_QSFP1_IN);
8539}
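
/*
 * Illustrative sketch only (hypothetical helper, not used by the driver):
 * a pure read of the QSFP pins through hfi1_gpio_mod(). A zero mask leaves
 * the output-enable register untouched; only the *_IN CSR is sampled, and
 * bits 0/1 correspond to I2CCLK/I2CDATA as described above.
 */
static inline u64 example_read_qsfp_pins(struct hfi1_devdata *dd, u32 target)
{
	return hfi1_gpio_mod(dd, target, 0, 0, 0) & 0x3;
}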
8540
8541#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
8542(r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
8543
8544#define SET_STATIC_RATE_CONTROL_SMASK(r) \
8545(r |= SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
8546
8547int hfi1_init_ctxt(struct send_context *sc)
8548{
8549 if (sc != NULL) {
8550 struct hfi1_devdata *dd = sc->dd;
8551 u64 reg;
8552 u8 set = (sc->type == SC_USER ?
8553 HFI1_CAP_IS_USET(STATIC_RATE_CTRL) :
8554 HFI1_CAP_IS_KSET(STATIC_RATE_CTRL));
8555 reg = read_kctxt_csr(dd, sc->hw_context,
8556 SEND_CTXT_CHECK_ENABLE);
8557 if (set)
8558 CLEAR_STATIC_RATE_CONTROL_SMASK(reg);
8559 else
8560 SET_STATIC_RATE_CONTROL_SMASK(reg);
8561 write_kctxt_csr(dd, sc->hw_context,
8562 SEND_CTXT_CHECK_ENABLE, reg);
8563 }
8564 return 0;
8565}
8566
8567int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
8568{
8569 int ret = 0;
8570 u64 reg;
8571
8572 if (dd->icode != ICODE_RTL_SILICON) {
8573 if (HFI1_CAP_IS_KSET(PRINT_UNIMPL))
8574 dd_dev_info(dd, "%s: tempsense not supported by HW\n",
8575 __func__);
8576 return -EINVAL;
8577 }
8578 reg = read_csr(dd, ASIC_STS_THERM);
8579 temp->curr = ((reg >> ASIC_STS_THERM_CURR_TEMP_SHIFT) &
8580 ASIC_STS_THERM_CURR_TEMP_MASK);
8581 temp->lo_lim = ((reg >> ASIC_STS_THERM_LO_TEMP_SHIFT) &
8582 ASIC_STS_THERM_LO_TEMP_MASK);
8583 temp->hi_lim = ((reg >> ASIC_STS_THERM_HI_TEMP_SHIFT) &
8584 ASIC_STS_THERM_HI_TEMP_MASK);
8585 temp->crit_lim = ((reg >> ASIC_STS_THERM_CRIT_TEMP_SHIFT) &
8586 ASIC_STS_THERM_CRIT_TEMP_MASK);
8587 /* triggers is a 3-bit value - 1 bit per trigger. */
8588 temp->triggers = (u8)((reg >> ASIC_STS_THERM_LOW_SHIFT) & 0x7);
8589
8590 return ret;
8591}
8592
8593/* ========================================================================= */
8594
8595/*
8596 * Enable/disable chip from delivering interrupts.
8597 */
8598void set_intr_state(struct hfi1_devdata *dd, u32 enable)
8599{
8600 int i;
8601
8602 /*
8603 * In HFI, the mask needs to be 1 to allow interrupts.
8604 */
8605 if (enable) {
8606 u64 cce_int_mask;
8607 const int qsfp1_int_smask = QSFP1_INT % 64;
8608 const int qsfp2_int_smask = QSFP2_INT % 64;
8609
8610 /* enable all interrupts */
8611 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8612 write_csr(dd, CCE_INT_MASK + (8*i), ~(u64)0);
8613
8614 /*
8615 * Disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0.
8616 * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR;
8617 * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
8618 * the index of the appropriate CSR in the CCEIntMask CSR array.
8619 */
8620 cce_int_mask = read_csr(dd, CCE_INT_MASK +
8621 (8*(QSFP1_INT/64)));
8622 if (dd->hfi1_id) {
8623 cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
8624 write_csr(dd, CCE_INT_MASK + (8*(QSFP1_INT/64)),
8625 cce_int_mask);
8626 } else {
8627 cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
8628 write_csr(dd, CCE_INT_MASK + (8*(QSFP2_INT/64)),
8629 cce_int_mask);
8630 }
8631 } else {
8632 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8633 write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
8634 }
8635}
8636
8637/*
8638 * Clear all interrupt sources on the chip.
8639 */
8640static void clear_all_interrupts(struct hfi1_devdata *dd)
8641{
8642 int i;
8643
8644 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8645 write_csr(dd, CCE_INT_CLEAR + (8*i), ~(u64)0);
8646
8647 write_csr(dd, CCE_ERR_CLEAR, ~(u64)0);
8648 write_csr(dd, MISC_ERR_CLEAR, ~(u64)0);
8649 write_csr(dd, RCV_ERR_CLEAR, ~(u64)0);
8650 write_csr(dd, SEND_ERR_CLEAR, ~(u64)0);
8651 write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0);
8652 write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0);
8653 write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0);
8654 for (i = 0; i < dd->chip_send_contexts; i++)
8655 write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0);
8656 for (i = 0; i < dd->chip_sdma_engines; i++)
8657 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0);
8658
8659 write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0);
8660 write_csr(dd, DC_LCB_ERR_CLR, ~(u64)0);
8661 write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
8662}
8663
8664/* Move to pcie.c? */
8665static void disable_intx(struct pci_dev *pdev)
8666{
8667 pci_intx(pdev, 0);
8668}
8669
8670static void clean_up_interrupts(struct hfi1_devdata *dd)
8671{
8672 int i;
8673
8674 /* remove irqs - must happen before disabling/turning off */
8675 if (dd->num_msix_entries) {
8676 /* MSI-X */
8677 struct hfi1_msix_entry *me = dd->msix_entries;
8678
8679 for (i = 0; i < dd->num_msix_entries; i++, me++) {
8680 if (me->arg == NULL) /* => no irq, no affinity */
8681 break;
8682 irq_set_affinity_hint(dd->msix_entries[i].msix.vector,
8683 NULL);
8684 free_irq(me->msix.vector, me->arg);
8685 }
8686 } else {
8687 /* INTx */
8688 if (dd->requested_intx_irq) {
8689 free_irq(dd->pcidev->irq, dd);
8690 dd->requested_intx_irq = 0;
8691 }
8692 }
8693
8694 /* turn off interrupts */
8695 if (dd->num_msix_entries) {
8696 /* MSI-X */
8697 hfi1_nomsix(dd);
8698 } else {
8699 /* INTx */
8700 disable_intx(dd->pcidev);
8701 }
8702
8703 /* clean structures */
8704 for (i = 0; i < dd->num_msix_entries; i++)
8705 free_cpumask_var(dd->msix_entries[i].mask);
8706 kfree(dd->msix_entries);
8707 dd->msix_entries = NULL;
8708 dd->num_msix_entries = 0;
8709}
8710
8711/*
8712 * Remap the interrupt source from the general handler to the given MSI-X
8713 * interrupt.
8714 */
8715static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
8716{
8717 u64 reg;
8718 int m, n;
8719
8720 /* clear from the handled mask of the general interrupt */
8721 m = isrc / 64;
8722 n = isrc % 64;
8723 dd->gi_mask[m] &= ~((u64)1 << n);
8724
8725 /* direct the chip source to the given MSI-X interrupt */
8726 m = isrc / 8;
8727 n = isrc % 8;
8728 reg = read_csr(dd, CCE_INT_MAP + (8*m));
8729 reg &= ~((u64)0xff << (8*n));
8730 reg |= ((u64)msix_intr & 0xff) << (8*n);
8731 write_csr(dd, CCE_INT_MAP + (8*m), reg);
8732}
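
/*
 * Illustrative sketch only, not used by the driver: given a chip
 * interrupt source number, compute where that source lives in the
 * 64-bit general-interrupt mask array (1 bit per source) and in the
 * CCE_INT_MAP CSR array (1 byte per source), mirroring the arithmetic
 * in remap_intr() above.  The struct and helper names are assumptions
 * added for this example.
 */
struct int_src_location {
	int mask_csr;	/* index into gi_mask[]/CCE_INT_MASK array */
	int mask_bit;	/* bit within that 64-bit mask */
	int map_csr;	/* index into the CCE_INT_MAP array */
	int map_byte;	/* byte lane within that 64-bit map CSR */
};

static inline struct int_src_location locate_int_source(int isrc)
{
	struct int_src_location loc;

	loc.mask_csr = isrc / 64;	/* 64 sources per mask CSR */
	loc.mask_bit = isrc % 64;
	loc.map_csr = isrc / 8;		/* 8 one-byte entries per map CSR */
	loc.map_byte = isrc % 8;
	return loc;
}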
8733
8734static void remap_sdma_interrupts(struct hfi1_devdata *dd,
8735 int engine, int msix_intr)
8736{
8737	/*
8738	 * SDMA engine interrupt sources are grouped by type, rather than
8739	 * by engine. Per-engine interrupts are as follows:
8740 * SDMA
8741 * SDMAProgress
8742 * SDMAIdle
8743 */
8744 remap_intr(dd, IS_SDMA_START + 0*TXE_NUM_SDMA_ENGINES + engine,
8745 msix_intr);
8746 remap_intr(dd, IS_SDMA_START + 1*TXE_NUM_SDMA_ENGINES + engine,
8747 msix_intr);
8748 remap_intr(dd, IS_SDMA_START + 2*TXE_NUM_SDMA_ENGINES + engine,
8749 msix_intr);
8750}
8751
8752static void remap_receive_available_interrupt(struct hfi1_devdata *dd,
8753 int rx, int msix_intr)
8754{
8755 remap_intr(dd, IS_RCVAVAIL_START + rx, msix_intr);
8756}
8757
8758static int request_intx_irq(struct hfi1_devdata *dd)
8759{
8760 int ret;
8761
8762 snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME"_%d",
8763 dd->unit);
8764 ret = request_irq(dd->pcidev->irq, general_interrupt,
8765 IRQF_SHARED, dd->intx_name, dd);
8766 if (ret)
8767 dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
8768 ret);
8769 else
8770 dd->requested_intx_irq = 1;
8771 return ret;
8772}
8773
8774static int request_msix_irqs(struct hfi1_devdata *dd)
8775{
8776 const struct cpumask *local_mask;
8777 cpumask_var_t def, rcv;
8778 bool def_ret, rcv_ret;
8779 int first_general, last_general;
8780 int first_sdma, last_sdma;
8781 int first_rx, last_rx;
8782 int first_cpu, restart_cpu, curr_cpu;
8783 int rcv_cpu, sdma_cpu;
8784 int i, ret = 0, possible;
8785 int ht;
8786
8787 /* calculate the ranges we are going to use */
8788 first_general = 0;
8789 first_sdma = last_general = first_general + 1;
8790 first_rx = last_sdma = first_sdma + dd->num_sdma;
8791 last_rx = first_rx + dd->n_krcv_queues;
8792
8793	/*
8794	 * Interrupt affinity.
8795	 *
8796	 * The non receive-available interrupts (general and SDMA) get a
8797	 * default mask: the CPUs of the local node, with hyperthread
8798	 * siblings and the receive-available CPUs removed.
8799	 *
8800	 * Receive-available interrupts start at the node's second CPU,
8801	 * wrapping back to that CPU as necessary.
8802	 *
8803	 */
8804 local_mask = cpumask_of_pcibus(dd->pcidev->bus);
8805 /* if first cpu is invalid, use NUMA 0 */
8806 if (cpumask_first(local_mask) >= nr_cpu_ids)
8807 local_mask = topology_core_cpumask(0);
8808
8809 def_ret = zalloc_cpumask_var(&def, GFP_KERNEL);
8810 rcv_ret = zalloc_cpumask_var(&rcv, GFP_KERNEL);
8811 if (!def_ret || !rcv_ret)
8812 goto bail;
8813 /* use local mask as default */
8814 cpumask_copy(def, local_mask);
8815 possible = cpumask_weight(def);
8816 /* disarm threads from default */
8817 ht = cpumask_weight(
8818 topology_sibling_cpumask(cpumask_first(local_mask)));
8819 for (i = possible/ht; i < possible; i++)
8820 cpumask_clear_cpu(i, def);
8821 /* reset possible */
8822 possible = cpumask_weight(def);
8823	/* def now has full cores on the chosen node */
8824 first_cpu = cpumask_first(def);
8825 if (nr_cpu_ids >= first_cpu)
8826 first_cpu++;
8827 restart_cpu = first_cpu;
8828 curr_cpu = restart_cpu;
8829
8830 for (i = first_cpu; i < dd->n_krcv_queues + first_cpu; i++) {
8831 cpumask_clear_cpu(curr_cpu, def);
8832 cpumask_set_cpu(curr_cpu, rcv);
8833 if (curr_cpu >= possible)
8834 curr_cpu = restart_cpu;
8835 else
8836 curr_cpu++;
8837 }
8838	/* def mask now has the non-rcv CPUs, rcv has the rcv CPUs */
8839 rcv_cpu = cpumask_first(rcv);
8840 sdma_cpu = cpumask_first(def);
8841
8842 /*
8843 * Sanity check - the code expects all SDMA chip source
8844 * interrupts to be in the same CSR, starting at bit 0. Verify
8845 * that this is true by checking the bit location of the start.
8846 */
8847 BUILD_BUG_ON(IS_SDMA_START % 64);
8848
8849 for (i = 0; i < dd->num_msix_entries; i++) {
8850 struct hfi1_msix_entry *me = &dd->msix_entries[i];
8851 const char *err_info;
8852 irq_handler_t handler;
8853 void *arg;
8854 int idx;
8855 struct hfi1_ctxtdata *rcd = NULL;
8856 struct sdma_engine *sde = NULL;
8857
8858 /* obtain the arguments to request_irq */
8859 if (first_general <= i && i < last_general) {
8860 idx = i - first_general;
8861 handler = general_interrupt;
8862 arg = dd;
8863 snprintf(me->name, sizeof(me->name),
8864 DRIVER_NAME"_%d", dd->unit);
8865 err_info = "general";
8866 } else if (first_sdma <= i && i < last_sdma) {
8867 idx = i - first_sdma;
8868 sde = &dd->per_sdma[idx];
8869 handler = sdma_interrupt;
8870 arg = sde;
8871 snprintf(me->name, sizeof(me->name),
8872 DRIVER_NAME"_%d sdma%d", dd->unit, idx);
8873 err_info = "sdma";
8874 remap_sdma_interrupts(dd, idx, i);
8875 } else if (first_rx <= i && i < last_rx) {
8876 idx = i - first_rx;
8877 rcd = dd->rcd[idx];
8878 /* no interrupt if no rcd */
8879 if (!rcd)
8880 continue;
8881 /*
8882 * Set the interrupt register and mask for this
8883 * context's interrupt.
8884 */
8885 rcd->ireg = (IS_RCVAVAIL_START+idx) / 64;
8886 rcd->imask = ((u64)1) <<
8887 ((IS_RCVAVAIL_START+idx) % 64);
8888 handler = receive_context_interrupt;
8889 arg = rcd;
8890 snprintf(me->name, sizeof(me->name),
8891 DRIVER_NAME"_%d kctxt%d", dd->unit, idx);
8892 err_info = "receive context";
8893 remap_receive_available_interrupt(dd, idx, i);
8894 } else {
8895 /* not in our expected range - complain, then
8896 ignore it */
8897 dd_dev_err(dd,
8898 "Unexpected extra MSI-X interrupt %d\n", i);
8899 continue;
8900 }
8901 /* no argument, no interrupt */
8902 if (arg == NULL)
8903 continue;
8904 /* make sure the name is terminated */
8905 me->name[sizeof(me->name)-1] = 0;
8906
8907 ret = request_irq(me->msix.vector, handler, 0, me->name, arg);
8908 if (ret) {
8909 dd_dev_err(dd,
8910 "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
8911 err_info, me->msix.vector, idx, ret);
8912 return ret;
8913 }
8914 /*
8915 * assign arg after request_irq call, so it will be
8916 * cleaned up
8917 */
8918 me->arg = arg;
8919
8920 if (!zalloc_cpumask_var(
8921 &dd->msix_entries[i].mask,
8922 GFP_KERNEL))
8923 goto bail;
8924 if (handler == sdma_interrupt) {
8925 dd_dev_info(dd, "sdma engine %d cpu %d\n",
8926 sde->this_idx, sdma_cpu);
8927 cpumask_set_cpu(sdma_cpu, dd->msix_entries[i].mask);
8928 sdma_cpu = cpumask_next(sdma_cpu, def);
8929 if (sdma_cpu >= nr_cpu_ids)
8930 sdma_cpu = cpumask_first(def);
8931 } else if (handler == receive_context_interrupt) {
8932 dd_dev_info(dd, "rcv ctxt %d cpu %d\n",
8933 rcd->ctxt, rcv_cpu);
8934 cpumask_set_cpu(rcv_cpu, dd->msix_entries[i].mask);
8935 rcv_cpu = cpumask_next(rcv_cpu, rcv);
8936 if (rcv_cpu >= nr_cpu_ids)
8937 rcv_cpu = cpumask_first(rcv);
8938 } else {
8939 /* otherwise first def */
8940 dd_dev_info(dd, "%s cpu %d\n",
8941 err_info, cpumask_first(def));
8942 cpumask_set_cpu(
8943 cpumask_first(def), dd->msix_entries[i].mask);
8944 }
8945 irq_set_affinity_hint(
8946 dd->msix_entries[i].msix.vector,
8947 dd->msix_entries[i].mask);
8948 }
8949
8950out:
8951 free_cpumask_var(def);
8952 free_cpumask_var(rcv);
8953 return ret;
8954bail:
8955 ret = -ENOMEM;
8956 goto out;
8957}
8958
8959/*
8960 * Set the general handler to accept all interrupts, remap all
8961 * chip interrupts back to MSI-X 0.
8962 */
8963static void reset_interrupts(struct hfi1_devdata *dd)
8964{
8965 int i;
8966
8967 /* all interrupts handled by the general handler */
8968 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
8969 dd->gi_mask[i] = ~(u64)0;
8970
8971 /* all chip interrupts map to MSI-X 0 */
8972 for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
8973 write_csr(dd, CCE_INT_MAP + (8*i), 0);
8974}
8975
8976static int set_up_interrupts(struct hfi1_devdata *dd)
8977{
8978 struct hfi1_msix_entry *entries;
8979 u32 total, request;
8980 int i, ret;
8981 int single_interrupt = 0; /* we expect to have all the interrupts */
8982
8983 /*
8984 * Interrupt count:
8985 * 1 general, "slow path" interrupt (includes the SDMA engines
8986 * slow source, SDMACleanupDone)
8987 * N interrupts - one per used SDMA engine
8988 * M interrupt - one per kernel receive context
8989 */
8990 total = 1 + dd->num_sdma + dd->n_krcv_queues;
8991
8992 entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
8993 if (!entries) {
8994 dd_dev_err(dd, "cannot allocate msix table\n");
8995 ret = -ENOMEM;
8996 goto fail;
8997 }
8998 /* 1-1 MSI-X entry assignment */
8999 for (i = 0; i < total; i++)
9000 entries[i].msix.entry = i;
9001
9002 /* ask for MSI-X interrupts */
9003 request = total;
9004 request_msix(dd, &request, entries);
9005
9006 if (request == 0) {
9007 /* using INTx */
9008 /* dd->num_msix_entries already zero */
9009 kfree(entries);
9010 single_interrupt = 1;
9011 dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
9012 } else {
9013 /* using MSI-X */
9014 dd->num_msix_entries = request;
9015 dd->msix_entries = entries;
9016
9017 if (request != total) {
9018 /* using MSI-X, with reduced interrupts */
9019 dd_dev_err(
9020 dd,
9021 "cannot handle reduced interrupt case, want %u, got %u\n",
9022 total, request);
9023 ret = -EINVAL;
9024 goto fail;
9025 }
9026 dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
9027 }
9028
9029 /* mask all interrupts */
9030 set_intr_state(dd, 0);
9031 /* clear all pending interrupts */
9032 clear_all_interrupts(dd);
9033
9034 /* reset general handler mask, chip MSI-X mappings */
9035 reset_interrupts(dd);
9036
9037 if (single_interrupt)
9038 ret = request_intx_irq(dd);
9039 else
9040 ret = request_msix_irqs(dd);
9041 if (ret)
9042 goto fail;
9043
9044 return 0;
9045
9046fail:
9047 clean_up_interrupts(dd);
9048 return ret;
9049}
9050
9051/*
9052 * Set up context values in dd. Sets:
9053 *
9054 * num_rcv_contexts - number of contexts being used
9055 * n_krcv_queues - number of kernel contexts
9056 * first_user_ctxt - first non-kernel context in array of contexts
9057 * freectxts - number of free user contexts
9058 * num_send_contexts - number of PIO send contexts being used
9059 */
9060static int set_up_context_variables(struct hfi1_devdata *dd)
9061{
9062 int num_kernel_contexts;
9063 int num_user_contexts;
9064 int total_contexts;
9065 int ret;
9066 unsigned ngroups;
9067
9068 /*
9069	 * Kernel contexts (to be fixed later):
9070	 * - minimum of 2, or 1 context per NUMA node
9071 * - Context 0 - default/errors
9072 * - Context 1 - VL15
9073 */
9074 if (n_krcvqs)
9075 num_kernel_contexts = n_krcvqs + MIN_KERNEL_KCTXTS;
9076 else
9077 num_kernel_contexts = num_online_nodes();
9078 num_kernel_contexts =
9079 max_t(int, MIN_KERNEL_KCTXTS, num_kernel_contexts);
9080 /*
9081 * Every kernel receive context needs an ACK send context.
9082	 * One send context is allocated for each VL{0-7} and VL15.
9083 */
9084 if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
9085 dd_dev_err(dd,
9086 "Reducing # kernel rcv contexts to: %d, from %d\n",
9087 (int)(dd->chip_send_contexts - num_vls - 1),
9088 (int)num_kernel_contexts);
9089 num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
9090 }
9091 /*
9092 * User contexts: (to be fixed later)
9093 * - set to num_rcv_contexts if non-zero
9094 * - default to 1 user context per CPU
9095 */
9096 if (num_rcv_contexts)
9097 num_user_contexts = num_rcv_contexts;
9098 else
9099 num_user_contexts = num_online_cpus();
9100
9101 total_contexts = num_kernel_contexts + num_user_contexts;
9102
9103 /*
9104 * Adjust the counts given a global max.
9105 */
9106 if (total_contexts > dd->chip_rcv_contexts) {
9107 dd_dev_err(dd,
9108 "Reducing # user receive contexts to: %d, from %d\n",
9109 (int)(dd->chip_rcv_contexts - num_kernel_contexts),
9110 (int)num_user_contexts);
9111 num_user_contexts = dd->chip_rcv_contexts - num_kernel_contexts;
9112 /* recalculate */
9113 total_contexts = num_kernel_contexts + num_user_contexts;
9114 }
9115
9116 /* the first N are kernel contexts, the rest are user contexts */
9117 dd->num_rcv_contexts = total_contexts;
9118 dd->n_krcv_queues = num_kernel_contexts;
9119 dd->first_user_ctxt = num_kernel_contexts;
9120 dd->freectxts = num_user_contexts;
9121 dd_dev_info(dd,
9122 "rcv contexts: chip %d, used %d (kernel %d, user %d)\n",
9123 (int)dd->chip_rcv_contexts,
9124 (int)dd->num_rcv_contexts,
9125 (int)dd->n_krcv_queues,
9126 (int)dd->num_rcv_contexts - dd->n_krcv_queues);
9127
9128 /*
9129 * Receive array allocation:
9130 * All RcvArray entries are divided into groups of 8. This
9131 * is required by the hardware and will speed up writes to
9132 * consecutive entries by using write-combining of the entire
9133 * cacheline.
9134 *
9135	 * The groups are evenly divided among all contexts; any
9136	 * left-over groups are given to the first N user
9137 * contexts.
9138 */
9139 dd->rcv_entries.group_size = RCV_INCREMENT;
9140 ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size;
9141 dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts;
9142 dd->rcv_entries.nctxt_extra = ngroups -
9143 (dd->num_rcv_contexts * dd->rcv_entries.ngroups);
9144 dd_dev_info(dd, "RcvArray groups %u, ctxts extra %u\n",
9145 dd->rcv_entries.ngroups,
9146 dd->rcv_entries.nctxt_extra);
9147 if (dd->rcv_entries.ngroups * dd->rcv_entries.group_size >
9148 MAX_EAGER_ENTRIES * 2) {
9149 dd->rcv_entries.ngroups = (MAX_EAGER_ENTRIES * 2) /
9150 dd->rcv_entries.group_size;
9151 dd_dev_info(dd,
9152 "RcvArray group count too high, change to %u\n",
9153 dd->rcv_entries.ngroups);
9154 dd->rcv_entries.nctxt_extra = 0;
9155 }
9156 /*
9157 * PIO send contexts
9158 */
9159 ret = init_sc_pools_and_sizes(dd);
9160 if (ret >= 0) { /* success */
9161 dd->num_send_contexts = ret;
9162 dd_dev_info(
9163 dd,
9164 "send contexts: chip %d, used %d (kernel %d, ack %d, user %d)\n",
9165 dd->chip_send_contexts,
9166 dd->num_send_contexts,
9167 dd->sc_sizes[SC_KERNEL].count,
9168 dd->sc_sizes[SC_ACK].count,
9169 dd->sc_sizes[SC_USER].count);
9170 ret = 0; /* success */
9171 }
9172
9173 return ret;
9174}
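
/*
 * Illustrative sketch only, not used by the driver: the RcvArray split
 * computed in set_up_context_variables() above.  All entries are divided
 * into groups of dd->rcv_entries.group_size (RCV_INCREMENT) entries; each
 * context gets an equal share of groups and the remainder is left over
 * for the first user contexts.  The struct and helper names are
 * assumptions added for this example.
 */
struct rcv_array_split {
	unsigned groups_per_ctxt;	/* groups given to every context */
	unsigned extra_groups;		/* remainder for first user contexts */
};

static inline struct rcv_array_split split_rcv_array(unsigned array_count,
						     unsigned group_size,
						     unsigned num_ctxts)
{
	struct rcv_array_split s;
	unsigned ngroups = array_count / group_size;

	s.groups_per_ctxt = ngroups / num_ctxts;
	s.extra_groups = ngroups - (num_ctxts * s.groups_per_ctxt);
	return s;
}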
9175
9176/*
9177 * Set the device/port partition key table. The MAD code
9178 * will ensure that, at least, the partial management
9179 * partition key is present in the table.
9180 */
9181static void set_partition_keys(struct hfi1_pportdata *ppd)
9182{
9183 struct hfi1_devdata *dd = ppd->dd;
9184 u64 reg = 0;
9185 int i;
9186
9187 dd_dev_info(dd, "Setting partition keys\n");
9188 for (i = 0; i < hfi1_get_npkeys(dd); i++) {
9189 reg |= (ppd->pkeys[i] &
9190 RCV_PARTITION_KEY_PARTITION_KEY_A_MASK) <<
9191 ((i % 4) *
9192 RCV_PARTITION_KEY_PARTITION_KEY_B_SHIFT);
9193 /* Each register holds 4 PKey values. */
9194 if ((i % 4) == 3) {
9195 write_csr(dd, RCV_PARTITION_KEY +
9196 ((i - 3) * 2), reg);
9197 reg = 0;
9198 }
9199 }
9200
9201 /* Always enable HW pkeys check when pkeys table is set */
9202 add_rcvctrl(dd, RCV_CTRL_RCV_PARTITION_KEY_ENABLE_SMASK);
9203}
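
/*
 * Illustrative sketch only, not used by the driver: pack four 16-bit
 * partition keys into one 64-bit RcvPartitionKey register value, using
 * the same layout written by set_partition_keys() above.  The helper
 * name is an assumption added for this example.
 */
static inline u64 pack_four_pkeys(const u16 *pkeys)
{
	u64 reg = 0;
	int i;

	for (i = 0; i < 4; i++)
		reg |= (u64)(pkeys[i] &
			     RCV_PARTITION_KEY_PARTITION_KEY_A_MASK) <<
		       (i * RCV_PARTITION_KEY_PARTITION_KEY_B_SHIFT);
	return reg;
}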
9204
9205/*
9206 * These CSRs and memories are uninitialized on reset and must be
9207 * written before reading to set the ECC/parity bits.
9208 *
9209 * NOTE: All user context CSRs that are not mmaped write-only
9210 * (e.g. the TID flows) must be initialized even if the driver never
9211 * reads them.
9212 */
9213static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
9214{
9215 int i, j;
9216
9217 /* CceIntMap */
9218 for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9219 write_csr(dd, CCE_INT_MAP+(8*i), 0);
9220
9221 /* SendCtxtCreditReturnAddr */
9222 for (i = 0; i < dd->chip_send_contexts; i++)
9223 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9224
9225 /* PIO Send buffers */
9226 /* SDMA Send buffers */
9227 /* These are not normally read, and (presently) have no method
9228 to be read, so are not pre-initialized */
9229
9230 /* RcvHdrAddr */
9231 /* RcvHdrTailAddr */
9232 /* RcvTidFlowTable */
9233 for (i = 0; i < dd->chip_rcv_contexts; i++) {
9234 write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9235 write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9236 for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
9237 write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE+(8*j), 0);
9238 }
9239
9240 /* RcvArray */
9241 for (i = 0; i < dd->chip_rcv_array_count; i++)
9242 write_csr(dd, RCV_ARRAY + (8*i),
9243 RCV_ARRAY_RT_WRITE_ENABLE_SMASK);
9244
9245 /* RcvQPMapTable */
9246 for (i = 0; i < 32; i++)
9247 write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9248}
9249
9250/*
9251 * Use the ctrl_bits in CceCtrl to clear the status_bits in CceStatus.
9252 */
9253static void clear_cce_status(struct hfi1_devdata *dd, u64 status_bits,
9254 u64 ctrl_bits)
9255{
9256 unsigned long timeout;
9257 u64 reg;
9258
9259 /* is the condition present? */
9260 reg = read_csr(dd, CCE_STATUS);
9261 if ((reg & status_bits) == 0)
9262 return;
9263
9264 /* clear the condition */
9265 write_csr(dd, CCE_CTRL, ctrl_bits);
9266
9267 /* wait for the condition to clear */
9268 timeout = jiffies + msecs_to_jiffies(CCE_STATUS_TIMEOUT);
9269 while (1) {
9270 reg = read_csr(dd, CCE_STATUS);
9271 if ((reg & status_bits) == 0)
9272 return;
9273 if (time_after(jiffies, timeout)) {
9274 dd_dev_err(dd,
9275 "Timeout waiting for CceStatus to clear bits 0x%llx, remaining 0x%llx\n",
9276 status_bits, reg & status_bits);
9277 return;
9278 }
9279 udelay(1);
9280 }
9281}
9282
9283/* set CCE CSRs to chip reset defaults */
9284static void reset_cce_csrs(struct hfi1_devdata *dd)
9285{
9286 int i;
9287
9288 /* CCE_REVISION read-only */
9289 /* CCE_REVISION2 read-only */
9290 /* CCE_CTRL - bits clear automatically */
9291 /* CCE_STATUS read-only, use CceCtrl to clear */
9292 clear_cce_status(dd, ALL_FROZE, CCE_CTRL_SPC_UNFREEZE_SMASK);
9293 clear_cce_status(dd, ALL_TXE_PAUSE, CCE_CTRL_TXE_RESUME_SMASK);
9294 clear_cce_status(dd, ALL_RXE_PAUSE, CCE_CTRL_RXE_RESUME_SMASK);
9295 for (i = 0; i < CCE_NUM_SCRATCH; i++)
9296 write_csr(dd, CCE_SCRATCH + (8 * i), 0);
9297 /* CCE_ERR_STATUS read-only */
9298 write_csr(dd, CCE_ERR_MASK, 0);
9299 write_csr(dd, CCE_ERR_CLEAR, ~0ull);
9300 /* CCE_ERR_FORCE leave alone */
9301 for (i = 0; i < CCE_NUM_32_BIT_COUNTERS; i++)
9302 write_csr(dd, CCE_COUNTER_ARRAY32 + (8 * i), 0);
9303 write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_RESETCSR);
9304 /* CCE_PCIE_CTRL leave alone */
9305 for (i = 0; i < CCE_NUM_MSIX_VECTORS; i++) {
9306 write_csr(dd, CCE_MSIX_TABLE_LOWER + (8 * i), 0);
9307 write_csr(dd, CCE_MSIX_TABLE_UPPER + (8 * i),
9308 CCE_MSIX_TABLE_UPPER_RESETCSR);
9309 }
9310 for (i = 0; i < CCE_NUM_MSIX_PBAS; i++) {
9311 /* CCE_MSIX_PBA read-only */
9312 write_csr(dd, CCE_MSIX_INT_GRANTED, ~0ull);
9313 write_csr(dd, CCE_MSIX_VEC_CLR_WITHOUT_INT, ~0ull);
9314 }
9315 for (i = 0; i < CCE_NUM_INT_MAP_CSRS; i++)
9316 write_csr(dd, CCE_INT_MAP, 0);
9317 for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
9318 /* CCE_INT_STATUS read-only */
9319 write_csr(dd, CCE_INT_MASK + (8 * i), 0);
9320 write_csr(dd, CCE_INT_CLEAR + (8 * i), ~0ull);
9321 /* CCE_INT_FORCE leave alone */
9322 /* CCE_INT_BLOCKED read-only */
9323 }
9324 for (i = 0; i < CCE_NUM_32_BIT_INT_COUNTERS; i++)
9325 write_csr(dd, CCE_INT_COUNTER_ARRAY32 + (8 * i), 0);
9326}
9327
9328/* set ASIC CSRs to chip reset defaults */
9329static void reset_asic_csrs(struct hfi1_devdata *dd)
9330{
9331 static DEFINE_MUTEX(asic_mutex);
9332 static int called;
9333 int i;
9334
9335 /*
9336 * If the HFIs are shared between separate nodes or VMs,
9337 * then more will need to be done here. One idea is a module
9338 * parameter that returns early, letting the first power-on or
9339 * a known first load do the reset and blocking all others.
9340 */
9341
9342 /*
9343 * These CSRs should only be reset once - the first one here will
9344 * do the work. Use a mutex so that a non-first caller waits until
9345 * the first is finished before it can proceed.
9346 */
9347 mutex_lock(&asic_mutex);
9348 if (called)
9349 goto done;
9350 called = 1;
9351
9352 if (dd->icode != ICODE_FPGA_EMULATION) {
9353 /* emulation does not have an SBus - leave these alone */
9354 /*
9355 * All writes to ASIC_CFG_SBUS_REQUEST do something.
9356 * Notes:
9357 * o The reset is not zero if aimed at the core. See the
9358 * SBus documentation for details.
9359 * o If the SBus firmware has been updated (e.g. by the BIOS),
9360 * will the reset revert that?
9361 */
9362 /* ASIC_CFG_SBUS_REQUEST leave alone */
9363 write_csr(dd, ASIC_CFG_SBUS_EXECUTE, 0);
9364 }
9365 /* ASIC_SBUS_RESULT read-only */
9366 write_csr(dd, ASIC_STS_SBUS_COUNTERS, 0);
9367 for (i = 0; i < ASIC_NUM_SCRATCH; i++)
9368 write_csr(dd, ASIC_CFG_SCRATCH + (8 * i), 0);
9369 write_csr(dd, ASIC_CFG_MUTEX, 0); /* this will clear it */
9370 write_csr(dd, ASIC_CFG_DRV_STR, 0);
9371 write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0);
9372 /* ASIC_STS_THERM read-only */
9373 /* ASIC_CFG_RESET leave alone */
9374
9375 write_csr(dd, ASIC_PCIE_SD_HOST_CMD, 0);
9376 /* ASIC_PCIE_SD_HOST_STATUS read-only */
9377 write_csr(dd, ASIC_PCIE_SD_INTRPT_DATA_CODE, 0);
9378 write_csr(dd, ASIC_PCIE_SD_INTRPT_ENABLE, 0);
9379 /* ASIC_PCIE_SD_INTRPT_PROGRESS read-only */
9380 write_csr(dd, ASIC_PCIE_SD_INTRPT_STATUS, ~0ull); /* clear */
9381 /* ASIC_HFI0_PCIE_SD_INTRPT_RSPD_DATA read-only */
9382 /* ASIC_HFI1_PCIE_SD_INTRPT_RSPD_DATA read-only */
9383 for (i = 0; i < 16; i++)
9384 write_csr(dd, ASIC_PCIE_SD_INTRPT_LIST + (8 * i), 0);
9385
9386 /* ASIC_GPIO_IN read-only */
9387 write_csr(dd, ASIC_GPIO_OE, 0);
9388 write_csr(dd, ASIC_GPIO_INVERT, 0);
9389 write_csr(dd, ASIC_GPIO_OUT, 0);
9390 write_csr(dd, ASIC_GPIO_MASK, 0);
9391 /* ASIC_GPIO_STATUS read-only */
9392 write_csr(dd, ASIC_GPIO_CLEAR, ~0ull);
9393 /* ASIC_GPIO_FORCE leave alone */
9394
9395 /* ASIC_QSFP1_IN read-only */
9396 write_csr(dd, ASIC_QSFP1_OE, 0);
9397 write_csr(dd, ASIC_QSFP1_INVERT, 0);
9398 write_csr(dd, ASIC_QSFP1_OUT, 0);
9399 write_csr(dd, ASIC_QSFP1_MASK, 0);
9400 /* ASIC_QSFP1_STATUS read-only */
9401 write_csr(dd, ASIC_QSFP1_CLEAR, ~0ull);
9402 /* ASIC_QSFP1_FORCE leave alone */
9403
9404 /* ASIC_QSFP2_IN read-only */
9405 write_csr(dd, ASIC_QSFP2_OE, 0);
9406 write_csr(dd, ASIC_QSFP2_INVERT, 0);
9407 write_csr(dd, ASIC_QSFP2_OUT, 0);
9408 write_csr(dd, ASIC_QSFP2_MASK, 0);
9409 /* ASIC_QSFP2_STATUS read-only */
9410 write_csr(dd, ASIC_QSFP2_CLEAR, ~0ull);
9411 /* ASIC_QSFP2_FORCE leave alone */
9412
9413 write_csr(dd, ASIC_EEP_CTL_STAT, ASIC_EEP_CTL_STAT_RESETCSR);
9414 /* this also writes a NOP command, clearing paging mode */
9415 write_csr(dd, ASIC_EEP_ADDR_CMD, 0);
9416 write_csr(dd, ASIC_EEP_DATA, 0);
9417
9418done:
9419 mutex_unlock(&asic_mutex);
9420}
9421
9422/* set MISC CSRs to chip reset defaults */
9423static void reset_misc_csrs(struct hfi1_devdata *dd)
9424{
9425 int i;
9426
9427 for (i = 0; i < 32; i++) {
9428 write_csr(dd, MISC_CFG_RSA_R2 + (8 * i), 0);
9429 write_csr(dd, MISC_CFG_RSA_SIGNATURE + (8 * i), 0);
9430 write_csr(dd, MISC_CFG_RSA_MODULUS + (8 * i), 0);
9431 }
9432 /* MISC_CFG_SHA_PRELOAD leave alone - always reads 0 and can
9433	   only be written in 128-byte chunks */
9434 /* init RSA engine to clear lingering errors */
9435 write_csr(dd, MISC_CFG_RSA_CMD, 1);
9436 write_csr(dd, MISC_CFG_RSA_MU, 0);
9437 write_csr(dd, MISC_CFG_FW_CTRL, 0);
9438 /* MISC_STS_8051_DIGEST read-only */
9439 /* MISC_STS_SBM_DIGEST read-only */
9440 /* MISC_STS_PCIE_DIGEST read-only */
9441 /* MISC_STS_FAB_DIGEST read-only */
9442 /* MISC_ERR_STATUS read-only */
9443 write_csr(dd, MISC_ERR_MASK, 0);
9444 write_csr(dd, MISC_ERR_CLEAR, ~0ull);
9445 /* MISC_ERR_FORCE leave alone */
9446}
9447
9448/* set TXE CSRs to chip reset defaults */
9449static void reset_txe_csrs(struct hfi1_devdata *dd)
9450{
9451 int i;
9452
9453 /*
9454 * TXE Kernel CSRs
9455 */
9456 write_csr(dd, SEND_CTRL, 0);
9457 __cm_reset(dd, 0); /* reset CM internal state */
9458 /* SEND_CONTEXTS read-only */
9459 /* SEND_DMA_ENGINES read-only */
9460 /* SEND_PIO_MEM_SIZE read-only */
9461 /* SEND_DMA_MEM_SIZE read-only */
9462 write_csr(dd, SEND_HIGH_PRIORITY_LIMIT, 0);
9463 pio_reset_all(dd); /* SEND_PIO_INIT_CTXT */
9464 /* SEND_PIO_ERR_STATUS read-only */
9465 write_csr(dd, SEND_PIO_ERR_MASK, 0);
9466 write_csr(dd, SEND_PIO_ERR_CLEAR, ~0ull);
9467 /* SEND_PIO_ERR_FORCE leave alone */
9468 /* SEND_DMA_ERR_STATUS read-only */
9469 write_csr(dd, SEND_DMA_ERR_MASK, 0);
9470 write_csr(dd, SEND_DMA_ERR_CLEAR, ~0ull);
9471 /* SEND_DMA_ERR_FORCE leave alone */
9472 /* SEND_EGRESS_ERR_STATUS read-only */
9473 write_csr(dd, SEND_EGRESS_ERR_MASK, 0);
9474 write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~0ull);
9475 /* SEND_EGRESS_ERR_FORCE leave alone */
9476 write_csr(dd, SEND_BTH_QP, 0);
9477 write_csr(dd, SEND_STATIC_RATE_CONTROL, 0);
9478 write_csr(dd, SEND_SC2VLT0, 0);
9479 write_csr(dd, SEND_SC2VLT1, 0);
9480 write_csr(dd, SEND_SC2VLT2, 0);
9481 write_csr(dd, SEND_SC2VLT3, 0);
9482 write_csr(dd, SEND_LEN_CHECK0, 0);
9483 write_csr(dd, SEND_LEN_CHECK1, 0);
9484 /* SEND_ERR_STATUS read-only */
9485 write_csr(dd, SEND_ERR_MASK, 0);
9486 write_csr(dd, SEND_ERR_CLEAR, ~0ull);
9487 /* SEND_ERR_FORCE read-only */
9488 for (i = 0; i < VL_ARB_LOW_PRIO_TABLE_SIZE; i++)
9489 write_csr(dd, SEND_LOW_PRIORITY_LIST + (8*i), 0);
9490 for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
9491 write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8*i), 0);
9492 for (i = 0; i < dd->chip_send_contexts/NUM_CONTEXTS_PER_SET; i++)
9493 write_csr(dd, SEND_CONTEXT_SET_CTRL + (8*i), 0);
9494 for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
9495 write_csr(dd, SEND_COUNTER_ARRAY32 + (8*i), 0);
9496 for (i = 0; i < TXE_NUM_64_BIT_COUNTER; i++)
9497 write_csr(dd, SEND_COUNTER_ARRAY64 + (8*i), 0);
9498 write_csr(dd, SEND_CM_CTRL, SEND_CM_CTRL_RESETCSR);
9499 write_csr(dd, SEND_CM_GLOBAL_CREDIT,
9500 SEND_CM_GLOBAL_CREDIT_RESETCSR);
9501 /* SEND_CM_CREDIT_USED_STATUS read-only */
9502 write_csr(dd, SEND_CM_TIMER_CTRL, 0);
9503 write_csr(dd, SEND_CM_LOCAL_AU_TABLE0_TO3, 0);
9504 write_csr(dd, SEND_CM_LOCAL_AU_TABLE4_TO7, 0);
9505 write_csr(dd, SEND_CM_REMOTE_AU_TABLE0_TO3, 0);
9506 write_csr(dd, SEND_CM_REMOTE_AU_TABLE4_TO7, 0);
9507 for (i = 0; i < TXE_NUM_DATA_VL; i++)
9508 write_csr(dd, SEND_CM_CREDIT_VL + (8*i), 0);
9509 write_csr(dd, SEND_CM_CREDIT_VL15, 0);
9510 /* SEND_CM_CREDIT_USED_VL read-only */
9511 /* SEND_CM_CREDIT_USED_VL15 read-only */
9512 /* SEND_EGRESS_CTXT_STATUS read-only */
9513 /* SEND_EGRESS_SEND_DMA_STATUS read-only */
9514 write_csr(dd, SEND_EGRESS_ERR_INFO, ~0ull);
9515 /* SEND_EGRESS_ERR_INFO read-only */
9516 /* SEND_EGRESS_ERR_SOURCE read-only */
9517
9518 /*
9519 * TXE Per-Context CSRs
9520 */
9521 for (i = 0; i < dd->chip_send_contexts; i++) {
9522 write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9523 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0);
9524 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
9525 write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_FORCE, 0);
9526 write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, 0);
9527 write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~0ull);
9528 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_ENABLE, 0);
9529 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_VL, 0);
9530 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_JOB_KEY, 0);
9531 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_PARTITION_KEY, 0);
9532 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, 0);
9533 write_kctxt_csr(dd, i, SEND_CTXT_CHECK_OPCODE, 0);
9534 }
9535
9536 /*
9537 * TXE Per-SDMA CSRs
9538 */
9539 for (i = 0; i < dd->chip_sdma_engines; i++) {
9540 write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9541 /* SEND_DMA_STATUS read-only */
9542 write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0);
9543 write_kctxt_csr(dd, i, SEND_DMA_LEN_GEN, 0);
9544 write_kctxt_csr(dd, i, SEND_DMA_TAIL, 0);
9545 /* SEND_DMA_HEAD read-only */
9546 write_kctxt_csr(dd, i, SEND_DMA_HEAD_ADDR, 0);
9547 write_kctxt_csr(dd, i, SEND_DMA_PRIORITY_THLD, 0);
9548 /* SEND_DMA_IDLE_CNT read-only */
9549 write_kctxt_csr(dd, i, SEND_DMA_RELOAD_CNT, 0);
9550 write_kctxt_csr(dd, i, SEND_DMA_DESC_CNT, 0);
9551 /* SEND_DMA_DESC_FETCHED_CNT read-only */
9552 /* SEND_DMA_ENG_ERR_STATUS read-only */
9553 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, 0);
9554 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~0ull);
9555 /* SEND_DMA_ENG_ERR_FORCE leave alone */
9556 write_kctxt_csr(dd, i, SEND_DMA_CHECK_ENABLE, 0);
9557 write_kctxt_csr(dd, i, SEND_DMA_CHECK_VL, 0);
9558 write_kctxt_csr(dd, i, SEND_DMA_CHECK_JOB_KEY, 0);
9559 write_kctxt_csr(dd, i, SEND_DMA_CHECK_PARTITION_KEY, 0);
9560 write_kctxt_csr(dd, i, SEND_DMA_CHECK_SLID, 0);
9561 write_kctxt_csr(dd, i, SEND_DMA_CHECK_OPCODE, 0);
9562 write_kctxt_csr(dd, i, SEND_DMA_MEMORY, 0);
9563 }
9564}
9565
9566/*
9567 * Expect on entry:
9568 * o Packet ingress is disabled, i.e. RcvCtrl.RcvPortEnable == 0
9569 */
9570static void init_rbufs(struct hfi1_devdata *dd)
9571{
9572 u64 reg;
9573 int count;
9574
9575 /*
9576 * Wait for DMA to stop: RxRbufPktPending and RxPktInProgress are
9577 * clear.
9578 */
9579 count = 0;
9580 while (1) {
9581 reg = read_csr(dd, RCV_STATUS);
9582 if ((reg & (RCV_STATUS_RX_RBUF_PKT_PENDING_SMASK
9583 | RCV_STATUS_RX_PKT_IN_PROGRESS_SMASK)) == 0)
9584 break;
9585 /*
9586 * Give up after 1ms - maximum wait time.
9587 *
9588 * RBuf size is 148KiB. Slowest possible is PCIe Gen1 x1 at
9589 * 250MB/s bandwidth. Lower rate to 66% for overhead to get:
9590 * 148 KB / (66% * 250MB/s) = 920us
9591 */
9592 if (count++ > 500) {
9593 dd_dev_err(dd,
9594 "%s: in-progress DMA not clearing: RcvStatus 0x%llx, continuing\n",
9595 __func__, reg);
9596 break;
9597 }
9598 udelay(2); /* do not busy-wait the CSR */
9599 }
9600
9601 /* start the init - expect RcvCtrl to be 0 */
9602 write_csr(dd, RCV_CTRL, RCV_CTRL_RX_RBUF_INIT_SMASK);
9603
9604 /*
9605	 * Read to force the write of RcvCtrl.RxRbufInit.  There is a brief
9606 * period after the write before RcvStatus.RxRbufInitDone is valid.
9607 * The delay in the first run through the loop below is sufficient and
9608	 * required before the first read of RcvStatus.RxRbufInitDone.
9609 */
9610 read_csr(dd, RCV_CTRL);
9611
9612 /* wait for the init to finish */
9613 count = 0;
9614 while (1) {
9615 /* delay is required first time through - see above */
9616 udelay(2); /* do not busy-wait the CSR */
9617 reg = read_csr(dd, RCV_STATUS);
9618 if (reg & (RCV_STATUS_RX_RBUF_INIT_DONE_SMASK))
9619 break;
9620
9621 /* give up after 100us - slowest possible at 33MHz is 73us */
9622 if (count++ > 50) {
9623 dd_dev_err(dd,
9624 "%s: RcvStatus.RxRbufInit not set, continuing\n",
9625 __func__);
9626 break;
9627 }
9628 }
9629}
9630
9631/* set RXE CSRs to chip reset defaults */
9632static void reset_rxe_csrs(struct hfi1_devdata *dd)
9633{
9634 int i, j;
9635
9636 /*
9637 * RXE Kernel CSRs
9638 */
9639 write_csr(dd, RCV_CTRL, 0);
9640 init_rbufs(dd);
9641 /* RCV_STATUS read-only */
9642 /* RCV_CONTEXTS read-only */
9643 /* RCV_ARRAY_CNT read-only */
9644 /* RCV_BUF_SIZE read-only */
9645 write_csr(dd, RCV_BTH_QP, 0);
9646 write_csr(dd, RCV_MULTICAST, 0);
9647 write_csr(dd, RCV_BYPASS, 0);
9648 write_csr(dd, RCV_VL15, 0);
9649 /* this is a clear-down */
9650 write_csr(dd, RCV_ERR_INFO,
9651 RCV_ERR_INFO_RCV_EXCESS_BUFFER_OVERRUN_SMASK);
9652 /* RCV_ERR_STATUS read-only */
9653 write_csr(dd, RCV_ERR_MASK, 0);
9654 write_csr(dd, RCV_ERR_CLEAR, ~0ull);
9655 /* RCV_ERR_FORCE leave alone */
9656 for (i = 0; i < 32; i++)
9657 write_csr(dd, RCV_QP_MAP_TABLE + (8 * i), 0);
9658 for (i = 0; i < 4; i++)
9659 write_csr(dd, RCV_PARTITION_KEY + (8 * i), 0);
9660 for (i = 0; i < RXE_NUM_32_BIT_COUNTERS; i++)
9661 write_csr(dd, RCV_COUNTER_ARRAY32 + (8 * i), 0);
9662 for (i = 0; i < RXE_NUM_64_BIT_COUNTERS; i++)
9663 write_csr(dd, RCV_COUNTER_ARRAY64 + (8 * i), 0);
9664 for (i = 0; i < RXE_NUM_RSM_INSTANCES; i++) {
9665 write_csr(dd, RCV_RSM_CFG + (8 * i), 0);
9666 write_csr(dd, RCV_RSM_SELECT + (8 * i), 0);
9667 write_csr(dd, RCV_RSM_MATCH + (8 * i), 0);
9668 }
9669 for (i = 0; i < 32; i++)
9670 write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), 0);
9671
9672 /*
9673 * RXE Kernel and User Per-Context CSRs
9674 */
9675 for (i = 0; i < dd->chip_rcv_contexts; i++) {
9676 /* kernel */
9677 write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
9678 /* RCV_CTXT_STATUS read-only */
9679 write_kctxt_csr(dd, i, RCV_EGR_CTRL, 0);
9680 write_kctxt_csr(dd, i, RCV_TID_CTRL, 0);
9681 write_kctxt_csr(dd, i, RCV_KEY_CTRL, 0);
9682 write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
9683 write_kctxt_csr(dd, i, RCV_HDR_CNT, 0);
9684 write_kctxt_csr(dd, i, RCV_HDR_ENT_SIZE, 0);
9685 write_kctxt_csr(dd, i, RCV_HDR_SIZE, 0);
9686 write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
9687 write_kctxt_csr(dd, i, RCV_AVAIL_TIME_OUT, 0);
9688 write_kctxt_csr(dd, i, RCV_HDR_OVFL_CNT, 0);
9689
9690 /* user */
9691 /* RCV_HDR_TAIL read-only */
9692 write_uctxt_csr(dd, i, RCV_HDR_HEAD, 0);
9693 /* RCV_EGR_INDEX_TAIL read-only */
9694 write_uctxt_csr(dd, i, RCV_EGR_INDEX_HEAD, 0);
9695 /* RCV_EGR_OFFSET_TAIL read-only */
9696 for (j = 0; j < RXE_NUM_TID_FLOWS; j++) {
9697 write_uctxt_csr(dd, i, RCV_TID_FLOW_TABLE + (8 * j),
9698 0);
9699 }
9700 }
9701}
9702
9703/*
9704 * Set sc2vl tables.
9705 *
9706 * They power on to zeros, so to avoid send context errors
9707 * they need to be set:
9708 *
9709 * SC 0-7 -> VL 0-7 (respectively)
9710 * SC 15 -> VL 15
9711 * otherwise
9712 * -> VL 0
9713 */
9714static void init_sc2vl_tables(struct hfi1_devdata *dd)
9715{
9716 int i;
9717 /* init per architecture spec, constrained by hardware capability */
9718
9719 /* HFI maps sent packets */
9720 write_csr(dd, SEND_SC2VLT0, SC2VL_VAL(
9721 0,
9722 0, 0, 1, 1,
9723 2, 2, 3, 3,
9724 4, 4, 5, 5,
9725 6, 6, 7, 7));
9726 write_csr(dd, SEND_SC2VLT1, SC2VL_VAL(
9727 1,
9728 8, 0, 9, 0,
9729 10, 0, 11, 0,
9730 12, 0, 13, 0,
9731 14, 0, 15, 15));
9732 write_csr(dd, SEND_SC2VLT2, SC2VL_VAL(
9733 2,
9734 16, 0, 17, 0,
9735 18, 0, 19, 0,
9736 20, 0, 21, 0,
9737 22, 0, 23, 0));
9738 write_csr(dd, SEND_SC2VLT3, SC2VL_VAL(
9739 3,
9740 24, 0, 25, 0,
9741 26, 0, 27, 0,
9742 28, 0, 29, 0,
9743 30, 0, 31, 0));
9744
9745 /* DC maps received packets */
9746 write_csr(dd, DCC_CFG_SC_VL_TABLE_15_0, DC_SC_VL_VAL(
9747 15_0,
9748 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
9749 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15, 15));
9750 write_csr(dd, DCC_CFG_SC_VL_TABLE_31_16, DC_SC_VL_VAL(
9751 31_16,
9752 16, 0, 17, 0, 18, 0, 19, 0, 20, 0, 21, 0, 22, 0, 23, 0,
9753 24, 0, 25, 0, 26, 0, 27, 0, 28, 0, 29, 0, 30, 0, 31, 0));
9754
9755 /* initialize the cached sc2vl values consistently with h/w */
9756 for (i = 0; i < 32; i++) {
9757 if (i < 8 || i == 15)
9758 *((u8 *)(dd->sc2vl) + i) = (u8)i;
9759 else
9760 *((u8 *)(dd->sc2vl) + i) = 0;
9761 }
9762}
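
/*
 * Illustrative sketch only, not used by the driver: the SC-to-VL rule
 * that init_sc2vl_tables() above programs into the send and receive
 * mapping CSRs and mirrors into dd->sc2vl.  The helper name is an
 * assumption added for this example.
 */
static inline u8 default_sc_to_vl(u8 sc)
{
	if (sc < 8)
		return sc;	/* SC 0-7 -> VL 0-7 */
	if (sc == 15)
		return 15;	/* SC 15 -> VL 15 */
	return 0;		/* everything else -> VL 0 */
}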
9763
9764/*
9765 * Read chip sizes and then reset parts to sane, disabled, values. We cannot
9766 * depend on the chip going through a power-on reset - a driver may be loaded
9767 * and unloaded many times.
9768 *
9769 * Do not write any CSR values to the chip in this routine - there may be
9770 * a reset following the (possible) FLR in this routine.
9771 *
9772 */
9773static void init_chip(struct hfi1_devdata *dd)
9774{
9775 int i;
9776
9777 /*
9778 * Put the HFI CSRs in a known state.
9779 * Combine this with a DC reset.
9780 *
9781 * Stop the device from doing anything while we do a
9782 * reset. We know there are no other active users of
9783 * the device since we are now in charge. Turn off
9784	 * all outbound and inbound traffic and make sure
9785 * the device does not generate any interrupts.
9786 */
9787
9788 /* disable send contexts and SDMA engines */
9789 write_csr(dd, SEND_CTRL, 0);
9790 for (i = 0; i < dd->chip_send_contexts; i++)
9791 write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
9792 for (i = 0; i < dd->chip_sdma_engines; i++)
9793 write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
9794 /* disable port (turn off RXE inbound traffic) and contexts */
9795 write_csr(dd, RCV_CTRL, 0);
9796 for (i = 0; i < dd->chip_rcv_contexts; i++)
9797 write_csr(dd, RCV_CTXT_CTRL, 0);
9798 /* mask all interrupt sources */
9799 for (i = 0; i < CCE_NUM_INT_CSRS; i++)
9800 write_csr(dd, CCE_INT_MASK + (8*i), 0ull);
9801
9802 /*
9803 * DC Reset: do a full DC reset before the register clear.
9804 * A recommended length of time to hold is one CSR read,
9805 * so reread the CceDcCtrl. Then, hold the DC in reset
9806 * across the clear.
9807 */
9808 write_csr(dd, CCE_DC_CTRL, CCE_DC_CTRL_DC_RESET_SMASK);
9809 (void) read_csr(dd, CCE_DC_CTRL);
9810
9811 if (use_flr) {
9812 /*
9813 * A FLR will reset the SPC core and part of the PCIe.
9814 * The parts that need to be restored have already been
9815 * saved.
9816 */
9817 dd_dev_info(dd, "Resetting CSRs with FLR\n");
9818
9819 /* do the FLR, the DC reset will remain */
9820 hfi1_pcie_flr(dd);
9821
9822 /* restore command and BARs */
9823 restore_pci_variables(dd);
9824
9825 if (is_a0(dd)) {
9826 dd_dev_info(dd, "Resetting CSRs with FLR\n");
9827 hfi1_pcie_flr(dd);
9828 restore_pci_variables(dd);
9829 }
9830
9831 } else {
9832 dd_dev_info(dd, "Resetting CSRs with writes\n");
9833 reset_cce_csrs(dd);
9834 reset_txe_csrs(dd);
9835 reset_rxe_csrs(dd);
9836 reset_asic_csrs(dd);
9837 reset_misc_csrs(dd);
9838 }
9839 /* clear the DC reset */
9840 write_csr(dd, CCE_DC_CTRL, 0);
9841 /* Set the LED off */
9842 if (is_a0(dd))
9843 setextled(dd, 0);
9844 /*
9845 * Clear the QSFP reset.
9846 * A0 leaves the out lines floating on power on, then on an FLR
9847 * enforces a 0 on all out pins. The driver does not touch
9848 * ASIC_QSFPn_OUT otherwise. This leaves RESET_N low and
9849	 * holds anything plugged in constantly in reset, if it pays attention
9850 * to RESET_N.
9851 * A prime example of this is SiPh. For now, set all pins high.
9852 * I2CCLK and I2CDAT will change per direction, and INT_N and
9853 * MODPRS_N are input only and their value is ignored.
9854 */
9855 if (is_a0(dd)) {
9856 write_csr(dd, ASIC_QSFP1_OUT, 0x1f);
9857 write_csr(dd, ASIC_QSFP2_OUT, 0x1f);
9858 }
9859}
9860
9861static void init_early_variables(struct hfi1_devdata *dd)
9862{
9863 int i;
9864
9865 /* assign link credit variables */
9866 dd->vau = CM_VAU;
9867 dd->link_credits = CM_GLOBAL_CREDITS;
9868 if (is_a0(dd))
9869 dd->link_credits--;
9870 dd->vcu = cu_to_vcu(hfi1_cu);
9871 /* enough room for 8 MAD packets plus header - 17K */
9872 dd->vl15_init = (8 * (2048 + 128)) / vau_to_au(dd->vau);
9873 if (dd->vl15_init > dd->link_credits)
9874 dd->vl15_init = dd->link_credits;
9875
9876 write_uninitialized_csrs_and_memories(dd);
9877
9878 if (HFI1_CAP_IS_KSET(PKEY_CHECK))
9879 for (i = 0; i < dd->num_pports; i++) {
9880 struct hfi1_pportdata *ppd = &dd->pport[i];
9881
9882 set_partition_keys(ppd);
9883 }
9884 init_sc2vl_tables(dd);
9885}
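
/*
 * Illustrative sketch only, not used by the driver: the VL15 credit
 * sizing done in init_early_variables() above.  Enough buffering is
 * reserved for 8 MAD packets plus headers (8 * (2048 + 128) bytes),
 * expressed in allocation units via vau_to_au() and capped at the
 * total link credits.  The helper name is an assumption added for
 * this example.
 */
static inline u16 size_vl15_credits(u8 vau, u16 link_credits)
{
	u16 init = (8 * (2048 + 128)) / vau_to_au(vau);

	if (init > link_credits)
		init = link_credits;
	return init;
}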
9886
9887static void init_kdeth_qp(struct hfi1_devdata *dd)
9888{
9889 /* user changed the KDETH_QP */
9890 if (kdeth_qp != 0 && kdeth_qp >= 0xff) {
9891 /* out of range or illegal value */
9892 dd_dev_err(dd, "Invalid KDETH queue pair prefix, ignoring");
9893 kdeth_qp = 0;
9894 }
9895 if (kdeth_qp == 0) /* not set, or failed range check */
9896 kdeth_qp = DEFAULT_KDETH_QP;
9897
9898 write_csr(dd, SEND_BTH_QP,
9899 (kdeth_qp & SEND_BTH_QP_KDETH_QP_MASK)
9900 << SEND_BTH_QP_KDETH_QP_SHIFT);
9901
9902 write_csr(dd, RCV_BTH_QP,
9903 (kdeth_qp & RCV_BTH_QP_KDETH_QP_MASK)
9904 << RCV_BTH_QP_KDETH_QP_SHIFT);
9905}
9906
9907/**
9908 * init_qpmap_table
9909 * @dd - device data
9910 * @first_ctxt - first context
9911	 * @last_ctxt - last context
9912 *
9913	 * This routine sets the qpn mapping table that
9914 * is indexed by qpn[8:1].
9915 *
9916 * The routine will round robin the 256 settings
9917 * from first_ctxt to last_ctxt.
9918 *
9919 * The first/last looks ahead to having specialized
9920 * receive contexts for mgmt and bypass. Normal
9921 * verbs traffic will assumed to be on a range
9922	 * verbs traffic is assumed to be on a range
9923 */
9924static void init_qpmap_table(struct hfi1_devdata *dd,
9925 u32 first_ctxt,
9926 u32 last_ctxt)
9927{
9928 u64 reg = 0;
9929 u64 regno = RCV_QP_MAP_TABLE;
9930 int i;
9931 u64 ctxt = first_ctxt;
9932
9933 for (i = 0; i < 256;) {
9934 if (ctxt == VL15CTXT) {
9935 ctxt++;
9936 if (ctxt > last_ctxt)
9937 ctxt = first_ctxt;
9938 continue;
9939 }
9940 reg |= ctxt << (8 * (i % 8));
9941 i++;
9942 ctxt++;
9943 if (ctxt > last_ctxt)
9944 ctxt = first_ctxt;
9945 if (i % 8 == 0) {
9946 write_csr(dd, regno, reg);
9947 reg = 0;
9948 regno += 8;
9949 }
9950 }
9951 if (i % 8)
9952 write_csr(dd, regno, reg);
9953
9954 add_rcvctrl(dd, RCV_CTRL_RCV_QP_MAP_ENABLE_SMASK
9955 | RCV_CTRL_RCV_BYPASS_ENABLE_SMASK);
9956}
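
/*
 * Illustrative sketch only, not used by the driver: look up which
 * receive context a given QP number lands in under the round-robin
 * mapping programmed by init_qpmap_table() above.  It replays the same
 * fill loop (including the VL15 context skip) into a local table and
 * indexes it by qpn[8:1].  Assumes at least one non-VL15 context in
 * [first_ctxt, last_ctxt]; the helper name is an assumption added for
 * this example.
 */
static inline u32 qpmap_lookup(u32 qpn, u32 first_ctxt, u32 last_ctxt)
{
	u8 table[256];	/* context numbers fit in a byte */
	u32 ctxt = first_ctxt;
	int i;

	for (i = 0; i < 256; ) {
		if (ctxt == VL15CTXT) {
			ctxt++;
			if (ctxt > last_ctxt)
				ctxt = first_ctxt;
			continue;
		}
		table[i++] = ctxt++;
		if (ctxt > last_ctxt)
			ctxt = first_ctxt;
	}
	return table[(qpn >> 1) & 0xff];	/* indexed by qpn[8:1] */
}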
9957
9958/**
9959 * init_qos - init RX qos
9960 * @dd - device data
9961	 * @first_ctxt - first receive context to use for the RSM map
9962 *
9963 * This routine initializes Rule 0 and the
9964 * RSM map table to implement qos.
9965 *
9966 * If all of the limit tests succeed,
9967 * qos is applied based on the array
9968 * interpretation of krcvqs where
9969 * entry 0 is VL0.
9970 *
9971 * The number of vl bits (n) and the number of qpn
9972 * bits (m) are computed to feed both the RSM map table
9973 * and the single rule.
9974 *
9975 */
9976static void init_qos(struct hfi1_devdata *dd, u32 first_ctxt)
9977{
9978 u8 max_by_vl = 0;
9979 unsigned qpns_per_vl, ctxt, i, qpn, n = 1, m;
9980 u64 *rsmmap;
9981 u64 reg;
9982 u8 rxcontext = is_a0(dd) ? 0 : 0xff; /* 0 is default if a0 ver. */
9983
9984 /* validate */
9985 if (dd->n_krcv_queues <= MIN_KERNEL_KCTXTS ||
9986 num_vls == 1 ||
9987 krcvqsset <= 1)
9988 goto bail;
9989 for (i = 0; i < min_t(unsigned, num_vls, krcvqsset); i++)
9990 if (krcvqs[i] > max_by_vl)
9991 max_by_vl = krcvqs[i];
9992 if (max_by_vl > 32)
9993 goto bail;
9994 qpns_per_vl = __roundup_pow_of_two(max_by_vl);
9995 /* determine bits vl */
9996 n = ilog2(num_vls);
9997 /* determine bits for qpn */
9998 m = ilog2(qpns_per_vl);
9999 if ((m + n) > 7)
10000 goto bail;
10001 if (num_vls * qpns_per_vl > dd->chip_rcv_contexts)
10002 goto bail;
10003	rsmmap = kmalloc_array(NUM_MAP_REGS, sizeof(u64), GFP_KERNEL);
	if (!rsmmap)
		goto bail;	/* fall back to the default qp map */
10004	memset(rsmmap, rxcontext, NUM_MAP_REGS * sizeof(u64));
10005 /* init the local copy of the table */
10006 for (i = 0, ctxt = first_ctxt; i < num_vls; i++) {
10007 unsigned tctxt;
10008
10009 for (qpn = 0, tctxt = ctxt;
10010 krcvqs[i] && qpn < qpns_per_vl; qpn++) {
10011 unsigned idx, regoff, regidx;
10012
10013 /* generate index <= 128 */
10014 idx = (qpn << n) ^ i;
10015 regoff = (idx % 8) * 8;
10016 regidx = idx / 8;
10017 reg = rsmmap[regidx];
10018 /* replace 0xff with context number */
10019 reg &= ~(RCV_RSM_MAP_TABLE_RCV_CONTEXT_A_MASK
10020 << regoff);
10021 reg |= (u64)(tctxt++) << regoff;
10022 rsmmap[regidx] = reg;
10023 if (tctxt == ctxt + krcvqs[i])
10024 tctxt = ctxt;
10025 }
10026 ctxt += krcvqs[i];
10027 }
10028 /* flush cached copies to chip */
10029 for (i = 0; i < NUM_MAP_REGS; i++)
10030 write_csr(dd, RCV_RSM_MAP_TABLE + (8 * i), rsmmap[i]);
10031 /* add rule0 */
10032 write_csr(dd, RCV_RSM_CFG /* + (8 * 0) */,
10033 RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_MASK
10034 << RCV_RSM_CFG_ENABLE_OR_CHAIN_RSM0_SHIFT |
10035 2ull << RCV_RSM_CFG_PACKET_TYPE_SHIFT);
10036 write_csr(dd, RCV_RSM_SELECT /* + (8 * 0) */,
10037 LRH_BTH_MATCH_OFFSET
10038 << RCV_RSM_SELECT_FIELD1_OFFSET_SHIFT |
10039 LRH_SC_MATCH_OFFSET << RCV_RSM_SELECT_FIELD2_OFFSET_SHIFT |
10040 LRH_SC_SELECT_OFFSET << RCV_RSM_SELECT_INDEX1_OFFSET_SHIFT |
10041 ((u64)n) << RCV_RSM_SELECT_INDEX1_WIDTH_SHIFT |
10042 QPN_SELECT_OFFSET << RCV_RSM_SELECT_INDEX2_OFFSET_SHIFT |
10043 ((u64)m + (u64)n) << RCV_RSM_SELECT_INDEX2_WIDTH_SHIFT);
10044 write_csr(dd, RCV_RSM_MATCH /* + (8 * 0) */,
10045 LRH_BTH_MASK << RCV_RSM_MATCH_MASK1_SHIFT |
10046 LRH_BTH_VALUE << RCV_RSM_MATCH_VALUE1_SHIFT |
10047 LRH_SC_MASK << RCV_RSM_MATCH_MASK2_SHIFT |
10048 LRH_SC_VALUE << RCV_RSM_MATCH_VALUE2_SHIFT);
10049 /* Enable RSM */
10050 add_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
10051 kfree(rsmmap);
10052 /* map everything else (non-VL15) to context 0 */
10053 init_qpmap_table(
10054 dd,
10055 0,
10056 0);
10057 dd->qos_shift = n + 1;
10058 return;
10059bail:
10060 dd->qos_shift = 1;
10061 init_qpmap_table(
10062 dd,
10063 dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0,
10064 dd->n_krcv_queues - 1);
10065}
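
/*
 * Illustrative sketch only, not used by the driver: the RSM map table
 * index arithmetic used by init_qos() above.  n is the number of VL
 * select bits (ilog2(num_vls)) and m the number of QPN select bits
 * (ilog2(qpns_per_vl)); the map index for a (vl, qpn) pair is
 * (qpn << n) ^ vl, and each 64-bit map CSR holds 8 one-byte entries.
 * The helper names are assumptions added for this example.
 */
static inline unsigned rsm_map_index(unsigned vl, unsigned qpn, unsigned n)
{
	return (qpn << n) ^ vl;
}

static inline unsigned rsm_map_csr(unsigned idx)
{
	return idx / 8;			/* which RCV_RSM_MAP_TABLE register */
}

static inline unsigned rsm_map_shift(unsigned idx)
{
	return (idx % 8) * 8;		/* bit offset of the entry's byte */
}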
10066
10067static void init_rxe(struct hfi1_devdata *dd)
10068{
10069 /* enable all receive errors */
10070 write_csr(dd, RCV_ERR_MASK, ~0ull);
10071 /* setup QPN map table - start where VL15 context leaves off */
10072 init_qos(
10073 dd,
10074 dd->n_krcv_queues > MIN_KERNEL_KCTXTS ? MIN_KERNEL_KCTXTS : 0);
10075 /*
10076 * make sure RcvCtrl.RcvWcb <= PCIe Device Control
10077 * Register Max_Payload_Size (PCI_EXP_DEVCTL in Linux PCIe config
10078 * space, PciCfgCap2.MaxPayloadSize in HFI). There is only one
10079 * invalid configuration: RcvCtrl.RcvWcb set to its max of 256 and
10080	 * Max_Payload_Size set to its minimum of 128.
10081 *
10082 * Presently, RcvCtrl.RcvWcb is not modified from its default of 0
10083 * (64 bytes). Max_Payload_Size is possibly modified upward in
10084 * tune_pcie_caps() which is called after this routine.
10085 */
10086}
10087
10088static void init_other(struct hfi1_devdata *dd)
10089{
10090 /* enable all CCE errors */
10091 write_csr(dd, CCE_ERR_MASK, ~0ull);
10092 /* enable *some* Misc errors */
10093 write_csr(dd, MISC_ERR_MASK, DRIVER_MISC_MASK);
10094 /* enable all DC errors, except LCB */
10095 write_csr(dd, DCC_ERR_FLG_EN, ~0ull);
10096 write_csr(dd, DC_DC8051_ERR_EN, ~0ull);
10097}
10098
10099/*
10100 * Fill out the given AU table using the given CU. A CU is defined in terms
10101	 * of AUs.  The table is an encoding: given the index, how many AUs does
10102	 * that index represent?
10103 *
10104 * NOTE: Assumes that the register layout is the same for the
10105 * local and remote tables.
10106 */
10107static void assign_cm_au_table(struct hfi1_devdata *dd, u32 cu,
10108 u32 csr0to3, u32 csr4to7)
10109{
10110 write_csr(dd, csr0to3,
10111 0ull <<
10112 SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE0_SHIFT
10113 | 1ull <<
10114 SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE1_SHIFT
10115 | 2ull * cu <<
10116 SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE2_SHIFT
10117 | 4ull * cu <<
10118 SEND_CM_LOCAL_AU_TABLE0_TO3_LOCAL_AU_TABLE3_SHIFT);
10119 write_csr(dd, csr4to7,
10120 8ull * cu <<
10121 SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE4_SHIFT
10122 | 16ull * cu <<
10123 SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE5_SHIFT
10124 | 32ull * cu <<
10125 SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE6_SHIFT
10126 | 64ull * cu <<
10127 SEND_CM_LOCAL_AU_TABLE4_TO7_LOCAL_AU_TABLE7_SHIFT);
10128
10129}
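
/*
 * Illustrative sketch only, not used by the driver: the AU table
 * encoding written by assign_cm_au_table() above.  Index i represents
 * 0, 1, 2*cu, 4*cu, 8*cu, 16*cu, 32*cu or 64*cu allocation units for
 * i = 0..7.  The helper name is an assumption added for this example.
 */
static inline u64 au_table_entry(u32 cu, unsigned idx)
{
	if (idx == 0)
		return 0;
	if (idx == 1)
		return 1;
	return (1ull << (idx - 1)) * cu;	/* 2*cu, 4*cu, ..., 64*cu */
}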
10130
10131static void assign_local_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10132{
10133 assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_LOCAL_AU_TABLE0_TO3,
10134 SEND_CM_LOCAL_AU_TABLE4_TO7);
10135}
10136
10137void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu)
10138{
10139 assign_cm_au_table(dd, vcu_to_cu(vcu), SEND_CM_REMOTE_AU_TABLE0_TO3,
10140 SEND_CM_REMOTE_AU_TABLE4_TO7);
10141}
10142
10143static void init_txe(struct hfi1_devdata *dd)
10144{
10145 int i;
10146
10147 /* enable all PIO, SDMA, general, and Egress errors */
10148 write_csr(dd, SEND_PIO_ERR_MASK, ~0ull);
10149 write_csr(dd, SEND_DMA_ERR_MASK, ~0ull);
10150 write_csr(dd, SEND_ERR_MASK, ~0ull);
10151 write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull);
10152
10153 /* enable all per-context and per-SDMA engine errors */
10154 for (i = 0; i < dd->chip_send_contexts; i++)
10155 write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull);
10156 for (i = 0; i < dd->chip_sdma_engines; i++)
10157 write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull);
10158
10159 /* set the local CU to AU mapping */
10160 assign_local_cm_au_table(dd, dd->vcu);
10161
10162 /*
10163 * Set reasonable default for Credit Return Timer
10164 * Don't set on Simulator - causes it to choke.
10165 */
10166 if (dd->icode != ICODE_FUNCTIONAL_SIMULATOR)
10167 write_csr(dd, SEND_CM_TIMER_CTRL, HFI1_CREDIT_RETURN_RATE);
10168}
10169
10170int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey)
10171{
10172 struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
10173 unsigned sctxt;
10174 int ret = 0;
10175 u64 reg;
10176
10177 if (!rcd || !rcd->sc) {
10178 ret = -EINVAL;
10179 goto done;
10180 }
10181 sctxt = rcd->sc->hw_context;
10182 reg = SEND_CTXT_CHECK_JOB_KEY_MASK_SMASK | /* mask is always 1's */
10183 ((jkey & SEND_CTXT_CHECK_JOB_KEY_VALUE_MASK) <<
10184 SEND_CTXT_CHECK_JOB_KEY_VALUE_SHIFT);
10185 /* JOB_KEY_ALLOW_PERMISSIVE is not allowed by default */
10186 if (HFI1_CAP_KGET_MASK(rcd->flags, ALLOW_PERM_JKEY))
10187 reg |= SEND_CTXT_CHECK_JOB_KEY_ALLOW_PERMISSIVE_SMASK;
10188 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, reg);
10189 /*
10190 * Enable send-side J_KEY integrity check, unless this is A0 h/w
10191 * (due to A0 erratum).
10192 */
10193 if (!is_a0(dd)) {
10194 reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10195 reg |= SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10196 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10197 }
10198
10199 /* Enable J_KEY check on receive context. */
10200 reg = RCV_KEY_CTRL_JOB_KEY_ENABLE_SMASK |
10201 ((jkey & RCV_KEY_CTRL_JOB_KEY_VALUE_MASK) <<
10202 RCV_KEY_CTRL_JOB_KEY_VALUE_SHIFT);
10203 write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, reg);
10204done:
10205 return ret;
10206}
10207
10208int hfi1_clear_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt)
10209{
10210 struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
10211 unsigned sctxt;
10212 int ret = 0;
10213 u64 reg;
10214
10215 if (!rcd || !rcd->sc) {
10216 ret = -EINVAL;
10217 goto done;
10218 }
10219 sctxt = rcd->sc->hw_context;
10220 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_JOB_KEY, 0);
10221 /*
10222 * Disable send-side J_KEY integrity check, unless this is A0 h/w.
10223 * This check would not have been enabled for A0 h/w, see
10224 * set_ctxt_jkey().
10225 */
10226 if (!is_a0(dd)) {
10227 reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10228 reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK;
10229 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10230 }
10231 /* Turn off the J_KEY on the receive side */
10232 write_kctxt_csr(dd, ctxt, RCV_KEY_CTRL, 0);
10233done:
10234 return ret;
10235}
10236
10237int hfi1_set_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt, u16 pkey)
10238{
10239 struct hfi1_ctxtdata *rcd;
10240 unsigned sctxt;
10241 int ret = 0;
10242 u64 reg;
10243
10244 if (ctxt < dd->num_rcv_contexts)
10245 rcd = dd->rcd[ctxt];
10246 else {
10247 ret = -EINVAL;
10248 goto done;
10249 }
10250 if (!rcd || !rcd->sc) {
10251 ret = -EINVAL;
10252 goto done;
10253 }
10254 sctxt = rcd->sc->hw_context;
10255 reg = ((u64)pkey & SEND_CTXT_CHECK_PARTITION_KEY_VALUE_MASK) <<
10256 SEND_CTXT_CHECK_PARTITION_KEY_VALUE_SHIFT;
10257 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, reg);
10258 reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10259 reg |= SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10260 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10261done:
10262 return ret;
10263}
10264
10265int hfi1_clear_ctxt_pkey(struct hfi1_devdata *dd, unsigned ctxt)
10266{
10267 struct hfi1_ctxtdata *rcd;
10268 unsigned sctxt;
10269 int ret = 0;
10270 u64 reg;
10271
10272 if (ctxt < dd->num_rcv_contexts)
10273 rcd = dd->rcd[ctxt];
10274 else {
10275 ret = -EINVAL;
10276 goto done;
10277 }
10278 if (!rcd || !rcd->sc) {
10279 ret = -EINVAL;
10280 goto done;
10281 }
10282 sctxt = rcd->sc->hw_context;
10283 reg = read_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE);
10284 reg &= ~SEND_CTXT_CHECK_ENABLE_CHECK_PARTITION_KEY_SMASK;
10285 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_ENABLE, reg);
10286 write_kctxt_csr(dd, sctxt, SEND_CTXT_CHECK_PARTITION_KEY, 0);
10287done:
10288 return ret;
10289}
10290
10291/*
10292	 * Start doing the clean up of the chip. Our clean up happens in multiple
10293 * stages and this is just the first.
10294 */
10295void hfi1_start_cleanup(struct hfi1_devdata *dd)
10296{
10297 free_cntrs(dd);
10298 free_rcverr(dd);
10299 clean_up_interrupts(dd);
10300}
10301
10302#define HFI_BASE_GUID(dev) \
10303 ((dev)->base_guid & ~(1ULL << GUID_HFI_INDEX_SHIFT))
10304
10305/*
10306 * Certain chip functions need to be initialized only once per asic
10307 * instead of per-device. This function finds the peer device and
10308 * checks whether that chip initialization needs to be done by this
10309 * device.
10310 */
10311static void asic_should_init(struct hfi1_devdata *dd)
10312{
10313 unsigned long flags;
10314 struct hfi1_devdata *tmp, *peer = NULL;
10315
10316 spin_lock_irqsave(&hfi1_devs_lock, flags);
10317 /* Find our peer device */
10318 list_for_each_entry(tmp, &hfi1_dev_list, list) {
10319 if ((HFI_BASE_GUID(dd) == HFI_BASE_GUID(tmp)) &&
10320 dd->unit != tmp->unit) {
10321 peer = tmp;
10322 break;
10323 }
10324 }
10325
10326 /*
10327	 * "Claim" the ASIC for initialization if it hasn't been
10328	 * "claimed" yet.
10329 */
10330 if (!peer || !(peer->flags & HFI1_DO_INIT_ASIC))
10331 dd->flags |= HFI1_DO_INIT_ASIC;
10332 spin_unlock_irqrestore(&hfi1_devs_lock, flags);
10333}
10334
10335/**
10336	 * Allocate and initialize the device structure for the hfi.
10337	 * @pdev: the pci_dev for hfi1_ib device
10338 * @ent: pci_device_id struct for this dev
10339 *
10340 * Also allocates, initializes, and returns the devdata struct for this
10341 * device instance
10342 *
10343 * This is global, and is called directly at init to set up the
10344 * chip-specific function pointers for later use.
10345 */
10346struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
10347 const struct pci_device_id *ent)
10348{
10349 struct hfi1_devdata *dd;
10350 struct hfi1_pportdata *ppd;
10351 u64 reg;
10352 int i, ret;
10353 static const char * const inames[] = { /* implementation names */
10354 "RTL silicon",
10355 "RTL VCS simulation",
10356 "RTL FPGA emulation",
10357 "Functional simulator"
10358 };
10359
10360 dd = hfi1_alloc_devdata(pdev,
10361 NUM_IB_PORTS * sizeof(struct hfi1_pportdata));
10362 if (IS_ERR(dd))
10363 goto bail;
10364 ppd = dd->pport;
10365 for (i = 0; i < dd->num_pports; i++, ppd++) {
10366 int vl;
10367 /* init common fields */
10368 hfi1_init_pportdata(pdev, ppd, dd, 0, 1);
10369 /* DC supports 4 link widths */
10370 ppd->link_width_supported =
10371 OPA_LINK_WIDTH_1X | OPA_LINK_WIDTH_2X |
10372 OPA_LINK_WIDTH_3X | OPA_LINK_WIDTH_4X;
10373 ppd->link_width_downgrade_supported =
10374 ppd->link_width_supported;
10375 /* start out enabling only 4X */
10376 ppd->link_width_enabled = OPA_LINK_WIDTH_4X;
10377 ppd->link_width_downgrade_enabled =
10378 ppd->link_width_downgrade_supported;
10379 /* link width active is 0 when link is down */
10380 /* link width downgrade active is 0 when link is down */
10381
10382 if (num_vls < HFI1_MIN_VLS_SUPPORTED
10383 || num_vls > HFI1_MAX_VLS_SUPPORTED) {
10384 hfi1_early_err(&pdev->dev,
10385 "Invalid num_vls %u, using %u VLs\n",
10386 num_vls, HFI1_MAX_VLS_SUPPORTED);
10387 num_vls = HFI1_MAX_VLS_SUPPORTED;
10388 }
10389 ppd->vls_supported = num_vls;
10390 ppd->vls_operational = ppd->vls_supported;
10391 /* Set the default MTU. */
10392 for (vl = 0; vl < num_vls; vl++)
10393 dd->vld[vl].mtu = hfi1_max_mtu;
10394 dd->vld[15].mtu = MAX_MAD_PACKET;
10395 /*
10396	 * Set the initial values to reasonable defaults; they will be
10397	 * set for real when the link is up.
10398 */
10399 ppd->lstate = IB_PORT_DOWN;
10400 ppd->overrun_threshold = 0x4;
10401 ppd->phy_error_threshold = 0xf;
10402 ppd->port_crc_mode_enabled = link_crc_mask;
10403 /* initialize supported LTP CRC mode */
10404 ppd->port_ltp_crc_mode = cap_to_port_ltp(link_crc_mask) << 8;
10405 /* initialize enabled LTP CRC mode */
10406 ppd->port_ltp_crc_mode |= cap_to_port_ltp(link_crc_mask) << 4;
10407 /* start in offline */
10408 ppd->host_link_state = HLS_DN_OFFLINE;
10409 init_vl_arb_caches(ppd);
10410 }
10411
10412 dd->link_default = HLS_DN_POLL;
10413
10414 /*
10415 * Do remaining PCIe setup and save PCIe values in dd.
10416 * Any error printing is already done by the init code.
10417 * On return, we have the chip mapped.
10418 */
10419 ret = hfi1_pcie_ddinit(dd, pdev, ent);
10420 if (ret < 0)
10421 goto bail_free;
10422
10423 /* verify that reads actually work, save revision for reset check */
10424 dd->revision = read_csr(dd, CCE_REVISION);
10425 if (dd->revision == ~(u64)0) {
10426 dd_dev_err(dd, "cannot read chip CSRs\n");
10427 ret = -EINVAL;
10428 goto bail_cleanup;
10429 }
10430 dd->majrev = (dd->revision >> CCE_REVISION_CHIP_REV_MAJOR_SHIFT)
10431 & CCE_REVISION_CHIP_REV_MAJOR_MASK;
10432 dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
10433 & CCE_REVISION_CHIP_REV_MINOR_MASK;
10434
10435 /* obtain the hardware ID - NOT related to unit, which is a
10436 software enumeration */
10437 reg = read_csr(dd, CCE_REVISION2);
10438 dd->hfi1_id = (reg >> CCE_REVISION2_HFI_ID_SHIFT)
10439 & CCE_REVISION2_HFI_ID_MASK;
10440 /* the variable size will remove unwanted bits */
10441 dd->icode = reg >> CCE_REVISION2_IMPL_CODE_SHIFT;
10442 dd->irev = reg >> CCE_REVISION2_IMPL_REVISION_SHIFT;
10443 dd_dev_info(dd, "Implementation: %s, revision 0x%x\n",
10444 dd->icode < ARRAY_SIZE(inames) ? inames[dd->icode] : "unknown",
10445 (int)dd->irev);
10446
10447 /* speeds the hardware can support */
10448 dd->pport->link_speed_supported = OPA_LINK_SPEED_25G;
10449 /* speeds allowed to run at */
10450 dd->pport->link_speed_enabled = dd->pport->link_speed_supported;
10451 /* give a reasonable active value, will be set on link up */
10452 dd->pport->link_speed_active = OPA_LINK_SPEED_25G;
10453
10454 dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS);
10455 dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS);
10456 dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES);
10457 dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE);
10458 dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE);
10459 /* fix up link widths for emulation _p */
10460 ppd = dd->pport;
10461 if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) {
10462 ppd->link_width_supported =
10463 ppd->link_width_enabled =
10464 ppd->link_width_downgrade_supported =
10465 ppd->link_width_downgrade_enabled =
10466 OPA_LINK_WIDTH_1X;
10467 }
10468	/* ensure num_vls isn't larger than number of sdma engines */
10469	if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) {
10470		dd_dev_err(dd, "num_vls %u too large, using %u VLs\n",
10471			   num_vls, dd->chip_sdma_engines);
10472		ppd->vls_supported = num_vls = dd->chip_sdma_engines;
10473 ppd->vls_operational = ppd->vls_supported;
10474 }
10475
10476 /*
10477 * Convert the ns parameter to the 64 * cclocks used in the CSR.
10478 * Limit the max if larger than the field holds. If timeout is
10479 * non-zero, then the calculated field will be at least 1.
10480 *
10481 * Must be after icode is set up - the cclock rate depends
10482 * on knowing the hardware being used.
10483 */
10484 dd->rcv_intr_timeout_csr = ns_to_cclock(dd, rcv_intr_timeout) / 64;
10485 if (dd->rcv_intr_timeout_csr >
10486 RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK)
10487 dd->rcv_intr_timeout_csr =
10488 RCV_AVAIL_TIME_OUT_TIME_OUT_RELOAD_MASK;
10489 else if (dd->rcv_intr_timeout_csr == 0 && rcv_intr_timeout)
10490 dd->rcv_intr_timeout_csr = 1;
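	/*
	 * Worked example (illustrative only): if ns_to_cclock() returned
	 * 672 cclocks for the requested timeout, the field written would be
	 * 672 / 64 = 10; a non-zero timeout that converts to fewer than 64
	 * cclocks is rounded up to a field value of 1 by the check above.
	 */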
10491
10492 /* obtain chip sizes, reset chip CSRs */
10493 init_chip(dd);
10494
10495 /* read in the PCIe link speed information */
10496 ret = pcie_speeds(dd);
10497 if (ret)
10498 goto bail_cleanup;
10499
10500 /* needs to be done before we look for the peer device */
10501 read_guid(dd);
10502
10503 asic_should_init(dd);
10504
10505 /* read in firmware */
10506 ret = hfi1_firmware_init(dd);
10507 if (ret)
10508 goto bail_cleanup;
10509
10510 /*
10511 * In general, the PCIe Gen3 transition must occur after the
10512 * chip has been idled (so it won't initiate any PCIe transactions
10513 * e.g. an interrupt) and before the driver changes any registers
10514 * (the transition will reset the registers).
10515 *
10516 * In particular, place this call after:
10517 * - init_chip() - the chip will not initiate any PCIe transactions
10518 * - pcie_speeds() - reads the current link speed
10519 * - hfi1_firmware_init() - the needed firmware is ready to be
10520 * downloaded
10521 */
10522 ret = do_pcie_gen3_transition(dd);
10523 if (ret)
10524 goto bail_cleanup;
10525
10526 /* start setting dd values and adjusting CSRs */
10527 init_early_variables(dd);
10528
10529 parse_platform_config(dd);
10530
10531 /* add board names as they are defined */
10532	dd->boardname = kmalloc(64, GFP_KERNEL);
10533	if (!dd->boardname) {
		ret = -ENOMEM;
10534		goto bail_cleanup;
	}
10535 snprintf(dd->boardname, 64, "Board ID 0x%llx",
10536		 (dd->revision >> CCE_REVISION_BOARD_ID_LOWER_NIBBLE_SHIFT)
10537		 & CCE_REVISION_BOARD_ID_LOWER_NIBBLE_MASK);
10538
10539 snprintf(dd->boardversion, BOARD_VERS_MAX,
10540 "ChipABI %u.%u, %s, ChipRev %u.%u, SW Compat %llu\n",
10541 HFI1_CHIP_VERS_MAJ, HFI1_CHIP_VERS_MIN,
10542 dd->boardname,
10543 (u32)dd->majrev,
10544 (u32)dd->minrev,
10545 (dd->revision >> CCE_REVISION_SW_SHIFT)
10546 & CCE_REVISION_SW_MASK);
10547
10548 ret = set_up_context_variables(dd);
10549 if (ret)
10550 goto bail_cleanup;
10551
10552 /* set initial RXE CSRs */
10553 init_rxe(dd);
10554 /* set initial TXE CSRs */
10555 init_txe(dd);
10556 /* set initial non-RXE, non-TXE CSRs */
10557 init_other(dd);
10558 /* set up KDETH QP prefix in both RX and TX CSRs */
10559 init_kdeth_qp(dd);
10560
10561 /* send contexts must be set up before receive contexts */
10562 ret = init_send_contexts(dd);
10563 if (ret)
10564 goto bail_cleanup;
10565
10566 ret = hfi1_create_ctxts(dd);
10567 if (ret)
10568 goto bail_cleanup;
10569
10570 dd->rcvhdrsize = DEFAULT_RCVHDRSIZE;
10571 /*
10572 * rcd[0] is guaranteed to be valid by this point. Also, all
10573	 * contexts are using the same value, as per the module parameter.
10574 */
10575	dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - (sizeof(u64)/sizeof(u32));
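	/*
	 * For illustration: with a (hypothetical) rcvhdrqentsize of 32
	 * dwords, rhf_offset = 32 - 8/4 = 30, i.e. the 8-byte RHF sits in
	 * the last two dwords of each header queue entry.
	 */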
10576
10577 ret = init_pervl_scs(dd);
10578 if (ret)
10579 goto bail_cleanup;
10580
10581 /* sdma init */
10582 for (i = 0; i < dd->num_pports; ++i) {
10583 ret = sdma_init(dd, i);
10584 if (ret)
10585 goto bail_cleanup;
10586 }
10587
10588 /* use contexts created by hfi1_create_ctxts */
10589 ret = set_up_interrupts(dd);
10590 if (ret)
10591 goto bail_cleanup;
10592
10593 /* set up LCB access - must be after set_up_interrupts() */
10594 init_lcb_access(dd);
10595
10596 snprintf(dd->serial, SERIAL_MAX, "0x%08llx\n",
10597 dd->base_guid & 0xFFFFFF);
10598
10599 dd->oui1 = dd->base_guid >> 56 & 0xFF;
10600 dd->oui2 = dd->base_guid >> 48 & 0xFF;
10601 dd->oui3 = dd->base_guid >> 40 & 0xFF;
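	/*
	 * Illustration with a made-up GUID of 0x0011750100000abc:
	 * oui1/oui2/oui3 become 0x00/0x11/0x75 and the serial string is
	 * "0x00000abc" (the low 24 bits of the GUID).
	 */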
10602
10603 ret = load_firmware(dd); /* asymmetric with dispose_firmware() */
10604 if (ret)
10605 goto bail_clear_intr;
10606 check_fabric_firmware_versions(dd);
10607
10608 thermal_init(dd);
10609
10610 ret = init_cntrs(dd);
10611 if (ret)
10612 goto bail_clear_intr;
10613
10614 ret = init_rcverr(dd);
10615 if (ret)
10616 goto bail_free_cntrs;
10617
10618 ret = eprom_init(dd);
10619 if (ret)
10620 goto bail_free_rcverr;
10621
10622 goto bail;
10623
10624bail_free_rcverr:
10625 free_rcverr(dd);
10626bail_free_cntrs:
10627 free_cntrs(dd);
10628bail_clear_intr:
10629 clean_up_interrupts(dd);
10630bail_cleanup:
10631 hfi1_pcie_ddcleanup(dd);
10632bail_free:
10633 hfi1_free_devdata(dd);
10634 dd = ERR_PTR(ret);
10635bail:
10636 return dd;
10637}
10638
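/*
 * Compute how many additional egress cycles are needed to pace a packet
 * of dw_len dwords down to desired_egress_rate (in Mbit/s) when that rate
 * is below the port's current egress rate. Returns 0 when no pacing is
 * required.
 */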
10639static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
10640 u32 dw_len)
10641{
10642 u32 delta_cycles;
10643 u32 current_egress_rate = ppd->current_egress_rate;
10644 /* rates here are in units of 10^6 bits/sec */
10645
10646 if (desired_egress_rate == -1)
10647 return 0; /* shouldn't happen */
10648
10649 if (desired_egress_rate >= current_egress_rate)
10650		return 0; /* we can't help it go faster, only slower */
10651
10652 delta_cycles = egress_cycles(dw_len * 4, desired_egress_rate) -
10653 egress_cycles(dw_len * 4, current_egress_rate);
10654
10655 return (u16)delta_cycles;
10656}
10657
10659/**
10660 * create_pbc - build a pbc for transmission
10661 * @ppd: the per-port data structure
10662 * @flags: special case flags or-ed in built pbc
10663 * @srate_mbs: static rate (Mbit/s)
10664 * @vl: vl
 * @dw_len: dword length (header words + data words + pbc words)
10665 *
10666 * Create a PBC with the given flags, rate, VL, and length.
10667 *
10668 * NOTE: The PBC created will not insert any HCRC - all callers but one are
10669 * for verbs, which does not use this PSM feature. The lone other caller
10670 * is for the diagnostic interface which calls this if the user does not
10671 * supply their own PBC.
10672 */
10673u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
10674 u32 dw_len)
10675{
10676 u64 pbc, delay = 0;
10677
10678 if (unlikely(srate_mbs))
10679 delay = delay_cycles(ppd, srate_mbs, dw_len);
10680
10681 pbc = flags
10682 | (delay << PBC_STATIC_RATE_CONTROL_COUNT_SHIFT)
10683 | ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
10684 | (vl & PBC_VL_MASK) << PBC_VL_SHIFT
10685 | (dw_len & PBC_LENGTH_DWS_MASK)
10686 << PBC_LENGTH_DWS_SHIFT;
10687
10688 return pbc;
10689}
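/*
 * Illustrative (hypothetical) usage, e.g. for a verbs send with no static
 * rate pacing; hdr_dwords and data_dwords are placeholder names, and the
 * trailing 2 accounts for the 8-byte PBC itself:
 *
 *	pbc = create_pbc(ppd, 0, 0, vl, hdr_dwords + data_dwords + 2);
 */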
10690
10691#define SBUS_THERMAL 0x4f
10692#define SBUS_THERM_MONITOR_MODE 0x1
10693
10694#define THERM_FAILURE(dev, ret, reason) \
10695 dd_dev_err((dd), \
10696 "Thermal sensor initialization failed: %s (%d)\n", \
10697 (reason), (ret))
10698
10699/*
10700 * Initialize the Avago Thermal sensor.
10701 *
10702 * After initialization, enable polling of thermal sensor through
10703 * SBus interface. For this to work, the SBus Master
10704 * firmware has to be loaded, because the HW polling
10705 * logic uses SBus interrupts, which are not supported by the
10706 * default firmware. Otherwise, no data will be returned through
10707 * the ASIC_STS_THERM CSR.
10708 */
10709static int thermal_init(struct hfi1_devdata *dd)
10710{
10711 int ret = 0;
10712
10713 if (dd->icode != ICODE_RTL_SILICON ||
10714 !(dd->flags & HFI1_DO_INIT_ASIC))
10715 return ret;
10716
10717 acquire_hw_mutex(dd);
10718 dd_dev_info(dd, "Initializing thermal sensor\n");
10719 /* Thermal Sensor Initialization */
10720 /* Step 1: Reset the Thermal SBus Receiver */
10721 ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10722 RESET_SBUS_RECEIVER, 0);
10723 if (ret) {
10724 THERM_FAILURE(dd, ret, "Bus Reset");
10725 goto done;
10726 }
10727 /* Step 2: Set Reset bit in Thermal block */
10728 ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10729 WRITE_SBUS_RECEIVER, 0x1);
10730 if (ret) {
10731 THERM_FAILURE(dd, ret, "Therm Block Reset");
10732 goto done;
10733 }
10734	/* Step 3: Write clock divider (0x32 = 50; 100 MHz / 50 = 2 MHz) */
10735 ret = sbus_request_slow(dd, SBUS_THERMAL, 0x1,
10736 WRITE_SBUS_RECEIVER, 0x32);
10737 if (ret) {
10738 THERM_FAILURE(dd, ret, "Write Clock Div");
10739 goto done;
10740 }
10741 /* Step 4: Select temperature mode */
10742 ret = sbus_request_slow(dd, SBUS_THERMAL, 0x3,
10743 WRITE_SBUS_RECEIVER,
10744 SBUS_THERM_MONITOR_MODE);
10745 if (ret) {
10746 THERM_FAILURE(dd, ret, "Write Mode Sel");
10747 goto done;
10748 }
10749 /* Step 5: De-assert block reset and start conversion */
10750 ret = sbus_request_slow(dd, SBUS_THERMAL, 0x0,
10751 WRITE_SBUS_RECEIVER, 0x2);
10752 if (ret) {
10753 THERM_FAILURE(dd, ret, "Write Reset Deassert");
10754 goto done;
10755 }
10756 /* Step 5.1: Wait for first conversion (21.5ms per spec) */
10757 msleep(22);
10758
10759 /* Enable polling of thermal readings */
10760 write_csr(dd, ASIC_CFG_THERM_POLL_EN, 0x1);
10761done:
10762 release_hw_mutex(dd);
10763 return ret;
10764}
10765
10766static void handle_temp_err(struct hfi1_devdata *dd)
10767{
10768 struct hfi1_pportdata *ppd = &dd->pport[0];
10769 /*
10770 * Thermal Critical Interrupt
10771 * Put the device into forced freeze mode, take link down to
10772 * offline, and put DC into reset.
10773 */
10774 dd_dev_emerg(dd,
10775 "Critical temperature reached! Forcing device into freeze mode!\n");
10776 dd->flags |= HFI1_FORCED_FREEZE;
10777	start_freeze_handling(ppd, FREEZE_SELF | FREEZE_ABORT);
10778 /*
10779 * Shut DC down as much and as quickly as possible.
10780 *
10781 * Step 1: Take the link down to OFFLINE. This will cause the
10782 * 8051 to put the Serdes in reset. However, we don't want to
10783 * go through the entire link state machine since we want to
10784	 * shut down ASAP. Furthermore, this is not a graceful shutdown
10785 * but rather an attempt to save the chip.
10786 * Code below is almost the same as quiet_serdes() but avoids
10787 * all the extra work and the sleeps.
10788 */
10789 ppd->driver_link_ready = 0;
10790 ppd->link_enabled = 0;
10791 set_physical_link_state(dd, PLS_OFFLINE |
10792 (OPA_LINKDOWN_REASON_SMA_DISABLED << 8));
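	/*
	 * Note: the link-down reason rides in bits 8-15 of the requested
	 * state word, above the PLS_OFFLINE code in the low byte, which is
	 * why it is shifted left by 8 here.
	 */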
10793 /*
10794 * Step 2: Shutdown LCB and 8051
10795 * After shutdown, do not restore DC_CFG_RESET value.
10796 */
10797 dc_shutdown(dd);
10798}