Commit | Line | Data |
---|---|---|
225c7b1f RD |
1 | /* |
2 | * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. | |
51a379d0 | 3 | * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. |
225c7b1f RD |
4 | * |
5 | * This software is available to you under a choice of one of two | |
6 | * licenses. You may choose to be licensed under the terms of the GNU | |
7 | * General Public License (GPL) Version 2, available from the file | |
8 | * COPYING in the main directory of this source tree, or the | |
9 | * OpenIB.org BSD license below: | |
10 | * | |
11 | * Redistribution and use in source and binary forms, with or | |
12 | * without modification, are permitted provided that the following | |
13 | * conditions are met: | |
14 | * | |
15 | * - Redistributions of source code must retain the above | |
16 | * copyright notice, this list of conditions and the following | |
17 | * disclaimer. | |
18 | * | |
19 | * - Redistributions in binary form must reproduce the above | |
20 | * copyright notice, this list of conditions and the following | |
21 | * disclaimer in the documentation and/or other materials | |
22 | * provided with the distribution. | |
23 | * | |
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
26 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
28 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
29 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
30 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
31 | * SOFTWARE. | |
32 | */ | |
33 | ||
ee49bd93 | 34 | #include <linux/workqueue.h> |
9d9779e7 | 35 | #include <linux/module.h> |
ee49bd93 | 36 | |
225c7b1f RD |
37 | #include "mlx4.h" |
38 | ||
ee49bd93 JM |
39 | enum { |
40 | MLX4_CATAS_POLL_INTERVAL = 5 * HZ, | |
41 | }; | |
42 | ||
43 | static DEFINE_SPINLOCK(catas_lock); | |
44 | ||
45 | static LIST_HEAD(catas_list); | |
ee49bd93 JM |
46 | static struct work_struct catas_work; |
47 | ||
48 | static int internal_err_reset = 1; | |
49 | module_param(internal_err_reset, int, 0644); | |
50 | MODULE_PARM_DESC(internal_err_reset, | |
d81c7186 JM |
51 | "Reset device on internal errors if non-zero" |
52 | " (default 1, in SRIOV mode default is 0)"); | |
ee49bd93 JM |
53 | |
54 | static void dump_err_buf(struct mlx4_dev *dev) | |
225c7b1f RD |
55 | { |
56 | struct mlx4_priv *priv = mlx4_priv(dev); | |
57 | ||
58 | int i; | |
59 | ||
ee49bd93 | 60 | mlx4_err(dev, "Internal error detected:\n"); |
225c7b1f RD |
61 | for (i = 0; i < priv->fw.catas_size; ++i) |
62 | mlx4_err(dev, " buf[%02x]: %08x\n", | |
63 | i, swab32(readl(priv->catas_err.map + i))); | |
ee49bd93 | 64 | } |
225c7b1f | 65 | |
ee49bd93 JM |
66 | static void poll_catas(unsigned long dev_ptr) |
67 | { | |
68 | struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr; | |
69 | struct mlx4_priv *priv = mlx4_priv(dev); | |
70 | ||
71 | if (readl(priv->catas_err.map)) { | |
57dbf29a KSS |
72 | /* If the device is off-line, we cannot try to recover it */ |
73 | if (pci_channel_offline(dev->pdev)) | |
74 | mod_timer(&priv->catas_err.timer, | |
75 | round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); | |
76 | else { | |
77 | dump_err_buf(dev); | |
78 | mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); | |
ee49bd93 | 79 | |
57dbf29a KSS |
80 | if (internal_err_reset) { |
81 | spin_lock(&catas_lock); | |
82 | list_add(&priv->catas_err.list, &catas_list); | |
83 | spin_unlock(&catas_lock); | |
ee49bd93 | 84 | |
57dbf29a KSS |
85 | queue_work(mlx4_wq, &catas_work); |
86 | } | |
ee49bd93 JM |
87 | } |
88 | } else | |
89 | mod_timer(&priv->catas_err.timer, | |
90 | round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); | |
225c7b1f RD |
91 | } |
92 | ||
ee49bd93 JM |
93 | static void catas_reset(struct work_struct *work) |
94 | { | |
95 | struct mlx4_priv *priv, *tmppriv; | |
96 | struct mlx4_dev *dev; | |
97 | ||
98 | LIST_HEAD(tlist); | |
99 | int ret; | |
100 | ||
101 | spin_lock_irq(&catas_lock); | |
102 | list_splice_init(&catas_list, &tlist); | |
103 | spin_unlock_irq(&catas_lock); | |
104 | ||
105 | list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) { | |
634354d7 VG |
106 | struct pci_dev *pdev = priv->dev.pdev; |
107 | ||
57dbf29a KSS |
108 | /* If the device is off-line, we cannot reset it */ |
109 | if (pci_channel_offline(pdev)) | |
110 | continue; | |
111 | ||
ee49bd93 | 112 | ret = mlx4_restart_one(priv->dev.pdev); |
634354d7 | 113 | /* 'priv' now is not valid */ |
ee49bd93 | 114 | if (ret) |
0a645e80 JP |
115 | pr_err("mlx4 %s: Reset failed (%d)\n", |
116 | pci_name(pdev), ret); | |
634354d7 VG |
117 | else { |
118 | dev = pci_get_drvdata(pdev); | |
ee49bd93 | 119 | mlx4_dbg(dev, "Reset succeeded\n"); |
634354d7 | 120 | } |
ee49bd93 JM |
121 | } |
122 | } | |
123 | ||
124 | void mlx4_start_catas_poll(struct mlx4_dev *dev) | |
225c7b1f RD |
125 | { |
126 | struct mlx4_priv *priv = mlx4_priv(dev); | |
4979d18f | 127 | phys_addr_t addr; |
225c7b1f | 128 | |
d81c7186 JM |
129 | /*If we are in SRIOV the default of the module param must be 0*/ |
130 | if (mlx4_is_mfunc(dev)) | |
131 | internal_err_reset = 0; | |
132 | ||
ee49bd93 JM |
133 | INIT_LIST_HEAD(&priv->catas_err.list); |
134 | init_timer(&priv->catas_err.timer); | |
135 | priv->catas_err.map = NULL; | |
136 | ||
225c7b1f RD |
137 | addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) + |
138 | priv->fw.catas_offset; | |
139 | ||
140 | priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); | |
ee49bd93 | 141 | if (!priv->catas_err.map) { |
4979d18f RD |
142 | mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n", |
143 | (unsigned long long) addr); | |
ee49bd93 JM |
144 | return; |
145 | } | |
225c7b1f | 146 | |
ee49bd93 JM |
147 | priv->catas_err.timer.data = (unsigned long) dev; |
148 | priv->catas_err.timer.function = poll_catas; | |
149 | priv->catas_err.timer.expires = | |
150 | round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL); | |
151 | add_timer(&priv->catas_err.timer); | |
225c7b1f RD |
152 | } |
153 | ||
ee49bd93 | 154 | void mlx4_stop_catas_poll(struct mlx4_dev *dev) |
225c7b1f RD |
155 | { |
156 | struct mlx4_priv *priv = mlx4_priv(dev); | |
157 | ||
ee49bd93 JM |
158 | del_timer_sync(&priv->catas_err.timer); |
159 | ||
225c7b1f RD |
160 | if (priv->catas_err.map) |
161 | iounmap(priv->catas_err.map); | |
ee49bd93 JM |
162 | |
163 | spin_lock_irq(&catas_lock); | |
164 | list_del(&priv->catas_err.list); | |
165 | spin_unlock_irq(&catas_lock); | |
166 | } | |
167 | ||
27bf91d6 | 168 | void __init mlx4_catas_init(void) |
ee49bd93 JM |
169 | { |
170 | INIT_WORK(&catas_work, catas_reset); | |
225c7b1f | 171 | } |