powerpc/eeh: Platform dependent EEH operations
[deliverable/linux.git] / arch / powerpc / platforms / pseries / eeh.c
CommitLineData
1da177e4 1/*
3c8c90ab
LV
2 * Copyright IBM Corporation 2001, 2005, 2006
3 * Copyright Dave Engebretsen & Todd Inglett 2001
4 * Copyright Linas Vepstas 2005, 2006
cb3bc9d0 5 * Copyright 2001-2012 IBM Corporation.
69376502 6 *
1da177e4
LT
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
69376502 11 *
1da177e4
LT
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
69376502 16 *
1da177e4
LT
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
3c8c90ab
LV
20 *
21 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
1da177e4
LT
22 */
23
6dee3fb9 24#include <linux/delay.h>
cb3bc9d0 25#include <linux/sched.h>
1da177e4
LT
26#include <linux/init.h>
27#include <linux/list.h>
1da177e4
LT
28#include <linux/pci.h>
29#include <linux/proc_fs.h>
30#include <linux/rbtree.h>
31#include <linux/seq_file.h>
32#include <linux/spinlock.h>
66b15db6 33#include <linux/export.h>
acaa6176
SR
34#include <linux/of.h>
35
60063497 36#include <linux/atomic.h>
1da177e4 37#include <asm/eeh.h>
172ca926 38#include <asm/eeh_event.h>
1da177e4
LT
39#include <asm/io.h>
40#include <asm/machdep.h>
172ca926 41#include <asm/ppc-pci.h>
1da177e4 42#include <asm/rtas.h>
1da177e4 43
1da177e4
LT
44
45/** Overview:
46 * EEH, or "Extended Error Handling" is a PCI bridge technology for
47 * dealing with PCI bus errors that can't be dealt with within the
48 * usual PCI framework, except by check-stopping the CPU. Systems
49 * that are designed for high-availability/reliability cannot afford
50 * to crash due to a "mere" PCI error, thus the need for EEH.
51 * An EEH-capable bridge operates by converting a detected error
52 * into a "slot freeze", taking the PCI adapter off-line, making
53 * the slot behave, from the OS'es point of view, as if the slot
54 * were "empty": all reads return 0xff's and all writes are silently
55 * ignored. EEH slot isolation events can be triggered by parity
56 * errors on the address or data busses (e.g. during posted writes),
69376502
LV
57 * which in turn might be caused by low voltage on the bus, dust,
58 * vibration, humidity, radioactivity or plain-old failed hardware.
1da177e4
LT
59 *
60 * Note, however, that one of the leading causes of EEH slot
61 * freeze events are buggy device drivers, buggy device microcode,
62 * or buggy device hardware. This is because any attempt by the
63 * device to bus-master data to a memory address that is not
64 * assigned to the device will trigger a slot freeze. (The idea
65 * is to prevent devices-gone-wild from corrupting system memory).
66 * Buggy hardware/drivers will have a miserable time co-existing
67 * with EEH.
68 *
69 * Ideally, a PCI device driver, when suspecting that an isolation
25985edc 70 * event has occurred (e.g. by reading 0xff's), will then ask EEH
1da177e4
LT
71 * whether this is the case, and then take appropriate steps to
72 * reset the PCI slot, the PCI device, and then resume operations.
73 * However, until that day, the checking is done here, with the
74 * eeh_check_failure() routine embedded in the MMIO macros. If
75 * the slot is found to be isolated, an "EEH Event" is synthesized
76 * and sent out for processing.
77 */
78
5c1344e9 79/* If a device driver keeps reading an MMIO register in an interrupt
f36c5227
MM
80 * handler after a slot isolation event, it might be broken.
81 * This sets the threshold for how many read attempts we allow
82 * before printing an error message.
1da177e4 83 */
2fd30be8 84#define EEH_MAX_FAILS 2100000
1da177e4 85
17213c3b 86/* Time to wait for a PCI slot to report status, in milliseconds */
9c547768
LV
87#define PCI_BUS_RESET_WAIT_MSEC (60*1000)
88
1da177e4
LT
89/* RTAS tokens */
90static int ibm_set_eeh_option;
91static int ibm_set_slot_reset;
92static int ibm_read_slot_reset_state;
93static int ibm_read_slot_reset_state2;
94static int ibm_slot_error_detail;
25e591f6 95static int ibm_get_config_addr_info;
147d6a37 96static int ibm_get_config_addr_info2;
21e464dd 97static int ibm_configure_bridge;
65f47f13 98static int ibm_configure_pe;
1da177e4 99
aa1e6374
GS
100/* Platform dependent EEH operations */
101struct eeh_ops *eeh_ops = NULL;
102
1e28a7dd
DW
103int eeh_subsystem_enabled;
104EXPORT_SYMBOL(eeh_subsystem_enabled);
1da177e4 105
fd761fd8 106/* Lock to avoid races due to multiple reports of an error */
3d372628 107static DEFINE_RAW_SPINLOCK(confirm_error_lock);
fd761fd8 108
17213c3b
LV
/* Buffer for reporting slot-error-detail rtas calls. It's here
 * in BSS, and not dynamically allocated, so that it ends up in
 * RMO where RTAS can access it.
 */
1da177e4
LT
113static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
114static DEFINE_SPINLOCK(slot_errbuf_lock);
115static int eeh_error_buf_size;
116
17213c3b
LV
/* Buffer for reporting pci register dumps. It's here in BSS, and
 * not dynamically allocated, so that it ends up in RMO where RTAS
 * can access it.
 */
d99bb1db
LV
121#define EEH_PCI_REGS_LOG_LEN 4096
122static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
123
1da177e4 124/* System monitoring statistics */
257ffc64
LV
125static unsigned long no_device;
126static unsigned long no_dn;
127static unsigned long no_cfg_addr;
128static unsigned long ignored_check;
129static unsigned long total_mmio_ffs;
130static unsigned long false_positives;
257ffc64 131static unsigned long slot_resets;
1da177e4 132
7684b40c
LV
/*
 * True when the base-class byte of a PCI class code says "bridge".
 * The base class sits above the sub-class and prog-if bytes, so it is
 * extracted with a right shift, matching the dev->class >> 16 test used
 * in eeh_gather_pci_data().  The previous left shift could never compare
 * equal to PCI_BASE_CLASS_BRIDGE.
 */
#define IS_BRIDGE(class_code) (((class_code) >> 16) == PCI_BASE_CLASS_BRIDGE)
134
cb3bc9d0 135/**
cce4b2d2 136 * eeh_rtas_slot_error_detail - Retrieve error log through RTAS call
cb3bc9d0
GS
137 * @pdn: device node
138 * @severity: temporary or permanent error log
139 * @driver_log: driver log to be combined with the retrieved error log
140 * @loglen: length of driver log
141 *
142 * This routine should be called to retrieve error log through the dedicated
143 * RTAS call.
144 */
cce4b2d2 145static void eeh_rtas_slot_error_detail(struct pci_dn *pdn, int severity,
d99bb1db 146 char *driver_log, size_t loglen)
df7242b1 147{
fcb7543e 148 int config_addr;
df7242b1
LV
149 unsigned long flags;
150 int rc;
151
152 /* Log the error with the rtas logger */
153 spin_lock_irqsave(&slot_errbuf_lock, flags);
154 memset(slot_errbuf, 0, eeh_error_buf_size);
155
fcb7543e
LV
156 /* Use PE configuration address, if present */
157 config_addr = pdn->eeh_config_addr;
158 if (pdn->eeh_pe_config_addr)
159 config_addr = pdn->eeh_pe_config_addr;
160
df7242b1 161 rc = rtas_call(ibm_slot_error_detail,
fcb7543e 162 8, 1, NULL, config_addr,
df7242b1 163 BUID_HI(pdn->phb->buid),
d99bb1db
LV
164 BUID_LO(pdn->phb->buid),
165 virt_to_phys(driver_log), loglen,
df7242b1
LV
166 virt_to_phys(slot_errbuf),
167 eeh_error_buf_size,
168 severity);
169
170 if (rc == 0)
171 log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
172 spin_unlock_irqrestore(&slot_errbuf_lock, flags);
173}
174
d99bb1db 175/**
cce4b2d2 176 * eeh_gather_pci_data - Copy assorted PCI config space registers to buff
d99bb1db
LV
177 * @pdn: device to report data for
178 * @buf: point to buffer in which to log
179 * @len: amount of room in buffer
180 *
181 * This routine captures assorted PCI configuration space data,
182 * and puts them into a buffer for RTAS error logging.
183 */
cce4b2d2 184static size_t eeh_gather_pci_data(struct pci_dn *pdn, char * buf, size_t len)
d99bb1db 185{
0b9369f4 186 struct pci_dev *dev = pdn->pcidev;
d99bb1db 187 u32 cfg;
fcf9892b 188 int cap, i;
d99bb1db
LV
189 int n = 0;
190
fcf9892b
LV
191 n += scnprintf(buf+n, len-n, "%s\n", pdn->node->full_name);
192 printk(KERN_WARNING "EEH: of node=%s\n", pdn->node->full_name);
193
d99bb1db 194 rtas_read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
fcf9892b
LV
195 n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
196 printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
197
d99bb1db
LV
198 rtas_read_config(pdn, PCI_COMMAND, 4, &cfg);
199 n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
fcf9892b
LV
200 printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
201
b37ceefe
LV
202 if (!dev) {
203 printk(KERN_WARNING "EEH: no PCI device for this of node\n");
204 return n;
205 }
206
0b9369f4
LV
207 /* Gather bridge-specific registers */
208 if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
209 rtas_read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
210 n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
211 printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);
212
213 rtas_read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
214 n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
215 printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
216 }
217
fcf9892b 218 /* Dump out the PCI-X command and status regs */
b37ceefe 219 cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
fcf9892b
LV
220 if (cap) {
221 rtas_read_config(pdn, cap, 4, &cfg);
222 n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
223 printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
224
225 rtas_read_config(pdn, cap+4, 4, &cfg);
226 n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
227 printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
228 }
229
230 /* If PCI-E capable, dump PCI-E cap 10, and the AER */
b37ceefe 231 cap = pci_find_capability(dev, PCI_CAP_ID_EXP);
fcf9892b
LV
232 if (cap) {
233 n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
234 printk(KERN_WARNING
235 "EEH: PCI-E capabilities and status follow:\n");
236
237 for (i=0; i<=8; i++) {
238 rtas_read_config(pdn, cap+4*i, 4, &cfg);
239 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
240 printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
241 }
242
b37ceefe 243 cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
fcf9892b
LV
244 if (cap) {
245 n += scnprintf(buf+n, len-n, "pci-e AER:\n");
246 printk(KERN_WARNING
247 "EEH: PCI-E AER capability register set follows:\n");
248
249 for (i=0; i<14; i++) {
250 rtas_read_config(pdn, cap+4*i, 4, &cfg);
251 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
252 printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
253 }
254 }
255 }
0b9369f4
LV
256
257 /* Gather status on devices under the bridge */
258 if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
acaa6176
SR
259 struct device_node *dn;
260
261 for_each_child_of_node(pdn->node, dn) {
0b9369f4
LV
262 pdn = PCI_DN(dn);
263 if (pdn)
cce4b2d2 264 n += eeh_gather_pci_data(pdn, buf+n, len-n);
0b9369f4
LV
265 }
266 }
267
d99bb1db
LV
268 return n;
269}
270
cb3bc9d0
GS
271/**
272 * eeh_slot_error_detail - Generate combined log including driver log and error log
273 * @pdn: device node
274 * @severity: temporary or permanent error log
275 *
276 * This routine should be called to generate the combined log, which
277 * is comprised of driver log and error log. The driver log is figured
278 * out from the config space of the corresponding PCI device, while
279 * the error log is fetched through platform dependent function call.
280 */
d99bb1db
LV
281void eeh_slot_error_detail(struct pci_dn *pdn, int severity)
282{
283 size_t loglen = 0;
17213c3b 284 pci_regs_buf[0] = 0;
d99bb1db 285
cce4b2d2
GS
286 eeh_pci_enable(pdn, EEH_THAW_MMIO);
287 eeh_configure_bridge(pdn);
65f47f13 288 eeh_restore_bars(pdn);
cce4b2d2 289 loglen = eeh_gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN);
d99bb1db 290
cce4b2d2 291 eeh_rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen);
d99bb1db
LV
292}
293
1da177e4 294/**
cce4b2d2 295 * eeh_read_slot_reset_state - Read the reset state of a device node's slot
1da177e4
LT
296 * @dn: device node to read
297 * @rets: array to return results in
cb3bc9d0
GS
298 *
299 * Read the reset state of a device node's slot through platform dependent
300 * function call.
1da177e4 301 */
cce4b2d2 302static int eeh_read_slot_reset_state(struct pci_dn *pdn, int rets[])
1da177e4
LT
303{
304 int token, outputs;
fcb7543e 305 int config_addr;
1da177e4
LT
306
307 if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
308 token = ibm_read_slot_reset_state2;
309 outputs = 4;
310 } else {
311 token = ibm_read_slot_reset_state;
69376502 312 rets[2] = 0; /* fake PE Unavailable info */
1da177e4
LT
313 outputs = 3;
314 }
315
fcb7543e
LV
316 /* Use PE configuration address, if present */
317 config_addr = pdn->eeh_config_addr;
318 if (pdn->eeh_pe_config_addr)
319 config_addr = pdn->eeh_pe_config_addr;
320
321 return rtas_call(token, 3, outputs, rets, config_addr,
1635317f 322 BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid));
1da177e4
LT
323}
324
9c547768 325/**
cb3bc9d0
GS
326 * eeh_wait_for_slot_status - Returns error status of slot
327 * @pdn: pci device node
328 * @max_wait_msecs: maximum number to millisecs to wait
9c547768
LV
329 *
330 * Return negative value if a permanent error, else return
331 * Partition Endpoint (PE) status value.
332 *
333 * If @max_wait_msecs is positive, then this routine will
334 * sleep until a valid status can be obtained, or until
335 * the max allowed wait time is exceeded, in which case
336 * a -2 is returned.
337 */
cce4b2d2 338int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs)
9c547768
LV
339{
340 int rc;
341 int rets[3];
342 int mwait;
343
344 while (1) {
cce4b2d2 345 rc = eeh_read_slot_reset_state(pdn, rets);
9c547768
LV
346 if (rc) return rc;
347 if (rets[1] == 0) return -1; /* EEH is not supported */
348
349 if (rets[0] != 5) return rets[0]; /* return actual status */
350
351 if (rets[2] == 0) return -1; /* permanently unavailable */
352
2c84b407 353 if (max_wait_msecs <= 0) break;
9c547768
LV
354
355 mwait = rets[2];
356 if (mwait <= 0) {
cb3bc9d0
GS
357 printk(KERN_WARNING "EEH: Firmware returned bad wait value=%d\n",
358 mwait);
9c547768
LV
359 mwait = 1000;
360 } else if (mwait > 300*1000) {
cb3bc9d0
GS
361 printk(KERN_WARNING "EEH: Firmware is taking too long, time=%d\n",
362 mwait);
9c547768
LV
363 mwait = 300*1000;
364 }
365 max_wait_msecs -= mwait;
cb3bc9d0 366 msleep(mwait);
9c547768
LV
367 }
368
369 printk(KERN_WARNING "EEH: Timed out waiting for slot status\n");
370 return -2;
371}
372
1da177e4 373/**
cb3bc9d0
GS
374 * eeh_token_to_phys - Convert EEH address token to phys address
375 * @token: I/O token, should be address in the form 0xA....
376 *
377 * This routine should be called to convert virtual I/O address
378 * to physical one.
1da177e4
LT
379 */
380static inline unsigned long eeh_token_to_phys(unsigned long token)
381{
382 pte_t *ptep;
383 unsigned long pa;
384
20cee16c 385 ptep = find_linux_pte(init_mm.pgd, token);
1da177e4
LT
386 if (!ptep)
387 return token;
388 pa = pte_pfn(*ptep) << PAGE_SHIFT;
389
390 return pa | (token & (PAGE_SIZE-1));
391}
392
cb3bc9d0 393/**
cce4b2d2 394 * eeh_find_device_pe - Retrieve the PE for the given device
cb3bc9d0
GS
395 * @dn: device node
396 *
397 * Return the PE under which this device lies
fd761fd8 398 */
cce4b2d2 399struct device_node *eeh_find_device_pe(struct device_node *dn)
fd761fd8
LV
400{
401 while ((dn->parent) && PCI_DN(dn->parent) &&
402 (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
403 dn = dn->parent;
404 }
405 return dn;
406}
407
cb3bc9d0
GS
408/**
409 * __eeh_mark_slot - Mark all child devices as failed
410 * @parent: parent device
411 * @mode_flag: failure flag
412 *
413 * Mark all devices that are children of this device as failed.
414 * Mark the device driver too, so that it can see the failure
415 * immediately; this is critical, since some drivers poll
416 * status registers in interrupts ... If a driver is polling,
417 * and the slot is frozen, then the driver can deadlock in
418 * an interrupt context, which is bad.
fd761fd8 419 */
acaa6176 420static void __eeh_mark_slot(struct device_node *parent, int mode_flag)
fd761fd8 421{
acaa6176
SR
422 struct device_node *dn;
423
424 for_each_child_of_node(parent, dn) {
d9564ad1 425 if (PCI_DN(dn)) {
77bd7415
LV
426 /* Mark the pci device driver too */
427 struct pci_dev *dev = PCI_DN(dn)->pcidev;
ea183a95
OJ
428
429 PCI_DN(dn)->eeh_mode |= mode_flag;
430
77bd7415
LV
431 if (dev && dev->driver)
432 dev->error_state = pci_channel_io_frozen;
433
acaa6176 434 __eeh_mark_slot(dn, mode_flag);
d9564ad1 435 }
fd761fd8
LV
436 }
437}
438
cb3bc9d0
GS
439/**
440 * eeh_mark_slot - Mark the indicated device and its children as failed
441 * @dn: parent device
442 * @mode_flag: failure flag
443 *
444 * Mark the indicated device and its child devices as failed.
445 * The device drivers are marked as failed as well.
446 */
447void eeh_mark_slot(struct device_node *dn, int mode_flag)
d9564ad1 448{
022d51b1 449 struct pci_dev *dev;
cce4b2d2 450 dn = eeh_find_device_pe(dn);
3914ac7b
LV
451
452 /* Back up one, since config addrs might be shared */
4980d5eb 453 if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
3914ac7b
LV
454 dn = dn->parent;
455
d9564ad1 456 PCI_DN(dn)->eeh_mode |= mode_flag;
022d51b1
LV
457
458 /* Mark the pci device too */
459 dev = PCI_DN(dn)->pcidev;
460 if (dev)
461 dev->error_state = pci_channel_io_frozen;
462
acaa6176 463 __eeh_mark_slot(dn, mode_flag);
d9564ad1
LV
464}
465
cb3bc9d0
GS
466/**
467 * __eeh_clear_slot - Clear failure flag for the child devices
468 * @parent: parent device
469 * @mode_flag: flag to be cleared
470 *
471 * Clear failure flag for the child devices.
472 */
acaa6176 473static void __eeh_clear_slot(struct device_node *parent, int mode_flag)
fd761fd8 474{
acaa6176
SR
475 struct device_node *dn;
476
477 for_each_child_of_node(parent, dn) {
d9564ad1
LV
478 if (PCI_DN(dn)) {
479 PCI_DN(dn)->eeh_mode &= ~mode_flag;
480 PCI_DN(dn)->eeh_check_count = 0;
acaa6176 481 __eeh_clear_slot(dn, mode_flag);
d9564ad1 482 }
fd761fd8
LV
483 }
484}
485
cb3bc9d0
GS
486/**
487 * eeh_clear_slot - Clear failure flag for the indicated device and its children
488 * @dn: parent device
489 * @mode_flag: flag to be cleared
490 *
491 * Clear failure flag for the indicated device and its children.
492 */
493void eeh_clear_slot(struct device_node *dn, int mode_flag)
fd761fd8
LV
494{
495 unsigned long flags;
3d372628 496 raw_spin_lock_irqsave(&confirm_error_lock, flags);
3914ac7b 497
cce4b2d2 498 dn = eeh_find_device_pe(dn);
3914ac7b
LV
499
500 /* Back up one, since config addrs might be shared */
4980d5eb 501 if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
3914ac7b
LV
502 dn = dn->parent;
503
d9564ad1
LV
504 PCI_DN(dn)->eeh_mode &= ~mode_flag;
505 PCI_DN(dn)->eeh_check_count = 0;
acaa6176 506 __eeh_clear_slot(dn, mode_flag);
3d372628 507 raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
fd761fd8
LV
508}
509
1da177e4 510/**
cb3bc9d0
GS
511 * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze
512 * @dn: device node
513 * @dev: pci device, if known
1da177e4
LT
514 *
515 * Check for an EEH failure for the given device node. Call this
516 * routine if the result of a read was all 0xff's and you want to
517 * find out if this is due to an EEH slot freeze. This routine
518 * will query firmware for the EEH status.
519 *
520 * Returns 0 if there has not been an EEH error; otherwise returns
69376502 521 * a non-zero value and queues up a slot isolation event notification.
1da177e4
LT
522 *
523 * It is safe to call this routine in an interrupt context.
524 */
525int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
526{
527 int ret;
528 int rets[3];
529 unsigned long flags;
1635317f 530 struct pci_dn *pdn;
fd761fd8 531 int rc = 0;
f36c5227 532 const char *location;
1da177e4 533
257ffc64 534 total_mmio_ffs++;
1da177e4
LT
535
536 if (!eeh_subsystem_enabled)
537 return 0;
538
177bc936 539 if (!dn) {
257ffc64 540 no_dn++;
1da177e4 541 return 0;
177bc936 542 }
cce4b2d2 543 dn = eeh_find_device_pe(dn);
69376502 544 pdn = PCI_DN(dn);
1da177e4
LT
545
546 /* Access to IO BARs might get this far and still not want checking. */
f8632c82 547 if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
1635317f 548 pdn->eeh_mode & EEH_MODE_NOCHECK) {
257ffc64 549 ignored_check++;
57b066ff 550 pr_debug("EEH: Ignored check (%x) for %s %s\n",
8d3d50bf 551 pdn->eeh_mode, eeh_pci_name(dev), dn->full_name);
1da177e4
LT
552 return 0;
553 }
554
fcb7543e 555 if (!pdn->eeh_config_addr && !pdn->eeh_pe_config_addr) {
257ffc64 556 no_cfg_addr++;
1da177e4
LT
557 return 0;
558 }
559
fd761fd8
LV
560 /* If we already have a pending isolation event for this
561 * slot, we know it's bad already, we don't need to check.
562 * Do this checking under a lock; as multiple PCI devices
563 * in one slot might report errors simultaneously, and we
564 * only want one error recovery routine running.
1da177e4 565 */
3d372628 566 raw_spin_lock_irqsave(&confirm_error_lock, flags);
fd761fd8 567 rc = 1;
1635317f 568 if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
5c1344e9 569 pdn->eeh_check_count ++;
f36c5227
MM
570 if (pdn->eeh_check_count % EEH_MAX_FAILS == 0) {
571 location = of_get_property(dn, "ibm,loc-code", NULL);
cb3bc9d0 572 printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
f36c5227
MM
573 "location=%s driver=%s pci addr=%s\n",
574 pdn->eeh_check_count, location,
778a785f 575 eeh_driver_name(dev), eeh_pci_name(dev));
cb3bc9d0 576 printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
778a785f 577 eeh_driver_name(dev));
5c1344e9 578 dump_stack();
1da177e4 579 }
fd761fd8 580 goto dn_unlock;
1da177e4
LT
581 }
582
583 /*
584 * Now test for an EEH failure. This is VERY expensive.
585 * Note that the eeh_config_addr may be a parent device
586 * in the case of a device behind a bridge, or it may be
587 * function zero of a multi-function device.
588 * In any case they must share a common PHB.
589 */
cce4b2d2 590 ret = eeh_read_slot_reset_state(pdn, rets);
76e6faf7
LV
591
592 /* If the call to firmware failed, punt */
593 if (ret != 0) {
cce4b2d2 594 printk(KERN_WARNING "EEH: eeh_read_slot_reset_state() failed; rc=%d dn=%s\n",
76e6faf7 595 ret, dn->full_name);
257ffc64 596 false_positives++;
858955bd 597 pdn->eeh_false_positives ++;
fd761fd8
LV
598 rc = 0;
599 goto dn_unlock;
76e6faf7
LV
600 }
601
39d16e29 602 /* Note that config-io to empty slots may fail;
cb3bc9d0
GS
603 * they are empty when they don't have children.
604 */
c9b65a7d 605 if ((rets[0] == 5) && (rets[2] == 0) && (dn->child == NULL)) {
39d16e29 606 false_positives++;
858955bd 607 pdn->eeh_false_positives ++;
39d16e29
LV
608 rc = 0;
609 goto dn_unlock;
610 }
611
76e6faf7
LV
612 /* If EEH is not supported on this device, punt. */
613 if (rets[1] != 1) {
614 printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
615 ret, dn->full_name);
257ffc64 616 false_positives++;
858955bd 617 pdn->eeh_false_positives ++;
fd761fd8
LV
618 rc = 0;
619 goto dn_unlock;
76e6faf7
LV
620 }
621
622 /* If not the kind of error we know about, punt. */
90375f53 623 if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
257ffc64 624 false_positives++;
858955bd 625 pdn->eeh_false_positives ++;
fd761fd8
LV
626 rc = 0;
627 goto dn_unlock;
76e6faf7
LV
628 }
629
257ffc64 630 slot_resets++;
fd761fd8
LV
631
632 /* Avoid repeated reports of this failure, including problems
633 * with other functions on this device, and functions under
cb3bc9d0
GS
634 * bridges.
635 */
636 eeh_mark_slot(dn, EEH_MODE_ISOLATED);
3d372628 637 raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
1da177e4 638
cb3bc9d0 639 eeh_send_failure_event(dn, dev);
77bd7415 640
1da177e4
LT
641 /* Most EEH events are due to device driver bugs. Having
642 * a stack trace will help the device-driver authors figure
cb3bc9d0
GS
643 * out what happened. So print that out.
644 */
90375f53 645 dump_stack();
fd761fd8
LV
646 return 1;
647
648dn_unlock:
3d372628 649 raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
fd761fd8 650 return rc;
1da177e4
LT
651}
652
fd761fd8 653EXPORT_SYMBOL_GPL(eeh_dn_check_failure);
1da177e4
LT
654
655/**
cb3bc9d0
GS
656 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
657 * @token: I/O token, should be address in the form 0xA....
658 * @val: value, should be all 1's (XXX why do we need this arg??)
1da177e4 659 *
1da177e4
LT
660 * Check for an EEH failure at the given token address. Call this
661 * routine if the result of a read was all 0xff's and you want to
662 * find out if this is due to an EEH slot freeze event. This routine
663 * will query firmware for the EEH status.
664 *
665 * Note this routine is safe to call in an interrupt context.
666 */
667unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
668{
669 unsigned long addr;
670 struct pci_dev *dev;
671 struct device_node *dn;
672
673 /* Finding the phys addr + pci device; this is pretty quick. */
674 addr = eeh_token_to_phys((unsigned long __force) token);
675 dev = pci_get_device_by_addr(addr);
177bc936 676 if (!dev) {
257ffc64 677 no_device++;
1da177e4 678 return val;
177bc936 679 }
1da177e4
LT
680
681 dn = pci_device_to_OF_node(dev);
cb3bc9d0 682 eeh_dn_check_failure(dn, dev);
1da177e4
LT
683
684 pci_dev_put(dev);
685 return val;
686}
687
688EXPORT_SYMBOL(eeh_check_failure);
689
6dee3fb9 690
47b5c838 691/**
cce4b2d2 692 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
47b5c838 693 * @pdn pci device node
cb3bc9d0
GS
694 *
695 * This routine should be called to reenable frozen MMIO or DMA
696 * so that it would work correctly again. It's useful while doing
697 * recovery or log collection on the indicated device.
47b5c838 698 */
cce4b2d2 699int eeh_pci_enable(struct pci_dn *pdn, int function)
47b5c838
LV
700{
701 int config_addr;
702 int rc;
703
704 /* Use PE configuration address, if present */
705 config_addr = pdn->eeh_config_addr;
706 if (pdn->eeh_pe_config_addr)
707 config_addr = pdn->eeh_pe_config_addr;
708
709 rc = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
710 config_addr,
711 BUID_HI(pdn->phb->buid),
712 BUID_LO(pdn->phb->buid),
713 function);
714
715 if (rc)
fa1be476 716 printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n",
47b5c838
LV
717 function, rc, pdn->node->full_name);
718
cb3bc9d0 719 rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
fa1be476
LV
720 if ((rc == 4) && (function == EEH_THAW_MMIO))
721 return 0;
722
47b5c838
LV
723 return rc;
724}
725
cb5b5624 726/**
cce4b2d2 727 * eeh_slot_reset - Raises/Lowers the pci #RST line
cb3bc9d0 728 * @pdn: pci device node
cb5b5624 729 * @state: 1/0 to raise/lower the #RST
6dee3fb9
LV
730 *
731 * Clear the EEH-frozen condition on a slot. This routine
732 * asserts the PCI #RST line if the 'state' argument is '1',
733 * and drops the #RST line if 'state is '0'. This routine is
734 * safe to call in an interrupt context.
6dee3fb9 735 */
cce4b2d2 736static void eeh_slot_reset(struct pci_dn *pdn, int state)
6dee3fb9 737{
25e591f6 738 int config_addr;
6dee3fb9
LV
739 int rc;
740
cb3bc9d0 741 BUG_ON(pdn==NULL);
6dee3fb9
LV
742
743 if (!pdn->phb) {
cb3bc9d0 744 printk(KERN_WARNING "EEH: in slot reset, device node %s has no phb\n",
6dee3fb9
LV
745 pdn->node->full_name);
746 return;
747 }
748
25e591f6
LV
749 /* Use PE configuration address, if present */
750 config_addr = pdn->eeh_config_addr;
751 if (pdn->eeh_pe_config_addr)
752 config_addr = pdn->eeh_pe_config_addr;
753
ecb73902 754 rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
25e591f6 755 config_addr,
6dee3fb9
LV
756 BUID_HI(pdn->phb->buid),
757 BUID_LO(pdn->phb->buid),
758 state);
ecb73902
RL
759
760 /* Fundamental-reset not supported on this PE, try hot-reset */
761 if (rc == -8 && state == 3) {
762 rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
763 config_addr,
764 BUID_HI(pdn->phb->buid),
765 BUID_LO(pdn->phb->buid), 1);
766 if (rc)
767 printk(KERN_WARNING
768 "EEH: Unable to reset the failed slot,"
769 " #RST=%d dn=%s\n",
770 rc, pdn->node->full_name);
771 }
6dee3fb9
LV
772}
773
00c2ae35
BK
774/**
775 * pcibios_set_pcie_slot_reset - Set PCI-E reset state
cb3bc9d0
GS
776 * @dev: pci device struct
777 * @state: reset state to enter
00c2ae35
BK
778 *
779 * Return value:
780 * 0 if success
cb3bc9d0 781 */
00c2ae35
BK
782int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
783{
784 struct device_node *dn = pci_device_to_OF_node(dev);
785 struct pci_dn *pdn = PCI_DN(dn);
786
787 switch (state) {
788 case pcie_deassert_reset:
cce4b2d2 789 eeh_slot_reset(pdn, 0);
00c2ae35
BK
790 break;
791 case pcie_hot_reset:
cce4b2d2 792 eeh_slot_reset(pdn, 1);
00c2ae35
BK
793 break;
794 case pcie_warm_reset:
cce4b2d2 795 eeh_slot_reset(pdn, 3);
00c2ae35
BK
796 break;
797 default:
798 return -EINVAL;
799 };
800
801 return 0;
802}
803
cb5b5624 804/**
cb3bc9d0
GS
805 * __eeh_set_pe_freset - Check the required reset for child devices
806 * @parent: parent device
807 * @freset: return value
808 *
809 * Each device might have its preferred reset type: fundamental or
810 * hot reset. The routine is used to collect the information from
811 * the child devices so that they could be reset accordingly.
6dee3fb9 812 */
cb3bc9d0
GS
813void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset)
814{
815 struct device_node *dn;
816
817 for_each_child_of_node(parent, dn) {
818 if (PCI_DN(dn)) {
819 struct pci_dev *dev = PCI_DN(dn)->pcidev;
820
821 if (dev && dev->driver)
822 *freset |= dev->needs_freset;
823
824 __eeh_set_pe_freset(dn, freset);
825 }
826 }
827}
828
829/**
830 * eeh_set_pe_freset - Check the required reset for the indicated device and its children
831 * @dn: parent device
832 * @freset: return value
833 *
834 * Each device might have its preferred reset type: fundamental or
835 * hot reset. The routine is used to collected the information for
836 * the indicated device and its children so that the bunch of the
837 * devices could be reset properly.
838 */
839void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset)
840{
841 struct pci_dev *dev;
cce4b2d2 842 dn = eeh_find_device_pe(dn);
cb3bc9d0
GS
843
844 /* Back up one, since config addrs might be shared */
845 if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
846 dn = dn->parent;
6dee3fb9 847
cb3bc9d0
GS
848 dev = PCI_DN(dn)->pcidev;
849 if (dev)
850 *freset |= dev->needs_freset;
851
852 __eeh_set_pe_freset(dn, freset);
853}
854
855/**
cce4b2d2 856 * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
cb3bc9d0
GS
857 * @pdn: pci device node to be reset.
858 *
859 * Assert the PCI #RST line for 1/4 second.
860 */
cce4b2d2 861static void eeh_reset_pe_once(struct pci_dn *pdn)
6dee3fb9 862{
308fc4f8 863 unsigned int freset = 0;
6e19314c 864
308fc4f8
RL
865 /* Determine type of EEH reset required for
866 * Partitionable Endpoint, a hot-reset (1)
867 * or a fundamental reset (3).
868 * A fundamental reset required by any device under
869 * Partitionable Endpoint trumps hot-reset.
870 */
871 eeh_set_pe_freset(pdn->node, &freset);
872
873 if (freset)
cce4b2d2 874 eeh_slot_reset(pdn, 3);
6e19314c 875 else
cce4b2d2 876 eeh_slot_reset(pdn, 1);
6dee3fb9
LV
877
878 /* The PCI bus requires that the reset be held high for at least
cb3bc9d0
GS
879 * a 100 milliseconds. We wait a bit longer 'just in case'.
880 */
6dee3fb9 881#define PCI_BUS_RST_HOLD_TIME_MSEC 250
cb3bc9d0 882 msleep(PCI_BUS_RST_HOLD_TIME_MSEC);
d9564ad1
LV
883
884 /* We might get hit with another EEH freeze as soon as the
885 * pci slot reset line is dropped. Make sure we don't miss
cb3bc9d0
GS
886 * these, and clear the flag now.
887 */
888 eeh_clear_slot(pdn->node, EEH_MODE_ISOLATED);
d9564ad1 889
cce4b2d2 890 eeh_slot_reset(pdn, 0);
6dee3fb9
LV
891
892 /* After a PCI slot has been reset, the PCI Express spec requires
893 * a 1.5 second idle time for the bus to stabilize, before starting
cb3bc9d0
GS
894 * up traffic.
895 */
6dee3fb9 896#define PCI_BUS_SETTLE_TIME_MSEC 1800
cb3bc9d0 897 msleep(PCI_BUS_SETTLE_TIME_MSEC);
e1029263
LV
898}
899
cb3bc9d0 900/**
cce4b2d2 901 * eeh_reset_pe - Reset the indicated PE
cb3bc9d0
GS
902 * @pdn: PCI device node
903 *
904 * This routine should be called to reset indicated device, including
905 * PE. A PE might include multiple PCI devices and sometimes PCI bridges
906 * might be involved as well.
907 */
cce4b2d2 908int eeh_reset_pe(struct pci_dn *pdn)
e1029263
LV
909{
910 int i, rc;
911
9c547768
LV
912 /* Take three shots at resetting the bus */
913 for (i=0; i<3; i++) {
cce4b2d2 914 eeh_reset_pe_once(pdn);
6dee3fb9 915
9c547768 916 rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
b6495c0c
LV
917 if (rc == 0)
918 return 0;
e1029263 919
e1029263 920 if (rc < 0) {
12588da7
LV
921 printk(KERN_ERR "EEH: unrecoverable slot failure %s\n",
922 pdn->node->full_name);
b6495c0c 923 return -1;
e1029263 924 }
12588da7
LV
925 printk(KERN_ERR "EEH: bus reset %d failed on slot %s, rc=%d\n",
926 i+1, pdn->node->full_name, rc);
6dee3fb9 927 }
b6495c0c 928
9c547768 929 return -1;
6dee3fb9
LV
930}
931
/** Save and restore of PCI BARs
 *
 * Although firmware will set up BARs during boot, it doesn't
 * set up device BARs after a device reset, although it will,
 * if requested, set up bridge configuration. Thus, we need to
 * configure the PCI devices ourselves.
 */
939
940/**
cce4b2d2 941 * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
cb5b5624
LV
942 * @pdn: pci device node
943 *
8b553f32
LV
944 * Loads the PCI configuration space base address registers,
945 * the expansion ROM base address, the latency timer, and etc.
946 * from the saved values in the device node.
947 */
cce4b2d2 948static inline void eeh_restore_one_device_bars(struct pci_dn *pdn)
8b553f32
LV
949{
950 int i;
cde274c0 951 u32 cmd;
8b553f32
LV
952
953 if (NULL==pdn->phb) return;
954 for (i=4; i<10; i++) {
955 rtas_write_config(pdn, i*4, 4, pdn->config_space[i]);
956 }
957
958 /* 12 == Expansion ROM Address */
959 rtas_write_config(pdn, 12*4, 4, pdn->config_space[12]);
960
961#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
962#define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)])
963
cb3bc9d0 964 rtas_write_config(pdn, PCI_CACHE_LINE_SIZE, 1,
8b553f32
LV
965 SAVED_BYTE(PCI_CACHE_LINE_SIZE));
966
cb3bc9d0 967 rtas_write_config(pdn, PCI_LATENCY_TIMER, 1,
8b553f32
LV
968 SAVED_BYTE(PCI_LATENCY_TIMER));
969
970 /* max latency, min grant, interrupt pin and line */
971 rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]);
cde274c0
MM
972
973 /* Restore PERR & SERR bits, some devices require it,
cb3bc9d0
GS
974 * don't touch the other command bits
975 */
cde274c0
MM
976 rtas_read_config(pdn, PCI_COMMAND, 4, &cmd);
977 if (pdn->config_space[1] & PCI_COMMAND_PARITY)
978 cmd |= PCI_COMMAND_PARITY;
979 else
980 cmd &= ~PCI_COMMAND_PARITY;
981 if (pdn->config_space[1] & PCI_COMMAND_SERR)
982 cmd |= PCI_COMMAND_SERR;
983 else
984 cmd &= ~PCI_COMMAND_SERR;
985 rtas_write_config(pdn, PCI_COMMAND, 4, cmd);
8b553f32
LV
986}
987
988/**
cb3bc9d0
GS
989 * eeh_restore_bars - Restore the PCI config space info
990 * @pdn: PCI device node
8b553f32
LV
991 *
992 * This routine performs a recursive walk to the children
993 * of this device as well.
994 */
995void eeh_restore_bars(struct pci_dn *pdn)
996{
997 struct device_node *dn;
998 if (!pdn)
999 return;
1000
7684b40c 1001 if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code))
cce4b2d2 1002 eeh_restore_one_device_bars(pdn);
8b553f32 1003
acaa6176 1004 for_each_child_of_node(pdn->node, dn)
cb3bc9d0 1005 eeh_restore_bars(PCI_DN(dn));
8b553f32
LV
1006}
1007
1008/**
cb3bc9d0
GS
1009 * eeh_save_bars - Save device bars
1010 * @pdn: PCI device node
8b553f32
LV
1011 *
1012 * Save the values of the device bars. Unlike the restore
1013 * routine, this routine is *not* recursive. This is because
31116f0b 1014 * PCI devices are added individually; but, for the restore,
8b553f32
LV
1015 * an entire slot is reset at a time.
1016 */
7684b40c 1017static void eeh_save_bars(struct pci_dn *pdn)
8b553f32
LV
1018{
1019 int i;
1020
7684b40c 1021 if (!pdn )
8b553f32
LV
1022 return;
1023
1024 for (i = 0; i < 16; i++)
7684b40c 1025 rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]);
8b553f32
LV
1026}
1027
cb3bc9d0 1028/**
cce4b2d2 1029 * eeh_configure_bridge - Configure PCI bridges for the indicated PE
cb3bc9d0
GS
1030 * @pdn: PCI device node
1031 *
1032 * PCI bridges might be included in PE. In order to make the PE work
1033 * again. The included PCI bridges should be recovered after the PE
1034 * encounters frozen state.
1035 */
cce4b2d2 1036void eeh_configure_bridge(struct pci_dn *pdn)
8b553f32 1037{
fcb7543e 1038 int config_addr;
8b553f32 1039 int rc;
65f47f13 1040 int token;
8b553f32 1041
fcb7543e
LV
1042 /* Use PE configuration address, if present */
1043 config_addr = pdn->eeh_config_addr;
1044 if (pdn->eeh_pe_config_addr)
1045 config_addr = pdn->eeh_pe_config_addr;
1046
65f47f13
RL
1047 /* Use new configure-pe function, if supported */
1048 if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE)
1049 token = ibm_configure_pe;
1050 else
1051 token = ibm_configure_bridge;
1052
1053 rc = rtas_call(token, 3, 1, NULL,
fcb7543e 1054 config_addr,
8b553f32
LV
1055 BUID_HI(pdn->phb->buid),
1056 BUID_LO(pdn->phb->buid));
1057 if (rc) {
cb3bc9d0 1058 printk(KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n",
8b553f32
LV
1059 rc, pdn->node->full_name);
1060 }
1061}
1062
172ca926
LV
/* Last argument passed to the ibm,set-eeh-option RTAS call in eeh_early_enable() */
#define EEH_ENABLE 1

/*
 * BUID of a PHB, split into the two halves produced by BUID_HI() and
 * BUID_LO(). It is handed to eeh_early_enable() as the opaque @data
 * argument.
 */
struct eeh_early_enable_info {
	unsigned int buid_hi;	/* BUID_HI() part of the PHB's BUID */
	unsigned int buid_lo;	/* BUID_LO() part of the PHB's BUID */
};
1069
cb3bc9d0 1070/**
cce4b2d2 1071 * eeh_get_pe_addr - Retrieve PE address with given BDF address
cb3bc9d0
GS
1072 * @config_addr: BDF address
1073 * @info: BUID of the associated PHB
1074 *
1075 * There're 2 kinds of addresses existing in EEH core components:
1076 * BDF address and PE address. Besides, there has dedicated platform
1077 * dependent function call to retrieve the PE address according to
1078 * the given BDF address. Further more, we prefer PE address on BDF
1079 * address in EEH core components.
1080 */
cce4b2d2 1081static int eeh_get_pe_addr(int config_addr,
147d6a37
LV
1082 struct eeh_early_enable_info *info)
1083{
1084 unsigned int rets[3];
1085 int ret;
1086
1087 /* Use latest config-addr token on power6 */
1088 if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
1089 /* Make sure we have a PE in hand */
cb3bc9d0 1090 ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
147d6a37
LV
1091 config_addr, info->buid_hi, info->buid_lo, 1);
1092 if (ret || (rets[0]==0))
1093 return 0;
1094
cb3bc9d0 1095 ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
147d6a37
LV
1096 config_addr, info->buid_hi, info->buid_lo, 0);
1097 if (ret)
1098 return 0;
1099 return rets[0];
1100 }
1101
1102 /* Use older config-addr token on power5 */
1103 if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
cb3bc9d0 1104 ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
147d6a37
LV
1105 config_addr, info->buid_hi, info->buid_lo, 0);
1106 if (ret)
1107 return 0;
1108 return rets[0];
1109 }
1110 return 0;
1111}
1112
cb3bc9d0 1113/**
cce4b2d2 1114 * eeh_early_enable - Early enable EEH on the indicated device
cb3bc9d0
GS
1115 * @dn: device node
1116 * @data: BUID
1117 *
1118 * Enable EEH functionality on the specified PCI device. The function
1119 * is expected to be called before real PCI probing is done. However,
1120 * the PHBs have been initialized at this point.
1121 */
cce4b2d2 1122static void *eeh_early_enable(struct device_node *dn, void *data)
1da177e4 1123{
25c4a46f 1124 unsigned int rets[3];
1da177e4
LT
1125 struct eeh_early_enable_info *info = data;
1126 int ret;
e2eb6392
SR
1127 const u32 *class_code = of_get_property(dn, "class-code", NULL);
1128 const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL);
1129 const u32 *device_id = of_get_property(dn, "device-id", NULL);
954a46e2 1130 const u32 *regs;
1da177e4 1131 int enable;
69376502 1132 struct pci_dn *pdn = PCI_DN(dn);
1da177e4 1133
0f17574a 1134 pdn->class_code = 0;
1635317f 1135 pdn->eeh_mode = 0;
5c1344e9
LV
1136 pdn->eeh_check_count = 0;
1137 pdn->eeh_freeze_count = 0;
858955bd 1138 pdn->eeh_false_positives = 0;
1da177e4 1139
c6d4d5a8
NL
1140 if (!of_device_is_available(dn))
1141 return NULL;
1da177e4
LT
1142
1143 /* Ignore bad nodes. */
1144 if (!class_code || !vendor_id || !device_id)
1145 return NULL;
1146
1147 /* There is nothing to check on PCI to ISA bridges */
1148 if (dn->type && !strcmp(dn->type, "isa")) {
1635317f 1149 pdn->eeh_mode |= EEH_MODE_NOCHECK;
1da177e4
LT
1150 return NULL;
1151 }
0f17574a 1152 pdn->class_code = *class_code;
1da177e4 1153
1da177e4 1154 /* Ok... see if this device supports EEH. Some do, some don't,
cb3bc9d0
GS
1155 * and the only way to find out is to check each and every one.
1156 */
e2eb6392 1157 regs = of_get_property(dn, "reg", NULL);
1da177e4
LT
1158 if (regs) {
1159 /* First register entry is addr (00BBSS00) */
1160 /* Try to enable eeh */
1161 ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
172ca926
LV
1162 regs[0], info->buid_hi, info->buid_lo,
1163 EEH_ENABLE);
1164
25c4a46f 1165 enable = 0;
1da177e4 1166 if (ret == 0) {
1635317f 1167 pdn->eeh_config_addr = regs[0];
25e591f6
LV
1168
1169 /* If the newer, better, ibm,get-config-addr-info is supported,
cb3bc9d0
GS
1170 * then use that instead.
1171 */
cce4b2d2 1172 pdn->eeh_pe_config_addr = eeh_get_pe_addr(pdn->eeh_config_addr, info);
25c4a46f
LV
1173
1174 /* Some older systems (Power4) allow the
1175 * ibm,set-eeh-option call to succeed even on nodes
1176 * where EEH is not supported. Verify support
cb3bc9d0
GS
1177 * explicitly.
1178 */
cce4b2d2 1179 ret = eeh_read_slot_reset_state(pdn, rets);
25c4a46f
LV
1180 if ((ret == 0) && (rets[1] == 1))
1181 enable = 1;
1182 }
1183
1184 if (enable) {
1185 eeh_subsystem_enabled = 1;
1186 pdn->eeh_mode |= EEH_MODE_SUPPORTED;
1187
57b066ff
BH
1188 pr_debug("EEH: %s: eeh enabled, config=%x pe_config=%x\n",
1189 dn->full_name, pdn->eeh_config_addr,
1190 pdn->eeh_pe_config_addr);
1da177e4
LT
1191 } else {
1192
1193 /* This device doesn't support EEH, but it may have an
cb3bc9d0
GS
1194 * EEH parent, in which case we mark it as supported.
1195 */
69376502 1196 if (dn->parent && PCI_DN(dn->parent)
1635317f 1197 && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {
1da177e4 1198 /* Parent supports EEH. */
1635317f
PM
1199 pdn->eeh_mode |= EEH_MODE_SUPPORTED;
1200 pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr;
1da177e4
LT
1201 return NULL;
1202 }
1203 }
1204 } else {
1205 printk(KERN_WARNING "EEH: %s: unable to get reg property.\n",
1206 dn->full_name);
1207 }
1208
7684b40c 1209 eeh_save_bars(pdn);
69376502 1210 return NULL;
1da177e4
LT
1211}
1212
aa1e6374
GS
1213/**
1214 * eeh_ops_register - Register platform dependent EEH operations
1215 * @ops: platform dependent EEH operations
1216 *
1217 * Register the platform dependent EEH operation callback
1218 * functions. The platform should call this function before
1219 * any other EEH operations.
1220 */
1221int __init eeh_ops_register(struct eeh_ops *ops)
1222{
1223 if (!ops->name) {
1224 pr_warning("%s: Invalid EEH ops name for %p\n",
1225 __func__, ops);
1226 return -EINVAL;
1227 }
1228
1229 if (eeh_ops && eeh_ops != ops) {
1230 pr_warning("%s: EEH ops of platform %s already existing (%s)\n",
1231 __func__, eeh_ops->name, ops->name);
1232 return -EEXIST;
1233 }
1234
1235 eeh_ops = ops;
1236
1237 return 0;
1238}
1239
1240/**
1241 * eeh_ops_unregister - Unreigster platform dependent EEH operations
1242 * @name: name of EEH platform operations
1243 *
1244 * Unregister the platform dependent EEH operation callback
1245 * functions.
1246 */
1247int __exit eeh_ops_unregister(const char *name)
1248{
1249 if (!name || !strlen(name)) {
1250 pr_warning("%s: Invalid EEH ops name\n",
1251 __func__);
1252 return -EINVAL;
1253 }
1254
1255 if (eeh_ops && !strcmp(eeh_ops->name, name)) {
1256 eeh_ops = NULL;
1257 return 0;
1258 }
1259
1260 return -EEXIST;
1261}
1262
cb3bc9d0
GS
1263/**
1264 * eeh_init - EEH initialization
1265 *
1da177e4
LT
1266 * Initialize EEH by trying to enable it for all of the adapters in the system.
1267 * As a side effect we can determine here if eeh is supported at all.
1268 * Note that we leave EEH on so failed config cycles won't cause a machine
1269 * check. If a user turns off EEH for a particular adapter they are really
1270 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
1271 * grant access to a slot if EEH isn't enabled, and so we always enable
1272 * EEH for all slots/all devices.
1273 *
1274 * The eeh-force-off option disables EEH checking globally, for all slots.
1275 * Even if force-off is set, the EEH hardware is still enabled, so that
1276 * newer systems can boot.
1277 */
1278void __init eeh_init(void)
1279{
1280 struct device_node *phb, *np;
1281 struct eeh_early_enable_info info;
1282
3d372628 1283 raw_spin_lock_init(&confirm_error_lock);
df7242b1
LV
1284 spin_lock_init(&slot_errbuf_lock);
1285
1da177e4
LT
1286 np = of_find_node_by_path("/rtas");
1287 if (np == NULL)
1288 return;
1289
1290 ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
1291 ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
1292 ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
1293 ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
1294 ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
25e591f6 1295 ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
147d6a37 1296 ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
cb3bc9d0 1297 ibm_configure_bridge = rtas_token("ibm,configure-bridge");
65f47f13 1298 ibm_configure_pe = rtas_token("ibm,configure-pe");
1da177e4
LT
1299
1300 if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
1301 return;
1302
1303 eeh_error_buf_size = rtas_token("rtas-error-log-max");
1304 if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
1305 eeh_error_buf_size = 1024;
1306 }
1307 if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
1308 printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated "
1309 "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
1310 eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
1311 }
1312
1313 /* Enable EEH for all adapters. Note that eeh requires buid's */
1314 for (phb = of_find_node_by_name(NULL, "pci"); phb;
1315 phb = of_find_node_by_name(phb, "pci")) {
1316 unsigned long buid;
1317
1318 buid = get_phb_buid(phb);
69376502 1319 if (buid == 0 || PCI_DN(phb) == NULL)
1da177e4
LT
1320 continue;
1321
1322 info.buid_lo = BUID_LO(buid);
1323 info.buid_hi = BUID_HI(buid);
cce4b2d2 1324 traverse_pci_devices(phb, eeh_early_enable, &info);
1da177e4
LT
1325 }
1326
1327 if (eeh_subsystem_enabled)
1328 printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
1329 else
1330 printk(KERN_WARNING "EEH: No capable adapters found\n");
1331}
1332
1333/**
cb3bc9d0 1334 * eeh_add_device_early - Enable EEH for the indicated device_node
1da177e4
LT
1335 * @dn: device node for which to set up EEH
1336 *
1337 * This routine must be used to perform EEH initialization for PCI
1338 * devices that were added after system boot (e.g. hotplug, dlpar).
1339 * This routine must be called before any i/o is performed to the
1340 * adapter (inluding any config-space i/o).
1341 * Whether this actually enables EEH or not for this device depends
1342 * on the CEC architecture, type of the device, on earlier boot
1343 * command-line arguments & etc.
1344 */
794e085e 1345static void eeh_add_device_early(struct device_node *dn)
1da177e4
LT
1346{
1347 struct pci_controller *phb;
1348 struct eeh_early_enable_info info;
1349
69376502 1350 if (!dn || !PCI_DN(dn))
1da177e4 1351 return;
1635317f 1352 phb = PCI_DN(dn)->phb;
f751f841
LV
1353
1354 /* USB Bus children of PCI devices will not have BUID's */
1355 if (NULL == phb || 0 == phb->buid)
1da177e4 1356 return;
1da177e4
LT
1357
1358 info.buid_hi = BUID_HI(phb->buid);
1359 info.buid_lo = BUID_LO(phb->buid);
cce4b2d2 1360 eeh_early_enable(dn, &info);
1da177e4 1361}
1da177e4 1362
cb3bc9d0
GS
1363/**
1364 * eeh_add_device_tree_early - Enable EEH for the indicated device
1365 * @dn: device node
1366 *
1367 * This routine must be used to perform EEH initialization for the
1368 * indicated PCI device that was added after system boot (e.g.
1369 * hotplug, dlpar).
1370 */
e2a296ee
LV
1371void eeh_add_device_tree_early(struct device_node *dn)
1372{
1373 struct device_node *sib;
acaa6176
SR
1374
1375 for_each_child_of_node(dn, sib)
e2a296ee
LV
1376 eeh_add_device_tree_early(sib);
1377 eeh_add_device_early(dn);
1378}
1379EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
1380
1da177e4 1381/**
cb3bc9d0 1382 * eeh_add_device_late - Perform EEH initialization for the indicated pci device
1da177e4
LT
1383 * @dev: pci device for which to set up EEH
1384 *
1385 * This routine must be used to complete EEH initialization for PCI
1386 * devices that were added after system boot (e.g. hotplug, dlpar).
1387 */
794e085e 1388static void eeh_add_device_late(struct pci_dev *dev)
1da177e4 1389{
56b0fca3 1390 struct device_node *dn;
8b553f32 1391 struct pci_dn *pdn;
56b0fca3 1392
1da177e4
LT
1393 if (!dev || !eeh_subsystem_enabled)
1394 return;
1395
57b066ff 1396 pr_debug("EEH: Adding device %s\n", pci_name(dev));
1da177e4 1397
56b0fca3 1398 dn = pci_device_to_OF_node(dev);
8b553f32 1399 pdn = PCI_DN(dn);
57b066ff
BH
1400 if (pdn->pcidev == dev) {
1401 pr_debug("EEH: Already referenced !\n");
1402 return;
1403 }
1404 WARN_ON(pdn->pcidev);
1405
cb3bc9d0 1406 pci_dev_get(dev);
8b553f32 1407 pdn->pcidev = dev;
56b0fca3 1408
e1d04c97
LV
1409 pci_addr_cache_insert_device(dev);
1410 eeh_sysfs_add_device(dev);
1da177e4 1411}
794e085e 1412
cb3bc9d0
GS
1413/**
1414 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
1415 * @bus: PCI bus
1416 *
1417 * This routine must be used to perform EEH initialization for PCI
1418 * devices which are attached to the indicated PCI bus. The PCI bus
1419 * is added after system boot through hotplug or dlpar.
1420 */
794e085e
NF
1421void eeh_add_device_tree_late(struct pci_bus *bus)
1422{
1423 struct pci_dev *dev;
1424
1425 list_for_each_entry(dev, &bus->devices, bus_list) {
1426 eeh_add_device_late(dev);
1427 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
1428 struct pci_bus *subbus = dev->subordinate;
1429 if (subbus)
1430 eeh_add_device_tree_late(subbus);
1431 }
1432 }
1433}
1434EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
1da177e4
LT
1435
1436/**
cb3bc9d0 1437 * eeh_remove_device - Undo EEH setup for the indicated pci device
1da177e4
LT
1438 * @dev: pci device to be removed
1439 *
794e085e
NF
1440 * This routine should be called when a device is removed from
1441 * a running system (e.g. by hotplug or dlpar). It unregisters
1442 * the PCI device from the EEH subsystem. I/O errors affecting
1443 * this device will no longer be detected after this call; thus,
1444 * i/o errors affecting this slot may leave this device unusable.
1da177e4 1445 */
794e085e 1446static void eeh_remove_device(struct pci_dev *dev)
1da177e4 1447{
56b0fca3 1448 struct device_node *dn;
1da177e4
LT
1449 if (!dev || !eeh_subsystem_enabled)
1450 return;
1451
1452 /* Unregister the device with the EEH/PCI address search system */
57b066ff 1453 pr_debug("EEH: Removing device %s\n", pci_name(dev));
56b0fca3
LV
1454
1455 dn = pci_device_to_OF_node(dev);
57b066ff
BH
1456 if (PCI_DN(dn)->pcidev == NULL) {
1457 pr_debug("EEH: Not referenced !\n");
1458 return;
b055a9e1 1459 }
57b066ff 1460 PCI_DN(dn)->pcidev = NULL;
cb3bc9d0 1461 pci_dev_put(dev);
57b066ff
BH
1462
1463 pci_addr_cache_remove_device(dev);
1464 eeh_sysfs_remove_device(dev);
1da177e4 1465}
1da177e4 1466
cb3bc9d0
GS
1467/**
1468 * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device
1469 * @dev: PCI device
1470 *
1471 * This routine must be called when a device is removed from the
1472 * running system through hotplug or dlpar. The corresponding
1473 * PCI address cache will be removed.
1474 */
e2a296ee
LV
1475void eeh_remove_bus_device(struct pci_dev *dev)
1476{
794e085e
NF
1477 struct pci_bus *bus = dev->subordinate;
1478 struct pci_dev *child, *tmp;
1479
e2a296ee 1480 eeh_remove_device(dev);
794e085e
NF
1481
1482 if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
1483 list_for_each_entry_safe(child, tmp, &bus->devices, bus_list)
1484 eeh_remove_bus_device(child);
e2a296ee
LV
1485 }
1486}
1487EXPORT_SYMBOL_GPL(eeh_remove_bus_device);
1488
1da177e4
LT
1489static int proc_eeh_show(struct seq_file *m, void *v)
1490{
1da177e4
LT
1491 if (0 == eeh_subsystem_enabled) {
1492 seq_printf(m, "EEH Subsystem is globally disabled\n");
257ffc64 1493 seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);
1da177e4
LT
1494 } else {
1495 seq_printf(m, "EEH Subsystem is enabled\n");
177bc936
LV
1496 seq_printf(m,
1497 "no device=%ld\n"
1498 "no device node=%ld\n"
1499 "no config address=%ld\n"
1500 "check not wanted=%ld\n"
1501 "eeh_total_mmio_ffs=%ld\n"
1502 "eeh_false_positives=%ld\n"
177bc936 1503 "eeh_slot_resets=%ld\n",
257ffc64
LV
1504 no_device, no_dn, no_cfg_addr,
1505 ignored_check, total_mmio_ffs,
42253a68 1506 false_positives,
257ffc64 1507 slot_resets);
1da177e4
LT
1508 }
1509
1510 return 0;
1511}
1512
/*
 * open callback for the EEH proc entry: sets the file up as a
 * single-record seq_file whose content is produced by proc_eeh_show().
 * Returns whatever single_open() returns.
 */
static int proc_eeh_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_eeh_show, NULL);
}
1517
/*
 * File operations for the EEH proc entry. Opening goes through
 * proc_eeh_open(); reading, seeking and release use the generic
 * seq_file helpers.
 */
static const struct file_operations proc_eeh_operations = {
	.open = proc_eeh_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
1524
1525static int __init eeh_init_proc(void)
1526{
66747138 1527 if (machine_is(pseries))
8feaa434 1528 proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
1da177e4
LT
1529 return 0;
1530}
1531__initcall(eeh_init_proc);
This page took 0.812669 seconds and 5 git commands to generate.