powerpc/powernv: Fix killed EEH event

author Gavin Shan <gwshan@linux.vnet.ibm.com>

Wed, 4 Jun 2014 07:31:52 +0000 (17:31 +1000)

committer Benjamin Herrenschmidt <benh@kernel.crashing.org>

Wed, 11 Jun 2014 07:04:33 +0000 (17:04 +1000)
author Gavin Shan <gwshan@linux.vnet.ibm.com>
Wed, 4 Jun 2014 07:31:52 +0000 (17:31 +1000)
committer Benjamin Herrenschmidt <benh@kernel.crashing.org>
Wed, 11 Jun 2014 07:04:33 +0000 (17:04 +1000)
diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h

index 89d5670b2eeb400ec659793fc3e960cc6d5894a3..1e551a2d6f8257f3fc78a73152ef65a3ebe24e52 100644 (file)
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -33,7 +33,7 @@ struct eeh_event {
  
  int eeh_event_init(void);
  int eeh_send_failure_event(struct eeh_pe *pe);
-void eeh_remove_event(struct eeh_pe *pe);
+void eeh_remove_event(struct eeh_pe *pe, bool force);
  void eeh_handle_event(struct eeh_pe *pe);
  
  #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c

index 8bb40e7cdeb6a65e13bc57647f0e2b2dfd0ad430..420da61d4ce001d74ba64b675f8615b65f6e812a 100644 (file)
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -770,7 +770,7 @@ static void eeh_handle_special_event(void)
                         eeh_serialize_lock(&flags);
  
                         /* Purge all events */
-                       eeh_remove_event(NULL);
+                       eeh_remove_event(NULL, true);
  
                         list_for_each_entry(hose, &hose_list, list_node) {
                                 phb_pe = eeh_phb_pe_get(hose);
@@ -789,7 +789,7 @@ static void eeh_handle_special_event(void)
                         eeh_serialize_lock(&flags);
  
                         /* Purge all events of the PHB */
-                       eeh_remove_event(pe);
+                       eeh_remove_event(pe, true);
  
                         if (rc == EEH_NEXT_ERR_DEAD_PHB)
                                 eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c

index 72d748b56c86b2b9ae960e49b819dfedef36f61a..4eefb6e34dbb2f6edbf4990349e9c11b0b5d07f8 100644 (file)
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -152,24 +152,33 @@ int eeh_send_failure_event(struct eeh_pe *pe)
  /**
   * eeh_remove_event - Remove EEH event from the queue
   * @pe: Event binding to the PE
+ * @force: Event will be removed unconditionally
   *
   * On PowerNV platform, we might have subsequent incoming events
   * that are part of the former one. In that case, those subsequent
   * events are complete duplicates and unnecessary, thus
   * they should be removed.
   */
-void eeh_remove_event(struct eeh_pe *pe)
+void eeh_remove_event(struct eeh_pe *pe, bool force)
  {
         unsigned long flags;
         struct eeh_event *event, *tmp;
  
+       /*
+        * If we have NULL PE passed in, we have dead IOC
+        * or we're sure we can report all existing errors
+        * by the caller.
+        *
+        * Without "force", events whose associated PE has
+        * been isolated are not removed, to avoid losing
+        * those events.
+        */
         spin_lock_irqsave(&eeh_eventlist_lock, flags);
         list_for_each_entry_safe(event, tmp, &eeh_eventlist, list) {
-               /*
-                * If we don't have valid PE passed in, that means
-                * we already have event corresponding to dead IOC
-                * and all events should be purged.
-                */
+               if (!force && event->pe &&
+                   (event->pe->state & EEH_PE_ISOLATED))
+                       continue;
+
                 if (!pe) {
                         list_del(&event->list);
                         kfree(event);
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c

index 5711f6f1fda66d3fc09ac8ce931b5b5a929aaee6..9c002099f875f7414bffe025a0a31f86694a90d5 100644 (file)
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -717,7 +717,7 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
          * And we should keep the cached OPAL notifier event synchronized
          * between the kernel and firmware.
          */
-       eeh_remove_event(NULL);
+       eeh_remove_event(NULL, false);
         opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
  
         list_for_each_entry(hose, &hose_list, list_node) {
author	Gavin Shan <gwshan@linux.vnet.ibm.com>
	Wed, 4 Jun 2014 07:31:52 +0000 (17:31 +1000)
committer	Benjamin Herrenschmidt <benh@kernel.crashing.org>
	Wed, 11 Jun 2014 07:04:33 +0000 (17:04 +1000)
arch/powerpc/include/asm/eeh_event.h		patch \| blob \| blame \| history
arch/powerpc/kernel/eeh_driver.c		patch \| blob \| blame \| history
arch/powerpc/kernel/eeh_event.c		patch \| blob \| blame \| history
arch/powerpc/platforms/powernv/eeh-ioda.c		patch \| blob \| blame \| history