igb: add completion timeout workaround for 82575/82576
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index e25343588fc77466380039a46728ba4bfd8ecd34..2cb546078c52b940dff6ad9292b53920474d5709 100644
@@ -127,14 +127,48 @@ static void igb_restore_vlan(struct igb_adapter *);
 static void igb_ping_all_vfs(struct igb_adapter *);
 static void igb_msg_task(struct igb_adapter *);
 static int igb_rcv_msg_from_vf(struct igb_adapter *, u32);
-static inline void igb_set_rah_pool(struct e1000_hw *, int , int);
 static void igb_set_mc_list_pools(struct igb_adapter *, int, u16);
 static void igb_vmm_control(struct igb_adapter *);
-static inline void igb_set_vmolr(struct e1000_hw *, int);
-static inline int igb_set_vf_rlpml(struct igb_adapter *, int, int);
 static int igb_set_vf_mac(struct igb_adapter *adapter, int, unsigned char *);
 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
 
+static inline void igb_set_vmolr(struct e1000_hw *hw, int vfn)
+{
+       u32 reg_data;
+
+       reg_data = rd32(E1000_VMOLR(vfn));
+       reg_data |= E1000_VMOLR_BAM |    /* Accept broadcast */
+                   E1000_VMOLR_ROPE |   /* Accept packets matched in UTA */
+                   E1000_VMOLR_ROMPE |  /* Accept packets matched in MTA */
+                   E1000_VMOLR_AUPE |   /* Accept untagged packets */
+                   E1000_VMOLR_STRVLAN; /* Strip vlan tags */
+       wr32(E1000_VMOLR(vfn), reg_data);
+}
+
+static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
+                                 int vfn)
+{
+       struct e1000_hw *hw = &adapter->hw;
+       u32 vmolr;
+
+       vmolr = rd32(E1000_VMOLR(vfn));
+       vmolr &= ~E1000_VMOLR_RLPML_MASK;
+       vmolr |= size | E1000_VMOLR_LPE;
+       wr32(E1000_VMOLR(vfn), vmolr);
+
+       return 0;
+}
+
+static inline void igb_set_rah_pool(struct e1000_hw *hw, int pool, int entry)
+{
+       u32 reg_data;
+
+       reg_data = rd32(E1000_RAH(entry));
+       reg_data &= ~E1000_RAH_POOL_MASK;
+       reg_data |= E1000_RAH_POOL_1 << pool;
+       wr32(E1000_RAH(entry), reg_data);
+}
+
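Note: the three helpers above are moved ahead of their first use so the forward declarations can be dropped (and the stray double semicolon goes away in the move). The RAH update is a plain read-modify-write on the Receive Address High register; a minimal standalone sketch of the bit manipulation, with field definitions assumed for illustration rather than taken from e1000_defines.h:

	/* Illustrative field definitions; the driver's real masks live in
	 * e1000_defines.h. */
	#define RAH_POOL_MASK	(0xffu << 18)	/* assumed pool-select field */
	#define RAH_POOL_1	(0x1u << 18)	/* assumed first pool bit */

	/* Route a receive-address entry to a given VMDq pool. */
	static u32 rah_with_pool(u32 rah, int pool)
	{
		u32 v = rah & ~RAH_POOL_MASK;	/* clear the old selection */

		return v | (RAH_POOL_1 << pool);	/* select this pool */
	}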
 #ifdef CONFIG_PM
 static int igb_suspend(struct pci_dev *, pm_message_t);
 static int igb_resume(struct pci_dev *);
@@ -942,6 +976,8 @@ int igb_up(struct igb_adapter *adapter)
        rd32(E1000_ICR);
        igb_irq_enable(adapter);
 
+       netif_tx_start_all_queues(adapter->netdev);
+
        /* Fire a link change interrupt to start the watchdog. */
        wr32(E1000_ICS, E1000_ICS_LSC);
        return 0;
@@ -994,6 +1030,11 @@ void igb_down(struct igb_adapter *adapter)
                igb_reset(adapter);
        igb_clean_all_tx_rings(adapter);
        igb_clean_all_rx_rings(adapter);
+#ifdef CONFIG_IGB_DCA
+
+       /* since we reset the hardware, DCA settings were cleared */
+       igb_setup_dca(adapter);
+#endif
 }
 
 void igb_reinit_locked(struct igb_adapter *adapter)
@@ -1343,6 +1384,9 @@ static int __devinit igb_probe(struct pci_dev *pdev,
        if (pci_using_dac)
                netdev->features |= NETIF_F_HIGHDMA;
 
+       if (adapter->hw.mac.type == e1000_82576)
+               netdev->features |= NETIF_F_SCTP_CSUM;
+
        adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw);
 
        /* before reading the NVM, reset the controller to put the device in a
@@ -1390,8 +1434,6 @@ static int __devinit igb_probe(struct pci_dev *pdev,
 
        igb_validate_mdi_setting(hw);
 
-       adapter->rx_csum = 1;
-
        /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
         * enable the ACPI Magic Packet filter
         */
@@ -1442,22 +1484,18 @@ static int __devinit igb_probe(struct pci_dev *pdev,
         * driver. */
        igb_get_hw_control(adapter);
 
-       /* tell the stack to leave us alone until igb_open() is called */
-       netif_carrier_off(netdev);
-       netif_tx_stop_all_queues(netdev);
-
        strcpy(netdev->name, "eth%d");
        err = register_netdev(netdev);
        if (err)
                goto err_register;
 
+       /* carrier off reporting is important to ethtool even BEFORE open */
+       netif_carrier_off(netdev);
+
 #ifdef CONFIG_IGB_DCA
        if (dca_add_requester(&pdev->dev) == 0) {
                adapter->flags |= IGB_FLAG_DCA_ENABLED;
                dev_info(&pdev->dev, "DCA enabled\n");
-               /* Always use CB2 mode, difference is masked
-                * in the CB driver. */
-               wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
                igb_setup_dca(adapter);
        }
 #endif
@@ -1699,6 +1737,8 @@ static int igb_open(struct net_device *netdev)
        if (test_bit(__IGB_TESTING, &adapter->state))
                return -EBUSY;
 
+       netif_carrier_off(netdev);
+
        /* allocate transmit descriptors */
        err = igb_setup_all_tx_resources(adapter);
        if (err)
@@ -2231,29 +2271,24 @@ static void igb_configure_rx(struct igb_adapter *adapter)
                mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
                         E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
 
-
                wr32(E1000_MRQC, mrqc);
-
-               /* Multiqueue and raw packet checksumming are mutually
-                * exclusive.  Note that this not the same as TCP/IP
-                * checksumming, which works fine. */
-               rxcsum = rd32(E1000_RXCSUM);
-               rxcsum |= E1000_RXCSUM_PCSD;
-               wr32(E1000_RXCSUM, rxcsum);
-       } else {
+       } else if (adapter->vfs_allocated_count) {
                /* Enable multi-queue for sr-iov */
-               if (adapter->vfs_allocated_count)
-                       wr32(E1000_MRQC, E1000_MRQC_ENABLE_VMDQ);
-               /* Enable Receive Checksum Offload for TCP and UDP */
-               rxcsum = rd32(E1000_RXCSUM);
-               if (adapter->rx_csum)
-                       rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPPCSE;
-               else
-                       rxcsum &= ~(E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPPCSE);
-
-               wr32(E1000_RXCSUM, rxcsum);
+               wr32(E1000_MRQC, E1000_MRQC_ENABLE_VMDQ);
        }
 
+       /* Enable Receive Checksum Offload for TCP and UDP */
+       rxcsum = rd32(E1000_RXCSUM);
+       /* Disable raw packet checksumming */
+       rxcsum |= E1000_RXCSUM_PCSD;
+
+       if (adapter->hw.mac.type == e1000_82576)
+               /* Enable Receive Checksum Offload for SCTP */
+               rxcsum |= E1000_RXCSUM_CRCOFL;
+
+       /* Don't need to set TUOFL or IPOFL, they default to 1 */
+       wr32(E1000_RXCSUM, rxcsum);
+
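Note: the RXCSUM rework above sets PCSD unconditionally because, with RSS enabled, the raw packet checksum field in the advanced Rx descriptor write-back is repurposed for the RSS hash; that is why the removed comment called raw checksumming and multiqueue mutually exclusive. A condensed sketch of the resulting policy, using the bit names from the driver:

	/* Sketch: Rx checksum policy after this hunk.  PCSD disables the
	 * raw packet checksum so the descriptor field can carry the RSS
	 * hash; CRCOFL (82576 only) adds SCTP CRC32c validation.  TUOFL
	 * and IPOFL reset to 1, so TCP/UDP/IP offload stays enabled.
	 */
	static u32 igb_rxcsum_policy(u32 rxcsum, bool is_82576)
	{
		rxcsum |= E1000_RXCSUM_PCSD;
		if (is_82576)
			rxcsum |= E1000_RXCSUM_CRCOFL;
		return rxcsum;
	}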
        /* Set the default pool for the PF's first queue */
        igb_configure_vt_default_pool(adapter);
 
@@ -2583,10 +2618,6 @@ static bool igb_has_link(struct igb_adapter *adapter)
                        link_active = true;
                }
                break;
-       case e1000_media_type_fiber:
-               ret_val = hw->mac.ops.check_for_link(hw);
-               link_active = !!(rd32(E1000_STATUS) & E1000_STATUS_LU);
-               break;
        case e1000_media_type_internal_serdes:
                ret_val = hw->mac.ops.check_for_link(hw);
                link_active = hw->mac.serdes_has_link;
@@ -2661,7 +2692,6 @@ static void igb_watchdog_task(struct work_struct *work)
                        }
 
                        netif_carrier_on(netdev);
-                       netif_tx_wake_all_queues(netdev);
 
                        igb_ping_all_vfs(adapter);
 
@@ -2678,7 +2708,6 @@ static void igb_watchdog_task(struct work_struct *work)
                        printk(KERN_INFO "igb: %s NIC Link is Down\n",
                               netdev->name);
                        netif_carrier_off(netdev);
-                       netif_tx_stop_all_queues(netdev);
 
                        igb_ping_all_vfs(adapter);
 
@@ -2712,6 +2741,8 @@ link_up:
                         * (Do the reset outside of interrupt context). */
                        adapter->tx_timeout_count++;
                        schedule_work(&adapter->reset_task);
+                       /* return immediately since reset is imminent */
+                       return;
                }
        }
 
@@ -2895,13 +2926,13 @@ static void igb_set_itr(struct igb_adapter *adapter)
        switch (current_itr) {
        /* counts and packets in update_itr are dependent on these numbers */
        case lowest_latency:
-               new_itr = 70000;
+               new_itr = 56;  /* aka 70,000 ints/sec */
                break;
        case low_latency:
-               new_itr = 20000; /* aka hwitr = ~200 */
+               new_itr = 196; /* aka 20,000 ints/sec */
                break;
        case bulk_latency:
-               new_itr = 4000;
+               new_itr = 980; /* aka 4,000 ints/sec */
                break;
        default:
                break;
@@ -2920,7 +2951,8 @@ set_itr_now:
                 * by adding intermediate steps when interrupt rate is
                 * increasing */
                new_itr = new_itr > adapter->itr ?
-                            min(adapter->itr + (new_itr >> 2), new_itr) :
+                            max((new_itr * adapter->itr) /
+                                (new_itr + (adapter->itr >> 2)), new_itr) :
                             new_itr;
                /* Don't write the value here; it resets the adapter's
                 * internal timer, and causes us to delay far longer than
@@ -2929,7 +2961,7 @@ set_itr_now:
                 * ends up being correct.
                 */
                adapter->itr = new_itr;
-               adapter->rx_ring->itr_val = 1000000000 / (new_itr * 256);
+               adapter->rx_ring->itr_val = new_itr;
                adapter->rx_ring->set_itr = 1;
        }
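Note: the unit change is the heart of this ITR rework. adapter->itr now holds the value actually written to EITR, which counts in 256 ns increments on these parts, instead of an interrupts-per-second rate converted at write time on the removed line. A sketch of the conversion, assuming the 256 ns granularity:

	/* EITR granularity assumed to be 256 ns per unit, so
	 * register = 10^9 / (ints_per_sec * 256).
	 *
	 *   igb_rate_to_eitr(70000) ==  55  (patch uses 56)
	 *   igb_rate_to_eitr(20000) == 195  (patch uses 196)
	 *   igb_rate_to_eitr(4000)  == 976  (patch uses 980)
	 */
	static u32 igb_rate_to_eitr(u32 ints_per_sec)
	{
		return 1000000000u / (ints_per_sec * 256u);
	}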
 
@@ -3068,11 +3100,15 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter,
                                tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
                                if (ip_hdr(skb)->protocol == IPPROTO_TCP)
                                        tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
+                               else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
+                                       tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
                                break;
                        case cpu_to_be16(ETH_P_IPV6):
                                /* XXX what about other V6 headers?? */
                                if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
                                        tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
+                               else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
+                                       tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
                                break;
                        default:
                                if (unlikely(net_ratelimit()))
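Note: the Tx side of SCTP offload (advertised earlier via NETIF_F_SCTP_CSUM) works through the L4T field of the advanced context descriptor's TUCMD, which tells the hardware what kind of L4 integrity value to insert. The encoding below is recalled from the 82576 datasheet and should be treated as an assumption; the authoritative values are the E1000_ADVTXD_TUCMD_L4T_* definitions used above:

	/* TUCMD.L4T selects the Tx L4 offload type (assumed encoding):
	 *   0 = UDP  (16-bit checksum)
	 *   1 = TCP  (16-bit checksum)
	 *   2 = SCTP (CRC32c)
	 */
	enum igb_l4_type {
		IGB_L4TYPE_UDP	= 0,
		IGB_L4TYPE_TCP	= 1,
		IGB_L4TYPE_SCTP	= 2,
	};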
@@ -3133,8 +3169,7 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
        /* set time_stamp *before* dma to help avoid a possible race */
        buffer_info->time_stamp = jiffies;
        buffer_info->next_to_watch = i;
-       buffer_info->dma = map[count];
-       count++;
+       buffer_info->dma = skb_shinfo(skb)->dma_head;
 
        for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
                struct skb_frag_struct *frag;
@@ -3158,7 +3193,7 @@ static inline int igb_tx_map_adv(struct igb_adapter *adapter,
        tx_ring->buffer_info[i].skb = skb;
        tx_ring->buffer_info[first].next_to_watch = i;
 
-       return count;
+       return count + 1;
 }
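Note: this follows the then-new skb DMA mapping scheme, where skb_dma_map() stores the head mapping in skb_shinfo(skb)->dma_head and the fragment mappings in a per-skb array. count therefore enumerates only the fragments, and the head descriptor is accounted for by returning count + 1. A trivial sketch of the descriptor arithmetic:

	/* One descriptor for the head mapping plus one per page fragment;
	 * mirrors the "return count + 1" above.
	 */
	static unsigned int igb_needed_descs(struct sk_buff *skb)
	{
		return 1 + skb_shinfo(skb)->nr_frags;
	}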
 
 static inline void igb_tx_queue_adv(struct igb_adapter *adapter,
@@ -3338,7 +3373,6 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
        if (count) {
                igb_tx_queue_adv(adapter, tx_ring, tx_flags, count,
                                 skb->len, hdr_len);
-               netdev->trans_start = jiffies;
                /* Make sure there is space in the ring for the next send. */
                igb_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 4);
        } else {
@@ -3582,8 +3616,35 @@ void igb_update_stats(struct igb_adapter *adapter)
 
        /* Rx Errors */
 
+       if (hw->mac.type != e1000_82575) {
+               u32 rqdpc_tmp;
+               u64 rqdpc_total = 0;
+               int i;
+               /* Read out drop stats per RX queue.  Note that RQDPC (Receive
+                * Queue Drop Packet Count) only increments if the DROP_EN bit
+                * is set in the SRRCTL register for that queue.  If DROP_EN is
+                * NOT set, a somewhat equivalent count is kept in RNBC (on a
+                * per-adapter, not per-queue, basis).  Also note that the drop
+                * count is due to a lack of available descriptors.
+                */
+               for (i = 0; i < adapter->num_rx_queues; i++) {
+                       rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0xFFF;
+                       adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
+                       rqdpc_total += adapter->rx_ring[i].rx_stats.drops;
+               }
+               adapter->net_stats.rx_fifo_errors = rqdpc_total;
+       }
+
+       /* Note that RNBC (Receive No Buffers Count) is not an exact
+        * drop count, as the hardware FIFO might save the day.  That's
+        * one of the reasons for counting it in rx_fifo_errors, as it's
+        * potentially not a true drop.
+        */
+       adapter->net_stats.rx_fifo_errors += adapter->stats.rnbc;
+
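Note: whether a drop lands in RQDPC or RNBC is a per-queue configuration choice made via the DROP_EN bit in SRRCTL. A hedged sketch of opting a queue into per-queue drop accounting, using register names as defined elsewhere in the driver:

	/* Hypothetical helper: RQDPC(queue) only increments when
	 * SRRCTL(queue).DROP_EN is set; otherwise out-of-descriptor
	 * events are counted globally in RNBC instead.
	 */
	static void igb_enable_queue_drop(struct e1000_hw *hw, int queue)
	{
		u32 srrctl = rd32(E1000_SRRCTL(queue));

		srrctl |= E1000_SRRCTL_DROP_EN;	/* drop, don't backpressure */
		wr32(E1000_SRRCTL(queue), srrctl);
	}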
        /* RLEC on some newer hardware can be incorrect so build
-       * our own version based on RUC and ROC */
+        * our own version based on RUC and ROC */
        adapter->net_stats.rx_errors = adapter->stats.rxerrc +
                adapter->stats.crcerrs + adapter->stats.algnerrc +
                adapter->stats.ruc + adapter->stats.roc +
@@ -3767,11 +3828,15 @@ static void igb_update_tx_dca(struct igb_ring *tx_ring)
 
 static void igb_setup_dca(struct igb_adapter *adapter)
 {
+       struct e1000_hw *hw = &adapter->hw;
        int i;
 
        if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
                return;
 
+       /* Always use CB2 mode, difference is masked in the CB driver. */
+       wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
+
        for (i = 0; i < adapter->num_tx_queues; i++) {
                adapter->tx_ring[i].cpu = -1;
                igb_update_tx_dca(&adapter->tx_ring[i]);
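Note: moving the CB2-mode write from igb_probe() into igb_setup_dca() pairs with the igb_down() hunk earlier in this patch: any path that resets the MAC clears E1000_DCA_CTRL, so every caller of igb_setup_dca() now re-applies the mode along with the per-ring CPU hints. A sketch of the ordering this establishes, with igb_reset_and_reinit() as a hypothetical wrapper:

	/* Hypothetical: after any MAC reset, DCA must be re-programmed. */
	static void igb_reset_and_reinit(struct igb_adapter *adapter)
	{
		igb_reset(adapter);	/* clears DCA_CTRL and ring DCA state */
		igb_setup_dca(adapter);	/* rewrites CB2 mode and CPU hints */
	}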
@@ -4434,20 +4499,12 @@ static void igb_receive_skb(struct igb_ring *ring, u8 status,
        bool vlan_extracted = (adapter->vlgrp && (status & E1000_RXD_STAT_VP));
 
        skb_record_rx_queue(skb, ring->queue_index);
-       if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
-               if (vlan_extracted)
-                       vlan_gro_receive(&ring->napi, adapter->vlgrp,
-                                        le16_to_cpu(rx_desc->wb.upper.vlan),
-                                        skb);
-               else
-                       napi_gro_receive(&ring->napi, skb);
-       } else {
-               if (vlan_extracted)
-                       vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
-                                         le16_to_cpu(rx_desc->wb.upper.vlan));
-               else
-                       netif_receive_skb(skb);
-       }
+       if (vlan_extracted)
+               vlan_gro_receive(&ring->napi, adapter->vlgrp,
+                                le16_to_cpu(rx_desc->wb.upper.vlan),
+                                skb);
+       else
+               napi_gro_receive(&ring->napi, skb);
 }
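Note: the ip_summed special case is dropped because the GRO entry points fall back to the normal receive path on their own for packets they cannot aggregate, so routing every frame through vlan_gro_receive()/napi_gro_receive() is safe regardless of checksum status.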
 
 static inline void igb_rx_checksum_adv(struct igb_adapter *adapter,
@@ -4456,19 +4513,28 @@ static inline void igb_rx_checksum_adv(struct igb_adapter *adapter,
        skb->ip_summed = CHECKSUM_NONE;
 
        /* Ignore Checksum bit is set or checksum is disabled through ethtool */
-       if ((status_err & E1000_RXD_STAT_IXSM) || !adapter->rx_csum)
+       if ((status_err & E1000_RXD_STAT_IXSM) ||
+           (adapter->flags & IGB_FLAG_RX_CSUM_DISABLED))
                return;
        /* TCP/UDP checksum error bit is set */
        if (status_err &
            (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
+               /*
+                * Work around an 82576 erratum with SCTP packets where
+                * the TCPE (aka L4E) bit is set incorrectly on 64 byte
+                * frames (60 bytes without CRC); don't count these as
+                * hardware checksum errors, and let the stack verify
+                * the CRC32c instead.
+                */
+               if (!((adapter->hw.mac.type == e1000_82576) &&
+                     (skb->len == 60)))
+                       adapter->hw_csum_err++;
                /* let the stack verify checksum errors */
-               adapter->hw_csum_err++;
                return;
        }
        /* It must be a TCP or UDP packet with a valid checksum */
        if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
                skb->ip_summed = CHECKSUM_UNNECESSARY;
 
+       dev_dbg(&adapter->pdev->dev, "cksum success: bits %08X\n", status_err);
        adapter->hw_csum_good++;
 }
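Note: the magic 60 in the erratum test comes from Ethernet framing: a minimum-size frame is 64 bytes on the wire including the 4-byte FCS, which the hardware strips before DMA, so a padded minimum-size SCTP packet reaches the driver with skb->len == 60. The check in isolation:

	/* 82576 erratum: L4E can be set spuriously on minimum-size frames
	 * (64 bytes on the wire, 60 after FCS stripping).
	 */
	static bool igb_sctp_csum_erratum(struct e1000_hw *hw, unsigned int len)
	{
		return hw->mac.type == e1000_82576 && len == 60;
	}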
 
@@ -4513,11 +4579,12 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring,
                cleaned = true;
                cleaned_count++;
 
+               /* this is the fast path for the non-packet split case */
                if (!adapter->rx_ps_hdr_size) {
                        pci_unmap_single(pdev, buffer_info->dma,
-                                        adapter->rx_buffer_len +
-                                          NET_IP_ALIGN,
+                                        adapter->rx_buffer_len,
                                         PCI_DMA_FROMDEVICE);
+                       buffer_info->dma = 0;
                        skb_put(skb, length);
                        goto send_up;
                }
@@ -4534,8 +4601,9 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring,
 
                if (!skb_shinfo(skb)->nr_frags) {
                        pci_unmap_single(pdev, buffer_info->dma,
-                                        adapter->rx_ps_hdr_size + NET_IP_ALIGN,
+                                        adapter->rx_ps_hdr_size,
                                         PCI_DMA_FROMDEVICE);
+                       buffer_info->dma = 0;
                        skb_put(skb, hlen);
                }
 
@@ -4677,7 +4745,6 @@ static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring,
                bufsz = adapter->rx_ps_hdr_size;
        else
                bufsz = adapter->rx_buffer_len;
-       bufsz += NET_IP_ALIGN;
 
        while (cleaned_count--) {
                rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
@@ -4701,7 +4768,7 @@ static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring,
                }
 
                if (!buffer_info->skb) {
-                       skb = netdev_alloc_skb(netdev, bufsz);
+                       skb = netdev_alloc_skb(netdev, bufsz + NET_IP_ALIGN);
                        if (!skb) {
                                adapter->alloc_rx_buff_failed++;
                                goto no_buffers;
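Note: with NET_IP_ALIGN no longer folded into bufsz, the allocation and the DMA mapping use different sizes on purpose: the pad exists only inside the skb, where skb_reserve() consumes it to align the IP header, while the map and the pci_unmap_single() calls in igb_clean_rx_irq_adv() now agree on bufsz, as the streaming DMA API expects. A condensed sketch of the pairing, adapted from the surrounding driver code:

	/* Sketch: pad the allocation, not the mapping. */
	static struct sk_buff *igb_alloc_mapped_skb(struct pci_dev *pdev,
						    struct net_device *netdev,
						    unsigned int bufsz,
						    dma_addr_t *dma)
	{
		struct sk_buff *skb = netdev_alloc_skb(netdev,
						       bufsz + NET_IP_ALIGN);

		if (!skb)
			return NULL;

		skb_reserve(skb, NET_IP_ALIGN);	/* align the IP header */
		*dma = pci_map_single(pdev, skb->data, bufsz,
				      PCI_DMA_FROMDEVICE);
		return skb;
	}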
@@ -4962,6 +5029,34 @@ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
        }
 }
 
+s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
+{
+       struct igb_adapter *adapter = hw->back;
+       u16 cap_offset;
+
+       cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
+       if (!cap_offset)
+               return -E1000_ERR_CONFIG;
+
+       pci_read_config_word(adapter->pdev, cap_offset + reg, value);
+
+       return 0;
+}
+
+s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
+{
+       struct igb_adapter *adapter = hw->back;
+       u16 cap_offset;
+
+       cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
+       if (!cap_offset)
+               return -E1000_ERR_CONFIG;
+
+       pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
+
+       return 0;
+}
+
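Note: these two helpers give the MAC-layer code access to the PCIe capability block through the adapter's pci_dev, which is what the completion timeout workaround in the subject line needs: it lengthens the completion timeout via the PCIe Device Control2 register. A hypothetical usage sketch; the 0x28 offset (PCI_EXP_DEVCTL2) and the timeout range field/value are assumptions for illustration:

	/* Hypothetical caller: lengthen the PCIe completion timeout.
	 * 0x28 is the Device Control2 offset within the PCIe capability;
	 * the 0xf field mask and 0x5 range code are illustrative only.
	 */
	static s32 igb_lengthen_completion_timeout(struct e1000_hw *hw)
	{
		u16 devctl2;
		s32 ret = igb_read_pcie_cap_reg(hw, 0x28, &devctl2);

		if (ret)
			return ret;

		devctl2 = (devctl2 & ~0xf) | 0x5;	/* assumed range code */
		return igb_write_pcie_cap_reg(hw, 0x28, &devctl2);
	}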
 static void igb_vlan_rx_register(struct net_device *netdev,
                                 struct vlan_group *grp)
 {
@@ -5065,14 +5160,6 @@ int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
 
        mac->autoneg = 0;
 
-       /* Fiber NICs only allow 1000 gbps Full duplex */
-       if ((adapter->hw.phy.media_type == e1000_media_type_fiber) &&
-               spddplx != (SPEED_1000 + DUPLEX_FULL)) {
-               dev_err(&adapter->pdev->dev,
-                       "Unsupported Speed/Duplex configuration\n");
-               return -EINVAL;
-       }
-
        switch (spddplx) {
        case SPEED_10 + DUPLEX_HALF:
                mac->forced_speed_duplex = ADVERTISE_10_HALF;
@@ -5302,6 +5389,9 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
 
        netif_device_detach(netdev);
 
+       if (state == pci_channel_io_perm_failure)
+               return PCI_ERS_RESULT_DISCONNECT;
+
        if (netif_running(netdev))
                igb_down(adapter);
        pci_disable_device(pdev);
@@ -5378,43 +5468,6 @@ static void igb_io_resume(struct pci_dev *pdev)
        igb_get_hw_control(adapter);
 }
 
-static inline void igb_set_vmolr(struct e1000_hw *hw, int vfn)
-{
-       u32 reg_data;
-
-       reg_data = rd32(E1000_VMOLR(vfn));
-       reg_data |= E1000_VMOLR_BAM |    /* Accept broadcast */
-                   E1000_VMOLR_ROPE |   /* Accept packets matched in UTA */
-                   E1000_VMOLR_ROMPE |  /* Accept packets matched in MTA */
-                   E1000_VMOLR_AUPE |   /* Accept untagged packets */
-                   E1000_VMOLR_STRVLAN; /* Strip vlan tags */
-       wr32(E1000_VMOLR(vfn), reg_data);
-}
-
-static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
-                                 int vfn)
-{
-       struct e1000_hw *hw = &adapter->hw;
-       u32 vmolr;
-
-       vmolr = rd32(E1000_VMOLR(vfn));
-       vmolr &= ~E1000_VMOLR_RLPML_MASK;
-       vmolr |= size | E1000_VMOLR_LPE;
-       wr32(E1000_VMOLR(vfn), vmolr);
-
-       return 0;
-}
-
-static inline void igb_set_rah_pool(struct e1000_hw *hw, int pool, int entry)
-{
-       u32 reg_data;
-
-       reg_data = rd32(E1000_RAH(entry));
-       reg_data &= ~E1000_RAH_POOL_MASK;
-       reg_data |= E1000_RAH_POOL_1 << pool;;
-       wr32(E1000_RAH(entry), reg_data);
-}
-
 static void igb_set_mc_list_pools(struct igb_adapter *adapter,
                                  int entry_count, u16 total_rar_filters)
 {