X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=drivers%2Fnet%2Figb%2Figb_main.c;h=ba043c4e1ca24fb4d6fa8a663676b286b449fcef;hb=2d064c06fecadadcb81a452acd373af00dfb1fec;hp=e79a26a886c883e0fc56ba0318a1aaa87b41e366;hpb=79ff1ad2eec1c106962241f6346958b9641e34f3;p=deliverable%2Flinux.git diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index e79a26a886c8..ba043c4e1ca2 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -41,15 +41,17 @@ #include #include #include - +#ifdef CONFIG_DCA +#include +#endif #include "igb.h" -#define DRV_VERSION "1.0.8-k2" +#define DRV_VERSION "1.2.45-k2" char igb_driver_name[] = "igb"; char igb_driver_version[] = DRV_VERSION; static const char igb_driver_string[] = "Intel(R) Gigabit Ethernet Network Driver"; -static const char igb_copyright[] = "Copyright (c) 2007 Intel Corporation."; +static const char igb_copyright[] = "Copyright (c) 2008 Intel Corporation."; static const struct e1000_info *igb_info_tbl[] = { @@ -57,6 +59,10 @@ static const struct e1000_info *igb_info_tbl[] = { }; static struct pci_device_id igb_pci_tbl[] = { + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 }, @@ -71,8 +77,8 @@ static int igb_setup_all_tx_resources(struct igb_adapter *); static int igb_setup_all_rx_resources(struct igb_adapter *); static void igb_free_all_tx_resources(struct igb_adapter *); static void igb_free_all_rx_resources(struct igb_adapter *); -static void igb_free_tx_resources(struct igb_adapter *, struct igb_ring *); -static void igb_free_rx_resources(struct igb_adapter *, struct igb_ring *); +static void igb_free_tx_resources(struct igb_ring *); +static void igb_free_rx_resources(struct igb_ring *); void igb_update_stats(struct igb_adapter *); static int igb_probe(struct pci_dev *, const struct pci_device_id *); static void __devexit igb_remove(struct pci_dev *pdev); @@ -84,8 +90,8 @@ static void igb_configure_rx(struct igb_adapter *); static void igb_setup_rctl(struct igb_adapter *); static void igb_clean_all_tx_rings(struct igb_adapter *); static void igb_clean_all_rx_rings(struct igb_adapter *); -static void igb_clean_tx_ring(struct igb_adapter *, struct igb_ring *); -static void igb_clean_rx_ring(struct igb_adapter *, struct igb_ring *); +static void igb_clean_tx_ring(struct igb_ring *); +static void igb_clean_rx_ring(struct igb_ring *); static void igb_set_multi(struct net_device *); static void igb_update_phy_info(unsigned long); static void igb_watchdog(unsigned long); @@ -102,12 +108,15 @@ static irqreturn_t igb_msix_other(int irq, void *); static irqreturn_t igb_msix_rx(int irq, void *); static irqreturn_t igb_msix_tx(int irq, void *); static int igb_clean_rx_ring_msix(struct napi_struct *, int); -static bool igb_clean_tx_irq(struct igb_adapter *, struct igb_ring *); -static int igb_clean(struct napi_struct *, int); -static bool igb_clean_rx_irq_adv(struct igb_adapter *, - struct igb_ring *, int *, int); -static void igb_alloc_rx_buffers_adv(struct igb_adapter *, - struct igb_ring *, int); +#ifdef CONFIG_DCA +static void igb_update_rx_dca(struct igb_ring *); +static void igb_update_tx_dca(struct igb_ring *); +static void igb_setup_dca(struct igb_adapter *); +#endif /* CONFIG_DCA */ +static bool igb_clean_tx_irq(struct igb_ring *); +static int igb_poll(struct napi_struct *, int); +static bool igb_clean_rx_irq_adv(struct igb_ring *, int *, int); +static void igb_alloc_rx_buffers_adv(struct igb_ring *, int); static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); static void igb_tx_timeout(struct net_device *); static void igb_reset_task(struct work_struct *); @@ -121,6 +130,14 @@ static int igb_suspend(struct pci_dev *, pm_message_t); static int igb_resume(struct pci_dev *); #endif static void igb_shutdown(struct pci_dev *); +#ifdef CONFIG_DCA +static int igb_notify_dca(struct notifier_block *, unsigned long, void *); +static struct notifier_block dca_notifier = { + .notifier_call = igb_notify_dca, + .next = NULL, + .priority = 0 +}; +#endif #ifdef CONFIG_NET_POLL_CONTROLLER /* for netdump / net console */ @@ -185,6 +202,9 @@ static int __init igb_init_module(void) printk(KERN_INFO "%s\n", igb_copyright); ret = pci_register_driver(&igb_driver); +#ifdef CONFIG_DCA + dca_register_notify(&dca_notifier); +#endif return ret; } @@ -198,6 +218,9 @@ module_init(igb_init_module); **/ static void __exit igb_exit_module(void) { +#ifdef CONFIG_DCA + dca_unregister_notify(&dca_notifier); +#endif pci_unregister_driver(&igb_driver); } @@ -226,15 +249,19 @@ static int igb_alloc_queues(struct igb_adapter *adapter) return -ENOMEM; } + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igb_ring *ring = &(adapter->tx_ring[i]); + ring->adapter = adapter; + ring->queue_index = i; + } for (i = 0; i < adapter->num_rx_queues; i++) { struct igb_ring *ring = &(adapter->rx_ring[i]); ring->adapter = adapter; + ring->queue_index = i; ring->itr_register = E1000_ITR; - if (!ring->napi.poll) - netif_napi_add(adapter->netdev, &ring->napi, igb_clean, - adapter->napi.weight / - adapter->num_rx_queues); + /* set a default napi handler for each rx_ring */ + netif_napi_add(adapter->netdev, &ring->napi, igb_poll, 64); } return 0; } @@ -245,6 +272,10 @@ static void igb_assign_vector(struct igb_adapter *adapter, int rx_queue, { u32 msixbm = 0; struct e1000_hw *hw = &adapter->hw; + u32 ivar, index; + + switch (hw->mac.type) { + case e1000_82575: /* The 82575 assigns vectors using a bitmask, which matches the bitmask for the EICR/EIMS/EIMC registers. To assign one or more queues to a vector, we write the appropriate bits @@ -259,6 +290,47 @@ static void igb_assign_vector(struct igb_adapter *adapter, int rx_queue, E1000_EICR_TX_QUEUE0 << tx_queue; } array_wr32(E1000_MSIXBM(0), msix_vector, msixbm); + break; + case e1000_82576: + /* Kawela uses a table-based method for assigning vectors. + Each queue has a single entry in the table to which we write + a vector number along with a "valid" bit. Sadly, the layout + of the table is somewhat counterintuitive. */ + if (rx_queue > IGB_N0_QUEUE) { + index = (rx_queue & 0x7); + ivar = array_rd32(E1000_IVAR0, index); + if (rx_queue < 8) { + /* vector goes into low byte of register */ + ivar = ivar & 0xFFFFFF00; + ivar |= msix_vector | E1000_IVAR_VALID; + } else { + /* vector goes into third byte of register */ + ivar = ivar & 0xFF00FFFF; + ivar |= (msix_vector | E1000_IVAR_VALID) << 16; + } + adapter->rx_ring[rx_queue].eims_value= 1 << msix_vector; + array_wr32(E1000_IVAR0, index, ivar); + } + if (tx_queue > IGB_N0_QUEUE) { + index = (tx_queue & 0x7); + ivar = array_rd32(E1000_IVAR0, index); + if (tx_queue < 8) { + /* vector goes into second byte of register */ + ivar = ivar & 0xFFFF00FF; + ivar |= (msix_vector | E1000_IVAR_VALID) << 8; + } else { + /* vector goes into high byte of register */ + ivar = ivar & 0x00FFFFFF; + ivar |= (msix_vector | E1000_IVAR_VALID) << 24; + } + adapter->tx_ring[tx_queue].eims_value= 1 << msix_vector; + array_wr32(E1000_IVAR0, index, ivar); + } + break; + default: + BUG(); + break; + } } /** @@ -274,6 +346,12 @@ static void igb_configure_msix(struct igb_adapter *adapter) struct e1000_hw *hw = &adapter->hw; adapter->eims_enable_mask = 0; + if (hw->mac.type == e1000_82576) + /* Turn on MSI-X capability first, or our settings + * won't stick. And it will take days to debug. */ + wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE | + E1000_GPIE_PBA | E1000_GPIE_EIAME | + E1000_GPIE_NSICR); for (i = 0; i < adapter->num_tx_queues; i++) { struct igb_ring *tx_ring = &adapter->tx_ring[i]; @@ -299,12 +377,11 @@ static void igb_configure_msix(struct igb_adapter *adapter) /* set vector for other causes, i.e. link changes */ + switch (hw->mac.type) { + case e1000_82575: array_wr32(E1000_MSIXBM(0), vector++, E1000_EIMS_OTHER); - /* disable IAM for ICR interrupt bits */ - wr32(E1000_IAM, 0); - tmp = rd32(E1000_CTRL_EXT); /* enable MSI-X PBA support*/ tmp |= E1000_CTRL_EXT_PBA_CLR; @@ -315,7 +392,21 @@ static void igb_configure_msix(struct igb_adapter *adapter) wr32(E1000_CTRL_EXT, tmp); adapter->eims_enable_mask |= E1000_EIMS_OTHER; + adapter->eims_other = E1000_EIMS_OTHER; + + break; + + case e1000_82576: + tmp = (vector++ | E1000_IVAR_VALID) << 8; + wr32(E1000_IVAR_MISC, tmp); + adapter->eims_enable_mask = (1 << (vector)) - 1; + adapter->eims_other = 1 << (vector - 1); + break; + default: + /* do nothing, since nothing else supports MSI-X */ + break; + } /* switch (hw->mac.type) */ wrfl(); } @@ -357,6 +448,9 @@ static int igb_request_msix(struct igb_adapter *adapter) goto out; ring->itr_register = E1000_EITR(0) + (vector << 2); ring->itr_val = adapter->itr; + /* overwrite the poll routine for MSIX, we've already done + * netif_napi_add */ + ring->napi.poll = &igb_clean_rx_ring_msix; vector++; } @@ -365,9 +459,6 @@ static int igb_request_msix(struct igb_adapter *adapter) if (err) goto out; - adapter->napi.poll = igb_clean_rx_ring_msix; - for (i = 0; i < adapter->num_rx_queues; i++) - adapter->rx_ring[i].napi.poll = adapter->napi.poll; igb_configure_msix(adapter); return 0; out: @@ -417,8 +508,14 @@ static void igb_set_interrupt_capability(struct igb_adapter *adapter) /* If we can't do MSI-X, try MSI */ msi_only: adapter->num_rx_queues = 1; + adapter->num_tx_queues = 1; if (!pci_enable_msi(adapter->pdev)) adapter->msi_enabled = 1; + +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + /* Notify the stack of the (possibly) reduced Tx Queue count. */ + adapter->netdev->egress_subqueue_count = adapter->num_tx_queues; +#endif return; } @@ -436,12 +533,8 @@ static int igb_request_irq(struct igb_adapter *adapter) if (adapter->msix_entries) { err = igb_request_msix(adapter); - if (!err) { - /* enable IAM, auto-mask, - * DO NOT USE EIAM or IAM in legacy mode */ - wr32(E1000_IAM, IMS_ENABLE_MASK); + if (!err) goto request_done; - } /* fall back to MSI */ igb_reset_interrupt_capability(adapter); if (!pci_enable_msi(adapter->pdev)) @@ -450,7 +543,20 @@ static int igb_request_irq(struct igb_adapter *adapter) igb_free_all_rx_resources(adapter); adapter->num_rx_queues = 1; igb_alloc_queues(adapter); + } else { + switch (hw->mac.type) { + case e1000_82575: + wr32(E1000_MSIXBM(0), + (E1000_EICR_RX_QUEUE0 | E1000_EIMS_OTHER)); + break; + case e1000_82576: + wr32(E1000_IVAR0, E1000_IVAR_VALID); + break; + default: + break; + } } + if (adapter->msi_enabled) { err = request_irq(adapter->pdev->irq, &igb_intr_msi, 0, netdev->name, netdev); @@ -502,9 +608,12 @@ static void igb_irq_disable(struct igb_adapter *adapter) struct e1000_hw *hw = &adapter->hw; if (adapter->msix_entries) { + wr32(E1000_EIAM, 0); wr32(E1000_EIMC, ~0); wr32(E1000_EIAC, 0); } + + wr32(E1000_IAM, 0); wr32(E1000_IMC, ~0); wrfl(); synchronize_irq(adapter->pdev->irq); @@ -519,13 +628,14 @@ static void igb_irq_enable(struct igb_adapter *adapter) struct e1000_hw *hw = &adapter->hw; if (adapter->msix_entries) { - wr32(E1000_EIMS, - adapter->eims_enable_mask); - wr32(E1000_EIAC, - adapter->eims_enable_mask); + wr32(E1000_EIAC, adapter->eims_enable_mask); + wr32(E1000_EIAM, adapter->eims_enable_mask); + wr32(E1000_EIMS, adapter->eims_enable_mask); wr32(E1000_IMS, E1000_IMS_LSC); - } else - wr32(E1000_IMS, IMS_ENABLE_MASK); + } else { + wr32(E1000_IMS, IMS_ENABLE_MASK); + wr32(E1000_IAM, IMS_ENABLE_MASK); + } } static void igb_update_mng_vlan(struct igb_adapter *adapter) @@ -632,12 +742,15 @@ static void igb_configure(struct igb_adapter *adapter) igb_configure_tx(adapter); igb_setup_rctl(adapter); igb_configure_rx(adapter); + + igb_rx_fifo_flush_82575(&adapter->hw); + /* call IGB_DESC_UNUSED which always leaves * at least 1 descriptor unused to make sure * next_to_use != next_to_clean */ for (i = 0; i < adapter->num_rx_queues; i++) { struct igb_ring *ring = &adapter->rx_ring[i]; - igb_alloc_rx_buffers_adv(adapter, ring, IGB_DESC_UNUSED(ring)); + igb_alloc_rx_buffers_adv(ring, IGB_DESC_UNUSED(ring)); } @@ -660,13 +773,10 @@ int igb_up(struct igb_adapter *adapter) clear_bit(__IGB_DOWN, &adapter->state); - napi_enable(&adapter->napi); - - if (adapter->msix_entries) { - for (i = 0; i < adapter->num_rx_queues; i++) - napi_enable(&adapter->rx_ring[i].napi); + for (i = 0; i < adapter->num_rx_queues; i++) + napi_enable(&adapter->rx_ring[i].napi); + if (adapter->msix_entries) igb_configure_msix(adapter); - } /* Clear any pending interrupts. */ rd32(E1000_ICR); @@ -694,6 +804,10 @@ void igb_down(struct igb_adapter *adapter) /* flush and sleep below */ netif_stop_queue(netdev); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + for (i = 0; i < adapter->num_tx_queues; i++) + netif_stop_subqueue(netdev, i); +#endif /* disable transmits in the hardware */ tctl = rd32(E1000_TCTL); @@ -703,11 +817,9 @@ void igb_down(struct igb_adapter *adapter) wrfl(); msleep(10); - napi_disable(&adapter->napi); + for (i = 0; i < adapter->num_rx_queues; i++) + napi_disable(&adapter->rx_ring[i].napi); - if (adapter->msix_entries) - for (i = 0; i < adapter->num_rx_queues; i++) - napi_disable(&adapter->rx_ring[i].napi); igb_irq_disable(adapter); del_timer_sync(&adapter->watchdog_timer); @@ -737,16 +849,23 @@ void igb_reinit_locked(struct igb_adapter *adapter) void igb_reset(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; - struct e1000_fc_info *fc = &adapter->hw.fc; + struct e1000_mac_info *mac = &hw->mac; + struct e1000_fc_info *fc = &hw->fc; u32 pba = 0, tx_space, min_tx_space, min_rx_space; u16 hwm; /* Repartition Pba for greater than 9k mtu * To take effect CTRL.RST is required. */ + if (mac->type != e1000_82576) { pba = E1000_PBA_34K; + } + else { + pba = E1000_PBA_64K; + } - if (adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) { + if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) && + (mac->type < e1000_82576)) { /* adjust PBA for jumbo frames */ wr32(E1000_PBA, pba); @@ -785,8 +904,8 @@ void igb_reset(struct igb_adapter *adapter) if (pba < min_rx_space) pba = min_rx_space; } + wr32(E1000_PBA, pba); } - wr32(E1000_PBA, pba); /* flow control settings */ /* The high water mark must be low enough to fit one full frame @@ -795,10 +914,15 @@ void igb_reset(struct igb_adapter *adapter) * - 90% of the Rx FIFO size, or * - the full Rx FIFO size minus one full frame */ hwm = min(((pba << 10) * 9 / 10), - ((pba << 10) - adapter->max_frame_size)); + ((pba << 10) - 2 * adapter->max_frame_size)); - fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */ - fc->low_water = fc->high_water - 8; + if (mac->type < e1000_82576) { + fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */ + fc->low_water = fc->high_water - 8; + } else { + fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */ + fc->low_water = fc->high_water - 16; + } fc->pause_time = 0xFFFF; fc->send_xon = 1; fc->type = fc->original_type; @@ -820,6 +944,21 @@ void igb_reset(struct igb_adapter *adapter) adapter->hw.phy.ops.get_phy_info(&adapter->hw); } +/** + * igb_is_need_ioport - determine if an adapter needs ioport resources or not + * @pdev: PCI device information struct + * + * Returns true if an adapter needs ioport resources + **/ +static int igb_is_need_ioport(struct pci_dev *pdev) +{ + switch (pdev->device) { + /* Currently there are no adapters that need ioport resources */ + default: + return false; + } +} + /** * igb_probe - Device Initialization Routine * @pdev: PCI device information struct @@ -839,13 +978,21 @@ static int __devinit igb_probe(struct pci_dev *pdev, struct e1000_hw *hw; const struct e1000_info *ei = igb_info_tbl[ent->driver_data]; unsigned long mmio_start, mmio_len; - static int cards_found; int i, err, pci_using_dac; u16 eeprom_data = 0; u16 eeprom_apme_mask = IGB_EEPROM_APME; u32 part_num; + int bars, need_ioport; - err = pci_enable_device(pdev); + /* do not allocate ioport bars when not needed */ + need_ioport = igb_is_need_ioport(pdev); + if (need_ioport) { + bars = pci_select_bars(pdev, IORESOURCE_MEM | IORESOURCE_IO); + err = pci_enable_device(pdev); + } else { + bars = pci_select_bars(pdev, IORESOURCE_MEM); + err = pci_enable_device_mem(pdev); + } if (err) return err; @@ -867,7 +1014,7 @@ static int __devinit igb_probe(struct pci_dev *pdev, } } - err = pci_request_regions(pdev, igb_driver_name); + err = pci_request_selected_regions(pdev, bars, igb_driver_name); if (err) goto err_pci_reg; @@ -875,7 +1022,11 @@ static int __devinit igb_probe(struct pci_dev *pdev, pci_save_state(pdev); err = -ENOMEM; +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + netdev = alloc_etherdev_mq(sizeof(struct igb_adapter), IGB_MAX_TX_QUEUES); +#else netdev = alloc_etherdev(sizeof(struct igb_adapter)); +#endif /* CONFIG_NETDEVICES_MULTIQUEUE */ if (!netdev) goto err_alloc_etherdev; @@ -888,6 +1039,8 @@ static int __devinit igb_probe(struct pci_dev *pdev, hw = &adapter->hw; hw->back = adapter; adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE; + adapter->bars = bars; + adapter->need_ioport = need_ioport; mmio_start = pci_resource_start(pdev, 0); mmio_len = pci_resource_len(pdev, 0); @@ -907,7 +1060,6 @@ static int __devinit igb_probe(struct pci_dev *pdev, igb_set_ethtool_ops(netdev); netdev->tx_timeout = &igb_tx_timeout; netdev->watchdog_timeo = 5 * HZ; - netif_napi_add(netdev, &adapter->napi, igb_clean, 64); netdev->vlan_rx_register = igb_vlan_rx_register; netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid; netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid; @@ -921,8 +1073,6 @@ static int __devinit igb_probe(struct pci_dev *pdev, netdev->mem_start = mmio_start; netdev->mem_end = mmio_start + mmio_len; - adapter->bd_number = cards_found; - /* PCI config space info */ hw->vendor_id = pdev->vendor; hw->device_id = pdev->device; @@ -968,11 +1118,20 @@ static int __devinit igb_probe(struct pci_dev *pdev, NETIF_F_HW_VLAN_FILTER; netdev->features |= NETIF_F_TSO; - netdev->features |= NETIF_F_TSO6; + + netdev->vlan_features |= NETIF_F_TSO; + netdev->vlan_features |= NETIF_F_TSO6; + netdev->vlan_features |= NETIF_F_HW_CSUM; + netdev->vlan_features |= NETIF_F_SG; + if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + netdev->features |= NETIF_F_MULTI_QUEUE; +#endif + netdev->features |= NETIF_F_LLTX; adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw); @@ -1050,9 +1209,12 @@ static int __devinit igb_probe(struct pci_dev *pdev, * lan on a particular port */ switch (pdev->device) { case E1000_DEV_ID_82575GB_QUAD_COPPER: + case E1000_DEV_ID_82576_QUAD_COPPER: adapter->eeprom_wol = 0; break; case E1000_DEV_ID_82575EB_FIBER_SERDES: + case E1000_DEV_ID_82576_FIBER: + case E1000_DEV_ID_82576_SERDES: /* Wake events only supported on port A for dual fiber * regardless of eeprom setting */ if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1) @@ -1073,12 +1235,27 @@ static int __devinit igb_probe(struct pci_dev *pdev, /* tell the stack to leave us alone until igb_open() is called */ netif_carrier_off(netdev); netif_stop_queue(netdev); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + for (i = 0; i < adapter->num_tx_queues; i++) + netif_stop_subqueue(netdev, i); +#endif strcpy(netdev->name, "eth%d"); err = register_netdev(netdev); if (err) goto err_register; +#ifdef CONFIG_DCA + if (dca_add_requester(&pdev->dev) == 0) { + adapter->dca_enabled = true; + dev_info(&pdev->dev, "DCA enabled\n"); + /* Always use CB2 mode, difference is masked + * in the CB driver. */ + wr32(E1000_DCA_CTRL, 2); + igb_setup_dca(adapter); + } +#endif + dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n"); /* print bus type/speed/width info */ dev_info(&pdev->dev, @@ -1102,7 +1279,6 @@ static int __devinit igb_probe(struct pci_dev *pdev, adapter->msi_enabled ? "MSI" : "legacy", adapter->num_rx_queues, adapter->num_tx_queues); - cards_found++; return 0; err_register: @@ -1123,7 +1299,7 @@ err_hw_init: err_ioremap: free_netdev(netdev); err_alloc_etherdev: - pci_release_regions(pdev); + pci_release_selected_regions(pdev, bars); err_pci_reg: err_dma: pci_disable_device(pdev); @@ -1143,6 +1319,7 @@ static void __devexit igb_remove(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; /* flush_scheduled work may reschedule our watchdog task, so * explicitly disable watchdog tasks from being rescheduled */ @@ -1152,6 +1329,15 @@ static void __devexit igb_remove(struct pci_dev *pdev) flush_scheduled_work(); +#ifdef CONFIG_DCA + if (adapter->dca_enabled) { + dev_info(&pdev->dev, "DCA disabled\n"); + dca_remove_requester(&pdev->dev); + adapter->dca_enabled = false; + wr32(E1000_DCA_CTRL, 1); + } +#endif + /* Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. */ igb_release_hw_control(adapter); @@ -1170,7 +1356,7 @@ static void __devexit igb_remove(struct pci_dev *pdev) iounmap(adapter->hw.hw_addr); if (adapter->hw.flash_address) iounmap(adapter->hw.flash_address); - pci_release_regions(pdev); + pci_release_selected_regions(pdev, adapter->bars); free_netdev(netdev); @@ -1200,9 +1386,15 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter) /* Number of supported queues. */ /* Having more queues than CPUs doesn't make sense. */ + adapter->num_rx_queues = min((u32)IGB_MAX_RX_QUEUES, (u32)num_online_cpus()); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + adapter->num_tx_queues = min(IGB_MAX_TX_QUEUES, num_online_cpus()); +#else adapter->num_tx_queues = 1; - adapter->num_rx_queues = min(IGB_MAX_RX_QUEUES, num_online_cpus()); +#endif /* CONFIG_NET_MULTI_QUEUE_DEVICE */ + /* This call may decrease the number of queues depending on + * interrupt mode. */ igb_set_interrupt_capability(adapter); if (igb_alloc_queues(adapter)) { @@ -1270,15 +1462,14 @@ static int igb_open(struct net_device *netdev) /* From here on the code is the same as igb_up() */ clear_bit(__IGB_DOWN, &adapter->state); - napi_enable(&adapter->napi); - if (adapter->msix_entries) - for (i = 0; i < adapter->num_rx_queues; i++) - napi_enable(&adapter->rx_ring[i].napi); - - igb_irq_enable(adapter); + for (i = 0; i < adapter->num_rx_queues; i++) + napi_enable(&adapter->rx_ring[i].napi); /* Clear any pending interrupts. */ rd32(E1000_ICR); + + igb_irq_enable(adapter); + /* Fire a link status change interrupt to start the watchdog. */ wr32(E1000_ICS, E1000_ICS_LSC); @@ -1364,8 +1555,6 @@ int igb_setup_tx_resources(struct igb_adapter *adapter, tx_ring->adapter = adapter; tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; - spin_lock_init(&tx_ring->tx_clean_lock); - spin_lock_init(&tx_ring->tx_lock); return 0; err: @@ -1385,6 +1574,9 @@ err: static int igb_setup_all_tx_resources(struct igb_adapter *adapter) { int i, err = 0; +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + int r_idx; +#endif for (i = 0; i < adapter->num_tx_queues; i++) { err = igb_setup_tx_resources(adapter, &adapter->tx_ring[i]); @@ -1392,12 +1584,17 @@ static int igb_setup_all_tx_resources(struct igb_adapter *adapter) dev_err(&adapter->pdev->dev, "Allocation for Tx Queue %u failed\n", i); for (i--; i >= 0; i--) - igb_free_tx_resources(adapter, - &adapter->tx_ring[i]); + igb_free_tx_resources(&adapter->tx_ring[i]); break; } } +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + for (i = 0; i < IGB_MAX_TX_QUEUES; i++) { + r_idx = i % adapter->num_tx_queues; + adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx]; + } +#endif return err; } @@ -1507,8 +1704,6 @@ int igb_setup_rx_resources(struct igb_adapter *adapter, rx_ring->pending_skb = NULL; rx_ring->adapter = adapter; - /* FIXME: do we want to setup ring->napi->poll here? */ - rx_ring->napi.poll = adapter->napi.poll; return 0; @@ -1536,8 +1731,7 @@ static int igb_setup_all_rx_resources(struct igb_adapter *adapter) dev_err(&adapter->pdev->dev, "Allocation for Rx Queue %u failed\n", i); for (i--; i >= 0; i--) - igb_free_rx_resources(adapter, - &adapter->rx_ring[i]); + igb_free_rx_resources(&adapter->rx_ring[i]); break; } } @@ -1564,10 +1758,12 @@ static void igb_setup_rctl(struct igb_adapter *adapter) E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT); - /* disable the stripping of CRC because it breaks - * BMC firmware connected over SMBUS - rctl |= E1000_RCTL_SECRC; + /* + * enable stripping of CRC. It's unlikely this will break BMC + * redirection as it did with e1000. Newer features require + * that the HW strips the CRC. */ + rctl |= E1000_RCTL_SECRC; rctl &= ~E1000_RCTL_SBP; @@ -1699,7 +1895,10 @@ static void igb_configure_rx(struct igb_adapter *adapter) get_random_bytes(&random[0], 40); - shift = 6; + if (hw->mac.type >= e1000_82576) + shift = 0; + else + shift = 6; for (j = 0; j < (32 * 4); j++) { reta.bytes[j & 3] = (j % adapter->num_rx_queues) << shift; @@ -1765,12 +1964,11 @@ static void igb_configure_rx(struct igb_adapter *adapter) * * Free all transmit software resources **/ -static void igb_free_tx_resources(struct igb_adapter *adapter, - struct igb_ring *tx_ring) +static void igb_free_tx_resources(struct igb_ring *tx_ring) { - struct pci_dev *pdev = adapter->pdev; + struct pci_dev *pdev = tx_ring->adapter->pdev; - igb_clean_tx_ring(adapter, tx_ring); + igb_clean_tx_ring(tx_ring); vfree(tx_ring->buffer_info); tx_ring->buffer_info = NULL; @@ -1791,7 +1989,7 @@ static void igb_free_all_tx_resources(struct igb_adapter *adapter) int i; for (i = 0; i < adapter->num_tx_queues; i++) - igb_free_tx_resources(adapter, &adapter->tx_ring[i]); + igb_free_tx_resources(&adapter->tx_ring[i]); } static void igb_unmap_and_free_tx_resource(struct igb_adapter *adapter, @@ -1817,9 +2015,9 @@ static void igb_unmap_and_free_tx_resource(struct igb_adapter *adapter, * @adapter: board private structure * @tx_ring: ring to be cleaned **/ -static void igb_clean_tx_ring(struct igb_adapter *adapter, - struct igb_ring *tx_ring) +static void igb_clean_tx_ring(struct igb_ring *tx_ring) { + struct igb_adapter *adapter = tx_ring->adapter; struct igb_buffer *buffer_info; unsigned long size; unsigned int i; @@ -1856,7 +2054,7 @@ static void igb_clean_all_tx_rings(struct igb_adapter *adapter) int i; for (i = 0; i < adapter->num_tx_queues; i++) - igb_clean_tx_ring(adapter, &adapter->tx_ring[i]); + igb_clean_tx_ring(&adapter->tx_ring[i]); } /** @@ -1866,12 +2064,11 @@ static void igb_clean_all_tx_rings(struct igb_adapter *adapter) * * Free all receive software resources **/ -static void igb_free_rx_resources(struct igb_adapter *adapter, - struct igb_ring *rx_ring) +static void igb_free_rx_resources(struct igb_ring *rx_ring) { - struct pci_dev *pdev = adapter->pdev; + struct pci_dev *pdev = rx_ring->adapter->pdev; - igb_clean_rx_ring(adapter, rx_ring); + igb_clean_rx_ring(rx_ring); vfree(rx_ring->buffer_info); rx_ring->buffer_info = NULL; @@ -1892,7 +2089,7 @@ static void igb_free_all_rx_resources(struct igb_adapter *adapter) int i; for (i = 0; i < adapter->num_rx_queues; i++) - igb_free_rx_resources(adapter, &adapter->rx_ring[i]); + igb_free_rx_resources(&adapter->rx_ring[i]); } /** @@ -1900,9 +2097,9 @@ static void igb_free_all_rx_resources(struct igb_adapter *adapter) * @adapter: board private structure * @rx_ring: ring to free buffers from **/ -static void igb_clean_rx_ring(struct igb_adapter *adapter, - struct igb_ring *rx_ring) +static void igb_clean_rx_ring(struct igb_ring *rx_ring) { + struct igb_adapter *adapter = rx_ring->adapter; struct igb_buffer *buffer_info; struct pci_dev *pdev = adapter->pdev; unsigned long size; @@ -1966,7 +2163,7 @@ static void igb_clean_all_rx_rings(struct igb_adapter *adapter) int i; for (i = 0; i < adapter->num_rx_queues; i++) - igb_clean_rx_ring(adapter, &adapter->rx_ring[i]); + igb_clean_rx_ring(&adapter->rx_ring[i]); } /** @@ -2027,7 +2224,7 @@ static void igb_set_multi(struct net_device *netdev) if (!netdev->mc_count) { /* nothing to program, so clear mc list */ - igb_update_mc_addr_list(hw, NULL, 0, 1, + igb_update_mc_addr_list_82575(hw, NULL, 0, 1, mac->rar_entry_count); return; } @@ -2045,7 +2242,8 @@ static void igb_set_multi(struct net_device *netdev) memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN); mc_ptr = mc_ptr->next; } - igb_update_mc_addr_list(hw, mta_list, i, 1, mac->rar_entry_count); + igb_update_mc_addr_list_82575(hw, mta_list, i, 1, + mac->rar_entry_count); kfree(mta_list); } @@ -2080,6 +2278,9 @@ static void igb_watchdog_task(struct work_struct *work) struct e1000_mac_info *mac = &adapter->hw.mac; u32 link; s32 ret_val; +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + int i; +#endif if ((netif_carrier_ok(netdev)) && (rd32(E1000_STATUS) & E1000_STATUS_LU)) @@ -2136,6 +2337,10 @@ static void igb_watchdog_task(struct work_struct *work) netif_carrier_on(netdev); netif_wake_queue(netdev); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + for (i = 0; i < adapter->num_tx_queues; i++) + netif_wake_subqueue(netdev, i); +#endif if (!test_bit(__IGB_DOWN, &adapter->state)) mod_timer(&adapter->phy_info_timer, @@ -2148,6 +2353,10 @@ static void igb_watchdog_task(struct work_struct *work) dev_info(&adapter->pdev->dev, "NIC Link is Down\n"); netif_carrier_off(netdev); netif_stop_queue(netdev); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + for (i = 0; i < adapter->num_tx_queues; i++) + netif_stop_subqueue(netdev, i); +#endif if (!test_bit(__IGB_DOWN, &adapter->state)) mod_timer(&adapter->phy_info_timer, round_jiffies(jiffies + 2 * HZ)); @@ -2508,7 +2717,7 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter, context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd); context_desc->seqnum_seed = 0; context_desc->mss_l4len_idx = - cpu_to_le32(tx_ring->eims_value >> 4); + cpu_to_le32(tx_ring->queue_index << 4); buffer_info->time_stamp = jiffies; buffer_info->dma = 0; @@ -2611,7 +2820,7 @@ static inline void igb_tx_queue_adv(struct igb_adapter *adapter, if (tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_TSO | IGB_TX_FLAGS_VLAN)) - olinfo_status |= tx_ring->eims_value >> 4; + olinfo_status |= tx_ring->queue_index << 4; olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT); @@ -2647,7 +2856,12 @@ static int __igb_maybe_stop_tx(struct net_device *netdev, { struct igb_adapter *adapter = netdev_priv(netdev); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + netif_stop_subqueue(netdev, tx_ring->queue_index); +#else netif_stop_queue(netdev); +#endif + /* Herbert's original patch had: * smp_mb__after_netif_stop_queue(); * but since that doesn't exist yet, just open code it. */ @@ -2659,7 +2873,11 @@ static int __igb_maybe_stop_tx(struct net_device *netdev, return -EBUSY; /* A reprieve! */ - netif_start_queue(netdev); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + netif_wake_subqueue(netdev, tx_ring->queue_index); +#else + netif_wake_queue(netdev); +#endif ++adapter->restart_queue; return 0; } @@ -2681,7 +2899,6 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb, struct igb_adapter *adapter = netdev_priv(netdev); unsigned int tx_flags = 0; unsigned int len; - unsigned long irq_flags; u8 hdr_len = 0; int tso = 0; @@ -2697,10 +2914,6 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb, return NETDEV_TX_OK; } - if (!spin_trylock_irqsave(&tx_ring->tx_lock, irq_flags)) - /* Collision - tell upper layer to requeue */ - return NETDEV_TX_LOCKED; - /* need: 1 descriptor per page, * + 2 desc gap to keep tail from touching head, * + 1 desc for skb->data, @@ -2708,7 +2921,6 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb, * otherwise try next time */ if (igb_maybe_stop_tx(netdev, tx_ring, skb_shinfo(skb)->nr_frags + 4)) { /* this is a hard error */ - spin_unlock_irqrestore(&tx_ring->tx_lock, irq_flags); return NETDEV_TX_BUSY; } @@ -2717,12 +2929,14 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb, tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT); } + if (skb->protocol == htons(ETH_P_IP)) + tx_flags |= IGB_TX_FLAGS_IPV4; + tso = skb_is_gso(skb) ? igb_tso_adv(adapter, tx_ring, skb, tx_flags, &hdr_len) : 0; if (tso < 0) { dev_kfree_skb_any(skb); - spin_unlock_irqrestore(&tx_ring->tx_lock, irq_flags); return NETDEV_TX_OK; } @@ -2732,9 +2946,6 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb, if (skb->ip_summed == CHECKSUM_PARTIAL) tx_flags |= IGB_TX_FLAGS_CSUM; - if (skb->protocol == htons(ETH_P_IP)) - tx_flags |= IGB_TX_FLAGS_IPV4; - igb_tx_queue_adv(adapter, tx_ring, tx_flags, igb_tx_map_adv(adapter, tx_ring, skb), skb->len, hdr_len); @@ -2744,14 +2955,22 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb, /* Make sure there is space in the ring for the next send. */ igb_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 4); - spin_unlock_irqrestore(&tx_ring->tx_lock, irq_flags); return NETDEV_TX_OK; } static int igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *netdev) { struct igb_adapter *adapter = netdev_priv(netdev); - struct igb_ring *tx_ring = &adapter->tx_ring[0]; + struct igb_ring *tx_ring; + +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + int r_idx = 0; + r_idx = skb->queue_mapping & (IGB_MAX_TX_QUEUES - 1); + tx_ring = adapter->multi_tx_table[r_idx]; +#else + tx_ring = &adapter->tx_ring[0]; +#endif + /* This goes back to the question of how to logically map a tx queue * to a flow. Right now, performance is impacted slightly negatively @@ -3010,26 +3229,19 @@ static irqreturn_t igb_msix_other(int irq, void *data) struct net_device *netdev = data; struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - u32 eicr; - /* disable interrupts from the "other" bit, avoid re-entry */ - wr32(E1000_EIMC, E1000_EIMS_OTHER); - - eicr = rd32(E1000_EICR); - - if (eicr & E1000_EIMS_OTHER) { - u32 icr = rd32(E1000_ICR); - /* reading ICR causes bit 31 of EICR to be cleared */ - if (!(icr & E1000_ICR_LSC)) - goto no_link_interrupt; - hw->mac.get_link_status = 1; - /* guard against interrupt when we're going down */ - if (!test_bit(__IGB_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, jiffies + 1); - } + u32 icr = rd32(E1000_ICR); + /* reading ICR causes bit 31 of EICR to be cleared */ + if (!(icr & E1000_ICR_LSC)) + goto no_link_interrupt; + hw->mac.get_link_status = 1; + /* guard against interrupt when we're going down */ + if (!test_bit(__IGB_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + no_link_interrupt: wr32(E1000_IMS, E1000_IMS_LSC); - wr32(E1000_EIMS, E1000_EIMS_OTHER); + wr32(E1000_EIMS, adapter->eims_other); return IRQ_HANDLED; } @@ -3042,15 +3254,21 @@ static irqreturn_t igb_msix_tx(int irq, void *data) if (!tx_ring->itr_val) wr32(E1000_EIMC, tx_ring->eims_value); - +#ifdef CONFIG_DCA + if (adapter->dca_enabled) + igb_update_tx_dca(tx_ring); +#endif tx_ring->total_bytes = 0; tx_ring->total_packets = 0; - if (!igb_clean_tx_irq(adapter, tx_ring)) + + /* auto mask will automatically reenable the interrupt when we write + * EICS */ + if (!igb_clean_tx_irq(tx_ring)) /* Ring was not completely cleaned, so fire another interrupt */ wr32(E1000_EICS, tx_ring->eims_value); - - if (!tx_ring->itr_val) + else wr32(E1000_EIMS, tx_ring->eims_value); + return IRQ_HANDLED; } @@ -3060,24 +3278,144 @@ static irqreturn_t igb_msix_rx(int irq, void *data) struct igb_adapter *adapter = rx_ring->adapter; struct e1000_hw *hw = &adapter->hw; - if (!rx_ring->itr_val) - wr32(E1000_EIMC, rx_ring->eims_value); + /* Write the ITR value calculated at the end of the + * previous interrupt. + */ - if (netif_rx_schedule_prep(adapter->netdev, &rx_ring->napi)) { - rx_ring->total_bytes = 0; - rx_ring->total_packets = 0; - rx_ring->no_itr_adjust = 0; + if (adapter->set_itr) { + wr32(rx_ring->itr_register, + 1000000000 / (rx_ring->itr_val * 256)); + adapter->set_itr = 0; + } + + if (netif_rx_schedule_prep(adapter->netdev, &rx_ring->napi)) __netif_rx_schedule(adapter->netdev, &rx_ring->napi); - } else { - if (!rx_ring->no_itr_adjust) { - igb_lower_rx_eitr(adapter, rx_ring); - rx_ring->no_itr_adjust = 1; + +#ifdef CONFIG_DCA + if (adapter->dca_enabled) + igb_update_rx_dca(rx_ring); +#endif + return IRQ_HANDLED; +} + +#ifdef CONFIG_DCA +static void igb_update_rx_dca(struct igb_ring *rx_ring) +{ + u32 dca_rxctrl; + struct igb_adapter *adapter = rx_ring->adapter; + struct e1000_hw *hw = &adapter->hw; + int cpu = get_cpu(); + int q = rx_ring - adapter->rx_ring; + + if (rx_ring->cpu != cpu) { + dca_rxctrl = rd32(E1000_DCA_RXCTRL(q)); + if (hw->mac.type == e1000_82576) { + dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576; + dca_rxctrl |= dca_get_tag(cpu) << + E1000_DCA_RXCTRL_CPUID_SHIFT; + } else { + dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK; + dca_rxctrl |= dca_get_tag(cpu); } + dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN; + dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN; + dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN; + wr32(E1000_DCA_RXCTRL(q), dca_rxctrl); + rx_ring->cpu = cpu; } + put_cpu(); +} - return IRQ_HANDLED; +static void igb_update_tx_dca(struct igb_ring *tx_ring) +{ + u32 dca_txctrl; + struct igb_adapter *adapter = tx_ring->adapter; + struct e1000_hw *hw = &adapter->hw; + int cpu = get_cpu(); + int q = tx_ring - adapter->tx_ring; + + if (tx_ring->cpu != cpu) { + dca_txctrl = rd32(E1000_DCA_TXCTRL(q)); + if (hw->mac.type == e1000_82576) { + dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576; + dca_txctrl |= dca_get_tag(cpu) << + E1000_DCA_TXCTRL_CPUID_SHIFT; + } else { + dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK; + dca_txctrl |= dca_get_tag(cpu); + } + dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN; + wr32(E1000_DCA_TXCTRL(q), dca_txctrl); + tx_ring->cpu = cpu; + } + put_cpu(); } +static void igb_setup_dca(struct igb_adapter *adapter) +{ + int i; + + if (!(adapter->dca_enabled)) + return; + + for (i = 0; i < adapter->num_tx_queues; i++) { + adapter->tx_ring[i].cpu = -1; + igb_update_tx_dca(&adapter->tx_ring[i]); + } + for (i = 0; i < adapter->num_rx_queues; i++) { + adapter->rx_ring[i].cpu = -1; + igb_update_rx_dca(&adapter->rx_ring[i]); + } +} + +static int __igb_notify_dca(struct device *dev, void *data) +{ + struct net_device *netdev = dev_get_drvdata(dev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + unsigned long event = *(unsigned long *)data; + + switch (event) { + case DCA_PROVIDER_ADD: + /* if already enabled, don't do it again */ + if (adapter->dca_enabled) + break; + adapter->dca_enabled = true; + /* Always use CB2 mode, difference is masked + * in the CB driver. */ + wr32(E1000_DCA_CTRL, 2); + if (dca_add_requester(dev) == 0) { + dev_info(&adapter->pdev->dev, "DCA enabled\n"); + igb_setup_dca(adapter); + break; + } + /* Fall Through since DCA is disabled. */ + case DCA_PROVIDER_REMOVE: + if (adapter->dca_enabled) { + /* without this a class_device is left + * hanging around in the sysfs model */ + dca_remove_requester(dev); + dev_info(&adapter->pdev->dev, "DCA disabled\n"); + adapter->dca_enabled = false; + wr32(E1000_DCA_CTRL, 1); + } + break; + } + + return 0; +} + +static int igb_notify_dca(struct notifier_block *nb, unsigned long event, + void *p) +{ + int ret_val; + + ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event, + __igb_notify_dca); + + return ret_val ? NOTIFY_BAD : NOTIFY_DONE; +} +#endif /* CONFIG_DCA */ /** * igb_intr_msi - Interrupt Handler @@ -3088,7 +3426,6 @@ static irqreturn_t igb_intr_msi(int irq, void *data) { struct net_device *netdev = data; struct igb_adapter *adapter = netdev_priv(netdev); - struct napi_struct *napi = &adapter->napi; struct e1000_hw *hw = &adapter->hw; /* read ICR disables interrupts using IAM */ u32 icr = rd32(E1000_ICR); @@ -3097,25 +3434,17 @@ static irqreturn_t igb_intr_msi(int irq, void *data) * previous interrupt. */ if (adapter->set_itr) { - wr32(E1000_ITR, - 1000000000 / (adapter->itr * 256)); + wr32(E1000_ITR, 1000000000 / (adapter->itr * 256)); adapter->set_itr = 0; } - /* read ICR disables interrupts using IAM */ if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { hw->mac.get_link_status = 1; if (!test_bit(__IGB_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies + 1); } - if (netif_rx_schedule_prep(netdev, napi)) { - adapter->tx_ring->total_bytes = 0; - adapter->tx_ring->total_packets = 0; - adapter->rx_ring->total_bytes = 0; - adapter->rx_ring->total_packets = 0; - __netif_rx_schedule(netdev, napi); - } + netif_rx_schedule(netdev, &adapter->rx_ring[0].napi); return IRQ_HANDLED; } @@ -3129,7 +3458,6 @@ static irqreturn_t igb_intr(int irq, void *data) { struct net_device *netdev = data; struct igb_adapter *adapter = netdev_priv(netdev); - struct napi_struct *napi = &adapter->napi; struct e1000_hw *hw = &adapter->hw; /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No * need for the IMC write */ @@ -3142,8 +3470,7 @@ static irqreturn_t igb_intr(int irq, void *data) * previous interrupt. */ if (adapter->set_itr) { - wr32(E1000_ITR, - 1000000000 / (adapter->itr * 256)); + wr32(E1000_ITR, 1000000000 / (adapter->itr * 256)); adapter->set_itr = 0; } @@ -3161,55 +3488,39 @@ static irqreturn_t igb_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } - if (netif_rx_schedule_prep(netdev, napi)) { - adapter->tx_ring->total_bytes = 0; - adapter->rx_ring->total_bytes = 0; - adapter->tx_ring->total_packets = 0; - adapter->rx_ring->total_packets = 0; - __netif_rx_schedule(netdev, napi); - } + netif_rx_schedule(netdev, &adapter->rx_ring[0].napi); return IRQ_HANDLED; } /** - * igb_clean - NAPI Rx polling callback - * @adapter: board private structure + * igb_poll - NAPI Rx polling callback + * @napi: napi polling structure + * @budget: count of how many packets we should handle **/ -static int igb_clean(struct napi_struct *napi, int budget) +static int igb_poll(struct napi_struct *napi, int budget) { - struct igb_adapter *adapter = container_of(napi, struct igb_adapter, - napi); + struct igb_ring *rx_ring = container_of(napi, struct igb_ring, napi); + struct igb_adapter *adapter = rx_ring->adapter; struct net_device *netdev = adapter->netdev; - int tx_clean_complete = 1, work_done = 0; - int i; + int tx_clean_complete, work_done = 0; - /* Must NOT use netdev_priv macro here. */ - adapter = netdev->priv; - - /* Keep link state information with original netdev */ - if (!netif_carrier_ok(netdev)) - goto quit_polling; - - /* igb_clean is called per-cpu. This lock protects tx_ring[i] from - * being cleaned by multiple cpus simultaneously. A failure obtaining - * the lock means tx_ring[i] is currently being cleaned anyway. */ - for (i = 0; i < adapter->num_tx_queues; i++) { - if (spin_trylock(&adapter->tx_ring[i].tx_clean_lock)) { - tx_clean_complete &= igb_clean_tx_irq(adapter, - &adapter->tx_ring[i]); - spin_unlock(&adapter->tx_ring[i].tx_clean_lock); - } - } + /* this poll routine only supports one tx and one rx queue */ +#ifdef CONFIG_DCA + if (adapter->dca_enabled) + igb_update_tx_dca(&adapter->tx_ring[0]); +#endif + tx_clean_complete = igb_clean_tx_irq(&adapter->tx_ring[0]); - for (i = 0; i < adapter->num_rx_queues; i++) - igb_clean_rx_irq_adv(adapter, &adapter->rx_ring[i], &work_done, - adapter->rx_ring[i].napi.weight); +#ifdef CONFIG_DCA + if (adapter->dca_enabled) + igb_update_rx_dca(&adapter->rx_ring[0]); +#endif + igb_clean_rx_irq_adv(&adapter->rx_ring[0], &work_done, budget); /* If no Tx and not enough Rx work done, exit the polling mode */ if ((tx_clean_complete && (work_done < budget)) || !netif_running(netdev)) { -quit_polling: if (adapter->itr_setting & 3) igb_set_itr(adapter, E1000_ITR, false); netif_rx_complete(netdev, napi); @@ -3233,7 +3544,11 @@ static int igb_clean_rx_ring_msix(struct napi_struct *napi, int budget) if (!netif_carrier_ok(netdev)) goto quit_polling; - igb_clean_rx_irq_adv(adapter, rx_ring, &work_done, budget); +#ifdef CONFIG_DCA + if (adapter->dca_enabled) + igb_update_rx_dca(rx_ring); +#endif + igb_clean_rx_irq_adv(rx_ring, &work_done, budget); /* If not enough Rx work done, exit the polling mode */ @@ -3251,6 +3566,10 @@ quit_polling: else if (mean_size > IGB_DYN_ITR_LENGTH_HIGH) igb_lower_rx_eitr(adapter, rx_ring); } + + if (!test_bit(__IGB_DOWN, &adapter->state)) + wr32(E1000_EIMS, rx_ring->eims_value); + return 0; } @@ -3268,11 +3587,11 @@ static inline u32 get_head(struct igb_ring *tx_ring) * @adapter: board private structure * returns true if ring is completely cleaned **/ -static bool igb_clean_tx_irq(struct igb_adapter *adapter, - struct igb_ring *tx_ring) +static bool igb_clean_tx_irq(struct igb_ring *tx_ring) { - struct net_device *netdev = adapter->netdev; + struct igb_adapter *adapter = tx_ring->adapter; struct e1000_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; struct e1000_tx_desc *tx_desc; struct igb_buffer *buffer_info; struct sk_buff *skb; @@ -3334,11 +3653,19 @@ done_cleaning: * sees the new next_to_clean. */ smp_mb(); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) && + !(test_bit(__IGB_DOWN, &adapter->state))) { + netif_wake_subqueue(netdev, tx_ring->queue_index); + ++adapter->restart_queue; + } +#else if (netif_queue_stopped(netdev) && !(test_bit(__IGB_DOWN, &adapter->state))) { netif_wake_queue(netdev); ++adapter->restart_queue; } +#endif } if (tx_ring->detect_tx_hung) { @@ -3355,7 +3682,7 @@ done_cleaning: /* detected Tx unit hang */ dev_err(&adapter->pdev->dev, "Detected Tx Unit Hang\n" - " Tx Queue <%lu>\n" + " Tx Queue <%d>\n" " TDH <%x>\n" " TDT <%x>\n" " next_to_use <%x>\n" @@ -3365,8 +3692,7 @@ done_cleaning: " time_stamp <%lx>\n" " jiffies <%lx>\n" " desc.status <%x>\n", - (unsigned long)((tx_ring - adapter->tx_ring) / - sizeof(struct igb_ring)), + tx_ring->queue_index, readl(adapter->hw.hw_addr + tx_ring->head), readl(adapter->hw.hw_addr + tx_ring->tail), tx_ring->next_to_use, @@ -3375,11 +3701,17 @@ done_cleaning: tx_ring->buffer_info[i].time_stamp, jiffies, tx_desc->upper.fields.status); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + netif_stop_subqueue(netdev, tx_ring->queue_index); +#else netif_stop_queue(netdev); +#endif } } tx_ring->total_bytes += total_bytes; tx_ring->total_packets += total_packets; + tx_ring->tx_stats.bytes += total_bytes; + tx_ring->tx_stats.packets += total_packets; adapter->net_stats.tx_bytes += total_bytes; adapter->net_stats.tx_packets += total_packets; return retval; @@ -3398,8 +3730,7 @@ static void igb_receive_skb(struct igb_adapter *adapter, u8 status, __le16 vlan, { if (adapter->vlgrp && (status & E1000_RXD_STAT_VP)) vlan_hwaccel_receive_skb(skb, adapter->vlgrp, - le16_to_cpu(vlan) & - E1000_RXD_SPC_VLAN_MASK); + le16_to_cpu(vlan)); else netif_receive_skb(skb); } @@ -3427,10 +3758,10 @@ static inline void igb_rx_checksum_adv(struct igb_adapter *adapter, adapter->hw_csum_good++; } -static bool igb_clean_rx_irq_adv(struct igb_adapter *adapter, - struct igb_ring *rx_ring, - int *work_done, int budget) +static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring, + int *work_done, int budget) { + struct igb_adapter *adapter = rx_ring->adapter; struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; union e1000_adv_rx_desc *rx_desc , *next_rxd; @@ -3523,7 +3854,6 @@ static bool igb_clean_rx_irq_adv(struct igb_adapter *adapter, } } send_up: - pskb_trim(skb, skb->len - 4); i++; if (i == rx_ring->count) i = 0; @@ -3553,8 +3883,7 @@ next_desc: /* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IGB_RX_BUFFER_WRITE) { - igb_alloc_rx_buffers_adv(adapter, rx_ring, - cleaned_count); + igb_alloc_rx_buffers_adv(rx_ring, cleaned_count); cleaned_count = 0; } @@ -3569,7 +3898,7 @@ out: cleaned_count = IGB_DESC_UNUSED(rx_ring); if (cleaned_count) - igb_alloc_rx_buffers_adv(adapter, rx_ring, cleaned_count); + igb_alloc_rx_buffers_adv(rx_ring, cleaned_count); rx_ring->total_packets += total_packets; rx_ring->total_bytes += total_bytes; @@ -3585,10 +3914,10 @@ out: * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split * @adapter: address of board private structure **/ -static void igb_alloc_rx_buffers_adv(struct igb_adapter *adapter, - struct igb_ring *rx_ring, +static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count) { + struct igb_adapter *adapter = rx_ring->adapter; struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; union e1000_adv_rx_desc *rx_desc; @@ -3878,7 +4207,7 @@ static int igb_suspend(struct pci_dev *pdev, pm_message_t state) struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - u32 ctrl, ctrl_ext, rctl, status; + u32 ctrl, rctl, status; u32 wufc = adapter->wol; #ifdef CONFIG_PM int retval = 0; @@ -3921,33 +4250,24 @@ static int igb_suspend(struct pci_dev *pdev, pm_message_t state) ctrl |= E1000_CTRL_ADVD3WUC; wr32(E1000_CTRL, ctrl); - if (adapter->hw.phy.media_type == e1000_media_type_fiber || - adapter->hw.phy.media_type == - e1000_media_type_internal_serdes) { - /* keep the laser running in D3 */ - ctrl_ext = rd32(E1000_CTRL_EXT); - ctrl_ext |= E1000_CTRL_EXT_SDP7_DATA; - wr32(E1000_CTRL_EXT, ctrl_ext); - } - /* Allow time for pending master requests to run */ igb_disable_pcie_master(&adapter->hw); wr32(E1000_WUC, E1000_WUC_PME_EN); wr32(E1000_WUFC, wufc); - pci_enable_wake(pdev, PCI_D3hot, 1); - pci_enable_wake(pdev, PCI_D3cold, 1); } else { wr32(E1000_WUC, 0); wr32(E1000_WUFC, 0); - pci_enable_wake(pdev, PCI_D3hot, 0); - pci_enable_wake(pdev, PCI_D3cold, 0); } - /* make sure adapter isn't asleep if manageability is enabled */ - if (adapter->en_mng_pt) { + /* make sure adapter isn't asleep if manageability/wol is enabled */ + if (wufc || adapter->en_mng_pt) { pci_enable_wake(pdev, PCI_D3hot, 1); pci_enable_wake(pdev, PCI_D3cold, 1); + } else { + igb_shutdown_fiber_serdes_link_82575(hw); + pci_enable_wake(pdev, PCI_D3hot, 0); + pci_enable_wake(pdev, PCI_D3cold, 0); } /* Release control of h/w to f/w. If f/w is AMT enabled, this @@ -3971,7 +4291,11 @@ static int igb_resume(struct pci_dev *pdev) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - err = pci_enable_device(pdev); + + if (adapter->need_ioport) + err = pci_enable_device(pdev); + else + err = pci_enable_device_mem(pdev); if (err) { dev_err(&pdev->dev, "igb: Cannot enable PCI device from suspend\n"); @@ -4027,10 +4351,10 @@ static void igb_netpoll(struct net_device *netdev) igb_irq_disable(adapter); for (i = 0; i < adapter->num_tx_queues; i++) - igb_clean_tx_irq(adapter, &adapter->tx_ring[i]); + igb_clean_tx_irq(&adapter->tx_ring[i]); for (i = 0; i < adapter->num_rx_queues; i++) - igb_clean_rx_irq_adv(adapter, &adapter->rx_ring[i], + igb_clean_rx_irq_adv(&adapter->rx_ring[i], &work_done, adapter->rx_ring[i].napi.weight); @@ -4074,8 +4398,13 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct igb_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + int err; - if (pci_enable_device(pdev)) { + if (adapter->need_ioport) + err = pci_enable_device(pdev); + else + err = pci_enable_device_mem(pdev); + if (err) { dev_err(&pdev->dev, "Cannot re-enable PCI device after reset.\n"); return PCI_ERS_RESULT_DISCONNECT;