igb: update ring and adapter structure to improve performance
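
In summary, this patch drops the per-ring rx_buffer_len field and the cached head register pointer. Receives always use packet split now: a fixed IGB_RX_HDR_LEN header buffer mapped with dma_map_single() paired with a half-page packet buffer mapped with dma_map_page(). Buffer allocation in igb_alloc_rx_buffers_adv() is split out into igb_alloc_mapped_skb() and igb_alloc_mapped_page(), skb_record_rx_queue() moves from the per-packet receive path into allocation, max_frame_size now accounts for VLAN_HLEN, and queues are disabled by writing 0 to TXDCTL/RXDCTL instead of a read-modify-write.

Below is a minimal stand-alone sketch (user-space C with made-up values, not part of the patch) of the biased u16 index walk used by the reworked igb_alloc_rx_buffers_adv() refill loop: subtracting the ring count up front turns the wrap test into a cheap "if (!i)".

    #include <stdint.h>
    #include <stdio.h>

    #define RING_COUNT 8                    /* assumed ring size, illustration only */

    int main(void)
    {
            uint16_t next_to_use = 6;       /* assumed starting slot */
            uint16_t refill = 5;            /* descriptors to refill */
            uint16_t i = next_to_use;

            i -= RING_COUNT;                /* bias the index below zero (mod 2^16) */

            while (refill--) {
                    /* the slot actually programmed is i + RING_COUNT (mod 2^16) */
                    printf("program slot %u\n", (unsigned)(uint16_t)(i + RING_COUNT));
                    i++;
                    if (!i)                 /* stepped past the last slot: wrap */
                            i -= RING_COUNT;
            }

            i += RING_COUNT;                /* undo the bias */
            printf("new next_to_use = %u\n", (unsigned)i);
            return 0;
    }

Compiled and run, this prints slots 6, 7, 0, 1, 2 and a new next_to_use of 3, matching what the driver loop does with its descriptor and buffer_info pointers.
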
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 3cb1bc96bf70ec20553ff490c4974ee82efb7173..9fa2ad01c6b7e9e4ec9059fee938eea86bf4fd4a 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -517,16 +517,14 @@ rx_ring_summary:
                                                DUMP_PREFIX_ADDRESS,
                                                16, 1,
                                                phys_to_virt(buffer_info->dma),
-                                               rx_ring->rx_buffer_len, true);
-                                       if (rx_ring->rx_buffer_len
-                                               < IGB_RXBUFFER_1024)
-                                               print_hex_dump(KERN_INFO, "",
-                                                 DUMP_PREFIX_ADDRESS,
-                                                 16, 1,
-                                                 phys_to_virt(
-                                                   buffer_info->page_dma +
-                                                   buffer_info->page_offset),
-                                                 PAGE_SIZE/2, true);
+                                               IGB_RX_HDR_LEN, true);
+                                       print_hex_dump(KERN_INFO, "",
+                                         DUMP_PREFIX_ADDRESS,
+                                         16, 1,
+                                         phys_to_virt(
+                                           buffer_info->page_dma +
+                                           buffer_info->page_offset),
+                                         PAGE_SIZE/2, true);
                                }
                        }
 
@@ -707,7 +705,6 @@ static int igb_alloc_queues(struct igb_adapter *adapter)
                ring->queue_index = i;
                ring->dev = &adapter->pdev->dev;
                ring->netdev = adapter->netdev;
-               ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
                ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
                /* set flag indicating ring supports SCTP checksum offload */
                if (adapter->hw.mac.type >= e1000_82576)
@@ -2396,7 +2393,8 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter)
        adapter->rx_itr_setting = IGB_DEFAULT_ITR;
        adapter->tx_itr_setting = IGB_DEFAULT_ITR;
 
-       adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+       adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
+                                 VLAN_HLEN;
        adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
 
        spin_lock_init(&adapter->stats64_lock);
@@ -2666,14 +2664,12 @@ void igb_configure_tx_ring(struct igb_adapter *adapter,
                            struct igb_ring *ring)
 {
        struct e1000_hw *hw = &adapter->hw;
-       u32 txdctl;
+       u32 txdctl = 0;
        u64 tdba = ring->dma;
        int reg_idx = ring->reg_idx;
 
        /* disable the queue */
-       txdctl = rd32(E1000_TXDCTL(reg_idx));
-       wr32(E1000_TXDCTL(reg_idx),
-                       txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
+       wr32(E1000_TXDCTL(reg_idx), 0);
        wrfl();
        mdelay(10);
 
@@ -2683,9 +2679,8 @@ void igb_configure_tx_ring(struct igb_adapter *adapter,
                        tdba & 0x00000000ffffffffULL);
        wr32(E1000_TDBAH(reg_idx), tdba >> 32);
 
-       ring->head = hw->hw_addr + E1000_TDH(reg_idx);
        ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
-       writel(0, ring->head);
+       wr32(E1000_TDH(reg_idx), 0);
        writel(0, ring->tail);
 
        txdctl |= IGB_TX_PTHRESH;
@@ -2964,16 +2959,19 @@ static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
  **/
 static void igb_rlpml_set(struct igb_adapter *adapter)
 {
-       u32 max_frame_size;
+       u32 max_frame_size = adapter->max_frame_size;
        struct e1000_hw *hw = &adapter->hw;
        u16 pf_id = adapter->vfs_allocated_count;
 
-       max_frame_size = adapter->max_frame_size + VLAN_TAG_SIZE;
-
-       /* if vfs are enabled we set RLPML to the largest possible request
-        * size and set the VMOLR RLPML to the size we need */
        if (pf_id) {
                igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
+               /*
+                * If we're in VMDQ or SR-IOV mode, then set global RLPML
+                * to our max jumbo frame size, in case we need to enable
+                * jumbo frames on one of the rings later.
+                * This will not pass over-length frames into the default
+                * queue because it's gated by the VMOLR.RLPML.
+                */
                max_frame_size = MAX_JUMBO_FRAME_SIZE;
        }
 
@@ -3028,12 +3026,10 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
        struct e1000_hw *hw = &adapter->hw;
        u64 rdba = ring->dma;
        int reg_idx = ring->reg_idx;
-       u32 srrctl, rxdctl;
+       u32 srrctl = 0, rxdctl = 0;
 
        /* disable the queue */
-       rxdctl = rd32(E1000_RXDCTL(reg_idx));
-       wr32(E1000_RXDCTL(reg_idx),
-                       rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
+       wr32(E1000_RXDCTL(reg_idx), 0);
 
        /* Set DMA base address registers */
        wr32(E1000_RDBAL(reg_idx),
@@ -3043,28 +3039,18 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
                       ring->count * sizeof(union e1000_adv_rx_desc));
 
        /* initialize head and tail */
-       ring->head = hw->hw_addr + E1000_RDH(reg_idx);
        ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
-       writel(0, ring->head);
+       wr32(E1000_RDH(reg_idx), 0);
        writel(0, ring->tail);
 
        /* set descriptor configuration */
-       if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
-               srrctl = ALIGN(ring->rx_buffer_len, 64) <<
-                        E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
+       srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
-               srrctl |= IGB_RXBUFFER_16384 >>
-                         E1000_SRRCTL_BSIZEPKT_SHIFT;
+       srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
 #else
-               srrctl |= (PAGE_SIZE / 2) >>
-                         E1000_SRRCTL_BSIZEPKT_SHIFT;
+       srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
 #endif
-               srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
-       } else {
-               srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
-                        E1000_SRRCTL_BSIZEPKT_SHIFT;
-               srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
-       }
+       srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
        if (hw->mac.type == e1000_82580)
                srrctl |= E1000_SRRCTL_TIMESTAMP;
        /* Only set Drop Enable if we are supporting multiple queues */
@@ -3076,13 +3062,12 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
        /* set filtering for VMDQ pools */
        igb_set_vmolr(adapter, reg_idx & 0x7, true);
 
-       /* enable receive descriptor fetching */
-       rxdctl = rd32(E1000_RXDCTL(reg_idx));
-       rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
-       rxdctl &= 0xFFF00000;
        rxdctl |= IGB_RX_PTHRESH;
        rxdctl |= IGB_RX_HTHRESH << 8;
        rxdctl |= IGB_RX_WTHRESH << 16;
+
+       /* enable receive descriptor fetching */
+       rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
        wr32(E1000_RXDCTL(reg_idx), rxdctl);
 }
 
@@ -3256,20 +3241,19 @@ static void igb_free_all_rx_resources(struct igb_adapter *adapter)
  **/
 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
 {
-       struct igb_buffer *buffer_info;
        unsigned long size;
-       unsigned int i;
+       u16 i;
 
        if (!rx_ring->buffer_info)
                return;
 
        /* Free all the Rx ring sk_buffs */
        for (i = 0; i < rx_ring->count; i++) {
-               buffer_info = &rx_ring->buffer_info[i];
+               struct igb_buffer *buffer_info = &rx_ring->buffer_info[i];
                if (buffer_info->dma) {
                        dma_unmap_single(rx_ring->dev,
                                         buffer_info->dma,
-                                        rx_ring->rx_buffer_len,
+                                        IGB_RX_HDR_LEN,
                                         DMA_FROM_DEVICE);
                        buffer_info->dma = 0;
                }
@@ -4466,14 +4450,14 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct pci_dev *pdev = adapter->pdev;
-       int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
-       u32 rx_buffer_len, i;
+       int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
 
        if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
                dev_err(&pdev->dev, "Invalid MTU setting\n");
                return -EINVAL;
        }
 
+#define MAX_STD_JUMBO_FRAME_SIZE 9238
        if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
                dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
                return -EINVAL;
@@ -4485,30 +4469,6 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu)
        /* igb_down has a dependency on max_frame_size */
        adapter->max_frame_size = max_frame;
 
-       /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
-        * means we reserve 2 more, this pushes us to allocate from the next
-        * larger slab size.
-        * i.e. RXBUFFER_2048 --> size-4096 slab
-        */
-
-       if (adapter->hw.mac.type == e1000_82580)
-               max_frame += IGB_TS_HDR_LEN;
-
-       if (max_frame <= IGB_RXBUFFER_1024)
-               rx_buffer_len = IGB_RXBUFFER_1024;
-       else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
-               rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
-       else
-               rx_buffer_len = IGB_RXBUFFER_128;
-
-       if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
-            (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
-               rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
-
-       if ((adapter->hw.mac.type == e1000_82580) &&
-           (rx_buffer_len == IGB_RXBUFFER_128))
-               rx_buffer_len += IGB_RXBUFFER_64;
-
        if (netif_running(netdev))
                igb_down(adapter);
 
@@ -4516,9 +4476,6 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu)
                 netdev->mtu, new_mtu);
        netdev->mtu = new_mtu;
 
-       for (i = 0; i < adapter->num_rx_queues; i++)
-               adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
-
        if (netif_running(netdev))
                igb_up(adapter);
        else
@@ -5694,7 +5651,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
                                "  jiffies              <%lx>\n"
                                "  desc.status          <%x>\n",
                                tx_ring->queue_index,
-                               readl(tx_ring->head),
+                               rd32(E1000_TDH(tx_ring->reg_idx)),
                                readl(tx_ring->tail),
                                tx_ring->next_to_use,
                                tx_ring->next_to_clean,
@@ -5781,8 +5738,7 @@ static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
 
        igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
 }
-static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
-                               union e1000_adv_rx_desc *rx_desc)
+static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
 {
        /* HW will not DMA in data larger than the given buffer, even if it
         * parses the (NFS, of course) header to be larger.  In that case, it
@@ -5790,8 +5746,8 @@ static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
         */
        u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
                   E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
-       if (hlen > rx_ring->rx_buffer_len)
-               hlen = rx_ring->rx_buffer_len;
+       if (hlen > IGB_RX_HDR_LEN)
+               hlen = IGB_RX_HDR_LEN;
        return hlen;
 }
 
@@ -5805,7 +5761,7 @@ static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
        struct igb_buffer *buffer_info , *next_buffer;
        struct sk_buff *skb;
        bool cleaned = false;
-       int cleaned_count = 0;
+       u16 cleaned_count = igb_desc_unused(rx_ring);
        int current_node = numa_node_id();
        unsigned int total_bytes = 0, total_packets = 0;
        unsigned int i;
@@ -5841,14 +5797,10 @@ static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
 
                if (buffer_info->dma) {
                        dma_unmap_single(dev, buffer_info->dma,
-                                        rx_ring->rx_buffer_len,
+                                        IGB_RX_HDR_LEN,
                                         DMA_FROM_DEVICE);
                        buffer_info->dma = 0;
-                       if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
-                               skb_put(skb, length);
-                               goto send_up;
-                       }
-                       skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
+                       skb_put(skb, igb_get_hlen(rx_desc));
                }
 
                if (length) {
@@ -5879,7 +5831,7 @@ static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
                        next_buffer->dma = 0;
                        goto next_desc;
                }
-send_up:
+
                if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
                        dev_kfree_skb_irq(skb);
                        goto next_desc;
@@ -5893,7 +5845,6 @@ send_up:
                igb_rx_checksum_adv(rx_ring, staterr, skb);
 
                skb->protocol = eth_type_trans(skb, netdev);
-               skb_record_rx_queue(skb, rx_ring->queue_index);
 
                if (staterr & E1000_RXD_STAT_VP) {
                        u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
@@ -5903,8 +5854,6 @@ send_up:
                napi_gro_receive(&q_vector->napi, skb);
 
 next_desc:
-               rx_desc->wb.upper.status_error = 0;
-
                /* return some buffers to hardware, one at a time is too slow */
                if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
                        igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
@@ -5918,119 +5867,130 @@ next_desc:
        }
 
        rx_ring->next_to_clean = i;
-       cleaned_count = igb_desc_unused(rx_ring);
-
-       if (cleaned_count)
-               igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
-
-       rx_ring->total_packets += total_packets;
-       rx_ring->total_bytes += total_bytes;
        u64_stats_update_begin(&rx_ring->rx_syncp);
        rx_ring->rx_stats.packets += total_packets;
        rx_ring->rx_stats.bytes += total_bytes;
        u64_stats_update_end(&rx_ring->rx_syncp);
+       rx_ring->total_packets += total_packets;
+       rx_ring->total_bytes += total_bytes;
+
+       if (cleaned_count)
+               igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
+
        return cleaned;
 }
 
+static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
+                                struct igb_buffer *bi)
+{
+       struct sk_buff *skb = bi->skb;
+       dma_addr_t dma = bi->dma;
+
+       if (dma)
+               return true;
+
+       if (likely(!skb)) {
+               skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+                                               IGB_RX_HDR_LEN);
+               bi->skb = skb;
+               if (!skb) {
+                       rx_ring->rx_stats.alloc_failed++;
+                       return false;
+               }
+
+               /* initialize skb for ring */
+               skb_record_rx_queue(skb, rx_ring->queue_index);
+       }
+
+       dma = dma_map_single(rx_ring->dev, skb->data,
+                            IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
+
+       if (dma_mapping_error(rx_ring->dev, dma)) {
+               rx_ring->rx_stats.alloc_failed++;
+               return false;
+       }
+
+       bi->dma = dma;
+       return true;
+}
+
+static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
+                                 struct igb_buffer *bi)
+{
+       struct page *page = bi->page;
+       dma_addr_t page_dma = bi->page_dma;
+       unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
+
+       if (page_dma)
+               return true;
+
+       if (!page) {
+               page = netdev_alloc_page(rx_ring->netdev);
+               bi->page = page;
+               if (unlikely(!page)) {
+                       rx_ring->rx_stats.alloc_failed++;
+                       return false;
+               }
+       }
+
+       page_dma = dma_map_page(rx_ring->dev, page,
+                               page_offset, PAGE_SIZE / 2,
+                               DMA_FROM_DEVICE);
+
+       if (dma_mapping_error(rx_ring->dev, page_dma)) {
+               rx_ring->rx_stats.alloc_failed++;
+               return false;
+       }
+
+       bi->page_dma = page_dma;
+       bi->page_offset = page_offset;
+       return true;
+}
+
 /**
  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
  * @adapter: address of board private structure
  **/
-void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
+void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, u16 cleaned_count)
 {
-       struct net_device *netdev = rx_ring->netdev;
        union e1000_adv_rx_desc *rx_desc;
-       struct igb_buffer *buffer_info;
-       struct sk_buff *skb;
-       unsigned int i;
-       int bufsz;
-
-       i = rx_ring->next_to_use;
-       buffer_info = &rx_ring->buffer_info[i];
+       struct igb_buffer *bi;
+       u16 i = rx_ring->next_to_use;
 
-       bufsz = rx_ring->rx_buffer_len;
+       rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
+       bi = &rx_ring->buffer_info[i];
+       i -= rx_ring->count;
 
        while (cleaned_count--) {
-               rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
-
-               if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
-                       if (!buffer_info->page) {
-                               buffer_info->page = netdev_alloc_page(netdev);
-                               if (unlikely(!buffer_info->page)) {
-                                       u64_stats_update_begin(&rx_ring->rx_syncp);
-                                       rx_ring->rx_stats.alloc_failed++;
-                                       u64_stats_update_end(&rx_ring->rx_syncp);
-                                       goto no_buffers;
-                               }
-                               buffer_info->page_offset = 0;
-                       } else {
-                               buffer_info->page_offset ^= PAGE_SIZE / 2;
-                       }
-                       buffer_info->page_dma =
-                               dma_map_page(rx_ring->dev, buffer_info->page,
-                                            buffer_info->page_offset,
-                                            PAGE_SIZE / 2,
-                                            DMA_FROM_DEVICE);
-                       if (dma_mapping_error(rx_ring->dev,
-                                             buffer_info->page_dma)) {
-                               buffer_info->page_dma = 0;
-                               u64_stats_update_begin(&rx_ring->rx_syncp);
-                               rx_ring->rx_stats.alloc_failed++;
-                               u64_stats_update_end(&rx_ring->rx_syncp);
-                               goto no_buffers;
-                       }
-               }
+               if (!igb_alloc_mapped_skb(rx_ring, bi))
+                       break;
 
-               skb = buffer_info->skb;
-               if (!skb) {
-                       skb = netdev_alloc_skb_ip_align(netdev, bufsz);
-                       if (unlikely(!skb)) {
-                               u64_stats_update_begin(&rx_ring->rx_syncp);
-                               rx_ring->rx_stats.alloc_failed++;
-                               u64_stats_update_end(&rx_ring->rx_syncp);
-                               goto no_buffers;
-                       }
+               /* Refresh the desc even if buffer_addrs didn't change
+                * because each write-back erases this info. */
+               rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
 
-                       buffer_info->skb = skb;
-               }
-               if (!buffer_info->dma) {
-                       buffer_info->dma = dma_map_single(rx_ring->dev,
-                                                         skb->data,
-                                                         bufsz,
-                                                         DMA_FROM_DEVICE);
-                       if (dma_mapping_error(rx_ring->dev,
-                                             buffer_info->dma)) {
-                               buffer_info->dma = 0;
-                               u64_stats_update_begin(&rx_ring->rx_syncp);
-                               rx_ring->rx_stats.alloc_failed++;
-                               u64_stats_update_end(&rx_ring->rx_syncp);
-                               goto no_buffers;
-                       }
-               }
-               /* Refresh the desc even if buffer_addrs didn't change because
-                * each write-back erases this info. */
-               if (bufsz < IGB_RXBUFFER_1024) {
-                       rx_desc->read.pkt_addr =
-                            cpu_to_le64(buffer_info->page_dma);
-                       rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
-               } else {
-                       rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
-                       rx_desc->read.hdr_addr = 0;
-               }
+               if (!igb_alloc_mapped_page(rx_ring, bi))
+                       break;
+
+               rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
 
+               rx_desc++;
+               bi++;
                i++;
-               if (i == rx_ring->count)
-                       i = 0;
-               buffer_info = &rx_ring->buffer_info[i];
+               if (unlikely(!i)) {
+                       rx_desc = E1000_RX_DESC_ADV(*rx_ring, 0);
+                       bi = rx_ring->buffer_info;
+                       i -= rx_ring->count;
+               }
+
+               /* clear the hdr_addr for the next_to_use descriptor */
+               rx_desc->read.hdr_addr = 0;
        }
 
-no_buffers:
+       i += rx_ring->count;
+
        if (rx_ring->next_to_use != i) {
                rx_ring->next_to_use = i;
-               if (i == 0)
-                       i = (rx_ring->count - 1);
-               else
-                       i--;
 
                /* Force memory writes to complete before letting h/w
                 * know there are new descriptors to fetch.  (Only