enic: bug fix: split TSO fragments larger than 16K into multiple descs
[deliverable/linux.git] / drivers / net / enic / enic_main.c
index 9080f07da8fe01f23e81a52eef0690151017b47c..bc5cb225ddaca093aee14a9c23ffd76617707e08 100644 (file)
 #include "enic.h"
 
 #define ENIC_NOTIFY_TIMER_PERIOD       (2 * HZ)
+/* Largest byte count queued in a single WQ descriptor; the TSO path
+ * clamps every descriptor it queues to this length.
+ */
+#define WQ_ENET_MAX_DESC_LEN           (1 << WQ_ENET_LEN_BITS)
+/* 64 KiB -- presumably the largest TSO send the driver expects to be
+ * handed; TODO confirm against the stack's GSO limit.
+ */
+#define MAX_TSO                                (1 << 16)
+/* Descriptor headroom used in the Tx ring-space checks in place of the
+ * former "+ 1": the number of WQ_ENET_MAX_DESC_LEN sized pieces in
+ * MAX_TSO bytes, plus one.
+ */
+#define ENIC_DESC_MAX_SPLITS           (MAX_TSO / WQ_ENET_MAX_DESC_LEN + 1)
+
+/* Named replacement for the literal 0x0043 in enic_id_table */
+#define PCI_DEVICE_ID_CISCO_VIC_ENET         0x0043  /* ethernet vnic */
 
 /* Supported devices */
 static struct pci_device_id enic_id_table[] = {
-       { PCI_VDEVICE(CISCO, 0x0043) },
+       { PCI_VDEVICE(CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) },
        { 0, }  /* end of table */
 };
 
@@ -256,7 +261,7 @@ static void enic_set_msglevel(struct net_device *netdev, u32 value)
        enic->msg_enable = value;
 }
 
-static struct ethtool_ops enic_ethtool_ops = {
+static const struct ethtool_ops enic_ethtool_ops = {
        .get_settings = enic_get_settings,
        .get_drvinfo = enic_get_drvinfo,
        .get_msglevel = enic_get_msglevel,
@@ -310,7 +315,8 @@ static int enic_wq_service(struct vnic_dev *vdev, struct cq_desc *cq_desc,
                opaque);
 
        if (netif_queue_stopped(enic->netdev) &&
-           vnic_wq_desc_avail(&enic->wq[q_number]) >= MAX_SKB_FRAGS + 1)
+           vnic_wq_desc_avail(&enic->wq[q_number]) >=
+           (MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS))
                netif_wake_queue(enic->netdev);
 
        spin_unlock(&enic->wq_lock[q_number]);
@@ -525,7 +531,11 @@ static inline void enic_queue_wq_skb_vlan(struct enic *enic,
        unsigned int len_left = skb->len - head_len;
        int eop = (len_left == 0);
 
-       /* Queue the main skb fragment */
+       /* Queue the main skb fragment. The fragments are no larger
+        * than max MTU(9000)+ETH_HDR_LEN(14) bytes, which is less
+        * than WQ_ENET_MAX_DESC_LEN length. So only one descriptor
+        * per fragment is queued.
+        */
        enic_queue_wq_desc(wq, skb,
                pci_map_single(enic->pdev, skb->data,
                        head_len, PCI_DMA_TODEVICE),
@@ -547,7 +557,11 @@ static inline void enic_queue_wq_skb_csum_l4(struct enic *enic,
        unsigned int csum_offset = hdr_len + skb->csum_offset;
        int eop = (len_left == 0);
 
-       /* Queue the main skb fragment */
+       /* Queue the main skb fragment. The fragments are no larger
+        * than max MTU(9000)+ETH_HDR_LEN(14) bytes, which is less
+        * than WQ_ENET_MAX_DESC_LEN length. So only one descriptor
+        * per fragment is queued.
+        */
        enic_queue_wq_desc_csum_l4(wq, skb,
                pci_map_single(enic->pdev, skb->data,
                        head_len, PCI_DMA_TODEVICE),
@@ -565,10 +579,14 @@ static inline void enic_queue_wq_skb_tso(struct enic *enic,
        struct vnic_wq *wq, struct sk_buff *skb, unsigned int mss,
        int vlan_tag_insert, unsigned int vlan_tag)
 {
-       unsigned int head_len = skb_headlen(skb);
-       unsigned int len_left = skb->len - head_len;
+       unsigned int frag_len_left = skb_headlen(skb);
+       unsigned int len_left = skb->len - frag_len_left;
        unsigned int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
        int eop = (len_left == 0);
+       unsigned int len;
+       dma_addr_t dma_addr;
+       unsigned int offset = 0;
+       skb_frag_t *frag;
 
        /* Preload TCP csum field with IP pseudo hdr calculated
         * with IP length set to zero.  HW will later add in length
@@ -584,17 +602,49 @@ static inline void enic_queue_wq_skb_tso(struct enic *enic,
                        &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
        }
 
-       /* Queue the main skb fragment */
-       enic_queue_wq_desc_tso(wq, skb,
-               pci_map_single(enic->pdev, skb->data,
-                       head_len, PCI_DMA_TODEVICE),
-               head_len,
-               mss, hdr_len,
-               vlan_tag_insert, vlan_tag,
-               eop);
+       /* Queue WQ_ENET_MAX_DESC_LEN length descriptors
+        * for the main skb fragment
+        */
+       while (frag_len_left) {
+               len = min(frag_len_left, (unsigned int)WQ_ENET_MAX_DESC_LEN);
+               dma_addr = pci_map_single(enic->pdev, skb->data + offset,
+                               len, PCI_DMA_TODEVICE);
+               enic_queue_wq_desc_tso(wq, skb,
+                       dma_addr,
+                       len,
+                       mss, hdr_len,
+                       vlan_tag_insert, vlan_tag,
+                       eop && (len == frag_len_left));
+               frag_len_left -= len;
+               offset += len;
+       }
 
-       if (!eop)
-               enic_queue_wq_skb_cont(enic, wq, skb, len_left);
+       if (eop)
+               return;
+
+       /* Queue WQ_ENET_MAX_DESC_LEN length descriptors
+        * for additional data fragments
+        */
+       for (frag = skb_shinfo(skb)->frags; len_left; frag++) {
+               len_left -= frag->size;
+               frag_len_left = frag->size;
+               offset = frag->page_offset;
+
+               while (frag_len_left) {
+                       len = min(frag_len_left,
+                               (unsigned int)WQ_ENET_MAX_DESC_LEN);
+                       dma_addr = pci_map_page(enic->pdev, frag->page,
+                               offset, len,
+                               PCI_DMA_TODEVICE);
+                       enic_queue_wq_desc_cont(wq, skb,
+                               dma_addr,
+                               len,
+                               (len_left == 0) &&
+                               (len == frag_len_left));        /* EOP? */
+                       frag_len_left -= len;
+                       offset += len;
+               }
+       }
 }
 
 static inline void enic_queue_wq_skb(struct enic *enic,
@@ -622,7 +672,8 @@ static inline void enic_queue_wq_skb(struct enic *enic,
 }
 
 /* netif_tx_lock held, process context with BHs disabled, or BH */
-static int enic_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb,
+                                             struct net_device *netdev)
 {
        struct enic *enic = netdev_priv(netdev);
        struct vnic_wq *wq = &enic->wq[0];
@@ -647,7 +698,8 @@ static int enic_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 
        spin_lock_irqsave(&enic->wq_lock[0], flags);
 
-       if (vnic_wq_desc_avail(wq) < skb_shinfo(skb)->nr_frags + 1) {
+       if (vnic_wq_desc_avail(wq) <
+           skb_shinfo(skb)->nr_frags + ENIC_DESC_MAX_SPLITS) {
                netif_stop_queue(netdev);
                /* This is a hard error, log it */
                printk(KERN_ERR PFX "%s: BUG! Tx ring full when "
@@ -658,11 +710,9 @@ static int enic_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 
        enic_queue_wq_skb(enic, wq, skb);
 
-       if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + 1)
+       if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS)
                netif_stop_queue(netdev);
 
-       netdev->trans_start = jiffies;
-
        spin_unlock_irqrestore(&enic->wq_lock[0], flags);
 
        return NETDEV_TX_OK;
@@ -852,6 +902,50 @@ static int enic_rq_alloc_buf(struct vnic_rq *rq)
        return 0;
 }
 
+/* RQ buffer-allocation callback that enic_set_rq_alloc_buf() installs
+ * for VNIC_DEV_HW_VER_A1 hardware.
+ *
+ * When vnic_rq_posting_soon() reports true, the next descriptor is
+ * encoded as RQ_ENET_TYPE_RESV2 (other encode arguments zero) and
+ * posted with all-zero arguments; otherwise the call is forwarded to
+ * enic_rq_alloc_buf().
+ *
+ * Returns 0 after posting the placeholder descriptor, else whatever
+ * enic_rq_alloc_buf() returns.
+ */
+static int enic_rq_alloc_buf_a1(struct vnic_rq *rq)
+{
+       struct rq_enet_desc *desc = vnic_rq_next_desc(rq);
+
+       if (vnic_rq_posting_soon(rq)) {
+
+               /* SW workaround for A0 HW erratum: if we're just about
+                * to write posted_index, insert a dummy desc
+                * of type resvd
+                *
+                * NOTE(review): the erratum is labelled A0 here while
+                * this callback is selected for VNIC_DEV_HW_VER_A1 --
+                * confirm which hardware revision is affected.
+                */
+
+               rq_enet_desc_enc(desc, 0, RQ_ENET_TYPE_RESV2, 0);
+               vnic_rq_post(rq, 0, 0, 0, 0);
+       } else {
+               /* Normal case: post a real receive buffer */
+               return enic_rq_alloc_buf(rq);
+       }
+
+       return 0;
+}
+
+/* Choose the RQ buffer-allocation callback for this adapter and store
+ * it in enic->rq_alloc_buf, based on the version reported by
+ * vnic_dev_hw_version().
+ *
+ * Returns 0 on success, the error from vnic_dev_hw_version() if the
+ * query fails, or -ENODEV when the reported version is not one of the
+ * cases handled below.
+ */
+static int enic_set_rq_alloc_buf(struct enic *enic)
+{
+       enum vnic_dev_hw_version hw_ver;
+       int err;
+
+       err = vnic_dev_hw_version(enic->vdev, &hw_ver);
+       if (err)
+               return err;
+
+       switch (hw_ver) {
+       case VNIC_DEV_HW_VER_A1:
+               /* A1 gets the variant that inserts dummy descriptors */
+               enic->rq_alloc_buf = enic_rq_alloc_buf_a1;
+               break;
+       /* A2 and unidentified hardware use the plain allocator */
+       case VNIC_DEV_HW_VER_A2:
+       case VNIC_DEV_HW_VER_UNKNOWN:
+               enic->rq_alloc_buf = enic_rq_alloc_buf;
+               break;
+       default:
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
 static int enic_get_skb_header(struct sk_buff *skb, void **iphdr,
        void **tcph, u64 *hdr_flags, void *priv)
 {
@@ -1059,7 +1153,7 @@ static int enic_poll(struct napi_struct *napi, int budget)
                /* Replenish RQ
                 */
 
-               vnic_rq_fill(&enic->rq[0], enic_rq_alloc_buf);
+               vnic_rq_fill(&enic->rq[0], enic->rq_alloc_buf);
 
        } else {
 
@@ -1094,7 +1188,7 @@ static int enic_poll_msix(struct napi_struct *napi, int budget)
                /* Replenish RQ
                 */
 
-               vnic_rq_fill(&enic->rq[0], enic_rq_alloc_buf);
+               vnic_rq_fill(&enic->rq[0], enic->rq_alloc_buf);
 
                /* Return intr event credits for this polling
                 * cycle.  An intr event is the completion of a
@@ -1270,7 +1364,7 @@ static int enic_open(struct net_device *netdev)
        }
 
        for (i = 0; i < enic->rq_count; i++) {
-               err = vnic_rq_fill(&enic->rq[i], enic_rq_alloc_buf);
+               err = vnic_rq_fill(&enic->rq[i], enic->rq_alloc_buf);
                if (err) {
                        printk(KERN_ERR PFX
                                "%s: Unable to alloc receive buffers.\n",
@@ -1610,12 +1704,6 @@ static void enic_clear_intr_mode(struct enic *enic)
        vnic_dev_set_intr_mode(enic->vdev, VNIC_DEV_INTR_MODE_UNKNOWN);
 }
 
-static void enic_iounmap(struct enic *enic)
-{
-       if (enic->bar0.vaddr)
-               iounmap(enic->bar0.vaddr);
-}
-
 static const struct net_device_ops enic_netdev_ops = {
        .ndo_open               = enic_open,
        .ndo_stop               = enic_stop,
@@ -1634,6 +1722,15 @@ static const struct net_device_ops enic_netdev_ops = {
 #endif
 };
 
+/* Unmap every BAR in enic->bar[] that has a non-NULL vaddr.
+ *
+ * Entries whose vaddr is NULL are skipped, so the helper can run when
+ * only some BARs were mapped (enic_probe() maps memory BARs only and
+ * jumps to its err_out_iounmap label part-way through that loop).
+ * NOTE(review): relies on unmapped entries having vaddr == NULL, i.e.
+ * on struct enic starting out zeroed -- confirm at the allocation site.
+ * vaddr is not cleared after iounmap().
+ */
+static void enic_iounmap(struct enic *enic)
+{
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(enic->bar); i++)
+               if (enic->bar[i].vaddr)
+                       iounmap(enic->bar[i].vaddr);
+}
+
 static int __devinit enic_probe(struct pci_dev *pdev,
        const struct pci_device_id *ent)
 {
@@ -1711,31 +1808,28 @@ static int __devinit enic_probe(struct pci_dev *pdev,
                using_dac = 1;
        }
 
-       /* Map vNIC resources from BAR0
+       /* Map vNIC resources from BAR0-5
         */
 
-       if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
-               printk(KERN_ERR PFX
-                       "BAR0 not memory-map'able, aborting.\n");
-               err = -ENODEV;
-               goto err_out_release_regions;
-       }
-
-       enic->bar0.vaddr = pci_iomap(pdev, 0, enic->bar0.len);
-       enic->bar0.bus_addr = pci_resource_start(pdev, 0);
-       enic->bar0.len = pci_resource_len(pdev, 0);
-
-       if (!enic->bar0.vaddr) {
-               printk(KERN_ERR PFX
-                       "Cannot memory-map BAR0 res hdr, aborting.\n");
-               err = -ENODEV;
-               goto err_out_release_regions;
+       for (i = 0; i < ARRAY_SIZE(enic->bar); i++) {
+               if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+                       continue;
+               enic->bar[i].len = pci_resource_len(pdev, i);
+               enic->bar[i].vaddr = pci_iomap(pdev, i, enic->bar[i].len);
+               if (!enic->bar[i].vaddr) {
+                       printk(KERN_ERR PFX
+                               "Cannot memory-map BAR %d, aborting.\n", i);
+                       err = -ENODEV;
+                       goto err_out_iounmap;
+               }
+               enic->bar[i].bus_addr = pci_resource_start(pdev, i);
        }
 
        /* Register vNIC device
         */
 
-       enic->vdev = vnic_dev_register(NULL, enic, pdev, &enic->bar0);
+       enic->vdev = vnic_dev_register(NULL, enic, pdev, enic->bar,
+               ARRAY_SIZE(enic->bar));
        if (!enic->vdev) {
                printk(KERN_ERR PFX
                        "vNIC registration failed, aborting.\n");
This page took 0.041327 seconds and 5 git commands to generate.