e1000: Removing the unused macro PAGE_USE_COUNT()
[deliverable/linux.git] / drivers / net / igb / igb_main.c
index f8c2919bcec02da234b3516f4918bac6a658fdeb..87eed88d958667a32f2a3d970a12b0d94665a8eb 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/ipv6.h>
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
+#include <linux/net_tstamp.h>
 #include <linux/mii.h>
 #include <linux/ethtool.h>
 #include <linux/if_vlan.h>
@@ -175,6 +176,54 @@ MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 
+/**
+ * Scale the NIC clock cycle by a large factor so that
+ * relatively small clock corrections can be added or
+ * substracted at each clock tick. The drawbacks of a
+ * large factor are a) that the clock register overflows
+ * more quickly (not such a big deal) and b) that the
+ * increment per tick has to fit into 24 bits.
+ *
+ * Note that
+ *   TIMINCA = IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS *
+ *             IGB_TSYNC_SCALE
+ *   TIMINCA += TIMINCA * adjustment [ppm] / 1e9
+ *
+ * The base scale factor is intentionally a power of two
+ * so that the division in %struct timecounter can be done with
+ * a shift.
+ */
+#define IGB_TSYNC_SHIFT (19)
+#define IGB_TSYNC_SCALE (1<<IGB_TSYNC_SHIFT)
+
+/**
+ * The duration of one clock cycle of the NIC.
+ *
+ * @todo This hard-coded value is part of the specification and might change
+ * in future hardware revisions. Add revision check.
+ */
+#define IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS 16
+
+#if (IGB_TSYNC_SCALE * IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS) >= (1<<24)
+# error IGB_TSYNC_SCALE and/or IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS are too large to fit into TIMINCA
+#endif
+
+/**
+ * igb_read_clock - read raw cycle counter (to be used by time counter)
+ */
+static cycle_t igb_read_clock(const struct cyclecounter *tc)
+{
+       struct igb_adapter *adapter =
+               container_of(tc, struct igb_adapter, cycles);
+       struct e1000_hw *hw = &adapter->hw;
+       u64 stamp;
+
+       stamp =  rd32(E1000_SYSTIML);
+       stamp |= (u64)rd32(E1000_SYSTIMH) << 32ULL;
+
+       return stamp;
+}
+
 #ifdef DEBUG
 /**
  * igb_get_hw_dev_name - return device name string
@@ -185,6 +234,30 @@ char *igb_get_hw_dev_name(struct e1000_hw *hw)
        struct igb_adapter *adapter = hw->back;
        return adapter->netdev->name;
 }
+
+/**
+ * igb_get_time_str - format current NIC and system time as string
+ */
+static char *igb_get_time_str(struct igb_adapter *adapter,
+                             char buffer[160])
+{
+       cycle_t hw = adapter->cycles.read(&adapter->cycles);
+       struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
+       struct timespec sys;
+       struct timespec delta;
+       getnstimeofday(&sys);
+
+       delta = timespec_sub(nic, sys);
+
+       sprintf(buffer,
+               "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
+               hw,
+               (long)nic.tv_sec, nic.tv_nsec,
+               (long)sys.tv_sec, sys.tv_nsec,
+               (long)delta.tv_sec, delta.tv_nsec);
+
+       return buffer;
+}
 #endif
 
 /**
@@ -921,7 +994,7 @@ void igb_reset(struct igb_adapter *adapter)
                /* the tx fifo also stores 16 bytes of information about the tx
                 * but don't include ethernet FCS because hardware appends it */
                min_tx_space = (adapter->max_frame_size +
-                               sizeof(struct e1000_tx_desc) -
+                               sizeof(union e1000_adv_tx_desc) -
                                ETH_FCS_LEN) * 2;
                min_tx_space = ALIGN(min_tx_space, 1024);
                min_tx_space >>= 10;
@@ -1175,9 +1248,7 @@ static int __devinit igb_probe(struct pci_dev *pdev,
        netdev->features |= NETIF_F_TSO;
        netdev->features |= NETIF_F_TSO6;
 
-#ifdef CONFIG_IGB_LRO
        netdev->features |= NETIF_F_GRO;
-#endif
 
        netdev->vlan_features |= NETIF_F_TSO;
        netdev->vlan_features |= NETIF_F_TSO6;
@@ -1232,7 +1303,7 @@ static int __devinit igb_probe(struct pci_dev *pdev,
        hw->fc.original_type = e1000_fc_default;
        hw->fc.type = e1000_fc_default;
 
-       adapter->itr_setting = 3;
+       adapter->itr_setting = IGB_DEFAULT_ITR;
        adapter->itr = IGB_START_ITR;
 
        igb_validate_mdi_setting(hw);
@@ -1293,11 +1364,63 @@ static int __devinit igb_probe(struct pci_dev *pdev,
                dev_info(&pdev->dev, "DCA enabled\n");
                /* Always use CB2 mode, difference is masked
                 * in the CB driver. */
-               wr32(E1000_DCA_CTRL, 2);
+               wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
                igb_setup_dca(adapter);
        }
 #endif
 
+       /*
+        * Initialize hardware timer: we keep it running just in case
+        * that some program needs it later on.
+        */
+       memset(&adapter->cycles, 0, sizeof(adapter->cycles));
+       adapter->cycles.read = igb_read_clock;
+       adapter->cycles.mask = CLOCKSOURCE_MASK(64);
+       adapter->cycles.mult = 1;
+       adapter->cycles.shift = IGB_TSYNC_SHIFT;
+       wr32(E1000_TIMINCA,
+            (1<<24) |
+            IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS * IGB_TSYNC_SCALE);
+#if 0
+       /*
+        * Avoid rollover while we initialize by resetting the time counter.
+        */
+       wr32(E1000_SYSTIML, 0x00000000);
+       wr32(E1000_SYSTIMH, 0x00000000);
+#else
+       /*
+        * Set registers so that rollover occurs soon to test this.
+        */
+       wr32(E1000_SYSTIML, 0x00000000);
+       wr32(E1000_SYSTIMH, 0xFF800000);
+#endif
+       wrfl();
+       timecounter_init(&adapter->clock,
+                        &adapter->cycles,
+                        ktime_to_ns(ktime_get_real()));
+
+       /*
+        * Synchronize our NIC clock against system wall clock. NIC
+        * time stamp reading requires ~3us per sample, each sample
+        * was pretty stable even under load => only require 10
+        * samples for each offset comparison.
+        */
+       memset(&adapter->compare, 0, sizeof(adapter->compare));
+       adapter->compare.source = &adapter->clock;
+       adapter->compare.target = ktime_get_real;
+       adapter->compare.num_samples = 10;
+       timecompare_update(&adapter->compare, 0);
+
+#ifdef DEBUG
+       {
+               char buffer[160];
+               printk(KERN_DEBUG
+                       "igb: %s: hw %p initialized timer\n",
+                       igb_get_time_str(adapter, buffer),
+                       &adapter->hw);
+       }
+#endif
+
        dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
        /* print bus type/speed/width info */
        dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
@@ -1373,7 +1496,7 @@ static void __devexit igb_remove(struct pci_dev *pdev)
                dev_info(&pdev->dev, "DCA disabled\n");
                dca_remove_requester(&pdev->dev);
                adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
-               wr32(E1000_DCA_CTRL, 1);
+               wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
        }
 #endif
 
@@ -1566,7 +1689,6 @@ static int igb_close(struct net_device *netdev)
  *
  * Return 0 on success, negative on failure
  **/
-
 int igb_setup_tx_resources(struct igb_adapter *adapter,
                           struct igb_ring *tx_ring)
 {
@@ -1580,7 +1702,7 @@ int igb_setup_tx_resources(struct igb_adapter *adapter,
        memset(tx_ring->buffer_info, 0, size);
 
        /* round up to nearest 4K */
-       tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc);
+       tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
        tx_ring->size = ALIGN(tx_ring->size, 4096);
 
        tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size,
@@ -1646,13 +1768,13 @@ static void igb_configure_tx(struct igb_adapter *adapter)
        int i, j;
 
        for (i = 0; i < adapter->num_tx_queues; i++) {
-               struct igb_ring *ring = &(adapter->tx_ring[i]);
+               struct igb_ring *ring = &adapter->tx_ring[i];
                j = ring->reg_idx;
                wr32(E1000_TDLEN(j),
-                               ring->count * sizeof(struct e1000_tx_desc));
+                    ring->count * sizeof(union e1000_adv_tx_desc));
                tdba = ring->dma;
                wr32(E1000_TDBAL(j),
-                               tdba & 0x00000000ffffffffULL);
+                    tdba & 0x00000000ffffffffULL);
                wr32(E1000_TDBAH(j), tdba >> 32);
 
                ring->head = E1000_TDH(j);
@@ -1672,8 +1794,6 @@ static void igb_configure_tx(struct igb_adapter *adapter)
                wr32(E1000_DCA_TXCTRL(j), txctrl);
        }
 
-
-
        /* Use the default values for the Tx Inter Packet Gap (IPG) timer */
 
        /* Program the Transmit Control Register */
@@ -1701,7 +1821,6 @@ static void igb_configure_tx(struct igb_adapter *adapter)
  *
  * Returns 0 on success, negative on failure
  **/
-
 int igb_setup_rx_resources(struct igb_adapter *adapter,
                           struct igb_ring *rx_ring)
 {
@@ -1788,7 +1907,7 @@ static void igb_setup_rctl(struct igb_adapter *adapter)
         * enable stripping of CRC. It's unlikely this will break BMC
         * redirection as it did with e1000. Newer features require
         * that the HW strips the CRC.
-       */
+        */
        rctl |= E1000_RCTL_SECRC;
 
        /*
@@ -1852,7 +1971,7 @@ static void igb_configure_rx(struct igb_adapter *adapter)
        struct e1000_hw *hw = &adapter->hw;
        u32 rctl, rxcsum;
        u32 rxdctl;
-       int i, j;
+       int i;
 
        /* disable receives while setting up the descriptors */
        rctl = rd32(E1000_RCTL);
@@ -1866,14 +1985,14 @@ static void igb_configure_rx(struct igb_adapter *adapter)
        /* Setup the HW Rx Head and Tail Descriptor Pointers and
         * the Base and Length of the Rx Descriptor Ring */
        for (i = 0; i < adapter->num_rx_queues; i++) {
-               struct igb_ring *ring = &(adapter->rx_ring[i]);
-               j = ring->reg_idx;
+               struct igb_ring *ring = &adapter->rx_ring[i];
+               int j = ring->reg_idx;
                rdba = ring->dma;
                wr32(E1000_RDBAL(j),
-                               rdba & 0x00000000ffffffffULL);
+                    rdba & 0x00000000ffffffffULL);
                wr32(E1000_RDBAH(j), rdba >> 32);
                wr32(E1000_RDLEN(j),
-                              ring->count * sizeof(union e1000_adv_rx_desc));
+                    ring->count * sizeof(union e1000_adv_rx_desc));
 
                ring->head = E1000_RDH(j);
                ring->tail = E1000_RDT(j);
@@ -1938,17 +2057,11 @@ static void igb_configure_rx(struct igb_adapter *adapter)
        } else {
                /* Enable Receive Checksum Offload for TCP and UDP */
                rxcsum = rd32(E1000_RXCSUM);
-               if (adapter->rx_csum) {
-                       rxcsum |= E1000_RXCSUM_TUOFL;
+               if (adapter->rx_csum)
+                       rxcsum |= E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPPCSE;
+               else
+                       rxcsum &= ~(E1000_RXCSUM_TUOFL | E1000_RXCSUM_IPPCSE);
 
-                       /* Enable IPv4 payload checksum for UDP fragments
-                        * Must be used in conjunction with packet-split. */
-                       if (adapter->rx_ps_hdr_size)
-                               rxcsum |= E1000_RXCSUM_IPPCSE;
-               } else {
-                       rxcsum &= ~E1000_RXCSUM_TUOFL;
-                       /* don't need to clear IPPCSE as it defaults to 0 */
-               }
                wr32(E1000_RXCSUM, rxcsum);
        }
 
@@ -2011,6 +2124,7 @@ static void igb_unmap_and_free_tx_resource(struct igb_adapter *adapter,
                buffer_info->skb = NULL;
        }
        buffer_info->time_stamp = 0;
+       buffer_info->next_to_watch = 0;
        /* buffer_info must be completely set up in the transmit path */
 }
 
@@ -2544,7 +2658,7 @@ static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
                if (bytes > 25000) {
                        if (packets > 35)
                                retval = low_latency;
-               } else if (bytes < 6000) {
+               } else if (bytes < 1500) {
                        retval = low_latency;
                }
                break;
@@ -2576,15 +2690,13 @@ static void igb_set_itr(struct igb_adapter *adapter)
                                            adapter->tx_itr,
                                            adapter->tx_ring->total_packets,
                                            adapter->tx_ring->total_bytes);
-
                current_itr = max(adapter->rx_itr, adapter->tx_itr);
        } else {
                current_itr = adapter->rx_itr;
        }
 
        /* conservative mode (itr 3) eliminates the lowest_latency setting */
-       if (adapter->itr_setting == 3 &&
-           current_itr == lowest_latency)
+       if (adapter->itr_setting == 3 && current_itr == lowest_latency)
                current_itr = low_latency;
 
        switch (current_itr) {
@@ -2636,6 +2748,7 @@ set_itr_now:
 #define IGB_TX_FLAGS_VLAN              0x00000002
 #define IGB_TX_FLAGS_TSO               0x00000004
 #define IGB_TX_FLAGS_IPV4              0x00000008
+#define IGB_TX_FLAGS_TSTAMP             0x00000010
 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
 #define IGB_TX_FLAGS_VLAN_SHIFT        16
 
@@ -2701,7 +2814,7 @@ static inline int igb_tso_adv(struct igb_adapter *adapter,
        mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
        mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
 
-       /* Context index must be unique per ring. */
+       /* For 82575, context index must be unique per ring. */
        if (adapter->flags & IGB_FLAG_NEED_CTX_IDX)
                mss_l4len_idx |= tx_ring->queue_index << 4;
 
@@ -2785,8 +2898,6 @@ static inline bool igb_tx_csum_adv(struct igb_adapter *adapter,
 
                return true;
        }
-
-
        return false;
 }
 
@@ -2863,6 +2974,9 @@ static inline void igb_tx_queue_adv(struct igb_adapter *adapter,
        if (tx_flags & IGB_TX_FLAGS_VLAN)
                cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
 
+       if (tx_flags & IGB_TX_FLAGS_TSTAMP)
+               cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
+
        if (tx_flags & IGB_TX_FLAGS_TSO) {
                cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
 
@@ -2942,8 +3056,6 @@ static int igb_maybe_stop_tx(struct net_device *netdev,
        return __igb_maybe_stop_tx(netdev, tx_ring, size);
 }
 
-#define TXD_USE_COUNT(S) (((S) >> (IGB_MAX_TXD_PWR)) + 1)
-
 static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
                                   struct net_device *netdev,
                                   struct igb_ring *tx_ring)
@@ -2953,6 +3065,7 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
        unsigned int tx_flags = 0;
        u8 hdr_len = 0;
        int tso = 0;
+       union skb_shared_tx *shtx;
 
        if (test_bit(__IGB_DOWN, &adapter->state)) {
                dev_kfree_skb_any(skb);
@@ -2973,7 +3086,29 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
                /* this is a hard error */
                return NETDEV_TX_BUSY;
        }
-       skb_orphan(skb);
+
+       /*
+        * TODO: check that there currently is no other packet with
+        * time stamping in the queue
+        *
+        * When doing time stamping, keep the connection to the socket
+        * a while longer: it is still needed by skb_hwtstamp_tx(),
+        * called either in igb_tx_hwtstamp() or by our caller when
+        * doing software time stamping.
+        */
+       shtx = skb_tx(skb);
+       if (unlikely(shtx->hardware)) {
+               shtx->in_progress = 1;
+               tx_flags |= IGB_TX_FLAGS_TSTAMP;
+       } else if (likely(!shtx->software)) {
+               /*
+                * TODO: can this be solved in dev.c:dev_hard_start_xmit()?
+                * There are probably unmodified driver which do something
+                * like this and thus don't work in combination with
+                * SOF_TIMESTAMPING_TX_SOFTWARE.
+                */
+               skb_orphan(skb);
+       }
 
        if (adapter->vlgrp && vlan_tx_tag_present(skb)) {
                tx_flags |= IGB_TX_FLAGS_VLAN;
@@ -2984,7 +3119,6 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
                tx_flags |= IGB_TX_FLAGS_IPV4;
 
        first = tx_ring->next_to_use;
-
        tso = skb_is_gso(skb) ? igb_tso_adv(adapter, tx_ring, skb, tx_flags,
                                              &hdr_len) : 0;
 
@@ -2995,9 +3129,9 @@ static int igb_xmit_frame_ring_adv(struct sk_buff *skb,
 
        if (tso)
                tx_flags |= IGB_TX_FLAGS_TSO;
-       else if (igb_tx_csum_adv(adapter, tx_ring, skb, tx_flags))
-                       if (skb->ip_summed == CHECKSUM_PARTIAL)
-                               tx_flags |= IGB_TX_FLAGS_CSUM;
+       else if (igb_tx_csum_adv(adapter, tx_ring, skb, tx_flags) &&
+                (skb->ip_summed == CHECKSUM_PARTIAL))
+               tx_flags |= IGB_TX_FLAGS_CSUM;
 
        igb_tx_queue_adv(adapter, tx_ring, tx_flags,
                         igb_tx_map_adv(adapter, tx_ring, skb, first),
@@ -3058,8 +3192,7 @@ static void igb_reset_task(struct work_struct *work)
  * Returns the address of the device statistics structure.
  * The statistics are actually updated from the timer callback.
  **/
-static struct net_device_stats *
-igb_get_stats(struct net_device *netdev)
+static struct net_device_stats *igb_get_stats(struct net_device *netdev)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
 
@@ -3093,6 +3226,7 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu)
 
        while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
                msleep(1);
+
        /* igb_down has a dependency on max_frame_size */
        adapter->max_frame_size = max_frame;
        if (netif_running(netdev))
@@ -3262,8 +3396,7 @@ void igb_update_stats(struct igb_adapter *adapter)
        /* Phy Stats */
        if (hw->phy.media_type == e1000_media_type_copper) {
                if ((adapter->link_speed == SPEED_1000) &&
-                  (!igb_read_phy_reg(hw, PHY_1000T_STATUS,
-                                             &phy_tmp))) {
+                  (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
                        phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
                        adapter->phy_stats.idle_errors += phy_tmp;
                }
@@ -3275,7 +3408,6 @@ void igb_update_stats(struct igb_adapter *adapter)
        adapter->stats.mgpdc += rd32(E1000_MGTPDC);
 }
 
-
 static irqreturn_t igb_msix_other(int irq, void *data)
 {
        struct net_device *netdev = data;
@@ -3313,6 +3445,7 @@ static irqreturn_t igb_msix_tx(int irq, void *data)
        if (adapter->flags & IGB_FLAG_DCA_ENABLED)
                igb_update_tx_dca(tx_ring);
 #endif
+
        tx_ring->total_bytes = 0;
        tx_ring->total_packets = 0;
 
@@ -3333,13 +3466,11 @@ static void igb_write_itr(struct igb_ring *ring)
        if ((ring->adapter->itr_setting & 3) && ring->set_itr) {
                switch (hw->mac.type) {
                case e1000_82576:
-                       wr32(ring->itr_register,
-                            ring->itr_val |
+                       wr32(ring->itr_register, ring->itr_val |
                             0x80000000);
                        break;
                default:
-                       wr32(ring->itr_register,
-                            ring->itr_val |
+                       wr32(ring->itr_register, ring->itr_val |
                             (ring->itr_val << 16));
                        break;
                }
@@ -3451,7 +3582,7 @@ static int __igb_notify_dca(struct device *dev, void *data)
                        break;
                /* Always use CB2 mode, difference is masked
                 * in the CB driver. */
-               wr32(E1000_DCA_CTRL, 2);
+               wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
                if (dca_add_requester(dev) == 0) {
                        adapter->flags |= IGB_FLAG_DCA_ENABLED;
                        dev_info(&adapter->pdev->dev, "DCA enabled\n");
@@ -3466,7 +3597,7 @@ static int __igb_notify_dca(struct device *dev, void *data)
                        dca_remove_requester(dev);
                        dev_info(&adapter->pdev->dev, "DCA disabled\n");
                        adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
-                       wr32(E1000_DCA_CTRL, 1);
+                       wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
                }
                break;
        }
@@ -3610,7 +3741,6 @@ static int igb_clean_rx_ring_msix(struct napi_struct *napi, int budget)
 #endif
        igb_clean_rx_irq_adv(rx_ring, &work_done, budget);
 
-
        /* If not enough Rx work done, exit the polling mode */
        if ((work_done == 0) || !netif_running(netdev)) {
                napi_complete(napi);
@@ -3621,7 +3751,6 @@ static int igb_clean_rx_ring_msix(struct napi_struct *napi, int budget)
                        else
                                igb_update_ring_itr(rx_ring);
                }
-
                if (!test_bit(__IGB_DOWN, &adapter->state))
                        wr32(E1000_EIMS, rx_ring->eims_value);
 
@@ -3631,6 +3760,43 @@ static int igb_clean_rx_ring_msix(struct napi_struct *napi, int budget)
        return 1;
 }
 
+/**
+ * igb_hwtstamp - utility function which checks for TX time stamp
+ * @adapter: board private structure
+ * @skb: packet that was just sent
+ *
+ * If we were asked to do hardware stamping and such a time stamp is
+ * available, then it must have been for this skb here because we only
+ * allow only one such packet into the queue.
+ */
+static void igb_tx_hwtstamp(struct igb_adapter *adapter, struct sk_buff *skb)
+{
+       union skb_shared_tx *shtx = skb_tx(skb);
+       struct e1000_hw *hw = &adapter->hw;
+
+       if (unlikely(shtx->hardware)) {
+               u32 valid = rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID;
+               if (valid) {
+                       u64 regval = rd32(E1000_TXSTMPL);
+                       u64 ns;
+                       struct skb_shared_hwtstamps shhwtstamps;
+
+                       memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+                       regval |= (u64)rd32(E1000_TXSTMPH) << 32;
+                       ns = timecounter_cyc2time(&adapter->clock,
+                                                 regval);
+                       timecompare_update(&adapter->compare, ns);
+                       shhwtstamps.hwtstamp = ns_to_ktime(ns);
+                       shhwtstamps.syststamp =
+                               timecompare_transform(&adapter->compare, ns);
+                       skb_tstamp_tx(skb, &shhwtstamps);
+               }
+
+               /* delayed orphaning: skb_tstamp_tx() needs the socket */
+               skb_orphan(skb);
+       }
+}
+
 /**
  * igb_clean_tx_irq - Reclaim resources after transmit completes
  * @adapter: board private structure
@@ -3669,6 +3835,8 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring)
                                            skb->len;
                                total_packets += segs;
                                total_bytes += bytecount;
+
+                               igb_tx_hwtstamp(adapter, skb);
                        }
 
                        igb_unmap_and_free_tx_resource(adapter, buffer_info);
@@ -3678,7 +3846,6 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring)
                        if (i == tx_ring->count)
                                i = 0;
                }
-
                eop = tx_ring->buffer_info[i].next_to_watch;
                eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
        }
@@ -3747,7 +3914,7 @@ static bool igb_clean_tx_irq(struct igb_ring *tx_ring)
  * igb_receive_skb - helper function to handle rx indications
  * @ring: pointer to receive ring receving this packet
  * @status: descriptor status field as written by hardware
- * @vlan: descriptor vlan field as written by hardware (no le/be conversion)
+ * @rx_desc: receive descriptor containing vlan and type information.
  * @skb: pointer to sk_buff to be indicated to stack
  **/
 static void igb_receive_skb(struct igb_ring *ring, u8 status,
@@ -3774,7 +3941,6 @@ static void igb_receive_skb(struct igb_ring *ring, u8 status,
        }
 }
 
-
 static inline void igb_rx_checksum_adv(struct igb_adapter *adapter,
                                       u32 status_err, struct sk_buff *skb)
 {
@@ -3802,15 +3968,16 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring,
 {
        struct igb_adapter *adapter = rx_ring->adapter;
        struct net_device *netdev = adapter->netdev;
+       struct e1000_hw *hw = &adapter->hw;
        struct pci_dev *pdev = adapter->pdev;
        union e1000_adv_rx_desc *rx_desc , *next_rxd;
        struct igb_buffer *buffer_info , *next_buffer;
        struct sk_buff *skb;
-       unsigned int i;
-       u32 length, hlen, staterr;
        bool cleaned = false;
        int cleaned_count = 0;
        unsigned int total_bytes = 0, total_packets = 0;
+       unsigned int i;
+       u32 length, hlen, staterr;
 
        i = rx_ring->next_to_clean;
        buffer_info = &rx_ring->buffer_info[i];
@@ -3858,8 +4025,7 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring,
 
                if (!skb_shinfo(skb)->nr_frags) {
                        pci_unmap_single(pdev, buffer_info->dma,
-                                        adapter->rx_ps_hdr_size +
-                                          NET_IP_ALIGN,
+                                        adapter->rx_ps_hdr_size + NET_IP_ALIGN,
                                         PCI_DMA_FROMDEVICE);
                        skb_put(skb, hlen);
                }
@@ -3894,6 +4060,47 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring,
                        goto next_desc;
                }
 send_up:
+               /*
+                * If this bit is set, then the RX registers contain
+                * the time stamp. No other packet will be time
+                * stamped until we read these registers, so read the
+                * registers to make them available again. Because
+                * only one packet can be time stamped at a time, we
+                * know that the register values must belong to this
+                * one here and therefore we don't need to compare
+                * any of the additional attributes stored for it.
+                *
+                * If nothing went wrong, then it should have a
+                * skb_shared_tx that we can turn into a
+                * skb_shared_hwtstamps.
+                *
+                * TODO: can time stamping be triggered (thus locking
+                * the registers) without the packet reaching this point
+                * here? In that case RX time stamping would get stuck.
+                *
+                * TODO: in "time stamp all packets" mode this bit is
+                * not set. Need a global flag for this mode and then
+                * always read the registers. Cannot be done without
+                * a race condition.
+                */
+               if (unlikely(staterr & E1000_RXD_STAT_TS)) {
+                       u64 regval;
+                       u64 ns;
+                       struct skb_shared_hwtstamps *shhwtstamps =
+                               skb_hwtstamps(skb);
+
+                       WARN(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID),
+                            "igb: no RX time stamp available for time stamped packet");
+                       regval = rd32(E1000_RXSTMPL);
+                       regval |= (u64)rd32(E1000_RXSTMPH) << 32;
+                       ns = timecounter_cyc2time(&adapter->clock, regval);
+                       timecompare_update(&adapter->compare, ns);
+                       memset(shhwtstamps, 0, sizeof(*shhwtstamps));
+                       shhwtstamps->hwtstamp = ns_to_ktime(ns);
+                       shhwtstamps->syststamp =
+                               timecompare_transform(&adapter->compare, ns);
+               }
+
                if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
                        dev_kfree_skb_irq(skb);
                        goto next_desc;
@@ -3938,7 +4145,6 @@ next_desc:
        return cleaned;
 }
 
-
 /**
  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
  * @adapter: address of board private structure
@@ -4070,6 +4276,163 @@ static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
        return 0;
 }
 
+/**
+ * igb_hwtstamp_ioctl - control hardware time stamping
+ * @netdev:
+ * @ifreq:
+ * @cmd:
+ *
+ * Outgoing time stamping can be enabled and disabled. Play nice and
+ * disable it when requested, although it shouldn't case any overhead
+ * when no packet needs it. At most one packet in the queue may be
+ * marked for time stamping, otherwise it would be impossible to tell
+ * for sure to which packet the hardware time stamp belongs.
+ *
+ * Incoming time stamping has to be configured via the hardware
+ * filters. Not all combinations are supported, in particular event
+ * type has to be specified. Matching the kind of event packet is
+ * not supported, with the exception of "all V2 events regardless of
+ * level 2 or 4".
+ *
+ **/
+static int igb_hwtstamp_ioctl(struct net_device *netdev,
+                             struct ifreq *ifr, int cmd)
+{
+       struct igb_adapter *adapter = netdev_priv(netdev);
+       struct e1000_hw *hw = &adapter->hw;
+       struct hwtstamp_config config;
+       u32 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
+       u32 tsync_rx_ctl_bit = E1000_TSYNCRXCTL_ENABLED;
+       u32 tsync_rx_ctl_type = 0;
+       u32 tsync_rx_cfg = 0;
+       int is_l4 = 0;
+       int is_l2 = 0;
+       short port = 319; /* PTP */
+       u32 regval;
+
+       if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+               return -EFAULT;
+
+       /* reserved for future extensions */
+       if (config.flags)
+               return -EINVAL;
+
+       switch (config.tx_type) {
+       case HWTSTAMP_TX_OFF:
+               tsync_tx_ctl_bit = 0;
+               break;
+       case HWTSTAMP_TX_ON:
+               tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
+               break;
+       default:
+               return -ERANGE;
+       }
+
+       switch (config.rx_filter) {
+       case HWTSTAMP_FILTER_NONE:
+               tsync_rx_ctl_bit = 0;
+               break;
+       case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+       case HWTSTAMP_FILTER_ALL:
+               /*
+                * register TSYNCRXCFG must be set, therefore it is not
+                * possible to time stamp both Sync and Delay_Req messages
+                * => fall back to time stamping all packets
+                */
+               tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_ALL;
+               config.rx_filter = HWTSTAMP_FILTER_ALL;
+               break;
+       case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+               tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
+               tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
+               is_l4 = 1;
+               break;
+       case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+               tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
+               tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
+               is_l4 = 1;
+               break;
+       case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+               tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
+               tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
+               is_l2 = 1;
+               is_l4 = 1;
+               config.rx_filter = HWTSTAMP_FILTER_SOME;
+               break;
+       case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+       case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+               tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
+               tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
+               is_l2 = 1;
+               is_l4 = 1;
+               config.rx_filter = HWTSTAMP_FILTER_SOME;
+               break;
+       case HWTSTAMP_FILTER_PTP_V2_EVENT:
+       case HWTSTAMP_FILTER_PTP_V2_SYNC:
+       case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+               tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_EVENT_V2;
+               config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+               is_l2 = 1;
+               break;
+       default:
+               return -ERANGE;
+       }
+
+       /* enable/disable TX */
+       regval = rd32(E1000_TSYNCTXCTL);
+       regval = (regval & ~E1000_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit;
+       wr32(E1000_TSYNCTXCTL, regval);
+
+       /* enable/disable RX, define which PTP packets are time stamped */
+       regval = rd32(E1000_TSYNCRXCTL);
+       regval = (regval & ~E1000_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit;
+       regval = (regval & ~0xE) | tsync_rx_ctl_type;
+       wr32(E1000_TSYNCRXCTL, regval);
+       wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
+
+       /*
+        * Ethertype Filter Queue Filter[0][15:0] = 0x88F7
+        *                                          (Ethertype to filter on)
+        * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
+        * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
+        */
+       wr32(E1000_ETQF0, is_l2 ? 0x440088f7 : 0);
+
+       /* L4 Queue Filter[0]: only filter by source and destination port */
+       wr32(E1000_SPQF0, htons(port));
+       wr32(E1000_IMIREXT(0), is_l4 ?
+            ((1<<12) | (1<<19) /* bypass size and control flags */) : 0);
+       wr32(E1000_IMIR(0), is_l4 ?
+            (htons(port)
+             | (0<<16) /* immediate interrupt disabled */
+             | 0 /* (1<<17) bit cleared: do not bypass
+                    destination port check */)
+               : 0);
+       wr32(E1000_FTQF0, is_l4 ?
+            (0x11 /* UDP */
+             | (1<<15) /* VF not compared */
+             | (1<<27) /* Enable Timestamping */
+             | (7<<28) /* only source port filter enabled,
+                          source/target address and protocol
+                          masked */)
+            : ((1<<15) | (15<<28) /* all mask bits set = filter not
+                                     enabled */));
+
+       wrfl();
+
+       adapter->hwtstamp_config = config;
+
+       /* clear TX/RX time stamp registers, just to be sure */
+       regval = rd32(E1000_TXSTMPH);
+       regval = rd32(E1000_RXSTMPH);
+
+       return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+               -EFAULT : 0;
+}
+
 /**
  * igb_ioctl -
  * @netdev:
@@ -4083,6 +4446,8 @@ static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
        case SIOCGMIIREG:
        case SIOCSMIIREG:
                return igb_mii_ioctl(netdev, ifr, cmd);
+       case SIOCSHWTSTAMP:
+               return igb_hwtstamp_ioctl(netdev, ifr, cmd);
        default:
                return -EOPNOTSUPP;
        }
@@ -4227,7 +4592,6 @@ int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
        return 0;
 }
 
-
 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
 {
        struct net_device *netdev = pci_get_drvdata(pdev);
This page took 0.039157 seconds and 5 git commands to generate.