net/mlx4_en: Use affinity hint
authorYuval Atias <yuvala@mellanox.com>
Sun, 25 May 2014 14:47:27 +0000 (17:47 +0300)
committerDavid S. Miller <davem@davemloft.net>
Mon, 2 Jun 2014 02:16:29 +0000 (19:16 -0700)
The "affinity hint" mechanism is used by the user space
daemon, irqbalance, to indicate a preferred CPU mask for irqs.
irqbalance can use this hint to balance the irqs between the
cpus indicated by the mask.

We wish the HCA to preferentially map the IRQs it uses to numa cores
close to it.  To accomplish this, we use cpumask_set_cpu_local_first(), which
sets the affinity hint according to the following policy:
First it maps IRQs to "close" numa cores.  If these are exhausted, the
remaining IRQs are mapped to "far" numa cores.

Signed-off-by: Yuval Atias <yuvala@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/infiniband/hw/mlx4/main.c
drivers/net/ethernet/mellanox/mlx4/en_cq.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/eq.c
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
include/linux/mlx4/device.h

index 199c7896f08188ca40fa9cd30462f0631d387338..58b1f239ac2bb2db2eef3591a869e447d877c2d0 100644 (file)
@@ -1897,7 +1897,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
                                 i, j, dev->pdev->bus->name);
                        /* Set IRQ for specific name (per ring) */
                        if (mlx4_assign_eq(dev, name, NULL,
-                                          &ibdev->eq_table[eq])) {
+                                          &ibdev->eq_table[eq], NULL)) {
                                /* Use legacy (same as mlx4_en driver) */
                                pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq);
                                ibdev->eq_table[eq] =
index 636963db598ae0025f52aa806f323c61261b7d97..ea2cd72e53680a2adc65e61f2bd35efde42f79e8 100644 (file)
@@ -118,11 +118,15 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
        if (cq->is_tx == RX) {
                if (mdev->dev->caps.comp_pool) {
                        if (!cq->vector) {
+                               struct mlx4_en_rx_ring *ring =
+                                       priv->rx_ring[cq->ring];
+
                                sprintf(name, "%s-%d", priv->dev->name,
                                        cq->ring);
                                /* Set IRQ for specific name (per ring) */
                                if (mlx4_assign_eq(mdev->dev, name, rmap,
-                                                  &cq->vector)) {
+                                                  &cq->vector,
+                                                  ring->affinity_mask)) {
                                        cq->vector = (cq->ring + 1 + priv->port)
                                            % mdev->dev->caps.num_comp_vectors;
                                        mlx4_warn(mdev, "Failed assigning an EQ to %s, falling back to legacy EQ's\n",
index 58209bd0c94c6ced62a5984dd72911669eaa7ef9..05d135572abcee114aef29406692bb3cd1e095d2 100644 (file)
@@ -1526,6 +1526,32 @@ static void mlx4_en_linkstate(struct work_struct *work)
        mutex_unlock(&mdev->state_lock);
 }
 
+static void mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
+{
+       struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx];
+       int numa_node = priv->mdev->dev->numa_node;
+
+       if (numa_node == -1)
+               return;
+
+       if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL)) {
+               en_err(priv, "Failed to allocate core mask\n");
+               return;
+       }
+
+       if (cpumask_set_cpu_local_first(ring_idx, numa_node,
+                                       ring->affinity_mask)) {
+               en_err(priv, "Failed setting affinity hint\n");
+               free_cpumask_var(ring->affinity_mask);
+               ring->affinity_mask = NULL;
+       }
+}
+
+static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
+{
+       free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask);
+       priv->rx_ring[ring_idx]->affinity_mask = NULL;
+}
 
 int mlx4_en_start_port(struct net_device *dev)
 {
@@ -1567,6 +1593,8 @@ int mlx4_en_start_port(struct net_device *dev)
 
                mlx4_en_cq_init_lock(cq);
 
+               mlx4_en_init_affinity_hint(priv, i);
+
                err = mlx4_en_activate_cq(priv, cq, i);
                if (err) {
                        en_err(priv, "Failed activating Rx CQ\n");
@@ -1847,6 +1875,8 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
                        msleep(1);
                mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]);
                mlx4_en_deactivate_cq(priv, cq);
+
+               mlx4_en_free_affinity_hint(priv, i);
        }
 }
 
index d954ec1eac173752e23e57653ccd4d2cae2de944..f91659e5fa13e2a5f70b5f8d6e4441d060edf0fb 100644 (file)
@@ -1376,7 +1376,7 @@ int mlx4_test_interrupts(struct mlx4_dev *dev)
 EXPORT_SYMBOL(mlx4_test_interrupts);
 
 int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
-                  int *vector)
+                  int *vector, cpumask_var_t cpu_hint_mask)
 {
 
        struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1411,6 +1411,15 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
                        }
                        mlx4_assign_irq_notifier(priv, dev,
                                                 priv->eq_table.eq[vec].irq);
+                       if (cpu_hint_mask) {
+                               err = irq_set_affinity_hint(
+                                               priv->eq_table.eq[vec].irq,
+                                               cpu_hint_mask);
+                               if (err) {
+                                       mlx4_warn(dev, "Failed setting affinity hint\n");
+                                       /*we dont want to break here*/
+                               }
+                       }
 
                        eq_set_ci(&priv->eq_table.eq[vec], 1);
                }
@@ -1441,6 +1450,8 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec)
                        irq_set_affinity_notifier(
                                priv->eq_table.eq[vec].irq,
                                NULL);
+                       irq_set_affinity_hint(priv->eq_table.eq[vec].irq,
+                                             NULL);
                        free_irq(priv->eq_table.eq[vec].irq,
                                 &priv->eq_table.eq[vec]);
                        priv->msix_ctl.pool_bm &= ~(1ULL << i);
index b5db1bf361dc6adac67dbade22f04ab03601956c..0e15295bedd671a0c3fc8c1ebbf0372052c489b7 100644 (file)
@@ -313,6 +313,7 @@ struct mlx4_en_rx_ring {
        unsigned long csum_ok;
        unsigned long csum_none;
        int hwtstamp_rx_filter;
+       cpumask_var_t affinity_mask;
 };
 
 struct mlx4_en_cq {
index ca38871a585cf2f0f0d60b7dc6aa44b73257e388..b9b70e00e3c146f85189534059774acad93d79a0 100644 (file)
@@ -1161,7 +1161,7 @@ int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
 int mlx4_SYNC_TPT(struct mlx4_dev *dev);
 int mlx4_test_interrupts(struct mlx4_dev *dev);
 int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
-                  int *vector);
+                  int *vector, cpumask_t *cpu_hint_mask);
 void mlx4_release_eq(struct mlx4_dev *dev, int vec);
 
 int mlx4_get_phys_port_id(struct mlx4_dev *dev);
This page took 0.030617 seconds and 5 git commands to generate.