/* Intel Ethernet Switch Host Interface Driver
 * Copyright(c) 2013 - 2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Contact Information:
 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
 */

#include <linux/types.h>
#include <linux/module.h>
#include <linux/if_macvlan.h>
#include <linux/prefetch.h>

#include "fm10k.h"

#define DRV_VERSION	"0.12.2-k"
const char fm10k_driver_version[] = DRV_VERSION;
char fm10k_driver_name[] = "fm10k";
static const char fm10k_driver_string[] =
	"Intel(R) Ethernet Switch Host Interface Driver";
static const char fm10k_copyright[] =
	"Copyright (c) 2013 Intel Corporation.";

MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
MODULE_DESCRIPTION("Intel(R) Ethernet Switch Host Interface Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

/**
 * fm10k_init_module - Driver Registration Routine
 *
 * fm10k_init_module is the first routine called when the driver is
 * loaded.  All it does is register with the PCI subsystem.
 **/
static int __init fm10k_init_module(void)
{
	pr_info("%s - version %s\n", fm10k_driver_string, fm10k_driver_version);
	pr_info("%s\n", fm10k_copyright);

	return fm10k_register_pci_driver();
}
module_init(fm10k_init_module);

/**
 * fm10k_exit_module - Driver Exit Cleanup Routine
 *
 * fm10k_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit fm10k_exit_module(void)
{
	fm10k_unregister_pci_driver();
}
module_exit(fm10k_exit_module);

static bool fm10k_alloc_mapped_page(struct fm10k_ring *rx_ring,
				    struct fm10k_rx_buffer *bi)
{
	struct page *page = bi->page;
	dma_addr_t dma;

	/* Only page will be NULL if buffer was consumed */
	if (likely(page))
		return true;

	/* alloc new page for storage */
	page = alloc_page(GFP_ATOMIC | __GFP_COLD);
	if (unlikely(!page)) {
		rx_ring->rx_stats.alloc_failed++;
		return false;
	}

	/* map page for use */
	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);

	/* if mapping failed free memory back to system since
	 * there isn't much point in holding memory we can't use
	 */
	if (dma_mapping_error(rx_ring->dev, dma)) {
		__free_page(page);
		bi->page = NULL;

		rx_ring->rx_stats.alloc_failed++;
		return false;
	}

	bi->dma = dma;
	bi->page = page;
	bi->page_offset = 0;

	return true;
}

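/* Rx buffers are built out of page halves: fm10k_alloc_mapped_page() only
 * allocates when bi->page is NULL, and a consumed half can be handed back
 * to the ring by the reuse helpers further down, so in the steady state
 * pages are recycled rather than freed and reallocated.
 */
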
/**
 * fm10k_alloc_rx_buffers - Replace used receive buffers
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 **/
void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count)
{
	union fm10k_rx_desc *rx_desc;
	struct fm10k_rx_buffer *bi;
	u16 i = rx_ring->next_to_use;

	/* nothing to do */
	if (!cleaned_count)
		return;

	rx_desc = FM10K_RX_DESC(rx_ring, i);
	bi = &rx_ring->rx_buffer[i];

	/* track i as a negative offset from the end of the ring so that
	 * i reaching zero flags a wrap back to descriptor 0
	 */
	i -= rx_ring->count;

	do {
		if (!fm10k_alloc_mapped_page(rx_ring, bi))
			break;

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		rx_desc->q.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);

		rx_desc++;
		bi++;
		i++;
		if (unlikely(!i)) {
			rx_desc = FM10K_RX_DESC(rx_ring, 0);
			bi = rx_ring->rx_buffer;
			i -= rx_ring->count;
		}

		/* clear the hdr_addr for the next_to_use descriptor */
		rx_desc->q.hdr_addr = 0;

		cleaned_count--;
	} while (cleaned_count);

	i += rx_ring->count;

	if (rx_ring->next_to_use != i) {
		/* record the next descriptor to use */
		rx_ring->next_to_use = i;

		/* update next to alloc since we have filled the ring */
		rx_ring->next_to_alloc = i;

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch.  (Only
		 * applicable for weak-ordered memory model archs,
		 * such as IA-64).
		 */
		wmb();

		/* notify hardware of new descriptors */
		writel(i, rx_ring->tail);
	}
}

/**
 * fm10k_reuse_rx_page - page flip buffer and store it back on the ring
 * @rx_ring: rx descriptor ring to store buffers on
 * @old_buff: donor buffer to have page reused
 *
 * Synchronizes page for reuse by the interface
 **/
static void fm10k_reuse_rx_page(struct fm10k_ring *rx_ring,
				struct fm10k_rx_buffer *old_buff)
{
	struct fm10k_rx_buffer *new_buff;
	u16 nta = rx_ring->next_to_alloc;

	new_buff = &rx_ring->rx_buffer[nta];

	/* update, and store next to alloc */
	nta++;
	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;

	/* transfer page from old buffer to new buffer */
	memcpy(new_buff, old_buff, sizeof(struct fm10k_rx_buffer));

	/* sync the buffer for use by the device */
	dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma,
					 old_buff->page_offset,
					 FM10K_RX_BUFSZ,
					 DMA_FROM_DEVICE);
}

static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer,
				    struct page *page,
				    unsigned int truesize)
{
	/* avoid re-using remote pages */
	if (unlikely(page_to_nid(page) != numa_mem_id()))
		return false;

#if (PAGE_SIZE < 8192)
	/* if we are only owner of page we can reuse it */
	if (unlikely(page_count(page) != 1))
		return false;

	/* flip page offset to other buffer */
	rx_buffer->page_offset ^= FM10K_RX_BUFSZ;

	/* since we are the only owner of the page and we need to
	 * increment it, just set the value to 2 in order to avoid
	 * an unnecessary locked operation
	 */
	atomic_set(&page->_count, 2);
#else
	/* move offset up to the next cache line */
	rx_buffer->page_offset += truesize;

	if (rx_buffer->page_offset > (PAGE_SIZE - FM10K_RX_BUFSZ))
		return false;

	/* bump ref count on page before it is given to the stack */
	get_page(page);
#endif

	return true;
}

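/* Worked example for the small-page path above: with 4 KiB pages
 * FM10K_RX_BUFSZ is 2 KiB, so page_offset simply alternates between 0 and
 * 2048 on every reuse.  One half of the page is owned by the stack while
 * the other half is posted to the ring, which is exactly the two
 * references the atomic_set() accounts for.
 */
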
/**
 * fm10k_add_rx_frag - Add contents of Rx buffer to sk_buff
 * @rx_ring: rx descriptor ring to transact packets on
 * @rx_buffer: buffer containing page to add
 * @rx_desc: descriptor containing length of buffer written by hardware
 * @skb: sk_buff to place the data into
 *
 * This function will add the data contained in rx_buffer->page to the skb.
 * This is done either through a direct copy if the data in the buffer is
 * less than the skb header size, otherwise it will just attach the page as
 * a frag to the skb.
 *
 * The function will then update the page offset if necessary and return
 * true if the buffer can be reused by the interface.
 **/
static bool fm10k_add_rx_frag(struct fm10k_ring *rx_ring,
			      struct fm10k_rx_buffer *rx_buffer,
			      union fm10k_rx_desc *rx_desc,
			      struct sk_buff *skb)
{
	struct page *page = rx_buffer->page;
	unsigned int size = le16_to_cpu(rx_desc->w.length);
#if (PAGE_SIZE < 8192)
	unsigned int truesize = FM10K_RX_BUFSZ;
#else
	unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
#endif

	if ((size <= FM10K_RX_HDR_LEN) && !skb_is_nonlinear(skb)) {
		unsigned char *va = page_address(page) + rx_buffer->page_offset;

		/* copy length is rounded up to size of long to optimize
		 * memcpy performance
		 */
		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));

		/* we can reuse buffer as-is, just make sure it is local */
		if (likely(page_to_nid(page) == numa_mem_id()))
			return true;

		/* this page cannot be reused so discard it */
		put_page(page);
		return false;
	}

	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
			rx_buffer->page_offset, size, truesize);

	return fm10k_can_reuse_rx_page(rx_buffer, page, truesize);
}

static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring,
					     union fm10k_rx_desc *rx_desc,
					     struct sk_buff *skb)
{
	struct fm10k_rx_buffer *rx_buffer;
	struct page *page;

	rx_buffer = &rx_ring->rx_buffer[rx_ring->next_to_clean];

	page = rx_buffer->page;
	prefetchw(page);

	if (likely(!skb)) {
		void *page_addr = page_address(page) +
				  rx_buffer->page_offset;

		/* prefetch first cache line of first page */
		prefetch(page_addr);
#if L1_CACHE_BYTES < 128
		prefetch(page_addr + L1_CACHE_BYTES);
#endif

		/* allocate a skb to store the frags */
		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
						FM10K_RX_HDR_LEN);
		if (unlikely(!skb)) {
			rx_ring->rx_stats.alloc_failed++;
			return NULL;
		}

		/* we will be copying header into skb->data in
		 * pskb_may_pull so it is in our interest to prefetch
		 * it now to avoid a possible cache miss
		 */
		prefetchw(skb->data);
	}

	/* we are reusing so sync this buffer for CPU use */
	dma_sync_single_range_for_cpu(rx_ring->dev,
				      rx_buffer->dma,
				      rx_buffer->page_offset,
				      FM10K_RX_BUFSZ,
				      DMA_FROM_DEVICE);

	/* pull page into skb */
	if (fm10k_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
		/* hand second half of page back to the ring */
		fm10k_reuse_rx_page(rx_ring, rx_buffer);
	} else {
		/* we are not reusing the buffer so unmap it */
		dma_unmap_page(rx_ring->dev, rx_buffer->dma,
			       PAGE_SIZE, DMA_FROM_DEVICE);
	}

	/* clear contents of rx_buffer */
	rx_buffer->page = NULL;

	return skb;
}

/**
 * fm10k_process_skb_fields - Populate skb header fields from Rx descriptor
 * @rx_ring: rx descriptor ring packet is being transacted on
 * @rx_desc: pointer to the EOP Rx descriptor
 * @skb: pointer to current skb being populated
 *
 * This function checks the ring, descriptor, and packet information in
 * order to populate the hash, checksum, VLAN, timestamp, protocol, and
 * other fields within the skb.
 **/
static unsigned int fm10k_process_skb_fields(struct fm10k_ring *rx_ring,
					     union fm10k_rx_desc *rx_desc,
					     struct sk_buff *skb)
{
	unsigned int len = skb->len;

	FM10K_CB(skb)->fi.w.vlan = rx_desc->w.vlan;

	skb_record_rx_queue(skb, rx_ring->queue_index);

	FM10K_CB(skb)->fi.d.glort = rx_desc->d.glort;

	if (rx_desc->w.vlan) {
		u16 vid = le16_to_cpu(rx_desc->w.vlan);

		if (vid != rx_ring->vid)
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
	}

	skb->protocol = eth_type_trans(skb, rx_ring->netdev);

	return len;
}

/**
 * fm10k_is_non_eop - process handling of non-EOP buffers
 * @rx_ring: Rx ring being processed
 * @rx_desc: Rx descriptor for current buffer
 *
 * This function updates next to clean.  If the buffer is an EOP buffer
 * this function exits returning false, otherwise it will place the
 * sk_buff in the next buffer to be chained and return true indicating
 * that this is in fact a non-EOP buffer.
 **/
static bool fm10k_is_non_eop(struct fm10k_ring *rx_ring,
			     union fm10k_rx_desc *rx_desc)
{
	u32 ntc = rx_ring->next_to_clean + 1;

	/* fetch, update, and store next to clean */
	ntc = (ntc < rx_ring->count) ? ntc : 0;
	rx_ring->next_to_clean = ntc;

	prefetch(FM10K_RX_DESC(rx_ring, ntc));

	if (likely(fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_EOP)))
		return false;

	return true;
}

/**
 * fm10k_pull_tail - fm10k specific version of skb_pull_tail
 * @rx_ring: rx descriptor ring packet is being transacted on
 * @rx_desc: pointer to the EOP Rx descriptor
 * @skb: pointer to current skb being adjusted
 *
 * This function is an fm10k specific version of __pskb_pull_tail.  The
 * main difference between this version and the original function is that
 * this function can make several assumptions about the state of things
 * that allow for significant optimizations versus the standard function.
 * As a result we can do things like drop a frag and maintain an accurate
 * truesize for the skb.
 **/
static void fm10k_pull_tail(struct fm10k_ring *rx_ring,
			    union fm10k_rx_desc *rx_desc,
			    struct sk_buff *skb)
{
	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
	unsigned char *va;
	unsigned int pull_len;

	/* it is valid to use page_address instead of kmap since we are
	 * working with pages allocated out of the lowmem pool per
	 * alloc_page(GFP_ATOMIC)
	 */
	va = skb_frag_address(frag);

	/* we need the header to contain the greater of either ETH_HLEN or
	 * 60 bytes if the skb->len is less than 60 for skb_pad.
	 */
	pull_len = eth_get_headlen(va, FM10K_RX_HDR_LEN);

	/* align pull length to size of long to optimize memcpy performance */
	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));

	/* update all of the pointers */
	skb_frag_size_sub(frag, pull_len);
	frag->page_offset += pull_len;
	skb->data_len -= pull_len;
	skb->tail += pull_len;
}

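/* The open-coded tail update above stays in bounds because pull_len is
 * capped at FM10K_RX_HDR_LEN by eth_get_headlen() and the skb linear
 * area was sized to FM10K_RX_HDR_LEN in fm10k_fetch_rx_buffer(), so the
 * aligned copy cannot run past the end of the headroom.
 */
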
/**
 * fm10k_cleanup_headers - Correct corrupted or empty headers
 * @rx_ring: rx descriptor ring packet is being transacted on
 * @rx_desc: pointer to the EOP Rx descriptor
 * @skb: pointer to current skb being fixed
 *
 * Address the case where we are pulling data in on pages only
 * and as such no data is present in the skb header.
 *
 * In addition if skb is not at least 60 bytes we need to pad it so that
 * it is large enough to qualify as a valid Ethernet frame.
 *
 * Returns true if an error was encountered and skb was freed.
 **/
static bool fm10k_cleanup_headers(struct fm10k_ring *rx_ring,
				  union fm10k_rx_desc *rx_desc,
				  struct sk_buff *skb)
{
	if (unlikely((fm10k_test_staterr(rx_desc,
					 FM10K_RXD_STATUS_RXE)))) {
		dev_kfree_skb_any(skb);
		rx_ring->rx_stats.errors++;
		return true;
	}

	/* place header in linear portion of buffer */
	if (skb_is_nonlinear(skb))
		fm10k_pull_tail(rx_ring, rx_desc, skb);

	/* if skb_pad returns an error the skb was freed */
	if (unlikely(skb->len < 60)) {
		int pad_len = 60 - skb->len;

		if (skb_pad(skb, pad_len))
			return true;
		__skb_put(skb, pad_len);
	}

	return false;
}

/**
 * fm10k_receive_skb - helper function to handle rx indications
 * @q_vector: structure containing interrupt and ring information
 * @skb: packet to send up
 **/
static void fm10k_receive_skb(struct fm10k_q_vector *q_vector,
			      struct sk_buff *skb)
{
	napi_gro_receive(&q_vector->napi, skb);
}

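/* fm10k_clean_rx_irq - process completed descriptors on one Rx ring
 *
 * Walks the ring while descriptors have their DD bit set, builds one skb
 * per frame, and hands completed frames to the stack via GRO.  Returns
 * true if fewer than @budget packets were cleaned, i.e. polling may stop.
 */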
static bool fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector,
			       struct fm10k_ring *rx_ring,
			       int budget)
{
	struct sk_buff *skb = rx_ring->skb;
	unsigned int total_bytes = 0, total_packets = 0;
	u16 cleaned_count = fm10k_desc_unused(rx_ring);

	do {
		union fm10k_rx_desc *rx_desc;

		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= FM10K_RX_BUFFER_WRITE) {
			fm10k_alloc_rx_buffers(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean);

		if (!fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_DD))
			break;

		/* This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * RXD_STATUS_DD bit is set
		 */
		rmb();

		/* retrieve a buffer from the ring */
		skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb);

		/* exit if we failed to retrieve a buffer */
		if (!skb)
			break;

		cleaned_count++;

		/* fetch next buffer in frame if non-eop */
		if (fm10k_is_non_eop(rx_ring, rx_desc))
			continue;

		/* verify the packet layout is correct */
		if (fm10k_cleanup_headers(rx_ring, rx_desc, skb)) {
			skb = NULL;
			continue;
		}

		/* populate checksum, timestamp, VLAN, and protocol */
		total_bytes += fm10k_process_skb_fields(rx_ring, rx_desc, skb);

		fm10k_receive_skb(q_vector, skb);

		/* reset skb pointer */
		skb = NULL;

		/* update budget accounting */
		total_packets++;
	} while (likely(total_packets < budget));

	/* place incomplete frames back on ring for completion */
	rx_ring->skb = skb;

	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += total_packets;
	rx_ring->stats.bytes += total_bytes;
	u64_stats_update_end(&rx_ring->syncp);
	q_vector->rx.total_packets += total_packets;
	q_vector->rx.total_bytes += total_bytes;

	return total_packets < budget;
}

static bool fm10k_tx_desc_push(struct fm10k_ring *tx_ring,
			       struct fm10k_tx_desc *tx_desc, u16 i,
			       dma_addr_t dma, unsigned int size, u8 desc_flags)
{
	/* set RS and INT for last frame in a cache line */
	if ((++i & (FM10K_TXD_WB_FIFO_SIZE - 1)) == 0)
		desc_flags |= FM10K_TXD_FLAG_RS | FM10K_TXD_FLAG_INT;

	/* record values to descriptor */
	tx_desc->buffer_addr = cpu_to_le64(dma);
	tx_desc->flags = desc_flags;
	tx_desc->buflen = cpu_to_le16(size);

	/* return true if we just wrapped the ring */
	return i == tx_ring->count;
}

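/* fm10k_tx_map - map an skb for DMA and post its descriptors to the ring
 *
 * Maps the linear data and every page fragment of first->skb, splitting
 * buffers larger than FM10K_MAX_DATA_PER_TXD across multiple descriptors,
 * then bumps the tail register.  On a mapping error all buffers mapped so
 * far are unwound and next_to_use is left pointing at the first buffer.
 */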
static void fm10k_tx_map(struct fm10k_ring *tx_ring,
			 struct fm10k_tx_buffer *first)
{
	struct sk_buff *skb = first->skb;
	struct fm10k_tx_buffer *tx_buffer;
	struct fm10k_tx_desc *tx_desc;
	struct skb_frag_struct *frag;
	unsigned char *data;
	dma_addr_t dma;
	unsigned int data_len, size;
	u16 i = tx_ring->next_to_use;
	u8 flags = 0;

	tx_desc = FM10K_TX_DESC(tx_ring, i);

	/* add HW VLAN tag */
	if (vlan_tx_tag_present(skb))
		tx_desc->vlan = cpu_to_le16(vlan_tx_tag_get(skb));
	else
		tx_desc->vlan = 0;

	size = skb_headlen(skb);
	data = skb->data;

	dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);

	data_len = skb->data_len;
	tx_buffer = first;

	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
		if (dma_mapping_error(tx_ring->dev, dma))
			goto dma_error;

		/* record length, and DMA address */
		dma_unmap_len_set(tx_buffer, len, size);
		dma_unmap_addr_set(tx_buffer, dma, dma);

		while (unlikely(size > FM10K_MAX_DATA_PER_TXD)) {
			if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++, dma,
					       FM10K_MAX_DATA_PER_TXD, flags)) {
				tx_desc = FM10K_TX_DESC(tx_ring, 0);
				i = 0;
			}

			dma += FM10K_MAX_DATA_PER_TXD;
			size -= FM10K_MAX_DATA_PER_TXD;
		}

		if (likely(!data_len))
			break;

		if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++,
				       dma, size, flags)) {
			tx_desc = FM10K_TX_DESC(tx_ring, 0);
			i = 0;
		}

		size = skb_frag_size(frag);
		data_len -= size;

		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
				       DMA_TO_DEVICE);

		tx_buffer = &tx_ring->tx_buffer[i];
	}

	/* write last descriptor with LAST bit set */
	flags |= FM10K_TXD_FLAG_LAST;

	if (fm10k_tx_desc_push(tx_ring, tx_desc, i++, dma, size, flags))
		i = 0;

	/* record bytecount for BQL */
	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);

	/* record SW timestamp if HW timestamp is not available */
	skb_tx_timestamp(first->skb);

	/* Force memory writes to complete before letting h/w know there
	 * are new descriptors to fetch.  (Only applicable for weak-ordered
	 * memory model archs, such as IA-64).
	 *
	 * We also need this memory barrier to make certain all of the
	 * status bits have been updated before next_to_watch is written.
	 */
	wmb();

	/* set next_to_watch value indicating a packet is present */
	first->next_to_watch = tx_desc;

	tx_ring->next_to_use = i;

	/* notify HW of packet */
	writel(i, tx_ring->tail);

	/* we need this if more than one processor can write to our tail
	 * at a time, it synchronizes IO on IA64/Altix systems
	 */
	mmiowb();

	return;

dma_error:
	dev_err(tx_ring->dev, "TX DMA map failed\n");

	/* clear dma mappings for failed tx_buffer map */
	for (;;) {
		tx_buffer = &tx_ring->tx_buffer[i];
		fm10k_unmap_and_free_tx_resource(tx_ring, tx_buffer);
		if (tx_buffer == first)
			break;
		if (i == 0)
			i = tx_ring->count;
		i--;
	}

	tx_ring->next_to_use = i;
}

static int __fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size)
{
	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);

	smp_mb();

	/* We need to check again in a case another CPU has just
	 * made room available.
	 */
	if (likely(fm10k_desc_unused(tx_ring) < size))
		return -EBUSY;

	/* A reprieve! - use start_queue because it doesn't call schedule */
	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
	++tx_ring->tx_stats.restart_queue;
	return 0;
}

static inline int fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size)
{
	if (likely(fm10k_desc_unused(tx_ring) >= size))
		return 0;

	return __fm10k_maybe_stop_tx(tx_ring, size);
}

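/* fm10k_xmit_frame_ring - transmit an skb on a specific Tx ring
 *
 * Counts the descriptors the frame will need (one per
 * FM10K_MAX_DATA_PER_TXD chunk of each buffer), backs off with
 * NETDEV_TX_BUSY when the ring is too full, and otherwise records the
 * frame in its first tx_buffer and maps it onto the ring.
 */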
netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb,
				  struct fm10k_ring *tx_ring)
{
	struct fm10k_tx_buffer *first;
	u32 tx_flags = 0;
#if PAGE_SIZE > FM10K_MAX_DATA_PER_TXD
	unsigned short f;
#endif
	u16 count = TXD_USE_COUNT(skb_headlen(skb));

	/* need: 1 descriptor per page * PAGE_SIZE/FM10K_MAX_DATA_PER_TXD,
	 *       + 1 desc for skb_headlen/FM10K_MAX_DATA_PER_TXD,
	 *       + 2 desc gap to keep tail from touching head
	 * otherwise try next time
	 */
#if PAGE_SIZE > FM10K_MAX_DATA_PER_TXD
	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
#else
	count += skb_shinfo(skb)->nr_frags;
#endif
	if (fm10k_maybe_stop_tx(tx_ring, count + 3)) {
		tx_ring->tx_stats.tx_busy++;
		return NETDEV_TX_BUSY;
	}

	/* record the location of the first descriptor for this packet */
	first = &tx_ring->tx_buffer[tx_ring->next_to_use];

	first->skb = skb;
	first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
	first->gso_segs = 1;

	/* record initial flags and protocol */
	first->tx_flags = tx_flags;

	fm10k_tx_map(tx_ring, first);

	fm10k_maybe_stop_tx(tx_ring, DESC_NEEDED);

	return NETDEV_TX_OK;
}

static u64 fm10k_get_tx_completed(struct fm10k_ring *ring)
{
	return ring->stats.packets;
}

static u64 fm10k_get_tx_pending(struct fm10k_ring *ring)
{
	/* use SW head and tail until we have real hardware */
	u32 head = ring->next_to_clean;
	u32 tail = ring->next_to_use;

	return ((head <= tail) ? tail : tail + ring->count) - head;
}

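/* fm10k_check_tx_hang - detect a stalled Tx queue
 *
 * Returns true only when two consecutive checks observe pending packets
 * with no completions in between; the first such observation merely arms
 * the __FM10K_HANG_CHECK_ARMED bit.
 */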
bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring)
{
	u32 tx_done = fm10k_get_tx_completed(tx_ring);
	u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
	u32 tx_pending = fm10k_get_tx_pending(tx_ring);

	clear_check_for_tx_hang(tx_ring);

	/* Check for a hung queue, but be thorough.  This verifies
	 * that a transmit has been completed since the previous
	 * check AND there is at least one packet pending.  By
	 * requiring this to fail twice we avoid races with
	 * clearing the ARMED bit and conditions where we
	 * run the check_tx_hang logic with a transmit completion
	 * pending but without time to complete it yet.
	 */
	if (!tx_pending || (tx_done_old != tx_done)) {
		/* update completed stats and continue */
		tx_ring->tx_stats.tx_done_old = tx_done;
		/* reset the countdown */
		clear_bit(__FM10K_HANG_CHECK_ARMED, &tx_ring->state);

		return false;
	}

	/* make sure it is true for two checks in a row */
	return test_and_set_bit(__FM10K_HANG_CHECK_ARMED, &tx_ring->state);
}

/**
 * fm10k_tx_timeout_reset - initiate reset due to Tx timeout
 * @interface: driver private struct
 **/
void fm10k_tx_timeout_reset(struct fm10k_intfc *interface)
{
	/* Do the reset outside of interrupt context */
	if (!test_bit(__FM10K_DOWN, &interface->state)) {
		netdev_err(interface->netdev, "Reset interface\n");
		interface->tx_timeout_count++;
		interface->flags |= FM10K_FLAG_RESET_REQUESTED;
		fm10k_service_event_schedule(interface);
	}
}

/**
 * fm10k_clean_tx_irq - Reclaim resources after transmit completes
 * @q_vector: structure containing interrupt and ring information
 * @tx_ring: tx ring to clean
 **/
static bool fm10k_clean_tx_irq(struct fm10k_q_vector *q_vector,
			       struct fm10k_ring *tx_ring)
{
	struct fm10k_intfc *interface = q_vector->interface;
	struct fm10k_tx_buffer *tx_buffer;
	struct fm10k_tx_desc *tx_desc;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int budget = q_vector->tx.work_limit;
	unsigned int i = tx_ring->next_to_clean;

	if (test_bit(__FM10K_DOWN, &interface->state))
		return true;

	tx_buffer = &tx_ring->tx_buffer[i];
	tx_desc = FM10K_TX_DESC(tx_ring, i);
	i -= tx_ring->count;

	do {
		struct fm10k_tx_desc *eop_desc = tx_buffer->next_to_watch;

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* prevent any other reads prior to eop_desc */
		read_barrier_depends();

		/* if DD is not set pending work has not been completed */
		if (!(eop_desc->flags & FM10K_TXD_FLAG_DONE))
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buffer->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buffer->bytecount;
		total_packets += tx_buffer->gso_segs;

		/* free the skb */
		dev_consume_skb_any(tx_buffer->skb);

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 dma_unmap_addr(tx_buffer, dma),
				 dma_unmap_len(tx_buffer, len),
				 DMA_TO_DEVICE);

		/* clear tx_buffer data */
		tx_buffer->skb = NULL;
		dma_unmap_len_set(tx_buffer, len, 0);

		/* unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buffer++;
			tx_desc++;
			i++;
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buffer = tx_ring->tx_buffer;
				tx_desc = FM10K_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (dma_unmap_len(tx_buffer, len)) {
				dma_unmap_page(tx_ring->dev,
					       dma_unmap_addr(tx_buffer, dma),
					       dma_unmap_len(tx_buffer, len),
					       DMA_TO_DEVICE);
				dma_unmap_len_set(tx_buffer, len, 0);
			}
		}

		/* move us one more past the eop_desc for start of next pkt */
		tx_buffer++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buffer = tx_ring->tx_buffer;
			tx_desc = FM10K_TX_DESC(tx_ring, 0);
		}

		/* issue prefetch for next Tx descriptor */
		prefetch(tx_desc);

		/* update budget accounting */
		budget--;
	} while (likely(budget));

	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.bytes += total_bytes;
	tx_ring->stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->syncp);
	q_vector->tx.total_bytes += total_bytes;
	q_vector->tx.total_packets += total_packets;

	if (check_for_tx_hang(tx_ring) && fm10k_check_tx_hang(tx_ring)) {
		/* schedule immediate reset if we believe we hung */
		struct fm10k_hw *hw = &interface->hw;

		netif_err(interface, drv, tx_ring->netdev,
			  "Detected Tx Unit Hang\n"
			  "  Tx Queue             <%d>\n"
			  "  TDH, TDT             <%x>, <%x>\n"
			  "  next_to_use          <%x>\n"
			  "  next_to_clean        <%x>\n",
			  tx_ring->queue_index,
			  fm10k_read_reg(hw, FM10K_TDH(tx_ring->reg_idx)),
			  fm10k_read_reg(hw, FM10K_TDT(tx_ring->reg_idx)),
			  tx_ring->next_to_use, i);

		netif_stop_subqueue(tx_ring->netdev,
				    tx_ring->queue_index);

		netif_info(interface, probe, tx_ring->netdev,
			   "tx hang %d detected on queue %d, resetting interface\n",
			   interface->tx_timeout_count + 1,
			   tx_ring->queue_index);

		fm10k_tx_timeout_reset(interface);

		/* the netdev is about to reset, no point in enabling stuff */
		return true;
	}

	/* notify netdev of completed buffers */
	netdev_tx_completed_queue(txring_txq(tx_ring),
				  total_packets, total_bytes);

#define TX_WAKE_THRESHOLD min_t(u16, FM10K_MIN_TXD - 1, DESC_NEEDED * 2)
	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
		     (fm10k_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !test_bit(__FM10K_DOWN, &interface->state)) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);
			++tx_ring->tx_stats.restart_queue;
		}
	}

	return !!budget;
}

/**
 * fm10k_update_itr - update the dynamic ITR value based on packet size
 *
 * Stores a new ITR value based strictly on packet size.  The
 * divisors and thresholds used by this function were determined based
 * on theoretical maximum wire speed and testing data, in order to
 * minimize response time while increasing bulk throughput.
 *
 * @ring_container: Container for rings to have ITR updated
 **/
static void fm10k_update_itr(struct fm10k_ring_container *ring_container)
{
	unsigned int avg_wire_size, packets;

	/* Only update ITR if we are using adaptive setting */
	if (!(ring_container->itr & FM10K_ITR_ADAPTIVE))
		goto clear_counts;

	packets = ring_container->total_packets;
	if (!packets)
		goto clear_counts;

	avg_wire_size = ring_container->total_bytes / packets;

	/* Add 24 bytes to size to account for CRC, preamble, and gap */
	avg_wire_size += 24;

	/* Don't starve jumbo frames */
	if (avg_wire_size > 3000)
		avg_wire_size = 3000;

	/* Give a little boost to mid-size frames */
	if ((avg_wire_size > 300) && (avg_wire_size < 1200))
		avg_wire_size /= 3;
	else
		avg_wire_size /= 2;

	/* write back value and retain adaptive flag */
	ring_container->itr = avg_wire_size | FM10K_ITR_ADAPTIVE;

clear_counts:
	ring_container->total_bytes = 0;
	ring_container->total_packets = 0;
}

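/* Illustrative numbers, assuming the divisors above: a stream of 64-byte
 * frames averages 64 + 24 = 88 bytes on the wire and falls through to
 * the /2 case for an ITR interval of 44, while 1514-byte frames give
 * (1514 + 24) / 2 = 769, so the interrupt rate drops as the average
 * frame size grows.
 */
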
static void fm10k_qv_enable(struct fm10k_q_vector *q_vector)
{
	/* Enable auto-mask and clear the current mask */
	u32 itr = FM10K_ITR_ENABLE;

	/* Update Tx ITR */
	fm10k_update_itr(&q_vector->tx);

	/* Update Rx ITR */
	fm10k_update_itr(&q_vector->rx);

	/* Store Tx itr in timer slot 0 */
	itr |= (q_vector->tx.itr & FM10K_ITR_MAX);

	/* Shift Rx itr to timer slot 1 */
	itr |= (q_vector->rx.itr & FM10K_ITR_MAX) << FM10K_ITR_INTERVAL1_SHIFT;

	/* Write the final value to the ITR register */
	writel(itr, q_vector->itr);
}

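/* fm10k_poll - NAPI polling callback for a q_vector
 *
 * Cleans every Tx ring, splits the Rx budget evenly across the Rx rings,
 * and only leaves polling mode (re-enabling the vector's interrupt via
 * fm10k_qv_enable) once every ring reports it finished under budget.
 */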
static int fm10k_poll(struct napi_struct *napi, int budget)
{
	struct fm10k_q_vector *q_vector =
			       container_of(napi, struct fm10k_q_vector, napi);
	struct fm10k_ring *ring;
	int per_ring_budget;
	bool clean_complete = true;

	fm10k_for_each_ring(ring, q_vector->tx)
		clean_complete &= fm10k_clean_tx_irq(q_vector, ring);

	/* attempt to distribute budget to each queue fairly, but don't
	 * allow the budget to go below 1 because we'll exit polling
	 */
	if (q_vector->rx.count > 1)
		per_ring_budget = max(budget/q_vector->rx.count, 1);
	else
		per_ring_budget = budget;

	fm10k_for_each_ring(ring, q_vector->rx)
		clean_complete &= fm10k_clean_rx_irq(q_vector, ring,
						     per_ring_budget);

	/* If all work not completed, return budget and keep polling */
	if (!clean_complete)
		return budget;

	/* all work done, exit the polling mode */
	napi_complete(napi);

	/* re-enable the q_vector */
	fm10k_qv_enable(q_vector);

	return 0;
}

/**
 * fm10k_set_num_queues: Allocate queues for device, feature dependent
 * @interface: board private structure to initialize
 *
 * This is the top level queue allocation routine.  The order here is very
 * important, starting with the "most" number of features turned on at once,
 * and ending with the smallest set of features.  This way large combinations
 * can be allocated if they're turned on, and smaller combinations are the
 * fallthrough conditions.
 **/
static void fm10k_set_num_queues(struct fm10k_intfc *interface)
{
	/* Start with base case */
	interface->num_rx_queues = 1;
	interface->num_tx_queues = 1;
}

/**
 * fm10k_alloc_q_vector - Allocate memory for a single interrupt vector
 * @interface: board private structure to initialize
 * @v_count: q_vectors allocated on interface, used for ring interleaving
 * @v_idx: index of vector in interface struct
 * @txr_count: total number of Tx rings to allocate
 * @txr_idx: index of first Tx ring to allocate
 * @rxr_count: total number of Rx rings to allocate
 * @rxr_idx: index of first Rx ring to allocate
 *
 * We allocate one q_vector.  If allocation fails we return -ENOMEM.
 **/
static int fm10k_alloc_q_vector(struct fm10k_intfc *interface,
				unsigned int v_count, unsigned int v_idx,
				unsigned int txr_count, unsigned int txr_idx,
				unsigned int rxr_count, unsigned int rxr_idx)
{
	struct fm10k_q_vector *q_vector;
	struct fm10k_ring *ring;
	int ring_count, size;

	ring_count = txr_count + rxr_count;
	size = sizeof(struct fm10k_q_vector) +
	       (sizeof(struct fm10k_ring) * ring_count);

	/* allocate q_vector and rings */
	q_vector = kzalloc(size, GFP_KERNEL);
	if (!q_vector)
		return -ENOMEM;

	/* initialize NAPI */
	netif_napi_add(interface->netdev, &q_vector->napi,
		       fm10k_poll, NAPI_POLL_WEIGHT);

	/* tie q_vector and interface together */
	interface->q_vector[v_idx] = q_vector;
	q_vector->interface = interface;
	q_vector->v_idx = v_idx;

	/* initialize pointer to rings */
	ring = q_vector->ring;

	/* save Tx ring container info */
	q_vector->tx.ring = ring;
	q_vector->tx.work_limit = FM10K_DEFAULT_TX_WORK;
	q_vector->tx.itr = interface->tx_itr;
	q_vector->tx.count = txr_count;

	while (txr_count) {
		/* assign generic ring traits */
		ring->dev = &interface->pdev->dev;
		ring->netdev = interface->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* apply Tx specific ring traits */
		ring->count = interface->tx_ring_count;
		ring->queue_index = txr_idx;

		/* assign ring to interface */
		interface->tx_ring[txr_idx] = ring;

		/* update count and index */
		txr_count--;
		txr_idx += v_count;

		/* push pointer to next ring */
		ring++;
	}

	/* save Rx ring container info */
	q_vector->rx.ring = ring;
	q_vector->rx.itr = interface->rx_itr;
	q_vector->rx.count = rxr_count;

	while (rxr_count) {
		/* assign generic ring traits */
		ring->dev = &interface->pdev->dev;
		ring->netdev = interface->netdev;

		/* configure backlink on ring */
		ring->q_vector = q_vector;

		/* apply Rx specific ring traits */
		ring->count = interface->rx_ring_count;
		ring->queue_index = rxr_idx;

		/* assign ring to interface */
		interface->rx_ring[rxr_idx] = ring;

		/* update count and index */
		rxr_count--;
		rxr_idx += v_count;

		/* push pointer to next ring */
		ring++;
	}

	return 0;
}

/**
 * fm10k_free_q_vector - Free memory allocated for specific interrupt vector
 * @interface: board private structure to initialize
 * @v_idx: Index of vector to be freed
 *
 * This function frees the memory allocated to the q_vector.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void fm10k_free_q_vector(struct fm10k_intfc *interface, int v_idx)
{
	struct fm10k_q_vector *q_vector = interface->q_vector[v_idx];
	struct fm10k_ring *ring;

	fm10k_for_each_ring(ring, q_vector->tx)
		interface->tx_ring[ring->queue_index] = NULL;

	fm10k_for_each_ring(ring, q_vector->rx)
		interface->rx_ring[ring->queue_index] = NULL;

	interface->q_vector[v_idx] = NULL;
	netif_napi_del(&q_vector->napi);
	kfree_rcu(q_vector, rcu);
}

/**
 * fm10k_alloc_q_vectors - Allocate memory for interrupt vectors
 * @interface: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int fm10k_alloc_q_vectors(struct fm10k_intfc *interface)
{
	unsigned int q_vectors = interface->num_q_vectors;
	unsigned int rxr_remaining = interface->num_rx_queues;
	unsigned int txr_remaining = interface->num_tx_queues;
	unsigned int rxr_idx = 0, txr_idx = 0, v_idx = 0;
	int err;

	if (q_vectors >= (rxr_remaining + txr_remaining)) {
		for (; rxr_remaining; v_idx++) {
			err = fm10k_alloc_q_vector(interface, q_vectors, v_idx,
						   0, 0, 1, rxr_idx);
			if (err)
				goto err_out;

			/* update counts and index */
			rxr_remaining--;
			rxr_idx++;
		}
	}

	for (; v_idx < q_vectors; v_idx++) {
		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);

		err = fm10k_alloc_q_vector(interface, q_vectors, v_idx,
					   tqpv, txr_idx,
					   rqpv, rxr_idx);
		if (err)
			goto err_out;

		/* update counts and index */
		rxr_remaining -= rqpv;
		txr_remaining -= tqpv;
		rxr_idx++;
		txr_idx++;
	}

	return 0;

err_out:
	interface->num_tx_queues = 0;
	interface->num_rx_queues = 0;
	interface->num_q_vectors = 0;

	while (v_idx--)
		fm10k_free_q_vector(interface, v_idx);

	return -ENOMEM;
}

/**
 * fm10k_free_q_vectors - Free memory allocated for interrupt vectors
 * @interface: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void fm10k_free_q_vectors(struct fm10k_intfc *interface)
{
	int v_idx = interface->num_q_vectors;

	interface->num_tx_queues = 0;
	interface->num_rx_queues = 0;
	interface->num_q_vectors = 0;

	while (v_idx--)
		fm10k_free_q_vector(interface, v_idx);
}

/**
 * fm10k_reset_msix_capability - reset MSI-X capability
 * @interface: board private structure to initialize
 *
 * Reset the MSI-X capability back to its starting state
 **/
static void fm10k_reset_msix_capability(struct fm10k_intfc *interface)
{
	pci_disable_msix(interface->pdev);
	kfree(interface->msix_entries);
	interface->msix_entries = NULL;
}

/**
 * fm10k_init_msix_capability - configure MSI-X capability
 * @interface: board private structure to initialize
 *
 * Attempt to configure the interrupts using the best available
 * capabilities of the hardware and the kernel.
 **/
static int fm10k_init_msix_capability(struct fm10k_intfc *interface)
{
	struct fm10k_hw *hw = &interface->hw;
	int v_budget, vector;

	/* It's easy to be greedy for MSI-X vectors, but it really
	 * doesn't do us much good if we have a lot more vectors
	 * than CPU's.  So let's be conservative and only ask for
	 * (roughly) the same number of vectors as there are CPU's.
	 * the default is to use pairs of vectors
	 */
	v_budget = max(interface->num_rx_queues, interface->num_tx_queues);
	v_budget = min_t(u16, v_budget, num_online_cpus());

	/* account for vectors not related to queues */
	v_budget += NON_Q_VECTORS(hw);

	/* At the same time, hardware can only support a maximum of
	 * hw.mac->max_msix_vectors vectors.  With features
	 * such as RSS and VMDq, we can easily surpass the number of Rx and Tx
	 * descriptor queues supported by our device.  Thus, we cap it off in
	 * those rare cases where the cpu count also exceeds our vector limit.
	 */
	v_budget = min_t(int, v_budget, hw->mac.max_msix_vectors);

	/* A failure in MSI-X entry allocation is fatal. */
	interface->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry),
					  GFP_KERNEL);
	if (!interface->msix_entries)
		return -ENOMEM;

	/* populate entry values */
	for (vector = 0; vector < v_budget; vector++)
		interface->msix_entries[vector].entry = vector;

	/* Attempt to enable MSI-X with requested value */
	v_budget = pci_enable_msix_range(interface->pdev,
					 interface->msix_entries,
					 MIN_MSIX_COUNT(hw),
					 v_budget);
	if (v_budget < 0) {
		kfree(interface->msix_entries);
		interface->msix_entries = NULL;
		return -ENOMEM;
	}

	/* record the number of queues available for q_vectors */
	interface->num_q_vectors = v_budget - NON_Q_VECTORS(hw);

	return 0;
}

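/* fm10k_init_reta - populate the RSS redirection table
 *
 * Each 32-bit reta register holds four one-byte queue indices, so the
 * table has FM10K_RETA_SIZE * 4 = 128 entries.  The multiply-and-mask
 * trick below computes entry n as (n * rss_i) >> 7 for n = 0..127,
 * spreading the rss_i queues evenly across the table: the first pass
 * yields entries n and n + 2 in the two halfwords, the second pass
 * entries n + 1 and n + 3.
 */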
static void fm10k_init_reta(struct fm10k_intfc *interface)
{
	u16 i, rss_i = interface->ring_feature[RING_F_RSS].indices;
	u32 reta, base;

	/* If the netdev is initialized we have to maintain table if possible */
	if (interface->netdev->reg_state) {
		for (i = FM10K_RETA_SIZE; i--;) {
			reta = interface->reta[i];
			if ((((reta << 24) >> 24) < rss_i) &&
			    (((reta << 16) >> 24) < rss_i) &&
			    (((reta <<  8) >> 24) < rss_i) &&
			      (((reta)     >> 24) < rss_i))
				continue;
			goto repopulate_reta;
		}

		/* do nothing if all of the elements are in bounds */
		return;
	}

repopulate_reta:
	/* Populate the redirection table 4 entries at a time.  To do this
	 * we are generating the results for n and n+2 and then interleaving
	 * those with the results for n+1 and n+3.
	 */
	for (i = FM10K_RETA_SIZE; i--;) {
		/* first pass generates n and n+2 */
		base = ((i * 0x00040004) + 0x00020000) * rss_i;
		reta = (base & 0x3F803F80) >> 7;

		/* second pass generates n+1 and n+3 */
		base += 0x00010001 * rss_i;
		reta |= (base & 0x3F803F80) << 1;

		interface->reta[i] = reta;
	}
}

/**
 * fm10k_init_queueing_scheme - Determine proper queueing scheme
 * @interface: board private structure to initialize
 *
 * We determine which queueing scheme to use based on...
 * - Hardware queue count (num_*_queues)
 *   - defined by miscellaneous hardware support/features (RSS, etc.)
 **/
int fm10k_init_queueing_scheme(struct fm10k_intfc *interface)
{
	int err;

	/* Number of supported queues */
	fm10k_set_num_queues(interface);

	/* Configure MSI-X capability */
	err = fm10k_init_msix_capability(interface);
	if (err) {
		dev_err(&interface->pdev->dev,
			"Unable to initialize MSI-X capability\n");
		return err;
	}

	/* Allocate memory for queues */
	err = fm10k_alloc_q_vectors(interface);
	if (err)
		return err;

	/* Initialize RSS redirection table */
	fm10k_init_reta(interface);

	return 0;
}

/**
 * fm10k_clear_queueing_scheme - Clear the current queueing scheme settings
 * @interface: board private structure to clear queueing scheme on
 *
 * We go through and clear queueing specific resources and reset the structure
 * to pre-load conditions
 **/
void fm10k_clear_queueing_scheme(struct fm10k_intfc *interface)
{
	fm10k_free_q_vectors(interface);
	fm10k_reset_msix_capability(interface);
}