drivers/net/ethernet/intel/fm10k/fm10k_main.c
1 /* Intel Ethernet Switch Host Interface Driver
2 * Copyright(c) 2013 - 2014 Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * The full GNU General Public License is included in this distribution in
14 * the file called "COPYING".
15 *
16 * Contact Information:
17 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
18 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
19 */
20
21 #include <linux/types.h>
22 #include <linux/module.h>
23 #include <net/ipv6.h>
24 #include <net/ip.h>
25 #include <net/tcp.h>
26 #include <linux/if_macvlan.h>
27 #include <linux/prefetch.h>
28
29 #include "fm10k.h"
30
31 #define DRV_VERSION "0.12.2-k"
32 const char fm10k_driver_version[] = DRV_VERSION;
33 char fm10k_driver_name[] = "fm10k";
34 static const char fm10k_driver_string[] =
35 "Intel(R) Ethernet Switch Host Interface Driver";
36 static const char fm10k_copyright[] =
37 "Copyright (c) 2013 Intel Corporation.";
38
39 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
40 MODULE_DESCRIPTION("Intel(R) Ethernet Switch Host Interface Driver");
41 MODULE_LICENSE("GPL");
42 MODULE_VERSION(DRV_VERSION);
43
44 /**
45 * fm10k_init_module - Driver Registration Routine
46 *
47 * fm10k_init_module is the first routine called when the driver is
48 * loaded. All it does is register with the PCI subsystem.
49 **/
50 static int __init fm10k_init_module(void)
51 {
52 pr_info("%s - version %s\n", fm10k_driver_string, fm10k_driver_version);
53 pr_info("%s\n", fm10k_copyright);
54
55 return fm10k_register_pci_driver();
56 }
57 module_init(fm10k_init_module);
58
59 /**
60 * fm10k_exit_module - Driver Exit Cleanup Routine
61 *
62 * fm10k_exit_module is called just before the driver is removed
63 * from memory.
64 **/
65 static void __exit fm10k_exit_module(void)
66 {
67 fm10k_unregister_pci_driver();
68 }
69 module_exit(fm10k_exit_module);
70
71 static bool fm10k_alloc_mapped_page(struct fm10k_ring *rx_ring,
72 struct fm10k_rx_buffer *bi)
73 {
74 struct page *page = bi->page;
75 dma_addr_t dma;
76
77 /* Only page will be NULL if buffer was consumed */
78 if (likely(page))
79 return true;
80
81 /* alloc new page for storage */
82 page = alloc_page(GFP_ATOMIC | __GFP_COLD);
83 if (unlikely(!page)) {
84 rx_ring->rx_stats.alloc_failed++;
85 return false;
86 }
87
88 /* map page for use */
89 dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
90
91 /* if mapping failed free memory back to system since
92 * there isn't much point in holding memory we can't use
93 */
94 if (dma_mapping_error(rx_ring->dev, dma)) {
95 __free_page(page);
96 bi->page = NULL;
97
98 rx_ring->rx_stats.alloc_failed++;
99 return false;
100 }
101
102 bi->dma = dma;
103 bi->page = page;
104 bi->page_offset = 0;
105
106 return true;
107 }
108
109 /**
110 * fm10k_alloc_rx_buffers - Replace used receive buffers
111 * @rx_ring: ring to place buffers on
112 * @cleaned_count: number of buffers to replace
113 **/
114 void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count)
115 {
116 union fm10k_rx_desc *rx_desc;
117 struct fm10k_rx_buffer *bi;
118 u16 i = rx_ring->next_to_use;
119
120 /* nothing to do */
121 if (!cleaned_count)
122 return;
123
124 rx_desc = FM10K_RX_DESC(rx_ring, i);
125 bi = &rx_ring->rx_buffer[i];
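/* bias i negative (u16 wrap) so it reaches zero exactly when the ring
 * index would wrap; the (!i) test in the loop below can then reset the
 * descriptor and buffer pointers without a second compare against
 * rx_ring->count, and the bias is removed again after the loop
 */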
126 i -= rx_ring->count;
127
128 do {
129 if (!fm10k_alloc_mapped_page(rx_ring, bi))
130 break;
131
132 /* Refresh the desc even if buffer_addrs didn't change
133 * because each write-back erases this info.
134 */
135 rx_desc->q.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
136
137 rx_desc++;
138 bi++;
139 i++;
140 if (unlikely(!i)) {
141 rx_desc = FM10K_RX_DESC(rx_ring, 0);
142 bi = rx_ring->rx_buffer;
143 i -= rx_ring->count;
144 }
145
146 /* clear the hdr_addr for the next_to_use descriptor */
147 rx_desc->q.hdr_addr = 0;
148
149 cleaned_count--;
150 } while (cleaned_count);
151
152 i += rx_ring->count;
153
154 if (rx_ring->next_to_use != i) {
155 /* record the next descriptor to use */
156 rx_ring->next_to_use = i;
157
158 /* update next to alloc since we have filled the ring */
159 rx_ring->next_to_alloc = i;
160
161 /* Force memory writes to complete before letting h/w
162 * know there are new descriptors to fetch. (Only
163 * applicable for weak-ordered memory model archs,
164 * such as IA-64).
165 */
166 wmb();
167
168 /* notify hardware of new descriptors */
169 writel(i, rx_ring->tail);
170 }
171 }
172
173 /**
174 * fm10k_reuse_rx_page - page flip buffer and store it back on the ring
175 * @rx_ring: rx descriptor ring to store buffers on
176 * @old_buff: donor buffer to have page reused
177 *
178 * Synchronizes page for reuse by the interface
179 **/
180 static void fm10k_reuse_rx_page(struct fm10k_ring *rx_ring,
181 struct fm10k_rx_buffer *old_buff)
182 {
183 struct fm10k_rx_buffer *new_buff;
184 u16 nta = rx_ring->next_to_alloc;
185
186 new_buff = &rx_ring->rx_buffer[nta];
187
188 /* update, and store next to alloc */
189 nta++;
190 rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
191
192 /* transfer page from old buffer to new buffer */
193 memcpy(new_buff, old_buff, sizeof(struct fm10k_rx_buffer));
194
195 /* sync the buffer for use by the device */
196 dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma,
197 old_buff->page_offset,
198 FM10K_RX_BUFSZ,
199 DMA_FROM_DEVICE);
200 }
201
202 static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer,
203 struct page *page,
204 unsigned int truesize)
205 {
206 /* avoid re-using remote pages */
207 if (unlikely(page_to_nid(page) != numa_mem_id()))
208 return false;
209
210 #if (PAGE_SIZE < 8192)
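/* For pages smaller than 8K the page is split into two FM10K_RX_BUFSZ
 * halves: one half can sit with the stack while the other is handed
 * back to the device, so flipping page_offset below simply alternates
 * between the two halves.
 */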
211 /* if we are only owner of page we can reuse it */
212 if (unlikely(page_count(page) != 1))
213 return false;
214
215 /* flip page offset to other buffer */
216 rx_buffer->page_offset ^= FM10K_RX_BUFSZ;
217
218 /* since we are the only owner of the page and we need to
219 * increment it, just set the value to 2 in order to avoid
220 * an unnecessary locked operation
221 */
222 atomic_set(&page->_count, 2);
223 #else
224 /* move offset up to the next cache line */
225 rx_buffer->page_offset += truesize;
226
227 if (rx_buffer->page_offset > (PAGE_SIZE - FM10K_RX_BUFSZ))
228 return false;
229
230 /* bump ref count on page before it is given to the stack */
231 get_page(page);
232 #endif
233
234 return true;
235 }
236
237 /**
238 * fm10k_add_rx_frag - Add contents of Rx buffer to sk_buff
239 * @rx_ring: rx descriptor ring to transact packets on
240 * @rx_buffer: buffer containing page to add
241 * @rx_desc: descriptor containing length of buffer written by hardware
242 * @skb: sk_buff to place the data into
243 *
244 * This function will add the data contained in rx_buffer->page to the skb.
245 * This is done with a direct copy if the data in the buffer is less
246 * than the skb header size; otherwise the page is attached to the skb
247 * as a frag.
248 *
249 * The function will then update the page offset if necessary and return
250 * true if the buffer can be reused by the interface.
251 **/
252 static bool fm10k_add_rx_frag(struct fm10k_ring *rx_ring,
253 struct fm10k_rx_buffer *rx_buffer,
254 union fm10k_rx_desc *rx_desc,
255 struct sk_buff *skb)
256 {
257 struct page *page = rx_buffer->page;
258 unsigned int size = le16_to_cpu(rx_desc->w.length);
259 #if (PAGE_SIZE < 8192)
260 unsigned int truesize = FM10K_RX_BUFSZ;
261 #else
262 unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
263 #endif
264
265 if ((size <= FM10K_RX_HDR_LEN) && !skb_is_nonlinear(skb)) {
266 unsigned char *va = page_address(page) + rx_buffer->page_offset;
267
268 memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
269
270 /* we can reuse buffer as-is, just make sure it is local */
271 if (likely(page_to_nid(page) == numa_mem_id()))
272 return true;
273
274 /* this page cannot be reused so discard it */
275 put_page(page);
276 return false;
277 }
278
279 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
280 rx_buffer->page_offset, size, truesize);
281
282 return fm10k_can_reuse_rx_page(rx_buffer, page, truesize);
283 }
284
285 static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring,
286 union fm10k_rx_desc *rx_desc,
287 struct sk_buff *skb)
288 {
289 struct fm10k_rx_buffer *rx_buffer;
290 struct page *page;
291
292 rx_buffer = &rx_ring->rx_buffer[rx_ring->next_to_clean];
293
294 page = rx_buffer->page;
295 prefetchw(page);
296
297 if (likely(!skb)) {
298 void *page_addr = page_address(page) +
299 rx_buffer->page_offset;
300
301 /* prefetch first cache line of first page */
302 prefetch(page_addr);
303 #if L1_CACHE_BYTES < 128
304 prefetch(page_addr + L1_CACHE_BYTES);
305 #endif
306
307 /* allocate a skb to store the frags */
308 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
309 FM10K_RX_HDR_LEN);
310 if (unlikely(!skb)) {
311 rx_ring->rx_stats.alloc_failed++;
312 return NULL;
313 }
314
315 /* we will be copying header into skb->data in
316 * pskb_may_pull so it is in our interest to prefetch
317 * it now to avoid a possible cache miss
318 */
319 prefetchw(skb->data);
320 }
321
322 /* we are reusing so sync this buffer for CPU use */
323 dma_sync_single_range_for_cpu(rx_ring->dev,
324 rx_buffer->dma,
325 rx_buffer->page_offset,
326 FM10K_RX_BUFSZ,
327 DMA_FROM_DEVICE);
328
329 /* pull page into skb */
330 if (fm10k_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
331 /* hand second half of page back to the ring */
332 fm10k_reuse_rx_page(rx_ring, rx_buffer);
333 } else {
334 /* we are not reusing the buffer so unmap it */
335 dma_unmap_page(rx_ring->dev, rx_buffer->dma,
336 PAGE_SIZE, DMA_FROM_DEVICE);
337 }
338
339 /* clear contents of rx_buffer */
340 rx_buffer->page = NULL;
341
342 return skb;
343 }
344
345 /**
346 * fm10k_process_skb_fields - Populate skb header fields from Rx descriptor
347 * @rx_ring: rx descriptor ring packet is being transacted on
348 * @rx_desc: pointer to the EOP Rx descriptor
349 * @skb: pointer to current skb being populated
350 *
351 * This function checks the ring, descriptor, and packet information in
352 * order to populate the hash, checksum, VLAN, timestamp, protocol, and
353 * other fields within the skb.
354 **/
355 static unsigned int fm10k_process_skb_fields(struct fm10k_ring *rx_ring,
356 union fm10k_rx_desc *rx_desc,
357 struct sk_buff *skb)
358 {
359 unsigned int len = skb->len;
360
361 FM10K_CB(skb)->fi.w.vlan = rx_desc->w.vlan;
362
363 skb_record_rx_queue(skb, rx_ring->queue_index);
364
365 FM10K_CB(skb)->fi.d.glort = rx_desc->d.glort;
366
367 if (rx_desc->w.vlan) {
368 u16 vid = le16_to_cpu(rx_desc->w.vlan);
369
370 if (vid != rx_ring->vid)
371 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
372 }
373
374 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
375
376 return len;
377 }
378
379 /**
380 * fm10k_is_non_eop - process handling of non-EOP buffers
381 * @rx_ring: Rx ring being processed
382 * @rx_desc: Rx descriptor for current buffer
383 *
384 * This function updates next to clean. If the buffer is an EOP buffer
385 * this function exits returning false, otherwise it will return true,
386 * indicating that the frame continues in the next buffer and that this
387 * is in fact a non-EOP buffer.
388 **/
389 static bool fm10k_is_non_eop(struct fm10k_ring *rx_ring,
390 union fm10k_rx_desc *rx_desc)
391 {
392 u32 ntc = rx_ring->next_to_clean + 1;
393
394 /* fetch, update, and store next to clean */
395 ntc = (ntc < rx_ring->count) ? ntc : 0;
396 rx_ring->next_to_clean = ntc;
397
398 prefetch(FM10K_RX_DESC(rx_ring, ntc));
399
400 if (likely(fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_EOP)))
401 return false;
402
403 return true;
404 }
405
406 /**
407 * fm10k_pull_tail - fm10k specific version of skb_pull_tail
408 * @rx_ring: rx descriptor ring packet is being transacted on
409 * @rx_desc: pointer to the EOP Rx descriptor
410 * @skb: pointer to current skb being adjusted
411 *
412 * This function is an fm10k specific version of __pskb_pull_tail. The
413 * main difference between this version and the original function is that
414 * this function can make several assumptions about the state of things
415 * that allow for significant optimizations versus the standard function.
416 * As a result we can do things like drop a frag and maintain an accurate
417 * truesize for the skb.
418 */
419 static void fm10k_pull_tail(struct fm10k_ring *rx_ring,
420 union fm10k_rx_desc *rx_desc,
421 struct sk_buff *skb)
422 {
423 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
424 unsigned char *va;
425 unsigned int pull_len;
426
427 /* it is valid to use page_address instead of kmap since we are
428 * working with pages allocated out of the lowmem pool per
429 * alloc_page(GFP_ATOMIC)
430 */
431 va = skb_frag_address(frag);
432
433 /* we need the header to contain the greater of either ETH_HLEN or
434 * 60 bytes if the skb->len is less than 60 for skb_pad.
435 */
436 pull_len = eth_get_headlen(va, FM10K_RX_HDR_LEN);
437
438 /* align pull length to size of long to optimize memcpy performance */
439 skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
440
441 /* update all of the pointers */
442 skb_frag_size_sub(frag, pull_len);
443 frag->page_offset += pull_len;
444 skb->data_len -= pull_len;
445 skb->tail += pull_len;
446 }
447
448 /**
449 * fm10k_cleanup_headers - Correct corrupted or empty headers
450 * @rx_ring: rx descriptor ring packet is being transacted on
451 * @rx_desc: pointer to the EOP Rx descriptor
452 * @skb: pointer to current skb being fixed
453 *
454 * Address the case where we are pulling data in on pages only
455 * and as such no data is present in the skb header.
456 *
457 * In addition if skb is not at least 60 bytes we need to pad it so that
458 * it is large enough to qualify as a valid Ethernet frame.
459 *
460 * Returns true if an error was encountered and skb was freed.
461 **/
462 static bool fm10k_cleanup_headers(struct fm10k_ring *rx_ring,
463 union fm10k_rx_desc *rx_desc,
464 struct sk_buff *skb)
465 {
466 if (unlikely((fm10k_test_staterr(rx_desc,
467 FM10K_RXD_STATUS_RXE)))) {
468 dev_kfree_skb_any(skb);
469 rx_ring->rx_stats.errors++;
470 return true;
471 }
472
473 /* place header in linear portion of buffer */
474 if (skb_is_nonlinear(skb))
475 fm10k_pull_tail(rx_ring, rx_desc, skb);
476
477 /* if skb_pad returns an error the skb was freed */
478 if (unlikely(skb->len < 60)) {
479 int pad_len = 60 - skb->len;
480
481 if (skb_pad(skb, pad_len))
482 return true;
483 __skb_put(skb, pad_len);
484 }
485
486 return false;
487 }
488
489 /**
490 * fm10k_receive_skb - helper function to handle rx indications
491 * @q_vector: structure containing interrupt and ring information
492 * @skb: packet to send up
493 **/
494 static void fm10k_receive_skb(struct fm10k_q_vector *q_vector,
495 struct sk_buff *skb)
496 {
497 napi_gro_receive(&q_vector->napi, skb);
498 }
499
500 static bool fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector,
501 struct fm10k_ring *rx_ring,
502 int budget)
503 {
504 struct sk_buff *skb = rx_ring->skb;
505 unsigned int total_bytes = 0, total_packets = 0;
506 u16 cleaned_count = fm10k_desc_unused(rx_ring);
507
508 do {
509 union fm10k_rx_desc *rx_desc;
510
511 /* return some buffers to hardware, one at a time is too slow */
512 if (cleaned_count >= FM10K_RX_BUFFER_WRITE) {
513 fm10k_alloc_rx_buffers(rx_ring, cleaned_count);
514 cleaned_count = 0;
515 }
516
517 rx_desc = FM10K_RX_DESC(rx_ring, rx_ring->next_to_clean);
518
519 if (!fm10k_test_staterr(rx_desc, FM10K_RXD_STATUS_DD))
520 break;
521
522 /* This memory barrier is needed to keep us from reading
523 * any other fields out of the rx_desc until we know the
524 * RXD_STATUS_DD bit is set
525 */
526 rmb();
527
528 /* retrieve a buffer from the ring */
529 skb = fm10k_fetch_rx_buffer(rx_ring, rx_desc, skb);
530
531 /* exit if we failed to retrieve a buffer */
532 if (!skb)
533 break;
534
535 cleaned_count++;
536
537 /* fetch next buffer in frame if non-eop */
538 if (fm10k_is_non_eop(rx_ring, rx_desc))
539 continue;
540
541 /* verify the packet layout is correct */
542 if (fm10k_cleanup_headers(rx_ring, rx_desc, skb)) {
543 skb = NULL;
544 continue;
545 }
546
547 /* populate checksum, timestamp, VLAN, and protocol */
548 total_bytes += fm10k_process_skb_fields(rx_ring, rx_desc, skb);
549
550 fm10k_receive_skb(q_vector, skb);
551
552 /* reset skb pointer */
553 skb = NULL;
554
555 /* update budget accounting */
556 total_packets++;
557 } while (likely(total_packets < budget));
558
559 /* place incomplete frames back on ring for completion */
560 rx_ring->skb = skb;
561
562 u64_stats_update_begin(&rx_ring->syncp);
563 rx_ring->stats.packets += total_packets;
564 rx_ring->stats.bytes += total_bytes;
565 u64_stats_update_end(&rx_ring->syncp);
566 q_vector->rx.total_packets += total_packets;
567 q_vector->rx.total_bytes += total_bytes;
568
569 return total_packets < budget;
570 }
571
572 static bool fm10k_tx_desc_push(struct fm10k_ring *tx_ring,
573 struct fm10k_tx_desc *tx_desc, u16 i,
574 dma_addr_t dma, unsigned int size, u8 desc_flags)
575 {
576 /* set RS and INT for last frame in a cache line */
577 if ((++i & (FM10K_TXD_WB_FIFO_SIZE - 1)) == 0)
578 desc_flags |= FM10K_TXD_FLAG_RS | FM10K_TXD_FLAG_INT;
579
580 /* record values to descriptor */
581 tx_desc->buffer_addr = cpu_to_le64(dma);
582 tx_desc->flags = desc_flags;
583 tx_desc->buflen = cpu_to_le16(size);
584
585 /* return true if we just wrapped the ring */
586 return i == tx_ring->count;
587 }
588
589 static void fm10k_tx_map(struct fm10k_ring *tx_ring,
590 struct fm10k_tx_buffer *first)
591 {
592 struct sk_buff *skb = first->skb;
593 struct fm10k_tx_buffer *tx_buffer;
594 struct fm10k_tx_desc *tx_desc;
595 struct skb_frag_struct *frag;
596 unsigned char *data;
597 dma_addr_t dma;
598 unsigned int data_len, size;
599 u16 i = tx_ring->next_to_use;
600 u8 flags = 0;
601
602 tx_desc = FM10K_TX_DESC(tx_ring, i);
603
604 /* add HW VLAN tag */
605 if (vlan_tx_tag_present(skb))
606 tx_desc->vlan = cpu_to_le16(vlan_tx_tag_get(skb));
607 else
608 tx_desc->vlan = 0;
609
610 size = skb_headlen(skb);
611 data = skb->data;
612
613 dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);
614
615 data_len = skb->data_len;
616 tx_buffer = first;
617
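/* map the linear head first, then each page fragment in turn; buffers
 * larger than FM10K_MAX_DATA_PER_TXD are split across multiple
 * descriptors by the inner while loop, and the outer loop exits once
 * data_len (the paged data still to map) reaches zero
 */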
618 for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
619 if (dma_mapping_error(tx_ring->dev, dma))
620 goto dma_error;
621
622 /* record length, and DMA address */
623 dma_unmap_len_set(tx_buffer, len, size);
624 dma_unmap_addr_set(tx_buffer, dma, dma);
625
626 while (unlikely(size > FM10K_MAX_DATA_PER_TXD)) {
627 if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++, dma,
628 FM10K_MAX_DATA_PER_TXD, flags)) {
629 tx_desc = FM10K_TX_DESC(tx_ring, 0);
630 i = 0;
631 }
632
633 dma += FM10K_MAX_DATA_PER_TXD;
634 size -= FM10K_MAX_DATA_PER_TXD;
635 }
636
637 if (likely(!data_len))
638 break;
639
640 if (fm10k_tx_desc_push(tx_ring, tx_desc++, i++,
641 dma, size, flags)) {
642 tx_desc = FM10K_TX_DESC(tx_ring, 0);
643 i = 0;
644 }
645
646 size = skb_frag_size(frag);
647 data_len -= size;
648
649 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
650 DMA_TO_DEVICE);
651
652 tx_buffer = &tx_ring->tx_buffer[i];
653 }
654
655 /* write last descriptor with LAST bit set */
656 flags |= FM10K_TXD_FLAG_LAST;
657
658 if (fm10k_tx_desc_push(tx_ring, tx_desc, i++, dma, size, flags))
659 i = 0;
660
661 /* record bytecount for BQL */
662 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
663
664 /* record SW timestamp if HW timestamp is not available */
665 skb_tx_timestamp(first->skb);
666
667 /* Force memory writes to complete before letting h/w know there
668 * are new descriptors to fetch. (Only applicable for weak-ordered
669 * memory model archs, such as IA-64).
670 *
671 * We also need this memory barrier to make certain all of the
672 * status bits have been updated before next_to_watch is written.
673 */
674 wmb();
675
676 /* set next_to_watch value indicating a packet is present */
677 first->next_to_watch = tx_desc;
678
679 tx_ring->next_to_use = i;
680
681 /* notify HW of packet */
682 writel(i, tx_ring->tail);
683
684 /* we need this if more than one processor can write to our tail
685 * at a time, it synchronizes IO on IA64/Altix systems
686 */
687 mmiowb();
688
689 return;
690 dma_error:
691 dev_err(tx_ring->dev, "TX DMA map failed\n");
692
693 /* clear dma mappings for failed tx_buffer map */
694 for (;;) {
695 tx_buffer = &tx_ring->tx_buffer[i];
696 fm10k_unmap_and_free_tx_resource(tx_ring, tx_buffer);
697 if (tx_buffer == first)
698 break;
699 if (i == 0)
700 i = tx_ring->count;
701 i--;
702 }
703
704 tx_ring->next_to_use = i;
705 }
706
707 static int __fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size)
708 {
709 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
710
711 smp_mb();
712
713 /* We need to check again in case another CPU has just
714 * made room available. */
715 if (likely(fm10k_desc_unused(tx_ring) < size))
716 return -EBUSY;
717
718 /* A reprieve! - use start_queue because it doesn't call schedule */
719 netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
720 ++tx_ring->tx_stats.restart_queue;
721 return 0;
722 }
723
724 static inline int fm10k_maybe_stop_tx(struct fm10k_ring *tx_ring, u16 size)
725 {
726 if (likely(fm10k_desc_unused(tx_ring) >= size))
727 return 0;
728 return __fm10k_maybe_stop_tx(tx_ring, size);
729 }
730
731 netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb,
732 struct fm10k_ring *tx_ring)
733 {
734 struct fm10k_tx_buffer *first;
735 u32 tx_flags = 0;
736 #if PAGE_SIZE > FM10K_MAX_DATA_PER_TXD
737 unsigned short f;
738 #endif
739 u16 count = TXD_USE_COUNT(skb_headlen(skb));
740
741 /* need: 1 descriptor per page * PAGE_SIZE/FM10K_MAX_DATA_PER_TXD,
742 * + 1 desc for skb_headlen/FM10K_MAX_DATA_PER_TXD,
743 * + 2 desc gap to keep tail from touching head
744 * otherwise try next time
745 */
746 #if PAGE_SIZE > FM10K_MAX_DATA_PER_TXD
747 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
748 count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
749 #else
750 count += skb_shinfo(skb)->nr_frags;
751 #endif
752 if (fm10k_maybe_stop_tx(tx_ring, count + 3)) {
753 tx_ring->tx_stats.tx_busy++;
754 return NETDEV_TX_BUSY;
755 }
756
757 /* record the location of the first descriptor for this packet */
758 first = &tx_ring->tx_buffer[tx_ring->next_to_use];
759 first->skb = skb;
760 first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
761 first->gso_segs = 1;
762
763 /* record initial flags and protocol */
764 first->tx_flags = tx_flags;
765
766 fm10k_tx_map(tx_ring, first);
767
768 fm10k_maybe_stop_tx(tx_ring, DESC_NEEDED);
769
770 return NETDEV_TX_OK;
771 }
772
773 static u64 fm10k_get_tx_completed(struct fm10k_ring *ring)
774 {
775 return ring->stats.packets;
776 }
777
778 static u64 fm10k_get_tx_pending(struct fm10k_ring *ring)
779 {
780 /* use SW head and tail until we have real hardware */
781 u32 head = ring->next_to_clean;
782 u32 tail = ring->next_to_use;
783
784 return ((head <= tail) ? tail : tail + ring->count) - head;
785 }
786
787 bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring)
788 {
789 u32 tx_done = fm10k_get_tx_completed(tx_ring);
790 u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
791 u32 tx_pending = fm10k_get_tx_pending(tx_ring);
792
793 clear_check_for_tx_hang(tx_ring);
794
795 /* Check for a hung queue, but be thorough. This verifies
796 * that a transmit has been completed since the previous
797 * check AND there is at least one packet pending. By
798 * requiring this to fail twice we avoid races with
799 * clearing the ARMED bit and conditions where we
800 * run the check_tx_hang logic with a transmit completion
801 * pending but without time to complete it yet.
802 */
803 if (!tx_pending || (tx_done_old != tx_done)) {
804 /* update completed stats and continue */
805 tx_ring->tx_stats.tx_done_old = tx_done;
806 /* reset the countdown */
807 clear_bit(__FM10K_HANG_CHECK_ARMED, &tx_ring->state);
808
809 return false;
810 }
811
812 /* make sure it is true for two checks in a row */
813 return test_and_set_bit(__FM10K_HANG_CHECK_ARMED, &tx_ring->state);
814 }
815
816 /**
817 * fm10k_tx_timeout_reset - initiate reset due to Tx timeout
818 * @interface: driver private struct
819 **/
820 void fm10k_tx_timeout_reset(struct fm10k_intfc *interface)
821 {
822 /* Do the reset outside of interrupt context */
823 if (!test_bit(__FM10K_DOWN, &interface->state)) {
824 netdev_err(interface->netdev, "Reset interface\n");
825 interface->tx_timeout_count++;
826 interface->flags |= FM10K_FLAG_RESET_REQUESTED;
827 fm10k_service_event_schedule(interface);
828 }
829 }
830
831 /**
832 * fm10k_clean_tx_irq - Reclaim resources after transmit completes
833 * @q_vector: structure containing interrupt and ring information
834 * @tx_ring: tx ring to clean
835 **/
836 static bool fm10k_clean_tx_irq(struct fm10k_q_vector *q_vector,
837 struct fm10k_ring *tx_ring)
838 {
839 struct fm10k_intfc *interface = q_vector->interface;
840 struct fm10k_tx_buffer *tx_buffer;
841 struct fm10k_tx_desc *tx_desc;
842 unsigned int total_bytes = 0, total_packets = 0;
843 unsigned int budget = q_vector->tx.work_limit;
844 unsigned int i = tx_ring->next_to_clean;
845
846 if (test_bit(__FM10K_DOWN, &interface->state))
847 return true;
848
849 tx_buffer = &tx_ring->tx_buffer[i];
850 tx_desc = FM10K_TX_DESC(tx_ring, i);
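/* bias i negative so the (!i) checks below detect ring wrap, the same
 * trick used on the Rx allocation path
 */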
851 i -= tx_ring->count;
852
853 do {
854 struct fm10k_tx_desc *eop_desc = tx_buffer->next_to_watch;
855
856 /* if next_to_watch is not set then there is no work pending */
857 if (!eop_desc)
858 break;
859
860 /* prevent any other reads prior to eop_desc */
861 read_barrier_depends();
862
863 /* if DD is not set pending work has not been completed */
864 if (!(eop_desc->flags & FM10K_TXD_FLAG_DONE))
865 break;
866
867 /* clear next_to_watch to prevent false hangs */
868 tx_buffer->next_to_watch = NULL;
869
870 /* update the statistics for this packet */
871 total_bytes += tx_buffer->bytecount;
872 total_packets += tx_buffer->gso_segs;
873
874 /* free the skb */
875 dev_consume_skb_any(tx_buffer->skb);
876
877 /* unmap skb header data */
878 dma_unmap_single(tx_ring->dev,
879 dma_unmap_addr(tx_buffer, dma),
880 dma_unmap_len(tx_buffer, len),
881 DMA_TO_DEVICE);
882
883 /* clear tx_buffer data */
884 tx_buffer->skb = NULL;
885 dma_unmap_len_set(tx_buffer, len, 0);
886
887 /* unmap remaining buffers */
888 while (tx_desc != eop_desc) {
889 tx_buffer++;
890 tx_desc++;
891 i++;
892 if (unlikely(!i)) {
893 i -= tx_ring->count;
894 tx_buffer = tx_ring->tx_buffer;
895 tx_desc = FM10K_TX_DESC(tx_ring, 0);
896 }
897
898 /* unmap any remaining paged data */
899 if (dma_unmap_len(tx_buffer, len)) {
900 dma_unmap_page(tx_ring->dev,
901 dma_unmap_addr(tx_buffer, dma),
902 dma_unmap_len(tx_buffer, len),
903 DMA_TO_DEVICE);
904 dma_unmap_len_set(tx_buffer, len, 0);
905 }
906 }
907
908 /* move us one more past the eop_desc for start of next pkt */
909 tx_buffer++;
910 tx_desc++;
911 i++;
912 if (unlikely(!i)) {
913 i -= tx_ring->count;
914 tx_buffer = tx_ring->tx_buffer;
915 tx_desc = FM10K_TX_DESC(tx_ring, 0);
916 }
917
918 /* issue prefetch for next Tx descriptor */
919 prefetch(tx_desc);
920
921 /* update budget accounting */
922 budget--;
923 } while (likely(budget));
924
925 i += tx_ring->count;
926 tx_ring->next_to_clean = i;
927 u64_stats_update_begin(&tx_ring->syncp);
928 tx_ring->stats.bytes += total_bytes;
929 tx_ring->stats.packets += total_packets;
930 u64_stats_update_end(&tx_ring->syncp);
931 q_vector->tx.total_bytes += total_bytes;
932 q_vector->tx.total_packets += total_packets;
933
934 if (check_for_tx_hang(tx_ring) && fm10k_check_tx_hang(tx_ring)) {
935 /* schedule immediate reset if we believe we hung */
936 struct fm10k_hw *hw = &interface->hw;
937
938 netif_err(interface, drv, tx_ring->netdev,
939 "Detected Tx Unit Hang\n"
940 " Tx Queue <%d>\n"
941 " TDH, TDT <%x>, <%x>\n"
942 " next_to_use <%x>\n"
943 " next_to_clean <%x>\n",
944 tx_ring->queue_index,
945 fm10k_read_reg(hw, FM10K_TDH(tx_ring->reg_idx)),
946 fm10k_read_reg(hw, FM10K_TDT(tx_ring->reg_idx)),
947 tx_ring->next_to_use, i);
948
949 netif_stop_subqueue(tx_ring->netdev,
950 tx_ring->queue_index);
951
952 netif_info(interface, probe, tx_ring->netdev,
953 "tx hang %d detected on queue %d, resetting interface\n",
954 interface->tx_timeout_count + 1,
955 tx_ring->queue_index);
956
957 fm10k_tx_timeout_reset(interface);
958
959 /* the netdev is about to reset, no point in enabling stuff */
960 return true;
961 }
962
963 /* notify netdev of completed buffers */
964 netdev_tx_completed_queue(txring_txq(tx_ring),
965 total_packets, total_bytes);
966
967 #define TX_WAKE_THRESHOLD min_t(u16, FM10K_MIN_TXD - 1, DESC_NEEDED * 2)
968 if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
969 (fm10k_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
970 /* Make sure that anybody stopping the queue after this
971 * sees the new next_to_clean.
972 */
973 smp_mb();
974 if (__netif_subqueue_stopped(tx_ring->netdev,
975 tx_ring->queue_index) &&
976 !test_bit(__FM10K_DOWN, &interface->state)) {
977 netif_wake_subqueue(tx_ring->netdev,
978 tx_ring->queue_index);
979 ++tx_ring->tx_stats.restart_queue;
980 }
981 }
982
983 return !!budget;
984 }
985
986 /**
987 * fm10k_update_itr - update the dynamic ITR value based on packet size
988 *
989 * Stores a new ITR value based strictly on packet size. The
990 * divisors and thresholds used by this function were determined based
991 * on theoretical maximum wire speed and testing data, in order to
992 * minimize response time while increasing bulk throughput.
993 *
994 * @ring_container: Container for rings to have ITR updated
995 **/
996 static void fm10k_update_itr(struct fm10k_ring_container *ring_container)
997 {
998 unsigned int avg_wire_size, packets;
999
1000 /* Only update ITR if we are using adaptive setting */
1001 if (!(ring_container->itr & FM10K_ITR_ADAPTIVE))
1002 goto clear_counts;
1003
1004 packets = ring_container->total_packets;
1005 if (!packets)
1006 goto clear_counts;
1007
1008 avg_wire_size = ring_container->total_bytes / packets;
1009
1010 /* Add 24 bytes to size to account for CRC, preamble, and gap */
1011 avg_wire_size += 24;
1012
1013 /* Don't starve jumbo frames */
1014 if (avg_wire_size > 3000)
1015 avg_wire_size = 3000;
1016
1017 /* Give a little boost to mid-size frames */
1018 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
1019 avg_wire_size /= 3;
1020 else
1021 avg_wire_size /= 2;
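/* e.g. 64 byte frames give (64 + 24) / 2 = 44, while jumbo traffic is
 * first capped at 3000 and then halved to 1500
 */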
1022
1023 /* write back value and retain adaptive flag */
1024 ring_container->itr = avg_wire_size | FM10K_ITR_ADAPTIVE;
1025
1026 clear_counts:
1027 ring_container->total_bytes = 0;
1028 ring_container->total_packets = 0;
1029 }
1030
1031 static void fm10k_qv_enable(struct fm10k_q_vector *q_vector)
1032 {
1033 /* Enable auto-mask and clear the current mask */
1034 u32 itr = FM10K_ITR_ENABLE;
1035
1036 /* Update Tx ITR */
1037 fm10k_update_itr(&q_vector->tx);
1038
1039 /* Update Rx ITR */
1040 fm10k_update_itr(&q_vector->rx);
1041
1042 /* Store Tx itr in timer slot 0 */
1043 itr |= (q_vector->tx.itr & FM10K_ITR_MAX);
1044
1045 /* Shift Rx itr to timer slot 1 */
1046 itr |= (q_vector->rx.itr & FM10K_ITR_MAX) << FM10K_ITR_INTERVAL1_SHIFT;
1047
1048 /* Write the final value to the ITR register */
1049 writel(itr, q_vector->itr);
1050 }
1051
1052 static int fm10k_poll(struct napi_struct *napi, int budget)
1053 {
1054 struct fm10k_q_vector *q_vector =
1055 container_of(napi, struct fm10k_q_vector, napi);
1056 struct fm10k_ring *ring;
1057 int per_ring_budget;
1058 bool clean_complete = true;
1059
1060 fm10k_for_each_ring(ring, q_vector->tx)
1061 clean_complete &= fm10k_clean_tx_irq(q_vector, ring);
1062
1063 /* attempt to distribute budget to each queue fairly, but don't
1064 * allow the budget to go below 1 because we'll exit polling
1065 */
1066 if (q_vector->rx.count > 1)
1067 per_ring_budget = max(budget/q_vector->rx.count, 1);
1068 else
1069 per_ring_budget = budget;
1070
1071 fm10k_for_each_ring(ring, q_vector->rx)
1072 clean_complete &= fm10k_clean_rx_irq(q_vector, ring,
1073 per_ring_budget);
1074
1075 /* If all work not completed, return budget and keep polling */
1076 if (!clean_complete)
1077 return budget;
1078
1079 /* all work done, exit the polling mode */
1080 napi_complete(napi);
1081
1082 /* re-enable the q_vector */
1083 fm10k_qv_enable(q_vector);
1084
1085 return 0;
1086 }
1087
1088 /**
1089 * fm10k_set_num_queues - Allocate queues for device, feature dependent
1090 * @interface: board private structure to initialize
1091 *
1092 * This is the top level queue allocation routine. The order here is very
1093 * important, starting with the "most" number of features turned on at once,
1094 * and ending with the smallest set of features. This way large combinations
1095 * can be allocated if they're turned on, and smaller combinations are the
1096 * fallthrough conditions.
1097 *
1098 **/
1099 static void fm10k_set_num_queues(struct fm10k_intfc *interface)
1100 {
1101 /* Start with base case */
1102 interface->num_rx_queues = 1;
1103 interface->num_tx_queues = 1;
1104 }
1105
1106 /**
1107 * fm10k_alloc_q_vector - Allocate memory for a single interrupt vector
1108 * @interface: board private structure to initialize
1109 * @v_count: q_vectors allocated on interface, used for ring interleaving
1110 * @v_idx: index of vector in interface struct
1111 * @txr_count: total number of Tx rings to allocate
1112 * @txr_idx: index of first Tx ring to allocate
1113 * @rxr_count: total number of Rx rings to allocate
1114 * @rxr_idx: index of first Rx ring to allocate
1115 *
1116 * We allocate one q_vector. If allocation fails we return -ENOMEM.
1117 **/
1118 static int fm10k_alloc_q_vector(struct fm10k_intfc *interface,
1119 unsigned int v_count, unsigned int v_idx,
1120 unsigned int txr_count, unsigned int txr_idx,
1121 unsigned int rxr_count, unsigned int rxr_idx)
1122 {
1123 struct fm10k_q_vector *q_vector;
1124 struct fm10k_ring *ring;
1125 int ring_count, size;
1126
1127 ring_count = txr_count + rxr_count;
1128 size = sizeof(struct fm10k_q_vector) +
1129 (sizeof(struct fm10k_ring) * ring_count);
1130
1131 /* allocate q_vector and rings */
1132 q_vector = kzalloc(size, GFP_KERNEL);
1133 if (!q_vector)
1134 return -ENOMEM;
1135
1136 /* initialize NAPI */
1137 netif_napi_add(interface->netdev, &q_vector->napi,
1138 fm10k_poll, NAPI_POLL_WEIGHT);
1139
1140 /* tie q_vector and interface together */
1141 interface->q_vector[v_idx] = q_vector;
1142 q_vector->interface = interface;
1143 q_vector->v_idx = v_idx;
1144
1145 /* initialize pointer to rings */
1146 ring = q_vector->ring;
1147
1148 /* save Tx ring container info */
1149 q_vector->tx.ring = ring;
1150 q_vector->tx.work_limit = FM10K_DEFAULT_TX_WORK;
1151 q_vector->tx.itr = interface->tx_itr;
1152 q_vector->tx.count = txr_count;
1153
1154 while (txr_count) {
1155 /* assign generic ring traits */
1156 ring->dev = &interface->pdev->dev;
1157 ring->netdev = interface->netdev;
1158
1159 /* configure backlink on ring */
1160 ring->q_vector = q_vector;
1161
1162 /* apply Tx specific ring traits */
1163 ring->count = interface->tx_ring_count;
1164 ring->queue_index = txr_idx;
1165
1166 /* assign ring to interface */
1167 interface->tx_ring[txr_idx] = ring;
1168
1169 /* update count and index */
1170 txr_count--;
1171 txr_idx += v_count;
1172
1173 /* push pointer to next ring */
1174 ring++;
1175 }
1176
1177 /* save Rx ring container info */
1178 q_vector->rx.ring = ring;
1179 q_vector->rx.itr = interface->rx_itr;
1180 q_vector->rx.count = rxr_count;
1181
1182 while (rxr_count) {
1183 /* assign generic ring traits */
1184 ring->dev = &interface->pdev->dev;
1185 ring->netdev = interface->netdev;
1186
1187 /* configure backlink on ring */
1188 ring->q_vector = q_vector;
1189
1190 /* apply Rx specific ring traits */
1191 ring->count = interface->rx_ring_count;
1192 ring->queue_index = rxr_idx;
1193
1194 /* assign ring to interface */
1195 interface->rx_ring[rxr_idx] = ring;
1196
1197 /* update count and index */
1198 rxr_count--;
1199 rxr_idx += v_count;
1200
1201 /* push pointer to next ring */
1202 ring++;
1203 }
1204
1205 return 0;
1206 }
1207
1208 /**
1209 * fm10k_free_q_vector - Free memory allocated for specific interrupt vector
1210 * @interface: board private structure to initialize
1211 * @v_idx: Index of vector to be freed
1212 *
1213 * This function frees the memory allocated to the q_vector. In addition if
1214 * NAPI is enabled it will delete any references to the NAPI struct prior
1215 * to freeing the q_vector.
1216 **/
1217 static void fm10k_free_q_vector(struct fm10k_intfc *interface, int v_idx)
1218 {
1219 struct fm10k_q_vector *q_vector = interface->q_vector[v_idx];
1220 struct fm10k_ring *ring;
1221
1222 fm10k_for_each_ring(ring, q_vector->tx)
1223 interface->tx_ring[ring->queue_index] = NULL;
1224
1225 fm10k_for_each_ring(ring, q_vector->rx)
1226 interface->rx_ring[ring->queue_index] = NULL;
1227
1228 interface->q_vector[v_idx] = NULL;
1229 netif_napi_del(&q_vector->napi);
1230 kfree_rcu(q_vector, rcu);
1231 }
1232
1233 /**
1234 * fm10k_alloc_q_vectors - Allocate memory for interrupt vectors
1235 * @interface: board private structure to initialize
1236 *
1237 * We allocate one q_vector per queue interrupt. If allocation fails we
1238 * return -ENOMEM.
1239 **/
1240 static int fm10k_alloc_q_vectors(struct fm10k_intfc *interface)
1241 {
1242 unsigned int q_vectors = interface->num_q_vectors;
1243 unsigned int rxr_remaining = interface->num_rx_queues;
1244 unsigned int txr_remaining = interface->num_tx_queues;
1245 unsigned int rxr_idx = 0, txr_idx = 0, v_idx = 0;
1246 int err;
1247
1248 if (q_vectors >= (rxr_remaining + txr_remaining)) {
1249 for (; rxr_remaining; v_idx++) {
1250 err = fm10k_alloc_q_vector(interface, q_vectors, v_idx,
1251 0, 0, 1, rxr_idx);
1252 if (err)
1253 goto err_out;
1254
1255 /* update counts and index */
1256 rxr_remaining--;
1257 rxr_idx++;
1258 }
1259 }
1260
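/* spread the remaining rings as evenly as possible across the
 * remaining vectors; recomputing the share with DIV_ROUND_UP each
 * iteration keeps any vector from exceeding its fair share by more
 * than one ring
 */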
1261 for (; v_idx < q_vectors; v_idx++) {
1262 int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
1263 int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
1264
1265 err = fm10k_alloc_q_vector(interface, q_vectors, v_idx,
1266 tqpv, txr_idx,
1267 rqpv, rxr_idx);
1268
1269 if (err)
1270 goto err_out;
1271
1272 /* update counts and index */
1273 rxr_remaining -= rqpv;
1274 txr_remaining -= tqpv;
1275 rxr_idx++;
1276 txr_idx++;
1277 }
1278
1279 return 0;
1280
1281 err_out:
1282 interface->num_tx_queues = 0;
1283 interface->num_rx_queues = 0;
1284 interface->num_q_vectors = 0;
1285
1286 while (v_idx--)
1287 fm10k_free_q_vector(interface, v_idx);
1288
1289 return -ENOMEM;
1290 }
1291
1292 /**
1293 * fm10k_free_q_vectors - Free memory allocated for interrupt vectors
1294 * @interface: board private structure to initialize
1295 *
1296 * This function frees the memory allocated to the q_vectors. In addition if
1297 * NAPI is enabled it will delete any references to the NAPI struct prior
1298 * to freeing the q_vector.
1299 **/
1300 static void fm10k_free_q_vectors(struct fm10k_intfc *interface)
1301 {
1302 int v_idx = interface->num_q_vectors;
1303
1304 interface->num_tx_queues = 0;
1305 interface->num_rx_queues = 0;
1306 interface->num_q_vectors = 0;
1307
1308 while (v_idx--)
1309 fm10k_free_q_vector(interface, v_idx);
1310 }
1311
1312 /**
1313 * fm10k_reset_msix_capability - reset MSI-X capability
1314 * @interface: board private structure to initialize
1315 *
1316 * Reset the MSI-X capability back to its starting state
1317 **/
1318 static void fm10k_reset_msix_capability(struct fm10k_intfc *interface)
1319 {
1320 pci_disable_msix(interface->pdev);
1321 kfree(interface->msix_entries);
1322 interface->msix_entries = NULL;
1323 }
1324
1325 /**
1326 * fm10k_init_msix_capability - configure MSI-X capability
1327 * @interface: board private structure to initialize
1328 *
1329 * Attempt to configure the interrupts using the best available
1330 * capabilities of the hardware and the kernel.
1331 **/
1332 static int fm10k_init_msix_capability(struct fm10k_intfc *interface)
1333 {
1334 struct fm10k_hw *hw = &interface->hw;
1335 int v_budget, vector;
1336
1337 /* It's easy to be greedy for MSI-X vectors, but it really
1338 * doesn't do us much good if we have a lot more vectors
1339 * than CPU's. So let's be conservative and only ask for
1340 * (roughly) the same number of vectors as there are CPU's.
1341 * the default is to use pairs of vectors
1342 */
1343 v_budget = max(interface->num_rx_queues, interface->num_tx_queues);
1344 v_budget = min_t(u16, v_budget, num_online_cpus());
1345
1346 /* account for vectors not related to queues */
1347 v_budget += NON_Q_VECTORS(hw);
1348
1349 /* At the same time, hardware can only support a maximum of
1350 * hw->mac.max_msix_vectors vectors. With features
1351 * such as RSS and VMDq, we can easily surpass the number of Rx and Tx
1352 * descriptor queues supported by our device. Thus, we cap it off in
1353 * those rare cases where the cpu count also exceeds our vector limit.
1354 */
1355 v_budget = min_t(int, v_budget, hw->mac.max_msix_vectors);
1356
1357 /* A failure in MSI-X entry allocation is fatal. */
1358 interface->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry),
1359 GFP_KERNEL);
1360 if (!interface->msix_entries)
1361 return -ENOMEM;
1362
1363 /* populate entry values */
1364 for (vector = 0; vector < v_budget; vector++)
1365 interface->msix_entries[vector].entry = vector;
1366
1367 /* Attempt to enable MSI-X with requested value */
1368 v_budget = pci_enable_msix_range(interface->pdev,
1369 interface->msix_entries,
1370 MIN_MSIX_COUNT(hw),
1371 v_budget);
1372 if (v_budget < 0) {
1373 kfree(interface->msix_entries);
1374 interface->msix_entries = NULL;
1375 return -ENOMEM;
1376 }
1377
1378 /* record the number of queues available for q_vectors */
1379 interface->num_q_vectors = v_budget - NON_Q_VECTORS(hw);
1380
1381 return 0;
1382 }
1383
1384 static void fm10k_init_reta(struct fm10k_intfc *interface)
1385 {
1386 u16 i, rss_i = interface->ring_feature[RING_F_RSS].indices;
1387 u32 reta, base;
1388
1389 /* If the netdev is initialized we have to maintain the table if possible */
1390 if (interface->netdev->reg_state) {
1391 for (i = FM10K_RETA_SIZE; i--;) {
1392 reta = interface->reta[i];
1393 if ((((reta << 24) >> 24) < rss_i) &&
1394 (((reta << 16) >> 24) < rss_i) &&
1395 (((reta << 8) >> 24) < rss_i) &&
1396 (((reta) >> 24) < rss_i))
1397 continue;
1398 goto repopulate_reta;
1399 }
1400
1401 /* do nothing if all of the elements are in bounds */
1402 return;
1403 }
1404
1405 repopulate_reta:
1406 /* Populate the redirection table 4 entries at a time. To do this
1407 * we are generating the results for n and n+2 and then interleaving
1408 * those with the results with n+1 and n+3.
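 * Each 32-bit entry packs four 8-bit queue indices, so slot n of the
 * 128-entry table ends up with queue (n * rss_i) / 128: the multiply
 * is done in the two 16-bit halves of 'base' and the divide by 128 is
 * the mask-and-shift by 7 below.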
1409 */
1410 for (i = FM10K_RETA_SIZE; i--;) {
1411 /* first pass generates n and n+2 */
1412 base = ((i * 0x00040004) + 0x00020000) * rss_i;
1413 reta = (base & 0x3F803F80) >> 7;
1414
1415 /* second pass generates n+1 and n+3 */
1416 base += 0x00010001 * rss_i;
1417 reta |= (base & 0x3F803F80) << 1;
1418
1419 interface->reta[i] = reta;
1420 }
1421 }
1422
1423 /**
1424 * fm10k_init_queueing_scheme - Determine proper queueing scheme
1425 * @interface: board private structure to initialize
1426 *
1427 * We determine which queueing scheme to use based on...
1428 * - Hardware queue count (num_*_queues)
1429 * - defined by miscellaneous hardware support/features (RSS, etc.)
1430 **/
1431 int fm10k_init_queueing_scheme(struct fm10k_intfc *interface)
1432 {
1433 int err;
1434
1435 /* Number of supported queues */
1436 fm10k_set_num_queues(interface);
1437
1438 /* Configure MSI-X capability */
1439 err = fm10k_init_msix_capability(interface);
1440 if (err) {
1441 dev_err(&interface->pdev->dev,
1442 "Unable to initialize MSI-X capability\n");
1443 return err;
1444 }
1445
1446 /* Allocate memory for queues */
1447 err = fm10k_alloc_q_vectors(interface);
1448 if (err)
1449 return err;
1450
1451 /* Initialize RSS redirection table */
1452 fm10k_init_reta(interface);
1453
1454 return 0;
1455 }
1456
1457 /**
1458 * fm10k_clear_queueing_scheme - Clear the current queueing scheme settings
1459 * @interface: board private structure to clear queueing scheme on
1460 *
1461 * We go through and clear queueing specific resources and reset the structure
1462 * to pre-load conditions
1463 **/
1464 void fm10k_clear_queueing_scheme(struct fm10k_intfc *interface)
1465 {
1466 fm10k_free_q_vectors(interface);
1467 fm10k_reset_msix_capability(interface);
1468 }