igb: move the multiple receive queue configuration into separate function
[deliverable/linux.git] / drivers / net / igb / igb_main.c
1 /*******************************************************************************
2
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2009 Intel Corporation.
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
9
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
14
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
21
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
/* Driver version; also published through MODULE_VERSION(). */
#define DRV_VERSION "1.3.16-k2"

/*
 * Name and version strings.  These two are deliberately not static.
 * NOTE(review): presumably they are referenced from other igb source
 * files - confirm before changing their linkage.
 */
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;

/* Banner lines printed by igb_init_module() when the module is loaded. */
static const char igb_driver_string[] =
	"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] =
	"Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60 [board_82575] = &e1000_82575_info,
61 };
62
63 static struct pci_device_id igb_pci_tbl[] = {
64 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
65 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
66 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
67 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
68 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
69 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
70 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
71 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
72 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
73 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
74 /* required last entry */
75 {0, }
76 };
77
78 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
79
80 void igb_reset(struct igb_adapter *);
81 static int igb_setup_all_tx_resources(struct igb_adapter *);
82 static int igb_setup_all_rx_resources(struct igb_adapter *);
83 static void igb_free_all_tx_resources(struct igb_adapter *);
84 static void igb_free_all_rx_resources(struct igb_adapter *);
85 static void igb_setup_mrqc(struct igb_adapter *);
86 void igb_update_stats(struct igb_adapter *);
87 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
88 static void __devexit igb_remove(struct pci_dev *pdev);
89 static int igb_sw_init(struct igb_adapter *);
90 static int igb_open(struct net_device *);
91 static int igb_close(struct net_device *);
92 static void igb_configure_tx(struct igb_adapter *);
93 static void igb_configure_rx(struct igb_adapter *);
94 static void igb_setup_tctl(struct igb_adapter *);
95 static void igb_setup_rctl(struct igb_adapter *);
96 static void igb_clean_all_tx_rings(struct igb_adapter *);
97 static void igb_clean_all_rx_rings(struct igb_adapter *);
98 static void igb_clean_tx_ring(struct igb_ring *);
99 static void igb_clean_rx_ring(struct igb_ring *);
100 static void igb_set_rx_mode(struct net_device *);
101 static void igb_update_phy_info(unsigned long);
102 static void igb_watchdog(unsigned long);
103 static void igb_watchdog_task(struct work_struct *);
104 static netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *,
105 struct igb_ring *);
106 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
107 struct net_device *);
108 static struct net_device_stats *igb_get_stats(struct net_device *);
109 static int igb_change_mtu(struct net_device *, int);
110 static int igb_set_mac(struct net_device *, void *);
111 static void igb_set_uta(struct igb_adapter *adapter);
112 static irqreturn_t igb_intr(int irq, void *);
113 static irqreturn_t igb_intr_msi(int irq, void *);
114 static irqreturn_t igb_msix_other(int irq, void *);
115 static irqreturn_t igb_msix_ring(int irq, void *);
116 #ifdef CONFIG_IGB_DCA
117 static void igb_update_dca(struct igb_q_vector *);
118 static void igb_setup_dca(struct igb_adapter *);
119 #endif /* CONFIG_IGB_DCA */
120 static bool igb_clean_tx_irq(struct igb_q_vector *);
121 static int igb_poll(struct napi_struct *, int);
122 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
123 static void igb_alloc_rx_buffers_adv(struct igb_ring *, int);
124 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
125 static void igb_tx_timeout(struct net_device *);
126 static void igb_reset_task(struct work_struct *);
127 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
128 static void igb_vlan_rx_add_vid(struct net_device *, u16);
129 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
130 static void igb_restore_vlan(struct igb_adapter *);
131 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
132 static void igb_ping_all_vfs(struct igb_adapter *);
133 static void igb_msg_task(struct igb_adapter *);
134 static int igb_rcv_msg_from_vf(struct igb_adapter *, u32);
135 static void igb_vmm_control(struct igb_adapter *);
136 static int igb_set_vf_mac(struct igb_adapter *adapter, int, unsigned char *);
137 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
138
139 static inline void igb_set_vmolr(struct e1000_hw *hw, int vfn)
140 {
141 u32 reg_data;
142
143 reg_data = rd32(E1000_VMOLR(vfn));
144 reg_data |= E1000_VMOLR_BAM | /* Accept broadcast */
145 E1000_VMOLR_ROMPE | /* Accept packets matched in MTA */
146 E1000_VMOLR_AUPE | /* Accept untagged packets */
147 E1000_VMOLR_STRVLAN; /* Strip vlan tags */
148 wr32(E1000_VMOLR(vfn), reg_data);
149 }
150
151 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
152 int vfn)
153 {
154 struct e1000_hw *hw = &adapter->hw;
155 u32 vmolr;
156
157 /* if it isn't the PF check to see if VFs are enabled and
158 * increase the size to support vlan tags */
159 if (vfn < adapter->vfs_allocated_count &&
160 adapter->vf_data[vfn].vlans_enabled)
161 size += VLAN_TAG_SIZE;
162
163 vmolr = rd32(E1000_VMOLR(vfn));
164 vmolr &= ~E1000_VMOLR_RLPML_MASK;
165 vmolr |= size | E1000_VMOLR_LPE;
166 wr32(E1000_VMOLR(vfn), vmolr);
167
168 return 0;
169 }
170
171 #ifdef CONFIG_PM
172 static int igb_suspend(struct pci_dev *, pm_message_t);
173 static int igb_resume(struct pci_dev *);
174 #endif
175 static void igb_shutdown(struct pci_dev *);
176 #ifdef CONFIG_IGB_DCA
177 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
178 static struct notifier_block dca_notifier = {
179 .notifier_call = igb_notify_dca,
180 .next = NULL,
181 .priority = 0
182 };
183 #endif
184 #ifdef CONFIG_NET_POLL_CONTROLLER
185 /* for netdump / net console */
186 static void igb_netpoll(struct net_device *);
187 #endif
188 #ifdef CONFIG_PCI_IOV
189 static unsigned int max_vfs = 0;
190 module_param(max_vfs, uint, 0);
191 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
192 "per physical function");
193 #endif /* CONFIG_PCI_IOV */
194
195 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
196 pci_channel_state_t);
197 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
198 static void igb_io_resume(struct pci_dev *);
199
200 static struct pci_error_handlers igb_err_handler = {
201 .error_detected = igb_io_error_detected,
202 .slot_reset = igb_io_slot_reset,
203 .resume = igb_io_resume,
204 };
205
206
207 static struct pci_driver igb_driver = {
208 .name = igb_driver_name,
209 .id_table = igb_pci_tbl,
210 .probe = igb_probe,
211 .remove = __devexit_p(igb_remove),
212 #ifdef CONFIG_PM
213 /* Power Managment Hooks */
214 .suspend = igb_suspend,
215 .resume = igb_resume,
216 #endif
217 .shutdown = igb_shutdown,
218 .err_handler = &igb_err_handler
219 };
220
221 static int global_quad_port_a; /* global quad port a indication */
222
223 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
224 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
225 MODULE_LICENSE("GPL");
226 MODULE_VERSION(DRV_VERSION);
227
/**
 * The NIC clock cycle is scaled by a large factor so that relatively
 * small clock corrections can be added or subtracted at each clock tick.
 * A large factor has two drawbacks:
 *   a) the clock register overflows more quickly (not such a big deal);
 *   b) the increment per tick still has to fit into 24 bits.
 *
 * Note that
 *   TIMINCA = IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS *
 *             IGB_TSYNC_SCALE
 *   TIMINCA += TIMINCA * adjustment [ppm] / 1e9
 *
 * The base scale factor is intentionally a power of two so that the
 * division in %struct timecounter can be done with a shift.
 */
#define IGB_TSYNC_SHIFT (19)
#define IGB_TSYNC_SCALE (1<<IGB_TSYNC_SHIFT)

/**
 * Duration of one NIC clock cycle, in nanoseconds.
 *
 * @todo This hard-coded value is part of the specification and might change
 * in future hardware revisions. Add revision check.
 */
#define IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS 16

/* Refuse to build when the scaled increment would not fit into 24 bits. */
#if (IGB_TSYNC_SCALE * IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS) >= (1<<24)
# error IGB_TSYNC_SCALE and/or IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS are too large to fit into TIMINCA
#endif
259
260 /**
261 * igb_read_clock - read raw cycle counter (to be used by time counter)
262 */
263 static cycle_t igb_read_clock(const struct cyclecounter *tc)
264 {
265 struct igb_adapter *adapter =
266 container_of(tc, struct igb_adapter, cycles);
267 struct e1000_hw *hw = &adapter->hw;
268 u64 stamp;
269
270 stamp = rd32(E1000_SYSTIML);
271 stamp |= (u64)rd32(E1000_SYSTIMH) << 32ULL;
272
273 return stamp;
274 }
275
276 #ifdef DEBUG
277 /**
278 * igb_get_hw_dev_name - return device name string
279 * used by hardware layer to print debugging information
280 **/
281 char *igb_get_hw_dev_name(struct e1000_hw *hw)
282 {
283 struct igb_adapter *adapter = hw->back;
284 return adapter->netdev->name;
285 }
286
287 /**
288 * igb_get_time_str - format current NIC and system time as string
289 */
290 static char *igb_get_time_str(struct igb_adapter *adapter,
291 char buffer[160])
292 {
293 cycle_t hw = adapter->cycles.read(&adapter->cycles);
294 struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
295 struct timespec sys;
296 struct timespec delta;
297 getnstimeofday(&sys);
298
299 delta = timespec_sub(nic, sys);
300
301 sprintf(buffer,
302 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
303 hw,
304 (long)nic.tv_sec, nic.tv_nsec,
305 (long)sys.tv_sec, sys.tv_nsec,
306 (long)delta.tv_sec, delta.tv_nsec);
307
308 return buffer;
309 }
310 #endif
311
312 /**
313 * igb_desc_unused - calculate if we have unused descriptors
314 **/
315 static int igb_desc_unused(struct igb_ring *ring)
316 {
317 if (ring->next_to_clean > ring->next_to_use)
318 return ring->next_to_clean - ring->next_to_use - 1;
319
320 return ring->count + ring->next_to_clean - ring->next_to_use - 1;
321 }
322
323 /**
324 * igb_init_module - Driver Registration Routine
325 *
326 * igb_init_module is the first routine called when the driver is
327 * loaded. All it does is register with the PCI subsystem.
328 **/
329 static int __init igb_init_module(void)
330 {
331 int ret;
332 printk(KERN_INFO "%s - version %s\n",
333 igb_driver_string, igb_driver_version);
334
335 printk(KERN_INFO "%s\n", igb_copyright);
336
337 global_quad_port_a = 0;
338
339 #ifdef CONFIG_IGB_DCA
340 dca_register_notify(&dca_notifier);
341 #endif
342
343 ret = pci_register_driver(&igb_driver);
344 return ret;
345 }
346
347 module_init(igb_init_module);
348
349 /**
350 * igb_exit_module - Driver Exit Cleanup Routine
351 *
352 * igb_exit_module is called just before the driver is removed
353 * from memory.
354 **/
355 static void __exit igb_exit_module(void)
356 {
357 #ifdef CONFIG_IGB_DCA
358 dca_unregister_notify(&dca_notifier);
359 #endif
360 pci_unregister_driver(&igb_driver);
361 }
362
363 module_exit(igb_exit_module);
364
/*
 * Map a sequential queue number to its 82576 register index: even
 * numbers map to 0, 1, 2, ... and odd numbers to 8, 9, 10, ...
 * The argument is parenthesized so that expressions such as
 * Q_IDX_82576(a + b) are evaluated as a whole.
 */
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
366 /**
367 * igb_cache_ring_register - Descriptor ring to register mapping
368 * @adapter: board private structure to initialize
369 *
370 * Once we know the feature-set enabled for the device, we'll cache
371 * the register offset the descriptor ring is assigned to.
372 **/
373 static void igb_cache_ring_register(struct igb_adapter *adapter)
374 {
375 int i;
376 u32 rbase_offset = adapter->vfs_allocated_count;
377
378 switch (adapter->hw.mac.type) {
379 case e1000_82576:
380 /* The queues are allocated for virtualization such that VF 0
381 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
382 * In order to avoid collision we start at the first free queue
383 * and continue consuming queues in the same sequence
384 */
385 for (i = 0; i < adapter->num_rx_queues; i++)
386 adapter->rx_ring[i].reg_idx = rbase_offset +
387 Q_IDX_82576(i);
388 for (i = 0; i < adapter->num_tx_queues; i++)
389 adapter->tx_ring[i].reg_idx = rbase_offset +
390 Q_IDX_82576(i);
391 break;
392 case e1000_82575:
393 default:
394 for (i = 0; i < adapter->num_rx_queues; i++)
395 adapter->rx_ring[i].reg_idx = i;
396 for (i = 0; i < adapter->num_tx_queues; i++)
397 adapter->tx_ring[i].reg_idx = i;
398 break;
399 }
400 }
401
402 static void igb_free_queues(struct igb_adapter *adapter)
403 {
404 kfree(adapter->tx_ring);
405 kfree(adapter->rx_ring);
406
407 adapter->tx_ring = NULL;
408 adapter->rx_ring = NULL;
409
410 adapter->num_rx_queues = 0;
411 adapter->num_tx_queues = 0;
412 }
413
414 /**
415 * igb_alloc_queues - Allocate memory for all rings
416 * @adapter: board private structure to initialize
417 *
418 * We allocate one ring per queue at run-time since we don't know the
419 * number of queues at compile-time.
420 **/
421 static int igb_alloc_queues(struct igb_adapter *adapter)
422 {
423 int i;
424
425 adapter->tx_ring = kcalloc(adapter->num_tx_queues,
426 sizeof(struct igb_ring), GFP_KERNEL);
427 if (!adapter->tx_ring)
428 goto err;
429
430 adapter->rx_ring = kcalloc(adapter->num_rx_queues,
431 sizeof(struct igb_ring), GFP_KERNEL);
432 if (!adapter->rx_ring)
433 goto err;
434
435 for (i = 0; i < adapter->num_tx_queues; i++) {
436 struct igb_ring *ring = &(adapter->tx_ring[i]);
437 ring->count = adapter->tx_ring_count;
438 ring->queue_index = i;
439 ring->pdev = adapter->pdev;
440 ring->netdev = adapter->netdev;
441 /* For 82575, context index must be unique per ring. */
442 if (adapter->hw.mac.type == e1000_82575)
443 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
444 }
445
446 for (i = 0; i < adapter->num_rx_queues; i++) {
447 struct igb_ring *ring = &(adapter->rx_ring[i]);
448 ring->count = adapter->rx_ring_count;
449 ring->queue_index = i;
450 ring->pdev = adapter->pdev;
451 ring->netdev = adapter->netdev;
452 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
453 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
454 /* set flag indicating ring supports SCTP checksum offload */
455 if (adapter->hw.mac.type >= e1000_82576)
456 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
457 }
458
459 igb_cache_ring_register(adapter);
460
461 return 0;
462
463 err:
464 igb_free_queues(adapter);
465
466 return -ENOMEM;
467 }
468
/*
 * Marker meaning "no queue assigned".  Parenthesized so the negative
 * constant stays a single operand wherever the macro is expanded.
 */
#define IGB_N0_QUEUE (-1)
470 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
471 {
472 u32 msixbm = 0;
473 struct igb_adapter *adapter = q_vector->adapter;
474 struct e1000_hw *hw = &adapter->hw;
475 u32 ivar, index;
476 int rx_queue = IGB_N0_QUEUE;
477 int tx_queue = IGB_N0_QUEUE;
478
479 if (q_vector->rx_ring)
480 rx_queue = q_vector->rx_ring->reg_idx;
481 if (q_vector->tx_ring)
482 tx_queue = q_vector->tx_ring->reg_idx;
483
484 switch (hw->mac.type) {
485 case e1000_82575:
486 /* The 82575 assigns vectors using a bitmask, which matches the
487 bitmask for the EICR/EIMS/EIMC registers. To assign one
488 or more queues to a vector, we write the appropriate bits
489 into the MSIXBM register for that vector. */
490 if (rx_queue > IGB_N0_QUEUE)
491 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
492 if (tx_queue > IGB_N0_QUEUE)
493 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
494 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
495 q_vector->eims_value = msixbm;
496 break;
497 case e1000_82576:
498 /* 82576 uses a table-based method for assigning vectors.
499 Each queue has a single entry in the table to which we write
500 a vector number along with a "valid" bit. Sadly, the layout
501 of the table is somewhat counterintuitive. */
502 if (rx_queue > IGB_N0_QUEUE) {
503 index = (rx_queue & 0x7);
504 ivar = array_rd32(E1000_IVAR0, index);
505 if (rx_queue < 8) {
506 /* vector goes into low byte of register */
507 ivar = ivar & 0xFFFFFF00;
508 ivar |= msix_vector | E1000_IVAR_VALID;
509 } else {
510 /* vector goes into third byte of register */
511 ivar = ivar & 0xFF00FFFF;
512 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
513 }
514 array_wr32(E1000_IVAR0, index, ivar);
515 }
516 if (tx_queue > IGB_N0_QUEUE) {
517 index = (tx_queue & 0x7);
518 ivar = array_rd32(E1000_IVAR0, index);
519 if (tx_queue < 8) {
520 /* vector goes into second byte of register */
521 ivar = ivar & 0xFFFF00FF;
522 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
523 } else {
524 /* vector goes into high byte of register */
525 ivar = ivar & 0x00FFFFFF;
526 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
527 }
528 array_wr32(E1000_IVAR0, index, ivar);
529 }
530 q_vector->eims_value = 1 << msix_vector;
531 break;
532 default:
533 BUG();
534 break;
535 }
536 }
537
538 /**
539 * igb_configure_msix - Configure MSI-X hardware
540 *
541 * igb_configure_msix sets up the hardware to properly
542 * generate MSI-X interrupts.
543 **/
544 static void igb_configure_msix(struct igb_adapter *adapter)
545 {
546 u32 tmp;
547 int i, vector = 0;
548 struct e1000_hw *hw = &adapter->hw;
549
550 adapter->eims_enable_mask = 0;
551
552 /* set vector for other causes, i.e. link changes */
553 switch (hw->mac.type) {
554 case e1000_82575:
555 tmp = rd32(E1000_CTRL_EXT);
556 /* enable MSI-X PBA support*/
557 tmp |= E1000_CTRL_EXT_PBA_CLR;
558
559 /* Auto-Mask interrupts upon ICR read. */
560 tmp |= E1000_CTRL_EXT_EIAME;
561 tmp |= E1000_CTRL_EXT_IRCA;
562
563 wr32(E1000_CTRL_EXT, tmp);
564
565 /* enable msix_other interrupt */
566 array_wr32(E1000_MSIXBM(0), vector++,
567 E1000_EIMS_OTHER);
568 adapter->eims_other = E1000_EIMS_OTHER;
569
570 break;
571
572 case e1000_82576:
573 /* Turn on MSI-X capability first, or our settings
574 * won't stick. And it will take days to debug. */
575 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
576 E1000_GPIE_PBA | E1000_GPIE_EIAME |
577 E1000_GPIE_NSICR);
578
579 /* enable msix_other interrupt */
580 adapter->eims_other = 1 << vector;
581 tmp = (vector++ | E1000_IVAR_VALID) << 8;
582
583 wr32(E1000_IVAR_MISC, tmp);
584 break;
585 default:
586 /* do nothing, since nothing else supports MSI-X */
587 break;
588 } /* switch (hw->mac.type) */
589
590 adapter->eims_enable_mask |= adapter->eims_other;
591
592 for (i = 0; i < adapter->num_q_vectors; i++) {
593 struct igb_q_vector *q_vector = adapter->q_vector[i];
594 igb_assign_vector(q_vector, vector++);
595 adapter->eims_enable_mask |= q_vector->eims_value;
596 }
597
598 wrfl();
599 }
600
601 /**
602 * igb_request_msix - Initialize MSI-X interrupts
603 *
604 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
605 * kernel.
606 **/
607 static int igb_request_msix(struct igb_adapter *adapter)
608 {
609 struct net_device *netdev = adapter->netdev;
610 struct e1000_hw *hw = &adapter->hw;
611 int i, err = 0, vector = 0;
612
613 err = request_irq(adapter->msix_entries[vector].vector,
614 &igb_msix_other, 0, netdev->name, adapter);
615 if (err)
616 goto out;
617 vector++;
618
619 for (i = 0; i < adapter->num_q_vectors; i++) {
620 struct igb_q_vector *q_vector = adapter->q_vector[i];
621
622 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
623
624 if (q_vector->rx_ring && q_vector->tx_ring)
625 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
626 q_vector->rx_ring->queue_index);
627 else if (q_vector->tx_ring)
628 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
629 q_vector->tx_ring->queue_index);
630 else if (q_vector->rx_ring)
631 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
632 q_vector->rx_ring->queue_index);
633 else
634 sprintf(q_vector->name, "%s-unused", netdev->name);
635
636 err = request_irq(adapter->msix_entries[vector].vector,
637 &igb_msix_ring, 0, q_vector->name,
638 q_vector);
639 if (err)
640 goto out;
641 vector++;
642 }
643
644 igb_configure_msix(adapter);
645 return 0;
646 out:
647 return err;
648 }
649
650 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
651 {
652 if (adapter->msix_entries) {
653 pci_disable_msix(adapter->pdev);
654 kfree(adapter->msix_entries);
655 adapter->msix_entries = NULL;
656 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
657 pci_disable_msi(adapter->pdev);
658 }
659 }
660
661 /**
662 * igb_free_q_vectors - Free memory allocated for interrupt vectors
663 * @adapter: board private structure to initialize
664 *
665 * This function frees the memory allocated to the q_vectors. In addition if
666 * NAPI is enabled it will delete any references to the NAPI struct prior
667 * to freeing the q_vector.
668 **/
669 static void igb_free_q_vectors(struct igb_adapter *adapter)
670 {
671 int v_idx;
672
673 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
674 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
675 adapter->q_vector[v_idx] = NULL;
676 netif_napi_del(&q_vector->napi);
677 kfree(q_vector);
678 }
679 adapter->num_q_vectors = 0;
680 }
681
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * Leaves the adapter with 0 rx queues, 0 tx queues, no q_vectors and
 * MSI-X/MSI disabled.  Teardown order: ring arrays, then q_vectors,
 * then the interrupt capability.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);

	igb_free_q_vectors(adapter);

	igb_reset_interrupt_capability(adapter);
}
694
695 /**
696 * igb_set_interrupt_capability - set MSI or MSI-X if supported
697 *
698 * Attempt to configure interrupts using the best available
699 * capabilities of the hardware and kernel.
700 **/
701 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
702 {
703 int err;
704 int numvecs, i;
705
706 /* Number of supported queues. */
707 adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
708 adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());
709
710 /* start with one vector for every rx queue */
711 numvecs = adapter->num_rx_queues;
712
713 /* if tx handler is seperate add 1 for every tx queue */
714 numvecs += adapter->num_tx_queues;
715
716 /* store the number of vectors reserved for queues */
717 adapter->num_q_vectors = numvecs;
718
719 /* add 1 vector for link status interrupts */
720 numvecs++;
721 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
722 GFP_KERNEL);
723 if (!adapter->msix_entries)
724 goto msi_only;
725
726 for (i = 0; i < numvecs; i++)
727 adapter->msix_entries[i].entry = i;
728
729 err = pci_enable_msix(adapter->pdev,
730 adapter->msix_entries,
731 numvecs);
732 if (err == 0)
733 goto out;
734
735 igb_reset_interrupt_capability(adapter);
736
737 /* If we can't do MSI-X, try MSI */
738 msi_only:
739 #ifdef CONFIG_PCI_IOV
740 /* disable SR-IOV for non MSI-X configurations */
741 if (adapter->vf_data) {
742 struct e1000_hw *hw = &adapter->hw;
743 /* disable iov and allow time for transactions to clear */
744 pci_disable_sriov(adapter->pdev);
745 msleep(500);
746
747 kfree(adapter->vf_data);
748 adapter->vf_data = NULL;
749 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
750 msleep(100);
751 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
752 }
753 #endif
754 adapter->num_rx_queues = 1;
755 adapter->num_tx_queues = 1;
756 adapter->num_q_vectors = 1;
757 if (!pci_enable_msi(adapter->pdev))
758 adapter->flags |= IGB_FLAG_HAS_MSI;
759 out:
760 /* Notify the stack of the (possibly) reduced Tx Queue count. */
761 adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
762 return;
763 }
764
765 /**
766 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
767 * @adapter: board private structure to initialize
768 *
769 * We allocate one q_vector per queue interrupt. If allocation fails we
770 * return -ENOMEM.
771 **/
772 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
773 {
774 struct igb_q_vector *q_vector;
775 struct e1000_hw *hw = &adapter->hw;
776 int v_idx;
777
778 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
779 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
780 if (!q_vector)
781 goto err_out;
782 q_vector->adapter = adapter;
783 q_vector->itr_shift = (hw->mac.type == e1000_82575) ? 16 : 0;
784 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
785 q_vector->itr_val = IGB_START_ITR;
786 q_vector->set_itr = 1;
787 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
788 adapter->q_vector[v_idx] = q_vector;
789 }
790 return 0;
791
792 err_out:
793 while (v_idx) {
794 v_idx--;
795 q_vector = adapter->q_vector[v_idx];
796 netif_napi_del(&q_vector->napi);
797 kfree(q_vector);
798 adapter->q_vector[v_idx] = NULL;
799 }
800 return -ENOMEM;
801 }
802
803 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
804 int ring_idx, int v_idx)
805 {
806 struct igb_q_vector *q_vector;
807
808 q_vector = adapter->q_vector[v_idx];
809 q_vector->rx_ring = &adapter->rx_ring[ring_idx];
810 q_vector->rx_ring->q_vector = q_vector;
811 q_vector->itr_val = adapter->itr;
812 }
813
814 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
815 int ring_idx, int v_idx)
816 {
817 struct igb_q_vector *q_vector;
818
819 q_vector = adapter->q_vector[v_idx];
820 q_vector->tx_ring = &adapter->tx_ring[ring_idx];
821 q_vector->tx_ring->q_vector = q_vector;
822 q_vector->itr_val = adapter->itr;
823 }
824
825 /**
826 * igb_map_ring_to_vector - maps allocated queues to vectors
827 *
828 * This function maps the recently allocated queues to vectors.
829 **/
830 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
831 {
832 int i;
833 int v_idx = 0;
834
835 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
836 (adapter->num_q_vectors < adapter->num_tx_queues))
837 return -ENOMEM;
838
839 if (adapter->num_q_vectors >=
840 (adapter->num_rx_queues + adapter->num_tx_queues)) {
841 for (i = 0; i < adapter->num_rx_queues; i++)
842 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
843 for (i = 0; i < adapter->num_tx_queues; i++)
844 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
845 } else {
846 for (i = 0; i < adapter->num_rx_queues; i++) {
847 if (i < adapter->num_tx_queues)
848 igb_map_tx_ring_to_vector(adapter, i, v_idx);
849 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
850 }
851 for (; i < adapter->num_tx_queues; i++)
852 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
853 }
854 return 0;
855 }
856
857 /**
858 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
859 *
860 * This function initializes the interrupts and allocates all of the queues.
861 **/
862 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
863 {
864 struct pci_dev *pdev = adapter->pdev;
865 int err;
866
867 igb_set_interrupt_capability(adapter);
868
869 err = igb_alloc_q_vectors(adapter);
870 if (err) {
871 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
872 goto err_alloc_q_vectors;
873 }
874
875 err = igb_alloc_queues(adapter);
876 if (err) {
877 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
878 goto err_alloc_queues;
879 }
880
881 err = igb_map_ring_to_vector(adapter);
882 if (err) {
883 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
884 goto err_map_queues;
885 }
886
887
888 return 0;
889 err_map_queues:
890 igb_free_queues(adapter);
891 err_alloc_queues:
892 igb_free_q_vectors(adapter);
893 err_alloc_q_vectors:
894 igb_reset_interrupt_capability(adapter);
895 return err;
896 }
897
898 /**
899 * igb_request_irq - initialize interrupts
900 *
901 * Attempts to configure interrupts using the best available
902 * capabilities of the hardware and kernel.
903 **/
904 static int igb_request_irq(struct igb_adapter *adapter)
905 {
906 struct net_device *netdev = adapter->netdev;
907 struct pci_dev *pdev = adapter->pdev;
908 struct e1000_hw *hw = &adapter->hw;
909 int err = 0;
910
911 if (adapter->msix_entries) {
912 err = igb_request_msix(adapter);
913 if (!err)
914 goto request_done;
915 /* fall back to MSI */
916 igb_clear_interrupt_scheme(adapter);
917 if (!pci_enable_msi(adapter->pdev))
918 adapter->flags |= IGB_FLAG_HAS_MSI;
919 igb_free_all_tx_resources(adapter);
920 igb_free_all_rx_resources(adapter);
921 adapter->num_tx_queues = 1;
922 adapter->num_rx_queues = 1;
923 adapter->num_q_vectors = 1;
924 err = igb_alloc_q_vectors(adapter);
925 if (err) {
926 dev_err(&pdev->dev,
927 "Unable to allocate memory for vectors\n");
928 goto request_done;
929 }
930 err = igb_alloc_queues(adapter);
931 if (err) {
932 dev_err(&pdev->dev,
933 "Unable to allocate memory for queues\n");
934 igb_free_q_vectors(adapter);
935 goto request_done;
936 }
937 igb_setup_all_tx_resources(adapter);
938 igb_setup_all_rx_resources(adapter);
939 } else {
940 switch (hw->mac.type) {
941 case e1000_82575:
942 wr32(E1000_MSIXBM(0),
943 (E1000_EICR_RX_QUEUE0 |
944 E1000_EICR_TX_QUEUE0 |
945 E1000_EIMS_OTHER));
946 break;
947 case e1000_82576:
948 wr32(E1000_IVAR0, E1000_IVAR_VALID);
949 break;
950 default:
951 break;
952 }
953 }
954
955 if (adapter->flags & IGB_FLAG_HAS_MSI) {
956 err = request_irq(adapter->pdev->irq, &igb_intr_msi, 0,
957 netdev->name, adapter);
958 if (!err)
959 goto request_done;
960
961 /* fall back to legacy interrupts */
962 igb_reset_interrupt_capability(adapter);
963 adapter->flags &= ~IGB_FLAG_HAS_MSI;
964 }
965
966 err = request_irq(adapter->pdev->irq, &igb_intr, IRQF_SHARED,
967 netdev->name, adapter);
968
969 if (err)
970 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
971 err);
972
973 request_done:
974 return err;
975 }
976
977 static void igb_free_irq(struct igb_adapter *adapter)
978 {
979 if (adapter->msix_entries) {
980 int vector = 0, i;
981
982 free_irq(adapter->msix_entries[vector++].vector, adapter);
983
984 for (i = 0; i < adapter->num_q_vectors; i++) {
985 struct igb_q_vector *q_vector = adapter->q_vector[i];
986 free_irq(adapter->msix_entries[vector++].vector,
987 q_vector);
988 }
989 } else {
990 free_irq(adapter->pdev->irq, adapter);
991 }
992 }
993
994 /**
995 * igb_irq_disable - Mask off interrupt generation on the NIC
996 * @adapter: board private structure
997 **/
998 static void igb_irq_disable(struct igb_adapter *adapter)
999 {
1000 struct e1000_hw *hw = &adapter->hw;
1001
1002 if (adapter->msix_entries) {
1003 u32 regval = rd32(E1000_EIAM);
1004 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1005 wr32(E1000_EIMC, adapter->eims_enable_mask);
1006 regval = rd32(E1000_EIAC);
1007 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1008 }
1009
1010 wr32(E1000_IAM, 0);
1011 wr32(E1000_IMC, ~0);
1012 wrfl();
1013 synchronize_irq(adapter->pdev->irq);
1014 }
1015
1016 /**
1017 * igb_irq_enable - Enable default interrupt generation settings
1018 * @adapter: board private structure
1019 **/
1020 static void igb_irq_enable(struct igb_adapter *adapter)
1021 {
1022 struct e1000_hw *hw = &adapter->hw;
1023
1024 if (adapter->msix_entries) {
1025 u32 regval = rd32(E1000_EIAC);
1026 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1027 regval = rd32(E1000_EIAM);
1028 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1029 wr32(E1000_EIMS, adapter->eims_enable_mask);
1030 if (adapter->vfs_allocated_count)
1031 wr32(E1000_MBVFIMR, 0xFF);
1032 wr32(E1000_IMS, (E1000_IMS_LSC | E1000_IMS_VMMB |
1033 E1000_IMS_DOUTSYNC));
1034 } else {
1035 wr32(E1000_IMS, IMS_ENABLE_MASK);
1036 wr32(E1000_IAM, IMS_ENABLE_MASK);
1037 }
1038 }
1039
1040 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1041 {
1042 struct net_device *netdev = adapter->netdev;
1043 u16 vid = adapter->hw.mng_cookie.vlan_id;
1044 u16 old_vid = adapter->mng_vlan_id;
1045 if (adapter->vlgrp) {
1046 if (!vlan_group_get_device(adapter->vlgrp, vid)) {
1047 if (adapter->hw.mng_cookie.status &
1048 E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1049 igb_vlan_rx_add_vid(netdev, vid);
1050 adapter->mng_vlan_id = vid;
1051 } else
1052 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1053
1054 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1055 (vid != old_vid) &&
1056 !vlan_group_get_device(adapter->vlgrp, old_vid))
1057 igb_vlan_rx_kill_vid(netdev, old_vid);
1058 } else
1059 adapter->mng_vlan_id = vid;
1060 }
1061 }
1062
1063 /**
1064 * igb_release_hw_control - release control of the h/w to f/w
1065 * @adapter: address of board private structure
1066 *
1067 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1068 * For ASF and Pass Through versions of f/w this means that the
1069 * driver is no longer loaded.
1070 *
1071 **/
1072 static void igb_release_hw_control(struct igb_adapter *adapter)
1073 {
1074 struct e1000_hw *hw = &adapter->hw;
1075 u32 ctrl_ext;
1076
1077 /* Let firmware take over control of h/w */
1078 ctrl_ext = rd32(E1000_CTRL_EXT);
1079 wr32(E1000_CTRL_EXT,
1080 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1081 }
1082
1083
1084 /**
1085 * igb_get_hw_control - get control of the h/w from f/w
1086 * @adapter: address of board private structure
1087 *
1088 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1089 * For ASF and Pass Through versions of f/w this means that
1090 * the driver is loaded.
1091 *
1092 **/
1093 static void igb_get_hw_control(struct igb_adapter *adapter)
1094 {
1095 struct e1000_hw *hw = &adapter->hw;
1096 u32 ctrl_ext;
1097
1098 /* Let firmware know the driver has taken over */
1099 ctrl_ext = rd32(E1000_CTRL_EXT);
1100 wr32(E1000_CTRL_EXT,
1101 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1102 }
1103
1104 /**
1105 * igb_configure - configure the hardware for RX and TX
1106 * @adapter: private board structure
1107 **/
1108 static void igb_configure(struct igb_adapter *adapter)
1109 {
1110 struct net_device *netdev = adapter->netdev;
1111 int i;
1112
1113 igb_get_hw_control(adapter);
1114 igb_set_rx_mode(netdev);
1115
1116 igb_restore_vlan(adapter);
1117
1118 igb_setup_tctl(adapter);
1119 igb_setup_mrqc(adapter);
1120 igb_setup_rctl(adapter);
1121
1122 igb_configure_tx(adapter);
1123 igb_configure_rx(adapter);
1124
1125 igb_rx_fifo_flush_82575(&adapter->hw);
1126
1127 /* call igb_desc_unused which always leaves
1128 * at least 1 descriptor unused to make sure
1129 * next_to_use != next_to_clean */
1130 for (i = 0; i < adapter->num_rx_queues; i++) {
1131 struct igb_ring *ring = &adapter->rx_ring[i];
1132 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1133 }
1134
1135
1136 adapter->tx_queue_len = netdev->tx_queue_len;
1137 }
1138
1139
1140 /**
1141 * igb_up - Open the interface and prepare it to handle traffic
1142 * @adapter: board private structure
1143 **/
1144
1145 int igb_up(struct igb_adapter *adapter)
1146 {
1147 struct e1000_hw *hw = &adapter->hw;
1148 int i;
1149
1150 /* hardware has been reset, we need to reload some things */
1151 igb_configure(adapter);
1152
1153 clear_bit(__IGB_DOWN, &adapter->state);
1154
1155 for (i = 0; i < adapter->num_q_vectors; i++) {
1156 struct igb_q_vector *q_vector = adapter->q_vector[i];
1157 napi_enable(&q_vector->napi);
1158 }
1159 if (adapter->msix_entries)
1160 igb_configure_msix(adapter);
1161
1162 igb_set_vmolr(hw, adapter->vfs_allocated_count);
1163
1164 /* Clear any pending interrupts. */
1165 rd32(E1000_ICR);
1166 igb_irq_enable(adapter);
1167
1168 netif_tx_start_all_queues(adapter->netdev);
1169
1170 /* Fire a link change interrupt to start the watchdog. */
1171 wr32(E1000_ICS, E1000_ICS_LSC);
1172 return 0;
1173 }
1174
1175 void igb_down(struct igb_adapter *adapter)
1176 {
1177 struct e1000_hw *hw = &adapter->hw;
1178 struct net_device *netdev = adapter->netdev;
1179 u32 tctl, rctl;
1180 int i;
1181
1182 /* signal that we're down so the interrupt handler does not
1183 * reschedule our watchdog timer */
1184 set_bit(__IGB_DOWN, &adapter->state);
1185
1186 /* disable receives in the hardware */
1187 rctl = rd32(E1000_RCTL);
1188 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1189 /* flush and sleep below */
1190
1191 netif_tx_stop_all_queues(netdev);
1192
1193 /* disable transmits in the hardware */
1194 tctl = rd32(E1000_TCTL);
1195 tctl &= ~E1000_TCTL_EN;
1196 wr32(E1000_TCTL, tctl);
1197 /* flush both disables and wait for them to finish */
1198 wrfl();
1199 msleep(10);
1200
1201 for (i = 0; i < adapter->num_q_vectors; i++) {
1202 struct igb_q_vector *q_vector = adapter->q_vector[i];
1203 napi_disable(&q_vector->napi);
1204 }
1205
1206 igb_irq_disable(adapter);
1207
1208 del_timer_sync(&adapter->watchdog_timer);
1209 del_timer_sync(&adapter->phy_info_timer);
1210
1211 netdev->tx_queue_len = adapter->tx_queue_len;
1212 netif_carrier_off(netdev);
1213
1214 /* record the stats before reset*/
1215 igb_update_stats(adapter);
1216
1217 adapter->link_speed = 0;
1218 adapter->link_duplex = 0;
1219
1220 if (!pci_channel_offline(adapter->pdev))
1221 igb_reset(adapter);
1222 igb_clean_all_tx_rings(adapter);
1223 igb_clean_all_rx_rings(adapter);
1224 #ifdef CONFIG_IGB_DCA
1225
1226 /* since we reset the hardware DCA settings were cleared */
1227 igb_setup_dca(adapter);
1228 #endif
1229 }
1230
1231 void igb_reinit_locked(struct igb_adapter *adapter)
1232 {
1233 WARN_ON(in_interrupt());
1234 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1235 msleep(1);
1236 igb_down(adapter);
1237 igb_up(adapter);
1238 clear_bit(__IGB_RESETTING, &adapter->state);
1239 }
1240
1241 void igb_reset(struct igb_adapter *adapter)
1242 {
1243 struct e1000_hw *hw = &adapter->hw;
1244 struct e1000_mac_info *mac = &hw->mac;
1245 struct e1000_fc_info *fc = &hw->fc;
1246 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1247 u16 hwm;
1248
1249 /* Repartition Pba for greater than 9k mtu
1250 * To take effect CTRL.RST is required.
1251 */
1252 switch (mac->type) {
1253 case e1000_82576:
1254 pba = E1000_PBA_64K;
1255 break;
1256 case e1000_82575:
1257 default:
1258 pba = E1000_PBA_34K;
1259 break;
1260 }
1261
1262 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1263 (mac->type < e1000_82576)) {
1264 /* adjust PBA for jumbo frames */
1265 wr32(E1000_PBA, pba);
1266
1267 /* To maintain wire speed transmits, the Tx FIFO should be
1268 * large enough to accommodate two full transmit packets,
1269 * rounded up to the next 1KB and expressed in KB. Likewise,
1270 * the Rx FIFO should be large enough to accommodate at least
1271 * one full receive packet and is similarly rounded up and
1272 * expressed in KB. */
1273 pba = rd32(E1000_PBA);
1274 /* upper 16 bits has Tx packet buffer allocation size in KB */
1275 tx_space = pba >> 16;
1276 /* lower 16 bits has Rx packet buffer allocation size in KB */
1277 pba &= 0xffff;
1278 /* the tx fifo also stores 16 bytes of information about the tx
1279 * but don't include ethernet FCS because hardware appends it */
1280 min_tx_space = (adapter->max_frame_size +
1281 sizeof(union e1000_adv_tx_desc) -
1282 ETH_FCS_LEN) * 2;
1283 min_tx_space = ALIGN(min_tx_space, 1024);
1284 min_tx_space >>= 10;
1285 /* software strips receive CRC, so leave room for it */
1286 min_rx_space = adapter->max_frame_size;
1287 min_rx_space = ALIGN(min_rx_space, 1024);
1288 min_rx_space >>= 10;
1289
1290 /* If current Tx allocation is less than the min Tx FIFO size,
1291 * and the min Tx FIFO size is less than the current Rx FIFO
1292 * allocation, take space away from current Rx allocation */
1293 if (tx_space < min_tx_space &&
1294 ((min_tx_space - tx_space) < pba)) {
1295 pba = pba - (min_tx_space - tx_space);
1296
1297 /* if short on rx space, rx wins and must trump tx
1298 * adjustment */
1299 if (pba < min_rx_space)
1300 pba = min_rx_space;
1301 }
1302 wr32(E1000_PBA, pba);
1303 }
1304
1305 /* flow control settings */
1306 /* The high water mark must be low enough to fit one full frame
1307 * (or the size used for early receive) above it in the Rx FIFO.
1308 * Set it to the lower of:
1309 * - 90% of the Rx FIFO size, or
1310 * - the full Rx FIFO size minus one full frame */
1311 hwm = min(((pba << 10) * 9 / 10),
1312 ((pba << 10) - 2 * adapter->max_frame_size));
1313
1314 if (mac->type < e1000_82576) {
1315 fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */
1316 fc->low_water = fc->high_water - 8;
1317 } else {
1318 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1319 fc->low_water = fc->high_water - 16;
1320 }
1321 fc->pause_time = 0xFFFF;
1322 fc->send_xon = 1;
1323 fc->current_mode = fc->requested_mode;
1324
1325 /* disable receive for all VFs and wait one second */
1326 if (adapter->vfs_allocated_count) {
1327 int i;
1328 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1329 adapter->vf_data[i].clear_to_send = false;
1330
1331 /* ping all the active vfs to let them know we are going down */
1332 igb_ping_all_vfs(adapter);
1333
1334 /* disable transmits and receives */
1335 wr32(E1000_VFRE, 0);
1336 wr32(E1000_VFTE, 0);
1337 }
1338
1339 /* Allow time for pending master requests to run */
1340 adapter->hw.mac.ops.reset_hw(&adapter->hw);
1341 wr32(E1000_WUC, 0);
1342
1343 if (adapter->hw.mac.ops.init_hw(&adapter->hw))
1344 dev_err(&adapter->pdev->dev, "Hardware Error\n");
1345
1346 igb_update_mng_vlan(adapter);
1347
1348 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1349 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1350
1351 igb_reset_adaptive(&adapter->hw);
1352 igb_get_phy_info(&adapter->hw);
1353 }
1354
1355 static const struct net_device_ops igb_netdev_ops = {
1356 .ndo_open = igb_open,
1357 .ndo_stop = igb_close,
1358 .ndo_start_xmit = igb_xmit_frame_adv,
1359 .ndo_get_stats = igb_get_stats,
1360 .ndo_set_rx_mode = igb_set_rx_mode,
1361 .ndo_set_multicast_list = igb_set_rx_mode,
1362 .ndo_set_mac_address = igb_set_mac,
1363 .ndo_change_mtu = igb_change_mtu,
1364 .ndo_do_ioctl = igb_ioctl,
1365 .ndo_tx_timeout = igb_tx_timeout,
1366 .ndo_validate_addr = eth_validate_addr,
1367 .ndo_vlan_rx_register = igb_vlan_rx_register,
1368 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1369 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1370 #ifdef CONFIG_NET_POLL_CONTROLLER
1371 .ndo_poll_controller = igb_netpoll,
1372 #endif
1373 };
1374
1375 /**
1376 * igb_probe - Device Initialization Routine
1377 * @pdev: PCI device information struct
1378 * @ent: entry in igb_pci_tbl
1379 *
1380 * Returns 0 on success, negative on failure
1381 *
1382 * igb_probe initializes an adapter identified by a pci_dev structure.
1383 * The OS initialization, configuring of the adapter private structure,
1384 * and a hardware reset occur.
1385 **/
1386 static int __devinit igb_probe(struct pci_dev *pdev,
1387 const struct pci_device_id *ent)
1388 {
1389 struct net_device *netdev;
1390 struct igb_adapter *adapter;
1391 struct e1000_hw *hw;
1392 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1393 unsigned long mmio_start, mmio_len;
1394 int err, pci_using_dac;
1395 u16 eeprom_data = 0;
1396 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1397 u32 part_num;
1398
1399 err = pci_enable_device_mem(pdev);
1400 if (err)
1401 return err;
1402
1403 pci_using_dac = 0;
1404 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1405 if (!err) {
1406 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1407 if (!err)
1408 pci_using_dac = 1;
1409 } else {
1410 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1411 if (err) {
1412 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1413 if (err) {
1414 dev_err(&pdev->dev, "No usable DMA "
1415 "configuration, aborting\n");
1416 goto err_dma;
1417 }
1418 }
1419 }
1420
1421 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1422 IORESOURCE_MEM),
1423 igb_driver_name);
1424 if (err)
1425 goto err_pci_reg;
1426
1427 pci_enable_pcie_error_reporting(pdev);
1428
1429 pci_set_master(pdev);
1430 pci_save_state(pdev);
1431
1432 err = -ENOMEM;
1433 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1434 IGB_ABS_MAX_TX_QUEUES);
1435 if (!netdev)
1436 goto err_alloc_etherdev;
1437
1438 SET_NETDEV_DEV(netdev, &pdev->dev);
1439
1440 pci_set_drvdata(pdev, netdev);
1441 adapter = netdev_priv(netdev);
1442 adapter->netdev = netdev;
1443 adapter->pdev = pdev;
1444 hw = &adapter->hw;
1445 hw->back = adapter;
1446 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1447
1448 mmio_start = pci_resource_start(pdev, 0);
1449 mmio_len = pci_resource_len(pdev, 0);
1450
1451 err = -EIO;
1452 hw->hw_addr = ioremap(mmio_start, mmio_len);
1453 if (!hw->hw_addr)
1454 goto err_ioremap;
1455
1456 netdev->netdev_ops = &igb_netdev_ops;
1457 igb_set_ethtool_ops(netdev);
1458 netdev->watchdog_timeo = 5 * HZ;
1459
1460 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1461
1462 netdev->mem_start = mmio_start;
1463 netdev->mem_end = mmio_start + mmio_len;
1464
1465 /* PCI config space info */
1466 hw->vendor_id = pdev->vendor;
1467 hw->device_id = pdev->device;
1468 hw->revision_id = pdev->revision;
1469 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1470 hw->subsystem_device_id = pdev->subsystem_device;
1471
1472 /* setup the private structure */
1473 hw->back = adapter;
1474 /* Copy the default MAC, PHY and NVM function pointers */
1475 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1476 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1477 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1478 /* Initialize skew-specific constants */
1479 err = ei->get_invariants(hw);
1480 if (err)
1481 goto err_sw_init;
1482
1483 #ifdef CONFIG_PCI_IOV
1484 /* since iov functionality isn't critical to base device function we
1485 * can accept failure. If it fails we don't allow iov to be enabled */
1486 if (hw->mac.type == e1000_82576) {
1487 /* 82576 supports a maximum of 7 VFs in addition to the PF */
1488 unsigned int num_vfs = (max_vfs > 7) ? 7 : max_vfs;
1489 int i;
1490 unsigned char mac_addr[ETH_ALEN];
1491
1492 if (num_vfs) {
1493 adapter->vf_data = kcalloc(num_vfs,
1494 sizeof(struct vf_data_storage),
1495 GFP_KERNEL);
1496 if (!adapter->vf_data) {
1497 dev_err(&pdev->dev,
1498 "Could not allocate VF private data - "
1499 "IOV enable failed\n");
1500 } else {
1501 err = pci_enable_sriov(pdev, num_vfs);
1502 if (!err) {
1503 adapter->vfs_allocated_count = num_vfs;
1504 dev_info(&pdev->dev,
1505 "%d vfs allocated\n",
1506 num_vfs);
1507 for (i = 0;
1508 i < adapter->vfs_allocated_count;
1509 i++) {
1510 random_ether_addr(mac_addr);
1511 igb_set_vf_mac(adapter, i,
1512 mac_addr);
1513 }
1514 } else {
1515 kfree(adapter->vf_data);
1516 adapter->vf_data = NULL;
1517 }
1518 }
1519 }
1520 }
1521
1522 #endif
1523 /* setup the private structure */
1524 err = igb_sw_init(adapter);
1525 if (err)
1526 goto err_sw_init;
1527
1528 igb_get_bus_info_pcie(hw);
1529
1530 hw->phy.autoneg_wait_to_complete = false;
1531 hw->mac.adaptive_ifs = true;
1532
1533 /* Copper options */
1534 if (hw->phy.media_type == e1000_media_type_copper) {
1535 hw->phy.mdix = AUTO_ALL_MODES;
1536 hw->phy.disable_polarity_correction = false;
1537 hw->phy.ms_type = e1000_ms_hw_default;
1538 }
1539
1540 if (igb_check_reset_block(hw))
1541 dev_info(&pdev->dev,
1542 "PHY reset is blocked due to SOL/IDER session.\n");
1543
1544 netdev->features = NETIF_F_SG |
1545 NETIF_F_IP_CSUM |
1546 NETIF_F_HW_VLAN_TX |
1547 NETIF_F_HW_VLAN_RX |
1548 NETIF_F_HW_VLAN_FILTER;
1549
1550 netdev->features |= NETIF_F_IPV6_CSUM;
1551 netdev->features |= NETIF_F_TSO;
1552 netdev->features |= NETIF_F_TSO6;
1553
1554 netdev->features |= NETIF_F_GRO;
1555
1556 netdev->vlan_features |= NETIF_F_TSO;
1557 netdev->vlan_features |= NETIF_F_TSO6;
1558 netdev->vlan_features |= NETIF_F_IP_CSUM;
1559 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1560 netdev->vlan_features |= NETIF_F_SG;
1561
1562 if (pci_using_dac)
1563 netdev->features |= NETIF_F_HIGHDMA;
1564
1565 if (adapter->hw.mac.type == e1000_82576)
1566 netdev->features |= NETIF_F_SCTP_CSUM;
1567
1568 adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw);
1569
1570 /* before reading the NVM, reset the controller to put the device in a
1571 * known good starting state */
1572 hw->mac.ops.reset_hw(hw);
1573
1574 /* make sure the NVM is good */
1575 if (igb_validate_nvm_checksum(hw) < 0) {
1576 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1577 err = -EIO;
1578 goto err_eeprom;
1579 }
1580
1581 /* copy the MAC address out of the NVM */
1582 if (hw->mac.ops.read_mac_addr(hw))
1583 dev_err(&pdev->dev, "NVM Read Error\n");
1584
1585 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1586 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1587
1588 if (!is_valid_ether_addr(netdev->perm_addr)) {
1589 dev_err(&pdev->dev, "Invalid MAC Address\n");
1590 err = -EIO;
1591 goto err_eeprom;
1592 }
1593
1594 setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1595 (unsigned long) adapter);
1596 setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1597 (unsigned long) adapter);
1598
1599 INIT_WORK(&adapter->reset_task, igb_reset_task);
1600 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1601
1602 /* Initialize link properties that are user-changeable */
1603 adapter->fc_autoneg = true;
1604 hw->mac.autoneg = true;
1605 hw->phy.autoneg_advertised = 0x2f;
1606
1607 hw->fc.requested_mode = e1000_fc_default;
1608 hw->fc.current_mode = e1000_fc_default;
1609
1610 adapter->itr_setting = IGB_DEFAULT_ITR;
1611 adapter->itr = IGB_START_ITR;
1612
1613 igb_validate_mdi_setting(hw);
1614
1615 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
1616 * enable the ACPI Magic Packet filter
1617 */
1618
1619 if (hw->bus.func == 0)
1620 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1621 else if (hw->bus.func == 1)
1622 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1623
1624 if (eeprom_data & eeprom_apme_mask)
1625 adapter->eeprom_wol |= E1000_WUFC_MAG;
1626
1627 /* now that we have the eeprom settings, apply the special cases where
1628 * the eeprom may be wrong or the board simply won't support wake on
1629 * lan on a particular port */
1630 switch (pdev->device) {
1631 case E1000_DEV_ID_82575GB_QUAD_COPPER:
1632 adapter->eeprom_wol = 0;
1633 break;
1634 case E1000_DEV_ID_82575EB_FIBER_SERDES:
1635 case E1000_DEV_ID_82576_FIBER:
1636 case E1000_DEV_ID_82576_SERDES:
1637 /* Wake events only supported on port A for dual fiber
1638 * regardless of eeprom setting */
1639 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1640 adapter->eeprom_wol = 0;
1641 break;
1642 case E1000_DEV_ID_82576_QUAD_COPPER:
1643 /* if quad port adapter, disable WoL on all but port A */
1644 if (global_quad_port_a != 0)
1645 adapter->eeprom_wol = 0;
1646 else
1647 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1648 /* Reset for multiple quad port adapters */
1649 if (++global_quad_port_a == 4)
1650 global_quad_port_a = 0;
1651 break;
1652 }
1653
1654 /* initialize the wol settings based on the eeprom settings */
1655 adapter->wol = adapter->eeprom_wol;
1656 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1657
1658 /* reset the hardware with the new settings */
1659 igb_reset(adapter);
1660
1661 /* let the f/w know that the h/w is now under the control of the
1662 * driver. */
1663 igb_get_hw_control(adapter);
1664
1665 strcpy(netdev->name, "eth%d");
1666 err = register_netdev(netdev);
1667 if (err)
1668 goto err_register;
1669
1670 /* carrier off reporting is important to ethtool even BEFORE open */
1671 netif_carrier_off(netdev);
1672
1673 #ifdef CONFIG_IGB_DCA
1674 if (dca_add_requester(&pdev->dev) == 0) {
1675 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1676 dev_info(&pdev->dev, "DCA enabled\n");
1677 igb_setup_dca(adapter);
1678 }
1679 #endif
1680
1681 /*
1682 * Initialize hardware timer: we keep it running just in case
1683 * that some program needs it later on.
1684 */
1685 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1686 adapter->cycles.read = igb_read_clock;
1687 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1688 adapter->cycles.mult = 1;
1689 adapter->cycles.shift = IGB_TSYNC_SHIFT;
1690 wr32(E1000_TIMINCA,
1691 (1<<24) |
1692 IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS * IGB_TSYNC_SCALE);
1693 #if 0
1694 /*
1695 * Avoid rollover while we initialize by resetting the time counter.
1696 */
1697 wr32(E1000_SYSTIML, 0x00000000);
1698 wr32(E1000_SYSTIMH, 0x00000000);
1699 #else
1700 /*
1701 * Set registers so that rollover occurs soon to test this.
1702 */
1703 wr32(E1000_SYSTIML, 0x00000000);
1704 wr32(E1000_SYSTIMH, 0xFF800000);
1705 #endif
1706 wrfl();
1707 timecounter_init(&adapter->clock,
1708 &adapter->cycles,
1709 ktime_to_ns(ktime_get_real()));
1710
1711 /*
1712 * Synchronize our NIC clock against system wall clock. NIC
1713 * time stamp reading requires ~3us per sample, each sample
1714 * was pretty stable even under load => only require 10
1715 * samples for each offset comparison.
1716 */
1717 memset(&adapter->compare, 0, sizeof(adapter->compare));
1718 adapter->compare.source = &adapter->clock;
1719 adapter->compare.target = ktime_get_real;
1720 adapter->compare.num_samples = 10;
1721 timecompare_update(&adapter->compare, 0);
1722
1723 #ifdef DEBUG
1724 {
1725 char buffer[160];
1726 printk(KERN_DEBUG
1727 "igb: %s: hw %p initialized timer\n",
1728 igb_get_time_str(adapter, buffer),
1729 &adapter->hw);
1730 }
1731 #endif
1732
1733 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1734 /* print bus type/speed/width info */
1735 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1736 netdev->name,
1737 ((hw->bus.speed == e1000_bus_speed_2500)
1738 ? "2.5Gb/s" : "unknown"),
1739 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1740 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1741 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1742 "unknown"),
1743 netdev->dev_addr);
1744
1745 igb_read_part_num(hw, &part_num);
1746 dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1747 (part_num >> 8), (part_num & 0xff));
1748
1749 dev_info(&pdev->dev,
1750 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1751 adapter->msix_entries ? "MSI-X" :
1752 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1753 adapter->num_rx_queues, adapter->num_tx_queues);
1754
1755 return 0;
1756
1757 err_register:
1758 igb_release_hw_control(adapter);
1759 err_eeprom:
1760 if (!igb_check_reset_block(hw))
1761 igb_reset_phy(hw);
1762
1763 if (hw->flash_address)
1764 iounmap(hw->flash_address);
1765 err_sw_init:
1766 igb_clear_interrupt_scheme(adapter);
1767 iounmap(hw->hw_addr);
1768 err_ioremap:
1769 free_netdev(netdev);
1770 err_alloc_etherdev:
1771 pci_release_selected_regions(pdev, pci_select_bars(pdev,
1772 IORESOURCE_MEM));
1773 err_pci_reg:
1774 err_dma:
1775 pci_disable_device(pdev);
1776 return err;
1777 }
1778
1779 /**
1780 * igb_remove - Device Removal Routine
1781 * @pdev: PCI device information struct
1782 *
1783 * igb_remove is called by the PCI subsystem to alert the driver
1784 * that it should release a PCI device. The could be caused by a
1785 * Hot-Plug event, or because the driver is going to be removed from
1786 * memory.
1787 **/
1788 static void __devexit igb_remove(struct pci_dev *pdev)
1789 {
1790 struct net_device *netdev = pci_get_drvdata(pdev);
1791 struct igb_adapter *adapter = netdev_priv(netdev);
1792 struct e1000_hw *hw = &adapter->hw;
1793
1794 /* flush_scheduled work may reschedule our watchdog task, so
1795 * explicitly disable watchdog tasks from being rescheduled */
1796 set_bit(__IGB_DOWN, &adapter->state);
1797 del_timer_sync(&adapter->watchdog_timer);
1798 del_timer_sync(&adapter->phy_info_timer);
1799
1800 flush_scheduled_work();
1801
1802 #ifdef CONFIG_IGB_DCA
1803 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1804 dev_info(&pdev->dev, "DCA disabled\n");
1805 dca_remove_requester(&pdev->dev);
1806 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1807 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1808 }
1809 #endif
1810
1811 /* Release control of h/w to f/w. If f/w is AMT enabled, this
1812 * would have already happened in close and is redundant. */
1813 igb_release_hw_control(adapter);
1814
1815 unregister_netdev(netdev);
1816
1817 if (!igb_check_reset_block(&adapter->hw))
1818 igb_reset_phy(&adapter->hw);
1819
1820 igb_clear_interrupt_scheme(adapter);
1821
1822 #ifdef CONFIG_PCI_IOV
1823 /* reclaim resources allocated to VFs */
1824 if (adapter->vf_data) {
1825 /* disable iov and allow time for transactions to clear */
1826 pci_disable_sriov(pdev);
1827 msleep(500);
1828
1829 kfree(adapter->vf_data);
1830 adapter->vf_data = NULL;
1831 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1832 msleep(100);
1833 dev_info(&pdev->dev, "IOV Disabled\n");
1834 }
1835 #endif
1836 iounmap(hw->hw_addr);
1837 if (hw->flash_address)
1838 iounmap(hw->flash_address);
1839 pci_release_selected_regions(pdev, pci_select_bars(pdev,
1840 IORESOURCE_MEM));
1841
1842 free_netdev(netdev);
1843
1844 pci_disable_pcie_error_reporting(pdev);
1845
1846 pci_disable_device(pdev);
1847 }
1848
1849 /**
1850 * igb_sw_init - Initialize general software structures (struct igb_adapter)
1851 * @adapter: board private structure to initialize
1852 *
1853 * igb_sw_init initializes the Adapter private data structure.
1854 * Fields are initialized based on PCI device information and
1855 * OS network device settings (MTU size).
1856 **/
1857 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1858 {
1859 struct e1000_hw *hw = &adapter->hw;
1860 struct net_device *netdev = adapter->netdev;
1861 struct pci_dev *pdev = adapter->pdev;
1862
1863 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1864
1865 adapter->tx_ring_count = IGB_DEFAULT_TXD;
1866 adapter->rx_ring_count = IGB_DEFAULT_RXD;
1867 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1868 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1869
1870 /* This call may decrease the number of queues depending on
1871 * interrupt mode. */
1872 if (igb_init_interrupt_scheme(adapter)) {
1873 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1874 return -ENOMEM;
1875 }
1876
1877 /* Explicitly disable IRQ since the NIC can be in any state. */
1878 igb_irq_disable(adapter);
1879
1880 set_bit(__IGB_DOWN, &adapter->state);
1881 return 0;
1882 }
1883
1884 /**
1885 * igb_open - Called when a network interface is made active
1886 * @netdev: network interface device structure
1887 *
1888 * Returns 0 on success, negative value on failure
1889 *
1890 * The open entry point is called when a network interface is made
1891 * active by the system (IFF_UP). At this point all resources needed
1892 * for transmit and receive operations are allocated, the interrupt
1893 * handler is registered with the OS, the watchdog timer is started,
1894 * and the stack is notified that the interface is ready.
1895 **/
1896 static int igb_open(struct net_device *netdev)
1897 {
1898 struct igb_adapter *adapter = netdev_priv(netdev);
1899 struct e1000_hw *hw = &adapter->hw;
1900 int err;
1901 int i;
1902
1903 /* disallow open during test */
1904 if (test_bit(__IGB_TESTING, &adapter->state))
1905 return -EBUSY;
1906
1907 netif_carrier_off(netdev);
1908
1909 /* allocate transmit descriptors */
1910 err = igb_setup_all_tx_resources(adapter);
1911 if (err)
1912 goto err_setup_tx;
1913
1914 /* allocate receive descriptors */
1915 err = igb_setup_all_rx_resources(adapter);
1916 if (err)
1917 goto err_setup_rx;
1918
1919 /* e1000_power_up_phy(adapter); */
1920
1921 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1922 if ((adapter->hw.mng_cookie.status &
1923 E1000_MNG_DHCP_COOKIE_STATUS_VLAN))
1924 igb_update_mng_vlan(adapter);
1925
1926 /* before we allocate an interrupt, we must be ready to handle it.
1927 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1928 * as soon as we call pci_request_irq, so we have to setup our
1929 * clean_rx handler before we do so. */
1930 igb_configure(adapter);
1931
1932 igb_set_vmolr(hw, adapter->vfs_allocated_count);
1933
1934 err = igb_request_irq(adapter);
1935 if (err)
1936 goto err_req_irq;
1937
1938 /* From here on the code is the same as igb_up() */
1939 clear_bit(__IGB_DOWN, &adapter->state);
1940
1941 for (i = 0; i < adapter->num_q_vectors; i++) {
1942 struct igb_q_vector *q_vector = adapter->q_vector[i];
1943 napi_enable(&q_vector->napi);
1944 }
1945
1946 /* Clear any pending interrupts. */
1947 rd32(E1000_ICR);
1948
1949 igb_irq_enable(adapter);
1950
1951 netif_tx_start_all_queues(netdev);
1952
1953 /* Fire a link status change interrupt to start the watchdog. */
1954 wr32(E1000_ICS, E1000_ICS_LSC);
1955
1956 return 0;
1957
1958 err_req_irq:
1959 igb_release_hw_control(adapter);
1960 /* e1000_power_down_phy(adapter); */
1961 igb_free_all_rx_resources(adapter);
1962 err_setup_rx:
1963 igb_free_all_tx_resources(adapter);
1964 err_setup_tx:
1965 igb_reset(adapter);
1966
1967 return err;
1968 }
1969
1970 /**
1971 * igb_close - Disables a network interface
1972 * @netdev: network interface device structure
1973 *
1974 * Returns 0, this is not allowed to fail
1975 *
1976 * The close entry point is called when an interface is de-activated
1977 * by the OS. The hardware is still under the driver's control, but
1978 * needs to be disabled. A global MAC reset is issued to stop the
1979 * hardware, and all transmit and receive resources are freed.
1980 **/
1981 static int igb_close(struct net_device *netdev)
1982 {
1983 struct igb_adapter *adapter = netdev_priv(netdev);
1984
1985 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
1986 igb_down(adapter);
1987
1988 igb_free_irq(adapter);
1989
1990 igb_free_all_tx_resources(adapter);
1991 igb_free_all_rx_resources(adapter);
1992
1993 /* kill manageability vlan ID if supported, but not if a vlan with
1994 * the same ID is registered on the host OS (let 8021q kill it) */
1995 if ((adapter->hw.mng_cookie.status &
1996 E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
1997 !(adapter->vlgrp &&
1998 vlan_group_get_device(adapter->vlgrp, adapter->mng_vlan_id)))
1999 igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
2000
2001 return 0;
2002 }
2003
2004 /**
2005 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2006 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2007 *
2008 * Return 0 on success, negative on failure
2009 **/
2010 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2011 {
2012 struct pci_dev *pdev = tx_ring->pdev;
2013 int size;
2014
2015 size = sizeof(struct igb_buffer) * tx_ring->count;
2016 tx_ring->buffer_info = vmalloc(size);
2017 if (!tx_ring->buffer_info)
2018 goto err;
2019 memset(tx_ring->buffer_info, 0, size);
2020
2021 /* round up to nearest 4K */
2022 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2023 tx_ring->size = ALIGN(tx_ring->size, 4096);
2024
2025 tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size,
2026 &tx_ring->dma);
2027
2028 if (!tx_ring->desc)
2029 goto err;
2030
2031 tx_ring->next_to_use = 0;
2032 tx_ring->next_to_clean = 0;
2033 return 0;
2034
2035 err:
2036 vfree(tx_ring->buffer_info);
2037 dev_err(&pdev->dev,
2038 "Unable to allocate memory for the transmit descriptor ring\n");
2039 return -ENOMEM;
2040 }
2041
2042 /**
2043 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2044 * (Descriptors) for all queues
2045 * @adapter: board private structure
2046 *
2047 * Return 0 on success, negative on failure
2048 **/
2049 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2050 {
2051 int i, err = 0;
2052 int r_idx;
2053
2054 for (i = 0; i < adapter->num_tx_queues; i++) {
2055 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
2056 if (err) {
2057 dev_err(&adapter->pdev->dev,
2058 "Allocation for Tx Queue %u failed\n", i);
2059 for (i--; i >= 0; i--)
2060 igb_free_tx_resources(&adapter->tx_ring[i]);
2061 break;
2062 }
2063 }
2064
2065 for (i = 0; i < IGB_MAX_TX_QUEUES; i++) {
2066 r_idx = i % adapter->num_tx_queues;
2067 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
2068 }
2069 return err;
2070 }
2071
2072 /**
2073 * igb_setup_tctl - configure the transmit control registers
2074 * @adapter: Board private structure
2075 **/
2076 static void igb_setup_tctl(struct igb_adapter *adapter)
2077 {
2078 struct e1000_hw *hw = &adapter->hw;
2079 u32 tctl;
2080
2081 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2082 wr32(E1000_TXDCTL(0), 0);
2083
2084 /* Program the Transmit Control Register */
2085 tctl = rd32(E1000_TCTL);
2086 tctl &= ~E1000_TCTL_CT;
2087 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2088 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2089
2090 igb_config_collision_dist(hw);
2091
2092 /* Enable transmits */
2093 tctl |= E1000_TCTL_EN;
2094
2095 wr32(E1000_TCTL, tctl);
2096 }
2097
2098 /**
2099 * igb_configure_tx_ring - Configure transmit ring after Reset
2100 * @adapter: board private structure
2101 * @ring: tx ring to configure
2102 *
2103 * Configure a transmit ring after a reset.
2104 **/
2105 static void igb_configure_tx_ring(struct igb_adapter *adapter,
2106 struct igb_ring *ring)
2107 {
2108 struct e1000_hw *hw = &adapter->hw;
2109 u32 txdctl;
2110 u64 tdba = ring->dma;
2111 int reg_idx = ring->reg_idx;
2112
2113 /* disable the queue */
2114 txdctl = rd32(E1000_TXDCTL(reg_idx));
2115 wr32(E1000_TXDCTL(reg_idx),
2116 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2117 wrfl();
2118 mdelay(10);
2119
2120 wr32(E1000_TDLEN(reg_idx),
2121 ring->count * sizeof(union e1000_adv_tx_desc));
2122 wr32(E1000_TDBAL(reg_idx),
2123 tdba & 0x00000000ffffffffULL);
2124 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2125
2126 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2127 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2128 writel(0, ring->head);
2129 writel(0, ring->tail);
2130
2131 txdctl |= IGB_TX_PTHRESH;
2132 txdctl |= IGB_TX_HTHRESH << 8;
2133 txdctl |= IGB_TX_WTHRESH << 16;
2134
2135 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2136 wr32(E1000_TXDCTL(reg_idx), txdctl);
2137 }
2138
2139 /**
2140 * igb_configure_tx - Configure transmit Unit after Reset
2141 * @adapter: board private structure
2142 *
2143 * Configure the Tx unit of the MAC after a reset.
2144 **/
2145 static void igb_configure_tx(struct igb_adapter *adapter)
2146 {
2147 int i;
2148
2149 for (i = 0; i < adapter->num_tx_queues; i++)
2150 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2151 }
2152
2153 /**
2154 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2155 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2156 *
2157 * Returns 0 on success, negative on failure
2158 **/
2159 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2160 {
2161 struct pci_dev *pdev = rx_ring->pdev;
2162 int size, desc_len;
2163
2164 size = sizeof(struct igb_buffer) * rx_ring->count;
2165 rx_ring->buffer_info = vmalloc(size);
2166 if (!rx_ring->buffer_info)
2167 goto err;
2168 memset(rx_ring->buffer_info, 0, size);
2169
2170 desc_len = sizeof(union e1000_adv_rx_desc);
2171
2172 /* Round up to nearest 4K */
2173 rx_ring->size = rx_ring->count * desc_len;
2174 rx_ring->size = ALIGN(rx_ring->size, 4096);
2175
2176 rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2177 &rx_ring->dma);
2178
2179 if (!rx_ring->desc)
2180 goto err;
2181
2182 rx_ring->next_to_clean = 0;
2183 rx_ring->next_to_use = 0;
2184
2185 return 0;
2186
2187 err:
2188 vfree(rx_ring->buffer_info);
2189 dev_err(&pdev->dev, "Unable to allocate memory for "
2190 "the receive descriptor ring\n");
2191 return -ENOMEM;
2192 }
2193
2194 /**
2195 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2196 * (Descriptors) for all queues
2197 * @adapter: board private structure
2198 *
2199 * Return 0 on success, negative on failure
2200 **/
2201 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2202 {
2203 int i, err = 0;
2204
2205 for (i = 0; i < adapter->num_rx_queues; i++) {
2206 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2207 if (err) {
2208 dev_err(&adapter->pdev->dev,
2209 "Allocation for Rx Queue %u failed\n", i);
2210 for (i--; i >= 0; i--)
2211 igb_free_rx_resources(&adapter->rx_ring[i]);
2212 break;
2213 }
2214 }
2215
2216 return err;
2217 }
2218
2219 /**
2220 * igb_setup_mrqc - configure the multiple receive queue control registers
2221 * @adapter: Board private structure
2222 **/
2223 static void igb_setup_mrqc(struct igb_adapter *adapter)
2224 {
2225 struct e1000_hw *hw = &adapter->hw;
2226 u32 mrqc, rxcsum;
2227 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2228 union e1000_reta {
2229 u32 dword;
2230 u8 bytes[4];
2231 } reta;
2232 static const u8 rsshash[40] = {
2233 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2234 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2235 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2236 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2237
2238 /* Fill out hash function seeds */
2239 for (j = 0; j < 10; j++) {
2240 u32 rsskey = rsshash[(j * 4)];
2241 rsskey |= rsshash[(j * 4) + 1] << 8;
2242 rsskey |= rsshash[(j * 4) + 2] << 16;
2243 rsskey |= rsshash[(j * 4) + 3] << 24;
2244 array_wr32(E1000_RSSRK(0), j, rsskey);
2245 }
2246
2247 num_rx_queues = adapter->num_rx_queues;
2248
2249 if (adapter->vfs_allocated_count) {
2250 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2251 switch (hw->mac.type) {
2252 case e1000_82576:
2253 shift = 3;
2254 num_rx_queues = 2;
2255 break;
2256 case e1000_82575:
2257 shift = 2;
2258 shift2 = 6;
2259 default:
2260 break;
2261 }
2262 } else {
2263 if (hw->mac.type == e1000_82575)
2264 shift = 6;
2265 }
2266
2267 for (j = 0; j < (32 * 4); j++) {
2268 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2269 if (shift2)
2270 reta.bytes[j & 3] |= num_rx_queues << shift2;
2271 if ((j & 3) == 3)
2272 wr32(E1000_RETA(j >> 2), reta.dword);
2273 }
2274
2275 /*
2276 * Disable raw packet checksumming so that RSS hash is placed in
2277 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2278 * offloads as they are enabled by default
2279 */
2280 rxcsum = rd32(E1000_RXCSUM);
2281 rxcsum |= E1000_RXCSUM_PCSD;
2282
2283 if (adapter->hw.mac.type >= e1000_82576)
2284 /* Enable Receive Checksum Offload for SCTP */
2285 rxcsum |= E1000_RXCSUM_CRCOFL;
2286
2287 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2288 wr32(E1000_RXCSUM, rxcsum);
2289
2290 /* If VMDq is enabled then we set the appropriate mode for that, else
2291 * we default to RSS so that an RSS hash is calculated per packet even
2292 * if we are only using one queue */
2293 if (adapter->vfs_allocated_count) {
2294 if (hw->mac.type > e1000_82575) {
2295 /* Set the default pool for the PF's first queue */
2296 u32 vtctl = rd32(E1000_VT_CTL);
2297 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2298 E1000_VT_CTL_DISABLE_DEF_POOL);
2299 vtctl |= adapter->vfs_allocated_count <<
2300 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2301 wr32(E1000_VT_CTL, vtctl);
2302 }
2303 if (adapter->num_rx_queues > 1)
2304 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2305 else
2306 mrqc = E1000_MRQC_ENABLE_VMDQ;
2307 } else {
2308 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2309 }
2310 igb_vmm_control(adapter);
2311
2312 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2313 E1000_MRQC_RSS_FIELD_IPV4_TCP);
2314 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2315 E1000_MRQC_RSS_FIELD_IPV6_TCP);
2316 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2317 E1000_MRQC_RSS_FIELD_IPV6_UDP);
2318 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2319 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2320
2321 wr32(E1000_MRQC, mrqc);
2322 }
2323
2324 /**
2325 * igb_setup_rctl - configure the receive control registers
2326 * @adapter: Board private structure
2327 **/
2328 static void igb_setup_rctl(struct igb_adapter *adapter)
2329 {
2330 struct e1000_hw *hw = &adapter->hw;
2331 u32 rctl;
2332
2333 rctl = rd32(E1000_RCTL);
2334
2335 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2336 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2337
2338 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2339 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2340
2341 /*
2342 * enable stripping of CRC. It's unlikely this will break BMC
2343 * redirection as it did with e1000. Newer features require
2344 * that the HW strips the CRC.
2345 */
2346 rctl |= E1000_RCTL_SECRC;
2347
2348 /*
2349 * disable store bad packets and clear size bits.
2350 */
2351 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2352
2353 /* enable LPE to prevent packets larger than max_frame_size */
2354 rctl |= E1000_RCTL_LPE;
2355
2356 /* disable queue 0 to prevent tail write w/o re-config */
2357 wr32(E1000_RXDCTL(0), 0);
2358
2359 /* Attention!!! For SR-IOV PF driver operations you must enable
2360 * queue drop for all VF and PF queues to prevent head of line blocking
2361 * if an un-trusted VF does not provide descriptors to hardware.
2362 */
2363 if (adapter->vfs_allocated_count) {
2364 u32 vmolr;
2365
2366 /* set all queue drop enable bits */
2367 wr32(E1000_QDE, ALL_QUEUES);
2368
2369 vmolr = rd32(E1000_VMOLR(adapter->vfs_allocated_count));
2370 if (rctl & E1000_RCTL_LPE)
2371 vmolr |= E1000_VMOLR_LPE;
2372 if (adapter->num_rx_queues > 1)
2373 vmolr |= E1000_VMOLR_RSSE;
2374 wr32(E1000_VMOLR(adapter->vfs_allocated_count), vmolr);
2375 }
2376
2377 wr32(E1000_RCTL, rctl);
2378 }
2379
2380 /**
2381 * igb_rlpml_set - set maximum receive packet size
2382 * @adapter: board private structure
2383 *
2384 * Configure maximum receivable packet size.
2385 **/
2386 static void igb_rlpml_set(struct igb_adapter *adapter)
2387 {
2388 u32 max_frame_size = adapter->max_frame_size;
2389 struct e1000_hw *hw = &adapter->hw;
2390 u16 pf_id = adapter->vfs_allocated_count;
2391
2392 if (adapter->vlgrp)
2393 max_frame_size += VLAN_TAG_SIZE;
2394
2395 /* if vfs are enabled we set RLPML to the largest possible request
2396 * size and set the VMOLR RLPML to the size we need */
2397 if (pf_id) {
2398 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2399 max_frame_size = MAX_STD_JUMBO_FRAME_SIZE + VLAN_TAG_SIZE;
2400 }
2401
2402 wr32(E1000_RLPML, max_frame_size);
2403 }
2404
2405 /**
2406 * igb_configure_rx_ring - Configure a receive ring after Reset
2407 * @adapter: board private structure
2408 * @ring: receive ring to be configured
2409 *
2410 * Configure the Rx unit of the MAC after a reset.
2411 **/
2412 static void igb_configure_rx_ring(struct igb_adapter *adapter,
2413 struct igb_ring *ring)
2414 {
2415 struct e1000_hw *hw = &adapter->hw;
2416 u64 rdba = ring->dma;
2417 int reg_idx = ring->reg_idx;
2418 u32 srrctl, rxdctl;
2419
2420 /* disable the queue */
2421 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2422 wr32(E1000_RXDCTL(reg_idx),
2423 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2424
2425 /* Set DMA base address registers */
2426 wr32(E1000_RDBAL(reg_idx),
2427 rdba & 0x00000000ffffffffULL);
2428 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2429 wr32(E1000_RDLEN(reg_idx),
2430 ring->count * sizeof(union e1000_adv_rx_desc));
2431
2432 /* initialize head and tail */
2433 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2434 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2435 writel(0, ring->head);
2436 writel(0, ring->tail);
2437
2438 /* set descriptor configuration */
2439 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2440 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2441 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2442 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2443 srrctl |= IGB_RXBUFFER_16384 >>
2444 E1000_SRRCTL_BSIZEPKT_SHIFT;
2445 #else
2446 srrctl |= (PAGE_SIZE / 2) >>
2447 E1000_SRRCTL_BSIZEPKT_SHIFT;
2448 #endif
2449 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2450 } else {
2451 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2452 E1000_SRRCTL_BSIZEPKT_SHIFT;
2453 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2454 }
2455
2456 wr32(E1000_SRRCTL(reg_idx), srrctl);
2457
2458 /* enable receive descriptor fetching */
2459 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2460 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2461 rxdctl &= 0xFFF00000;
2462 rxdctl |= IGB_RX_PTHRESH;
2463 rxdctl |= IGB_RX_HTHRESH << 8;
2464 rxdctl |= IGB_RX_WTHRESH << 16;
2465 wr32(E1000_RXDCTL(reg_idx), rxdctl);
2466 }
2467
2468 /**
2469 * igb_configure_rx - Configure receive Unit after Reset
2470 * @adapter: board private structure
2471 *
2472 * Configure the Rx unit of the MAC after a reset.
2473 **/
2474 static void igb_configure_rx(struct igb_adapter *adapter)
2475 {
2476 int i;
2477
2478 /* set UTA to appropriate mode */
2479 igb_set_uta(adapter);
2480
2481 /* set the correct pool for the PF default MAC address in entry 0 */
2482 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2483 adapter->vfs_allocated_count);
2484
2485 /* Setup the HW Rx Head and Tail Descriptor Pointers and
2486 * the Base and Length of the Rx Descriptor Ring */
2487 for (i = 0; i < adapter->num_rx_queues; i++)
2488 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2489 }
2490
2491 /**
2492 * igb_free_tx_resources - Free Tx Resources per Queue
2493 * @tx_ring: Tx descriptor ring for a specific queue
2494 *
2495 * Free all transmit software resources
2496 **/
2497 void igb_free_tx_resources(struct igb_ring *tx_ring)
2498 {
2499 igb_clean_tx_ring(tx_ring);
2500
2501 vfree(tx_ring->buffer_info);
2502 tx_ring->buffer_info = NULL;
2503
2504 pci_free_consistent(tx_ring->pdev, tx_ring->size,
2505 tx_ring->desc, tx_ring->dma);
2506
2507 tx_ring->desc = NULL;
2508 }
2509
2510 /**
2511 * igb_free_all_tx_resources - Free Tx Resources for All Queues
2512 * @adapter: board private structure
2513 *
2514 * Free all transmit software resources
2515 **/
2516 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2517 {
2518 int i;
2519
2520 for (i = 0; i < adapter->num_tx_queues; i++)
2521 igb_free_tx_resources(&adapter->tx_ring[i]);
2522 }
2523
2524 static void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2525 struct igb_buffer *buffer_info)
2526 {
2527 buffer_info->dma = 0;
2528 if (buffer_info->skb) {
2529 skb_dma_unmap(&tx_ring->pdev->dev,
2530 buffer_info->skb,
2531 DMA_TO_DEVICE);
2532 dev_kfree_skb_any(buffer_info->skb);
2533 buffer_info->skb = NULL;
2534 }
2535 buffer_info->time_stamp = 0;
2536 /* buffer_info must be completely set up in the transmit path */
2537 }
2538
2539 /**
2540 * igb_clean_tx_ring - Free Tx Buffers
2541 * @tx_ring: ring to be cleaned
2542 **/
2543 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2544 {
2545 struct igb_buffer *buffer_info;
2546 unsigned long size;
2547 unsigned int i;
2548
2549 if (!tx_ring->buffer_info)
2550 return;
2551 /* Free all the Tx ring sk_buffs */
2552
2553 for (i = 0; i < tx_ring->count; i++) {
2554 buffer_info = &tx_ring->buffer_info[i];
2555 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2556 }
2557
2558 size = sizeof(struct igb_buffer) * tx_ring->count;
2559 memset(tx_ring->buffer_info, 0, size);
2560
2561 /* Zero out the descriptor ring */
2562
2563 memset(tx_ring->desc, 0, tx_ring->size);
2564
2565 tx_ring->next_to_use = 0;
2566 tx_ring->next_to_clean = 0;
2567
2568 writel(0, tx_ring->head);
2569 writel(0, tx_ring->tail);
2570 }
2571
2572 /**
2573 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2574 * @adapter: board private structure
2575 **/
2576 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2577 {
2578 int i;
2579
2580 for (i = 0; i < adapter->num_tx_queues; i++)
2581 igb_clean_tx_ring(&adapter->tx_ring[i]);
2582 }
2583
2584 /**
2585 * igb_free_rx_resources - Free Rx Resources
2586 * @rx_ring: ring to clean the resources from
2587 *
2588 * Free all receive software resources
2589 **/
2590 void igb_free_rx_resources(struct igb_ring *rx_ring)
2591 {
2592 igb_clean_rx_ring(rx_ring);
2593
2594 vfree(rx_ring->buffer_info);
2595 rx_ring->buffer_info = NULL;
2596
2597 pci_free_consistent(rx_ring->pdev, rx_ring->size,
2598 rx_ring->desc, rx_ring->dma);
2599
2600 rx_ring->desc = NULL;
2601 }
2602
2603 /**
2604 * igb_free_all_rx_resources - Free Rx Resources for All Queues
2605 * @adapter: board private structure
2606 *
2607 * Free all receive software resources
2608 **/
2609 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2610 {
2611 int i;
2612
2613 for (i = 0; i < adapter->num_rx_queues; i++)
2614 igb_free_rx_resources(&adapter->rx_ring[i]);
2615 }
2616
2617 /**
2618 * igb_clean_rx_ring - Free Rx Buffers per Queue
2619 * @rx_ring: ring to free buffers from
2620 **/
2621 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2622 {
2623 struct igb_buffer *buffer_info;
2624 unsigned long size;
2625 unsigned int i;
2626
2627 if (!rx_ring->buffer_info)
2628 return;
2629 /* Free all the Rx ring sk_buffs */
2630 for (i = 0; i < rx_ring->count; i++) {
2631 buffer_info = &rx_ring->buffer_info[i];
2632 if (buffer_info->dma) {
2633 pci_unmap_single(rx_ring->pdev,
2634 buffer_info->dma,
2635 rx_ring->rx_buffer_len,
2636 PCI_DMA_FROMDEVICE);
2637 buffer_info->dma = 0;
2638 }
2639
2640 if (buffer_info->skb) {
2641 dev_kfree_skb(buffer_info->skb);
2642 buffer_info->skb = NULL;
2643 }
2644 if (buffer_info->page_dma) {
2645 pci_unmap_page(rx_ring->pdev,
2646 buffer_info->page_dma,
2647 PAGE_SIZE / 2,
2648 PCI_DMA_FROMDEVICE);
2649 buffer_info->page_dma = 0;
2650 }
2651 if (buffer_info->page) {
2652 put_page(buffer_info->page);
2653 buffer_info->page = NULL;
2654 buffer_info->page_offset = 0;
2655 }
2656 }
2657
2658 size = sizeof(struct igb_buffer) * rx_ring->count;
2659 memset(rx_ring->buffer_info, 0, size);
2660
2661 /* Zero out the descriptor ring */
2662 memset(rx_ring->desc, 0, rx_ring->size);
2663
2664 rx_ring->next_to_clean = 0;
2665 rx_ring->next_to_use = 0;
2666
2667 writel(0, rx_ring->head);
2668 writel(0, rx_ring->tail);
2669 }
2670
2671 /**
2672 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2673 * @adapter: board private structure
2674 **/
2675 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2676 {
2677 int i;
2678
2679 for (i = 0; i < adapter->num_rx_queues; i++)
2680 igb_clean_rx_ring(&adapter->rx_ring[i]);
2681 }
2682
2683 /**
2684 * igb_set_mac - Change the Ethernet Address of the NIC
2685 * @netdev: network interface device structure
2686 * @p: pointer to an address structure
2687 *
2688 * Returns 0 on success, negative on failure
2689 **/
2690 static int igb_set_mac(struct net_device *netdev, void *p)
2691 {
2692 struct igb_adapter *adapter = netdev_priv(netdev);
2693 struct e1000_hw *hw = &adapter->hw;
2694 struct sockaddr *addr = p;
2695
2696 if (!is_valid_ether_addr(addr->sa_data))
2697 return -EADDRNOTAVAIL;
2698
2699 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2700 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2701
2702 /* set the correct pool for the new PF MAC address in entry 0 */
2703 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2704 adapter->vfs_allocated_count);
2705
2706 return 0;
2707 }
2708
2709 /**
2710 * igb_write_mc_addr_list - write multicast addresses to MTA
2711 * @netdev: network interface device structure
2712 *
2713 * Writes multicast address list to the MTA hash table.
2714 * Returns: -ENOMEM on failure
2715 * 0 on no addresses written
2716 * X on writing X addresses to MTA
2717 **/
2718 static int igb_write_mc_addr_list(struct net_device *netdev)
2719 {
2720 struct igb_adapter *adapter = netdev_priv(netdev);
2721 struct e1000_hw *hw = &adapter->hw;
2722 struct dev_mc_list *mc_ptr = netdev->mc_list;
2723 u8 *mta_list;
2724 u32 vmolr = 0;
2725 int i;
2726
2727 if (!netdev->mc_count) {
2728 /* nothing to program, so clear mc list */
2729 igb_update_mc_addr_list(hw, NULL, 0);
2730 igb_restore_vf_multicasts(adapter);
2731 return 0;
2732 }
2733
2734 mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC);
2735 if (!mta_list)
2736 return -ENOMEM;
2737
2738 /* set vmolr receive overflow multicast bit */
2739 vmolr |= E1000_VMOLR_ROMPE;
2740
2741 /* The shared function expects a packed array of only addresses. */
2742 mc_ptr = netdev->mc_list;
2743
2744 for (i = 0; i < netdev->mc_count; i++) {
2745 if (!mc_ptr)
2746 break;
2747 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2748 mc_ptr = mc_ptr->next;
2749 }
2750 igb_update_mc_addr_list(hw, mta_list, i);
2751 kfree(mta_list);
2752
2753 return netdev->mc_count;
2754 }
2755
2756 /**
2757 * igb_write_uc_addr_list - write unicast addresses to RAR table
2758 * @netdev: network interface device structure
2759 *
2760 * Writes unicast address list to the RAR table.
2761 * Returns: -ENOMEM on failure/insufficient address space
2762 * 0 on no addresses written
2763 * X on writing X addresses to the RAR table
2764 **/
2765 static int igb_write_uc_addr_list(struct net_device *netdev)
2766 {
2767 struct igb_adapter *adapter = netdev_priv(netdev);
2768 struct e1000_hw *hw = &adapter->hw;
2769 unsigned int vfn = adapter->vfs_allocated_count;
2770 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2771 int count = 0;
2772
2773 /* return ENOMEM indicating insufficient memory for addresses */
2774 if (netdev->uc.count > rar_entries)
2775 return -ENOMEM;
2776
2777 if (netdev->uc.count && rar_entries) {
2778 struct netdev_hw_addr *ha;
2779 list_for_each_entry(ha, &netdev->uc.list, list) {
2780 if (!rar_entries)
2781 break;
2782 igb_rar_set_qsel(adapter, ha->addr,
2783 rar_entries--,
2784 vfn);
2785 count++;
2786 }
2787 }
2788 /* write the addresses in reverse order to avoid write combining */
2789 for (; rar_entries > 0 ; rar_entries--) {
2790 wr32(E1000_RAH(rar_entries), 0);
2791 wr32(E1000_RAL(rar_entries), 0);
2792 }
2793 wrfl();
2794
2795 return count;
2796 }
2797
2798 /**
2799 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2800 * @netdev: network interface device structure
2801 *
2802 * The set_rx_mode entry point is called whenever the unicast or multicast
2803 * address lists or the network interface flags are updated. This routine is
2804 * responsible for configuring the hardware for proper unicast, multicast,
2805 * promiscuous mode, and all-multi behavior.
2806 **/
2807 static void igb_set_rx_mode(struct net_device *netdev)
2808 {
2809 struct igb_adapter *adapter = netdev_priv(netdev);
2810 struct e1000_hw *hw = &adapter->hw;
2811 unsigned int vfn = adapter->vfs_allocated_count;
2812 u32 rctl, vmolr = 0;
2813 int count;
2814
2815 /* Check for Promiscuous and All Multicast modes */
2816 rctl = rd32(E1000_RCTL);
2817
2818 /* clear the effected bits */
2819 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2820
2821 if (netdev->flags & IFF_PROMISC) {
2822 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2823 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2824 } else {
2825 if (netdev->flags & IFF_ALLMULTI) {
2826 rctl |= E1000_RCTL_MPE;
2827 vmolr |= E1000_VMOLR_MPME;
2828 } else {
2829 /*
2830 * Write addresses to the MTA, if the attempt fails
2831 * then we should just turn on promiscous mode so
2832 * that we can at least receive multicast traffic
2833 */
2834 count = igb_write_mc_addr_list(netdev);
2835 if (count < 0) {
2836 rctl |= E1000_RCTL_MPE;
2837 vmolr |= E1000_VMOLR_MPME;
2838 } else if (count) {
2839 vmolr |= E1000_VMOLR_ROMPE;
2840 }
2841 }
2842 /*
2843 * Write addresses to available RAR registers, if there is not
2844 * sufficient space to store all the addresses then enable
2845 * unicast promiscous mode
2846 */
2847 count = igb_write_uc_addr_list(netdev);
2848 if (count < 0) {
2849 rctl |= E1000_RCTL_UPE;
2850 vmolr |= E1000_VMOLR_ROPE;
2851 }
2852 rctl |= E1000_RCTL_VFE;
2853 }
2854 wr32(E1000_RCTL, rctl);
2855
2856 /*
2857 * In order to support SR-IOV and eventually VMDq it is necessary to set
2858 * the VMOLR to enable the appropriate modes. Without this workaround
2859 * we will have issues with VLAN tag stripping not being done for frames
2860 * that are only arriving because we are the default pool
2861 */
2862 if (hw->mac.type < e1000_82576)
2863 return;
2864
2865 vmolr |= rd32(E1000_VMOLR(vfn)) &
2866 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
2867 wr32(E1000_VMOLR(vfn), vmolr);
2868 igb_restore_vf_multicasts(adapter);
2869 }
2870
2871 /* Need to wait a few seconds after link up to get diagnostic information from
2872 * the phy */
2873 static void igb_update_phy_info(unsigned long data)
2874 {
2875 struct igb_adapter *adapter = (struct igb_adapter *) data;
2876 igb_get_phy_info(&adapter->hw);
2877 }
2878
2879 /**
2880 * igb_has_link - check shared code for link and determine up/down
2881 * @adapter: pointer to driver private info
2882 **/
2883 static bool igb_has_link(struct igb_adapter *adapter)
2884 {
2885 struct e1000_hw *hw = &adapter->hw;
2886 bool link_active = false;
2887 s32 ret_val = 0;
2888
2889 /* get_link_status is set on LSC (link status) interrupt or
2890 * rx sequence error interrupt. get_link_status will stay
2891 * false until the e1000_check_for_link establishes link
2892 * for copper adapters ONLY
2893 */
2894 switch (hw->phy.media_type) {
2895 case e1000_media_type_copper:
2896 if (hw->mac.get_link_status) {
2897 ret_val = hw->mac.ops.check_for_link(hw);
2898 link_active = !hw->mac.get_link_status;
2899 } else {
2900 link_active = true;
2901 }
2902 break;
2903 case e1000_media_type_internal_serdes:
2904 ret_val = hw->mac.ops.check_for_link(hw);
2905 link_active = hw->mac.serdes_has_link;
2906 break;
2907 default:
2908 case e1000_media_type_unknown:
2909 break;
2910 }
2911
2912 return link_active;
2913 }
2914
2915 /**
2916 * igb_watchdog - Timer Call-back
2917 * @data: pointer to adapter cast into an unsigned long
2918 **/
2919 static void igb_watchdog(unsigned long data)
2920 {
2921 struct igb_adapter *adapter = (struct igb_adapter *)data;
2922 /* Do the rest outside of interrupt context */
2923 schedule_work(&adapter->watchdog_task);
2924 }
2925
2926 static void igb_watchdog_task(struct work_struct *work)
2927 {
2928 struct igb_adapter *adapter = container_of(work,
2929 struct igb_adapter, watchdog_task);
2930 struct e1000_hw *hw = &adapter->hw;
2931 struct net_device *netdev = adapter->netdev;
2932 struct igb_ring *tx_ring = adapter->tx_ring;
2933 u32 link;
2934 int i;
2935
2936 link = igb_has_link(adapter);
2937 if ((netif_carrier_ok(netdev)) && link)
2938 goto link_up;
2939
2940 if (link) {
2941 if (!netif_carrier_ok(netdev)) {
2942 u32 ctrl;
2943 hw->mac.ops.get_speed_and_duplex(&adapter->hw,
2944 &adapter->link_speed,
2945 &adapter->link_duplex);
2946
2947 ctrl = rd32(E1000_CTRL);
2948 /* Links status message must follow this format */
2949 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
2950 "Flow Control: %s\n",
2951 netdev->name,
2952 adapter->link_speed,
2953 adapter->link_duplex == FULL_DUPLEX ?
2954 "Full Duplex" : "Half Duplex",
2955 ((ctrl & E1000_CTRL_TFCE) && (ctrl &
2956 E1000_CTRL_RFCE)) ? "RX/TX" : ((ctrl &
2957 E1000_CTRL_RFCE) ? "RX" : ((ctrl &
2958 E1000_CTRL_TFCE) ? "TX" : "None")));
2959
2960 /* tweak tx_queue_len according to speed/duplex and
2961 * adjust the timeout factor */
2962 netdev->tx_queue_len = adapter->tx_queue_len;
2963 adapter->tx_timeout_factor = 1;
2964 switch (adapter->link_speed) {
2965 case SPEED_10:
2966 netdev->tx_queue_len = 10;
2967 adapter->tx_timeout_factor = 14;
2968 break;
2969 case SPEED_100:
2970 netdev->tx_queue_len = 100;
2971 /* maybe add some timeout factor ? */
2972 break;
2973 }
2974
2975 netif_carrier_on(netdev);
2976
2977 igb_ping_all_vfs(adapter);
2978
2979 /* link state has changed, schedule phy info update */
2980 if (!test_bit(__IGB_DOWN, &adapter->state))
2981 mod_timer(&adapter->phy_info_timer,
2982 round_jiffies(jiffies + 2 * HZ));
2983 }
2984 } else {
2985 if (netif_carrier_ok(netdev)) {
2986 adapter->link_speed = 0;
2987 adapter->link_duplex = 0;
2988 /* Links status message must follow this format */
2989 printk(KERN_INFO "igb: %s NIC Link is Down\n",
2990 netdev->name);
2991 netif_carrier_off(netdev);
2992
2993 igb_ping_all_vfs(adapter);
2994
2995 /* link state has changed, schedule phy info update */
2996 if (!test_bit(__IGB_DOWN, &adapter->state))
2997 mod_timer(&adapter->phy_info_timer,
2998 round_jiffies(jiffies + 2 * HZ));
2999 }
3000 }
3001
3002 link_up:
3003 igb_update_stats(adapter);
3004
3005 hw->mac.tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
3006 adapter->tpt_old = adapter->stats.tpt;
3007 hw->mac.collision_delta = adapter->stats.colc - adapter->colc_old;
3008 adapter->colc_old = adapter->stats.colc;
3009
3010 adapter->gorc = adapter->stats.gorc - adapter->gorc_old;
3011 adapter->gorc_old = adapter->stats.gorc;
3012 adapter->gotc = adapter->stats.gotc - adapter->gotc_old;
3013 adapter->gotc_old = adapter->stats.gotc;
3014
3015 igb_update_adaptive(&adapter->hw);
3016
3017 if (!netif_carrier_ok(netdev)) {
3018 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3019 /* We've lost link, so the controller stops DMA,
3020 * but we've got queued Tx work that's never going
3021 * to get done, so reset controller to flush Tx.
3022 * (Do the reset outside of interrupt context). */
3023 adapter->tx_timeout_count++;
3024 schedule_work(&adapter->reset_task);
3025 /* return immediately since reset is imminent */
3026 return;
3027 }
3028 }
3029
3030 /* Cause software interrupt to ensure rx ring is cleaned */
3031 if (adapter->msix_entries) {
3032 u32 eics = 0;
3033 for (i = 0; i < adapter->num_q_vectors; i++) {
3034 struct igb_q_vector *q_vector = adapter->q_vector[i];
3035 eics |= q_vector->eims_value;
3036 }
3037 wr32(E1000_EICS, eics);
3038 } else {
3039 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3040 }
3041
3042 /* Force detection of hung controller every watchdog period */
3043 tx_ring->detect_tx_hung = true;
3044
3045 /* Reset the timer */
3046 if (!test_bit(__IGB_DOWN, &adapter->state))
3047 mod_timer(&adapter->watchdog_timer,
3048 round_jiffies(jiffies + 2 * HZ));
3049 }
3050
/* Latency classes; latency_invalid (255) is the out-of-band marker value. */
enum latency_range {
	lowest_latency,			/* 0 */
	low_latency,			/* 1 */
	bulk_latency,			/* 2 */
	latency_invalid = 255
};
3057
3058
3059 /**
3060 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3061 *
3062 * Stores a new ITR value based on strictly on packet size. This
3063 * algorithm is less sophisticated than that used in igb_update_itr,
3064 * due to the difficulty of synchronizing statistics across multiple
3065 * receive rings. The divisors and thresholds used by this fuction
3066 * were determined based on theoretical maximum wire speed and testing
3067 * data, in order to minimize response time while increasing bulk
3068 * throughput.
3069 * This functionality is controlled by the InterruptThrottleRate module
3070 * parameter (see igb_param.c)
3071 * NOTE: This function is called only when operating in a multiqueue
3072 * receive environment.
3073 * @q_vector: pointer to q_vector
3074 **/
3075 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3076 {
3077 int new_val = q_vector->itr_val;
3078 int avg_wire_size = 0;
3079 struct igb_adapter *adapter = q_vector->adapter;
3080
3081 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3082 * ints/sec - ITR timer value of 120 ticks.
3083 */
3084 if (adapter->link_speed != SPEED_1000) {
3085 new_val = 976;
3086 goto set_itr_val;
3087 }
3088
3089 if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3090 struct igb_ring *ring = q_vector->rx_ring;
3091 avg_wire_size = ring->total_bytes / ring->total_packets;
3092 }
3093
3094 if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3095 struct igb_ring *ring = q_vector->tx_ring;
3096 avg_wire_size = max_t(u32, avg_wire_size,
3097 (ring->total_bytes /
3098 ring->total_packets));
3099 }
3100
3101 /* if avg_wire_size isn't set no work was done */
3102 if (!avg_wire_size)
3103 goto clear_counts;
3104
3105 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3106 avg_wire_size += 24;
3107
3108 /* Don't starve jumbo frames */
3109 avg_wire_size = min(avg_wire_size, 3000);
3110
3111 /* Give a little boost to mid-size frames */
3112 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3113 new_val = avg_wire_size / 3;
3114 else
3115 new_val = avg_wire_size / 2;
3116
3117 set_itr_val:
3118 if (new_val != q_vector->itr_val) {
3119 q_vector->itr_val = new_val;
3120 q_vector->set_itr = 1;
3121 }
3122 clear_counts:
3123 if (q_vector->rx_ring) {
3124 q_vector->rx_ring->total_bytes = 0;
3125 q_vector->rx_ring->total_packets = 0;
3126 }
3127 if (q_vector->tx_ring) {
3128 q_vector->tx_ring->total_bytes = 0;
3129 q_vector->tx_ring->total_packets = 0;
3130 }
3131 }
3132
3133 /**
3134 * igb_update_itr - update the dynamic ITR value based on statistics
3135 * Stores a new ITR value based on packets and byte
3136 * counts during the last interrupt. The advantage of per interrupt
3137 * computation is faster updates and more accurate ITR for the current
3138 * traffic pattern. Constants in this function were computed
3139 * based on theoretical maximum wire speed and thresholds were set based
3140 * on testing data as well as attempting to minimize response time
3141 * while increasing bulk throughput.
3142 * this functionality is controlled by the InterruptThrottleRate module
3143 * parameter (see igb_param.c)
3144 * NOTE: These calculations are only valid when operating in a single-
3145 * queue environment.
3146 * @adapter: pointer to adapter
3147 * @itr_setting: current q_vector->itr_val
3148 * @packets: the number of packets during this measurement interval
3149 * @bytes: the number of bytes during this measurement interval
3150 **/
3151 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3152 int packets, int bytes)
3153 {
3154 unsigned int retval = itr_setting;
3155
3156 if (packets == 0)
3157 goto update_itr_done;
3158
3159 switch (itr_setting) {
3160 case lowest_latency:
3161 /* handle TSO and jumbo frames */
3162 if (bytes/packets > 8000)
3163 retval = bulk_latency;
3164 else if ((packets < 5) && (bytes > 512))
3165 retval = low_latency;
3166 break;
3167 case low_latency: /* 50 usec aka 20000 ints/s */
3168 if (bytes > 10000) {
3169 /* this if handles the TSO accounting */
3170 if (bytes/packets > 8000) {
3171 retval = bulk_latency;
3172 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3173 retval = bulk_latency;
3174 } else if ((packets > 35)) {
3175 retval = lowest_latency;
3176 }
3177 } else if (bytes/packets > 2000) {
3178 retval = bulk_latency;
3179 } else if (packets <= 2 && bytes < 512) {
3180 retval = lowest_latency;
3181 }
3182 break;
3183 case bulk_latency: /* 250 usec aka 4000 ints/s */
3184 if (bytes > 25000) {
3185 if (packets > 35)
3186 retval = low_latency;
3187 } else if (bytes < 1500) {
3188 retval = low_latency;
3189 }
3190 break;
3191 }
3192
3193 update_itr_done:
3194 return retval;
3195 }
3196
3197 static void igb_set_itr(struct igb_adapter *adapter)
3198 {
3199 struct igb_q_vector *q_vector = adapter->q_vector[0];
3200 u16 current_itr;
3201 u32 new_itr = q_vector->itr_val;
3202
3203 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3204 if (adapter->link_speed != SPEED_1000) {
3205 current_itr = 0;
3206 new_itr = 4000;
3207 goto set_itr_now;
3208 }
3209
3210 adapter->rx_itr = igb_update_itr(adapter,
3211 adapter->rx_itr,
3212 adapter->rx_ring->total_packets,
3213 adapter->rx_ring->total_bytes);
3214
3215 adapter->tx_itr = igb_update_itr(adapter,
3216 adapter->tx_itr,
3217 adapter->tx_ring->total_packets,
3218 adapter->tx_ring->total_bytes);
3219 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3220
3221 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3222 if (adapter->itr_setting == 3 && current_itr == lowest_latency)
3223 current_itr = low_latency;
3224
3225 switch (current_itr) {
3226 /* counts and packets in update_itr are dependent on these numbers */
3227 case lowest_latency:
3228 new_itr = 56; /* aka 70,000 ints/sec */
3229 break;
3230 case low_latency:
3231 new_itr = 196; /* aka 20,000 ints/sec */
3232 break;
3233 case bulk_latency:
3234 new_itr = 980; /* aka 4,000 ints/sec */
3235 break;
3236 default:
3237 break;
3238 }
3239
3240 set_itr_now:
3241 adapter->rx_ring->total_bytes = 0;
3242 adapter->rx_ring->total_packets = 0;
3243 adapter->tx_ring->total_bytes = 0;
3244 adapter->tx_ring->total_packets = 0;
3245
3246 if (new_itr != q_vector->itr_val) {
3247 /* this attempts to bias the interrupt rate towards Bulk
3248 * by adding intermediate steps when interrupt rate is
3249 * increasing */
3250 new_itr = new_itr > q_vector->itr_val ?
3251 max((new_itr * q_vector->itr_val) /
3252 (new_itr + (q_vector->itr_val >> 2)),
3253 new_itr) :
3254 new_itr;
3255 /* Don't write the value here; it resets the adapter's
3256 * internal timer, and causes us to delay far longer than
3257 * we should between interrupts. Instead, we write the ITR
3258 * value at the beginning of the next interrupt so the timing
3259 * ends up being correct.
3260 */
3261 q_vector->itr_val = new_itr;
3262 q_vector->set_itr = 1;
3263 }
3264
3265 return;
3266 }
3267
/*
 * Per-packet transmit flags built up in igb_xmit_frame_ring_adv().
 * The low bits are boolean options; the upper 16 bits carry the VLAN
 * tag (see IGB_TX_FLAGS_VLAN_MASK / IGB_TX_FLAGS_VLAN_SHIFT).
 */
#define IGB_TX_FLAGS_CSUM		0x00000001
#define IGB_TX_FLAGS_VLAN		0x00000002
#define IGB_TX_FLAGS_TSO		0x00000004
#define IGB_TX_FLAGS_IPV4		0x00000008
#define IGB_TX_FLAGS_TSTAMP		0x00000010
#define IGB_TX_FLAGS_VLAN_MASK		0xffff0000
#define IGB_TX_FLAGS_VLAN_SHIFT		16
3275
3276 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3277 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3278 {
3279 struct e1000_adv_tx_context_desc *context_desc;
3280 unsigned int i;
3281 int err;
3282 struct igb_buffer *buffer_info;
3283 u32 info = 0, tu_cmd = 0;
3284 u32 mss_l4len_idx, l4len;
3285 *hdr_len = 0;
3286
3287 if (skb_header_cloned(skb)) {
3288 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3289 if (err)
3290 return err;
3291 }
3292
3293 l4len = tcp_hdrlen(skb);
3294 *hdr_len += l4len;
3295
3296 if (skb->protocol == htons(ETH_P_IP)) {
3297 struct iphdr *iph = ip_hdr(skb);
3298 iph->tot_len = 0;
3299 iph->check = 0;
3300 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3301 iph->daddr, 0,
3302 IPPROTO_TCP,
3303 0);
3304 } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
3305 ipv6_hdr(skb)->payload_len = 0;
3306 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3307 &ipv6_hdr(skb)->daddr,
3308 0, IPPROTO_TCP, 0);
3309 }
3310
3311 i = tx_ring->next_to_use;
3312
3313 buffer_info = &tx_ring->buffer_info[i];
3314 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3315 /* VLAN MACLEN IPLEN */
3316 if (tx_flags & IGB_TX_FLAGS_VLAN)
3317 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3318 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3319 *hdr_len += skb_network_offset(skb);
3320 info |= skb_network_header_len(skb);
3321 *hdr_len += skb_network_header_len(skb);
3322 context_desc->vlan_macip_lens = cpu_to_le32(info);
3323
3324 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3325 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3326
3327 if (skb->protocol == htons(ETH_P_IP))
3328 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3329 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3330
3331 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3332
3333 /* MSS L4LEN IDX */
3334 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3335 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3336
3337 /* For 82575, context index must be unique per ring. */
3338 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3339 mss_l4len_idx |= tx_ring->reg_idx << 4;
3340
3341 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3342 context_desc->seqnum_seed = 0;
3343
3344 buffer_info->time_stamp = jiffies;
3345 buffer_info->next_to_watch = i;
3346 buffer_info->dma = 0;
3347 i++;
3348 if (i == tx_ring->count)
3349 i = 0;
3350
3351 tx_ring->next_to_use = i;
3352
3353 return true;
3354 }
3355
3356 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3357 struct sk_buff *skb, u32 tx_flags)
3358 {
3359 struct e1000_adv_tx_context_desc *context_desc;
3360 struct pci_dev *pdev = tx_ring->pdev;
3361 struct igb_buffer *buffer_info;
3362 u32 info = 0, tu_cmd = 0;
3363 unsigned int i;
3364
3365 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3366 (tx_flags & IGB_TX_FLAGS_VLAN)) {
3367 i = tx_ring->next_to_use;
3368 buffer_info = &tx_ring->buffer_info[i];
3369 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3370
3371 if (tx_flags & IGB_TX_FLAGS_VLAN)
3372 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3373 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3374 if (skb->ip_summed == CHECKSUM_PARTIAL)
3375 info |= skb_network_header_len(skb);
3376
3377 context_desc->vlan_macip_lens = cpu_to_le32(info);
3378
3379 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3380
3381 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3382 __be16 protocol;
3383
3384 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3385 const struct vlan_ethhdr *vhdr =
3386 (const struct vlan_ethhdr*)skb->data;
3387
3388 protocol = vhdr->h_vlan_encapsulated_proto;
3389 } else {
3390 protocol = skb->protocol;
3391 }
3392
3393 switch (protocol) {
3394 case cpu_to_be16(ETH_P_IP):
3395 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3396 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3397 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3398 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3399 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3400 break;
3401 case cpu_to_be16(ETH_P_IPV6):
3402 /* XXX what about other V6 headers?? */
3403 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3404 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3405 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3406 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3407 break;
3408 default:
3409 if (unlikely(net_ratelimit()))
3410 dev_warn(&pdev->dev,
3411 "partial checksum but proto=%x!\n",
3412 skb->protocol);
3413 break;
3414 }
3415 }
3416
3417 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3418 context_desc->seqnum_seed = 0;
3419 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3420 context_desc->mss_l4len_idx =
3421 cpu_to_le32(tx_ring->reg_idx << 4);
3422
3423 buffer_info->time_stamp = jiffies;
3424 buffer_info->next_to_watch = i;
3425 buffer_info->dma = 0;
3426
3427 i++;
3428 if (i == tx_ring->count)
3429 i = 0;
3430 tx_ring->next_to_use = i;
3431
3432 return true;
3433 }
3434 return false;
3435 }
3436
/* Exclusive upper bound on the byte length carried by one Tx descriptor,
 * expressed as a power of two (enforced with BUG_ON in igb_tx_map_adv). */
#define IGB_MAX_TXD_PWR		16
#define IGB_MAX_DATA_PER_TXD	(1 << IGB_MAX_TXD_PWR)
3439
3440 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3441 unsigned int first)
3442 {
3443 struct igb_buffer *buffer_info;
3444 struct pci_dev *pdev = tx_ring->pdev;
3445 unsigned int len = skb_headlen(skb);
3446 unsigned int count = 0, i;
3447 unsigned int f;
3448 dma_addr_t *map;
3449
3450 i = tx_ring->next_to_use;
3451
3452 if (skb_dma_map(&pdev->dev, skb, DMA_TO_DEVICE)) {
3453 dev_err(&pdev->dev, "TX DMA map failed\n");
3454 return 0;
3455 }
3456
3457 map = skb_shinfo(skb)->dma_maps;
3458
3459 buffer_info = &tx_ring->buffer_info[i];
3460 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3461 buffer_info->length = len;
3462 /* set time_stamp *before* dma to help avoid a possible race */
3463 buffer_info->time_stamp = jiffies;
3464 buffer_info->next_to_watch = i;
3465 buffer_info->dma = skb_shinfo(skb)->dma_head;
3466
3467 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3468 struct skb_frag_struct *frag;
3469
3470 i++;
3471 if (i == tx_ring->count)
3472 i = 0;
3473
3474 frag = &skb_shinfo(skb)->frags[f];
3475 len = frag->size;
3476
3477 buffer_info = &tx_ring->buffer_info[i];
3478 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3479 buffer_info->length = len;
3480 buffer_info->time_stamp = jiffies;
3481 buffer_info->next_to_watch = i;
3482 buffer_info->dma = map[count];
3483 count++;
3484 }
3485
3486 tx_ring->buffer_info[i].skb = skb;
3487 tx_ring->buffer_info[first].next_to_watch = i;
3488
3489 return count + 1;
3490 }
3491
3492 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3493 int tx_flags, int count, u32 paylen,
3494 u8 hdr_len)
3495 {
3496 union e1000_adv_tx_desc *tx_desc = NULL;
3497 struct igb_buffer *buffer_info;
3498 u32 olinfo_status = 0, cmd_type_len;
3499 unsigned int i;
3500
3501 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3502 E1000_ADVTXD_DCMD_DEXT);
3503
3504 if (tx_flags & IGB_TX_FLAGS_VLAN)
3505 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3506
3507 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3508 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3509
3510 if (tx_flags & IGB_TX_FLAGS_TSO) {
3511 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3512
3513 /* insert tcp checksum */
3514 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3515
3516 /* insert ip checksum */
3517 if (tx_flags & IGB_TX_FLAGS_IPV4)
3518 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3519
3520 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3521 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3522 }
3523
3524 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3525 (tx_flags & (IGB_TX_FLAGS_CSUM |
3526 IGB_TX_FLAGS_TSO |
3527 IGB_TX_FLAGS_VLAN)))
3528 olinfo_status |= tx_ring->reg_idx << 4;
3529
3530 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3531
3532 i = tx_ring->next_to_use;
3533 while (count--) {
3534 buffer_info = &tx_ring->buffer_info[i];
3535 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3536 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3537 tx_desc->read.cmd_type_len =
3538 cpu_to_le32(cmd_type_len | buffer_info->length);
3539 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3540 i++;
3541 if (i == tx_ring->count)
3542 i = 0;
3543 }
3544
3545 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3546 /* Force memory writes to complete before letting h/w
3547 * know there are new descriptors to fetch. (Only
3548 * applicable for weak-ordered memory model archs,
3549 * such as IA-64). */
3550 wmb();
3551
3552 tx_ring->next_to_use = i;
3553 writel(i, tx_ring->tail);
3554 /* we need this if more than one processor can write to our tail
3555 * at a time, it syncronizes IO on IA64/Altix systems */
3556 mmiowb();
3557 }
3558
3559 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3560 {
3561 struct net_device *netdev = tx_ring->netdev;
3562
3563 netif_stop_subqueue(netdev, tx_ring->queue_index);
3564
3565 /* Herbert's original patch had:
3566 * smp_mb__after_netif_stop_queue();
3567 * but since that doesn't exist yet, just open code it. */
3568 smp_mb();
3569
3570 /* We need to check again in a case another CPU has just
3571 * made room available. */
3572 if (igb_desc_unused(tx_ring) < size)
3573 return -EBUSY;
3574
3575 /* A reprieve! */
3576 netif_wake_subqueue(netdev, tx_ring->queue_index);
3577 tx_ring->tx_stats.restart_queue++;
3578 return 0;
3579 }
3580
/*
 * igb_maybe_stop_tx - make sure @size descriptors are free on @tx_ring
 *
 * Fast path returns 0 when there is room; otherwise defers to
 * __igb_maybe_stop_tx() and returns its result.
 */
static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	return (igb_desc_unused(tx_ring) >= size) ?
		0 : __igb_maybe_stop_tx(tx_ring, size);
}
3587
3588 static netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3589 struct igb_ring *tx_ring)
3590 {
3591 struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3592 unsigned int first;
3593 unsigned int tx_flags = 0;
3594 u8 hdr_len = 0;
3595 int count = 0;
3596 int tso = 0;
3597 union skb_shared_tx *shtx;
3598
3599 if (test_bit(__IGB_DOWN, &adapter->state)) {
3600 dev_kfree_skb_any(skb);
3601 return NETDEV_TX_OK;
3602 }
3603
3604 if (skb->len <= 0) {
3605 dev_kfree_skb_any(skb);
3606 return NETDEV_TX_OK;
3607 }
3608
3609 /* need: 1 descriptor per page,
3610 * + 2 desc gap to keep tail from touching head,
3611 * + 1 desc for skb->data,
3612 * + 1 desc for context descriptor,
3613 * otherwise try next time */
3614 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3615 /* this is a hard error */
3616 return NETDEV_TX_BUSY;
3617 }
3618
3619 /*
3620 * TODO: check that there currently is no other packet with
3621 * time stamping in the queue
3622 *
3623 * When doing time stamping, keep the connection to the socket
3624 * a while longer: it is still needed by skb_hwtstamp_tx(),
3625 * called either in igb_tx_hwtstamp() or by our caller when
3626 * doing software time stamping.
3627 */
3628 shtx = skb_tx(skb);
3629 if (unlikely(shtx->hardware)) {
3630 shtx->in_progress = 1;
3631 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3632 }
3633
3634 if (adapter->vlgrp && vlan_tx_tag_present(skb)) {
3635 tx_flags |= IGB_TX_FLAGS_VLAN;
3636 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3637 }
3638
3639 if (skb->protocol == htons(ETH_P_IP))
3640 tx_flags |= IGB_TX_FLAGS_IPV4;
3641
3642 first = tx_ring->next_to_use;
3643 if (skb_is_gso(skb)) {
3644 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3645 if (tso < 0) {
3646 dev_kfree_skb_any(skb);
3647 return NETDEV_TX_OK;
3648 }
3649 }
3650
3651 if (tso)
3652 tx_flags |= IGB_TX_FLAGS_TSO;
3653 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3654 (skb->ip_summed == CHECKSUM_PARTIAL))
3655 tx_flags |= IGB_TX_FLAGS_CSUM;
3656
3657 /*
3658 * count reflects descriptors mapped, if 0 then mapping error
3659 * has occured and we need to rewind the descriptor queue
3660 */
3661 count = igb_tx_map_adv(tx_ring, skb, first);
3662
3663 if (!count) {
3664 dev_kfree_skb_any(skb);
3665 tx_ring->buffer_info[first].time_stamp = 0;
3666 tx_ring->next_to_use = first;
3667 return NETDEV_TX_OK;
3668 }
3669
3670 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3671
3672 /* Make sure there is space in the ring for the next send. */
3673 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3674
3675 return NETDEV_TX_OK;
3676 }
3677
3678 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3679 struct net_device *netdev)
3680 {
3681 struct igb_adapter *adapter = netdev_priv(netdev);
3682 struct igb_ring *tx_ring;
3683
3684 int r_idx = 0;
3685 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3686 tx_ring = adapter->multi_tx_table[r_idx];
3687
3688 /* This goes back to the question of how to logically map a tx queue
3689 * to a flow. Right now, performance is impacted slightly negatively
3690 * if using multiple tx queues. If the stack breaks away from a
3691 * single qdisc implementation, we can look at this again. */
3692 return igb_xmit_frame_ring_adv(skb, tx_ring);
3693 }
3694
3695 /**
3696 * igb_tx_timeout - Respond to a Tx Hang
3697 * @netdev: network interface device structure
3698 **/
3699 static void igb_tx_timeout(struct net_device *netdev)
3700 {
3701 struct igb_adapter *adapter = netdev_priv(netdev);
3702 struct e1000_hw *hw = &adapter->hw;
3703
3704 /* Do the reset outside of interrupt context */
3705 adapter->tx_timeout_count++;
3706 schedule_work(&adapter->reset_task);
3707 wr32(E1000_EICS,
3708 (adapter->eims_enable_mask & ~adapter->eims_other));
3709 }
3710
3711 static void igb_reset_task(struct work_struct *work)
3712 {
3713 struct igb_adapter *adapter;
3714 adapter = container_of(work, struct igb_adapter, reset_task);
3715
3716 igb_reinit_locked(adapter);
3717 }
3718
3719 /**
3720 * igb_get_stats - Get System Network Statistics
3721 * @netdev: network interface device structure
3722 *
3723 * Returns the address of the device statistics structure.
3724 * The statistics are actually updated from the timer callback.
3725 **/
3726 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3727 {
3728 /* only return the current stats */
3729 return &netdev->stats;
3730 }
3731
3732 /**
3733 * igb_change_mtu - Change the Maximum Transfer Unit
3734 * @netdev: network interface device structure
3735 * @new_mtu: new value for maximum frame size
3736 *
3737 * Returns 0 on success, negative on failure
3738 **/
3739 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3740 {
3741 struct igb_adapter *adapter = netdev_priv(netdev);
3742 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3743 u32 rx_buffer_len, i;
3744
3745 if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) ||
3746 (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3747 dev_err(&adapter->pdev->dev, "Invalid MTU setting\n");
3748 return -EINVAL;
3749 }
3750
3751 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3752 dev_err(&adapter->pdev->dev, "MTU > 9216 not supported.\n");
3753 return -EINVAL;
3754 }
3755
3756 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3757 msleep(1);
3758
3759 /* igb_down has a dependency on max_frame_size */
3760 adapter->max_frame_size = max_frame;
3761 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3762 * means we reserve 2 more, this pushes us to allocate from the next
3763 * larger slab size.
3764 * i.e. RXBUFFER_2048 --> size-4096 slab
3765 */
3766
3767 if (max_frame <= IGB_RXBUFFER_1024)
3768 rx_buffer_len = IGB_RXBUFFER_1024;
3769 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3770 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3771 else
3772 rx_buffer_len = IGB_RXBUFFER_128;
3773
3774 if (netif_running(netdev))
3775 igb_down(adapter);
3776
3777 dev_info(&adapter->pdev->dev, "changing MTU from %d to %d\n",
3778 netdev->mtu, new_mtu);
3779 netdev->mtu = new_mtu;
3780
3781 for (i = 0; i < adapter->num_rx_queues; i++)
3782 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3783
3784 if (netif_running(netdev))
3785 igb_up(adapter);
3786 else
3787 igb_reset(adapter);
3788
3789 clear_bit(__IGB_RESETTING, &adapter->state);
3790
3791 return 0;
3792 }
3793
3794 /**
3795 * igb_update_stats - Update the board statistics counters
3796 * @adapter: board private structure
3797 **/
3798
3799 void igb_update_stats(struct igb_adapter *adapter)
3800 {
3801 struct net_device *netdev = adapter->netdev;
3802 struct e1000_hw *hw = &adapter->hw;
3803 struct pci_dev *pdev = adapter->pdev;
3804 u16 phy_tmp;
3805
3806 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3807
3808 /*
3809 * Prevent stats update while adapter is being reset, or if the pci
3810 * connection is down.
3811 */
3812 if (adapter->link_speed == 0)
3813 return;
3814 if (pci_channel_offline(pdev))
3815 return;
3816
3817 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3818 adapter->stats.gprc += rd32(E1000_GPRC);
3819 adapter->stats.gorc += rd32(E1000_GORCL);
3820 rd32(E1000_GORCH); /* clear GORCL */
3821 adapter->stats.bprc += rd32(E1000_BPRC);
3822 adapter->stats.mprc += rd32(E1000_MPRC);
3823 adapter->stats.roc += rd32(E1000_ROC);
3824
3825 adapter->stats.prc64 += rd32(E1000_PRC64);
3826 adapter->stats.prc127 += rd32(E1000_PRC127);
3827 adapter->stats.prc255 += rd32(E1000_PRC255);
3828 adapter->stats.prc511 += rd32(E1000_PRC511);
3829 adapter->stats.prc1023 += rd32(E1000_PRC1023);
3830 adapter->stats.prc1522 += rd32(E1000_PRC1522);
3831 adapter->stats.symerrs += rd32(E1000_SYMERRS);
3832 adapter->stats.sec += rd32(E1000_SEC);
3833
3834 adapter->stats.mpc += rd32(E1000_MPC);
3835 adapter->stats.scc += rd32(E1000_SCC);
3836 adapter->stats.ecol += rd32(E1000_ECOL);
3837 adapter->stats.mcc += rd32(E1000_MCC);
3838 adapter->stats.latecol += rd32(E1000_LATECOL);
3839 adapter->stats.dc += rd32(E1000_DC);
3840 adapter->stats.rlec += rd32(E1000_RLEC);
3841 adapter->stats.xonrxc += rd32(E1000_XONRXC);
3842 adapter->stats.xontxc += rd32(E1000_XONTXC);
3843 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
3844 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
3845 adapter->stats.fcruc += rd32(E1000_FCRUC);
3846 adapter->stats.gptc += rd32(E1000_GPTC);
3847 adapter->stats.gotc += rd32(E1000_GOTCL);
3848 rd32(E1000_GOTCH); /* clear GOTCL */
3849 adapter->stats.rnbc += rd32(E1000_RNBC);
3850 adapter->stats.ruc += rd32(E1000_RUC);
3851 adapter->stats.rfc += rd32(E1000_RFC);
3852 adapter->stats.rjc += rd32(E1000_RJC);
3853 adapter->stats.tor += rd32(E1000_TORH);
3854 adapter->stats.tot += rd32(E1000_TOTH);
3855 adapter->stats.tpr += rd32(E1000_TPR);
3856
3857 adapter->stats.ptc64 += rd32(E1000_PTC64);
3858 adapter->stats.ptc127 += rd32(E1000_PTC127);
3859 adapter->stats.ptc255 += rd32(E1000_PTC255);
3860 adapter->stats.ptc511 += rd32(E1000_PTC511);
3861 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
3862 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
3863
3864 adapter->stats.mptc += rd32(E1000_MPTC);
3865 adapter->stats.bptc += rd32(E1000_BPTC);
3866
3867 /* used for adaptive IFS */
3868
3869 hw->mac.tx_packet_delta = rd32(E1000_TPT);
3870 adapter->stats.tpt += hw->mac.tx_packet_delta;
3871 hw->mac.collision_delta = rd32(E1000_COLC);
3872 adapter->stats.colc += hw->mac.collision_delta;
3873
3874 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
3875 adapter->stats.rxerrc += rd32(E1000_RXERRC);
3876 adapter->stats.tncrs += rd32(E1000_TNCRS);
3877 adapter->stats.tsctc += rd32(E1000_TSCTC);
3878 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
3879
3880 adapter->stats.iac += rd32(E1000_IAC);
3881 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
3882 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
3883 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
3884 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
3885 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
3886 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
3887 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
3888 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
3889
3890 /* Fill out the OS statistics structure */
3891 netdev->stats.multicast = adapter->stats.mprc;
3892 netdev->stats.collisions = adapter->stats.colc;
3893
3894 /* Rx Errors */
3895
3896 if (hw->mac.type != e1000_82575) {
3897 u32 rqdpc_tmp;
3898 u64 rqdpc_total = 0;
3899 int i;
3900 /* Read out drops stats per RX queue. Notice RQDPC (Receive
3901 * Queue Drop Packet Count) stats only gets incremented, if
3902 * the DROP_EN but it set (in the SRRCTL register for that
3903 * queue). If DROP_EN bit is NOT set, then the some what
3904 * equivalent count is stored in RNBC (not per queue basis).
3905 * Also note the drop count is due to lack of available
3906 * descriptors.
3907 */
3908 for (i = 0; i < adapter->num_rx_queues; i++) {
3909 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0xFFF;
3910 adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
3911 rqdpc_total += adapter->rx_ring[i].rx_stats.drops;
3912 }
3913 netdev->stats.rx_fifo_errors = rqdpc_total;
3914 }
3915
3916 /* Note RNBC (Receive No Buffers Count) is an not an exact
3917 * drop count as the hardware FIFO might save the day. Thats
3918 * one of the reason for saving it in rx_fifo_errors, as its
3919 * potentially not a true drop.
3920 */
3921 netdev->stats.rx_fifo_errors += adapter->stats.rnbc;
3922
3923 /* RLEC on some newer hardware can be incorrect so build
3924 * our own version based on RUC and ROC */
3925 netdev->stats.rx_errors = adapter->stats.rxerrc +
3926 adapter->stats.crcerrs + adapter->stats.algnerrc +
3927 adapter->stats.ruc + adapter->stats.roc +
3928 adapter->stats.cexterr;
3929 netdev->stats.rx_length_errors = adapter->stats.ruc +
3930 adapter->stats.roc;
3931 netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
3932 netdev->stats.rx_frame_errors = adapter->stats.algnerrc;
3933 netdev->stats.rx_missed_errors = adapter->stats.mpc;
3934
3935 /* Tx Errors */
3936 netdev->stats.tx_errors = adapter->stats.ecol +
3937 adapter->stats.latecol;
3938 netdev->stats.tx_aborted_errors = adapter->stats.ecol;
3939 netdev->stats.tx_window_errors = adapter->stats.latecol;
3940 netdev->stats.tx_carrier_errors = adapter->stats.tncrs;
3941
3942 /* Tx Dropped needs to be maintained elsewhere */
3943
3944 /* Phy Stats */
3945 if (hw->phy.media_type == e1000_media_type_copper) {
3946 if ((adapter->link_speed == SPEED_1000) &&
3947 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
3948 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
3949 adapter->phy_stats.idle_errors += phy_tmp;
3950 }
3951 }
3952
3953 /* Management Stats */
3954 adapter->stats.mgptc += rd32(E1000_MGTPTC);
3955 adapter->stats.mgprc += rd32(E1000_MGTPRC);
3956 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
3957 }
3958
3959 static irqreturn_t igb_msix_other(int irq, void *data)
3960 {
3961 struct igb_adapter *adapter = data;
3962 struct e1000_hw *hw = &adapter->hw;
3963 u32 icr = rd32(E1000_ICR);
3964 /* reading ICR causes bit 31 of EICR to be cleared */
3965
3966 if (icr & E1000_ICR_DOUTSYNC) {
3967 /* HW is reporting DMA is out of sync */
3968 adapter->stats.doosync++;
3969 }
3970
3971 /* Check for a mailbox event */
3972 if (icr & E1000_ICR_VMMB)
3973 igb_msg_task(adapter);
3974
3975 if (icr & E1000_ICR_LSC) {
3976 hw->mac.get_link_status = 1;
3977 /* guard against interrupt when we're going down */
3978 if (!test_bit(__IGB_DOWN, &adapter->state))
3979 mod_timer(&adapter->watchdog_timer, jiffies + 1);
3980 }
3981
3982 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_VMMB);
3983 wr32(E1000_EIMS, adapter->eims_other);
3984
3985 return IRQ_HANDLED;
3986 }
3987
3988 static void igb_write_itr(struct igb_q_vector *q_vector)
3989 {
3990 u32 itr_val = q_vector->itr_val & 0x7FFC;
3991
3992 if (!q_vector->set_itr)
3993 return;
3994
3995 if (!itr_val)
3996 itr_val = 0x4;
3997
3998 if (q_vector->itr_shift)
3999 itr_val |= itr_val << q_vector->itr_shift;
4000 else
4001 itr_val |= 0x8000000;
4002
4003 writel(itr_val, q_vector->itr_register);
4004 q_vector->set_itr = 0;
4005 }
4006
4007 static irqreturn_t igb_msix_ring(int irq, void *data)
4008 {
4009 struct igb_q_vector *q_vector = data;
4010
4011 /* Write the ITR value calculated from the previous interrupt. */
4012 igb_write_itr(q_vector);
4013
4014 napi_schedule(&q_vector->napi);
4015
4016 return IRQ_HANDLED;
4017 }
4018
4019 #ifdef CONFIG_IGB_DCA
4020 static void igb_update_dca(struct igb_q_vector *q_vector)
4021 {
4022 struct igb_adapter *adapter = q_vector->adapter;
4023 struct e1000_hw *hw = &adapter->hw;
4024 int cpu = get_cpu();
4025
4026 if (q_vector->cpu == cpu)
4027 goto out_no_update;
4028
4029 if (q_vector->tx_ring) {
4030 int q = q_vector->tx_ring->reg_idx;
4031 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4032 if (hw->mac.type == e1000_82575) {
4033 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4034 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4035 } else {
4036 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4037 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4038 E1000_DCA_TXCTRL_CPUID_SHIFT;
4039 }
4040 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4041 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4042 }
4043 if (q_vector->rx_ring) {
4044 int q = q_vector->rx_ring->reg_idx;
4045 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4046 if (hw->mac.type == e1000_82575) {
4047 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4048 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4049 } else {
4050 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4051 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4052 E1000_DCA_RXCTRL_CPUID_SHIFT;
4053 }
4054 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4055 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4056 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4057 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4058 }
4059 q_vector->cpu = cpu;
4060 out_no_update:
4061 put_cpu();
4062 }
4063
4064 static void igb_setup_dca(struct igb_adapter *adapter)
4065 {
4066 struct e1000_hw *hw = &adapter->hw;
4067 int i;
4068
4069 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4070 return;
4071
4072 /* Always use CB2 mode, difference is masked in the CB driver. */
4073 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4074
4075 for (i = 0; i < adapter->num_q_vectors; i++) {
4076 struct igb_q_vector *q_vector = adapter->q_vector[i];
4077 q_vector->cpu = -1;
4078 igb_update_dca(q_vector);
4079 }
4080 }
4081
4082 static int __igb_notify_dca(struct device *dev, void *data)
4083 {
4084 struct net_device *netdev = dev_get_drvdata(dev);
4085 struct igb_adapter *adapter = netdev_priv(netdev);
4086 struct e1000_hw *hw = &adapter->hw;
4087 unsigned long event = *(unsigned long *)data;
4088
4089 switch (event) {
4090 case DCA_PROVIDER_ADD:
4091 /* if already enabled, don't do it again */
4092 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4093 break;
4094 /* Always use CB2 mode, difference is masked
4095 * in the CB driver. */
4096 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4097 if (dca_add_requester(dev) == 0) {
4098 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4099 dev_info(&adapter->pdev->dev, "DCA enabled\n");
4100 igb_setup_dca(adapter);
4101 break;
4102 }
4103 /* Fall Through since DCA is disabled. */
4104 case DCA_PROVIDER_REMOVE:
4105 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4106 /* without this a class_device is left
4107 * hanging around in the sysfs model */
4108 dca_remove_requester(dev);
4109 dev_info(&adapter->pdev->dev, "DCA disabled\n");
4110 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4111 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4112 }
4113 break;
4114 }
4115
4116 return 0;
4117 }
4118
4119 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4120 void *p)
4121 {
4122 int ret_val;
4123
4124 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4125 __igb_notify_dca);
4126
4127 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4128 }
4129 #endif /* CONFIG_IGB_DCA */
4130
4131 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4132 {
4133 struct e1000_hw *hw = &adapter->hw;
4134 u32 ping;
4135 int i;
4136
4137 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4138 ping = E1000_PF_CONTROL_MSG;
4139 if (adapter->vf_data[i].clear_to_send)
4140 ping |= E1000_VT_MSGTYPE_CTS;
4141 igb_write_mbx(hw, &ping, 1, i);
4142 }
4143 }
4144
4145 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4146 u32 *msgbuf, u32 vf)
4147 {
4148 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4149 u16 *hash_list = (u16 *)&msgbuf[1];
4150 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4151 int i;
4152
4153 /* only up to 30 hash values supported */
4154 if (n > 30)
4155 n = 30;
4156
4157 /* salt away the number of multi cast addresses assigned
4158 * to this VF for later use to restore when the PF multi cast
4159 * list changes
4160 */
4161 vf_data->num_vf_mc_hashes = n;
4162
4163 /* VFs are limited to using the MTA hash table for their multicast
4164 * addresses */
4165 for (i = 0; i < n; i++)
4166 vf_data->vf_mc_hashes[i] = hash_list[i];
4167
4168 /* Flush and reset the mta with the new values */
4169 igb_set_rx_mode(adapter->netdev);
4170
4171 return 0;
4172 }
4173
4174 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4175 {
4176 struct e1000_hw *hw = &adapter->hw;
4177 struct vf_data_storage *vf_data;
4178 int i, j;
4179
4180 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4181 vf_data = &adapter->vf_data[i];
4182 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4183 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4184 }
4185 }
4186
4187 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4188 {
4189 struct e1000_hw *hw = &adapter->hw;
4190 u32 pool_mask, reg, vid;
4191 int i;
4192
4193 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4194
4195 /* Find the vlan filter for this id */
4196 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4197 reg = rd32(E1000_VLVF(i));
4198
4199 /* remove the vf from the pool */
4200 reg &= ~pool_mask;
4201
4202 /* if pool is empty then remove entry from vfta */
4203 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4204 (reg & E1000_VLVF_VLANID_ENABLE)) {
4205 reg = 0;
4206 vid = reg & E1000_VLVF_VLANID_MASK;
4207 igb_vfta_set(hw, vid, false);
4208 }
4209
4210 wr32(E1000_VLVF(i), reg);
4211 }
4212
4213 adapter->vf_data[vf].vlans_enabled = 0;
4214 }
4215
4216 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4217 {
4218 struct e1000_hw *hw = &adapter->hw;
4219 u32 reg, i;
4220
4221 /* It is an error to call this function when VFs are not enabled */
4222 if (!adapter->vfs_allocated_count)
4223 return -1;
4224
4225 /* Find the vlan filter for this id */
4226 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4227 reg = rd32(E1000_VLVF(i));
4228 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4229 vid == (reg & E1000_VLVF_VLANID_MASK))
4230 break;
4231 }
4232
4233 if (add) {
4234 if (i == E1000_VLVF_ARRAY_SIZE) {
4235 /* Did not find a matching VLAN ID entry that was
4236 * enabled. Search for a free filter entry, i.e.
4237 * one without the enable bit set
4238 */
4239 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4240 reg = rd32(E1000_VLVF(i));
4241 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4242 break;
4243 }
4244 }
4245 if (i < E1000_VLVF_ARRAY_SIZE) {
4246 /* Found an enabled/available entry */
4247 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4248
4249 /* if !enabled we need to set this up in vfta */
4250 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4251 /* add VID to filter table, if bit already set
4252 * PF must have added it outside of table */
4253 if (igb_vfta_set(hw, vid, true))
4254 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT +
4255 adapter->vfs_allocated_count);
4256 reg |= E1000_VLVF_VLANID_ENABLE;
4257 }
4258 reg &= ~E1000_VLVF_VLANID_MASK;
4259 reg |= vid;
4260
4261 wr32(E1000_VLVF(i), reg);
4262
4263 /* do not modify RLPML for PF devices */
4264 if (vf >= adapter->vfs_allocated_count)
4265 return 0;
4266
4267 if (!adapter->vf_data[vf].vlans_enabled) {
4268 u32 size;
4269 reg = rd32(E1000_VMOLR(vf));
4270 size = reg & E1000_VMOLR_RLPML_MASK;
4271 size += 4;
4272 reg &= ~E1000_VMOLR_RLPML_MASK;
4273 reg |= size;
4274 wr32(E1000_VMOLR(vf), reg);
4275 }
4276 adapter->vf_data[vf].vlans_enabled++;
4277
4278 return 0;
4279 }
4280 } else {
4281 if (i < E1000_VLVF_ARRAY_SIZE) {
4282 /* remove vf from the pool */
4283 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4284 /* if pool is empty then remove entry from vfta */
4285 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4286 reg = 0;
4287 igb_vfta_set(hw, vid, false);
4288 }
4289 wr32(E1000_VLVF(i), reg);
4290
4291 /* do not modify RLPML for PF devices */
4292 if (vf >= adapter->vfs_allocated_count)
4293 return 0;
4294
4295 adapter->vf_data[vf].vlans_enabled--;
4296 if (!adapter->vf_data[vf].vlans_enabled) {
4297 u32 size;
4298 reg = rd32(E1000_VMOLR(vf));
4299 size = reg & E1000_VMOLR_RLPML_MASK;
4300 size -= 4;
4301 reg &= ~E1000_VMOLR_RLPML_MASK;
4302 reg |= size;
4303 wr32(E1000_VMOLR(vf), reg);
4304 }
4305 return 0;
4306 }
4307 }
4308 return -1;
4309 }
4310
4311 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4312 {
4313 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4314 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4315
4316 return igb_vlvf_set(adapter, vid, add, vf);
4317 }
4318
4319 static inline void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4320 {
4321 struct e1000_hw *hw = &adapter->hw;
4322
4323 /* disable mailbox functionality for vf */
4324 adapter->vf_data[vf].clear_to_send = false;
4325
4326 /* reset offloads to defaults */
4327 igb_set_vmolr(hw, vf);
4328
4329 /* reset vlans for device */
4330 igb_clear_vf_vfta(adapter, vf);
4331
4332 /* reset multicast table array for vf */
4333 adapter->vf_data[vf].num_vf_mc_hashes = 0;
4334
4335 /* Flush and reset the mta with the new values */
4336 igb_set_rx_mode(adapter->netdev);
4337 }
4338
4339 static inline void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4340 {
4341 struct e1000_hw *hw = &adapter->hw;
4342 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4343 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4344 u32 reg, msgbuf[3];
4345 u8 *addr = (u8 *)(&msgbuf[1]);
4346
4347 /* process all the same items cleared in a function level reset */
4348 igb_vf_reset_event(adapter, vf);
4349
4350 /* set vf mac address */
4351 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4352
4353 /* enable transmit and receive for vf */
4354 reg = rd32(E1000_VFTE);
4355 wr32(E1000_VFTE, reg | (1 << vf));
4356 reg = rd32(E1000_VFRE);
4357 wr32(E1000_VFRE, reg | (1 << vf));
4358
4359 /* enable mailbox functionality for vf */
4360 adapter->vf_data[vf].clear_to_send = true;
4361
4362 /* reply to reset with ack and vf mac address */
4363 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4364 memcpy(addr, vf_mac, 6);
4365 igb_write_mbx(hw, msgbuf, 3, vf);
4366 }
4367
4368 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4369 {
4370 unsigned char *addr = (char *)&msg[1];
4371 int err = -1;
4372
4373 if (is_valid_ether_addr(addr))
4374 err = igb_set_vf_mac(adapter, vf, addr);
4375
4376 return err;
4377
4378 }
4379
4380 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4381 {
4382 struct e1000_hw *hw = &adapter->hw;
4383 u32 msg = E1000_VT_MSGTYPE_NACK;
4384
4385 /* if device isn't clear to send it shouldn't be reading either */
4386 if (!adapter->vf_data[vf].clear_to_send)
4387 igb_write_mbx(hw, &msg, 1, vf);
4388 }
4389
4390
4391 static void igb_msg_task(struct igb_adapter *adapter)
4392 {
4393 struct e1000_hw *hw = &adapter->hw;
4394 u32 vf;
4395
4396 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4397 /* process any reset requests */
4398 if (!igb_check_for_rst(hw, vf)) {
4399 adapter->vf_data[vf].clear_to_send = false;
4400 igb_vf_reset_event(adapter, vf);
4401 }
4402
4403 /* process any messages pending */
4404 if (!igb_check_for_msg(hw, vf))
4405 igb_rcv_msg_from_vf(adapter, vf);
4406
4407 /* process any acks */
4408 if (!igb_check_for_ack(hw, vf))
4409 igb_rcv_ack_from_vf(adapter, vf);
4410
4411 }
4412 }
4413
4414 static int igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4415 {
4416 u32 mbx_size = E1000_VFMAILBOX_SIZE;
4417 u32 msgbuf[mbx_size];
4418 struct e1000_hw *hw = &adapter->hw;
4419 s32 retval;
4420
4421 retval = igb_read_mbx(hw, msgbuf, mbx_size, vf);
4422
4423 if (retval)
4424 dev_err(&adapter->pdev->dev,
4425 "Error receiving message from VF\n");
4426
4427 /* this is a message we already processed, do nothing */
4428 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4429 return retval;
4430
4431 /*
4432 * until the vf completes a reset it should not be
4433 * allowed to start any configuration.
4434 */
4435
4436 if (msgbuf[0] == E1000_VF_RESET) {
4437 igb_vf_reset_msg(adapter, vf);
4438
4439 return retval;
4440 }
4441
4442 if (!adapter->vf_data[vf].clear_to_send) {
4443 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4444 igb_write_mbx(hw, msgbuf, 1, vf);
4445 return retval;
4446 }
4447
4448 switch ((msgbuf[0] & 0xFFFF)) {
4449 case E1000_VF_SET_MAC_ADDR:
4450 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4451 break;
4452 case E1000_VF_SET_MULTICAST:
4453 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4454 break;
4455 case E1000_VF_SET_LPE:
4456 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4457 break;
4458 case E1000_VF_SET_VLAN:
4459 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4460 break;
4461 default:
4462 dev_err(&adapter->pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4463 retval = -1;
4464 break;
4465 }
4466
4467 /* notify the VF of the results of what it sent us */
4468 if (retval)
4469 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4470 else
4471 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4472
4473 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4474
4475 igb_write_mbx(hw, msgbuf, 1, vf);
4476
4477 return retval;
4478 }
4479
4480 /**
4481 * igb_set_uta - Set unicast filter table address
4482 * @adapter: board private structure
4483 *
4484 * The unicast table address is a register array of 32-bit registers.
4485 * The table is meant to be used in a way similar to how the MTA is used
4486 * however due to certain limitations in the hardware it is necessary to
4487 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscous
4488 * enable bit to allow vlan tag stripping when promiscous mode is enabled
4489 **/
4490 static void igb_set_uta(struct igb_adapter *adapter)
4491 {
4492 struct e1000_hw *hw = &adapter->hw;
4493 int i;
4494
4495 /* The UTA table only exists on 82576 hardware and newer */
4496 if (hw->mac.type < e1000_82576)
4497 return;
4498
4499 /* we only need to do this if VMDq is enabled */
4500 if (!adapter->vfs_allocated_count)
4501 return;
4502
4503 for (i = 0; i < hw->mac.uta_reg_count; i++)
4504 array_wr32(E1000_UTA, i, ~0);
4505 }
4506
4507 /**
4508 * igb_intr_msi - Interrupt Handler
4509 * @irq: interrupt number
4510 * @data: pointer to a network interface device structure
4511 **/
4512 static irqreturn_t igb_intr_msi(int irq, void *data)
4513 {
4514 struct igb_adapter *adapter = data;
4515 struct igb_q_vector *q_vector = adapter->q_vector[0];
4516 struct e1000_hw *hw = &adapter->hw;
4517 /* read ICR disables interrupts using IAM */
4518 u32 icr = rd32(E1000_ICR);
4519
4520 igb_write_itr(q_vector);
4521
4522 if (icr & E1000_ICR_DOUTSYNC) {
4523 /* HW is reporting DMA is out of sync */
4524 adapter->stats.doosync++;
4525 }
4526
4527 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4528 hw->mac.get_link_status = 1;
4529 if (!test_bit(__IGB_DOWN, &adapter->state))
4530 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4531 }
4532
4533 napi_schedule(&q_vector->napi);
4534
4535 return IRQ_HANDLED;
4536 }
4537
4538 /**
4539 * igb_intr - Legacy Interrupt Handler
4540 * @irq: interrupt number
4541 * @data: pointer to a network interface device structure
4542 **/
4543 static irqreturn_t igb_intr(int irq, void *data)
4544 {
4545 struct igb_adapter *adapter = data;
4546 struct igb_q_vector *q_vector = adapter->q_vector[0];
4547 struct e1000_hw *hw = &adapter->hw;
4548 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
4549 * need for the IMC write */
4550 u32 icr = rd32(E1000_ICR);
4551 if (!icr)
4552 return IRQ_NONE; /* Not our interrupt */
4553
4554 igb_write_itr(q_vector);
4555
4556 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4557 * not set, then the adapter didn't send an interrupt */
4558 if (!(icr & E1000_ICR_INT_ASSERTED))
4559 return IRQ_NONE;
4560
4561 if (icr & E1000_ICR_DOUTSYNC) {
4562 /* HW is reporting DMA is out of sync */
4563 adapter->stats.doosync++;
4564 }
4565
4566 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4567 hw->mac.get_link_status = 1;
4568 /* guard against interrupt when we're going down */
4569 if (!test_bit(__IGB_DOWN, &adapter->state))
4570 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4571 }
4572
4573 napi_schedule(&q_vector->napi);
4574
4575 return IRQ_HANDLED;
4576 }
4577
4578 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4579 {
4580 struct igb_adapter *adapter = q_vector->adapter;
4581 struct e1000_hw *hw = &adapter->hw;
4582
4583 if (adapter->itr_setting & 3) {
4584 if (!adapter->msix_entries)
4585 igb_set_itr(adapter);
4586 else
4587 igb_update_ring_itr(q_vector);
4588 }
4589
4590 if (!test_bit(__IGB_DOWN, &adapter->state)) {
4591 if (adapter->msix_entries)
4592 wr32(E1000_EIMS, q_vector->eims_value);
4593 else
4594 igb_irq_enable(adapter);
4595 }
4596 }
4597
4598 /**
4599 * igb_poll - NAPI Rx polling callback
4600 * @napi: napi polling structure
4601 * @budget: count of how many packets we should handle
4602 **/
4603 static int igb_poll(struct napi_struct *napi, int budget)
4604 {
4605 struct igb_q_vector *q_vector = container_of(napi,
4606 struct igb_q_vector,
4607 napi);
4608 int tx_clean_complete = 1, work_done = 0;
4609
4610 #ifdef CONFIG_IGB_DCA
4611 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4612 igb_update_dca(q_vector);
4613 #endif
4614 if (q_vector->tx_ring)
4615 tx_clean_complete = igb_clean_tx_irq(q_vector);
4616
4617 if (q_vector->rx_ring)
4618 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4619
4620 if (!tx_clean_complete)
4621 work_done = budget;
4622
4623 /* If not enough Rx work done, exit the polling mode */
4624 if (work_done < budget) {
4625 napi_complete(napi);
4626 igb_ring_irq_enable(q_vector);
4627 }
4628
4629 return work_done;
4630 }
4631
4632 /**
4633 * igb_hwtstamp - utility function which checks for TX time stamp
4634 * @adapter: board private structure
4635 * @skb: packet that was just sent
4636 *
4637 * If we were asked to do hardware stamping and such a time stamp is
4638 * available, then it must have been for this skb here because we only
4639 * allow only one such packet into the queue.
4640 */
4641 static void igb_tx_hwtstamp(struct igb_adapter *adapter, struct sk_buff *skb)
4642 {
4643 union skb_shared_tx *shtx = skb_tx(skb);
4644 struct e1000_hw *hw = &adapter->hw;
4645
4646 if (unlikely(shtx->hardware)) {
4647 u32 valid = rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID;
4648 if (valid) {
4649 u64 regval = rd32(E1000_TXSTMPL);
4650 u64 ns;
4651 struct skb_shared_hwtstamps shhwtstamps;
4652
4653 memset(&shhwtstamps, 0, sizeof(shhwtstamps));
4654 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4655 ns = timecounter_cyc2time(&adapter->clock,
4656 regval);
4657 timecompare_update(&adapter->compare, ns);
4658 shhwtstamps.hwtstamp = ns_to_ktime(ns);
4659 shhwtstamps.syststamp =
4660 timecompare_transform(&adapter->compare, ns);
4661 skb_tstamp_tx(skb, &shhwtstamps);
4662 }
4663 }
4664 }
4665
4666 /**
4667 * igb_clean_tx_irq - Reclaim resources after transmit completes
4668 * @q_vector: pointer to q_vector containing needed info
4669 * returns true if ring is completely cleaned
4670 **/
4671 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4672 {
4673 struct igb_adapter *adapter = q_vector->adapter;
4674 struct igb_ring *tx_ring = q_vector->tx_ring;
4675 struct net_device *netdev = tx_ring->netdev;
4676 struct e1000_hw *hw = &adapter->hw;
4677 struct igb_buffer *buffer_info;
4678 struct sk_buff *skb;
4679 union e1000_adv_tx_desc *tx_desc, *eop_desc;
4680 unsigned int total_bytes = 0, total_packets = 0;
4681 unsigned int i, eop, count = 0;
4682 bool cleaned = false;
4683
4684 i = tx_ring->next_to_clean;
4685 eop = tx_ring->buffer_info[i].next_to_watch;
4686 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4687
4688 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
4689 (count < tx_ring->count)) {
4690 for (cleaned = false; !cleaned; count++) {
4691 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4692 buffer_info = &tx_ring->buffer_info[i];
4693 cleaned = (i == eop);
4694 skb = buffer_info->skb;
4695
4696 if (skb) {
4697 unsigned int segs, bytecount;
4698 /* gso_segs is currently only valid for tcp */
4699 segs = skb_shinfo(skb)->gso_segs ?: 1;
4700 /* multiply data chunks by size of headers */
4701 bytecount = ((segs - 1) * skb_headlen(skb)) +
4702 skb->len;
4703 total_packets += segs;
4704 total_bytes += bytecount;
4705
4706 igb_tx_hwtstamp(adapter, skb);
4707 }
4708
4709 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4710 tx_desc->wb.status = 0;
4711
4712 i++;
4713 if (i == tx_ring->count)
4714 i = 0;
4715 }
4716 eop = tx_ring->buffer_info[i].next_to_watch;
4717 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4718 }
4719
4720 tx_ring->next_to_clean = i;
4721
4722 if (unlikely(count &&
4723 netif_carrier_ok(netdev) &&
4724 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
4725 /* Make sure that anybody stopping the queue after this
4726 * sees the new next_to_clean.
4727 */
4728 smp_mb();
4729 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
4730 !(test_bit(__IGB_DOWN, &adapter->state))) {
4731 netif_wake_subqueue(netdev, tx_ring->queue_index);
4732 tx_ring->tx_stats.restart_queue++;
4733 }
4734 }
4735
4736 if (tx_ring->detect_tx_hung) {
4737 /* Detect a transmit hang in hardware, this serializes the
4738 * check with the clearing of time_stamp and movement of i */
4739 tx_ring->detect_tx_hung = false;
4740 if (tx_ring->buffer_info[i].time_stamp &&
4741 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
4742 (adapter->tx_timeout_factor * HZ))
4743 && !(rd32(E1000_STATUS) &
4744 E1000_STATUS_TXOFF)) {
4745
4746 /* detected Tx unit hang */
4747 dev_err(&tx_ring->pdev->dev,
4748 "Detected Tx Unit Hang\n"
4749 " Tx Queue <%d>\n"
4750 " TDH <%x>\n"
4751 " TDT <%x>\n"
4752 " next_to_use <%x>\n"
4753 " next_to_clean <%x>\n"
4754 "buffer_info[next_to_clean]\n"
4755 " time_stamp <%lx>\n"
4756 " next_to_watch <%x>\n"
4757 " jiffies <%lx>\n"
4758 " desc.status <%x>\n",
4759 tx_ring->queue_index,
4760 readl(tx_ring->head),
4761 readl(tx_ring->tail),
4762 tx_ring->next_to_use,
4763 tx_ring->next_to_clean,
4764 tx_ring->buffer_info[i].time_stamp,
4765 eop,
4766 jiffies,
4767 eop_desc->wb.status);
4768 netif_stop_subqueue(netdev, tx_ring->queue_index);
4769 }
4770 }
4771 tx_ring->total_bytes += total_bytes;
4772 tx_ring->total_packets += total_packets;
4773 tx_ring->tx_stats.bytes += total_bytes;
4774 tx_ring->tx_stats.packets += total_packets;
4775 netdev->stats.tx_bytes += total_bytes;
4776 netdev->stats.tx_packets += total_packets;
4777 return (count < tx_ring->count);
4778 }
4779
4780 /**
4781 * igb_receive_skb - helper function to handle rx indications
4782 * @q_vector: structure containing interrupt and ring information
4783 * @skb: packet to send up
4784 * @vlan_tag: vlan tag for packet
4785 **/
4786 static void igb_receive_skb(struct igb_q_vector *q_vector,
4787 struct sk_buff *skb,
4788 u16 vlan_tag)
4789 {
4790 struct igb_adapter *adapter = q_vector->adapter;
4791
4792 if (vlan_tag)
4793 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
4794 vlan_tag, skb);
4795 else
4796 napi_gro_receive(&q_vector->napi, skb);
4797 }
4798
4799 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
4800 u32 status_err, struct sk_buff *skb)
4801 {
4802 skb->ip_summed = CHECKSUM_NONE;
4803
4804 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
4805 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
4806 (status_err & E1000_RXD_STAT_IXSM))
4807 return;
4808
4809 /* TCP/UDP checksum error bit is set */
4810 if (status_err &
4811 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
4812 /*
4813 * work around errata with sctp packets where the TCPE aka
4814 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
4815 * packets, (aka let the stack check the crc32c)
4816 */
4817 if ((skb->len == 60) &&
4818 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
4819 ring->rx_stats.csum_err++;
4820
4821 /* let the stack verify checksum errors */
4822 return;
4823 }
4824 /* It must be a TCP or UDP packet with a valid checksum */
4825 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
4826 skb->ip_summed = CHECKSUM_UNNECESSARY;
4827
4828 dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
4829 }
4830
4831 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
4832 union e1000_adv_rx_desc *rx_desc)
4833 {
4834 /* HW will not DMA in data larger than the given buffer, even if it
4835 * parses the (NFS, of course) header to be larger. In that case, it
4836 * fills the header buffer and spills the rest into the page.
4837 */
4838 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
4839 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
4840 if (hlen > rx_ring->rx_buffer_len)
4841 hlen = rx_ring->rx_buffer_len;
4842 return hlen;
4843 }
4844
4845 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
4846 int *work_done, int budget)
4847 {
4848 struct igb_adapter *adapter = q_vector->adapter;
4849 struct igb_ring *rx_ring = q_vector->rx_ring;
4850 struct net_device *netdev = rx_ring->netdev;
4851 struct e1000_hw *hw = &adapter->hw;
4852 struct pci_dev *pdev = rx_ring->pdev;
4853 union e1000_adv_rx_desc *rx_desc , *next_rxd;
4854 struct igb_buffer *buffer_info , *next_buffer;
4855 struct sk_buff *skb;
4856 bool cleaned = false;
4857 int cleaned_count = 0;
4858 unsigned int total_bytes = 0, total_packets = 0;
4859 unsigned int i;
4860 u32 staterr;
4861 u16 length;
4862 u16 vlan_tag;
4863
4864 i = rx_ring->next_to_clean;
4865 buffer_info = &rx_ring->buffer_info[i];
4866 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
4867 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4868
4869 while (staterr & E1000_RXD_STAT_DD) {
4870 if (*work_done >= budget)
4871 break;
4872 (*work_done)++;
4873
4874 skb = buffer_info->skb;
4875 prefetch(skb->data - NET_IP_ALIGN);
4876 buffer_info->skb = NULL;
4877
4878 i++;
4879 if (i == rx_ring->count)
4880 i = 0;
4881 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
4882 prefetch(next_rxd);
4883 next_buffer = &rx_ring->buffer_info[i];
4884
4885 length = le16_to_cpu(rx_desc->wb.upper.length);
4886 cleaned = true;
4887 cleaned_count++;
4888
4889 if (buffer_info->dma) {
4890 pci_unmap_single(pdev, buffer_info->dma,
4891 rx_ring->rx_buffer_len,
4892 PCI_DMA_FROMDEVICE);
4893 buffer_info->dma = 0;
4894 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
4895 skb_put(skb, length);
4896 goto send_up;
4897 }
4898 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
4899 }
4900
4901 if (length) {
4902 pci_unmap_page(pdev, buffer_info->page_dma,
4903 PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
4904 buffer_info->page_dma = 0;
4905
4906 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
4907 buffer_info->page,
4908 buffer_info->page_offset,
4909 length);
4910
4911 if (page_count(buffer_info->page) != 1)
4912 buffer_info->page = NULL;
4913 else
4914 get_page(buffer_info->page);
4915
4916 skb->len += length;
4917 skb->data_len += length;
4918
4919 skb->truesize += length;
4920 }
4921
4922 if (!(staterr & E1000_RXD_STAT_EOP)) {
4923 buffer_info->skb = next_buffer->skb;
4924 buffer_info->dma = next_buffer->dma;
4925 next_buffer->skb = skb;
4926 next_buffer->dma = 0;
4927 goto next_desc;
4928 }
4929 send_up:
4930 /*
4931 * If this bit is set, then the RX registers contain
4932 * the time stamp. No other packet will be time
4933 * stamped until we read these registers, so read the
4934 * registers to make them available again. Because
4935 * only one packet can be time stamped at a time, we
4936 * know that the register values must belong to this
4937 * one here and therefore we don't need to compare
4938 * any of the additional attributes stored for it.
4939 *
4940 * If nothing went wrong, then it should have a
4941 * skb_shared_tx that we can turn into a
4942 * skb_shared_hwtstamps.
4943 *
4944 * TODO: can time stamping be triggered (thus locking
4945 * the registers) without the packet reaching this point
4946 * here? In that case RX time stamping would get stuck.
4947 *
4948 * TODO: in "time stamp all packets" mode this bit is
4949 * not set. Need a global flag for this mode and then
4950 * always read the registers. Cannot be done without
4951 * a race condition.
4952 */
4953 if (unlikely(staterr & E1000_RXD_STAT_TS)) {
4954 u64 regval;
4955 u64 ns;
4956 struct skb_shared_hwtstamps *shhwtstamps =
4957 skb_hwtstamps(skb);
4958
4959 WARN(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID),
4960 "igb: no RX time stamp available for time stamped packet");
4961 regval = rd32(E1000_RXSTMPL);
4962 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4963 ns = timecounter_cyc2time(&adapter->clock, regval);
4964 timecompare_update(&adapter->compare, ns);
4965 memset(shhwtstamps, 0, sizeof(*shhwtstamps));
4966 shhwtstamps->hwtstamp = ns_to_ktime(ns);
4967 shhwtstamps->syststamp =
4968 timecompare_transform(&adapter->compare, ns);
4969 }
4970
4971 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
4972 dev_kfree_skb_irq(skb);
4973 goto next_desc;
4974 }
4975
4976 total_bytes += skb->len;
4977 total_packets++;
4978
4979 igb_rx_checksum_adv(rx_ring, staterr, skb);
4980
4981 skb->protocol = eth_type_trans(skb, netdev);
4982 skb_record_rx_queue(skb, rx_ring->queue_index);
4983
4984 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
4985 le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
4986
4987 igb_receive_skb(q_vector, skb, vlan_tag);
4988
4989 next_desc:
4990 rx_desc->wb.upper.status_error = 0;
4991
4992 /* return some buffers to hardware, one at a time is too slow */
4993 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
4994 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
4995 cleaned_count = 0;
4996 }
4997
4998 /* use prefetched values */
4999 rx_desc = next_rxd;
5000 buffer_info = next_buffer;
5001 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5002 }
5003
5004 rx_ring->next_to_clean = i;
5005 cleaned_count = igb_desc_unused(rx_ring);
5006
5007 if (cleaned_count)
5008 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5009
5010 rx_ring->total_packets += total_packets;
5011 rx_ring->total_bytes += total_bytes;
5012 rx_ring->rx_stats.packets += total_packets;
5013 rx_ring->rx_stats.bytes += total_bytes;
5014 netdev->stats.rx_bytes += total_bytes;
5015 netdev->stats.rx_packets += total_packets;
5016 return cleaned;
5017 }
5018
5019 /**
5020 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5021 * @adapter: address of board private structure
5022 **/
5023 static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring,
5024 int cleaned_count)
5025 {
5026 struct net_device *netdev = rx_ring->netdev;
5027 union e1000_adv_rx_desc *rx_desc;
5028 struct igb_buffer *buffer_info;
5029 struct sk_buff *skb;
5030 unsigned int i;
5031 int bufsz;
5032
5033 i = rx_ring->next_to_use;
5034 buffer_info = &rx_ring->buffer_info[i];
5035
5036 bufsz = rx_ring->rx_buffer_len;
5037
5038 while (cleaned_count--) {
5039 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5040
5041 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5042 if (!buffer_info->page) {
5043 buffer_info->page = alloc_page(GFP_ATOMIC);
5044 if (!buffer_info->page) {
5045 rx_ring->rx_stats.alloc_failed++;
5046 goto no_buffers;
5047 }
5048 buffer_info->page_offset = 0;
5049 } else {
5050 buffer_info->page_offset ^= PAGE_SIZE / 2;
5051 }
5052 buffer_info->page_dma =
5053 pci_map_page(rx_ring->pdev, buffer_info->page,
5054 buffer_info->page_offset,
5055 PAGE_SIZE / 2,
5056 PCI_DMA_FROMDEVICE);
5057 }
5058
5059 if (!buffer_info->skb) {
5060 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5061 if (!skb) {
5062 rx_ring->rx_stats.alloc_failed++;
5063 goto no_buffers;
5064 }
5065
5066 buffer_info->skb = skb;
5067 buffer_info->dma = pci_map_single(rx_ring->pdev,
5068 skb->data,
5069 bufsz,
5070 PCI_DMA_FROMDEVICE);
5071 }
5072 /* Refresh the desc even if buffer_addrs didn't change because
5073 * each write-back erases this info. */
5074 if (bufsz < IGB_RXBUFFER_1024) {
5075 rx_desc->read.pkt_addr =
5076 cpu_to_le64(buffer_info->page_dma);
5077 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5078 } else {
5079 rx_desc->read.pkt_addr =
5080 cpu_to_le64(buffer_info->dma);
5081 rx_desc->read.hdr_addr = 0;
5082 }
5083
5084 i++;
5085 if (i == rx_ring->count)
5086 i = 0;
5087 buffer_info = &rx_ring->buffer_info[i];
5088 }
5089
5090 no_buffers:
5091 if (rx_ring->next_to_use != i) {
5092 rx_ring->next_to_use = i;
5093 if (i == 0)
5094 i = (rx_ring->count - 1);
5095 else
5096 i--;
5097
5098 /* Force memory writes to complete before letting h/w
5099 * know there are new descriptors to fetch. (Only
5100 * applicable for weak-ordered memory model archs,
5101 * such as IA-64). */
5102 wmb();
5103 writel(i, rx_ring->tail);
5104 }
5105 }
5106
5107 /**
5108 * igb_mii_ioctl -
5109 * @netdev:
5110 * @ifreq:
5111 * @cmd:
5112 **/
5113 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5114 {
5115 struct igb_adapter *adapter = netdev_priv(netdev);
5116 struct mii_ioctl_data *data = if_mii(ifr);
5117
5118 if (adapter->hw.phy.media_type != e1000_media_type_copper)
5119 return -EOPNOTSUPP;
5120
5121 switch (cmd) {
5122 case SIOCGMIIPHY:
5123 data->phy_id = adapter->hw.phy.addr;
5124 break;
5125 case SIOCGMIIREG:
5126 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5127 &data->val_out))
5128 return -EIO;
5129 break;
5130 case SIOCSMIIREG:
5131 default:
5132 return -EOPNOTSUPP;
5133 }
5134 return 0;
5135 }
5136
5137 /**
5138 * igb_hwtstamp_ioctl - control hardware time stamping
5139 * @netdev:
5140 * @ifreq:
5141 * @cmd:
5142 *
5143 * Outgoing time stamping can be enabled and disabled. Play nice and
5144 * disable it when requested, although it shouldn't case any overhead
5145 * when no packet needs it. At most one packet in the queue may be
5146 * marked for time stamping, otherwise it would be impossible to tell
5147 * for sure to which packet the hardware time stamp belongs.
5148 *
5149 * Incoming time stamping has to be configured via the hardware
5150 * filters. Not all combinations are supported, in particular event
5151 * type has to be specified. Matching the kind of event packet is
5152 * not supported, with the exception of "all V2 events regardless of
5153 * level 2 or 4".
5154 *
5155 **/
5156 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5157 struct ifreq *ifr, int cmd)
5158 {
5159 struct igb_adapter *adapter = netdev_priv(netdev);
5160 struct e1000_hw *hw = &adapter->hw;
5161 struct hwtstamp_config config;
5162 u32 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
5163 u32 tsync_rx_ctl_bit = E1000_TSYNCRXCTL_ENABLED;
5164 u32 tsync_rx_ctl_type = 0;
5165 u32 tsync_rx_cfg = 0;
5166 int is_l4 = 0;
5167 int is_l2 = 0;
5168 short port = 319; /* PTP */
5169 u32 regval;
5170
5171 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5172 return -EFAULT;
5173
5174 /* reserved for future extensions */
5175 if (config.flags)
5176 return -EINVAL;
5177
5178 switch (config.tx_type) {
5179 case HWTSTAMP_TX_OFF:
5180 tsync_tx_ctl_bit = 0;
5181 break;
5182 case HWTSTAMP_TX_ON:
5183 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
5184 break;
5185 default:
5186 return -ERANGE;
5187 }
5188
5189 switch (config.rx_filter) {
5190 case HWTSTAMP_FILTER_NONE:
5191 tsync_rx_ctl_bit = 0;
5192 break;
5193 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5194 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5195 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5196 case HWTSTAMP_FILTER_ALL:
5197 /*
5198 * register TSYNCRXCFG must be set, therefore it is not
5199 * possible to time stamp both Sync and Delay_Req messages
5200 * => fall back to time stamping all packets
5201 */
5202 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_ALL;
5203 config.rx_filter = HWTSTAMP_FILTER_ALL;
5204 break;
5205 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5206 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
5207 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5208 is_l4 = 1;
5209 break;
5210 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5211 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
5212 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5213 is_l4 = 1;
5214 break;
5215 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5216 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5217 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5218 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5219 is_l2 = 1;
5220 is_l4 = 1;
5221 config.rx_filter = HWTSTAMP_FILTER_SOME;
5222 break;
5223 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5224 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5225 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5226 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5227 is_l2 = 1;
5228 is_l4 = 1;
5229 config.rx_filter = HWTSTAMP_FILTER_SOME;
5230 break;
5231 case HWTSTAMP_FILTER_PTP_V2_EVENT:
5232 case HWTSTAMP_FILTER_PTP_V2_SYNC:
5233 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5234 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5235 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5236 is_l2 = 1;
5237 break;
5238 default:
5239 return -ERANGE;
5240 }
5241
5242 /* enable/disable TX */
5243 regval = rd32(E1000_TSYNCTXCTL);
5244 regval = (regval & ~E1000_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit;
5245 wr32(E1000_TSYNCTXCTL, regval);
5246
5247 /* enable/disable RX, define which PTP packets are time stamped */
5248 regval = rd32(E1000_TSYNCRXCTL);
5249 regval = (regval & ~E1000_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit;
5250 regval = (regval & ~0xE) | tsync_rx_ctl_type;
5251 wr32(E1000_TSYNCRXCTL, regval);
5252 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5253
5254 /*
5255 * Ethertype Filter Queue Filter[0][15:0] = 0x88F7
5256 * (Ethertype to filter on)
5257 * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
5258 * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
5259 */
5260 wr32(E1000_ETQF0, is_l2 ? 0x440088f7 : 0);
5261
5262 /* L4 Queue Filter[0]: only filter by source and destination port */
5263 wr32(E1000_SPQF0, htons(port));
5264 wr32(E1000_IMIREXT(0), is_l4 ?
5265 ((1<<12) | (1<<19) /* bypass size and control flags */) : 0);
5266 wr32(E1000_IMIR(0), is_l4 ?
5267 (htons(port)
5268 | (0<<16) /* immediate interrupt disabled */
5269 | 0 /* (1<<17) bit cleared: do not bypass
5270 destination port check */)
5271 : 0);
5272 wr32(E1000_FTQF0, is_l4 ?
5273 (0x11 /* UDP */
5274 | (1<<15) /* VF not compared */
5275 | (1<<27) /* Enable Timestamping */
5276 | (7<<28) /* only source port filter enabled,
5277 source/target address and protocol
5278 masked */)
5279 : ((1<<15) | (15<<28) /* all mask bits set = filter not
5280 enabled */));
5281
5282 wrfl();
5283
5284 adapter->hwtstamp_config = config;
5285
5286 /* clear TX/RX time stamp registers, just to be sure */
5287 regval = rd32(E1000_TXSTMPH);
5288 regval = rd32(E1000_RXSTMPH);
5289
5290 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5291 -EFAULT : 0;
5292 }
5293
5294 /**
5295 * igb_ioctl -
5296 * @netdev:
5297 * @ifreq:
5298 * @cmd:
5299 **/
5300 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5301 {
5302 switch (cmd) {
5303 case SIOCGMIIPHY:
5304 case SIOCGMIIREG:
5305 case SIOCSMIIREG:
5306 return igb_mii_ioctl(netdev, ifr, cmd);
5307 case SIOCSHWTSTAMP:
5308 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5309 default:
5310 return -EOPNOTSUPP;
5311 }
5312 }
5313
5314 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5315 {
5316 struct igb_adapter *adapter = hw->back;
5317 u16 cap_offset;
5318
5319 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5320 if (!cap_offset)
5321 return -E1000_ERR_CONFIG;
5322
5323 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5324
5325 return 0;
5326 }
5327
5328 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5329 {
5330 struct igb_adapter *adapter = hw->back;
5331 u16 cap_offset;
5332
5333 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5334 if (!cap_offset)
5335 return -E1000_ERR_CONFIG;
5336
5337 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5338
5339 return 0;
5340 }
5341
5342 static void igb_vlan_rx_register(struct net_device *netdev,
5343 struct vlan_group *grp)
5344 {
5345 struct igb_adapter *adapter = netdev_priv(netdev);
5346 struct e1000_hw *hw = &adapter->hw;
5347 u32 ctrl, rctl;
5348
5349 igb_irq_disable(adapter);
5350 adapter->vlgrp = grp;
5351
5352 if (grp) {
5353 /* enable VLAN tag insert/strip */
5354 ctrl = rd32(E1000_CTRL);
5355 ctrl |= E1000_CTRL_VME;
5356 wr32(E1000_CTRL, ctrl);
5357
5358 /* enable VLAN receive filtering */
5359 rctl = rd32(E1000_RCTL);
5360 rctl &= ~E1000_RCTL_CFIEN;
5361 wr32(E1000_RCTL, rctl);
5362 igb_update_mng_vlan(adapter);
5363 } else {
5364 /* disable VLAN tag insert/strip */
5365 ctrl = rd32(E1000_CTRL);
5366 ctrl &= ~E1000_CTRL_VME;
5367 wr32(E1000_CTRL, ctrl);
5368
5369 if (adapter->mng_vlan_id != (u16)IGB_MNG_VLAN_NONE) {
5370 igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
5371 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
5372 }
5373 }
5374
5375 igb_rlpml_set(adapter);
5376
5377 if (!test_bit(__IGB_DOWN, &adapter->state))
5378 igb_irq_enable(adapter);
5379 }
5380
5381 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5382 {
5383 struct igb_adapter *adapter = netdev_priv(netdev);
5384 struct e1000_hw *hw = &adapter->hw;
5385 int pf_id = adapter->vfs_allocated_count;
5386
5387 if ((hw->mng_cookie.status &
5388 E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
5389 (vid == adapter->mng_vlan_id))
5390 return;
5391
5392 /* add vid to vlvf if sr-iov is enabled,
5393 * if that fails add directly to filter table */
5394 if (igb_vlvf_set(adapter, vid, true, pf_id))
5395 igb_vfta_set(hw, vid, true);
5396
5397 }
5398
5399 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5400 {
5401 struct igb_adapter *adapter = netdev_priv(netdev);
5402 struct e1000_hw *hw = &adapter->hw;
5403 int pf_id = adapter->vfs_allocated_count;
5404
5405 igb_irq_disable(adapter);
5406 vlan_group_set_device(adapter->vlgrp, vid, NULL);
5407
5408 if (!test_bit(__IGB_DOWN, &adapter->state))
5409 igb_irq_enable(adapter);
5410
5411 if ((adapter->hw.mng_cookie.status &
5412 E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
5413 (vid == adapter->mng_vlan_id)) {
5414 /* release control to f/w */
5415 igb_release_hw_control(adapter);
5416 return;
5417 }
5418
5419 /* remove vid from vlvf if sr-iov is enabled,
5420 * if not in vlvf remove from vfta */
5421 if (igb_vlvf_set(adapter, vid, false, pf_id))
5422 igb_vfta_set(hw, vid, false);
5423 }
5424
5425 static void igb_restore_vlan(struct igb_adapter *adapter)
5426 {
5427 igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5428
5429 if (adapter->vlgrp) {
5430 u16 vid;
5431 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5432 if (!vlan_group_get_device(adapter->vlgrp, vid))
5433 continue;
5434 igb_vlan_rx_add_vid(adapter->netdev, vid);
5435 }
5436 }
5437 }
5438
5439 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5440 {
5441 struct e1000_mac_info *mac = &adapter->hw.mac;
5442
5443 mac->autoneg = 0;
5444
5445 switch (spddplx) {
5446 case SPEED_10 + DUPLEX_HALF:
5447 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5448 break;
5449 case SPEED_10 + DUPLEX_FULL:
5450 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5451 break;
5452 case SPEED_100 + DUPLEX_HALF:
5453 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5454 break;
5455 case SPEED_100 + DUPLEX_FULL:
5456 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5457 break;
5458 case SPEED_1000 + DUPLEX_FULL:
5459 mac->autoneg = 1;
5460 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5461 break;
5462 case SPEED_1000 + DUPLEX_HALF: /* not supported */
5463 default:
5464 dev_err(&adapter->pdev->dev,
5465 "Unsupported Speed/Duplex configuration\n");
5466 return -EINVAL;
5467 }
5468 return 0;
5469 }
5470
5471 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5472 {
5473 struct net_device *netdev = pci_get_drvdata(pdev);
5474 struct igb_adapter *adapter = netdev_priv(netdev);
5475 struct e1000_hw *hw = &adapter->hw;
5476 u32 ctrl, rctl, status;
5477 u32 wufc = adapter->wol;
5478 #ifdef CONFIG_PM
5479 int retval = 0;
5480 #endif
5481
5482 netif_device_detach(netdev);
5483
5484 if (netif_running(netdev))
5485 igb_close(netdev);
5486
5487 igb_clear_interrupt_scheme(adapter);
5488
5489 #ifdef CONFIG_PM
5490 retval = pci_save_state(pdev);
5491 if (retval)
5492 return retval;
5493 #endif
5494
5495 status = rd32(E1000_STATUS);
5496 if (status & E1000_STATUS_LU)
5497 wufc &= ~E1000_WUFC_LNKC;
5498
5499 if (wufc) {
5500 igb_setup_rctl(adapter);
5501 igb_set_rx_mode(netdev);
5502
5503 /* turn on all-multi mode if wake on multicast is enabled */
5504 if (wufc & E1000_WUFC_MC) {
5505 rctl = rd32(E1000_RCTL);
5506 rctl |= E1000_RCTL_MPE;
5507 wr32(E1000_RCTL, rctl);
5508 }
5509
5510 ctrl = rd32(E1000_CTRL);
5511 /* advertise wake from D3Cold */
5512 #define E1000_CTRL_ADVD3WUC 0x00100000
5513 /* phy power management enable */
5514 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5515 ctrl |= E1000_CTRL_ADVD3WUC;
5516 wr32(E1000_CTRL, ctrl);
5517
5518 /* Allow time for pending master requests to run */
5519 igb_disable_pcie_master(&adapter->hw);
5520
5521 wr32(E1000_WUC, E1000_WUC_PME_EN);
5522 wr32(E1000_WUFC, wufc);
5523 } else {
5524 wr32(E1000_WUC, 0);
5525 wr32(E1000_WUFC, 0);
5526 }
5527
5528 *enable_wake = wufc || adapter->en_mng_pt;
5529 if (!*enable_wake)
5530 igb_shutdown_serdes_link_82575(hw);
5531
5532 /* Release control of h/w to f/w. If f/w is AMT enabled, this
5533 * would have already happened in close and is redundant. */
5534 igb_release_hw_control(adapter);
5535
5536 pci_disable_device(pdev);
5537
5538 return 0;
5539 }
5540
5541 #ifdef CONFIG_PM
5542 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5543 {
5544 int retval;
5545 bool wake;
5546
5547 retval = __igb_shutdown(pdev, &wake);
5548 if (retval)
5549 return retval;
5550
5551 if (wake) {
5552 pci_prepare_to_sleep(pdev);
5553 } else {
5554 pci_wake_from_d3(pdev, false);
5555 pci_set_power_state(pdev, PCI_D3hot);
5556 }
5557
5558 return 0;
5559 }
5560
5561 static int igb_resume(struct pci_dev *pdev)
5562 {
5563 struct net_device *netdev = pci_get_drvdata(pdev);
5564 struct igb_adapter *adapter = netdev_priv(netdev);
5565 struct e1000_hw *hw = &adapter->hw;
5566 u32 err;
5567
5568 pci_set_power_state(pdev, PCI_D0);
5569 pci_restore_state(pdev);
5570
5571 err = pci_enable_device_mem(pdev);
5572 if (err) {
5573 dev_err(&pdev->dev,
5574 "igb: Cannot enable PCI device from suspend\n");
5575 return err;
5576 }
5577 pci_set_master(pdev);
5578
5579 pci_enable_wake(pdev, PCI_D3hot, 0);
5580 pci_enable_wake(pdev, PCI_D3cold, 0);
5581
5582 if (igb_init_interrupt_scheme(adapter)) {
5583 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5584 return -ENOMEM;
5585 }
5586
5587 /* e1000_power_up_phy(adapter); */
5588
5589 igb_reset(adapter);
5590
5591 /* let the f/w know that the h/w is now under the control of the
5592 * driver. */
5593 igb_get_hw_control(adapter);
5594
5595 wr32(E1000_WUS, ~0);
5596
5597 if (netif_running(netdev)) {
5598 err = igb_open(netdev);
5599 if (err)
5600 return err;
5601 }
5602
5603 netif_device_attach(netdev);
5604
5605 return 0;
5606 }
5607 #endif
5608
5609 static void igb_shutdown(struct pci_dev *pdev)
5610 {
5611 bool wake;
5612
5613 __igb_shutdown(pdev, &wake);
5614
5615 if (system_state == SYSTEM_POWER_OFF) {
5616 pci_wake_from_d3(pdev, wake);
5617 pci_set_power_state(pdev, PCI_D3hot);
5618 }
5619 }
5620
5621 #ifdef CONFIG_NET_POLL_CONTROLLER
5622 /*
5623 * Polling 'interrupt' - used by things like netconsole to send skbs
5624 * without having to re-enable interrupts. It's not called while
5625 * the interrupt routine is executing.
5626 */
5627 static void igb_netpoll(struct net_device *netdev)
5628 {
5629 struct igb_adapter *adapter = netdev_priv(netdev);
5630 struct e1000_hw *hw = &adapter->hw;
5631 int i;
5632
5633 if (!adapter->msix_entries) {
5634 struct igb_q_vector *q_vector = adapter->q_vector[0];
5635 igb_irq_disable(adapter);
5636 napi_schedule(&q_vector->napi);
5637 return;
5638 }
5639
5640 for (i = 0; i < adapter->num_q_vectors; i++) {
5641 struct igb_q_vector *q_vector = adapter->q_vector[i];
5642 wr32(E1000_EIMC, q_vector->eims_value);
5643 napi_schedule(&q_vector->napi);
5644 }
5645 }
5646 #endif /* CONFIG_NET_POLL_CONTROLLER */
5647
5648 /**
5649 * igb_io_error_detected - called when PCI error is detected
5650 * @pdev: Pointer to PCI device
5651 * @state: The current pci connection state
5652 *
5653 * This function is called after a PCI bus error affecting
5654 * this device has been detected.
5655 */
5656 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5657 pci_channel_state_t state)
5658 {
5659 struct net_device *netdev = pci_get_drvdata(pdev);
5660 struct igb_adapter *adapter = netdev_priv(netdev);
5661
5662 netif_device_detach(netdev);
5663
5664 if (state == pci_channel_io_perm_failure)
5665 return PCI_ERS_RESULT_DISCONNECT;
5666
5667 if (netif_running(netdev))
5668 igb_down(adapter);
5669 pci_disable_device(pdev);
5670
5671 /* Request a slot slot reset. */
5672 return PCI_ERS_RESULT_NEED_RESET;
5673 }
5674
5675 /**
5676 * igb_io_slot_reset - called after the pci bus has been reset.
5677 * @pdev: Pointer to PCI device
5678 *
5679 * Restart the card from scratch, as if from a cold-boot. Implementation
5680 * resembles the first-half of the igb_resume routine.
5681 */
5682 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5683 {
5684 struct net_device *netdev = pci_get_drvdata(pdev);
5685 struct igb_adapter *adapter = netdev_priv(netdev);
5686 struct e1000_hw *hw = &adapter->hw;
5687 pci_ers_result_t result;
5688 int err;
5689
5690 if (pci_enable_device_mem(pdev)) {
5691 dev_err(&pdev->dev,
5692 "Cannot re-enable PCI device after reset.\n");
5693 result = PCI_ERS_RESULT_DISCONNECT;
5694 } else {
5695 pci_set_master(pdev);
5696 pci_restore_state(pdev);
5697
5698 pci_enable_wake(pdev, PCI_D3hot, 0);
5699 pci_enable_wake(pdev, PCI_D3cold, 0);
5700
5701 igb_reset(adapter);
5702 wr32(E1000_WUS, ~0);
5703 result = PCI_ERS_RESULT_RECOVERED;
5704 }
5705
5706 err = pci_cleanup_aer_uncorrect_error_status(pdev);
5707 if (err) {
5708 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
5709 "failed 0x%0x\n", err);
5710 /* non-fatal, continue */
5711 }
5712
5713 return result;
5714 }
5715
5716 /**
5717 * igb_io_resume - called when traffic can start flowing again.
5718 * @pdev: Pointer to PCI device
5719 *
5720 * This callback is called when the error recovery driver tells us that
5721 * its OK to resume normal operation. Implementation resembles the
5722 * second-half of the igb_resume routine.
5723 */
5724 static void igb_io_resume(struct pci_dev *pdev)
5725 {
5726 struct net_device *netdev = pci_get_drvdata(pdev);
5727 struct igb_adapter *adapter = netdev_priv(netdev);
5728
5729 if (netif_running(netdev)) {
5730 if (igb_up(adapter)) {
5731 dev_err(&pdev->dev, "igb_up failed after reset\n");
5732 return;
5733 }
5734 }
5735
5736 netif_device_attach(netdev);
5737
5738 /* let the f/w know that the h/w is now under the control of the
5739 * driver. */
5740 igb_get_hw_control(adapter);
5741 }
5742
5743 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
5744 u8 qsel)
5745 {
5746 u32 rar_low, rar_high;
5747 struct e1000_hw *hw = &adapter->hw;
5748
5749 /* HW expects these in little endian so we reverse the byte order
5750 * from network order (big endian) to little endian
5751 */
5752 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
5753 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
5754 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
5755
5756 /* Indicate to hardware the Address is Valid. */
5757 rar_high |= E1000_RAH_AV;
5758
5759 if (hw->mac.type == e1000_82575)
5760 rar_high |= E1000_RAH_POOL_1 * qsel;
5761 else
5762 rar_high |= E1000_RAH_POOL_1 << qsel;
5763
5764 wr32(E1000_RAL(index), rar_low);
5765 wrfl();
5766 wr32(E1000_RAH(index), rar_high);
5767 wrfl();
5768 }
5769
5770 static int igb_set_vf_mac(struct igb_adapter *adapter,
5771 int vf, unsigned char *mac_addr)
5772 {
5773 struct e1000_hw *hw = &adapter->hw;
5774 /* VF MAC addresses start at end of receive addresses and moves
5775 * torwards the first, as a result a collision should not be possible */
5776 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5777
5778 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
5779
5780 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
5781
5782 return 0;
5783 }
5784
5785 static void igb_vmm_control(struct igb_adapter *adapter)
5786 {
5787 struct e1000_hw *hw = &adapter->hw;
5788 u32 reg_data;
5789
5790 if (!adapter->vfs_allocated_count)
5791 return;
5792
5793 /* VF's need PF reset indication before they
5794 * can send/receive mail */
5795 reg_data = rd32(E1000_CTRL_EXT);
5796 reg_data |= E1000_CTRL_EXT_PFRSTD;
5797 wr32(E1000_CTRL_EXT, reg_data);
5798
5799 igb_vmdq_set_loopback_pf(hw, true);
5800 igb_vmdq_set_replication_pf(hw, true);
5801 }
5802
5803 /* igb_main.c */
This page took 0.155796 seconds and 6 git commands to generate.