igb: re-use ring configuration code in ethtool testing
[deliverable/linux.git] / drivers / net / igb / igb_main.c
1 /*******************************************************************************
2
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2009 Intel Corporation.
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
9
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
14
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
21
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "1.3.16-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60 [board_82575] = &e1000_82575_info,
61 };
62
63 static struct pci_device_id igb_pci_tbl[] = {
64 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
65 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
66 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
67 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
68 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
69 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
70 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
71 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
72 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
73 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
74 /* required last entry */
75 {0, }
76 };
77
78 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
79
80 void igb_reset(struct igb_adapter *);
81 static int igb_setup_all_tx_resources(struct igb_adapter *);
82 static int igb_setup_all_rx_resources(struct igb_adapter *);
83 static void igb_free_all_tx_resources(struct igb_adapter *);
84 static void igb_free_all_rx_resources(struct igb_adapter *);
85 static void igb_setup_mrqc(struct igb_adapter *);
86 void igb_update_stats(struct igb_adapter *);
87 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
88 static void __devexit igb_remove(struct pci_dev *pdev);
89 static int igb_sw_init(struct igb_adapter *);
90 static int igb_open(struct net_device *);
91 static int igb_close(struct net_device *);
92 static void igb_configure_tx(struct igb_adapter *);
93 static void igb_configure_rx(struct igb_adapter *);
94 static void igb_clean_all_tx_rings(struct igb_adapter *);
95 static void igb_clean_all_rx_rings(struct igb_adapter *);
96 static void igb_clean_tx_ring(struct igb_ring *);
97 static void igb_clean_rx_ring(struct igb_ring *);
98 static void igb_set_rx_mode(struct net_device *);
99 static void igb_update_phy_info(unsigned long);
100 static void igb_watchdog(unsigned long);
101 static void igb_watchdog_task(struct work_struct *);
102 static netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *,
103 struct igb_ring *);
104 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
105 struct net_device *);
106 static struct net_device_stats *igb_get_stats(struct net_device *);
107 static int igb_change_mtu(struct net_device *, int);
108 static int igb_set_mac(struct net_device *, void *);
109 static void igb_set_uta(struct igb_adapter *adapter);
110 static irqreturn_t igb_intr(int irq, void *);
111 static irqreturn_t igb_intr_msi(int irq, void *);
112 static irqreturn_t igb_msix_other(int irq, void *);
113 static irqreturn_t igb_msix_ring(int irq, void *);
114 #ifdef CONFIG_IGB_DCA
115 static void igb_update_dca(struct igb_q_vector *);
116 static void igb_setup_dca(struct igb_adapter *);
117 #endif /* CONFIG_IGB_DCA */
118 static bool igb_clean_tx_irq(struct igb_q_vector *);
119 static int igb_poll(struct napi_struct *, int);
120 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
121 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
122 static void igb_tx_timeout(struct net_device *);
123 static void igb_reset_task(struct work_struct *);
124 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
125 static void igb_vlan_rx_add_vid(struct net_device *, u16);
126 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
127 static void igb_restore_vlan(struct igb_adapter *);
128 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
129 static void igb_ping_all_vfs(struct igb_adapter *);
130 static void igb_msg_task(struct igb_adapter *);
131 static int igb_rcv_msg_from_vf(struct igb_adapter *, u32);
132 static void igb_vmm_control(struct igb_adapter *);
133 static int igb_set_vf_mac(struct igb_adapter *adapter, int, unsigned char *);
134 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
135
136 static inline void igb_set_vmolr(struct e1000_hw *hw, int vfn)
137 {
138 u32 reg_data;
139
140 reg_data = rd32(E1000_VMOLR(vfn));
141 reg_data |= E1000_VMOLR_BAM | /* Accept broadcast */
142 E1000_VMOLR_ROMPE | /* Accept packets matched in MTA */
143 E1000_VMOLR_AUPE | /* Accept untagged packets */
144 E1000_VMOLR_STRVLAN; /* Strip vlan tags */
145 wr32(E1000_VMOLR(vfn), reg_data);
146 }
147
148 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
149 int vfn)
150 {
151 struct e1000_hw *hw = &adapter->hw;
152 u32 vmolr;
153
154 /* if it isn't the PF check to see if VFs are enabled and
155 * increase the size to support vlan tags */
156 if (vfn < adapter->vfs_allocated_count &&
157 adapter->vf_data[vfn].vlans_enabled)
158 size += VLAN_TAG_SIZE;
159
160 vmolr = rd32(E1000_VMOLR(vfn));
161 vmolr &= ~E1000_VMOLR_RLPML_MASK;
162 vmolr |= size | E1000_VMOLR_LPE;
163 wr32(E1000_VMOLR(vfn), vmolr);
164
165 return 0;
166 }
167
168 #ifdef CONFIG_PM
169 static int igb_suspend(struct pci_dev *, pm_message_t);
170 static int igb_resume(struct pci_dev *);
171 #endif
172 static void igb_shutdown(struct pci_dev *);
173 #ifdef CONFIG_IGB_DCA
174 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
175 static struct notifier_block dca_notifier = {
176 .notifier_call = igb_notify_dca,
177 .next = NULL,
178 .priority = 0
179 };
180 #endif
181 #ifdef CONFIG_NET_POLL_CONTROLLER
182 /* for netdump / net console */
183 static void igb_netpoll(struct net_device *);
184 #endif
185 #ifdef CONFIG_PCI_IOV
186 static unsigned int max_vfs = 0;
187 module_param(max_vfs, uint, 0);
188 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
189 "per physical function");
190 #endif /* CONFIG_PCI_IOV */
191
192 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
193 pci_channel_state_t);
194 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
195 static void igb_io_resume(struct pci_dev *);
196
197 static struct pci_error_handlers igb_err_handler = {
198 .error_detected = igb_io_error_detected,
199 .slot_reset = igb_io_slot_reset,
200 .resume = igb_io_resume,
201 };
202
203
204 static struct pci_driver igb_driver = {
205 .name = igb_driver_name,
206 .id_table = igb_pci_tbl,
207 .probe = igb_probe,
208 .remove = __devexit_p(igb_remove),
209 #ifdef CONFIG_PM
210 /* Power Managment Hooks */
211 .suspend = igb_suspend,
212 .resume = igb_resume,
213 #endif
214 .shutdown = igb_shutdown,
215 .err_handler = &igb_err_handler
216 };
217
218 static int global_quad_port_a; /* global quad port a indication */
219
220 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
221 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
222 MODULE_LICENSE("GPL");
223 MODULE_VERSION(DRV_VERSION);
224
225 /**
226 * Scale the NIC clock cycle by a large factor so that
227 * relatively small clock corrections can be added or
228 * subtracted at each clock tick. The drawbacks of a
229 * large factor are a) that the clock register overflows
230 * more quickly (not such a big deal) and b) that the
231 * increment per tick has to fit into 24 bits.
232 *
233 * Note that
234 * TIMINCA = IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS *
235 * IGB_TSYNC_SCALE
236 * TIMINCA += TIMINCA * adjustment [ppm] / 1e9
237 *
238 * The base scale factor is intentionally a power of two
239 * so that the division in %struct timecounter can be done with
240 * a shift.
241 */
242 #define IGB_TSYNC_SHIFT (19)
243 #define IGB_TSYNC_SCALE (1<<IGB_TSYNC_SHIFT)
244
245 /**
246 * The duration of one clock cycle of the NIC.
247 *
248 * @todo This hard-coded value is part of the specification and might change
249 * in future hardware revisions. Add revision check.
250 */
251 #define IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS 16
252
253 #if (IGB_TSYNC_SCALE * IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS) >= (1<<24)
254 # error IGB_TSYNC_SCALE and/or IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS are too large to fit into TIMINCA
255 #endif
256
257 /**
258 * igb_read_clock - read raw cycle counter (to be used by time counter)
259 */
260 static cycle_t igb_read_clock(const struct cyclecounter *tc)
261 {
262 struct igb_adapter *adapter =
263 container_of(tc, struct igb_adapter, cycles);
264 struct e1000_hw *hw = &adapter->hw;
265 u64 stamp;
266
267 stamp = rd32(E1000_SYSTIML);
268 stamp |= (u64)rd32(E1000_SYSTIMH) << 32ULL;
269
270 return stamp;
271 }
272
273 #ifdef DEBUG
274 /**
275 * igb_get_hw_dev_name - return device name string
276 * used by hardware layer to print debugging information
277 **/
278 char *igb_get_hw_dev_name(struct e1000_hw *hw)
279 {
280 struct igb_adapter *adapter = hw->back;
281 return adapter->netdev->name;
282 }
283
284 /**
285 * igb_get_time_str - format current NIC and system time as string
286 */
287 static char *igb_get_time_str(struct igb_adapter *adapter,
288 char buffer[160])
289 {
290 cycle_t hw = adapter->cycles.read(&adapter->cycles);
291 struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
292 struct timespec sys;
293 struct timespec delta;
294 getnstimeofday(&sys);
295
296 delta = timespec_sub(nic, sys);
297
298 sprintf(buffer,
299 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
300 hw,
301 (long)nic.tv_sec, nic.tv_nsec,
302 (long)sys.tv_sec, sys.tv_nsec,
303 (long)delta.tv_sec, delta.tv_nsec);
304
305 return buffer;
306 }
307 #endif
308
309 /**
310 * igb_init_module - Driver Registration Routine
311 *
312 * igb_init_module is the first routine called when the driver is
313 * loaded. All it does is register with the PCI subsystem.
314 **/
315 static int __init igb_init_module(void)
316 {
317 int ret;
318 printk(KERN_INFO "%s - version %s\n",
319 igb_driver_string, igb_driver_version);
320
321 printk(KERN_INFO "%s\n", igb_copyright);
322
323 global_quad_port_a = 0;
324
325 #ifdef CONFIG_IGB_DCA
326 dca_register_notify(&dca_notifier);
327 #endif
328
329 ret = pci_register_driver(&igb_driver);
330 return ret;
331 }
332
333 module_init(igb_init_module);
334
335 /**
336 * igb_exit_module - Driver Exit Cleanup Routine
337 *
338 * igb_exit_module is called just before the driver is removed
339 * from memory.
340 **/
341 static void __exit igb_exit_module(void)
342 {
343 #ifdef CONFIG_IGB_DCA
344 dca_unregister_notify(&dca_notifier);
345 #endif
346 pci_unregister_driver(&igb_driver);
347 }
348
349 module_exit(igb_exit_module);
350
/* Interleave queue index i: even i maps to i / 2, odd i maps to 8 + i / 2.
 * The argument is parenthesized so expression arguments expand correctly;
 * it is still evaluated twice, so avoid arguments with side effects. */
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
352 /**
353 * igb_cache_ring_register - Descriptor ring to register mapping
354 * @adapter: board private structure to initialize
355 *
356 * Once we know the feature-set enabled for the device, we'll cache
357 * the register offset the descriptor ring is assigned to.
358 **/
359 static void igb_cache_ring_register(struct igb_adapter *adapter)
360 {
361 int i;
362 u32 rbase_offset = adapter->vfs_allocated_count;
363
364 switch (adapter->hw.mac.type) {
365 case e1000_82576:
366 /* The queues are allocated for virtualization such that VF 0
367 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
368 * In order to avoid collision we start at the first free queue
369 * and continue consuming queues in the same sequence
370 */
371 for (i = 0; i < adapter->num_rx_queues; i++)
372 adapter->rx_ring[i].reg_idx = rbase_offset +
373 Q_IDX_82576(i);
374 for (i = 0; i < adapter->num_tx_queues; i++)
375 adapter->tx_ring[i].reg_idx = rbase_offset +
376 Q_IDX_82576(i);
377 break;
378 case e1000_82575:
379 default:
380 for (i = 0; i < adapter->num_rx_queues; i++)
381 adapter->rx_ring[i].reg_idx = i;
382 for (i = 0; i < adapter->num_tx_queues; i++)
383 adapter->tx_ring[i].reg_idx = i;
384 break;
385 }
386 }
387
388 static void igb_free_queues(struct igb_adapter *adapter)
389 {
390 kfree(adapter->tx_ring);
391 kfree(adapter->rx_ring);
392
393 adapter->tx_ring = NULL;
394 adapter->rx_ring = NULL;
395
396 adapter->num_rx_queues = 0;
397 adapter->num_tx_queues = 0;
398 }
399
400 /**
401 * igb_alloc_queues - Allocate memory for all rings
402 * @adapter: board private structure to initialize
403 *
404 * We allocate one ring per queue at run-time since we don't know the
405 * number of queues at compile-time.
406 **/
407 static int igb_alloc_queues(struct igb_adapter *adapter)
408 {
409 int i;
410
411 adapter->tx_ring = kcalloc(adapter->num_tx_queues,
412 sizeof(struct igb_ring), GFP_KERNEL);
413 if (!adapter->tx_ring)
414 goto err;
415
416 adapter->rx_ring = kcalloc(adapter->num_rx_queues,
417 sizeof(struct igb_ring), GFP_KERNEL);
418 if (!adapter->rx_ring)
419 goto err;
420
421 for (i = 0; i < adapter->num_tx_queues; i++) {
422 struct igb_ring *ring = &(adapter->tx_ring[i]);
423 ring->count = adapter->tx_ring_count;
424 ring->queue_index = i;
425 ring->pdev = adapter->pdev;
426 ring->netdev = adapter->netdev;
427 /* For 82575, context index must be unique per ring. */
428 if (adapter->hw.mac.type == e1000_82575)
429 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
430 }
431
432 for (i = 0; i < adapter->num_rx_queues; i++) {
433 struct igb_ring *ring = &(adapter->rx_ring[i]);
434 ring->count = adapter->rx_ring_count;
435 ring->queue_index = i;
436 ring->pdev = adapter->pdev;
437 ring->netdev = adapter->netdev;
438 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
439 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
440 /* set flag indicating ring supports SCTP checksum offload */
441 if (adapter->hw.mac.type >= e1000_82576)
442 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
443 }
444
445 igb_cache_ring_register(adapter);
446
447 return 0;
448
449 err:
450 igb_free_queues(adapter);
451
452 return -ENOMEM;
453 }
454
455 #define IGB_N0_QUEUE -1
456 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
457 {
458 u32 msixbm = 0;
459 struct igb_adapter *adapter = q_vector->adapter;
460 struct e1000_hw *hw = &adapter->hw;
461 u32 ivar, index;
462 int rx_queue = IGB_N0_QUEUE;
463 int tx_queue = IGB_N0_QUEUE;
464
465 if (q_vector->rx_ring)
466 rx_queue = q_vector->rx_ring->reg_idx;
467 if (q_vector->tx_ring)
468 tx_queue = q_vector->tx_ring->reg_idx;
469
470 switch (hw->mac.type) {
471 case e1000_82575:
472 /* The 82575 assigns vectors using a bitmask, which matches the
473 bitmask for the EICR/EIMS/EIMC registers. To assign one
474 or more queues to a vector, we write the appropriate bits
475 into the MSIXBM register for that vector. */
476 if (rx_queue > IGB_N0_QUEUE)
477 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
478 if (tx_queue > IGB_N0_QUEUE)
479 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
480 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
481 q_vector->eims_value = msixbm;
482 break;
483 case e1000_82576:
484 /* 82576 uses a table-based method for assigning vectors.
485 Each queue has a single entry in the table to which we write
486 a vector number along with a "valid" bit. Sadly, the layout
487 of the table is somewhat counterintuitive. */
488 if (rx_queue > IGB_N0_QUEUE) {
489 index = (rx_queue & 0x7);
490 ivar = array_rd32(E1000_IVAR0, index);
491 if (rx_queue < 8) {
492 /* vector goes into low byte of register */
493 ivar = ivar & 0xFFFFFF00;
494 ivar |= msix_vector | E1000_IVAR_VALID;
495 } else {
496 /* vector goes into third byte of register */
497 ivar = ivar & 0xFF00FFFF;
498 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
499 }
500 array_wr32(E1000_IVAR0, index, ivar);
501 }
502 if (tx_queue > IGB_N0_QUEUE) {
503 index = (tx_queue & 0x7);
504 ivar = array_rd32(E1000_IVAR0, index);
505 if (tx_queue < 8) {
506 /* vector goes into second byte of register */
507 ivar = ivar & 0xFFFF00FF;
508 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
509 } else {
510 /* vector goes into high byte of register */
511 ivar = ivar & 0x00FFFFFF;
512 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
513 }
514 array_wr32(E1000_IVAR0, index, ivar);
515 }
516 q_vector->eims_value = 1 << msix_vector;
517 break;
518 default:
519 BUG();
520 break;
521 }
522 }
523
524 /**
525 * igb_configure_msix - Configure MSI-X hardware
526 *
527 * igb_configure_msix sets up the hardware to properly
528 * generate MSI-X interrupts.
529 **/
530 static void igb_configure_msix(struct igb_adapter *adapter)
531 {
532 u32 tmp;
533 int i, vector = 0;
534 struct e1000_hw *hw = &adapter->hw;
535
536 adapter->eims_enable_mask = 0;
537
538 /* set vector for other causes, i.e. link changes */
539 switch (hw->mac.type) {
540 case e1000_82575:
541 tmp = rd32(E1000_CTRL_EXT);
542 /* enable MSI-X PBA support*/
543 tmp |= E1000_CTRL_EXT_PBA_CLR;
544
545 /* Auto-Mask interrupts upon ICR read. */
546 tmp |= E1000_CTRL_EXT_EIAME;
547 tmp |= E1000_CTRL_EXT_IRCA;
548
549 wr32(E1000_CTRL_EXT, tmp);
550
551 /* enable msix_other interrupt */
552 array_wr32(E1000_MSIXBM(0), vector++,
553 E1000_EIMS_OTHER);
554 adapter->eims_other = E1000_EIMS_OTHER;
555
556 break;
557
558 case e1000_82576:
559 /* Turn on MSI-X capability first, or our settings
560 * won't stick. And it will take days to debug. */
561 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
562 E1000_GPIE_PBA | E1000_GPIE_EIAME |
563 E1000_GPIE_NSICR);
564
565 /* enable msix_other interrupt */
566 adapter->eims_other = 1 << vector;
567 tmp = (vector++ | E1000_IVAR_VALID) << 8;
568
569 wr32(E1000_IVAR_MISC, tmp);
570 break;
571 default:
572 /* do nothing, since nothing else supports MSI-X */
573 break;
574 } /* switch (hw->mac.type) */
575
576 adapter->eims_enable_mask |= adapter->eims_other;
577
578 for (i = 0; i < adapter->num_q_vectors; i++) {
579 struct igb_q_vector *q_vector = adapter->q_vector[i];
580 igb_assign_vector(q_vector, vector++);
581 adapter->eims_enable_mask |= q_vector->eims_value;
582 }
583
584 wrfl();
585 }
586
587 /**
588 * igb_request_msix - Initialize MSI-X interrupts
589 *
590 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
591 * kernel.
592 **/
593 static int igb_request_msix(struct igb_adapter *adapter)
594 {
595 struct net_device *netdev = adapter->netdev;
596 struct e1000_hw *hw = &adapter->hw;
597 int i, err = 0, vector = 0;
598
599 err = request_irq(adapter->msix_entries[vector].vector,
600 &igb_msix_other, 0, netdev->name, adapter);
601 if (err)
602 goto out;
603 vector++;
604
605 for (i = 0; i < adapter->num_q_vectors; i++) {
606 struct igb_q_vector *q_vector = adapter->q_vector[i];
607
608 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
609
610 if (q_vector->rx_ring && q_vector->tx_ring)
611 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
612 q_vector->rx_ring->queue_index);
613 else if (q_vector->tx_ring)
614 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
615 q_vector->tx_ring->queue_index);
616 else if (q_vector->rx_ring)
617 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
618 q_vector->rx_ring->queue_index);
619 else
620 sprintf(q_vector->name, "%s-unused", netdev->name);
621
622 err = request_irq(adapter->msix_entries[vector].vector,
623 &igb_msix_ring, 0, q_vector->name,
624 q_vector);
625 if (err)
626 goto out;
627 vector++;
628 }
629
630 igb_configure_msix(adapter);
631 return 0;
632 out:
633 return err;
634 }
635
636 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
637 {
638 if (adapter->msix_entries) {
639 pci_disable_msix(adapter->pdev);
640 kfree(adapter->msix_entries);
641 adapter->msix_entries = NULL;
642 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
643 pci_disable_msi(adapter->pdev);
644 }
645 }
646
647 /**
648 * igb_free_q_vectors - Free memory allocated for interrupt vectors
649 * @adapter: board private structure to initialize
650 *
651 * This function frees the memory allocated to the q_vectors. In addition if
652 * NAPI is enabled it will delete any references to the NAPI struct prior
653 * to freeing the q_vector.
654 **/
655 static void igb_free_q_vectors(struct igb_adapter *adapter)
656 {
657 int v_idx;
658
659 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
660 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
661 adapter->q_vector[v_idx] = NULL;
662 netif_napi_del(&q_vector->napi);
663 kfree(q_vector);
664 }
665 adapter->num_q_vectors = 0;
666 }
667
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * Leaves the adapter with 0 rx queues, 0 tx queues, no q_vectors and
 * MSI-X/MSI disabled.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* ring arrays first, then the vectors, then the PCI capability */
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
680
681 /**
682 * igb_set_interrupt_capability - set MSI or MSI-X if supported
683 *
684 * Attempt to configure interrupts using the best available
685 * capabilities of the hardware and kernel.
686 **/
687 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
688 {
689 int err;
690 int numvecs, i;
691
692 /* Number of supported queues. */
693 adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
694 adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());
695
696 /* start with one vector for every rx queue */
697 numvecs = adapter->num_rx_queues;
698
699 /* if tx handler is seperate add 1 for every tx queue */
700 numvecs += adapter->num_tx_queues;
701
702 /* store the number of vectors reserved for queues */
703 adapter->num_q_vectors = numvecs;
704
705 /* add 1 vector for link status interrupts */
706 numvecs++;
707 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
708 GFP_KERNEL);
709 if (!adapter->msix_entries)
710 goto msi_only;
711
712 for (i = 0; i < numvecs; i++)
713 adapter->msix_entries[i].entry = i;
714
715 err = pci_enable_msix(adapter->pdev,
716 adapter->msix_entries,
717 numvecs);
718 if (err == 0)
719 goto out;
720
721 igb_reset_interrupt_capability(adapter);
722
723 /* If we can't do MSI-X, try MSI */
724 msi_only:
725 #ifdef CONFIG_PCI_IOV
726 /* disable SR-IOV for non MSI-X configurations */
727 if (adapter->vf_data) {
728 struct e1000_hw *hw = &adapter->hw;
729 /* disable iov and allow time for transactions to clear */
730 pci_disable_sriov(adapter->pdev);
731 msleep(500);
732
733 kfree(adapter->vf_data);
734 adapter->vf_data = NULL;
735 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
736 msleep(100);
737 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
738 }
739 #endif
740 adapter->num_rx_queues = 1;
741 adapter->num_tx_queues = 1;
742 adapter->num_q_vectors = 1;
743 if (!pci_enable_msi(adapter->pdev))
744 adapter->flags |= IGB_FLAG_HAS_MSI;
745 out:
746 /* Notify the stack of the (possibly) reduced Tx Queue count. */
747 adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
748 return;
749 }
750
751 /**
752 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
753 * @adapter: board private structure to initialize
754 *
755 * We allocate one q_vector per queue interrupt. If allocation fails we
756 * return -ENOMEM.
757 **/
758 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
759 {
760 struct igb_q_vector *q_vector;
761 struct e1000_hw *hw = &adapter->hw;
762 int v_idx;
763
764 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
765 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
766 if (!q_vector)
767 goto err_out;
768 q_vector->adapter = adapter;
769 q_vector->itr_shift = (hw->mac.type == e1000_82575) ? 16 : 0;
770 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
771 q_vector->itr_val = IGB_START_ITR;
772 q_vector->set_itr = 1;
773 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
774 adapter->q_vector[v_idx] = q_vector;
775 }
776 return 0;
777
778 err_out:
779 while (v_idx) {
780 v_idx--;
781 q_vector = adapter->q_vector[v_idx];
782 netif_napi_del(&q_vector->napi);
783 kfree(q_vector);
784 adapter->q_vector[v_idx] = NULL;
785 }
786 return -ENOMEM;
787 }
788
789 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
790 int ring_idx, int v_idx)
791 {
792 struct igb_q_vector *q_vector;
793
794 q_vector = adapter->q_vector[v_idx];
795 q_vector->rx_ring = &adapter->rx_ring[ring_idx];
796 q_vector->rx_ring->q_vector = q_vector;
797 q_vector->itr_val = adapter->itr;
798 }
799
800 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
801 int ring_idx, int v_idx)
802 {
803 struct igb_q_vector *q_vector;
804
805 q_vector = adapter->q_vector[v_idx];
806 q_vector->tx_ring = &adapter->tx_ring[ring_idx];
807 q_vector->tx_ring->q_vector = q_vector;
808 q_vector->itr_val = adapter->itr;
809 }
810
811 /**
812 * igb_map_ring_to_vector - maps allocated queues to vectors
813 *
814 * This function maps the recently allocated queues to vectors.
815 **/
816 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
817 {
818 int i;
819 int v_idx = 0;
820
821 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
822 (adapter->num_q_vectors < adapter->num_tx_queues))
823 return -ENOMEM;
824
825 if (adapter->num_q_vectors >=
826 (adapter->num_rx_queues + adapter->num_tx_queues)) {
827 for (i = 0; i < adapter->num_rx_queues; i++)
828 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
829 for (i = 0; i < adapter->num_tx_queues; i++)
830 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
831 } else {
832 for (i = 0; i < adapter->num_rx_queues; i++) {
833 if (i < adapter->num_tx_queues)
834 igb_map_tx_ring_to_vector(adapter, i, v_idx);
835 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
836 }
837 for (; i < adapter->num_tx_queues; i++)
838 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
839 }
840 return 0;
841 }
842
843 /**
844 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
845 *
846 * This function initializes the interrupts and allocates all of the queues.
847 **/
848 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
849 {
850 struct pci_dev *pdev = adapter->pdev;
851 int err;
852
853 igb_set_interrupt_capability(adapter);
854
855 err = igb_alloc_q_vectors(adapter);
856 if (err) {
857 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
858 goto err_alloc_q_vectors;
859 }
860
861 err = igb_alloc_queues(adapter);
862 if (err) {
863 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
864 goto err_alloc_queues;
865 }
866
867 err = igb_map_ring_to_vector(adapter);
868 if (err) {
869 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
870 goto err_map_queues;
871 }
872
873
874 return 0;
875 err_map_queues:
876 igb_free_queues(adapter);
877 err_alloc_queues:
878 igb_free_q_vectors(adapter);
879 err_alloc_q_vectors:
880 igb_reset_interrupt_capability(adapter);
881 return err;
882 }
883
884 /**
885 * igb_request_irq - initialize interrupts
886 *
887 * Attempts to configure interrupts using the best available
888 * capabilities of the hardware and kernel.
889 **/
890 static int igb_request_irq(struct igb_adapter *adapter)
891 {
892 struct net_device *netdev = adapter->netdev;
893 struct pci_dev *pdev = adapter->pdev;
894 struct e1000_hw *hw = &adapter->hw;
895 int err = 0;
896
897 if (adapter->msix_entries) {
898 err = igb_request_msix(adapter);
899 if (!err)
900 goto request_done;
901 /* fall back to MSI */
902 igb_clear_interrupt_scheme(adapter);
903 if (!pci_enable_msi(adapter->pdev))
904 adapter->flags |= IGB_FLAG_HAS_MSI;
905 igb_free_all_tx_resources(adapter);
906 igb_free_all_rx_resources(adapter);
907 adapter->num_tx_queues = 1;
908 adapter->num_rx_queues = 1;
909 adapter->num_q_vectors = 1;
910 err = igb_alloc_q_vectors(adapter);
911 if (err) {
912 dev_err(&pdev->dev,
913 "Unable to allocate memory for vectors\n");
914 goto request_done;
915 }
916 err = igb_alloc_queues(adapter);
917 if (err) {
918 dev_err(&pdev->dev,
919 "Unable to allocate memory for queues\n");
920 igb_free_q_vectors(adapter);
921 goto request_done;
922 }
923 igb_setup_all_tx_resources(adapter);
924 igb_setup_all_rx_resources(adapter);
925 } else {
926 switch (hw->mac.type) {
927 case e1000_82575:
928 wr32(E1000_MSIXBM(0),
929 (E1000_EICR_RX_QUEUE0 |
930 E1000_EICR_TX_QUEUE0 |
931 E1000_EIMS_OTHER));
932 break;
933 case e1000_82576:
934 wr32(E1000_IVAR0, E1000_IVAR_VALID);
935 break;
936 default:
937 break;
938 }
939 }
940
941 if (adapter->flags & IGB_FLAG_HAS_MSI) {
942 err = request_irq(adapter->pdev->irq, &igb_intr_msi, 0,
943 netdev->name, adapter);
944 if (!err)
945 goto request_done;
946
947 /* fall back to legacy interrupts */
948 igb_reset_interrupt_capability(adapter);
949 adapter->flags &= ~IGB_FLAG_HAS_MSI;
950 }
951
952 err = request_irq(adapter->pdev->irq, &igb_intr, IRQF_SHARED,
953 netdev->name, adapter);
954
955 if (err)
956 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
957 err);
958
959 request_done:
960 return err;
961 }
962
963 static void igb_free_irq(struct igb_adapter *adapter)
964 {
965 if (adapter->msix_entries) {
966 int vector = 0, i;
967
968 free_irq(adapter->msix_entries[vector++].vector, adapter);
969
970 for (i = 0; i < adapter->num_q_vectors; i++) {
971 struct igb_q_vector *q_vector = adapter->q_vector[i];
972 free_irq(adapter->msix_entries[vector++].vector,
973 q_vector);
974 }
975 } else {
976 free_irq(adapter->pdev->irq, adapter);
977 }
978 }
979
980 /**
981 * igb_irq_disable - Mask off interrupt generation on the NIC
982 * @adapter: board private structure
983 **/
984 static void igb_irq_disable(struct igb_adapter *adapter)
985 {
986 struct e1000_hw *hw = &adapter->hw;
987
988 if (adapter->msix_entries) {
989 u32 regval = rd32(E1000_EIAM);
990 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
991 wr32(E1000_EIMC, adapter->eims_enable_mask);
992 regval = rd32(E1000_EIAC);
993 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
994 }
995
996 wr32(E1000_IAM, 0);
997 wr32(E1000_IMC, ~0);
998 wrfl();
999 synchronize_irq(adapter->pdev->irq);
1000 }
1001
1002 /**
1003 * igb_irq_enable - Enable default interrupt generation settings
1004 * @adapter: board private structure
1005 **/
1006 static void igb_irq_enable(struct igb_adapter *adapter)
1007 {
1008 struct e1000_hw *hw = &adapter->hw;
1009
1010 if (adapter->msix_entries) {
1011 u32 regval = rd32(E1000_EIAC);
1012 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1013 regval = rd32(E1000_EIAM);
1014 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1015 wr32(E1000_EIMS, adapter->eims_enable_mask);
1016 if (adapter->vfs_allocated_count)
1017 wr32(E1000_MBVFIMR, 0xFF);
1018 wr32(E1000_IMS, (E1000_IMS_LSC | E1000_IMS_VMMB |
1019 E1000_IMS_DOUTSYNC));
1020 } else {
1021 wr32(E1000_IMS, IMS_ENABLE_MASK);
1022 wr32(E1000_IAM, IMS_ENABLE_MASK);
1023 }
1024 }
1025
1026 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1027 {
1028 struct net_device *netdev = adapter->netdev;
1029 u16 vid = adapter->hw.mng_cookie.vlan_id;
1030 u16 old_vid = adapter->mng_vlan_id;
1031 if (adapter->vlgrp) {
1032 if (!vlan_group_get_device(adapter->vlgrp, vid)) {
1033 if (adapter->hw.mng_cookie.status &
1034 E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1035 igb_vlan_rx_add_vid(netdev, vid);
1036 adapter->mng_vlan_id = vid;
1037 } else
1038 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1039
1040 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1041 (vid != old_vid) &&
1042 !vlan_group_get_device(adapter->vlgrp, old_vid))
1043 igb_vlan_rx_kill_vid(netdev, old_vid);
1044 } else
1045 adapter->mng_vlan_id = vid;
1046 }
1047 }
1048
1049 /**
1050 * igb_release_hw_control - release control of the h/w to f/w
1051 * @adapter: address of board private structure
1052 *
1053 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1054 * For ASF and Pass Through versions of f/w this means that the
1055 * driver is no longer loaded.
1056 *
1057 **/
1058 static void igb_release_hw_control(struct igb_adapter *adapter)
1059 {
1060 struct e1000_hw *hw = &adapter->hw;
1061 u32 ctrl_ext;
1062
1063 /* Let firmware take over control of h/w */
1064 ctrl_ext = rd32(E1000_CTRL_EXT);
1065 wr32(E1000_CTRL_EXT,
1066 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1067 }
1068
1069
1070 /**
1071 * igb_get_hw_control - get control of the h/w from f/w
1072 * @adapter: address of board private structure
1073 *
1074 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1075 * For ASF and Pass Through versions of f/w this means that
1076 * the driver is loaded.
1077 *
1078 **/
1079 static void igb_get_hw_control(struct igb_adapter *adapter)
1080 {
1081 struct e1000_hw *hw = &adapter->hw;
1082 u32 ctrl_ext;
1083
1084 /* Let firmware know the driver has taken over */
1085 ctrl_ext = rd32(E1000_CTRL_EXT);
1086 wr32(E1000_CTRL_EXT,
1087 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1088 }
1089
1090 /**
1091 * igb_configure - configure the hardware for RX and TX
1092 * @adapter: private board structure
1093 **/
1094 static void igb_configure(struct igb_adapter *adapter)
1095 {
1096 struct net_device *netdev = adapter->netdev;
1097 int i;
1098
1099 igb_get_hw_control(adapter);
1100 igb_set_rx_mode(netdev);
1101
1102 igb_restore_vlan(adapter);
1103
1104 igb_setup_tctl(adapter);
1105 igb_setup_mrqc(adapter);
1106 igb_setup_rctl(adapter);
1107
1108 igb_configure_tx(adapter);
1109 igb_configure_rx(adapter);
1110
1111 igb_rx_fifo_flush_82575(&adapter->hw);
1112
1113 /* call igb_desc_unused which always leaves
1114 * at least 1 descriptor unused to make sure
1115 * next_to_use != next_to_clean */
1116 for (i = 0; i < adapter->num_rx_queues; i++) {
1117 struct igb_ring *ring = &adapter->rx_ring[i];
1118 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1119 }
1120
1121
1122 adapter->tx_queue_len = netdev->tx_queue_len;
1123 }
1124
1125
/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 *
 * Reprograms the hardware through igb_configure(), clears the __IGB_DOWN
 * state bit, enables NAPI on every queue vector, re-enables interrupts and
 * restarts the Tx queues.  Always returns 0.
 **/

int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	/* polling must be enabled before interrupts are turned back on */
	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_enable(&q_vector->napi);
	}
	/* MSI-X routing is only programmed when MSI-X entries exist */
	if (adapter->msix_entries)
		igb_configure_msix(adapter);

	igb_set_vmolr(hw, adapter->vfs_allocated_count);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* Fire a link change interrupt to start the watchdog. */
	wr32(E1000_ICS, E1000_ICS_LSC);
	return 0;
}
1167
/**
 * igb_down - quiesce the interface
 * @adapter: board private structure
 *
 * Sets __IGB_DOWN, disables receives and transmits in hardware, stops the
 * Tx queues, disables NAPI and interrupts, stops the watchdog and phy-info
 * timers, records the statistics, resets the hardware (unless the PCI
 * channel is offline) and cleans all Tx and Rx rings.
 **/
void igb_down(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct net_device *netdev = adapter->netdev;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	/* stop polling on every queue vector before masking interrupts */
	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_disable(&q_vector->napi);
	}

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	/* restore the Tx queue length saved by igb_configure() */
	netdev->tx_queue_len = adapter->tx_queue_len;
	netif_carrier_off(netdev);

	/* record the stats before reset*/
	igb_update_stats(adapter);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	/* skip the hardware reset when the PCI channel is offline */
	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA

	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}
1223
1224 void igb_reinit_locked(struct igb_adapter *adapter)
1225 {
1226 WARN_ON(in_interrupt());
1227 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1228 msleep(1);
1229 igb_down(adapter);
1230 igb_up(adapter);
1231 clear_bit(__IGB_RESETTING, &adapter->state);
1232 }
1233
/**
 * igb_reset - reset and re-initialise the hardware
 * @adapter: board private structure
 *
 * Picks the packet buffer allocation for the MAC type (repartitioning it
 * for jumbo frames on parts older than 82576), derives the flow control
 * watermarks from it, quiesces any allocated VFs, then resets and
 * re-initialises the MAC through the mac ops and refreshes the management
 * VLAN, VLAN ether type, adaptive IFS state and PHY info.
 **/
void igb_reset(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	u16 hwm;

	/* Repartition Pba for greater than 9k mtu
	 * To take effect CTRL.RST is required.
	 */
	switch (mac->type) {
	case e1000_82576:
		pba = E1000_PBA_64K;
		break;
	case e1000_82575:
	default:
		pba = E1000_PBA_34K;
		break;
	}

	/* only parts older than 82576 get the jumbo-frame repartitioning */
	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB. */
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;

		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}

	/* flow control settings */
	/* The high water mark must be low enough to fit full frames
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * pba is in KB, hence the shift by 10 to get bytes.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus two full frames */
	hwm = min(((pba << 10) * 9 / 10),
		  ((pba << 10) - 2 * adapter->max_frame_size));

	if (mac->type < e1000_82576) {
		fc->high_water = hwm & 0xFFF8;	/* 8-byte granularity */
		fc->low_water = fc->high_water - 8;
	} else {
		fc->high_water = hwm & 0xFFF0;	/* 16-byte granularity */
		fc->low_water = fc->high_water - 16;
	}
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
	fc->current_mode = fc->requested_mode;

	/* mark every allocated VF as not clear to send */
	if (adapter->vfs_allocated_count) {
		int i;
		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
			adapter->vf_data[i].clear_to_send = false;

		/* ping all the active vfs to let them know we are going down */
		igb_ping_all_vfs(adapter);

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

	/* Allow time for pending master requests to run */
	adapter->hw.mac.ops.reset_hw(&adapter->hw);
	wr32(E1000_WUC, 0);

	/* an init_hw failure is only logged; the reset still continues */
	if (adapter->hw.mac.ops.init_hw(&adapter->hw))
		dev_err(&adapter->pdev->dev, "Hardware Error\n");

	igb_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

	igb_reset_adaptive(&adapter->hw);
	igb_get_phy_info(&adapter->hw);
}
1347
1348 static const struct net_device_ops igb_netdev_ops = {
1349 .ndo_open = igb_open,
1350 .ndo_stop = igb_close,
1351 .ndo_start_xmit = igb_xmit_frame_adv,
1352 .ndo_get_stats = igb_get_stats,
1353 .ndo_set_rx_mode = igb_set_rx_mode,
1354 .ndo_set_multicast_list = igb_set_rx_mode,
1355 .ndo_set_mac_address = igb_set_mac,
1356 .ndo_change_mtu = igb_change_mtu,
1357 .ndo_do_ioctl = igb_ioctl,
1358 .ndo_tx_timeout = igb_tx_timeout,
1359 .ndo_validate_addr = eth_validate_addr,
1360 .ndo_vlan_rx_register = igb_vlan_rx_register,
1361 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1362 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1363 #ifdef CONFIG_NET_POLL_CONTROLLER
1364 .ndo_poll_controller = igb_netpoll,
1365 #endif
1366 };
1367
1368 /**
1369 * igb_probe - Device Initialization Routine
1370 * @pdev: PCI device information struct
1371 * @ent: entry in igb_pci_tbl
1372 *
1373 * Returns 0 on success, negative on failure
1374 *
1375 * igb_probe initializes an adapter identified by a pci_dev structure.
1376 * The OS initialization, configuring of the adapter private structure,
1377 * and a hardware reset occur.
1378 **/
1379 static int __devinit igb_probe(struct pci_dev *pdev,
1380 const struct pci_device_id *ent)
1381 {
1382 struct net_device *netdev;
1383 struct igb_adapter *adapter;
1384 struct e1000_hw *hw;
1385 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1386 unsigned long mmio_start, mmio_len;
1387 int err, pci_using_dac;
1388 u16 eeprom_data = 0;
1389 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1390 u32 part_num;
1391
1392 err = pci_enable_device_mem(pdev);
1393 if (err)
1394 return err;
1395
1396 pci_using_dac = 0;
1397 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1398 if (!err) {
1399 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1400 if (!err)
1401 pci_using_dac = 1;
1402 } else {
1403 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1404 if (err) {
1405 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1406 if (err) {
1407 dev_err(&pdev->dev, "No usable DMA "
1408 "configuration, aborting\n");
1409 goto err_dma;
1410 }
1411 }
1412 }
1413
1414 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1415 IORESOURCE_MEM),
1416 igb_driver_name);
1417 if (err)
1418 goto err_pci_reg;
1419
1420 pci_enable_pcie_error_reporting(pdev);
1421
1422 pci_set_master(pdev);
1423 pci_save_state(pdev);
1424
1425 err = -ENOMEM;
1426 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1427 IGB_ABS_MAX_TX_QUEUES);
1428 if (!netdev)
1429 goto err_alloc_etherdev;
1430
1431 SET_NETDEV_DEV(netdev, &pdev->dev);
1432
1433 pci_set_drvdata(pdev, netdev);
1434 adapter = netdev_priv(netdev);
1435 adapter->netdev = netdev;
1436 adapter->pdev = pdev;
1437 hw = &adapter->hw;
1438 hw->back = adapter;
1439 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1440
1441 mmio_start = pci_resource_start(pdev, 0);
1442 mmio_len = pci_resource_len(pdev, 0);
1443
1444 err = -EIO;
1445 hw->hw_addr = ioremap(mmio_start, mmio_len);
1446 if (!hw->hw_addr)
1447 goto err_ioremap;
1448
1449 netdev->netdev_ops = &igb_netdev_ops;
1450 igb_set_ethtool_ops(netdev);
1451 netdev->watchdog_timeo = 5 * HZ;
1452
1453 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1454
1455 netdev->mem_start = mmio_start;
1456 netdev->mem_end = mmio_start + mmio_len;
1457
1458 /* PCI config space info */
1459 hw->vendor_id = pdev->vendor;
1460 hw->device_id = pdev->device;
1461 hw->revision_id = pdev->revision;
1462 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1463 hw->subsystem_device_id = pdev->subsystem_device;
1464
1465 /* setup the private structure */
1466 hw->back = adapter;
1467 /* Copy the default MAC, PHY and NVM function pointers */
1468 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1469 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1470 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1471 /* Initialize skew-specific constants */
1472 err = ei->get_invariants(hw);
1473 if (err)
1474 goto err_sw_init;
1475
1476 #ifdef CONFIG_PCI_IOV
1477 /* since iov functionality isn't critical to base device function we
1478 * can accept failure. If it fails we don't allow iov to be enabled */
1479 if (hw->mac.type == e1000_82576) {
1480 /* 82576 supports a maximum of 7 VFs in addition to the PF */
1481 unsigned int num_vfs = (max_vfs > 7) ? 7 : max_vfs;
1482 int i;
1483 unsigned char mac_addr[ETH_ALEN];
1484
1485 if (num_vfs) {
1486 adapter->vf_data = kcalloc(num_vfs,
1487 sizeof(struct vf_data_storage),
1488 GFP_KERNEL);
1489 if (!adapter->vf_data) {
1490 dev_err(&pdev->dev,
1491 "Could not allocate VF private data - "
1492 "IOV enable failed\n");
1493 } else {
1494 err = pci_enable_sriov(pdev, num_vfs);
1495 if (!err) {
1496 adapter->vfs_allocated_count = num_vfs;
1497 dev_info(&pdev->dev,
1498 "%d vfs allocated\n",
1499 num_vfs);
1500 for (i = 0;
1501 i < adapter->vfs_allocated_count;
1502 i++) {
1503 random_ether_addr(mac_addr);
1504 igb_set_vf_mac(adapter, i,
1505 mac_addr);
1506 }
1507 } else {
1508 kfree(adapter->vf_data);
1509 adapter->vf_data = NULL;
1510 }
1511 }
1512 }
1513 }
1514
1515 #endif
1516 /* setup the private structure */
1517 err = igb_sw_init(adapter);
1518 if (err)
1519 goto err_sw_init;
1520
1521 igb_get_bus_info_pcie(hw);
1522
1523 hw->phy.autoneg_wait_to_complete = false;
1524 hw->mac.adaptive_ifs = true;
1525
1526 /* Copper options */
1527 if (hw->phy.media_type == e1000_media_type_copper) {
1528 hw->phy.mdix = AUTO_ALL_MODES;
1529 hw->phy.disable_polarity_correction = false;
1530 hw->phy.ms_type = e1000_ms_hw_default;
1531 }
1532
1533 if (igb_check_reset_block(hw))
1534 dev_info(&pdev->dev,
1535 "PHY reset is blocked due to SOL/IDER session.\n");
1536
1537 netdev->features = NETIF_F_SG |
1538 NETIF_F_IP_CSUM |
1539 NETIF_F_HW_VLAN_TX |
1540 NETIF_F_HW_VLAN_RX |
1541 NETIF_F_HW_VLAN_FILTER;
1542
1543 netdev->features |= NETIF_F_IPV6_CSUM;
1544 netdev->features |= NETIF_F_TSO;
1545 netdev->features |= NETIF_F_TSO6;
1546
1547 netdev->features |= NETIF_F_GRO;
1548
1549 netdev->vlan_features |= NETIF_F_TSO;
1550 netdev->vlan_features |= NETIF_F_TSO6;
1551 netdev->vlan_features |= NETIF_F_IP_CSUM;
1552 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1553 netdev->vlan_features |= NETIF_F_SG;
1554
1555 if (pci_using_dac)
1556 netdev->features |= NETIF_F_HIGHDMA;
1557
1558 if (adapter->hw.mac.type == e1000_82576)
1559 netdev->features |= NETIF_F_SCTP_CSUM;
1560
1561 adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw);
1562
1563 /* before reading the NVM, reset the controller to put the device in a
1564 * known good starting state */
1565 hw->mac.ops.reset_hw(hw);
1566
1567 /* make sure the NVM is good */
1568 if (igb_validate_nvm_checksum(hw) < 0) {
1569 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1570 err = -EIO;
1571 goto err_eeprom;
1572 }
1573
1574 /* copy the MAC address out of the NVM */
1575 if (hw->mac.ops.read_mac_addr(hw))
1576 dev_err(&pdev->dev, "NVM Read Error\n");
1577
1578 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1579 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1580
1581 if (!is_valid_ether_addr(netdev->perm_addr)) {
1582 dev_err(&pdev->dev, "Invalid MAC Address\n");
1583 err = -EIO;
1584 goto err_eeprom;
1585 }
1586
1587 setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1588 (unsigned long) adapter);
1589 setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1590 (unsigned long) adapter);
1591
1592 INIT_WORK(&adapter->reset_task, igb_reset_task);
1593 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1594
1595 /* Initialize link properties that are user-changeable */
1596 adapter->fc_autoneg = true;
1597 hw->mac.autoneg = true;
1598 hw->phy.autoneg_advertised = 0x2f;
1599
1600 hw->fc.requested_mode = e1000_fc_default;
1601 hw->fc.current_mode = e1000_fc_default;
1602
1603 adapter->itr_setting = IGB_DEFAULT_ITR;
1604 adapter->itr = IGB_START_ITR;
1605
1606 igb_validate_mdi_setting(hw);
1607
1608 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
1609 * enable the ACPI Magic Packet filter
1610 */
1611
1612 if (hw->bus.func == 0)
1613 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1614 else if (hw->bus.func == 1)
1615 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1616
1617 if (eeprom_data & eeprom_apme_mask)
1618 adapter->eeprom_wol |= E1000_WUFC_MAG;
1619
1620 /* now that we have the eeprom settings, apply the special cases where
1621 * the eeprom may be wrong or the board simply won't support wake on
1622 * lan on a particular port */
1623 switch (pdev->device) {
1624 case E1000_DEV_ID_82575GB_QUAD_COPPER:
1625 adapter->eeprom_wol = 0;
1626 break;
1627 case E1000_DEV_ID_82575EB_FIBER_SERDES:
1628 case E1000_DEV_ID_82576_FIBER:
1629 case E1000_DEV_ID_82576_SERDES:
1630 /* Wake events only supported on port A for dual fiber
1631 * regardless of eeprom setting */
1632 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1633 adapter->eeprom_wol = 0;
1634 break;
1635 case E1000_DEV_ID_82576_QUAD_COPPER:
1636 /* if quad port adapter, disable WoL on all but port A */
1637 if (global_quad_port_a != 0)
1638 adapter->eeprom_wol = 0;
1639 else
1640 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1641 /* Reset for multiple quad port adapters */
1642 if (++global_quad_port_a == 4)
1643 global_quad_port_a = 0;
1644 break;
1645 }
1646
1647 /* initialize the wol settings based on the eeprom settings */
1648 adapter->wol = adapter->eeprom_wol;
1649 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1650
1651 /* reset the hardware with the new settings */
1652 igb_reset(adapter);
1653
1654 /* let the f/w know that the h/w is now under the control of the
1655 * driver. */
1656 igb_get_hw_control(adapter);
1657
1658 strcpy(netdev->name, "eth%d");
1659 err = register_netdev(netdev);
1660 if (err)
1661 goto err_register;
1662
1663 /* carrier off reporting is important to ethtool even BEFORE open */
1664 netif_carrier_off(netdev);
1665
1666 #ifdef CONFIG_IGB_DCA
1667 if (dca_add_requester(&pdev->dev) == 0) {
1668 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1669 dev_info(&pdev->dev, "DCA enabled\n");
1670 igb_setup_dca(adapter);
1671 }
1672 #endif
1673
1674 /*
1675 * Initialize hardware timer: we keep it running just in case
1676 * that some program needs it later on.
1677 */
1678 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1679 adapter->cycles.read = igb_read_clock;
1680 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1681 adapter->cycles.mult = 1;
1682 adapter->cycles.shift = IGB_TSYNC_SHIFT;
1683 wr32(E1000_TIMINCA,
1684 (1<<24) |
1685 IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS * IGB_TSYNC_SCALE);
1686 #if 0
1687 /*
1688 * Avoid rollover while we initialize by resetting the time counter.
1689 */
1690 wr32(E1000_SYSTIML, 0x00000000);
1691 wr32(E1000_SYSTIMH, 0x00000000);
1692 #else
1693 /*
1694 * Set registers so that rollover occurs soon to test this.
1695 */
1696 wr32(E1000_SYSTIML, 0x00000000);
1697 wr32(E1000_SYSTIMH, 0xFF800000);
1698 #endif
1699 wrfl();
1700 timecounter_init(&adapter->clock,
1701 &adapter->cycles,
1702 ktime_to_ns(ktime_get_real()));
1703
1704 /*
1705 * Synchronize our NIC clock against system wall clock. NIC
1706 * time stamp reading requires ~3us per sample, each sample
1707 * was pretty stable even under load => only require 10
1708 * samples for each offset comparison.
1709 */
1710 memset(&adapter->compare, 0, sizeof(adapter->compare));
1711 adapter->compare.source = &adapter->clock;
1712 adapter->compare.target = ktime_get_real;
1713 adapter->compare.num_samples = 10;
1714 timecompare_update(&adapter->compare, 0);
1715
1716 #ifdef DEBUG
1717 {
1718 char buffer[160];
1719 printk(KERN_DEBUG
1720 "igb: %s: hw %p initialized timer\n",
1721 igb_get_time_str(adapter, buffer),
1722 &adapter->hw);
1723 }
1724 #endif
1725
1726 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1727 /* print bus type/speed/width info */
1728 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1729 netdev->name,
1730 ((hw->bus.speed == e1000_bus_speed_2500)
1731 ? "2.5Gb/s" : "unknown"),
1732 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1733 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1734 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1735 "unknown"),
1736 netdev->dev_addr);
1737
1738 igb_read_part_num(hw, &part_num);
1739 dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1740 (part_num >> 8), (part_num & 0xff));
1741
1742 dev_info(&pdev->dev,
1743 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1744 adapter->msix_entries ? "MSI-X" :
1745 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1746 adapter->num_rx_queues, adapter->num_tx_queues);
1747
1748 return 0;
1749
1750 err_register:
1751 igb_release_hw_control(adapter);
1752 err_eeprom:
1753 if (!igb_check_reset_block(hw))
1754 igb_reset_phy(hw);
1755
1756 if (hw->flash_address)
1757 iounmap(hw->flash_address);
1758 err_sw_init:
1759 igb_clear_interrupt_scheme(adapter);
1760 iounmap(hw->hw_addr);
1761 err_ioremap:
1762 free_netdev(netdev);
1763 err_alloc_etherdev:
1764 pci_release_selected_regions(pdev, pci_select_bars(pdev,
1765 IORESOURCE_MEM));
1766 err_pci_reg:
1767 err_dma:
1768 pci_disable_device(pdev);
1769 return err;
1770 }
1771
/**
 * igb_remove - Device Removal Routine
 * @pdev: PCI device information struct
 *
 * igb_remove is called by the PCI subsystem to alert the driver
 * that it should release a PCI device.  This could be caused by a
 * Hot-Plug event, or because the driver is going to be removed from
 * memory.
 *
 * Stops the timers and scheduled work, drops DCA, hands the hardware back
 * to firmware, unregisters the netdev and then releases interrupts, SR-IOV
 * state, MMIO mappings, PCI regions and the netdev itself.
 **/
static void __devexit igb_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	/* flush_scheduled work may reschedule our watchdog task, so
	 * explicitly disable watchdog tasks from being rescheduled */
	set_bit(__IGB_DOWN, &adapter->state);
	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	flush_scheduled_work();

#ifdef CONFIG_IGB_DCA
	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
		dev_info(&pdev->dev, "DCA disabled\n");
		dca_remove_requester(&pdev->dev);
		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
	}
#endif

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	unregister_netdev(netdev);

	/* reset the PHY unless igb_check_reset_block() reports it blocked */
	if (!igb_check_reset_block(&adapter->hw))
		igb_reset_phy(&adapter->hw);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PCI_IOV
	/* reclaim resources allocated to VFs */
	if (adapter->vf_data) {
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		msleep(100);
		dev_info(&pdev->dev, "IOV Disabled\n");
	}
#endif
	/* no register access is possible past this point */
	iounmap(hw->hw_addr);
	if (hw->flash_address)
		iounmap(hw->flash_address);
	pci_release_selected_regions(pdev, pci_select_bars(pdev,
				     IORESOURCE_MEM));

	/* adapter and hw live inside the netdev and go away with it */
	free_netdev(netdev);

	pci_disable_pcie_error_reporting(pdev);

	pci_disable_device(pdev);
}
1841
1842 /**
1843 * igb_sw_init - Initialize general software structures (struct igb_adapter)
1844 * @adapter: board private structure to initialize
1845 *
1846 * igb_sw_init initializes the Adapter private data structure.
1847 * Fields are initialized based on PCI device information and
1848 * OS network device settings (MTU size).
1849 **/
1850 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1851 {
1852 struct e1000_hw *hw = &adapter->hw;
1853 struct net_device *netdev = adapter->netdev;
1854 struct pci_dev *pdev = adapter->pdev;
1855
1856 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1857
1858 adapter->tx_ring_count = IGB_DEFAULT_TXD;
1859 adapter->rx_ring_count = IGB_DEFAULT_RXD;
1860 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1861 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1862
1863 /* This call may decrease the number of queues depending on
1864 * interrupt mode. */
1865 if (igb_init_interrupt_scheme(adapter)) {
1866 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1867 return -ENOMEM;
1868 }
1869
1870 /* Explicitly disable IRQ since the NIC can be in any state. */
1871 igb_irq_disable(adapter);
1872
1873 set_bit(__IGB_DOWN, &adapter->state);
1874 return 0;
1875 }
1876
/**
 * igb_open - Called when a network interface is made active
 * @netdev: network interface device structure
 *
 * Returns 0 on success, negative value on failure
 * (-EBUSY while the __IGB_TESTING state bit is set).
 *
 * The open entry point is called when a network interface is made
 * active by the system (IFF_UP).  At this point all resources needed
 * for transmit and receive operations are allocated, the interrupt
 * handler is registered with the OS, the watchdog timer is started,
 * and the stack is notified that the interface is ready.
 **/
static int igb_open(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int err;
	int i;

	/* disallow open during test */
	if (test_bit(__IGB_TESTING, &adapter->state))
		return -EBUSY;

	netif_carrier_off(netdev);

	/* allocate transmit descriptors */
	err = igb_setup_all_tx_resources(adapter);
	if (err)
		goto err_setup_tx;

	/* allocate receive descriptors */
	err = igb_setup_all_rx_resources(adapter);
	if (err)
		goto err_setup_rx;

	/* e1000_power_up_phy(adapter); */

	/* start from "no management VLAN" and pick it up from the cookie */
	adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	if ((adapter->hw.mng_cookie.status &
	     E1000_MNG_DHCP_COOKIE_STATUS_VLAN))
		igb_update_mng_vlan(adapter);

	/* before we allocate an interrupt, we must be ready to handle it.
	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
	 * as soon as we call pci_request_irq, so we have to setup our
	 * clean_rx handler before we do so.  */
	igb_configure(adapter);

	igb_set_vmolr(hw, adapter->vfs_allocated_count);

	err = igb_request_irq(adapter);
	if (err)
		goto err_req_irq;

	/* From here on the code is the same as igb_up() */
	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_enable(&q_vector->napi);
	}

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);

	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(netdev);

	/* Fire a link status change interrupt to start the watchdog. */
	wr32(E1000_ICS, E1000_ICS_LSC);

	return 0;

	/* unwind in reverse order of acquisition; every failure path ends
	 * with a hardware reset */
err_req_irq:
	igb_release_hw_control(adapter);
	/* e1000_power_down_phy(adapter); */
	igb_free_all_rx_resources(adapter);
err_setup_rx:
	igb_free_all_tx_resources(adapter);
err_setup_tx:
	igb_reset(adapter);

	return err;
}
1969
1970 /**
1971 * igb_close - Disables a network interface
1972 * @netdev: network interface device structure
1973 *
1974 * Returns 0, this is not allowed to fail
1975 *
1976 * The close entry point is called when an interface is de-activated
1977 * by the OS. The hardware is still under the driver's control, but
1978 * needs to be disabled. A global MAC reset is issued to stop the
1979 * hardware, and all transmit and receive resources are freed.
1980 **/
1981 static int igb_close(struct net_device *netdev)
1982 {
1983 struct igb_adapter *adapter = netdev_priv(netdev);
1984
1985 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
1986 igb_down(adapter);
1987
1988 igb_free_irq(adapter);
1989
1990 igb_free_all_tx_resources(adapter);
1991 igb_free_all_rx_resources(adapter);
1992
1993 /* kill manageability vlan ID if supported, but not if a vlan with
1994 * the same ID is registered on the host OS (let 8021q kill it) */
1995 if ((adapter->hw.mng_cookie.status &
1996 E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
1997 !(adapter->vlgrp &&
1998 vlan_group_get_device(adapter->vlgrp, adapter->mng_vlan_id)))
1999 igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
2000
2001 return 0;
2002 }
2003
2004 /**
2005 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2006 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2007 *
2008 * Return 0 on success, negative on failure
2009 **/
2010 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2011 {
2012 struct pci_dev *pdev = tx_ring->pdev;
2013 int size;
2014
2015 size = sizeof(struct igb_buffer) * tx_ring->count;
2016 tx_ring->buffer_info = vmalloc(size);
2017 if (!tx_ring->buffer_info)
2018 goto err;
2019 memset(tx_ring->buffer_info, 0, size);
2020
2021 /* round up to nearest 4K */
2022 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2023 tx_ring->size = ALIGN(tx_ring->size, 4096);
2024
2025 tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size,
2026 &tx_ring->dma);
2027
2028 if (!tx_ring->desc)
2029 goto err;
2030
2031 tx_ring->next_to_use = 0;
2032 tx_ring->next_to_clean = 0;
2033 return 0;
2034
2035 err:
2036 vfree(tx_ring->buffer_info);
2037 dev_err(&pdev->dev,
2038 "Unable to allocate memory for the transmit descriptor ring\n");
2039 return -ENOMEM;
2040 }
2041
2042 /**
2043 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2044 * (Descriptors) for all queues
2045 * @adapter: board private structure
2046 *
2047 * Return 0 on success, negative on failure
2048 **/
2049 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2050 {
2051 int i, err = 0;
2052 int r_idx;
2053
2054 for (i = 0; i < adapter->num_tx_queues; i++) {
2055 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
2056 if (err) {
2057 dev_err(&adapter->pdev->dev,
2058 "Allocation for Tx Queue %u failed\n", i);
2059 for (i--; i >= 0; i--)
2060 igb_free_tx_resources(&adapter->tx_ring[i]);
2061 break;
2062 }
2063 }
2064
2065 for (i = 0; i < IGB_MAX_TX_QUEUES; i++) {
2066 r_idx = i % adapter->num_tx_queues;
2067 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
2068 }
2069 return err;
2070 }
2071
2072 /**
2073 * igb_setup_tctl - configure the transmit control registers
2074 * @adapter: Board private structure
2075 **/
2076 void igb_setup_tctl(struct igb_adapter *adapter)
2077 {
2078 struct e1000_hw *hw = &adapter->hw;
2079 u32 tctl;
2080
2081 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2082 wr32(E1000_TXDCTL(0), 0);
2083
2084 /* Program the Transmit Control Register */
2085 tctl = rd32(E1000_TCTL);
2086 tctl &= ~E1000_TCTL_CT;
2087 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2088 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2089
2090 igb_config_collision_dist(hw);
2091
2092 /* Enable transmits */
2093 tctl |= E1000_TCTL_EN;
2094
2095 wr32(E1000_TCTL, tctl);
2096 }
2097
2098 /**
2099 * igb_configure_tx_ring - Configure transmit ring after Reset
2100 * @adapter: board private structure
2101 * @ring: tx ring to configure
2102 *
2103 * Configure a transmit ring after a reset.
2104 **/
2105 void igb_configure_tx_ring(struct igb_adapter *adapter,
2106 struct igb_ring *ring)
2107 {
2108 struct e1000_hw *hw = &adapter->hw;
2109 u32 txdctl;
2110 u64 tdba = ring->dma;
2111 int reg_idx = ring->reg_idx;
2112
2113 /* disable the queue */
2114 txdctl = rd32(E1000_TXDCTL(reg_idx));
2115 wr32(E1000_TXDCTL(reg_idx),
2116 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2117 wrfl();
2118 mdelay(10);
2119
2120 wr32(E1000_TDLEN(reg_idx),
2121 ring->count * sizeof(union e1000_adv_tx_desc));
2122 wr32(E1000_TDBAL(reg_idx),
2123 tdba & 0x00000000ffffffffULL);
2124 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2125
2126 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2127 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2128 writel(0, ring->head);
2129 writel(0, ring->tail);
2130
2131 txdctl |= IGB_TX_PTHRESH;
2132 txdctl |= IGB_TX_HTHRESH << 8;
2133 txdctl |= IGB_TX_WTHRESH << 16;
2134
2135 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2136 wr32(E1000_TXDCTL(reg_idx), txdctl);
2137 }
2138
2139 /**
2140 * igb_configure_tx - Configure transmit Unit after Reset
2141 * @adapter: board private structure
2142 *
2143 * Configure the Tx unit of the MAC after a reset.
2144 **/
2145 static void igb_configure_tx(struct igb_adapter *adapter)
2146 {
2147 int i;
2148
2149 for (i = 0; i < adapter->num_tx_queues; i++)
2150 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2151 }
2152
2153 /**
2154 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2155 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2156 *
2157 * Returns 0 on success, negative on failure
2158 **/
2159 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2160 {
2161 struct pci_dev *pdev = rx_ring->pdev;
2162 int size, desc_len;
2163
2164 size = sizeof(struct igb_buffer) * rx_ring->count;
2165 rx_ring->buffer_info = vmalloc(size);
2166 if (!rx_ring->buffer_info)
2167 goto err;
2168 memset(rx_ring->buffer_info, 0, size);
2169
2170 desc_len = sizeof(union e1000_adv_rx_desc);
2171
2172 /* Round up to nearest 4K */
2173 rx_ring->size = rx_ring->count * desc_len;
2174 rx_ring->size = ALIGN(rx_ring->size, 4096);
2175
2176 rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2177 &rx_ring->dma);
2178
2179 if (!rx_ring->desc)
2180 goto err;
2181
2182 rx_ring->next_to_clean = 0;
2183 rx_ring->next_to_use = 0;
2184
2185 return 0;
2186
2187 err:
2188 vfree(rx_ring->buffer_info);
2189 dev_err(&pdev->dev, "Unable to allocate memory for "
2190 "the receive descriptor ring\n");
2191 return -ENOMEM;
2192 }
2193
2194 /**
2195 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2196 * (Descriptors) for all queues
2197 * @adapter: board private structure
2198 *
2199 * Return 0 on success, negative on failure
2200 **/
2201 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2202 {
2203 int i, err = 0;
2204
2205 for (i = 0; i < adapter->num_rx_queues; i++) {
2206 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2207 if (err) {
2208 dev_err(&adapter->pdev->dev,
2209 "Allocation for Rx Queue %u failed\n", i);
2210 for (i--; i >= 0; i--)
2211 igb_free_rx_resources(&adapter->rx_ring[i]);
2212 break;
2213 }
2214 }
2215
2216 return err;
2217 }
2218
2219 /**
2220 * igb_setup_mrqc - configure the multiple receive queue control registers
2221 * @adapter: Board private structure
2222 **/
2223 static void igb_setup_mrqc(struct igb_adapter *adapter)
2224 {
2225 struct e1000_hw *hw = &adapter->hw;
2226 u32 mrqc, rxcsum;
2227 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2228 union e1000_reta {
2229 u32 dword;
2230 u8 bytes[4];
2231 } reta;
2232 static const u8 rsshash[40] = {
2233 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2234 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2235 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2236 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2237
2238 /* Fill out hash function seeds */
2239 for (j = 0; j < 10; j++) {
2240 u32 rsskey = rsshash[(j * 4)];
2241 rsskey |= rsshash[(j * 4) + 1] << 8;
2242 rsskey |= rsshash[(j * 4) + 2] << 16;
2243 rsskey |= rsshash[(j * 4) + 3] << 24;
2244 array_wr32(E1000_RSSRK(0), j, rsskey);
2245 }
2246
2247 num_rx_queues = adapter->num_rx_queues;
2248
2249 if (adapter->vfs_allocated_count) {
2250 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2251 switch (hw->mac.type) {
2252 case e1000_82576:
2253 shift = 3;
2254 num_rx_queues = 2;
2255 break;
2256 case e1000_82575:
2257 shift = 2;
2258 shift2 = 6;
2259 default:
2260 break;
2261 }
2262 } else {
2263 if (hw->mac.type == e1000_82575)
2264 shift = 6;
2265 }
2266
2267 for (j = 0; j < (32 * 4); j++) {
2268 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2269 if (shift2)
2270 reta.bytes[j & 3] |= num_rx_queues << shift2;
2271 if ((j & 3) == 3)
2272 wr32(E1000_RETA(j >> 2), reta.dword);
2273 }
2274
2275 /*
2276 * Disable raw packet checksumming so that RSS hash is placed in
2277 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2278 * offloads as they are enabled by default
2279 */
2280 rxcsum = rd32(E1000_RXCSUM);
2281 rxcsum |= E1000_RXCSUM_PCSD;
2282
2283 if (adapter->hw.mac.type >= e1000_82576)
2284 /* Enable Receive Checksum Offload for SCTP */
2285 rxcsum |= E1000_RXCSUM_CRCOFL;
2286
2287 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2288 wr32(E1000_RXCSUM, rxcsum);
2289
2290 /* If VMDq is enabled then we set the appropriate mode for that, else
2291 * we default to RSS so that an RSS hash is calculated per packet even
2292 * if we are only using one queue */
2293 if (adapter->vfs_allocated_count) {
2294 if (hw->mac.type > e1000_82575) {
2295 /* Set the default pool for the PF's first queue */
2296 u32 vtctl = rd32(E1000_VT_CTL);
2297 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2298 E1000_VT_CTL_DISABLE_DEF_POOL);
2299 vtctl |= adapter->vfs_allocated_count <<
2300 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2301 wr32(E1000_VT_CTL, vtctl);
2302 }
2303 if (adapter->num_rx_queues > 1)
2304 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2305 else
2306 mrqc = E1000_MRQC_ENABLE_VMDQ;
2307 } else {
2308 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2309 }
2310 igb_vmm_control(adapter);
2311
2312 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2313 E1000_MRQC_RSS_FIELD_IPV4_TCP);
2314 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2315 E1000_MRQC_RSS_FIELD_IPV6_TCP);
2316 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2317 E1000_MRQC_RSS_FIELD_IPV6_UDP);
2318 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2319 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2320
2321 wr32(E1000_MRQC, mrqc);
2322 }
2323
2324 /**
2325 * igb_setup_rctl - configure the receive control registers
2326 * @adapter: Board private structure
2327 **/
2328 void igb_setup_rctl(struct igb_adapter *adapter)
2329 {
2330 struct e1000_hw *hw = &adapter->hw;
2331 u32 rctl;
2332
2333 rctl = rd32(E1000_RCTL);
2334
2335 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2336 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2337
2338 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2339 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2340
2341 /*
2342 * enable stripping of CRC. It's unlikely this will break BMC
2343 * redirection as it did with e1000. Newer features require
2344 * that the HW strips the CRC.
2345 */
2346 rctl |= E1000_RCTL_SECRC;
2347
2348 /*
2349 * disable store bad packets and clear size bits.
2350 */
2351 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2352
2353 /* enable LPE to prevent packets larger than max_frame_size */
2354 rctl |= E1000_RCTL_LPE;
2355
2356 /* disable queue 0 to prevent tail write w/o re-config */
2357 wr32(E1000_RXDCTL(0), 0);
2358
2359 /* Attention!!! For SR-IOV PF driver operations you must enable
2360 * queue drop for all VF and PF queues to prevent head of line blocking
2361 * if an un-trusted VF does not provide descriptors to hardware.
2362 */
2363 if (adapter->vfs_allocated_count) {
2364 u32 vmolr;
2365
2366 /* set all queue drop enable bits */
2367 wr32(E1000_QDE, ALL_QUEUES);
2368
2369 vmolr = rd32(E1000_VMOLR(adapter->vfs_allocated_count));
2370 if (rctl & E1000_RCTL_LPE)
2371 vmolr |= E1000_VMOLR_LPE;
2372 if (adapter->num_rx_queues > 1)
2373 vmolr |= E1000_VMOLR_RSSE;
2374 wr32(E1000_VMOLR(adapter->vfs_allocated_count), vmolr);
2375 }
2376
2377 wr32(E1000_RCTL, rctl);
2378 }
2379
2380 /**
2381 * igb_rlpml_set - set maximum receive packet size
2382 * @adapter: board private structure
2383 *
2384 * Configure maximum receivable packet size.
2385 **/
2386 static void igb_rlpml_set(struct igb_adapter *adapter)
2387 {
2388 u32 max_frame_size = adapter->max_frame_size;
2389 struct e1000_hw *hw = &adapter->hw;
2390 u16 pf_id = adapter->vfs_allocated_count;
2391
2392 if (adapter->vlgrp)
2393 max_frame_size += VLAN_TAG_SIZE;
2394
2395 /* if vfs are enabled we set RLPML to the largest possible request
2396 * size and set the VMOLR RLPML to the size we need */
2397 if (pf_id) {
2398 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2399 max_frame_size = MAX_STD_JUMBO_FRAME_SIZE + VLAN_TAG_SIZE;
2400 }
2401
2402 wr32(E1000_RLPML, max_frame_size);
2403 }
2404
2405 /**
2406 * igb_configure_rx_ring - Configure a receive ring after Reset
2407 * @adapter: board private structure
2408 * @ring: receive ring to be configured
2409 *
2410 * Configure the Rx unit of the MAC after a reset.
2411 **/
2412 void igb_configure_rx_ring(struct igb_adapter *adapter,
2413 struct igb_ring *ring)
2414 {
2415 struct e1000_hw *hw = &adapter->hw;
2416 u64 rdba = ring->dma;
2417 int reg_idx = ring->reg_idx;
2418 u32 srrctl, rxdctl;
2419
2420 /* disable the queue */
2421 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2422 wr32(E1000_RXDCTL(reg_idx),
2423 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2424
2425 /* Set DMA base address registers */
2426 wr32(E1000_RDBAL(reg_idx),
2427 rdba & 0x00000000ffffffffULL);
2428 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2429 wr32(E1000_RDLEN(reg_idx),
2430 ring->count * sizeof(union e1000_adv_rx_desc));
2431
2432 /* initialize head and tail */
2433 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2434 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2435 writel(0, ring->head);
2436 writel(0, ring->tail);
2437
2438 /* set descriptor configuration */
2439 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2440 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2441 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2442 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2443 srrctl |= IGB_RXBUFFER_16384 >>
2444 E1000_SRRCTL_BSIZEPKT_SHIFT;
2445 #else
2446 srrctl |= (PAGE_SIZE / 2) >>
2447 E1000_SRRCTL_BSIZEPKT_SHIFT;
2448 #endif
2449 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2450 } else {
2451 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2452 E1000_SRRCTL_BSIZEPKT_SHIFT;
2453 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2454 }
2455
2456 wr32(E1000_SRRCTL(reg_idx), srrctl);
2457
2458 /* enable receive descriptor fetching */
2459 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2460 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2461 rxdctl &= 0xFFF00000;
2462 rxdctl |= IGB_RX_PTHRESH;
2463 rxdctl |= IGB_RX_HTHRESH << 8;
2464 rxdctl |= IGB_RX_WTHRESH << 16;
2465 wr32(E1000_RXDCTL(reg_idx), rxdctl);
2466 }
2467
2468 /**
2469 * igb_configure_rx - Configure receive Unit after Reset
2470 * @adapter: board private structure
2471 *
2472 * Configure the Rx unit of the MAC after a reset.
2473 **/
2474 static void igb_configure_rx(struct igb_adapter *adapter)
2475 {
2476 int i;
2477
2478 /* set UTA to appropriate mode */
2479 igb_set_uta(adapter);
2480
2481 /* set the correct pool for the PF default MAC address in entry 0 */
2482 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2483 adapter->vfs_allocated_count);
2484
2485 /* Setup the HW Rx Head and Tail Descriptor Pointers and
2486 * the Base and Length of the Rx Descriptor Ring */
2487 for (i = 0; i < adapter->num_rx_queues; i++)
2488 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2489 }
2490
2491 /**
2492 * igb_free_tx_resources - Free Tx Resources per Queue
2493 * @tx_ring: Tx descriptor ring for a specific queue
2494 *
2495 * Free all transmit software resources
2496 **/
2497 void igb_free_tx_resources(struct igb_ring *tx_ring)
2498 {
2499 igb_clean_tx_ring(tx_ring);
2500
2501 vfree(tx_ring->buffer_info);
2502 tx_ring->buffer_info = NULL;
2503
2504 pci_free_consistent(tx_ring->pdev, tx_ring->size,
2505 tx_ring->desc, tx_ring->dma);
2506
2507 tx_ring->desc = NULL;
2508 }
2509
2510 /**
2511 * igb_free_all_tx_resources - Free Tx Resources for All Queues
2512 * @adapter: board private structure
2513 *
2514 * Free all transmit software resources
2515 **/
2516 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2517 {
2518 int i;
2519
2520 for (i = 0; i < adapter->num_tx_queues; i++)
2521 igb_free_tx_resources(&adapter->tx_ring[i]);
2522 }
2523
2524 static void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2525 struct igb_buffer *buffer_info)
2526 {
2527 buffer_info->dma = 0;
2528 if (buffer_info->skb) {
2529 skb_dma_unmap(&tx_ring->pdev->dev,
2530 buffer_info->skb,
2531 DMA_TO_DEVICE);
2532 dev_kfree_skb_any(buffer_info->skb);
2533 buffer_info->skb = NULL;
2534 }
2535 buffer_info->time_stamp = 0;
2536 /* buffer_info must be completely set up in the transmit path */
2537 }
2538
2539 /**
2540 * igb_clean_tx_ring - Free Tx Buffers
2541 * @tx_ring: ring to be cleaned
2542 **/
2543 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2544 {
2545 struct igb_buffer *buffer_info;
2546 unsigned long size;
2547 unsigned int i;
2548
2549 if (!tx_ring->buffer_info)
2550 return;
2551 /* Free all the Tx ring sk_buffs */
2552
2553 for (i = 0; i < tx_ring->count; i++) {
2554 buffer_info = &tx_ring->buffer_info[i];
2555 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2556 }
2557
2558 size = sizeof(struct igb_buffer) * tx_ring->count;
2559 memset(tx_ring->buffer_info, 0, size);
2560
2561 /* Zero out the descriptor ring */
2562
2563 memset(tx_ring->desc, 0, tx_ring->size);
2564
2565 tx_ring->next_to_use = 0;
2566 tx_ring->next_to_clean = 0;
2567
2568 writel(0, tx_ring->head);
2569 writel(0, tx_ring->tail);
2570 }
2571
2572 /**
2573 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2574 * @adapter: board private structure
2575 **/
2576 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2577 {
2578 int i;
2579
2580 for (i = 0; i < adapter->num_tx_queues; i++)
2581 igb_clean_tx_ring(&adapter->tx_ring[i]);
2582 }
2583
2584 /**
2585 * igb_free_rx_resources - Free Rx Resources
2586 * @rx_ring: ring to clean the resources from
2587 *
2588 * Free all receive software resources
2589 **/
2590 void igb_free_rx_resources(struct igb_ring *rx_ring)
2591 {
2592 igb_clean_rx_ring(rx_ring);
2593
2594 vfree(rx_ring->buffer_info);
2595 rx_ring->buffer_info = NULL;
2596
2597 pci_free_consistent(rx_ring->pdev, rx_ring->size,
2598 rx_ring->desc, rx_ring->dma);
2599
2600 rx_ring->desc = NULL;
2601 }
2602
2603 /**
2604 * igb_free_all_rx_resources - Free Rx Resources for All Queues
2605 * @adapter: board private structure
2606 *
2607 * Free all receive software resources
2608 **/
2609 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2610 {
2611 int i;
2612
2613 for (i = 0; i < adapter->num_rx_queues; i++)
2614 igb_free_rx_resources(&adapter->rx_ring[i]);
2615 }
2616
2617 /**
2618 * igb_clean_rx_ring - Free Rx Buffers per Queue
2619 * @rx_ring: ring to free buffers from
2620 **/
2621 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2622 {
2623 struct igb_buffer *buffer_info;
2624 unsigned long size;
2625 unsigned int i;
2626
2627 if (!rx_ring->buffer_info)
2628 return;
2629 /* Free all the Rx ring sk_buffs */
2630 for (i = 0; i < rx_ring->count; i++) {
2631 buffer_info = &rx_ring->buffer_info[i];
2632 if (buffer_info->dma) {
2633 pci_unmap_single(rx_ring->pdev,
2634 buffer_info->dma,
2635 rx_ring->rx_buffer_len,
2636 PCI_DMA_FROMDEVICE);
2637 buffer_info->dma = 0;
2638 }
2639
2640 if (buffer_info->skb) {
2641 dev_kfree_skb(buffer_info->skb);
2642 buffer_info->skb = NULL;
2643 }
2644 if (buffer_info->page_dma) {
2645 pci_unmap_page(rx_ring->pdev,
2646 buffer_info->page_dma,
2647 PAGE_SIZE / 2,
2648 PCI_DMA_FROMDEVICE);
2649 buffer_info->page_dma = 0;
2650 }
2651 if (buffer_info->page) {
2652 put_page(buffer_info->page);
2653 buffer_info->page = NULL;
2654 buffer_info->page_offset = 0;
2655 }
2656 }
2657
2658 size = sizeof(struct igb_buffer) * rx_ring->count;
2659 memset(rx_ring->buffer_info, 0, size);
2660
2661 /* Zero out the descriptor ring */
2662 memset(rx_ring->desc, 0, rx_ring->size);
2663
2664 rx_ring->next_to_clean = 0;
2665 rx_ring->next_to_use = 0;
2666
2667 writel(0, rx_ring->head);
2668 writel(0, rx_ring->tail);
2669 }
2670
2671 /**
2672 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2673 * @adapter: board private structure
2674 **/
2675 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2676 {
2677 int i;
2678
2679 for (i = 0; i < adapter->num_rx_queues; i++)
2680 igb_clean_rx_ring(&adapter->rx_ring[i]);
2681 }
2682
2683 /**
2684 * igb_set_mac - Change the Ethernet Address of the NIC
2685 * @netdev: network interface device structure
2686 * @p: pointer to an address structure
2687 *
2688 * Returns 0 on success, negative on failure
2689 **/
2690 static int igb_set_mac(struct net_device *netdev, void *p)
2691 {
2692 struct igb_adapter *adapter = netdev_priv(netdev);
2693 struct e1000_hw *hw = &adapter->hw;
2694 struct sockaddr *addr = p;
2695
2696 if (!is_valid_ether_addr(addr->sa_data))
2697 return -EADDRNOTAVAIL;
2698
2699 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2700 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2701
2702 /* set the correct pool for the new PF MAC address in entry 0 */
2703 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2704 adapter->vfs_allocated_count);
2705
2706 return 0;
2707 }
2708
2709 /**
2710 * igb_write_mc_addr_list - write multicast addresses to MTA
2711 * @netdev: network interface device structure
2712 *
2713 * Writes multicast address list to the MTA hash table.
2714 * Returns: -ENOMEM on failure
2715 * 0 on no addresses written
2716 * X on writing X addresses to MTA
2717 **/
2718 static int igb_write_mc_addr_list(struct net_device *netdev)
2719 {
2720 struct igb_adapter *adapter = netdev_priv(netdev);
2721 struct e1000_hw *hw = &adapter->hw;
2722 struct dev_mc_list *mc_ptr = netdev->mc_list;
2723 u8 *mta_list;
2724 u32 vmolr = 0;
2725 int i;
2726
2727 if (!netdev->mc_count) {
2728 /* nothing to program, so clear mc list */
2729 igb_update_mc_addr_list(hw, NULL, 0);
2730 igb_restore_vf_multicasts(adapter);
2731 return 0;
2732 }
2733
2734 mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC);
2735 if (!mta_list)
2736 return -ENOMEM;
2737
2738 /* set vmolr receive overflow multicast bit */
2739 vmolr |= E1000_VMOLR_ROMPE;
2740
2741 /* The shared function expects a packed array of only addresses. */
2742 mc_ptr = netdev->mc_list;
2743
2744 for (i = 0; i < netdev->mc_count; i++) {
2745 if (!mc_ptr)
2746 break;
2747 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2748 mc_ptr = mc_ptr->next;
2749 }
2750 igb_update_mc_addr_list(hw, mta_list, i);
2751 kfree(mta_list);
2752
2753 return netdev->mc_count;
2754 }
2755
2756 /**
2757 * igb_write_uc_addr_list - write unicast addresses to RAR table
2758 * @netdev: network interface device structure
2759 *
2760 * Writes unicast address list to the RAR table.
2761 * Returns: -ENOMEM on failure/insufficient address space
2762 * 0 on no addresses written
2763 * X on writing X addresses to the RAR table
2764 **/
2765 static int igb_write_uc_addr_list(struct net_device *netdev)
2766 {
2767 struct igb_adapter *adapter = netdev_priv(netdev);
2768 struct e1000_hw *hw = &adapter->hw;
2769 unsigned int vfn = adapter->vfs_allocated_count;
2770 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2771 int count = 0;
2772
2773 /* return ENOMEM indicating insufficient memory for addresses */
2774 if (netdev->uc.count > rar_entries)
2775 return -ENOMEM;
2776
2777 if (netdev->uc.count && rar_entries) {
2778 struct netdev_hw_addr *ha;
2779 list_for_each_entry(ha, &netdev->uc.list, list) {
2780 if (!rar_entries)
2781 break;
2782 igb_rar_set_qsel(adapter, ha->addr,
2783 rar_entries--,
2784 vfn);
2785 count++;
2786 }
2787 }
2788 /* write the addresses in reverse order to avoid write combining */
2789 for (; rar_entries > 0 ; rar_entries--) {
2790 wr32(E1000_RAH(rar_entries), 0);
2791 wr32(E1000_RAL(rar_entries), 0);
2792 }
2793 wrfl();
2794
2795 return count;
2796 }
2797
2798 /**
2799 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2800 * @netdev: network interface device structure
2801 *
2802 * The set_rx_mode entry point is called whenever the unicast or multicast
2803 * address lists or the network interface flags are updated. This routine is
2804 * responsible for configuring the hardware for proper unicast, multicast,
2805 * promiscuous mode, and all-multi behavior.
2806 **/
2807 static void igb_set_rx_mode(struct net_device *netdev)
2808 {
2809 struct igb_adapter *adapter = netdev_priv(netdev);
2810 struct e1000_hw *hw = &adapter->hw;
2811 unsigned int vfn = adapter->vfs_allocated_count;
2812 u32 rctl, vmolr = 0;
2813 int count;
2814
2815 /* Check for Promiscuous and All Multicast modes */
2816 rctl = rd32(E1000_RCTL);
2817
2818 /* clear the effected bits */
2819 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2820
2821 if (netdev->flags & IFF_PROMISC) {
2822 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2823 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2824 } else {
2825 if (netdev->flags & IFF_ALLMULTI) {
2826 rctl |= E1000_RCTL_MPE;
2827 vmolr |= E1000_VMOLR_MPME;
2828 } else {
2829 /*
2830 * Write addresses to the MTA, if the attempt fails
2831 * then we should just turn on promiscous mode so
2832 * that we can at least receive multicast traffic
2833 */
2834 count = igb_write_mc_addr_list(netdev);
2835 if (count < 0) {
2836 rctl |= E1000_RCTL_MPE;
2837 vmolr |= E1000_VMOLR_MPME;
2838 } else if (count) {
2839 vmolr |= E1000_VMOLR_ROMPE;
2840 }
2841 }
2842 /*
2843 * Write addresses to available RAR registers, if there is not
2844 * sufficient space to store all the addresses then enable
2845 * unicast promiscous mode
2846 */
2847 count = igb_write_uc_addr_list(netdev);
2848 if (count < 0) {
2849 rctl |= E1000_RCTL_UPE;
2850 vmolr |= E1000_VMOLR_ROPE;
2851 }
2852 rctl |= E1000_RCTL_VFE;
2853 }
2854 wr32(E1000_RCTL, rctl);
2855
2856 /*
2857 * In order to support SR-IOV and eventually VMDq it is necessary to set
2858 * the VMOLR to enable the appropriate modes. Without this workaround
2859 * we will have issues with VLAN tag stripping not being done for frames
2860 * that are only arriving because we are the default pool
2861 */
2862 if (hw->mac.type < e1000_82576)
2863 return;
2864
2865 vmolr |= rd32(E1000_VMOLR(vfn)) &
2866 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
2867 wr32(E1000_VMOLR(vfn), vmolr);
2868 igb_restore_vf_multicasts(adapter);
2869 }
2870
2871 /* Need to wait a few seconds after link up to get diagnostic information from
2872 * the phy */
2873 static void igb_update_phy_info(unsigned long data)
2874 {
2875 struct igb_adapter *adapter = (struct igb_adapter *) data;
2876 igb_get_phy_info(&adapter->hw);
2877 }
2878
2879 /**
2880 * igb_has_link - check shared code for link and determine up/down
2881 * @adapter: pointer to driver private info
2882 **/
2883 static bool igb_has_link(struct igb_adapter *adapter)
2884 {
2885 struct e1000_hw *hw = &adapter->hw;
2886 bool link_active = false;
2887 s32 ret_val = 0;
2888
2889 /* get_link_status is set on LSC (link status) interrupt or
2890 * rx sequence error interrupt. get_link_status will stay
2891 * false until the e1000_check_for_link establishes link
2892 * for copper adapters ONLY
2893 */
2894 switch (hw->phy.media_type) {
2895 case e1000_media_type_copper:
2896 if (hw->mac.get_link_status) {
2897 ret_val = hw->mac.ops.check_for_link(hw);
2898 link_active = !hw->mac.get_link_status;
2899 } else {
2900 link_active = true;
2901 }
2902 break;
2903 case e1000_media_type_internal_serdes:
2904 ret_val = hw->mac.ops.check_for_link(hw);
2905 link_active = hw->mac.serdes_has_link;
2906 break;
2907 default:
2908 case e1000_media_type_unknown:
2909 break;
2910 }
2911
2912 return link_active;
2913 }
2914
2915 /**
2916 * igb_watchdog - Timer Call-back
2917 * @data: pointer to adapter cast into an unsigned long
2918 **/
2919 static void igb_watchdog(unsigned long data)
2920 {
2921 struct igb_adapter *adapter = (struct igb_adapter *)data;
2922 /* Do the rest outside of interrupt context */
2923 schedule_work(&adapter->watchdog_task);
2924 }
2925
2926 static void igb_watchdog_task(struct work_struct *work)
2927 {
2928 struct igb_adapter *adapter = container_of(work,
2929 struct igb_adapter, watchdog_task);
2930 struct e1000_hw *hw = &adapter->hw;
2931 struct net_device *netdev = adapter->netdev;
2932 struct igb_ring *tx_ring = adapter->tx_ring;
2933 u32 link;
2934 int i;
2935
2936 link = igb_has_link(adapter);
2937 if ((netif_carrier_ok(netdev)) && link)
2938 goto link_up;
2939
2940 if (link) {
2941 if (!netif_carrier_ok(netdev)) {
2942 u32 ctrl;
2943 hw->mac.ops.get_speed_and_duplex(&adapter->hw,
2944 &adapter->link_speed,
2945 &adapter->link_duplex);
2946
2947 ctrl = rd32(E1000_CTRL);
2948 /* Links status message must follow this format */
2949 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
2950 "Flow Control: %s\n",
2951 netdev->name,
2952 adapter->link_speed,
2953 adapter->link_duplex == FULL_DUPLEX ?
2954 "Full Duplex" : "Half Duplex",
2955 ((ctrl & E1000_CTRL_TFCE) && (ctrl &
2956 E1000_CTRL_RFCE)) ? "RX/TX" : ((ctrl &
2957 E1000_CTRL_RFCE) ? "RX" : ((ctrl &
2958 E1000_CTRL_TFCE) ? "TX" : "None")));
2959
2960 /* tweak tx_queue_len according to speed/duplex and
2961 * adjust the timeout factor */
2962 netdev->tx_queue_len = adapter->tx_queue_len;
2963 adapter->tx_timeout_factor = 1;
2964 switch (adapter->link_speed) {
2965 case SPEED_10:
2966 netdev->tx_queue_len = 10;
2967 adapter->tx_timeout_factor = 14;
2968 break;
2969 case SPEED_100:
2970 netdev->tx_queue_len = 100;
2971 /* maybe add some timeout factor ? */
2972 break;
2973 }
2974
2975 netif_carrier_on(netdev);
2976
2977 igb_ping_all_vfs(adapter);
2978
2979 /* link state has changed, schedule phy info update */
2980 if (!test_bit(__IGB_DOWN, &adapter->state))
2981 mod_timer(&adapter->phy_info_timer,
2982 round_jiffies(jiffies + 2 * HZ));
2983 }
2984 } else {
2985 if (netif_carrier_ok(netdev)) {
2986 adapter->link_speed = 0;
2987 adapter->link_duplex = 0;
2988 /* Links status message must follow this format */
2989 printk(KERN_INFO "igb: %s NIC Link is Down\n",
2990 netdev->name);
2991 netif_carrier_off(netdev);
2992
2993 igb_ping_all_vfs(adapter);
2994
2995 /* link state has changed, schedule phy info update */
2996 if (!test_bit(__IGB_DOWN, &adapter->state))
2997 mod_timer(&adapter->phy_info_timer,
2998 round_jiffies(jiffies + 2 * HZ));
2999 }
3000 }
3001
3002 link_up:
3003 igb_update_stats(adapter);
3004
3005 hw->mac.tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
3006 adapter->tpt_old = adapter->stats.tpt;
3007 hw->mac.collision_delta = adapter->stats.colc - adapter->colc_old;
3008 adapter->colc_old = adapter->stats.colc;
3009
3010 adapter->gorc = adapter->stats.gorc - adapter->gorc_old;
3011 adapter->gorc_old = adapter->stats.gorc;
3012 adapter->gotc = adapter->stats.gotc - adapter->gotc_old;
3013 adapter->gotc_old = adapter->stats.gotc;
3014
3015 igb_update_adaptive(&adapter->hw);
3016
3017 if (!netif_carrier_ok(netdev)) {
3018 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3019 /* We've lost link, so the controller stops DMA,
3020 * but we've got queued Tx work that's never going
3021 * to get done, so reset controller to flush Tx.
3022 * (Do the reset outside of interrupt context). */
3023 adapter->tx_timeout_count++;
3024 schedule_work(&adapter->reset_task);
3025 /* return immediately since reset is imminent */
3026 return;
3027 }
3028 }
3029
3030 /* Cause software interrupt to ensure rx ring is cleaned */
3031 if (adapter->msix_entries) {
3032 u32 eics = 0;
3033 for (i = 0; i < adapter->num_q_vectors; i++) {
3034 struct igb_q_vector *q_vector = adapter->q_vector[i];
3035 eics |= q_vector->eims_value;
3036 }
3037 wr32(E1000_EICS, eics);
3038 } else {
3039 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3040 }
3041
3042 /* Force detection of hung controller every watchdog period */
3043 tx_ring->detect_tx_hung = true;
3044
3045 /* Reset the timer */
3046 if (!test_bit(__IGB_DOWN, &adapter->state))
3047 mod_timer(&adapter->watchdog_timer,
3048 round_jiffies(jiffies + 2 * HZ));
3049 }
3050
/*
 * Latency classes; the numeric values are fixed at 0, 1, 2 and 255.
 * NOTE(review): presumably consumed by the dynamic ITR code that
 * follows - confirm against the users of this enum.
 */
enum latency_range {
	lowest_latency,			/* 0 */
	low_latency,			/* 1 */
	bulk_latency,			/* 2 */
	latency_invalid = 255		/* sentinel value */
};
3057
3058
3059 /**
3060 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3061 *
3062 * Stores a new ITR value based on strictly on packet size. This
3063 * algorithm is less sophisticated than that used in igb_update_itr,
3064 * due to the difficulty of synchronizing statistics across multiple
3065 * receive rings. The divisors and thresholds used by this fuction
3066 * were determined based on theoretical maximum wire speed and testing
3067 * data, in order to minimize response time while increasing bulk
3068 * throughput.
3069 * This functionality is controlled by the InterruptThrottleRate module
3070 * parameter (see igb_param.c)
3071 * NOTE: This function is called only when operating in a multiqueue
3072 * receive environment.
3073 * @q_vector: pointer to q_vector
3074 **/
3075 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3076 {
3077 int new_val = q_vector->itr_val;
3078 int avg_wire_size = 0;
3079 struct igb_adapter *adapter = q_vector->adapter;
3080
3081 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3082 * ints/sec - ITR timer value of 120 ticks.
3083 */
3084 if (adapter->link_speed != SPEED_1000) {
3085 new_val = 976;
3086 goto set_itr_val;
3087 }
3088
3089 if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3090 struct igb_ring *ring = q_vector->rx_ring;
3091 avg_wire_size = ring->total_bytes / ring->total_packets;
3092 }
3093
3094 if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3095 struct igb_ring *ring = q_vector->tx_ring;
3096 avg_wire_size = max_t(u32, avg_wire_size,
3097 (ring->total_bytes /
3098 ring->total_packets));
3099 }
3100
3101 /* if avg_wire_size isn't set no work was done */
3102 if (!avg_wire_size)
3103 goto clear_counts;
3104
3105 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3106 avg_wire_size += 24;
3107
3108 /* Don't starve jumbo frames */
3109 avg_wire_size = min(avg_wire_size, 3000);
3110
3111 /* Give a little boost to mid-size frames */
3112 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3113 new_val = avg_wire_size / 3;
3114 else
3115 new_val = avg_wire_size / 2;
3116
3117 set_itr_val:
3118 if (new_val != q_vector->itr_val) {
3119 q_vector->itr_val = new_val;
3120 q_vector->set_itr = 1;
3121 }
3122 clear_counts:
3123 if (q_vector->rx_ring) {
3124 q_vector->rx_ring->total_bytes = 0;
3125 q_vector->rx_ring->total_packets = 0;
3126 }
3127 if (q_vector->tx_ring) {
3128 q_vector->tx_ring->total_bytes = 0;
3129 q_vector->tx_ring->total_packets = 0;
3130 }
3131 }
3132
3133 /**
3134 * igb_update_itr - update the dynamic ITR value based on statistics
3135 * Stores a new ITR value based on packets and byte
3136 * counts during the last interrupt. The advantage of per interrupt
3137 * computation is faster updates and more accurate ITR for the current
3138 * traffic pattern. Constants in this function were computed
3139 * based on theoretical maximum wire speed and thresholds were set based
3140 * on testing data as well as attempting to minimize response time
3141 * while increasing bulk throughput.
3142 * this functionality is controlled by the InterruptThrottleRate module
3143 * parameter (see igb_param.c)
3144 * NOTE: These calculations are only valid when operating in a single-
3145 * queue environment.
3146 * @adapter: pointer to adapter
3147 * @itr_setting: current q_vector->itr_val
3148 * @packets: the number of packets during this measurement interval
3149 * @bytes: the number of bytes during this measurement interval
3150 **/
3151 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3152 int packets, int bytes)
3153 {
3154 unsigned int retval = itr_setting;
3155
3156 if (packets == 0)
3157 goto update_itr_done;
3158
3159 switch (itr_setting) {
3160 case lowest_latency:
3161 /* handle TSO and jumbo frames */
3162 if (bytes/packets > 8000)
3163 retval = bulk_latency;
3164 else if ((packets < 5) && (bytes > 512))
3165 retval = low_latency;
3166 break;
3167 case low_latency: /* 50 usec aka 20000 ints/s */
3168 if (bytes > 10000) {
3169 /* this if handles the TSO accounting */
3170 if (bytes/packets > 8000) {
3171 retval = bulk_latency;
3172 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3173 retval = bulk_latency;
3174 } else if ((packets > 35)) {
3175 retval = lowest_latency;
3176 }
3177 } else if (bytes/packets > 2000) {
3178 retval = bulk_latency;
3179 } else if (packets <= 2 && bytes < 512) {
3180 retval = lowest_latency;
3181 }
3182 break;
3183 case bulk_latency: /* 250 usec aka 4000 ints/s */
3184 if (bytes > 25000) {
3185 if (packets > 35)
3186 retval = low_latency;
3187 } else if (bytes < 1500) {
3188 retval = low_latency;
3189 }
3190 break;
3191 }
3192
3193 update_itr_done:
3194 return retval;
3195 }
3196
3197 static void igb_set_itr(struct igb_adapter *adapter)
3198 {
3199 struct igb_q_vector *q_vector = adapter->q_vector[0];
3200 u16 current_itr;
3201 u32 new_itr = q_vector->itr_val;
3202
3203 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3204 if (adapter->link_speed != SPEED_1000) {
3205 current_itr = 0;
3206 new_itr = 4000;
3207 goto set_itr_now;
3208 }
3209
3210 adapter->rx_itr = igb_update_itr(adapter,
3211 adapter->rx_itr,
3212 adapter->rx_ring->total_packets,
3213 adapter->rx_ring->total_bytes);
3214
3215 adapter->tx_itr = igb_update_itr(adapter,
3216 adapter->tx_itr,
3217 adapter->tx_ring->total_packets,
3218 adapter->tx_ring->total_bytes);
3219 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3220
3221 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3222 if (adapter->itr_setting == 3 && current_itr == lowest_latency)
3223 current_itr = low_latency;
3224
3225 switch (current_itr) {
3226 /* counts and packets in update_itr are dependent on these numbers */
3227 case lowest_latency:
3228 new_itr = 56; /* aka 70,000 ints/sec */
3229 break;
3230 case low_latency:
3231 new_itr = 196; /* aka 20,000 ints/sec */
3232 break;
3233 case bulk_latency:
3234 new_itr = 980; /* aka 4,000 ints/sec */
3235 break;
3236 default:
3237 break;
3238 }
3239
3240 set_itr_now:
3241 adapter->rx_ring->total_bytes = 0;
3242 adapter->rx_ring->total_packets = 0;
3243 adapter->tx_ring->total_bytes = 0;
3244 adapter->tx_ring->total_packets = 0;
3245
3246 if (new_itr != q_vector->itr_val) {
3247 /* this attempts to bias the interrupt rate towards Bulk
3248 * by adding intermediate steps when interrupt rate is
3249 * increasing */
3250 new_itr = new_itr > q_vector->itr_val ?
3251 max((new_itr * q_vector->itr_val) /
3252 (new_itr + (q_vector->itr_val >> 2)),
3253 new_itr) :
3254 new_itr;
3255 /* Don't write the value here; it resets the adapter's
3256 * internal timer, and causes us to delay far longer than
3257 * we should between interrupts. Instead, we write the ITR
3258 * value at the beginning of the next interrupt so the timing
3259 * ends up being correct.
3260 */
3261 q_vector->itr_val = new_itr;
3262 q_vector->set_itr = 1;
3263 }
3264
3265 return;
3266 }
3267
/*
 * Per-packet transmit flags built up in igb_xmit_frame_ring_adv().  The
 * low bits are booleans; the VLAN tag is carried in the upper 16 bits
 * (IGB_TX_FLAGS_VLAN_MASK / IGB_TX_FLAGS_VLAN_SHIFT).
 */
#define IGB_TX_FLAGS_CSUM		0x00000001
#define IGB_TX_FLAGS_VLAN		0x00000002
#define IGB_TX_FLAGS_TSO		0x00000004
#define IGB_TX_FLAGS_IPV4		0x00000008
#define IGB_TX_FLAGS_TSTAMP		0x00000010
#define IGB_TX_FLAGS_VLAN_MASK		0xffff0000
#define IGB_TX_FLAGS_VLAN_SHIFT		16
3275
3276 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3277 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3278 {
3279 struct e1000_adv_tx_context_desc *context_desc;
3280 unsigned int i;
3281 int err;
3282 struct igb_buffer *buffer_info;
3283 u32 info = 0, tu_cmd = 0;
3284 u32 mss_l4len_idx, l4len;
3285 *hdr_len = 0;
3286
3287 if (skb_header_cloned(skb)) {
3288 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3289 if (err)
3290 return err;
3291 }
3292
3293 l4len = tcp_hdrlen(skb);
3294 *hdr_len += l4len;
3295
3296 if (skb->protocol == htons(ETH_P_IP)) {
3297 struct iphdr *iph = ip_hdr(skb);
3298 iph->tot_len = 0;
3299 iph->check = 0;
3300 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3301 iph->daddr, 0,
3302 IPPROTO_TCP,
3303 0);
3304 } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
3305 ipv6_hdr(skb)->payload_len = 0;
3306 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3307 &ipv6_hdr(skb)->daddr,
3308 0, IPPROTO_TCP, 0);
3309 }
3310
3311 i = tx_ring->next_to_use;
3312
3313 buffer_info = &tx_ring->buffer_info[i];
3314 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3315 /* VLAN MACLEN IPLEN */
3316 if (tx_flags & IGB_TX_FLAGS_VLAN)
3317 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3318 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3319 *hdr_len += skb_network_offset(skb);
3320 info |= skb_network_header_len(skb);
3321 *hdr_len += skb_network_header_len(skb);
3322 context_desc->vlan_macip_lens = cpu_to_le32(info);
3323
3324 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3325 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3326
3327 if (skb->protocol == htons(ETH_P_IP))
3328 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3329 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3330
3331 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3332
3333 /* MSS L4LEN IDX */
3334 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3335 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3336
3337 /* For 82575, context index must be unique per ring. */
3338 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3339 mss_l4len_idx |= tx_ring->reg_idx << 4;
3340
3341 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3342 context_desc->seqnum_seed = 0;
3343
3344 buffer_info->time_stamp = jiffies;
3345 buffer_info->next_to_watch = i;
3346 buffer_info->dma = 0;
3347 i++;
3348 if (i == tx_ring->count)
3349 i = 0;
3350
3351 tx_ring->next_to_use = i;
3352
3353 return true;
3354 }
3355
3356 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3357 struct sk_buff *skb, u32 tx_flags)
3358 {
3359 struct e1000_adv_tx_context_desc *context_desc;
3360 struct pci_dev *pdev = tx_ring->pdev;
3361 struct igb_buffer *buffer_info;
3362 u32 info = 0, tu_cmd = 0;
3363 unsigned int i;
3364
3365 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3366 (tx_flags & IGB_TX_FLAGS_VLAN)) {
3367 i = tx_ring->next_to_use;
3368 buffer_info = &tx_ring->buffer_info[i];
3369 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3370
3371 if (tx_flags & IGB_TX_FLAGS_VLAN)
3372 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3373 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3374 if (skb->ip_summed == CHECKSUM_PARTIAL)
3375 info |= skb_network_header_len(skb);
3376
3377 context_desc->vlan_macip_lens = cpu_to_le32(info);
3378
3379 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3380
3381 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3382 __be16 protocol;
3383
3384 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3385 const struct vlan_ethhdr *vhdr =
3386 (const struct vlan_ethhdr*)skb->data;
3387
3388 protocol = vhdr->h_vlan_encapsulated_proto;
3389 } else {
3390 protocol = skb->protocol;
3391 }
3392
3393 switch (protocol) {
3394 case cpu_to_be16(ETH_P_IP):
3395 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3396 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3397 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3398 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3399 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3400 break;
3401 case cpu_to_be16(ETH_P_IPV6):
3402 /* XXX what about other V6 headers?? */
3403 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3404 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3405 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3406 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3407 break;
3408 default:
3409 if (unlikely(net_ratelimit()))
3410 dev_warn(&pdev->dev,
3411 "partial checksum but proto=%x!\n",
3412 skb->protocol);
3413 break;
3414 }
3415 }
3416
3417 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3418 context_desc->seqnum_seed = 0;
3419 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3420 context_desc->mss_l4len_idx =
3421 cpu_to_le32(tx_ring->reg_idx << 4);
3422
3423 buffer_info->time_stamp = jiffies;
3424 buffer_info->next_to_watch = i;
3425 buffer_info->dma = 0;
3426
3427 i++;
3428 if (i == tx_ring->count)
3429 i = 0;
3430 tx_ring->next_to_use = i;
3431
3432 return true;
3433 }
3434 return false;
3435 }
3436
/*
 * Size limit for a single data descriptor, expressed as a power of two.
 * igb_tx_map_adv() BUG()s on any buffer of IGB_MAX_DATA_PER_TXD bytes or
 * more.
 */
#define IGB_MAX_TXD_PWR		16
#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
3439
3440 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3441 unsigned int first)
3442 {
3443 struct igb_buffer *buffer_info;
3444 struct pci_dev *pdev = tx_ring->pdev;
3445 unsigned int len = skb_headlen(skb);
3446 unsigned int count = 0, i;
3447 unsigned int f;
3448 dma_addr_t *map;
3449
3450 i = tx_ring->next_to_use;
3451
3452 if (skb_dma_map(&pdev->dev, skb, DMA_TO_DEVICE)) {
3453 dev_err(&pdev->dev, "TX DMA map failed\n");
3454 return 0;
3455 }
3456
3457 map = skb_shinfo(skb)->dma_maps;
3458
3459 buffer_info = &tx_ring->buffer_info[i];
3460 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3461 buffer_info->length = len;
3462 /* set time_stamp *before* dma to help avoid a possible race */
3463 buffer_info->time_stamp = jiffies;
3464 buffer_info->next_to_watch = i;
3465 buffer_info->dma = skb_shinfo(skb)->dma_head;
3466
3467 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3468 struct skb_frag_struct *frag;
3469
3470 i++;
3471 if (i == tx_ring->count)
3472 i = 0;
3473
3474 frag = &skb_shinfo(skb)->frags[f];
3475 len = frag->size;
3476
3477 buffer_info = &tx_ring->buffer_info[i];
3478 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3479 buffer_info->length = len;
3480 buffer_info->time_stamp = jiffies;
3481 buffer_info->next_to_watch = i;
3482 buffer_info->dma = map[count];
3483 count++;
3484 }
3485
3486 tx_ring->buffer_info[i].skb = skb;
3487 tx_ring->buffer_info[first].next_to_watch = i;
3488
3489 return count + 1;
3490 }
3491
3492 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3493 int tx_flags, int count, u32 paylen,
3494 u8 hdr_len)
3495 {
3496 union e1000_adv_tx_desc *tx_desc = NULL;
3497 struct igb_buffer *buffer_info;
3498 u32 olinfo_status = 0, cmd_type_len;
3499 unsigned int i;
3500
3501 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3502 E1000_ADVTXD_DCMD_DEXT);
3503
3504 if (tx_flags & IGB_TX_FLAGS_VLAN)
3505 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3506
3507 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3508 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3509
3510 if (tx_flags & IGB_TX_FLAGS_TSO) {
3511 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3512
3513 /* insert tcp checksum */
3514 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3515
3516 /* insert ip checksum */
3517 if (tx_flags & IGB_TX_FLAGS_IPV4)
3518 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3519
3520 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3521 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3522 }
3523
3524 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3525 (tx_flags & (IGB_TX_FLAGS_CSUM |
3526 IGB_TX_FLAGS_TSO |
3527 IGB_TX_FLAGS_VLAN)))
3528 olinfo_status |= tx_ring->reg_idx << 4;
3529
3530 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3531
3532 i = tx_ring->next_to_use;
3533 while (count--) {
3534 buffer_info = &tx_ring->buffer_info[i];
3535 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3536 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3537 tx_desc->read.cmd_type_len =
3538 cpu_to_le32(cmd_type_len | buffer_info->length);
3539 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3540 i++;
3541 if (i == tx_ring->count)
3542 i = 0;
3543 }
3544
3545 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3546 /* Force memory writes to complete before letting h/w
3547 * know there are new descriptors to fetch. (Only
3548 * applicable for weak-ordered memory model archs,
3549 * such as IA-64). */
3550 wmb();
3551
3552 tx_ring->next_to_use = i;
3553 writel(i, tx_ring->tail);
3554 /* we need this if more than one processor can write to our tail
3555 * at a time, it syncronizes IO on IA64/Altix systems */
3556 mmiowb();
3557 }
3558
3559 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3560 {
3561 struct net_device *netdev = tx_ring->netdev;
3562
3563 netif_stop_subqueue(netdev, tx_ring->queue_index);
3564
3565 /* Herbert's original patch had:
3566 * smp_mb__after_netif_stop_queue();
3567 * but since that doesn't exist yet, just open code it. */
3568 smp_mb();
3569
3570 /* We need to check again in a case another CPU has just
3571 * made room available. */
3572 if (igb_desc_unused(tx_ring) < size)
3573 return -EBUSY;
3574
3575 /* A reprieve! */
3576 netif_wake_subqueue(netdev, tx_ring->queue_index);
3577 tx_ring->tx_stats.restart_queue++;
3578 return 0;
3579 }
3580
/**
 * igb_maybe_stop_tx - stop the queue if the ring is running out of room
 * @tx_ring: ring to check
 * @size: number of free descriptors required
 *
 * Returns 0 when at least @size descriptors are free; otherwise defers to
 * __igb_maybe_stop_tx() and returns its result.
 **/
static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	if (igb_desc_unused(tx_ring) < size)
		return __igb_maybe_stop_tx(tx_ring, size);

	return 0;
}
3587
3588 static netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3589 struct igb_ring *tx_ring)
3590 {
3591 struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3592 unsigned int first;
3593 unsigned int tx_flags = 0;
3594 u8 hdr_len = 0;
3595 int count = 0;
3596 int tso = 0;
3597 union skb_shared_tx *shtx;
3598
3599 if (test_bit(__IGB_DOWN, &adapter->state)) {
3600 dev_kfree_skb_any(skb);
3601 return NETDEV_TX_OK;
3602 }
3603
3604 if (skb->len <= 0) {
3605 dev_kfree_skb_any(skb);
3606 return NETDEV_TX_OK;
3607 }
3608
3609 /* need: 1 descriptor per page,
3610 * + 2 desc gap to keep tail from touching head,
3611 * + 1 desc for skb->data,
3612 * + 1 desc for context descriptor,
3613 * otherwise try next time */
3614 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3615 /* this is a hard error */
3616 return NETDEV_TX_BUSY;
3617 }
3618
3619 /*
3620 * TODO: check that there currently is no other packet with
3621 * time stamping in the queue
3622 *
3623 * When doing time stamping, keep the connection to the socket
3624 * a while longer: it is still needed by skb_hwtstamp_tx(),
3625 * called either in igb_tx_hwtstamp() or by our caller when
3626 * doing software time stamping.
3627 */
3628 shtx = skb_tx(skb);
3629 if (unlikely(shtx->hardware)) {
3630 shtx->in_progress = 1;
3631 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3632 }
3633
3634 if (adapter->vlgrp && vlan_tx_tag_present(skb)) {
3635 tx_flags |= IGB_TX_FLAGS_VLAN;
3636 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3637 }
3638
3639 if (skb->protocol == htons(ETH_P_IP))
3640 tx_flags |= IGB_TX_FLAGS_IPV4;
3641
3642 first = tx_ring->next_to_use;
3643 if (skb_is_gso(skb)) {
3644 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3645 if (tso < 0) {
3646 dev_kfree_skb_any(skb);
3647 return NETDEV_TX_OK;
3648 }
3649 }
3650
3651 if (tso)
3652 tx_flags |= IGB_TX_FLAGS_TSO;
3653 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3654 (skb->ip_summed == CHECKSUM_PARTIAL))
3655 tx_flags |= IGB_TX_FLAGS_CSUM;
3656
3657 /*
3658 * count reflects descriptors mapped, if 0 then mapping error
3659 * has occured and we need to rewind the descriptor queue
3660 */
3661 count = igb_tx_map_adv(tx_ring, skb, first);
3662
3663 if (!count) {
3664 dev_kfree_skb_any(skb);
3665 tx_ring->buffer_info[first].time_stamp = 0;
3666 tx_ring->next_to_use = first;
3667 return NETDEV_TX_OK;
3668 }
3669
3670 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3671
3672 /* Make sure there is space in the ring for the next send. */
3673 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3674
3675 return NETDEV_TX_OK;
3676 }
3677
3678 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3679 struct net_device *netdev)
3680 {
3681 struct igb_adapter *adapter = netdev_priv(netdev);
3682 struct igb_ring *tx_ring;
3683
3684 int r_idx = 0;
3685 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3686 tx_ring = adapter->multi_tx_table[r_idx];
3687
3688 /* This goes back to the question of how to logically map a tx queue
3689 * to a flow. Right now, performance is impacted slightly negatively
3690 * if using multiple tx queues. If the stack breaks away from a
3691 * single qdisc implementation, we can look at this again. */
3692 return igb_xmit_frame_ring_adv(skb, tx_ring);
3693 }
3694
3695 /**
3696 * igb_tx_timeout - Respond to a Tx Hang
3697 * @netdev: network interface device structure
3698 **/
3699 static void igb_tx_timeout(struct net_device *netdev)
3700 {
3701 struct igb_adapter *adapter = netdev_priv(netdev);
3702 struct e1000_hw *hw = &adapter->hw;
3703
3704 /* Do the reset outside of interrupt context */
3705 adapter->tx_timeout_count++;
3706 schedule_work(&adapter->reset_task);
3707 wr32(E1000_EICS,
3708 (adapter->eims_enable_mask & ~adapter->eims_other));
3709 }
3710
3711 static void igb_reset_task(struct work_struct *work)
3712 {
3713 struct igb_adapter *adapter;
3714 adapter = container_of(work, struct igb_adapter, reset_task);
3715
3716 igb_reinit_locked(adapter);
3717 }
3718
3719 /**
3720 * igb_get_stats - Get System Network Statistics
3721 * @netdev: network interface device structure
3722 *
3723 * Returns the address of the device statistics structure.
3724 * The statistics are actually updated from the timer callback.
3725 **/
3726 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3727 {
3728 /* only return the current stats */
3729 return &netdev->stats;
3730 }
3731
3732 /**
3733 * igb_change_mtu - Change the Maximum Transfer Unit
3734 * @netdev: network interface device structure
3735 * @new_mtu: new value for maximum frame size
3736 *
3737 * Returns 0 on success, negative on failure
3738 **/
3739 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3740 {
3741 struct igb_adapter *adapter = netdev_priv(netdev);
3742 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3743 u32 rx_buffer_len, i;
3744
3745 if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) ||
3746 (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3747 dev_err(&adapter->pdev->dev, "Invalid MTU setting\n");
3748 return -EINVAL;
3749 }
3750
3751 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3752 dev_err(&adapter->pdev->dev, "MTU > 9216 not supported.\n");
3753 return -EINVAL;
3754 }
3755
3756 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3757 msleep(1);
3758
3759 /* igb_down has a dependency on max_frame_size */
3760 adapter->max_frame_size = max_frame;
3761 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3762 * means we reserve 2 more, this pushes us to allocate from the next
3763 * larger slab size.
3764 * i.e. RXBUFFER_2048 --> size-4096 slab
3765 */
3766
3767 if (max_frame <= IGB_RXBUFFER_1024)
3768 rx_buffer_len = IGB_RXBUFFER_1024;
3769 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3770 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3771 else
3772 rx_buffer_len = IGB_RXBUFFER_128;
3773
3774 if (netif_running(netdev))
3775 igb_down(adapter);
3776
3777 dev_info(&adapter->pdev->dev, "changing MTU from %d to %d\n",
3778 netdev->mtu, new_mtu);
3779 netdev->mtu = new_mtu;
3780
3781 for (i = 0; i < adapter->num_rx_queues; i++)
3782 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3783
3784 if (netif_running(netdev))
3785 igb_up(adapter);
3786 else
3787 igb_reset(adapter);
3788
3789 clear_bit(__IGB_RESETTING, &adapter->state);
3790
3791 return 0;
3792 }
3793
3794 /**
3795 * igb_update_stats - Update the board statistics counters
3796 * @adapter: board private structure
3797 **/
3798
3799 void igb_update_stats(struct igb_adapter *adapter)
3800 {
3801 struct net_device *netdev = adapter->netdev;
3802 struct e1000_hw *hw = &adapter->hw;
3803 struct pci_dev *pdev = adapter->pdev;
3804 u16 phy_tmp;
3805
3806 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3807
3808 /*
3809 * Prevent stats update while adapter is being reset, or if the pci
3810 * connection is down.
3811 */
3812 if (adapter->link_speed == 0)
3813 return;
3814 if (pci_channel_offline(pdev))
3815 return;
3816
3817 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3818 adapter->stats.gprc += rd32(E1000_GPRC);
3819 adapter->stats.gorc += rd32(E1000_GORCL);
3820 rd32(E1000_GORCH); /* clear GORCL */
3821 adapter->stats.bprc += rd32(E1000_BPRC);
3822 adapter->stats.mprc += rd32(E1000_MPRC);
3823 adapter->stats.roc += rd32(E1000_ROC);
3824
3825 adapter->stats.prc64 += rd32(E1000_PRC64);
3826 adapter->stats.prc127 += rd32(E1000_PRC127);
3827 adapter->stats.prc255 += rd32(E1000_PRC255);
3828 adapter->stats.prc511 += rd32(E1000_PRC511);
3829 adapter->stats.prc1023 += rd32(E1000_PRC1023);
3830 adapter->stats.prc1522 += rd32(E1000_PRC1522);
3831 adapter->stats.symerrs += rd32(E1000_SYMERRS);
3832 adapter->stats.sec += rd32(E1000_SEC);
3833
3834 adapter->stats.mpc += rd32(E1000_MPC);
3835 adapter->stats.scc += rd32(E1000_SCC);
3836 adapter->stats.ecol += rd32(E1000_ECOL);
3837 adapter->stats.mcc += rd32(E1000_MCC);
3838 adapter->stats.latecol += rd32(E1000_LATECOL);
3839 adapter->stats.dc += rd32(E1000_DC);
3840 adapter->stats.rlec += rd32(E1000_RLEC);
3841 adapter->stats.xonrxc += rd32(E1000_XONRXC);
3842 adapter->stats.xontxc += rd32(E1000_XONTXC);
3843 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
3844 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
3845 adapter->stats.fcruc += rd32(E1000_FCRUC);
3846 adapter->stats.gptc += rd32(E1000_GPTC);
3847 adapter->stats.gotc += rd32(E1000_GOTCL);
3848 rd32(E1000_GOTCH); /* clear GOTCL */
3849 adapter->stats.rnbc += rd32(E1000_RNBC);
3850 adapter->stats.ruc += rd32(E1000_RUC);
3851 adapter->stats.rfc += rd32(E1000_RFC);
3852 adapter->stats.rjc += rd32(E1000_RJC);
3853 adapter->stats.tor += rd32(E1000_TORH);
3854 adapter->stats.tot += rd32(E1000_TOTH);
3855 adapter->stats.tpr += rd32(E1000_TPR);
3856
3857 adapter->stats.ptc64 += rd32(E1000_PTC64);
3858 adapter->stats.ptc127 += rd32(E1000_PTC127);
3859 adapter->stats.ptc255 += rd32(E1000_PTC255);
3860 adapter->stats.ptc511 += rd32(E1000_PTC511);
3861 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
3862 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
3863
3864 adapter->stats.mptc += rd32(E1000_MPTC);
3865 adapter->stats.bptc += rd32(E1000_BPTC);
3866
3867 /* used for adaptive IFS */
3868
3869 hw->mac.tx_packet_delta = rd32(E1000_TPT);
3870 adapter->stats.tpt += hw->mac.tx_packet_delta;
3871 hw->mac.collision_delta = rd32(E1000_COLC);
3872 adapter->stats.colc += hw->mac.collision_delta;
3873
3874 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
3875 adapter->stats.rxerrc += rd32(E1000_RXERRC);
3876 adapter->stats.tncrs += rd32(E1000_TNCRS);
3877 adapter->stats.tsctc += rd32(E1000_TSCTC);
3878 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
3879
3880 adapter->stats.iac += rd32(E1000_IAC);
3881 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
3882 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
3883 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
3884 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
3885 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
3886 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
3887 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
3888 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
3889
3890 /* Fill out the OS statistics structure */
3891 netdev->stats.multicast = adapter->stats.mprc;
3892 netdev->stats.collisions = adapter->stats.colc;
3893
3894 /* Rx Errors */
3895
3896 if (hw->mac.type != e1000_82575) {
3897 u32 rqdpc_tmp;
3898 u64 rqdpc_total = 0;
3899 int i;
3900 /* Read out drops stats per RX queue. Notice RQDPC (Receive
3901 * Queue Drop Packet Count) stats only gets incremented, if
3902 * the DROP_EN but it set (in the SRRCTL register for that
3903 * queue). If DROP_EN bit is NOT set, then the some what
3904 * equivalent count is stored in RNBC (not per queue basis).
3905 * Also note the drop count is due to lack of available
3906 * descriptors.
3907 */
3908 for (i = 0; i < adapter->num_rx_queues; i++) {
3909 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0xFFF;
3910 adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
3911 rqdpc_total += adapter->rx_ring[i].rx_stats.drops;
3912 }
3913 netdev->stats.rx_fifo_errors = rqdpc_total;
3914 }
3915
3916 /* Note RNBC (Receive No Buffers Count) is an not an exact
3917 * drop count as the hardware FIFO might save the day. Thats
3918 * one of the reason for saving it in rx_fifo_errors, as its
3919 * potentially not a true drop.
3920 */
3921 netdev->stats.rx_fifo_errors += adapter->stats.rnbc;
3922
3923 /* RLEC on some newer hardware can be incorrect so build
3924 * our own version based on RUC and ROC */
3925 netdev->stats.rx_errors = adapter->stats.rxerrc +
3926 adapter->stats.crcerrs + adapter->stats.algnerrc +
3927 adapter->stats.ruc + adapter->stats.roc +
3928 adapter->stats.cexterr;
3929 netdev->stats.rx_length_errors = adapter->stats.ruc +
3930 adapter->stats.roc;
3931 netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
3932 netdev->stats.rx_frame_errors = adapter->stats.algnerrc;
3933 netdev->stats.rx_missed_errors = adapter->stats.mpc;
3934
3935 /* Tx Errors */
3936 netdev->stats.tx_errors = adapter->stats.ecol +
3937 adapter->stats.latecol;
3938 netdev->stats.tx_aborted_errors = adapter->stats.ecol;
3939 netdev->stats.tx_window_errors = adapter->stats.latecol;
3940 netdev->stats.tx_carrier_errors = adapter->stats.tncrs;
3941
3942 /* Tx Dropped needs to be maintained elsewhere */
3943
3944 /* Phy Stats */
3945 if (hw->phy.media_type == e1000_media_type_copper) {
3946 if ((adapter->link_speed == SPEED_1000) &&
3947 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
3948 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
3949 adapter->phy_stats.idle_errors += phy_tmp;
3950 }
3951 }
3952
3953 /* Management Stats */
3954 adapter->stats.mgptc += rd32(E1000_MGTPTC);
3955 adapter->stats.mgprc += rd32(E1000_MGTPRC);
3956 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
3957 }
3958
3959 static irqreturn_t igb_msix_other(int irq, void *data)
3960 {
3961 struct igb_adapter *adapter = data;
3962 struct e1000_hw *hw = &adapter->hw;
3963 u32 icr = rd32(E1000_ICR);
3964 /* reading ICR causes bit 31 of EICR to be cleared */
3965
3966 if (icr & E1000_ICR_DOUTSYNC) {
3967 /* HW is reporting DMA is out of sync */
3968 adapter->stats.doosync++;
3969 }
3970
3971 /* Check for a mailbox event */
3972 if (icr & E1000_ICR_VMMB)
3973 igb_msg_task(adapter);
3974
3975 if (icr & E1000_ICR_LSC) {
3976 hw->mac.get_link_status = 1;
3977 /* guard against interrupt when we're going down */
3978 if (!test_bit(__IGB_DOWN, &adapter->state))
3979 mod_timer(&adapter->watchdog_timer, jiffies + 1);
3980 }
3981
3982 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_VMMB);
3983 wr32(E1000_EIMS, adapter->eims_other);
3984
3985 return IRQ_HANDLED;
3986 }
3987
3988 static void igb_write_itr(struct igb_q_vector *q_vector)
3989 {
3990 u32 itr_val = q_vector->itr_val & 0x7FFC;
3991
3992 if (!q_vector->set_itr)
3993 return;
3994
3995 if (!itr_val)
3996 itr_val = 0x4;
3997
3998 if (q_vector->itr_shift)
3999 itr_val |= itr_val << q_vector->itr_shift;
4000 else
4001 itr_val |= 0x8000000;
4002
4003 writel(itr_val, q_vector->itr_register);
4004 q_vector->set_itr = 0;
4005 }
4006
4007 static irqreturn_t igb_msix_ring(int irq, void *data)
4008 {
4009 struct igb_q_vector *q_vector = data;
4010
4011 /* Write the ITR value calculated from the previous interrupt. */
4012 igb_write_itr(q_vector);
4013
4014 napi_schedule(&q_vector->napi);
4015
4016 return IRQ_HANDLED;
4017 }
4018
4019 #ifdef CONFIG_IGB_DCA
4020 static void igb_update_dca(struct igb_q_vector *q_vector)
4021 {
4022 struct igb_adapter *adapter = q_vector->adapter;
4023 struct e1000_hw *hw = &adapter->hw;
4024 int cpu = get_cpu();
4025
4026 if (q_vector->cpu == cpu)
4027 goto out_no_update;
4028
4029 if (q_vector->tx_ring) {
4030 int q = q_vector->tx_ring->reg_idx;
4031 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4032 if (hw->mac.type == e1000_82575) {
4033 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4034 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4035 } else {
4036 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4037 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4038 E1000_DCA_TXCTRL_CPUID_SHIFT;
4039 }
4040 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4041 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4042 }
4043 if (q_vector->rx_ring) {
4044 int q = q_vector->rx_ring->reg_idx;
4045 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4046 if (hw->mac.type == e1000_82575) {
4047 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4048 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4049 } else {
4050 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4051 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4052 E1000_DCA_RXCTRL_CPUID_SHIFT;
4053 }
4054 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4055 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4056 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4057 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4058 }
4059 q_vector->cpu = cpu;
4060 out_no_update:
4061 put_cpu();
4062 }
4063
4064 static void igb_setup_dca(struct igb_adapter *adapter)
4065 {
4066 struct e1000_hw *hw = &adapter->hw;
4067 int i;
4068
4069 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4070 return;
4071
4072 /* Always use CB2 mode, difference is masked in the CB driver. */
4073 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4074
4075 for (i = 0; i < adapter->num_q_vectors; i++) {
4076 struct igb_q_vector *q_vector = adapter->q_vector[i];
4077 q_vector->cpu = -1;
4078 igb_update_dca(q_vector);
4079 }
4080 }
4081
4082 static int __igb_notify_dca(struct device *dev, void *data)
4083 {
4084 struct net_device *netdev = dev_get_drvdata(dev);
4085 struct igb_adapter *adapter = netdev_priv(netdev);
4086 struct e1000_hw *hw = &adapter->hw;
4087 unsigned long event = *(unsigned long *)data;
4088
4089 switch (event) {
4090 case DCA_PROVIDER_ADD:
4091 /* if already enabled, don't do it again */
4092 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4093 break;
4094 /* Always use CB2 mode, difference is masked
4095 * in the CB driver. */
4096 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4097 if (dca_add_requester(dev) == 0) {
4098 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4099 dev_info(&adapter->pdev->dev, "DCA enabled\n");
4100 igb_setup_dca(adapter);
4101 break;
4102 }
4103 /* Fall Through since DCA is disabled. */
4104 case DCA_PROVIDER_REMOVE:
4105 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4106 /* without this a class_device is left
4107 * hanging around in the sysfs model */
4108 dca_remove_requester(dev);
4109 dev_info(&adapter->pdev->dev, "DCA disabled\n");
4110 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4111 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4112 }
4113 break;
4114 }
4115
4116 return 0;
4117 }
4118
4119 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4120 void *p)
4121 {
4122 int ret_val;
4123
4124 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4125 __igb_notify_dca);
4126
4127 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4128 }
4129 #endif /* CONFIG_IGB_DCA */
4130
4131 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4132 {
4133 struct e1000_hw *hw = &adapter->hw;
4134 u32 ping;
4135 int i;
4136
4137 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4138 ping = E1000_PF_CONTROL_MSG;
4139 if (adapter->vf_data[i].clear_to_send)
4140 ping |= E1000_VT_MSGTYPE_CTS;
4141 igb_write_mbx(hw, &ping, 1, i);
4142 }
4143 }
4144
4145 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4146 u32 *msgbuf, u32 vf)
4147 {
4148 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4149 u16 *hash_list = (u16 *)&msgbuf[1];
4150 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4151 int i;
4152
4153 /* only up to 30 hash values supported */
4154 if (n > 30)
4155 n = 30;
4156
4157 /* salt away the number of multi cast addresses assigned
4158 * to this VF for later use to restore when the PF multi cast
4159 * list changes
4160 */
4161 vf_data->num_vf_mc_hashes = n;
4162
4163 /* VFs are limited to using the MTA hash table for their multicast
4164 * addresses */
4165 for (i = 0; i < n; i++)
4166 vf_data->vf_mc_hashes[i] = hash_list[i];
4167
4168 /* Flush and reset the mta with the new values */
4169 igb_set_rx_mode(adapter->netdev);
4170
4171 return 0;
4172 }
4173
4174 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4175 {
4176 struct e1000_hw *hw = &adapter->hw;
4177 struct vf_data_storage *vf_data;
4178 int i, j;
4179
4180 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4181 vf_data = &adapter->vf_data[i];
4182 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4183 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4184 }
4185 }
4186
4187 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4188 {
4189 struct e1000_hw *hw = &adapter->hw;
4190 u32 pool_mask, reg, vid;
4191 int i;
4192
4193 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4194
4195 /* Find the vlan filter for this id */
4196 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4197 reg = rd32(E1000_VLVF(i));
4198
4199 /* remove the vf from the pool */
4200 reg &= ~pool_mask;
4201
4202 /* if pool is empty then remove entry from vfta */
4203 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4204 (reg & E1000_VLVF_VLANID_ENABLE)) {
4205 reg = 0;
4206 vid = reg & E1000_VLVF_VLANID_MASK;
4207 igb_vfta_set(hw, vid, false);
4208 }
4209
4210 wr32(E1000_VLVF(i), reg);
4211 }
4212
4213 adapter->vf_data[vf].vlans_enabled = 0;
4214 }
4215
4216 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4217 {
4218 struct e1000_hw *hw = &adapter->hw;
4219 u32 reg, i;
4220
4221 /* It is an error to call this function when VFs are not enabled */
4222 if (!adapter->vfs_allocated_count)
4223 return -1;
4224
4225 /* Find the vlan filter for this id */
4226 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4227 reg = rd32(E1000_VLVF(i));
4228 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4229 vid == (reg & E1000_VLVF_VLANID_MASK))
4230 break;
4231 }
4232
4233 if (add) {
4234 if (i == E1000_VLVF_ARRAY_SIZE) {
4235 /* Did not find a matching VLAN ID entry that was
4236 * enabled. Search for a free filter entry, i.e.
4237 * one without the enable bit set
4238 */
4239 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4240 reg = rd32(E1000_VLVF(i));
4241 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4242 break;
4243 }
4244 }
4245 if (i < E1000_VLVF_ARRAY_SIZE) {
4246 /* Found an enabled/available entry */
4247 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4248
4249 /* if !enabled we need to set this up in vfta */
4250 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4251 /* add VID to filter table, if bit already set
4252 * PF must have added it outside of table */
4253 if (igb_vfta_set(hw, vid, true))
4254 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT +
4255 adapter->vfs_allocated_count);
4256 reg |= E1000_VLVF_VLANID_ENABLE;
4257 }
4258 reg &= ~E1000_VLVF_VLANID_MASK;
4259 reg |= vid;
4260
4261 wr32(E1000_VLVF(i), reg);
4262
4263 /* do not modify RLPML for PF devices */
4264 if (vf >= adapter->vfs_allocated_count)
4265 return 0;
4266
4267 if (!adapter->vf_data[vf].vlans_enabled) {
4268 u32 size;
4269 reg = rd32(E1000_VMOLR(vf));
4270 size = reg & E1000_VMOLR_RLPML_MASK;
4271 size += 4;
4272 reg &= ~E1000_VMOLR_RLPML_MASK;
4273 reg |= size;
4274 wr32(E1000_VMOLR(vf), reg);
4275 }
4276 adapter->vf_data[vf].vlans_enabled++;
4277
4278 return 0;
4279 }
4280 } else {
4281 if (i < E1000_VLVF_ARRAY_SIZE) {
4282 /* remove vf from the pool */
4283 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4284 /* if pool is empty then remove entry from vfta */
4285 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4286 reg = 0;
4287 igb_vfta_set(hw, vid, false);
4288 }
4289 wr32(E1000_VLVF(i), reg);
4290
4291 /* do not modify RLPML for PF devices */
4292 if (vf >= adapter->vfs_allocated_count)
4293 return 0;
4294
4295 adapter->vf_data[vf].vlans_enabled--;
4296 if (!adapter->vf_data[vf].vlans_enabled) {
4297 u32 size;
4298 reg = rd32(E1000_VMOLR(vf));
4299 size = reg & E1000_VMOLR_RLPML_MASK;
4300 size -= 4;
4301 reg &= ~E1000_VMOLR_RLPML_MASK;
4302 reg |= size;
4303 wr32(E1000_VMOLR(vf), reg);
4304 }
4305 return 0;
4306 }
4307 }
4308 return -1;
4309 }
4310
4311 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4312 {
4313 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4314 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4315
4316 return igb_vlvf_set(adapter, vid, add, vf);
4317 }
4318
4319 static inline void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4320 {
4321 struct e1000_hw *hw = &adapter->hw;
4322
4323 /* disable mailbox functionality for vf */
4324 adapter->vf_data[vf].clear_to_send = false;
4325
4326 /* reset offloads to defaults */
4327 igb_set_vmolr(hw, vf);
4328
4329 /* reset vlans for device */
4330 igb_clear_vf_vfta(adapter, vf);
4331
4332 /* reset multicast table array for vf */
4333 adapter->vf_data[vf].num_vf_mc_hashes = 0;
4334
4335 /* Flush and reset the mta with the new values */
4336 igb_set_rx_mode(adapter->netdev);
4337 }
4338
4339 static inline void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4340 {
4341 struct e1000_hw *hw = &adapter->hw;
4342 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4343 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4344 u32 reg, msgbuf[3];
4345 u8 *addr = (u8 *)(&msgbuf[1]);
4346
4347 /* process all the same items cleared in a function level reset */
4348 igb_vf_reset_event(adapter, vf);
4349
4350 /* set vf mac address */
4351 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4352
4353 /* enable transmit and receive for vf */
4354 reg = rd32(E1000_VFTE);
4355 wr32(E1000_VFTE, reg | (1 << vf));
4356 reg = rd32(E1000_VFRE);
4357 wr32(E1000_VFRE, reg | (1 << vf));
4358
4359 /* enable mailbox functionality for vf */
4360 adapter->vf_data[vf].clear_to_send = true;
4361
4362 /* reply to reset with ack and vf mac address */
4363 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4364 memcpy(addr, vf_mac, 6);
4365 igb_write_mbx(hw, msgbuf, 3, vf);
4366 }
4367
4368 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4369 {
4370 unsigned char *addr = (char *)&msg[1];
4371 int err = -1;
4372
4373 if (is_valid_ether_addr(addr))
4374 err = igb_set_vf_mac(adapter, vf, addr);
4375
4376 return err;
4377
4378 }
4379
4380 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4381 {
4382 struct e1000_hw *hw = &adapter->hw;
4383 u32 msg = E1000_VT_MSGTYPE_NACK;
4384
4385 /* if device isn't clear to send it shouldn't be reading either */
4386 if (!adapter->vf_data[vf].clear_to_send)
4387 igb_write_mbx(hw, &msg, 1, vf);
4388 }
4389
4390
4391 static void igb_msg_task(struct igb_adapter *adapter)
4392 {
4393 struct e1000_hw *hw = &adapter->hw;
4394 u32 vf;
4395
4396 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4397 /* process any reset requests */
4398 if (!igb_check_for_rst(hw, vf)) {
4399 adapter->vf_data[vf].clear_to_send = false;
4400 igb_vf_reset_event(adapter, vf);
4401 }
4402
4403 /* process any messages pending */
4404 if (!igb_check_for_msg(hw, vf))
4405 igb_rcv_msg_from_vf(adapter, vf);
4406
4407 /* process any acks */
4408 if (!igb_check_for_ack(hw, vf))
4409 igb_rcv_ack_from_vf(adapter, vf);
4410
4411 }
4412 }
4413
4414 static int igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4415 {
4416 u32 mbx_size = E1000_VFMAILBOX_SIZE;
4417 u32 msgbuf[mbx_size];
4418 struct e1000_hw *hw = &adapter->hw;
4419 s32 retval;
4420
4421 retval = igb_read_mbx(hw, msgbuf, mbx_size, vf);
4422
4423 if (retval)
4424 dev_err(&adapter->pdev->dev,
4425 "Error receiving message from VF\n");
4426
4427 /* this is a message we already processed, do nothing */
4428 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4429 return retval;
4430
4431 /*
4432 * until the vf completes a reset it should not be
4433 * allowed to start any configuration.
4434 */
4435
4436 if (msgbuf[0] == E1000_VF_RESET) {
4437 igb_vf_reset_msg(adapter, vf);
4438
4439 return retval;
4440 }
4441
4442 if (!adapter->vf_data[vf].clear_to_send) {
4443 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4444 igb_write_mbx(hw, msgbuf, 1, vf);
4445 return retval;
4446 }
4447
4448 switch ((msgbuf[0] & 0xFFFF)) {
4449 case E1000_VF_SET_MAC_ADDR:
4450 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4451 break;
4452 case E1000_VF_SET_MULTICAST:
4453 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4454 break;
4455 case E1000_VF_SET_LPE:
4456 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4457 break;
4458 case E1000_VF_SET_VLAN:
4459 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4460 break;
4461 default:
4462 dev_err(&adapter->pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4463 retval = -1;
4464 break;
4465 }
4466
4467 /* notify the VF of the results of what it sent us */
4468 if (retval)
4469 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4470 else
4471 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4472
4473 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4474
4475 igb_write_mbx(hw, msgbuf, 1, vf);
4476
4477 return retval;
4478 }
4479
4480 /**
4481 * igb_set_uta - Set unicast filter table address
4482 * @adapter: board private structure
4483 *
4484 * The unicast table address is a register array of 32-bit registers.
4485 * The table is meant to be used in a way similar to how the MTA is used
4486 * however due to certain limitations in the hardware it is necessary to
4487 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscous
4488 * enable bit to allow vlan tag stripping when promiscous mode is enabled
4489 **/
4490 static void igb_set_uta(struct igb_adapter *adapter)
4491 {
4492 struct e1000_hw *hw = &adapter->hw;
4493 int i;
4494
4495 /* The UTA table only exists on 82576 hardware and newer */
4496 if (hw->mac.type < e1000_82576)
4497 return;
4498
4499 /* we only need to do this if VMDq is enabled */
4500 if (!adapter->vfs_allocated_count)
4501 return;
4502
4503 for (i = 0; i < hw->mac.uta_reg_count; i++)
4504 array_wr32(E1000_UTA, i, ~0);
4505 }
4506
4507 /**
4508 * igb_intr_msi - Interrupt Handler
4509 * @irq: interrupt number
4510 * @data: pointer to a network interface device structure
4511 **/
4512 static irqreturn_t igb_intr_msi(int irq, void *data)
4513 {
4514 struct igb_adapter *adapter = data;
4515 struct igb_q_vector *q_vector = adapter->q_vector[0];
4516 struct e1000_hw *hw = &adapter->hw;
4517 /* read ICR disables interrupts using IAM */
4518 u32 icr = rd32(E1000_ICR);
4519
4520 igb_write_itr(q_vector);
4521
4522 if (icr & E1000_ICR_DOUTSYNC) {
4523 /* HW is reporting DMA is out of sync */
4524 adapter->stats.doosync++;
4525 }
4526
4527 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4528 hw->mac.get_link_status = 1;
4529 if (!test_bit(__IGB_DOWN, &adapter->state))
4530 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4531 }
4532
4533 napi_schedule(&q_vector->napi);
4534
4535 return IRQ_HANDLED;
4536 }
4537
4538 /**
4539 * igb_intr - Legacy Interrupt Handler
4540 * @irq: interrupt number
4541 * @data: pointer to a network interface device structure
4542 **/
4543 static irqreturn_t igb_intr(int irq, void *data)
4544 {
4545 struct igb_adapter *adapter = data;
4546 struct igb_q_vector *q_vector = adapter->q_vector[0];
4547 struct e1000_hw *hw = &adapter->hw;
4548 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
4549 * need for the IMC write */
4550 u32 icr = rd32(E1000_ICR);
4551 if (!icr)
4552 return IRQ_NONE; /* Not our interrupt */
4553
4554 igb_write_itr(q_vector);
4555
4556 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4557 * not set, then the adapter didn't send an interrupt */
4558 if (!(icr & E1000_ICR_INT_ASSERTED))
4559 return IRQ_NONE;
4560
4561 if (icr & E1000_ICR_DOUTSYNC) {
4562 /* HW is reporting DMA is out of sync */
4563 adapter->stats.doosync++;
4564 }
4565
4566 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4567 hw->mac.get_link_status = 1;
4568 /* guard against interrupt when we're going down */
4569 if (!test_bit(__IGB_DOWN, &adapter->state))
4570 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4571 }
4572
4573 napi_schedule(&q_vector->napi);
4574
4575 return IRQ_HANDLED;
4576 }
4577
4578 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4579 {
4580 struct igb_adapter *adapter = q_vector->adapter;
4581 struct e1000_hw *hw = &adapter->hw;
4582
4583 if (adapter->itr_setting & 3) {
4584 if (!adapter->msix_entries)
4585 igb_set_itr(adapter);
4586 else
4587 igb_update_ring_itr(q_vector);
4588 }
4589
4590 if (!test_bit(__IGB_DOWN, &adapter->state)) {
4591 if (adapter->msix_entries)
4592 wr32(E1000_EIMS, q_vector->eims_value);
4593 else
4594 igb_irq_enable(adapter);
4595 }
4596 }
4597
4598 /**
4599 * igb_poll - NAPI Rx polling callback
4600 * @napi: napi polling structure
4601 * @budget: count of how many packets we should handle
4602 **/
4603 static int igb_poll(struct napi_struct *napi, int budget)
4604 {
4605 struct igb_q_vector *q_vector = container_of(napi,
4606 struct igb_q_vector,
4607 napi);
4608 int tx_clean_complete = 1, work_done = 0;
4609
4610 #ifdef CONFIG_IGB_DCA
4611 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4612 igb_update_dca(q_vector);
4613 #endif
4614 if (q_vector->tx_ring)
4615 tx_clean_complete = igb_clean_tx_irq(q_vector);
4616
4617 if (q_vector->rx_ring)
4618 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4619
4620 if (!tx_clean_complete)
4621 work_done = budget;
4622
4623 /* If not enough Rx work done, exit the polling mode */
4624 if (work_done < budget) {
4625 napi_complete(napi);
4626 igb_ring_irq_enable(q_vector);
4627 }
4628
4629 return work_done;
4630 }
4631
4632 /**
4633 * igb_hwtstamp - utility function which checks for TX time stamp
4634 * @adapter: board private structure
4635 * @skb: packet that was just sent
4636 *
4637 * If we were asked to do hardware stamping and such a time stamp is
4638 * available, then it must have been for this skb here because we only
4639 * allow only one such packet into the queue.
4640 */
4641 static void igb_tx_hwtstamp(struct igb_adapter *adapter, struct sk_buff *skb)
4642 {
4643 union skb_shared_tx *shtx = skb_tx(skb);
4644 struct e1000_hw *hw = &adapter->hw;
4645
4646 if (unlikely(shtx->hardware)) {
4647 u32 valid = rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID;
4648 if (valid) {
4649 u64 regval = rd32(E1000_TXSTMPL);
4650 u64 ns;
4651 struct skb_shared_hwtstamps shhwtstamps;
4652
4653 memset(&shhwtstamps, 0, sizeof(shhwtstamps));
4654 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4655 ns = timecounter_cyc2time(&adapter->clock,
4656 regval);
4657 timecompare_update(&adapter->compare, ns);
4658 shhwtstamps.hwtstamp = ns_to_ktime(ns);
4659 shhwtstamps.syststamp =
4660 timecompare_transform(&adapter->compare, ns);
4661 skb_tstamp_tx(skb, &shhwtstamps);
4662 }
4663 }
4664 }
4665
4666 /**
4667 * igb_clean_tx_irq - Reclaim resources after transmit completes
4668 * @q_vector: pointer to q_vector containing needed info
4669 * returns true if ring is completely cleaned
4670 **/
4671 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4672 {
4673 struct igb_adapter *adapter = q_vector->adapter;
4674 struct igb_ring *tx_ring = q_vector->tx_ring;
4675 struct net_device *netdev = tx_ring->netdev;
4676 struct e1000_hw *hw = &adapter->hw;
4677 struct igb_buffer *buffer_info;
4678 struct sk_buff *skb;
4679 union e1000_adv_tx_desc *tx_desc, *eop_desc;
4680 unsigned int total_bytes = 0, total_packets = 0;
4681 unsigned int i, eop, count = 0;
4682 bool cleaned = false;
4683
4684 i = tx_ring->next_to_clean;
4685 eop = tx_ring->buffer_info[i].next_to_watch;
4686 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4687
4688 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
4689 (count < tx_ring->count)) {
4690 for (cleaned = false; !cleaned; count++) {
4691 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4692 buffer_info = &tx_ring->buffer_info[i];
4693 cleaned = (i == eop);
4694 skb = buffer_info->skb;
4695
4696 if (skb) {
4697 unsigned int segs, bytecount;
4698 /* gso_segs is currently only valid for tcp */
4699 segs = skb_shinfo(skb)->gso_segs ?: 1;
4700 /* multiply data chunks by size of headers */
4701 bytecount = ((segs - 1) * skb_headlen(skb)) +
4702 skb->len;
4703 total_packets += segs;
4704 total_bytes += bytecount;
4705
4706 igb_tx_hwtstamp(adapter, skb);
4707 }
4708
4709 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4710 tx_desc->wb.status = 0;
4711
4712 i++;
4713 if (i == tx_ring->count)
4714 i = 0;
4715 }
4716 eop = tx_ring->buffer_info[i].next_to_watch;
4717 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4718 }
4719
4720 tx_ring->next_to_clean = i;
4721
4722 if (unlikely(count &&
4723 netif_carrier_ok(netdev) &&
4724 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
4725 /* Make sure that anybody stopping the queue after this
4726 * sees the new next_to_clean.
4727 */
4728 smp_mb();
4729 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
4730 !(test_bit(__IGB_DOWN, &adapter->state))) {
4731 netif_wake_subqueue(netdev, tx_ring->queue_index);
4732 tx_ring->tx_stats.restart_queue++;
4733 }
4734 }
4735
4736 if (tx_ring->detect_tx_hung) {
4737 /* Detect a transmit hang in hardware, this serializes the
4738 * check with the clearing of time_stamp and movement of i */
4739 tx_ring->detect_tx_hung = false;
4740 if (tx_ring->buffer_info[i].time_stamp &&
4741 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
4742 (adapter->tx_timeout_factor * HZ))
4743 && !(rd32(E1000_STATUS) &
4744 E1000_STATUS_TXOFF)) {
4745
4746 /* detected Tx unit hang */
4747 dev_err(&tx_ring->pdev->dev,
4748 "Detected Tx Unit Hang\n"
4749 " Tx Queue <%d>\n"
4750 " TDH <%x>\n"
4751 " TDT <%x>\n"
4752 " next_to_use <%x>\n"
4753 " next_to_clean <%x>\n"
4754 "buffer_info[next_to_clean]\n"
4755 " time_stamp <%lx>\n"
4756 " next_to_watch <%x>\n"
4757 " jiffies <%lx>\n"
4758 " desc.status <%x>\n",
4759 tx_ring->queue_index,
4760 readl(tx_ring->head),
4761 readl(tx_ring->tail),
4762 tx_ring->next_to_use,
4763 tx_ring->next_to_clean,
4764 tx_ring->buffer_info[i].time_stamp,
4765 eop,
4766 jiffies,
4767 eop_desc->wb.status);
4768 netif_stop_subqueue(netdev, tx_ring->queue_index);
4769 }
4770 }
4771 tx_ring->total_bytes += total_bytes;
4772 tx_ring->total_packets += total_packets;
4773 tx_ring->tx_stats.bytes += total_bytes;
4774 tx_ring->tx_stats.packets += total_packets;
4775 netdev->stats.tx_bytes += total_bytes;
4776 netdev->stats.tx_packets += total_packets;
4777 return (count < tx_ring->count);
4778 }
4779
4780 /**
4781 * igb_receive_skb - helper function to handle rx indications
4782 * @q_vector: structure containing interrupt and ring information
4783 * @skb: packet to send up
4784 * @vlan_tag: vlan tag for packet
4785 **/
4786 static void igb_receive_skb(struct igb_q_vector *q_vector,
4787 struct sk_buff *skb,
4788 u16 vlan_tag)
4789 {
4790 struct igb_adapter *adapter = q_vector->adapter;
4791
4792 if (vlan_tag)
4793 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
4794 vlan_tag, skb);
4795 else
4796 napi_gro_receive(&q_vector->napi, skb);
4797 }
4798
4799 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
4800 u32 status_err, struct sk_buff *skb)
4801 {
4802 skb->ip_summed = CHECKSUM_NONE;
4803
4804 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
4805 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
4806 (status_err & E1000_RXD_STAT_IXSM))
4807 return;
4808
4809 /* TCP/UDP checksum error bit is set */
4810 if (status_err &
4811 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
4812 /*
4813 * work around errata with sctp packets where the TCPE aka
4814 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
4815 * packets, (aka let the stack check the crc32c)
4816 */
4817 if ((skb->len == 60) &&
4818 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
4819 ring->rx_stats.csum_err++;
4820
4821 /* let the stack verify checksum errors */
4822 return;
4823 }
4824 /* It must be a TCP or UDP packet with a valid checksum */
4825 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
4826 skb->ip_summed = CHECKSUM_UNNECESSARY;
4827
4828 dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
4829 }
4830
4831 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
4832 union e1000_adv_rx_desc *rx_desc)
4833 {
4834 /* HW will not DMA in data larger than the given buffer, even if it
4835 * parses the (NFS, of course) header to be larger. In that case, it
4836 * fills the header buffer and spills the rest into the page.
4837 */
4838 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
4839 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
4840 if (hlen > rx_ring->rx_buffer_len)
4841 hlen = rx_ring->rx_buffer_len;
4842 return hlen;
4843 }
4844
4845 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
4846 int *work_done, int budget)
4847 {
4848 struct igb_adapter *adapter = q_vector->adapter;
4849 struct igb_ring *rx_ring = q_vector->rx_ring;
4850 struct net_device *netdev = rx_ring->netdev;
4851 struct e1000_hw *hw = &adapter->hw;
4852 struct pci_dev *pdev = rx_ring->pdev;
4853 union e1000_adv_rx_desc *rx_desc , *next_rxd;
4854 struct igb_buffer *buffer_info , *next_buffer;
4855 struct sk_buff *skb;
4856 bool cleaned = false;
4857 int cleaned_count = 0;
4858 unsigned int total_bytes = 0, total_packets = 0;
4859 unsigned int i;
4860 u32 staterr;
4861 u16 length;
4862 u16 vlan_tag;
4863
4864 i = rx_ring->next_to_clean;
4865 buffer_info = &rx_ring->buffer_info[i];
4866 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
4867 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4868
4869 while (staterr & E1000_RXD_STAT_DD) {
4870 if (*work_done >= budget)
4871 break;
4872 (*work_done)++;
4873
4874 skb = buffer_info->skb;
4875 prefetch(skb->data - NET_IP_ALIGN);
4876 buffer_info->skb = NULL;
4877
4878 i++;
4879 if (i == rx_ring->count)
4880 i = 0;
4881 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
4882 prefetch(next_rxd);
4883 next_buffer = &rx_ring->buffer_info[i];
4884
4885 length = le16_to_cpu(rx_desc->wb.upper.length);
4886 cleaned = true;
4887 cleaned_count++;
4888
4889 if (buffer_info->dma) {
4890 pci_unmap_single(pdev, buffer_info->dma,
4891 rx_ring->rx_buffer_len,
4892 PCI_DMA_FROMDEVICE);
4893 buffer_info->dma = 0;
4894 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
4895 skb_put(skb, length);
4896 goto send_up;
4897 }
4898 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
4899 }
4900
4901 if (length) {
4902 pci_unmap_page(pdev, buffer_info->page_dma,
4903 PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
4904 buffer_info->page_dma = 0;
4905
4906 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
4907 buffer_info->page,
4908 buffer_info->page_offset,
4909 length);
4910
4911 if (page_count(buffer_info->page) != 1)
4912 buffer_info->page = NULL;
4913 else
4914 get_page(buffer_info->page);
4915
4916 skb->len += length;
4917 skb->data_len += length;
4918
4919 skb->truesize += length;
4920 }
4921
4922 if (!(staterr & E1000_RXD_STAT_EOP)) {
4923 buffer_info->skb = next_buffer->skb;
4924 buffer_info->dma = next_buffer->dma;
4925 next_buffer->skb = skb;
4926 next_buffer->dma = 0;
4927 goto next_desc;
4928 }
4929 send_up:
4930 /*
4931 * If this bit is set, then the RX registers contain
4932 * the time stamp. No other packet will be time
4933 * stamped until we read these registers, so read the
4934 * registers to make them available again. Because
4935 * only one packet can be time stamped at a time, we
4936 * know that the register values must belong to this
4937 * one here and therefore we don't need to compare
4938 * any of the additional attributes stored for it.
4939 *
4940 * If nothing went wrong, then it should have a
4941 * skb_shared_tx that we can turn into a
4942 * skb_shared_hwtstamps.
4943 *
4944 * TODO: can time stamping be triggered (thus locking
4945 * the registers) without the packet reaching this point
4946 * here? In that case RX time stamping would get stuck.
4947 *
4948 * TODO: in "time stamp all packets" mode this bit is
4949 * not set. Need a global flag for this mode and then
4950 * always read the registers. Cannot be done without
4951 * a race condition.
4952 */
4953 if (unlikely(staterr & E1000_RXD_STAT_TS)) {
4954 u64 regval;
4955 u64 ns;
4956 struct skb_shared_hwtstamps *shhwtstamps =
4957 skb_hwtstamps(skb);
4958
4959 WARN(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID),
4960 "igb: no RX time stamp available for time stamped packet");
4961 regval = rd32(E1000_RXSTMPL);
4962 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4963 ns = timecounter_cyc2time(&adapter->clock, regval);
4964 timecompare_update(&adapter->compare, ns);
4965 memset(shhwtstamps, 0, sizeof(*shhwtstamps));
4966 shhwtstamps->hwtstamp = ns_to_ktime(ns);
4967 shhwtstamps->syststamp =
4968 timecompare_transform(&adapter->compare, ns);
4969 }
4970
4971 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
4972 dev_kfree_skb_irq(skb);
4973 goto next_desc;
4974 }
4975
4976 total_bytes += skb->len;
4977 total_packets++;
4978
4979 igb_rx_checksum_adv(rx_ring, staterr, skb);
4980
4981 skb->protocol = eth_type_trans(skb, netdev);
4982 skb_record_rx_queue(skb, rx_ring->queue_index);
4983
4984 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
4985 le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
4986
4987 igb_receive_skb(q_vector, skb, vlan_tag);
4988
4989 next_desc:
4990 rx_desc->wb.upper.status_error = 0;
4991
4992 /* return some buffers to hardware, one at a time is too slow */
4993 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
4994 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
4995 cleaned_count = 0;
4996 }
4997
4998 /* use prefetched values */
4999 rx_desc = next_rxd;
5000 buffer_info = next_buffer;
5001 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5002 }
5003
5004 rx_ring->next_to_clean = i;
5005 cleaned_count = igb_desc_unused(rx_ring);
5006
5007 if (cleaned_count)
5008 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5009
5010 rx_ring->total_packets += total_packets;
5011 rx_ring->total_bytes += total_bytes;
5012 rx_ring->rx_stats.packets += total_packets;
5013 rx_ring->rx_stats.bytes += total_bytes;
5014 netdev->stats.rx_bytes += total_bytes;
5015 netdev->stats.rx_packets += total_packets;
5016 return cleaned;
5017 }
5018
5019 /**
5020 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5021 * @adapter: address of board private structure
5022 **/
5023 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5024 {
5025 struct net_device *netdev = rx_ring->netdev;
5026 union e1000_adv_rx_desc *rx_desc;
5027 struct igb_buffer *buffer_info;
5028 struct sk_buff *skb;
5029 unsigned int i;
5030 int bufsz;
5031
5032 i = rx_ring->next_to_use;
5033 buffer_info = &rx_ring->buffer_info[i];
5034
5035 bufsz = rx_ring->rx_buffer_len;
5036
5037 while (cleaned_count--) {
5038 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5039
5040 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5041 if (!buffer_info->page) {
5042 buffer_info->page = alloc_page(GFP_ATOMIC);
5043 if (!buffer_info->page) {
5044 rx_ring->rx_stats.alloc_failed++;
5045 goto no_buffers;
5046 }
5047 buffer_info->page_offset = 0;
5048 } else {
5049 buffer_info->page_offset ^= PAGE_SIZE / 2;
5050 }
5051 buffer_info->page_dma =
5052 pci_map_page(rx_ring->pdev, buffer_info->page,
5053 buffer_info->page_offset,
5054 PAGE_SIZE / 2,
5055 PCI_DMA_FROMDEVICE);
5056 }
5057
5058 if (!buffer_info->skb) {
5059 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5060 if (!skb) {
5061 rx_ring->rx_stats.alloc_failed++;
5062 goto no_buffers;
5063 }
5064
5065 buffer_info->skb = skb;
5066 buffer_info->dma = pci_map_single(rx_ring->pdev,
5067 skb->data,
5068 bufsz,
5069 PCI_DMA_FROMDEVICE);
5070 }
5071 /* Refresh the desc even if buffer_addrs didn't change because
5072 * each write-back erases this info. */
5073 if (bufsz < IGB_RXBUFFER_1024) {
5074 rx_desc->read.pkt_addr =
5075 cpu_to_le64(buffer_info->page_dma);
5076 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5077 } else {
5078 rx_desc->read.pkt_addr =
5079 cpu_to_le64(buffer_info->dma);
5080 rx_desc->read.hdr_addr = 0;
5081 }
5082
5083 i++;
5084 if (i == rx_ring->count)
5085 i = 0;
5086 buffer_info = &rx_ring->buffer_info[i];
5087 }
5088
5089 no_buffers:
5090 if (rx_ring->next_to_use != i) {
5091 rx_ring->next_to_use = i;
5092 if (i == 0)
5093 i = (rx_ring->count - 1);
5094 else
5095 i--;
5096
5097 /* Force memory writes to complete before letting h/w
5098 * know there are new descriptors to fetch. (Only
5099 * applicable for weak-ordered memory model archs,
5100 * such as IA-64). */
5101 wmb();
5102 writel(i, rx_ring->tail);
5103 }
5104 }
5105
5106 /**
5107 * igb_mii_ioctl -
5108 * @netdev:
5109 * @ifreq:
5110 * @cmd:
5111 **/
5112 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5113 {
5114 struct igb_adapter *adapter = netdev_priv(netdev);
5115 struct mii_ioctl_data *data = if_mii(ifr);
5116
5117 if (adapter->hw.phy.media_type != e1000_media_type_copper)
5118 return -EOPNOTSUPP;
5119
5120 switch (cmd) {
5121 case SIOCGMIIPHY:
5122 data->phy_id = adapter->hw.phy.addr;
5123 break;
5124 case SIOCGMIIREG:
5125 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5126 &data->val_out))
5127 return -EIO;
5128 break;
5129 case SIOCSMIIREG:
5130 default:
5131 return -EOPNOTSUPP;
5132 }
5133 return 0;
5134 }
5135
5136 /**
5137 * igb_hwtstamp_ioctl - control hardware time stamping
5138 * @netdev:
5139 * @ifreq:
5140 * @cmd:
5141 *
5142 * Outgoing time stamping can be enabled and disabled. Play nice and
5143 * disable it when requested, although it shouldn't case any overhead
5144 * when no packet needs it. At most one packet in the queue may be
5145 * marked for time stamping, otherwise it would be impossible to tell
5146 * for sure to which packet the hardware time stamp belongs.
5147 *
5148 * Incoming time stamping has to be configured via the hardware
5149 * filters. Not all combinations are supported, in particular event
5150 * type has to be specified. Matching the kind of event packet is
5151 * not supported, with the exception of "all V2 events regardless of
5152 * level 2 or 4".
5153 *
5154 **/
5155 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5156 struct ifreq *ifr, int cmd)
5157 {
5158 struct igb_adapter *adapter = netdev_priv(netdev);
5159 struct e1000_hw *hw = &adapter->hw;
5160 struct hwtstamp_config config;
5161 u32 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
5162 u32 tsync_rx_ctl_bit = E1000_TSYNCRXCTL_ENABLED;
5163 u32 tsync_rx_ctl_type = 0;
5164 u32 tsync_rx_cfg = 0;
5165 int is_l4 = 0;
5166 int is_l2 = 0;
5167 short port = 319; /* PTP */
5168 u32 regval;
5169
5170 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5171 return -EFAULT;
5172
5173 /* reserved for future extensions */
5174 if (config.flags)
5175 return -EINVAL;
5176
5177 switch (config.tx_type) {
5178 case HWTSTAMP_TX_OFF:
5179 tsync_tx_ctl_bit = 0;
5180 break;
5181 case HWTSTAMP_TX_ON:
5182 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
5183 break;
5184 default:
5185 return -ERANGE;
5186 }
5187
5188 switch (config.rx_filter) {
5189 case HWTSTAMP_FILTER_NONE:
5190 tsync_rx_ctl_bit = 0;
5191 break;
5192 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5193 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5194 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5195 case HWTSTAMP_FILTER_ALL:
5196 /*
5197 * register TSYNCRXCFG must be set, therefore it is not
5198 * possible to time stamp both Sync and Delay_Req messages
5199 * => fall back to time stamping all packets
5200 */
5201 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_ALL;
5202 config.rx_filter = HWTSTAMP_FILTER_ALL;
5203 break;
5204 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5205 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
5206 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5207 is_l4 = 1;
5208 break;
5209 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5210 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
5211 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5212 is_l4 = 1;
5213 break;
5214 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5215 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5216 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5217 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5218 is_l2 = 1;
5219 is_l4 = 1;
5220 config.rx_filter = HWTSTAMP_FILTER_SOME;
5221 break;
5222 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5223 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5224 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5225 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5226 is_l2 = 1;
5227 is_l4 = 1;
5228 config.rx_filter = HWTSTAMP_FILTER_SOME;
5229 break;
5230 case HWTSTAMP_FILTER_PTP_V2_EVENT:
5231 case HWTSTAMP_FILTER_PTP_V2_SYNC:
5232 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5233 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5234 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5235 is_l2 = 1;
5236 break;
5237 default:
5238 return -ERANGE;
5239 }
5240
5241 /* enable/disable TX */
5242 regval = rd32(E1000_TSYNCTXCTL);
5243 regval = (regval & ~E1000_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit;
5244 wr32(E1000_TSYNCTXCTL, regval);
5245
5246 /* enable/disable RX, define which PTP packets are time stamped */
5247 regval = rd32(E1000_TSYNCRXCTL);
5248 regval = (regval & ~E1000_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit;
5249 regval = (regval & ~0xE) | tsync_rx_ctl_type;
5250 wr32(E1000_TSYNCRXCTL, regval);
5251 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5252
5253 /*
5254 * Ethertype Filter Queue Filter[0][15:0] = 0x88F7
5255 * (Ethertype to filter on)
5256 * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
5257 * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
5258 */
5259 wr32(E1000_ETQF0, is_l2 ? 0x440088f7 : 0);
5260
5261 /* L4 Queue Filter[0]: only filter by source and destination port */
5262 wr32(E1000_SPQF0, htons(port));
5263 wr32(E1000_IMIREXT(0), is_l4 ?
5264 ((1<<12) | (1<<19) /* bypass size and control flags */) : 0);
5265 wr32(E1000_IMIR(0), is_l4 ?
5266 (htons(port)
5267 | (0<<16) /* immediate interrupt disabled */
5268 | 0 /* (1<<17) bit cleared: do not bypass
5269 destination port check */)
5270 : 0);
5271 wr32(E1000_FTQF0, is_l4 ?
5272 (0x11 /* UDP */
5273 | (1<<15) /* VF not compared */
5274 | (1<<27) /* Enable Timestamping */
5275 | (7<<28) /* only source port filter enabled,
5276 source/target address and protocol
5277 masked */)
5278 : ((1<<15) | (15<<28) /* all mask bits set = filter not
5279 enabled */));
5280
5281 wrfl();
5282
5283 adapter->hwtstamp_config = config;
5284
5285 /* clear TX/RX time stamp registers, just to be sure */
5286 regval = rd32(E1000_TXSTMPH);
5287 regval = rd32(E1000_RXSTMPH);
5288
5289 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5290 -EFAULT : 0;
5291 }
5292
5293 /**
5294 * igb_ioctl -
5295 * @netdev:
5296 * @ifreq:
5297 * @cmd:
5298 **/
5299 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5300 {
5301 switch (cmd) {
5302 case SIOCGMIIPHY:
5303 case SIOCGMIIREG:
5304 case SIOCSMIIREG:
5305 return igb_mii_ioctl(netdev, ifr, cmd);
5306 case SIOCSHWTSTAMP:
5307 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5308 default:
5309 return -EOPNOTSUPP;
5310 }
5311 }
5312
5313 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5314 {
5315 struct igb_adapter *adapter = hw->back;
5316 u16 cap_offset;
5317
5318 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5319 if (!cap_offset)
5320 return -E1000_ERR_CONFIG;
5321
5322 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5323
5324 return 0;
5325 }
5326
5327 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5328 {
5329 struct igb_adapter *adapter = hw->back;
5330 u16 cap_offset;
5331
5332 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5333 if (!cap_offset)
5334 return -E1000_ERR_CONFIG;
5335
5336 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5337
5338 return 0;
5339 }
5340
5341 static void igb_vlan_rx_register(struct net_device *netdev,
5342 struct vlan_group *grp)
5343 {
5344 struct igb_adapter *adapter = netdev_priv(netdev);
5345 struct e1000_hw *hw = &adapter->hw;
5346 u32 ctrl, rctl;
5347
5348 igb_irq_disable(adapter);
5349 adapter->vlgrp = grp;
5350
5351 if (grp) {
5352 /* enable VLAN tag insert/strip */
5353 ctrl = rd32(E1000_CTRL);
5354 ctrl |= E1000_CTRL_VME;
5355 wr32(E1000_CTRL, ctrl);
5356
5357 /* enable VLAN receive filtering */
5358 rctl = rd32(E1000_RCTL);
5359 rctl &= ~E1000_RCTL_CFIEN;
5360 wr32(E1000_RCTL, rctl);
5361 igb_update_mng_vlan(adapter);
5362 } else {
5363 /* disable VLAN tag insert/strip */
5364 ctrl = rd32(E1000_CTRL);
5365 ctrl &= ~E1000_CTRL_VME;
5366 wr32(E1000_CTRL, ctrl);
5367
5368 if (adapter->mng_vlan_id != (u16)IGB_MNG_VLAN_NONE) {
5369 igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
5370 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
5371 }
5372 }
5373
5374 igb_rlpml_set(adapter);
5375
5376 if (!test_bit(__IGB_DOWN, &adapter->state))
5377 igb_irq_enable(adapter);
5378 }
5379
5380 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5381 {
5382 struct igb_adapter *adapter = netdev_priv(netdev);
5383 struct e1000_hw *hw = &adapter->hw;
5384 int pf_id = adapter->vfs_allocated_count;
5385
5386 if ((hw->mng_cookie.status &
5387 E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
5388 (vid == adapter->mng_vlan_id))
5389 return;
5390
5391 /* add vid to vlvf if sr-iov is enabled,
5392 * if that fails add directly to filter table */
5393 if (igb_vlvf_set(adapter, vid, true, pf_id))
5394 igb_vfta_set(hw, vid, true);
5395
5396 }
5397
5398 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5399 {
5400 struct igb_adapter *adapter = netdev_priv(netdev);
5401 struct e1000_hw *hw = &adapter->hw;
5402 int pf_id = adapter->vfs_allocated_count;
5403
5404 igb_irq_disable(adapter);
5405 vlan_group_set_device(adapter->vlgrp, vid, NULL);
5406
5407 if (!test_bit(__IGB_DOWN, &adapter->state))
5408 igb_irq_enable(adapter);
5409
5410 if ((adapter->hw.mng_cookie.status &
5411 E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
5412 (vid == adapter->mng_vlan_id)) {
5413 /* release control to f/w */
5414 igb_release_hw_control(adapter);
5415 return;
5416 }
5417
5418 /* remove vid from vlvf if sr-iov is enabled,
5419 * if not in vlvf remove from vfta */
5420 if (igb_vlvf_set(adapter, vid, false, pf_id))
5421 igb_vfta_set(hw, vid, false);
5422 }
5423
5424 static void igb_restore_vlan(struct igb_adapter *adapter)
5425 {
5426 igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5427
5428 if (adapter->vlgrp) {
5429 u16 vid;
5430 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5431 if (!vlan_group_get_device(adapter->vlgrp, vid))
5432 continue;
5433 igb_vlan_rx_add_vid(adapter->netdev, vid);
5434 }
5435 }
5436 }
5437
5438 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5439 {
5440 struct e1000_mac_info *mac = &adapter->hw.mac;
5441
5442 mac->autoneg = 0;
5443
5444 switch (spddplx) {
5445 case SPEED_10 + DUPLEX_HALF:
5446 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5447 break;
5448 case SPEED_10 + DUPLEX_FULL:
5449 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5450 break;
5451 case SPEED_100 + DUPLEX_HALF:
5452 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5453 break;
5454 case SPEED_100 + DUPLEX_FULL:
5455 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5456 break;
5457 case SPEED_1000 + DUPLEX_FULL:
5458 mac->autoneg = 1;
5459 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5460 break;
5461 case SPEED_1000 + DUPLEX_HALF: /* not supported */
5462 default:
5463 dev_err(&adapter->pdev->dev,
5464 "Unsupported Speed/Duplex configuration\n");
5465 return -EINVAL;
5466 }
5467 return 0;
5468 }
5469
5470 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5471 {
5472 struct net_device *netdev = pci_get_drvdata(pdev);
5473 struct igb_adapter *adapter = netdev_priv(netdev);
5474 struct e1000_hw *hw = &adapter->hw;
5475 u32 ctrl, rctl, status;
5476 u32 wufc = adapter->wol;
5477 #ifdef CONFIG_PM
5478 int retval = 0;
5479 #endif
5480
5481 netif_device_detach(netdev);
5482
5483 if (netif_running(netdev))
5484 igb_close(netdev);
5485
5486 igb_clear_interrupt_scheme(adapter);
5487
5488 #ifdef CONFIG_PM
5489 retval = pci_save_state(pdev);
5490 if (retval)
5491 return retval;
5492 #endif
5493
5494 status = rd32(E1000_STATUS);
5495 if (status & E1000_STATUS_LU)
5496 wufc &= ~E1000_WUFC_LNKC;
5497
5498 if (wufc) {
5499 igb_setup_rctl(adapter);
5500 igb_set_rx_mode(netdev);
5501
5502 /* turn on all-multi mode if wake on multicast is enabled */
5503 if (wufc & E1000_WUFC_MC) {
5504 rctl = rd32(E1000_RCTL);
5505 rctl |= E1000_RCTL_MPE;
5506 wr32(E1000_RCTL, rctl);
5507 }
5508
5509 ctrl = rd32(E1000_CTRL);
5510 /* advertise wake from D3Cold */
5511 #define E1000_CTRL_ADVD3WUC 0x00100000
5512 /* phy power management enable */
5513 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5514 ctrl |= E1000_CTRL_ADVD3WUC;
5515 wr32(E1000_CTRL, ctrl);
5516
5517 /* Allow time for pending master requests to run */
5518 igb_disable_pcie_master(&adapter->hw);
5519
5520 wr32(E1000_WUC, E1000_WUC_PME_EN);
5521 wr32(E1000_WUFC, wufc);
5522 } else {
5523 wr32(E1000_WUC, 0);
5524 wr32(E1000_WUFC, 0);
5525 }
5526
5527 *enable_wake = wufc || adapter->en_mng_pt;
5528 if (!*enable_wake)
5529 igb_shutdown_serdes_link_82575(hw);
5530
5531 /* Release control of h/w to f/w. If f/w is AMT enabled, this
5532 * would have already happened in close and is redundant. */
5533 igb_release_hw_control(adapter);
5534
5535 pci_disable_device(pdev);
5536
5537 return 0;
5538 }
5539
5540 #ifdef CONFIG_PM
5541 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5542 {
5543 int retval;
5544 bool wake;
5545
5546 retval = __igb_shutdown(pdev, &wake);
5547 if (retval)
5548 return retval;
5549
5550 if (wake) {
5551 pci_prepare_to_sleep(pdev);
5552 } else {
5553 pci_wake_from_d3(pdev, false);
5554 pci_set_power_state(pdev, PCI_D3hot);
5555 }
5556
5557 return 0;
5558 }
5559
5560 static int igb_resume(struct pci_dev *pdev)
5561 {
5562 struct net_device *netdev = pci_get_drvdata(pdev);
5563 struct igb_adapter *adapter = netdev_priv(netdev);
5564 struct e1000_hw *hw = &adapter->hw;
5565 u32 err;
5566
5567 pci_set_power_state(pdev, PCI_D0);
5568 pci_restore_state(pdev);
5569
5570 err = pci_enable_device_mem(pdev);
5571 if (err) {
5572 dev_err(&pdev->dev,
5573 "igb: Cannot enable PCI device from suspend\n");
5574 return err;
5575 }
5576 pci_set_master(pdev);
5577
5578 pci_enable_wake(pdev, PCI_D3hot, 0);
5579 pci_enable_wake(pdev, PCI_D3cold, 0);
5580
5581 if (igb_init_interrupt_scheme(adapter)) {
5582 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5583 return -ENOMEM;
5584 }
5585
5586 /* e1000_power_up_phy(adapter); */
5587
5588 igb_reset(adapter);
5589
5590 /* let the f/w know that the h/w is now under the control of the
5591 * driver. */
5592 igb_get_hw_control(adapter);
5593
5594 wr32(E1000_WUS, ~0);
5595
5596 if (netif_running(netdev)) {
5597 err = igb_open(netdev);
5598 if (err)
5599 return err;
5600 }
5601
5602 netif_device_attach(netdev);
5603
5604 return 0;
5605 }
5606 #endif
5607
5608 static void igb_shutdown(struct pci_dev *pdev)
5609 {
5610 bool wake;
5611
5612 __igb_shutdown(pdev, &wake);
5613
5614 if (system_state == SYSTEM_POWER_OFF) {
5615 pci_wake_from_d3(pdev, wake);
5616 pci_set_power_state(pdev, PCI_D3hot);
5617 }
5618 }
5619
5620 #ifdef CONFIG_NET_POLL_CONTROLLER
5621 /*
5622 * Polling 'interrupt' - used by things like netconsole to send skbs
5623 * without having to re-enable interrupts. It's not called while
5624 * the interrupt routine is executing.
5625 */
5626 static void igb_netpoll(struct net_device *netdev)
5627 {
5628 struct igb_adapter *adapter = netdev_priv(netdev);
5629 struct e1000_hw *hw = &adapter->hw;
5630 int i;
5631
5632 if (!adapter->msix_entries) {
5633 struct igb_q_vector *q_vector = adapter->q_vector[0];
5634 igb_irq_disable(adapter);
5635 napi_schedule(&q_vector->napi);
5636 return;
5637 }
5638
5639 for (i = 0; i < adapter->num_q_vectors; i++) {
5640 struct igb_q_vector *q_vector = adapter->q_vector[i];
5641 wr32(E1000_EIMC, q_vector->eims_value);
5642 napi_schedule(&q_vector->napi);
5643 }
5644 }
5645 #endif /* CONFIG_NET_POLL_CONTROLLER */
5646
5647 /**
5648 * igb_io_error_detected - called when PCI error is detected
5649 * @pdev: Pointer to PCI device
5650 * @state: The current pci connection state
5651 *
5652 * This function is called after a PCI bus error affecting
5653 * this device has been detected.
5654 */
5655 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5656 pci_channel_state_t state)
5657 {
5658 struct net_device *netdev = pci_get_drvdata(pdev);
5659 struct igb_adapter *adapter = netdev_priv(netdev);
5660
5661 netif_device_detach(netdev);
5662
5663 if (state == pci_channel_io_perm_failure)
5664 return PCI_ERS_RESULT_DISCONNECT;
5665
5666 if (netif_running(netdev))
5667 igb_down(adapter);
5668 pci_disable_device(pdev);
5669
5670 /* Request a slot slot reset. */
5671 return PCI_ERS_RESULT_NEED_RESET;
5672 }
5673
5674 /**
5675 * igb_io_slot_reset - called after the pci bus has been reset.
5676 * @pdev: Pointer to PCI device
5677 *
5678 * Restart the card from scratch, as if from a cold-boot. Implementation
5679 * resembles the first-half of the igb_resume routine.
5680 */
5681 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5682 {
5683 struct net_device *netdev = pci_get_drvdata(pdev);
5684 struct igb_adapter *adapter = netdev_priv(netdev);
5685 struct e1000_hw *hw = &adapter->hw;
5686 pci_ers_result_t result;
5687 int err;
5688
5689 if (pci_enable_device_mem(pdev)) {
5690 dev_err(&pdev->dev,
5691 "Cannot re-enable PCI device after reset.\n");
5692 result = PCI_ERS_RESULT_DISCONNECT;
5693 } else {
5694 pci_set_master(pdev);
5695 pci_restore_state(pdev);
5696
5697 pci_enable_wake(pdev, PCI_D3hot, 0);
5698 pci_enable_wake(pdev, PCI_D3cold, 0);
5699
5700 igb_reset(adapter);
5701 wr32(E1000_WUS, ~0);
5702 result = PCI_ERS_RESULT_RECOVERED;
5703 }
5704
5705 err = pci_cleanup_aer_uncorrect_error_status(pdev);
5706 if (err) {
5707 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
5708 "failed 0x%0x\n", err);
5709 /* non-fatal, continue */
5710 }
5711
5712 return result;
5713 }
5714
5715 /**
5716 * igb_io_resume - called when traffic can start flowing again.
5717 * @pdev: Pointer to PCI device
5718 *
5719 * This callback is called when the error recovery driver tells us that
5720 * its OK to resume normal operation. Implementation resembles the
5721 * second-half of the igb_resume routine.
5722 */
5723 static void igb_io_resume(struct pci_dev *pdev)
5724 {
5725 struct net_device *netdev = pci_get_drvdata(pdev);
5726 struct igb_adapter *adapter = netdev_priv(netdev);
5727
5728 if (netif_running(netdev)) {
5729 if (igb_up(adapter)) {
5730 dev_err(&pdev->dev, "igb_up failed after reset\n");
5731 return;
5732 }
5733 }
5734
5735 netif_device_attach(netdev);
5736
5737 /* let the f/w know that the h/w is now under the control of the
5738 * driver. */
5739 igb_get_hw_control(adapter);
5740 }
5741
5742 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
5743 u8 qsel)
5744 {
5745 u32 rar_low, rar_high;
5746 struct e1000_hw *hw = &adapter->hw;
5747
5748 /* HW expects these in little endian so we reverse the byte order
5749 * from network order (big endian) to little endian
5750 */
5751 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
5752 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
5753 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
5754
5755 /* Indicate to hardware the Address is Valid. */
5756 rar_high |= E1000_RAH_AV;
5757
5758 if (hw->mac.type == e1000_82575)
5759 rar_high |= E1000_RAH_POOL_1 * qsel;
5760 else
5761 rar_high |= E1000_RAH_POOL_1 << qsel;
5762
5763 wr32(E1000_RAL(index), rar_low);
5764 wrfl();
5765 wr32(E1000_RAH(index), rar_high);
5766 wrfl();
5767 }
5768
5769 static int igb_set_vf_mac(struct igb_adapter *adapter,
5770 int vf, unsigned char *mac_addr)
5771 {
5772 struct e1000_hw *hw = &adapter->hw;
5773 /* VF MAC addresses start at end of receive addresses and moves
5774 * torwards the first, as a result a collision should not be possible */
5775 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5776
5777 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
5778
5779 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
5780
5781 return 0;
5782 }
5783
5784 static void igb_vmm_control(struct igb_adapter *adapter)
5785 {
5786 struct e1000_hw *hw = &adapter->hw;
5787 u32 reg;
5788
5789 /* replication is not supported for 82575 */
5790 if (hw->mac.type == e1000_82575)
5791 return;
5792
5793 /* enable replication vlan tag stripping */
5794 reg = rd32(E1000_RPLOLR);
5795 reg |= E1000_RPLOLR_STRVLAN;
5796 wr32(E1000_RPLOLR, reg);
5797
5798 /* notify HW that the MAC is adding vlan tags */
5799 reg = rd32(E1000_DTXCTL);
5800 reg |= E1000_DTXCTL_VLAN_ADDED;
5801 wr32(E1000_DTXCTL, reg);
5802
5803 if (adapter->vfs_allocated_count) {
5804 igb_vmdq_set_loopback_pf(hw, true);
5805 igb_vmdq_set_replication_pf(hw, true);
5806 } else {
5807 igb_vmdq_set_loopback_pf(hw, false);
5808 igb_vmdq_set_replication_pf(hw, false);
5809 }
5810 }
5811
5812 /* igb_main.c */
This page took 0.264654 seconds and 5 git commands to generate.