igb: make tx ring map and free functionality non-static
deliverable/linux.git: drivers/net/igb/igb_main.c
1 /*******************************************************************************
2
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2009 Intel Corporation.
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
9
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
14
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
21
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "1.3.16-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60 [board_82575] = &e1000_82575_info,
61 };
62
63 static struct pci_device_id igb_pci_tbl[] = {
64 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
65 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
66 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
67 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
68 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
69 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
70 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
71 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
72 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
73 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
74 /* required last entry */
75 {0, }
76 };
77
78 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
79
80 void igb_reset(struct igb_adapter *);
81 static int igb_setup_all_tx_resources(struct igb_adapter *);
82 static int igb_setup_all_rx_resources(struct igb_adapter *);
83 static void igb_free_all_tx_resources(struct igb_adapter *);
84 static void igb_free_all_rx_resources(struct igb_adapter *);
85 static void igb_setup_mrqc(struct igb_adapter *);
86 void igb_update_stats(struct igb_adapter *);
87 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
88 static void __devexit igb_remove(struct pci_dev *pdev);
89 static int igb_sw_init(struct igb_adapter *);
90 static int igb_open(struct net_device *);
91 static int igb_close(struct net_device *);
92 static void igb_configure_tx(struct igb_adapter *);
93 static void igb_configure_rx(struct igb_adapter *);
94 static void igb_clean_all_tx_rings(struct igb_adapter *);
95 static void igb_clean_all_rx_rings(struct igb_adapter *);
96 static void igb_clean_tx_ring(struct igb_ring *);
97 static void igb_clean_rx_ring(struct igb_ring *);
98 static void igb_set_rx_mode(struct net_device *);
99 static void igb_update_phy_info(unsigned long);
100 static void igb_watchdog(unsigned long);
101 static void igb_watchdog_task(struct work_struct *);
102 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
103 static struct net_device_stats *igb_get_stats(struct net_device *);
104 static int igb_change_mtu(struct net_device *, int);
105 static int igb_set_mac(struct net_device *, void *);
106 static void igb_set_uta(struct igb_adapter *adapter);
107 static irqreturn_t igb_intr(int irq, void *);
108 static irqreturn_t igb_intr_msi(int irq, void *);
109 static irqreturn_t igb_msix_other(int irq, void *);
110 static irqreturn_t igb_msix_ring(int irq, void *);
111 #ifdef CONFIG_IGB_DCA
112 static void igb_update_dca(struct igb_q_vector *);
113 static void igb_setup_dca(struct igb_adapter *);
114 #endif /* CONFIG_IGB_DCA */
115 static bool igb_clean_tx_irq(struct igb_q_vector *);
116 static int igb_poll(struct napi_struct *, int);
117 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
118 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
119 static void igb_tx_timeout(struct net_device *);
120 static void igb_reset_task(struct work_struct *);
121 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
122 static void igb_vlan_rx_add_vid(struct net_device *, u16);
123 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
124 static void igb_restore_vlan(struct igb_adapter *);
125 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
126 static void igb_ping_all_vfs(struct igb_adapter *);
127 static void igb_msg_task(struct igb_adapter *);
128 static int igb_rcv_msg_from_vf(struct igb_adapter *, u32);
129 static void igb_vmm_control(struct igb_adapter *);
130 static int igb_set_vf_mac(struct igb_adapter *adapter, int, unsigned char *);
131 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
132
133 static inline void igb_set_vmolr(struct e1000_hw *hw, int vfn)
134 {
135 u32 reg_data;
136
137 reg_data = rd32(E1000_VMOLR(vfn));
138 reg_data |= E1000_VMOLR_BAM | /* Accept broadcast */
139 E1000_VMOLR_ROMPE | /* Accept packets matched in MTA */
140 E1000_VMOLR_AUPE | /* Accept untagged packets */
141 E1000_VMOLR_STRVLAN; /* Strip vlan tags */
142 wr32(E1000_VMOLR(vfn), reg_data);
143 }
144
145 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
146 int vfn)
147 {
148 struct e1000_hw *hw = &adapter->hw;
149 u32 vmolr;
150
151 /* if it isn't the PF, check to see if VFs are enabled and
152 * increase the size to support vlan tags */
153 if (vfn < adapter->vfs_allocated_count &&
154 adapter->vf_data[vfn].vlans_enabled)
155 size += VLAN_TAG_SIZE;
156
157 vmolr = rd32(E1000_VMOLR(vfn));
158 vmolr &= ~E1000_VMOLR_RLPML_MASK;
159 vmolr |= size | E1000_VMOLR_LPE;
160 wr32(E1000_VMOLR(vfn), vmolr);
161
162 return 0;
163 }
164
165 #ifdef CONFIG_PM
166 static int igb_suspend(struct pci_dev *, pm_message_t);
167 static int igb_resume(struct pci_dev *);
168 #endif
169 static void igb_shutdown(struct pci_dev *);
170 #ifdef CONFIG_IGB_DCA
171 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
172 static struct notifier_block dca_notifier = {
173 .notifier_call = igb_notify_dca,
174 .next = NULL,
175 .priority = 0
176 };
177 #endif
178 #ifdef CONFIG_NET_POLL_CONTROLLER
179 /* for netdump / net console */
180 static void igb_netpoll(struct net_device *);
181 #endif
182 #ifdef CONFIG_PCI_IOV
183 static unsigned int max_vfs = 0;
184 module_param(max_vfs, uint, 0);
185 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
186 "per physical function");
187 #endif /* CONFIG_PCI_IOV */
188
189 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
190 pci_channel_state_t);
191 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
192 static void igb_io_resume(struct pci_dev *);
193
194 static struct pci_error_handlers igb_err_handler = {
195 .error_detected = igb_io_error_detected,
196 .slot_reset = igb_io_slot_reset,
197 .resume = igb_io_resume,
198 };
199
200
201 static struct pci_driver igb_driver = {
202 .name = igb_driver_name,
203 .id_table = igb_pci_tbl,
204 .probe = igb_probe,
205 .remove = __devexit_p(igb_remove),
206 #ifdef CONFIG_PM
207 /* Power Management Hooks */
208 .suspend = igb_suspend,
209 .resume = igb_resume,
210 #endif
211 .shutdown = igb_shutdown,
212 .err_handler = &igb_err_handler
213 };
214
215 static int global_quad_port_a; /* global quad port a indication */
216
217 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
218 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
219 MODULE_LICENSE("GPL");
220 MODULE_VERSION(DRV_VERSION);
221
222 /**
223 * Scale the NIC clock cycle by a large factor so that
224 * relatively small clock corrections can be added or
225 subtracted at each clock tick. The drawbacks of a
226 * large factor are a) that the clock register overflows
227 * more quickly (not such a big deal) and b) that the
228 * increment per tick has to fit into 24 bits.
229 *
230 * Note that
231 * TIMINCA = IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS *
232 * IGB_TSYNC_SCALE
233 * TIMINCA += TIMINCA * adjustment [ppm] / 1e9
234 *
235 * The base scale factor is intentionally a power of two
236 * so that the division in %struct timecounter can be done with
237 * a shift.
238 */
239 #define IGB_TSYNC_SHIFT (19)
240 #define IGB_TSYNC_SCALE (1<<IGB_TSYNC_SHIFT)
241
242 /**
243 * The duration of one clock cycle of the NIC.
244 *
245 * @todo This hard-coded value is part of the specification and might change
246 * in future hardware revisions. Add revision check.
247 */
248 #define IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS 16
249
250 #if (IGB_TSYNC_SCALE * IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS) >= (1<<24)
251 # error IGB_TSYNC_SCALE and/or IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS are too large to fit into TIMINCA
252 #endif
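/* For illustration (values follow from the definitions above):
 * IGB_TSYNC_SCALE = 2^19 = 524288, so the per-tick increment written to
 * TIMINCA is 16 ns * 524288 = 8388608 = 0x800000, comfortably below the
 * 24-bit limit of 2^24 = 16777216 that the #if above guards against. */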
253
254 /**
255 * igb_read_clock - read raw cycle counter (to be used by time counter)
256 */
257 static cycle_t igb_read_clock(const struct cyclecounter *tc)
258 {
259 struct igb_adapter *adapter =
260 container_of(tc, struct igb_adapter, cycles);
261 struct e1000_hw *hw = &adapter->hw;
262 u64 stamp;
263
264 stamp = rd32(E1000_SYSTIML);
265 stamp |= (u64)rd32(E1000_SYSTIMH) << 32ULL;
266
267 return stamp;
268 }
269
270 #ifdef DEBUG
271 /**
272 * igb_get_hw_dev_name - return device name string
273 * used by hardware layer to print debugging information
274 **/
275 char *igb_get_hw_dev_name(struct e1000_hw *hw)
276 {
277 struct igb_adapter *adapter = hw->back;
278 return adapter->netdev->name;
279 }
280
281 /**
282 * igb_get_time_str - format current NIC and system time as string
283 */
284 static char *igb_get_time_str(struct igb_adapter *adapter,
285 char buffer[160])
286 {
287 cycle_t hw = adapter->cycles.read(&adapter->cycles);
288 struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
289 struct timespec sys;
290 struct timespec delta;
291 getnstimeofday(&sys);
292
293 delta = timespec_sub(nic, sys);
294
295 sprintf(buffer,
296 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
297 hw,
298 (long)nic.tv_sec, nic.tv_nsec,
299 (long)sys.tv_sec, sys.tv_nsec,
300 (long)delta.tv_sec, delta.tv_nsec);
301
302 return buffer;
303 }
304 #endif
305
306 /**
307 * igb_init_module - Driver Registration Routine
308 *
309 * igb_init_module is the first routine called when the driver is
310 * loaded. All it does is register with the PCI subsystem.
311 **/
312 static int __init igb_init_module(void)
313 {
314 int ret;
315 printk(KERN_INFO "%s - version %s\n",
316 igb_driver_string, igb_driver_version);
317
318 printk(KERN_INFO "%s\n", igb_copyright);
319
320 global_quad_port_a = 0;
321
322 #ifdef CONFIG_IGB_DCA
323 dca_register_notify(&dca_notifier);
324 #endif
325
326 ret = pci_register_driver(&igb_driver);
327 return ret;
328 }
329
330 module_init(igb_init_module);
331
332 /**
333 * igb_exit_module - Driver Exit Cleanup Routine
334 *
335 * igb_exit_module is called just before the driver is removed
336 * from memory.
337 **/
338 static void __exit igb_exit_module(void)
339 {
340 #ifdef CONFIG_IGB_DCA
341 dca_unregister_notify(&dca_notifier);
342 #endif
343 pci_unregister_driver(&igb_driver);
344 }
345
346 module_exit(igb_exit_module);
347
348 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
349 /**
350 * igb_cache_ring_register - Descriptor ring to register mapping
351 * @adapter: board private structure to initialize
352 *
353 * Once we know the feature-set enabled for the device, we'll cache
354 * the register offset the descriptor ring is assigned to.
355 **/
356 static void igb_cache_ring_register(struct igb_adapter *adapter)
357 {
358 int i;
359 u32 rbase_offset = adapter->vfs_allocated_count;
360
361 switch (adapter->hw.mac.type) {
362 case e1000_82576:
363 /* The queues are allocated for virtualization such that VF 0
364 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
365 * In order to avoid collision we start at the first free queue
366 * and continue consuming queues in the same sequence
367 */
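/* e.g. with two VFs allocated (rbase_offset = 2), PF rings 0..3 map to
 * register indices 2, 10, 3, 11, since Q_IDX_82576(i) yields 0, 8, 1, 9. */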
368 for (i = 0; i < adapter->num_rx_queues; i++)
369 adapter->rx_ring[i].reg_idx = rbase_offset +
370 Q_IDX_82576(i);
371 for (i = 0; i < adapter->num_tx_queues; i++)
372 adapter->tx_ring[i].reg_idx = rbase_offset +
373 Q_IDX_82576(i);
374 break;
375 case e1000_82575:
376 default:
377 for (i = 0; i < adapter->num_rx_queues; i++)
378 adapter->rx_ring[i].reg_idx = i;
379 for (i = 0; i < adapter->num_tx_queues; i++)
380 adapter->tx_ring[i].reg_idx = i;
381 break;
382 }
383 }
384
385 static void igb_free_queues(struct igb_adapter *adapter)
386 {
387 kfree(adapter->tx_ring);
388 kfree(adapter->rx_ring);
389
390 adapter->tx_ring = NULL;
391 adapter->rx_ring = NULL;
392
393 adapter->num_rx_queues = 0;
394 adapter->num_tx_queues = 0;
395 }
396
397 /**
398 * igb_alloc_queues - Allocate memory for all rings
399 * @adapter: board private structure to initialize
400 *
401 * We allocate one ring per queue at run-time since we don't know the
402 * number of queues at compile-time.
403 **/
404 static int igb_alloc_queues(struct igb_adapter *adapter)
405 {
406 int i;
407
408 adapter->tx_ring = kcalloc(adapter->num_tx_queues,
409 sizeof(struct igb_ring), GFP_KERNEL);
410 if (!adapter->tx_ring)
411 goto err;
412
413 adapter->rx_ring = kcalloc(adapter->num_rx_queues,
414 sizeof(struct igb_ring), GFP_KERNEL);
415 if (!adapter->rx_ring)
416 goto err;
417
418 for (i = 0; i < adapter->num_tx_queues; i++) {
419 struct igb_ring *ring = &(adapter->tx_ring[i]);
420 ring->count = adapter->tx_ring_count;
421 ring->queue_index = i;
422 ring->pdev = adapter->pdev;
423 ring->netdev = adapter->netdev;
424 /* For 82575, context index must be unique per ring. */
425 if (adapter->hw.mac.type == e1000_82575)
426 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
427 }
428
429 for (i = 0; i < adapter->num_rx_queues; i++) {
430 struct igb_ring *ring = &(adapter->rx_ring[i]);
431 ring->count = adapter->rx_ring_count;
432 ring->queue_index = i;
433 ring->pdev = adapter->pdev;
434 ring->netdev = adapter->netdev;
435 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
436 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
437 /* set flag indicating ring supports SCTP checksum offload */
438 if (adapter->hw.mac.type >= e1000_82576)
439 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
440 }
441
442 igb_cache_ring_register(adapter);
443
444 return 0;
445
446 err:
447 igb_free_queues(adapter);
448
449 return -ENOMEM;
450 }
451
452 #define IGB_N0_QUEUE -1
453 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
454 {
455 u32 msixbm = 0;
456 struct igb_adapter *adapter = q_vector->adapter;
457 struct e1000_hw *hw = &adapter->hw;
458 u32 ivar, index;
459 int rx_queue = IGB_N0_QUEUE;
460 int tx_queue = IGB_N0_QUEUE;
461
462 if (q_vector->rx_ring)
463 rx_queue = q_vector->rx_ring->reg_idx;
464 if (q_vector->tx_ring)
465 tx_queue = q_vector->tx_ring->reg_idx;
466
467 switch (hw->mac.type) {
468 case e1000_82575:
469 /* The 82575 assigns vectors using a bitmask, which matches the
470 bitmask for the EICR/EIMS/EIMC registers. To assign one
471 or more queues to a vector, we write the appropriate bits
472 into the MSIXBM register for that vector. */
473 if (rx_queue > IGB_N0_QUEUE)
474 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
475 if (tx_queue > IGB_N0_QUEUE)
476 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
477 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
478 q_vector->eims_value = msixbm;
479 break;
480 case e1000_82576:
481 /* 82576 uses a table-based method for assigning vectors.
482 Each queue has a single entry in the table to which we write
483 a vector number along with a "valid" bit. Sadly, the layout
484 of the table is somewhat counterintuitive. */
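/* Byte lanes within each 32-bit IVAR entry, as handled below:
 * bits 7:0 = Rx queues 0-7, bits 15:8 = Tx queues 0-7,
 * bits 23:16 = Rx queues 8-15, bits 31:24 = Tx queues 8-15.
 * e.g. Rx queue 9 lands in entry 1 (9 & 0x7), third byte. */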
485 if (rx_queue > IGB_N0_QUEUE) {
486 index = (rx_queue & 0x7);
487 ivar = array_rd32(E1000_IVAR0, index);
488 if (rx_queue < 8) {
489 /* vector goes into low byte of register */
490 ivar = ivar & 0xFFFFFF00;
491 ivar |= msix_vector | E1000_IVAR_VALID;
492 } else {
493 /* vector goes into third byte of register */
494 ivar = ivar & 0xFF00FFFF;
495 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
496 }
497 array_wr32(E1000_IVAR0, index, ivar);
498 }
499 if (tx_queue > IGB_N0_QUEUE) {
500 index = (tx_queue & 0x7);
501 ivar = array_rd32(E1000_IVAR0, index);
502 if (tx_queue < 8) {
503 /* vector goes into second byte of register */
504 ivar = ivar & 0xFFFF00FF;
505 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
506 } else {
507 /* vector goes into high byte of register */
508 ivar = ivar & 0x00FFFFFF;
509 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
510 }
511 array_wr32(E1000_IVAR0, index, ivar);
512 }
513 q_vector->eims_value = 1 << msix_vector;
514 break;
515 default:
516 BUG();
517 break;
518 }
519 }
520
521 /**
522 * igb_configure_msix - Configure MSI-X hardware
523 *
524 * igb_configure_msix sets up the hardware to properly
525 * generate MSI-X interrupts.
526 **/
527 static void igb_configure_msix(struct igb_adapter *adapter)
528 {
529 u32 tmp;
530 int i, vector = 0;
531 struct e1000_hw *hw = &adapter->hw;
532
533 adapter->eims_enable_mask = 0;
534
535 /* set vector for other causes, i.e. link changes */
536 switch (hw->mac.type) {
537 case e1000_82575:
538 tmp = rd32(E1000_CTRL_EXT);
539 /* enable MSI-X PBA support */
540 tmp |= E1000_CTRL_EXT_PBA_CLR;
541
542 /* Auto-Mask interrupts upon ICR read. */
543 tmp |= E1000_CTRL_EXT_EIAME;
544 tmp |= E1000_CTRL_EXT_IRCA;
545
546 wr32(E1000_CTRL_EXT, tmp);
547
548 /* enable msix_other interrupt */
549 array_wr32(E1000_MSIXBM(0), vector++,
550 E1000_EIMS_OTHER);
551 adapter->eims_other = E1000_EIMS_OTHER;
552
553 break;
554
555 case e1000_82576:
556 /* Turn on MSI-X capability first, or our settings
557 * won't stick. And it will take days to debug. */
558 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
559 E1000_GPIE_PBA | E1000_GPIE_EIAME |
560 E1000_GPIE_NSICR);
561
562 /* enable msix_other interrupt */
563 adapter->eims_other = 1 << vector;
564 tmp = (vector++ | E1000_IVAR_VALID) << 8;
565
566 wr32(E1000_IVAR_MISC, tmp);
567 break;
568 default:
569 /* do nothing, since nothing else supports MSI-X */
570 break;
571 } /* switch (hw->mac.type) */
572
573 adapter->eims_enable_mask |= adapter->eims_other;
574
575 for (i = 0; i < adapter->num_q_vectors; i++) {
576 struct igb_q_vector *q_vector = adapter->q_vector[i];
577 igb_assign_vector(q_vector, vector++);
578 adapter->eims_enable_mask |= q_vector->eims_value;
579 }
580
581 wrfl();
582 }
583
584 /**
585 * igb_request_msix - Initialize MSI-X interrupts
586 *
587 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
588 * kernel.
589 **/
590 static int igb_request_msix(struct igb_adapter *adapter)
591 {
592 struct net_device *netdev = adapter->netdev;
593 struct e1000_hw *hw = &adapter->hw;
594 int i, err = 0, vector = 0;
595
596 err = request_irq(adapter->msix_entries[vector].vector,
597 &igb_msix_other, 0, netdev->name, adapter);
598 if (err)
599 goto out;
600 vector++;
601
602 for (i = 0; i < adapter->num_q_vectors; i++) {
603 struct igb_q_vector *q_vector = adapter->q_vector[i];
604
605 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
606
607 if (q_vector->rx_ring && q_vector->tx_ring)
608 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
609 q_vector->rx_ring->queue_index);
610 else if (q_vector->tx_ring)
611 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
612 q_vector->tx_ring->queue_index);
613 else if (q_vector->rx_ring)
614 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
615 q_vector->rx_ring->queue_index);
616 else
617 sprintf(q_vector->name, "%s-unused", netdev->name);
618
619 err = request_irq(adapter->msix_entries[vector].vector,
620 &igb_msix_ring, 0, q_vector->name,
621 q_vector);
622 if (err)
623 goto out;
624 vector++;
625 }
626
627 igb_configure_msix(adapter);
628 return 0;
629 out:
630 return err;
631 }
632
633 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
634 {
635 if (adapter->msix_entries) {
636 pci_disable_msix(adapter->pdev);
637 kfree(adapter->msix_entries);
638 adapter->msix_entries = NULL;
639 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
640 pci_disable_msi(adapter->pdev);
641 }
642 }
643
644 /**
645 * igb_free_q_vectors - Free memory allocated for interrupt vectors
646 * @adapter: board private structure to initialize
647 *
648 * This function frees the memory allocated to the q_vectors. In addition if
649 * NAPI is enabled it will delete any references to the NAPI struct prior
650 * to freeing the q_vector.
651 **/
652 static void igb_free_q_vectors(struct igb_adapter *adapter)
653 {
654 int v_idx;
655
656 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
657 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
658 adapter->q_vector[v_idx] = NULL;
659 netif_napi_del(&q_vector->napi);
660 kfree(q_vector);
661 }
662 adapter->num_q_vectors = 0;
663 }
664
665 /**
666 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
667 *
668 * This function resets the device so that it has 0 rx queues, tx queues, and
669 * MSI-X interrupts allocated.
670 */
671 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
672 {
673 igb_free_queues(adapter);
674 igb_free_q_vectors(adapter);
675 igb_reset_interrupt_capability(adapter);
676 }
677
678 /**
679 * igb_set_interrupt_capability - set MSI or MSI-X if supported
680 *
681 * Attempt to configure interrupts using the best available
682 * capabilities of the hardware and kernel.
683 **/
684 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
685 {
686 int err;
687 int numvecs, i;
688
689 /* Number of supported queues. */
690 adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
691 adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());
692
693 /* start with one vector for every rx queue */
694 numvecs = adapter->num_rx_queues;
695
696 /* if tx handler is separate, add 1 for every tx queue */
697 numvecs += adapter->num_tx_queues;
698
699 /* store the number of vectors reserved for queues */
700 adapter->num_q_vectors = numvecs;
701
702 /* add 1 vector for link status interrupts */
703 numvecs++;
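/* e.g. with 4 Rx and 4 Tx queues this requests 4 + 4 + 1 = 9 MSI-X
 * vectors: one per Rx queue, one per Tx queue, plus the link vector
 * (num_q_vectors stays at 8; the link vector is extra). */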
704 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
705 GFP_KERNEL);
706 if (!adapter->msix_entries)
707 goto msi_only;
708
709 for (i = 0; i < numvecs; i++)
710 adapter->msix_entries[i].entry = i;
711
712 err = pci_enable_msix(adapter->pdev,
713 adapter->msix_entries,
714 numvecs);
715 if (err == 0)
716 goto out;
717
718 igb_reset_interrupt_capability(adapter);
719
720 /* If we can't do MSI-X, try MSI */
721 msi_only:
722 #ifdef CONFIG_PCI_IOV
723 /* disable SR-IOV for non MSI-X configurations */
724 if (adapter->vf_data) {
725 struct e1000_hw *hw = &adapter->hw;
726 /* disable iov and allow time for transactions to clear */
727 pci_disable_sriov(adapter->pdev);
728 msleep(500);
729
730 kfree(adapter->vf_data);
731 adapter->vf_data = NULL;
732 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
733 msleep(100);
734 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
735 }
736 #endif
737 adapter->num_rx_queues = 1;
738 adapter->num_tx_queues = 1;
739 adapter->num_q_vectors = 1;
740 if (!pci_enable_msi(adapter->pdev))
741 adapter->flags |= IGB_FLAG_HAS_MSI;
742 out:
743 /* Notify the stack of the (possibly) reduced Tx Queue count. */
744 adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
745 return;
746 }
747
748 /**
749 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
750 * @adapter: board private structure to initialize
751 *
752 * We allocate one q_vector per queue interrupt. If allocation fails we
753 * return -ENOMEM.
754 **/
755 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
756 {
757 struct igb_q_vector *q_vector;
758 struct e1000_hw *hw = &adapter->hw;
759 int v_idx;
760
761 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
762 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
763 if (!q_vector)
764 goto err_out;
765 q_vector->adapter = adapter;
766 q_vector->itr_shift = (hw->mac.type == e1000_82575) ? 16 : 0;
767 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
768 q_vector->itr_val = IGB_START_ITR;
769 q_vector->set_itr = 1;
770 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
771 adapter->q_vector[v_idx] = q_vector;
772 }
773 return 0;
774
775 err_out:
776 while (v_idx) {
777 v_idx--;
778 q_vector = adapter->q_vector[v_idx];
779 netif_napi_del(&q_vector->napi);
780 kfree(q_vector);
781 adapter->q_vector[v_idx] = NULL;
782 }
783 return -ENOMEM;
784 }
785
786 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
787 int ring_idx, int v_idx)
788 {
789 struct igb_q_vector *q_vector;
790
791 q_vector = adapter->q_vector[v_idx];
792 q_vector->rx_ring = &adapter->rx_ring[ring_idx];
793 q_vector->rx_ring->q_vector = q_vector;
794 q_vector->itr_val = adapter->itr;
795 }
796
797 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
798 int ring_idx, int v_idx)
799 {
800 struct igb_q_vector *q_vector;
801
802 q_vector = adapter->q_vector[v_idx];
803 q_vector->tx_ring = &adapter->tx_ring[ring_idx];
804 q_vector->tx_ring->q_vector = q_vector;
805 q_vector->itr_val = adapter->itr;
806 }
807
808 /**
809 * igb_map_ring_to_vector - maps allocated queues to vectors
810 *
811 * This function maps the recently allocated queues to vectors.
812 **/
813 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
814 {
815 int i;
816 int v_idx = 0;
817
818 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
819 (adapter->num_q_vectors < adapter->num_tx_queues))
820 return -ENOMEM;
821
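/* One vector per ring when enough vectors are available; otherwise
 * Rx ring i and Tx ring i share vector i. e.g. 4 Rx + 4 Tx queues map
 * 1:1 onto 8 q_vectors, but onto 4 q_vectors as four Rx/Tx pairs. */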
822 if (adapter->num_q_vectors >=
823 (adapter->num_rx_queues + adapter->num_tx_queues)) {
824 for (i = 0; i < adapter->num_rx_queues; i++)
825 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
826 for (i = 0; i < adapter->num_tx_queues; i++)
827 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
828 } else {
829 for (i = 0; i < adapter->num_rx_queues; i++) {
830 if (i < adapter->num_tx_queues)
831 igb_map_tx_ring_to_vector(adapter, i, v_idx);
832 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
833 }
834 for (; i < adapter->num_tx_queues; i++)
835 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
836 }
837 return 0;
838 }
839
840 /**
841 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
842 *
843 * This function initializes the interrupts and allocates all of the queues.
844 **/
845 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
846 {
847 struct pci_dev *pdev = adapter->pdev;
848 int err;
849
850 igb_set_interrupt_capability(adapter);
851
852 err = igb_alloc_q_vectors(adapter);
853 if (err) {
854 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
855 goto err_alloc_q_vectors;
856 }
857
858 err = igb_alloc_queues(adapter);
859 if (err) {
860 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
861 goto err_alloc_queues;
862 }
863
864 err = igb_map_ring_to_vector(adapter);
865 if (err) {
866 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
867 goto err_map_queues;
868 }
869
870
871 return 0;
872 err_map_queues:
873 igb_free_queues(adapter);
874 err_alloc_queues:
875 igb_free_q_vectors(adapter);
876 err_alloc_q_vectors:
877 igb_reset_interrupt_capability(adapter);
878 return err;
879 }
880
881 /**
882 * igb_request_irq - initialize interrupts
883 *
884 * Attempts to configure interrupts using the best available
885 * capabilities of the hardware and kernel.
886 **/
887 static int igb_request_irq(struct igb_adapter *adapter)
888 {
889 struct net_device *netdev = adapter->netdev;
890 struct pci_dev *pdev = adapter->pdev;
891 struct e1000_hw *hw = &adapter->hw;
892 int err = 0;
893
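/* Fall-back order: MSI-X, then MSI, then a legacy shared IRQ. A failed
 * MSI-X setup first reallocates a single queue and q_vector pair. */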
894 if (adapter->msix_entries) {
895 err = igb_request_msix(adapter);
896 if (!err)
897 goto request_done;
898 /* fall back to MSI */
899 igb_clear_interrupt_scheme(adapter);
900 if (!pci_enable_msi(adapter->pdev))
901 adapter->flags |= IGB_FLAG_HAS_MSI;
902 igb_free_all_tx_resources(adapter);
903 igb_free_all_rx_resources(adapter);
904 adapter->num_tx_queues = 1;
905 adapter->num_rx_queues = 1;
906 adapter->num_q_vectors = 1;
907 err = igb_alloc_q_vectors(adapter);
908 if (err) {
909 dev_err(&pdev->dev,
910 "Unable to allocate memory for vectors\n");
911 goto request_done;
912 }
913 err = igb_alloc_queues(adapter);
914 if (err) {
915 dev_err(&pdev->dev,
916 "Unable to allocate memory for queues\n");
917 igb_free_q_vectors(adapter);
918 goto request_done;
919 }
920 igb_setup_all_tx_resources(adapter);
921 igb_setup_all_rx_resources(adapter);
922 } else {
923 switch (hw->mac.type) {
924 case e1000_82575:
925 wr32(E1000_MSIXBM(0),
926 (E1000_EICR_RX_QUEUE0 |
927 E1000_EICR_TX_QUEUE0 |
928 E1000_EIMS_OTHER));
929 break;
930 case e1000_82576:
931 wr32(E1000_IVAR0, E1000_IVAR_VALID);
932 break;
933 default:
934 break;
935 }
936 }
937
938 if (adapter->flags & IGB_FLAG_HAS_MSI) {
939 err = request_irq(adapter->pdev->irq, &igb_intr_msi, 0,
940 netdev->name, adapter);
941 if (!err)
942 goto request_done;
943
944 /* fall back to legacy interrupts */
945 igb_reset_interrupt_capability(adapter);
946 adapter->flags &= ~IGB_FLAG_HAS_MSI;
947 }
948
949 err = request_irq(adapter->pdev->irq, &igb_intr, IRQF_SHARED,
950 netdev->name, adapter);
951
952 if (err)
953 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
954 err);
955
956 request_done:
957 return err;
958 }
959
960 static void igb_free_irq(struct igb_adapter *adapter)
961 {
962 if (adapter->msix_entries) {
963 int vector = 0, i;
964
965 free_irq(adapter->msix_entries[vector++].vector, adapter);
966
967 for (i = 0; i < adapter->num_q_vectors; i++) {
968 struct igb_q_vector *q_vector = adapter->q_vector[i];
969 free_irq(adapter->msix_entries[vector++].vector,
970 q_vector);
971 }
972 } else {
973 free_irq(adapter->pdev->irq, adapter);
974 }
975 }
976
977 /**
978 * igb_irq_disable - Mask off interrupt generation on the NIC
979 * @adapter: board private structure
980 **/
981 static void igb_irq_disable(struct igb_adapter *adapter)
982 {
983 struct e1000_hw *hw = &adapter->hw;
984
985 if (adapter->msix_entries) {
986 u32 regval = rd32(E1000_EIAM);
987 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
988 wr32(E1000_EIMC, adapter->eims_enable_mask);
989 regval = rd32(E1000_EIAC);
990 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
991 }
992
993 wr32(E1000_IAM, 0);
994 wr32(E1000_IMC, ~0);
995 wrfl();
996 synchronize_irq(adapter->pdev->irq);
997 }
998
999 /**
1000 * igb_irq_enable - Enable default interrupt generation settings
1001 * @adapter: board private structure
1002 **/
1003 static void igb_irq_enable(struct igb_adapter *adapter)
1004 {
1005 struct e1000_hw *hw = &adapter->hw;
1006
1007 if (adapter->msix_entries) {
1008 u32 regval = rd32(E1000_EIAC);
1009 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1010 regval = rd32(E1000_EIAM);
1011 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1012 wr32(E1000_EIMS, adapter->eims_enable_mask);
1013 if (adapter->vfs_allocated_count)
1014 wr32(E1000_MBVFIMR, 0xFF);
1015 wr32(E1000_IMS, (E1000_IMS_LSC | E1000_IMS_VMMB |
1016 E1000_IMS_DOUTSYNC));
1017 } else {
1018 wr32(E1000_IMS, IMS_ENABLE_MASK);
1019 wr32(E1000_IAM, IMS_ENABLE_MASK);
1020 }
1021 }
1022
1023 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1024 {
1025 struct net_device *netdev = adapter->netdev;
1026 u16 vid = adapter->hw.mng_cookie.vlan_id;
1027 u16 old_vid = adapter->mng_vlan_id;
1028 if (adapter->vlgrp) {
1029 if (!vlan_group_get_device(adapter->vlgrp, vid)) {
1030 if (adapter->hw.mng_cookie.status &
1031 E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1032 igb_vlan_rx_add_vid(netdev, vid);
1033 adapter->mng_vlan_id = vid;
1034 } else
1035 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1036
1037 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1038 (vid != old_vid) &&
1039 !vlan_group_get_device(adapter->vlgrp, old_vid))
1040 igb_vlan_rx_kill_vid(netdev, old_vid);
1041 } else
1042 adapter->mng_vlan_id = vid;
1043 }
1044 }
1045
1046 /**
1047 * igb_release_hw_control - release control of the h/w to f/w
1048 * @adapter: address of board private structure
1049 *
1050 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1051 * For ASF and Pass Through versions of f/w this means that the
1052 * driver is no longer loaded.
1053 *
1054 **/
1055 static void igb_release_hw_control(struct igb_adapter *adapter)
1056 {
1057 struct e1000_hw *hw = &adapter->hw;
1058 u32 ctrl_ext;
1059
1060 /* Let firmware take over control of h/w */
1061 ctrl_ext = rd32(E1000_CTRL_EXT);
1062 wr32(E1000_CTRL_EXT,
1063 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1064 }
1065
1066
1067 /**
1068 * igb_get_hw_control - get control of the h/w from f/w
1069 * @adapter: address of board private structure
1070 *
1071 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1072 * For ASF and Pass Through versions of f/w this means that
1073 * the driver is loaded.
1074 *
1075 **/
1076 static void igb_get_hw_control(struct igb_adapter *adapter)
1077 {
1078 struct e1000_hw *hw = &adapter->hw;
1079 u32 ctrl_ext;
1080
1081 /* Let firmware know the driver has taken over */
1082 ctrl_ext = rd32(E1000_CTRL_EXT);
1083 wr32(E1000_CTRL_EXT,
1084 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1085 }
1086
1087 /**
1088 * igb_configure - configure the hardware for RX and TX
1089 * @adapter: private board structure
1090 **/
1091 static void igb_configure(struct igb_adapter *adapter)
1092 {
1093 struct net_device *netdev = adapter->netdev;
1094 int i;
1095
1096 igb_get_hw_control(adapter);
1097 igb_set_rx_mode(netdev);
1098
1099 igb_restore_vlan(adapter);
1100
1101 igb_setup_tctl(adapter);
1102 igb_setup_mrqc(adapter);
1103 igb_setup_rctl(adapter);
1104
1105 igb_configure_tx(adapter);
1106 igb_configure_rx(adapter);
1107
1108 igb_rx_fifo_flush_82575(&adapter->hw);
1109
1110 /* call igb_desc_unused which always leaves
1111 * at least 1 descriptor unused to make sure
1112 * next_to_use != next_to_clean */
1113 for (i = 0; i < adapter->num_rx_queues; i++) {
1114 struct igb_ring *ring = &adapter->rx_ring[i];
1115 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1116 }
1117
1118
1119 adapter->tx_queue_len = netdev->tx_queue_len;
1120 }
1121
1122
1123 /**
1124 * igb_up - Open the interface and prepare it to handle traffic
1125 * @adapter: board private structure
1126 **/
1127
1128 int igb_up(struct igb_adapter *adapter)
1129 {
1130 struct e1000_hw *hw = &adapter->hw;
1131 int i;
1132
1133 /* hardware has been reset, we need to reload some things */
1134 igb_configure(adapter);
1135
1136 clear_bit(__IGB_DOWN, &adapter->state);
1137
1138 for (i = 0; i < adapter->num_q_vectors; i++) {
1139 struct igb_q_vector *q_vector = adapter->q_vector[i];
1140 napi_enable(&q_vector->napi);
1141 }
1142 if (adapter->msix_entries)
1143 igb_configure_msix(adapter);
1144
1145 igb_set_vmolr(hw, adapter->vfs_allocated_count);
1146
1147 /* Clear any pending interrupts. */
1148 rd32(E1000_ICR);
1149 igb_irq_enable(adapter);
1150
1151 /* notify VFs that reset has been completed */
1152 if (adapter->vfs_allocated_count) {
1153 u32 reg_data = rd32(E1000_CTRL_EXT);
1154 reg_data |= E1000_CTRL_EXT_PFRSTD;
1155 wr32(E1000_CTRL_EXT, reg_data);
1156 }
1157
1158 netif_tx_start_all_queues(adapter->netdev);
1159
1160 /* Fire a link change interrupt to start the watchdog. */
1161 wr32(E1000_ICS, E1000_ICS_LSC);
1162 return 0;
1163 }
1164
1165 void igb_down(struct igb_adapter *adapter)
1166 {
1167 struct e1000_hw *hw = &adapter->hw;
1168 struct net_device *netdev = adapter->netdev;
1169 u32 tctl, rctl;
1170 int i;
1171
1172 /* signal that we're down so the interrupt handler does not
1173 * reschedule our watchdog timer */
1174 set_bit(__IGB_DOWN, &adapter->state);
1175
1176 /* disable receives in the hardware */
1177 rctl = rd32(E1000_RCTL);
1178 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1179 /* flush and sleep below */
1180
1181 netif_tx_stop_all_queues(netdev);
1182
1183 /* disable transmits in the hardware */
1184 tctl = rd32(E1000_TCTL);
1185 tctl &= ~E1000_TCTL_EN;
1186 wr32(E1000_TCTL, tctl);
1187 /* flush both disables and wait for them to finish */
1188 wrfl();
1189 msleep(10);
1190
1191 for (i = 0; i < adapter->num_q_vectors; i++) {
1192 struct igb_q_vector *q_vector = adapter->q_vector[i];
1193 napi_disable(&q_vector->napi);
1194 }
1195
1196 igb_irq_disable(adapter);
1197
1198 del_timer_sync(&adapter->watchdog_timer);
1199 del_timer_sync(&adapter->phy_info_timer);
1200
1201 netdev->tx_queue_len = adapter->tx_queue_len;
1202 netif_carrier_off(netdev);
1203
1204 /* record the stats before reset */
1205 igb_update_stats(adapter);
1206
1207 adapter->link_speed = 0;
1208 adapter->link_duplex = 0;
1209
1210 if (!pci_channel_offline(adapter->pdev))
1211 igb_reset(adapter);
1212 igb_clean_all_tx_rings(adapter);
1213 igb_clean_all_rx_rings(adapter);
1214 #ifdef CONFIG_IGB_DCA
1215
1216 /* since we reset the hardware DCA settings were cleared */
1217 igb_setup_dca(adapter);
1218 #endif
1219 }
1220
1221 void igb_reinit_locked(struct igb_adapter *adapter)
1222 {
1223 WARN_ON(in_interrupt());
1224 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1225 msleep(1);
1226 igb_down(adapter);
1227 igb_up(adapter);
1228 clear_bit(__IGB_RESETTING, &adapter->state);
1229 }
1230
1231 void igb_reset(struct igb_adapter *adapter)
1232 {
1233 struct e1000_hw *hw = &adapter->hw;
1234 struct e1000_mac_info *mac = &hw->mac;
1235 struct e1000_fc_info *fc = &hw->fc;
1236 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1237 u16 hwm;
1238
1239 /* Repartition PBA for greater than 9k MTU
1240 * To take effect CTRL.RST is required.
1241 */
1242 switch (mac->type) {
1243 case e1000_82576:
1244 pba = E1000_PBA_64K;
1245 break;
1246 case e1000_82575:
1247 default:
1248 pba = E1000_PBA_34K;
1249 break;
1250 }
1251
1252 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1253 (mac->type < e1000_82576)) {
1254 /* adjust PBA for jumbo frames */
1255 wr32(E1000_PBA, pba);
1256
1257 /* To maintain wire speed transmits, the Tx FIFO should be
1258 * large enough to accommodate two full transmit packets,
1259 * rounded up to the next 1KB and expressed in KB. Likewise,
1260 * the Rx FIFO should be large enough to accommodate at least
1261 * one full receive packet and is similarly rounded up and
1262 * expressed in KB. */
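/* Rough illustration, assuming a 9000-byte MTU (max_frame_size 9018)
 * and the 16-byte advanced Tx descriptor: min_tx_space =
 * (9018 + 16 - 4) * 2 = 18060 -> 18 KB after rounding up; min_rx_space =
 * 9018 -> 9 KB. Actual values depend on the configured MTU. */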
1263 pba = rd32(E1000_PBA);
1264 /* upper 16 bits has Tx packet buffer allocation size in KB */
1265 tx_space = pba >> 16;
1266 /* lower 16 bits has Rx packet buffer allocation size in KB */
1267 pba &= 0xffff;
1268 /* the Tx FIFO also stores 16 bytes of information about the Tx
1269 * packet, but don't include the Ethernet FCS because hardware appends it */
1270 min_tx_space = (adapter->max_frame_size +
1271 sizeof(union e1000_adv_tx_desc) -
1272 ETH_FCS_LEN) * 2;
1273 min_tx_space = ALIGN(min_tx_space, 1024);
1274 min_tx_space >>= 10;
1275 /* software strips receive CRC, so leave room for it */
1276 min_rx_space = adapter->max_frame_size;
1277 min_rx_space = ALIGN(min_rx_space, 1024);
1278 min_rx_space >>= 10;
1279
1280 /* If current Tx allocation is less than the min Tx FIFO size,
1281 * and the min Tx FIFO size is less than the current Rx FIFO
1282 * allocation, take space away from current Rx allocation */
1283 if (tx_space < min_tx_space &&
1284 ((min_tx_space - tx_space) < pba)) {
1285 pba = pba - (min_tx_space - tx_space);
1286
1287 /* if short on rx space, rx wins and must trump tx
1288 * adjustment */
1289 if (pba < min_rx_space)
1290 pba = min_rx_space;
1291 }
1292 wr32(E1000_PBA, pba);
1293 }
1294
1295 /* flow control settings */
1296 /* The high water mark must be low enough to fit one full frame
1297 * (or the size used for early receive) above it in the Rx FIFO.
1298 * Set it to the lower of:
1299 * - 90% of the Rx FIFO size, or
1300 * - the full Rx FIFO size minus one full frame */
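/* For a concrete feel: with a 34 KB packet buffer (pba = 34) and a
 * 1518-byte max frame, (pba << 10) * 9 / 10 = 31334 while
 * (pba << 10) - 2 * 1518 = 31780, so hwm = 31334 before the
 * granularity masking below. (Illustrative numbers only.) */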
1301 hwm = min(((pba << 10) * 9 / 10),
1302 ((pba << 10) - 2 * adapter->max_frame_size));
1303
1304 if (mac->type < e1000_82576) {
1305 fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */
1306 fc->low_water = fc->high_water - 8;
1307 } else {
1308 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1309 fc->low_water = fc->high_water - 16;
1310 }
1311 fc->pause_time = 0xFFFF;
1312 fc->send_xon = 1;
1313 fc->current_mode = fc->requested_mode;
1314
1315 /* disable receive for all VFs and wait one second */
1316 if (adapter->vfs_allocated_count) {
1317 int i;
1318 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1319 adapter->vf_data[i].clear_to_send = false;
1320
1321 /* ping all the active vfs to let them know we are going down */
1322 igb_ping_all_vfs(adapter);
1323
1324 /* disable transmits and receives */
1325 wr32(E1000_VFRE, 0);
1326 wr32(E1000_VFTE, 0);
1327 }
1328
1329 /* Allow time for pending master requests to run */
1330 adapter->hw.mac.ops.reset_hw(&adapter->hw);
1331 wr32(E1000_WUC, 0);
1332
1333 if (adapter->hw.mac.ops.init_hw(&adapter->hw))
1334 dev_err(&adapter->pdev->dev, "Hardware Error\n");
1335
1336 igb_update_mng_vlan(adapter);
1337
1338 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1339 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1340
1341 igb_reset_adaptive(&adapter->hw);
1342 igb_get_phy_info(&adapter->hw);
1343 }
1344
1345 static const struct net_device_ops igb_netdev_ops = {
1346 .ndo_open = igb_open,
1347 .ndo_stop = igb_close,
1348 .ndo_start_xmit = igb_xmit_frame_adv,
1349 .ndo_get_stats = igb_get_stats,
1350 .ndo_set_rx_mode = igb_set_rx_mode,
1351 .ndo_set_multicast_list = igb_set_rx_mode,
1352 .ndo_set_mac_address = igb_set_mac,
1353 .ndo_change_mtu = igb_change_mtu,
1354 .ndo_do_ioctl = igb_ioctl,
1355 .ndo_tx_timeout = igb_tx_timeout,
1356 .ndo_validate_addr = eth_validate_addr,
1357 .ndo_vlan_rx_register = igb_vlan_rx_register,
1358 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1359 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1360 #ifdef CONFIG_NET_POLL_CONTROLLER
1361 .ndo_poll_controller = igb_netpoll,
1362 #endif
1363 };
1364
1365 /**
1366 * igb_probe - Device Initialization Routine
1367 * @pdev: PCI device information struct
1368 * @ent: entry in igb_pci_tbl
1369 *
1370 * Returns 0 on success, negative on failure
1371 *
1372 * igb_probe initializes an adapter identified by a pci_dev structure.
1373 * The OS initialization, configuring of the adapter private structure,
1374 * and a hardware reset occur.
1375 **/
1376 static int __devinit igb_probe(struct pci_dev *pdev,
1377 const struct pci_device_id *ent)
1378 {
1379 struct net_device *netdev;
1380 struct igb_adapter *adapter;
1381 struct e1000_hw *hw;
1382 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1383 unsigned long mmio_start, mmio_len;
1384 int err, pci_using_dac;
1385 u16 eeprom_data = 0;
1386 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1387 u32 part_num;
1388
1389 err = pci_enable_device_mem(pdev);
1390 if (err)
1391 return err;
1392
1393 pci_using_dac = 0;
1394 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1395 if (!err) {
1396 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1397 if (!err)
1398 pci_using_dac = 1;
1399 } else {
1400 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1401 if (err) {
1402 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1403 if (err) {
1404 dev_err(&pdev->dev, "No usable DMA "
1405 "configuration, aborting\n");
1406 goto err_dma;
1407 }
1408 }
1409 }
1410
1411 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1412 IORESOURCE_MEM),
1413 igb_driver_name);
1414 if (err)
1415 goto err_pci_reg;
1416
1417 pci_enable_pcie_error_reporting(pdev);
1418
1419 pci_set_master(pdev);
1420 pci_save_state(pdev);
1421
1422 err = -ENOMEM;
1423 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1424 IGB_ABS_MAX_TX_QUEUES);
1425 if (!netdev)
1426 goto err_alloc_etherdev;
1427
1428 SET_NETDEV_DEV(netdev, &pdev->dev);
1429
1430 pci_set_drvdata(pdev, netdev);
1431 adapter = netdev_priv(netdev);
1432 adapter->netdev = netdev;
1433 adapter->pdev = pdev;
1434 hw = &adapter->hw;
1435 hw->back = adapter;
1436 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1437
1438 mmio_start = pci_resource_start(pdev, 0);
1439 mmio_len = pci_resource_len(pdev, 0);
1440
1441 err = -EIO;
1442 hw->hw_addr = ioremap(mmio_start, mmio_len);
1443 if (!hw->hw_addr)
1444 goto err_ioremap;
1445
1446 netdev->netdev_ops = &igb_netdev_ops;
1447 igb_set_ethtool_ops(netdev);
1448 netdev->watchdog_timeo = 5 * HZ;
1449
1450 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1451
1452 netdev->mem_start = mmio_start;
1453 netdev->mem_end = mmio_start + mmio_len;
1454
1455 /* PCI config space info */
1456 hw->vendor_id = pdev->vendor;
1457 hw->device_id = pdev->device;
1458 hw->revision_id = pdev->revision;
1459 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1460 hw->subsystem_device_id = pdev->subsystem_device;
1461
1462 /* setup the private structure */
1463 hw->back = adapter;
1464 /* Copy the default MAC, PHY and NVM function pointers */
1465 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1466 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1467 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1468 /* Initialize skew-specific constants */
1469 err = ei->get_invariants(hw);
1470 if (err)
1471 goto err_sw_init;
1472
1473 #ifdef CONFIG_PCI_IOV
1474 /* since iov functionality isn't critical to base device function we
1475 * can accept failure. If it fails we don't allow iov to be enabled */
1476 if (hw->mac.type == e1000_82576) {
1477 /* 82576 supports a maximum of 7 VFs in addition to the PF */
1478 unsigned int num_vfs = (max_vfs > 7) ? 7 : max_vfs;
1479 int i;
1480 unsigned char mac_addr[ETH_ALEN];
1481
1482 if (num_vfs) {
1483 adapter->vf_data = kcalloc(num_vfs,
1484 sizeof(struct vf_data_storage),
1485 GFP_KERNEL);
1486 if (!adapter->vf_data) {
1487 dev_err(&pdev->dev,
1488 "Could not allocate VF private data - "
1489 "IOV enable failed\n");
1490 } else {
1491 err = pci_enable_sriov(pdev, num_vfs);
1492 if (!err) {
1493 adapter->vfs_allocated_count = num_vfs;
1494 dev_info(&pdev->dev,
1495 "%d vfs allocated\n",
1496 num_vfs);
1497 for (i = 0;
1498 i < adapter->vfs_allocated_count;
1499 i++) {
1500 random_ether_addr(mac_addr);
1501 igb_set_vf_mac(adapter, i,
1502 mac_addr);
1503 }
1504 } else {
1505 kfree(adapter->vf_data);
1506 adapter->vf_data = NULL;
1507 }
1508 }
1509 }
1510 }
1511
1512 #endif
1513 /* setup the private structure */
1514 err = igb_sw_init(adapter);
1515 if (err)
1516 goto err_sw_init;
1517
1518 igb_get_bus_info_pcie(hw);
1519
1520 hw->phy.autoneg_wait_to_complete = false;
1521 hw->mac.adaptive_ifs = true;
1522
1523 /* Copper options */
1524 if (hw->phy.media_type == e1000_media_type_copper) {
1525 hw->phy.mdix = AUTO_ALL_MODES;
1526 hw->phy.disable_polarity_correction = false;
1527 hw->phy.ms_type = e1000_ms_hw_default;
1528 }
1529
1530 if (igb_check_reset_block(hw))
1531 dev_info(&pdev->dev,
1532 "PHY reset is blocked due to SOL/IDER session.\n");
1533
1534 netdev->features = NETIF_F_SG |
1535 NETIF_F_IP_CSUM |
1536 NETIF_F_HW_VLAN_TX |
1537 NETIF_F_HW_VLAN_RX |
1538 NETIF_F_HW_VLAN_FILTER;
1539
1540 netdev->features |= NETIF_F_IPV6_CSUM;
1541 netdev->features |= NETIF_F_TSO;
1542 netdev->features |= NETIF_F_TSO6;
1543
1544 netdev->features |= NETIF_F_GRO;
1545
1546 netdev->vlan_features |= NETIF_F_TSO;
1547 netdev->vlan_features |= NETIF_F_TSO6;
1548 netdev->vlan_features |= NETIF_F_IP_CSUM;
1549 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1550 netdev->vlan_features |= NETIF_F_SG;
1551
1552 if (pci_using_dac)
1553 netdev->features |= NETIF_F_HIGHDMA;
1554
1555 if (adapter->hw.mac.type == e1000_82576)
1556 netdev->features |= NETIF_F_SCTP_CSUM;
1557
1558 adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw);
1559
1560 /* before reading the NVM, reset the controller to put the device in a
1561 * known good starting state */
1562 hw->mac.ops.reset_hw(hw);
1563
1564 /* make sure the NVM is good */
1565 if (igb_validate_nvm_checksum(hw) < 0) {
1566 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1567 err = -EIO;
1568 goto err_eeprom;
1569 }
1570
1571 /* copy the MAC address out of the NVM */
1572 if (hw->mac.ops.read_mac_addr(hw))
1573 dev_err(&pdev->dev, "NVM Read Error\n");
1574
1575 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1576 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1577
1578 if (!is_valid_ether_addr(netdev->perm_addr)) {
1579 dev_err(&pdev->dev, "Invalid MAC Address\n");
1580 err = -EIO;
1581 goto err_eeprom;
1582 }
1583
1584 setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1585 (unsigned long) adapter);
1586 setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1587 (unsigned long) adapter);
1588
1589 INIT_WORK(&adapter->reset_task, igb_reset_task);
1590 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1591
1592 /* Initialize link properties that are user-changeable */
1593 adapter->fc_autoneg = true;
1594 hw->mac.autoneg = true;
1595 hw->phy.autoneg_advertised = 0x2f;
1596
1597 hw->fc.requested_mode = e1000_fc_default;
1598 hw->fc.current_mode = e1000_fc_default;
1599
1600 adapter->itr_setting = IGB_DEFAULT_ITR;
1601 adapter->itr = IGB_START_ITR;
1602
1603 igb_validate_mdi_setting(hw);
1604
1605 /* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
1606 * enable the ACPI Magic Packet filter
1607 */
1608
1609 if (hw->bus.func == 0)
1610 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1611 else if (hw->bus.func == 1)
1612 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1613
1614 if (eeprom_data & eeprom_apme_mask)
1615 adapter->eeprom_wol |= E1000_WUFC_MAG;
1616
1617 /* now that we have the eeprom settings, apply the special cases where
1618 * the eeprom may be wrong or the board simply won't support wake on
1619 * lan on a particular port */
1620 switch (pdev->device) {
1621 case E1000_DEV_ID_82575GB_QUAD_COPPER:
1622 adapter->eeprom_wol = 0;
1623 break;
1624 case E1000_DEV_ID_82575EB_FIBER_SERDES:
1625 case E1000_DEV_ID_82576_FIBER:
1626 case E1000_DEV_ID_82576_SERDES:
1627 /* Wake events only supported on port A for dual fiber
1628 * regardless of eeprom setting */
1629 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1630 adapter->eeprom_wol = 0;
1631 break;
1632 case E1000_DEV_ID_82576_QUAD_COPPER:
1633 /* if quad port adapter, disable WoL on all but port A */
1634 if (global_quad_port_a != 0)
1635 adapter->eeprom_wol = 0;
1636 else
1637 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1638 /* Reset for multiple quad port adapters */
1639 if (++global_quad_port_a == 4)
1640 global_quad_port_a = 0;
1641 break;
1642 }
1643
1644 /* initialize the wol settings based on the eeprom settings */
1645 adapter->wol = adapter->eeprom_wol;
1646 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1647
1648 /* reset the hardware with the new settings */
1649 igb_reset(adapter);
1650
1651 /* let the f/w know that the h/w is now under the control of the
1652 * driver. */
1653 igb_get_hw_control(adapter);
1654
1655 strcpy(netdev->name, "eth%d");
1656 err = register_netdev(netdev);
1657 if (err)
1658 goto err_register;
1659
1660 /* carrier off reporting is important to ethtool even BEFORE open */
1661 netif_carrier_off(netdev);
1662
1663 #ifdef CONFIG_IGB_DCA
1664 if (dca_add_requester(&pdev->dev) == 0) {
1665 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1666 dev_info(&pdev->dev, "DCA enabled\n");
1667 igb_setup_dca(adapter);
1668 }
1669 #endif
1670
1671 /*
1672 * Initialize hardware timer: we keep it running just in case
1673 * some program needs it later on.
1674 */
1675 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1676 adapter->cycles.read = igb_read_clock;
1677 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1678 adapter->cycles.mult = 1;
1679 adapter->cycles.shift = IGB_TSYNC_SHIFT;
1680 wr32(E1000_TIMINCA,
1681 (1<<24) |
1682 IGB_TSYNC_CYCLE_TIME_IN_NANOSECONDS * IGB_TSYNC_SCALE);
1683 #if 0
1684 /*
1685 * Avoid rollover while we initialize by resetting the time counter.
1686 */
1687 wr32(E1000_SYSTIML, 0x00000000);
1688 wr32(E1000_SYSTIMH, 0x00000000);
1689 #else
1690 /*
1691 * Set registers so that rollover occurs soon to test this.
1692 */
1693 wr32(E1000_SYSTIML, 0x00000000);
1694 wr32(E1000_SYSTIMH, 0xFF800000);
1695 #endif
1696 wrfl();
1697 timecounter_init(&adapter->clock,
1698 &adapter->cycles,
1699 ktime_to_ns(ktime_get_real()));
1700
1701 /*
1702 * Synchronize our NIC clock against system wall clock. NIC
1703 * time stamp reading requires ~3us per sample; each sample
1704 * was pretty stable even under load, so only 10 samples are
1705 * required for each offset comparison.
1706 */
1707 memset(&adapter->compare, 0, sizeof(adapter->compare));
1708 adapter->compare.source = &adapter->clock;
1709 adapter->compare.target = ktime_get_real;
1710 adapter->compare.num_samples = 10;
1711 timecompare_update(&adapter->compare, 0);
1712
1713 #ifdef DEBUG
1714 {
1715 char buffer[160];
1716 printk(KERN_DEBUG
1717 "igb: %s: hw %p initialized timer\n",
1718 igb_get_time_str(adapter, buffer),
1719 &adapter->hw);
1720 }
1721 #endif
1722
1723 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1724 /* print bus type/speed/width info */
1725 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1726 netdev->name,
1727 ((hw->bus.speed == e1000_bus_speed_2500)
1728 ? "2.5Gb/s" : "unknown"),
1729 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1730 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1731 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1732 "unknown"),
1733 netdev->dev_addr);
1734
1735 igb_read_part_num(hw, &part_num);
1736 dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1737 (part_num >> 8), (part_num & 0xff));
1738
1739 dev_info(&pdev->dev,
1740 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1741 adapter->msix_entries ? "MSI-X" :
1742 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1743 adapter->num_rx_queues, adapter->num_tx_queues);
1744
1745 return 0;
1746
1747 err_register:
1748 igb_release_hw_control(adapter);
1749 err_eeprom:
1750 if (!igb_check_reset_block(hw))
1751 igb_reset_phy(hw);
1752
1753 if (hw->flash_address)
1754 iounmap(hw->flash_address);
1755 err_sw_init:
1756 igb_clear_interrupt_scheme(adapter);
1757 iounmap(hw->hw_addr);
1758 err_ioremap:
1759 free_netdev(netdev);
1760 err_alloc_etherdev:
1761 pci_release_selected_regions(pdev, pci_select_bars(pdev,
1762 IORESOURCE_MEM));
1763 err_pci_reg:
1764 err_dma:
1765 pci_disable_device(pdev);
1766 return err;
1767 }
1768
1769 /**
1770 * igb_remove - Device Removal Routine
1771 * @pdev: PCI device information struct
1772 *
1773 * igb_remove is called by the PCI subsystem to alert the driver
1774 * that it should release a PCI device. This could be caused by a
1775 * Hot-Plug event, or because the driver is going to be removed from
1776 * memory.
1777 **/
1778 static void __devexit igb_remove(struct pci_dev *pdev)
1779 {
1780 struct net_device *netdev = pci_get_drvdata(pdev);
1781 struct igb_adapter *adapter = netdev_priv(netdev);
1782 struct e1000_hw *hw = &adapter->hw;
1783
1784 /* flush_scheduled_work() may reschedule our watchdog task, so
1785 * explicitly disable watchdog tasks from being rescheduled */
1786 set_bit(__IGB_DOWN, &adapter->state);
1787 del_timer_sync(&adapter->watchdog_timer);
1788 del_timer_sync(&adapter->phy_info_timer);
1789
1790 flush_scheduled_work();
1791
1792 #ifdef CONFIG_IGB_DCA
1793 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1794 dev_info(&pdev->dev, "DCA disabled\n");
1795 dca_remove_requester(&pdev->dev);
1796 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1797 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1798 }
1799 #endif
1800
1801 /* Release control of h/w to f/w. If f/w is AMT enabled, this
1802 * would have already happened in close and is redundant. */
1803 igb_release_hw_control(adapter);
1804
1805 unregister_netdev(netdev);
1806
1807 if (!igb_check_reset_block(&adapter->hw))
1808 igb_reset_phy(&adapter->hw);
1809
1810 igb_clear_interrupt_scheme(adapter);
1811
1812 #ifdef CONFIG_PCI_IOV
1813 /* reclaim resources allocated to VFs */
1814 if (adapter->vf_data) {
1815 /* disable iov and allow time for transactions to clear */
1816 pci_disable_sriov(pdev);
1817 msleep(500);
1818
1819 kfree(adapter->vf_data);
1820 adapter->vf_data = NULL;
1821 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1822 msleep(100);
1823 dev_info(&pdev->dev, "IOV Disabled\n");
1824 }
1825 #endif
1826 iounmap(hw->hw_addr);
1827 if (hw->flash_address)
1828 iounmap(hw->flash_address);
1829 pci_release_selected_regions(pdev, pci_select_bars(pdev,
1830 IORESOURCE_MEM));
1831
1832 free_netdev(netdev);
1833
1834 pci_disable_pcie_error_reporting(pdev);
1835
1836 pci_disable_device(pdev);
1837 }
1838
1839 /**
1840 * igb_sw_init - Initialize general software structures (struct igb_adapter)
1841 * @adapter: board private structure to initialize
1842 *
1843 * igb_sw_init initializes the Adapter private data structure.
1844 * Fields are initialized based on PCI device information and
1845 * OS network device settings (MTU size).
1846 **/
1847 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1848 {
1849 struct e1000_hw *hw = &adapter->hw;
1850 struct net_device *netdev = adapter->netdev;
1851 struct pci_dev *pdev = adapter->pdev;
1852
1853 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1854
1855 adapter->tx_ring_count = IGB_DEFAULT_TXD;
1856 adapter->rx_ring_count = IGB_DEFAULT_RXD;
1857 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1858 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1859
1860 /* This call may decrease the number of queues depending on
1861 * interrupt mode. */
1862 if (igb_init_interrupt_scheme(adapter)) {
1863 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1864 return -ENOMEM;
1865 }
1866
1867 /* Explicitly disable IRQ since the NIC can be in any state. */
1868 igb_irq_disable(adapter);
1869
1870 set_bit(__IGB_DOWN, &adapter->state);
1871 return 0;
1872 }
1873
1874 /**
1875 * igb_open - Called when a network interface is made active
1876 * @netdev: network interface device structure
1877 *
1878 * Returns 0 on success, negative value on failure
1879 *
1880 * The open entry point is called when a network interface is made
1881 * active by the system (IFF_UP). At this point all resources needed
1882 * for transmit and receive operations are allocated, the interrupt
1883 * handler is registered with the OS, the watchdog timer is started,
1884 * and the stack is notified that the interface is ready.
1885 **/
1886 static int igb_open(struct net_device *netdev)
1887 {
1888 struct igb_adapter *adapter = netdev_priv(netdev);
1889 struct e1000_hw *hw = &adapter->hw;
1890 int err;
1891 int i;
1892
1893 /* disallow open during test */
1894 if (test_bit(__IGB_TESTING, &adapter->state))
1895 return -EBUSY;
1896
1897 netif_carrier_off(netdev);
1898
1899 /* allocate transmit descriptors */
1900 err = igb_setup_all_tx_resources(adapter);
1901 if (err)
1902 goto err_setup_tx;
1903
1904 /* allocate receive descriptors */
1905 err = igb_setup_all_rx_resources(adapter);
1906 if (err)
1907 goto err_setup_rx;
1908
1909 /* e1000_power_up_phy(adapter); */
1910
1911 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1912 if ((adapter->hw.mng_cookie.status &
1913 E1000_MNG_DHCP_COOKIE_STATUS_VLAN))
1914 igb_update_mng_vlan(adapter);
1915
1916 /* before we allocate an interrupt, we must be ready to handle it.
1917 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1918 * as soon as we request the interrupt, so we have to set up our
1919 * clean_rx handler before we do so. */
1920 igb_configure(adapter);
1921
1922 igb_set_vmolr(hw, adapter->vfs_allocated_count);
1923
1924 err = igb_request_irq(adapter);
1925 if (err)
1926 goto err_req_irq;
1927
1928 /* From here on the code is the same as igb_up() */
1929 clear_bit(__IGB_DOWN, &adapter->state);
1930
1931 for (i = 0; i < adapter->num_q_vectors; i++) {
1932 struct igb_q_vector *q_vector = adapter->q_vector[i];
1933 napi_enable(&q_vector->napi);
1934 }
1935
1936 /* Clear any pending interrupts. */
1937 rd32(E1000_ICR);
1938
1939 igb_irq_enable(adapter);
1940
1941 /* notify VFs that reset has been completed */
1942 if (adapter->vfs_allocated_count) {
1943 u32 reg_data = rd32(E1000_CTRL_EXT);
1944 reg_data |= E1000_CTRL_EXT_PFRSTD;
1945 wr32(E1000_CTRL_EXT, reg_data);
1946 }
1947
1948 netif_tx_start_all_queues(netdev);
1949
1950 /* Fire a link status change interrupt to start the watchdog. */
1951 wr32(E1000_ICS, E1000_ICS_LSC);
1952
1953 return 0;
1954
1955 err_req_irq:
1956 igb_release_hw_control(adapter);
1957 /* e1000_power_down_phy(adapter); */
1958 igb_free_all_rx_resources(adapter);
1959 err_setup_rx:
1960 igb_free_all_tx_resources(adapter);
1961 err_setup_tx:
1962 igb_reset(adapter);
1963
1964 return err;
1965 }
1966
1967 /**
1968 * igb_close - Disables a network interface
1969 * @netdev: network interface device structure
1970 *
1971 * Returns 0, this is not allowed to fail
1972 *
1973 * The close entry point is called when an interface is de-activated
1974 * by the OS. The hardware is still under the driver's control, but
1975 * needs to be disabled. A global MAC reset is issued to stop the
1976 * hardware, and all transmit and receive resources are freed.
1977 **/
1978 static int igb_close(struct net_device *netdev)
1979 {
1980 struct igb_adapter *adapter = netdev_priv(netdev);
1981
1982 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
1983 igb_down(adapter);
1984
1985 igb_free_irq(adapter);
1986
1987 igb_free_all_tx_resources(adapter);
1988 igb_free_all_rx_resources(adapter);
1989
1990 /* kill manageability vlan ID if supported, but not if a vlan with
1991 * the same ID is registered on the host OS (let 8021q kill it) */
1992 if ((adapter->hw.mng_cookie.status &
1993 E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
1994 !(adapter->vlgrp &&
1995 vlan_group_get_device(adapter->vlgrp, adapter->mng_vlan_id)))
1996 igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
1997
1998 return 0;
1999 }
2000
2001 /**
2002 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2003 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2004 *
2005 * Return 0 on success, negative on failure
2006 **/
2007 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2008 {
2009 struct pci_dev *pdev = tx_ring->pdev;
2010 int size;
2011
2012 size = sizeof(struct igb_buffer) * tx_ring->count;
2013 tx_ring->buffer_info = vmalloc(size);
2014 if (!tx_ring->buffer_info)
2015 goto err;
2016 memset(tx_ring->buffer_info, 0, size);
2017
2018 /* round up to nearest 4K */
2019 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2020 tx_ring->size = ALIGN(tx_ring->size, 4096);
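/*
 * Sizing note: advanced Tx descriptors are 16 bytes each, so for example
 * a 256-descriptor ring is exactly 4096 bytes and the ALIGN above is a
 * no-op; other counts round up to the next 4K boundary.
 */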
2021
2022 tx_ring->desc = pci_alloc_consistent(pdev, tx_ring->size,
2023 &tx_ring->dma);
2024
2025 if (!tx_ring->desc)
2026 goto err;
2027
2028 tx_ring->next_to_use = 0;
2029 tx_ring->next_to_clean = 0;
2030 return 0;
2031
2032 err:
2033 vfree(tx_ring->buffer_info);
2034 dev_err(&pdev->dev,
2035 "Unable to allocate memory for the transmit descriptor ring\n");
2036 return -ENOMEM;
2037 }
2038
2039 /**
2040 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2041 * (Descriptors) for all queues
2042 * @adapter: board private structure
2043 *
2044 * Return 0 on success, negative on failure
2045 **/
2046 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2047 {
2048 int i, err = 0;
2049 int r_idx;
2050
2051 for (i = 0; i < adapter->num_tx_queues; i++) {
2052 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
2053 if (err) {
2054 dev_err(&adapter->pdev->dev,
2055 "Allocation for Tx Queue %u failed\n", i);
2056 for (i--; i >= 0; i--)
2057 igb_free_tx_resources(&adapter->tx_ring[i]);
2058 break;
2059 }
2060 }
2061
2062 for (i = 0; i < IGB_MAX_TX_QUEUES; i++) {
2063 r_idx = i % adapter->num_tx_queues;
2064 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
2065 }
2066 return err;
2067 }
2068
2069 /**
2070 * igb_setup_tctl - configure the transmit control registers
2071 * @adapter: Board private structure
2072 **/
2073 void igb_setup_tctl(struct igb_adapter *adapter)
2074 {
2075 struct e1000_hw *hw = &adapter->hw;
2076 u32 tctl;
2077
2078 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2079 wr32(E1000_TXDCTL(0), 0);
2080
2081 /* Program the Transmit Control Register */
2082 tctl = rd32(E1000_TCTL);
2083 tctl &= ~E1000_TCTL_CT;
2084 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2085 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2086
2087 igb_config_collision_dist(hw);
2088
2089 /* Enable transmits */
2090 tctl |= E1000_TCTL_EN;
2091
2092 wr32(E1000_TCTL, tctl);
2093 }
2094
2095 /**
2096 * igb_configure_tx_ring - Configure transmit ring after Reset
2097 * @adapter: board private structure
2098 * @ring: tx ring to configure
2099 *
2100 * Configure a transmit ring after a reset.
2101 **/
2102 void igb_configure_tx_ring(struct igb_adapter *adapter,
2103 struct igb_ring *ring)
2104 {
2105 struct e1000_hw *hw = &adapter->hw;
2106 u32 txdctl;
2107 u64 tdba = ring->dma;
2108 int reg_idx = ring->reg_idx;
2109
2110 /* disable the queue */
2111 txdctl = rd32(E1000_TXDCTL(reg_idx));
2112 wr32(E1000_TXDCTL(reg_idx),
2113 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2114 wrfl();
2115 mdelay(10);
2116
2117 wr32(E1000_TDLEN(reg_idx),
2118 ring->count * sizeof(union e1000_adv_tx_desc));
2119 wr32(E1000_TDBAL(reg_idx),
2120 tdba & 0x00000000ffffffffULL);
2121 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2122
2123 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2124 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2125 writel(0, ring->head);
2126 writel(0, ring->tail);
2127
2128 txdctl |= IGB_TX_PTHRESH;
2129 txdctl |= IGB_TX_HTHRESH << 8;
2130 txdctl |= IGB_TX_WTHRESH << 16;
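/*
 * The shifts above pack the prefetch (PTHRESH), host (HTHRESH) and
 * write-back (WTHRESH) thresholds into bits 0, 8 and 16 of TXDCTL.
 */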
2131
2132 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2133 wr32(E1000_TXDCTL(reg_idx), txdctl);
2134 }
2135
2136 /**
2137 * igb_configure_tx - Configure transmit Unit after Reset
2138 * @adapter: board private structure
2139 *
2140 * Configure the Tx unit of the MAC after a reset.
2141 **/
2142 static void igb_configure_tx(struct igb_adapter *adapter)
2143 {
2144 int i;
2145
2146 for (i = 0; i < adapter->num_tx_queues; i++)
2147 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2148 }
2149
2150 /**
2151 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2152 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2153 *
2154 * Returns 0 on success, negative on failure
2155 **/
2156 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2157 {
2158 struct pci_dev *pdev = rx_ring->pdev;
2159 int size, desc_len;
2160
2161 size = sizeof(struct igb_buffer) * rx_ring->count;
2162 rx_ring->buffer_info = vmalloc(size);
2163 if (!rx_ring->buffer_info)
2164 goto err;
2165 memset(rx_ring->buffer_info, 0, size);
2166
2167 desc_len = sizeof(union e1000_adv_rx_desc);
2168
2169 /* Round up to nearest 4K */
2170 rx_ring->size = rx_ring->count * desc_len;
2171 rx_ring->size = ALIGN(rx_ring->size, 4096);
2172
2173 rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2174 &rx_ring->dma);
2175
2176 if (!rx_ring->desc)
2177 goto err;
2178
2179 rx_ring->next_to_clean = 0;
2180 rx_ring->next_to_use = 0;
2181
2182 return 0;
2183
2184 err:
2185 vfree(rx_ring->buffer_info);
2186 dev_err(&pdev->dev, "Unable to allocate memory for "
2187 "the receive descriptor ring\n");
2188 return -ENOMEM;
2189 }
2190
2191 /**
2192 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2193 * (Descriptors) for all queues
2194 * @adapter: board private structure
2195 *
2196 * Return 0 on success, negative on failure
2197 **/
2198 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2199 {
2200 int i, err = 0;
2201
2202 for (i = 0; i < adapter->num_rx_queues; i++) {
2203 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2204 if (err) {
2205 dev_err(&adapter->pdev->dev,
2206 "Allocation for Rx Queue %u failed\n", i);
2207 for (i--; i >= 0; i--)
2208 igb_free_rx_resources(&adapter->rx_ring[i]);
2209 break;
2210 }
2211 }
2212
2213 return err;
2214 }
2215
2216 /**
2217 * igb_setup_mrqc - configure the multiple receive queue control registers
2218 * @adapter: Board private structure
2219 **/
2220 static void igb_setup_mrqc(struct igb_adapter *adapter)
2221 {
2222 struct e1000_hw *hw = &adapter->hw;
2223 u32 mrqc, rxcsum;
2224 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2225 union e1000_reta {
2226 u32 dword;
2227 u8 bytes[4];
2228 } reta;
2229 static const u8 rsshash[40] = {
2230 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2231 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2232 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2233 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2234
2235 /* Fill out hash function seeds */
2236 for (j = 0; j < 10; j++) {
2237 u32 rsskey = rsshash[(j * 4)];
2238 rsskey |= rsshash[(j * 4) + 1] << 8;
2239 rsskey |= rsshash[(j * 4) + 2] << 16;
2240 rsskey |= rsshash[(j * 4) + 3] << 24;
2241 array_wr32(E1000_RSSRK(0), j, rsskey);
2242 }
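/*
 * Each RSSRK register holds four key bytes, least significant byte first:
 * for j == 0 the bytes 6d 5a 56 da above pack into 0xda565a6d.
 */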
2243
2244 num_rx_queues = adapter->num_rx_queues;
2245
2246 if (adapter->vfs_allocated_count) {
2247 /* 82575 and 82576 support 2 RSS queues for VMDq */
2248 switch (hw->mac.type) {
2249 case e1000_82576:
2250 shift = 3;
2251 num_rx_queues = 2;
2252 break;
2253 case e1000_82575:
2254 shift = 2;
2255 shift2 = 6;
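/* fall through */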
2256 default:
2257 break;
2258 }
2259 } else {
2260 if (hw->mac.type == e1000_82575)
2261 shift = 6;
2262 }
2263
2264 for (j = 0; j < (32 * 4); j++) {
2265 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2266 if (shift2)
2267 reta.bytes[j & 3] |= num_rx_queues << shift2;
2268 if ((j & 3) == 3)
2269 wr32(E1000_RETA(j >> 2), reta.dword);
2270 }
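/*
 * The loop above fills the 128-entry RSS redirection table one RETA
 * register (four entries) at a time. In the simple case (shift == 0),
 * entry j maps to queue (j % num_rx_queues); the shifted variants place
 * the queue index at the bit offsets the VMDq modes expect.
 */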
2271
2272 /*
2273 * Disable raw packet checksumming so that RSS hash is placed in
2274 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2275 * offloads as they are enabled by default
2276 */
2277 rxcsum = rd32(E1000_RXCSUM);
2278 rxcsum |= E1000_RXCSUM_PCSD;
2279
2280 if (adapter->hw.mac.type >= e1000_82576)
2281 /* Enable Receive Checksum Offload for SCTP */
2282 rxcsum |= E1000_RXCSUM_CRCOFL;
2283
2284 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2285 wr32(E1000_RXCSUM, rxcsum);
2286
2287 /* If VMDq is enabled then we set the appropriate mode for that, else
2288 * we default to RSS so that an RSS hash is calculated per packet even
2289 * if we are only using one queue */
2290 if (adapter->vfs_allocated_count) {
2291 if (hw->mac.type > e1000_82575) {
2292 /* Set the default pool for the PF's first queue */
2293 u32 vtctl = rd32(E1000_VT_CTL);
2294 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2295 E1000_VT_CTL_DISABLE_DEF_POOL);
2296 vtctl |= adapter->vfs_allocated_count <<
2297 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2298 wr32(E1000_VT_CTL, vtctl);
2299 }
2300 if (adapter->num_rx_queues > 1)
2301 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2302 else
2303 mrqc = E1000_MRQC_ENABLE_VMDQ;
2304 } else {
2305 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2306 }
2307 igb_vmm_control(adapter);
2308
2309 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2310 E1000_MRQC_RSS_FIELD_IPV4_TCP);
2311 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2312 E1000_MRQC_RSS_FIELD_IPV6_TCP);
2313 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2314 E1000_MRQC_RSS_FIELD_IPV6_UDP);
2315 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2316 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2317
2318 wr32(E1000_MRQC, mrqc);
2319 }
2320
2321 /**
2322 * igb_setup_rctl - configure the receive control registers
2323 * @adapter: Board private structure
2324 **/
2325 void igb_setup_rctl(struct igb_adapter *adapter)
2326 {
2327 struct e1000_hw *hw = &adapter->hw;
2328 u32 rctl;
2329
2330 rctl = rd32(E1000_RCTL);
2331
2332 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2333 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2334
2335 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2336 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2337
2338 /*
2339 * enable stripping of CRC. It's unlikely this will break BMC
2340 * redirection as it did with e1000. Newer features require
2341 * that the HW strips the CRC.
2342 */
2343 rctl |= E1000_RCTL_SECRC;
2344
2345 /*
2346 * disable store bad packets and clear size bits.
2347 */
2348 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2349
2350 /* enable LPE to prevent packets larger than max_frame_size */
2351 rctl |= E1000_RCTL_LPE;
2352
2353 /* disable queue 0 to prevent tail write w/o re-config */
2354 wr32(E1000_RXDCTL(0), 0);
2355
2356 /* Attention!!! For SR-IOV PF driver operations you must enable
2357 * queue drop for all VF and PF queues to prevent head-of-line blocking
2358 * if an untrusted VF does not provide descriptors to hardware.
2359 */
2360 if (adapter->vfs_allocated_count) {
2361 u32 vmolr;
2362
2363 /* set all queue drop enable bits */
2364 wr32(E1000_QDE, ALL_QUEUES);
2365
2366 vmolr = rd32(E1000_VMOLR(adapter->vfs_allocated_count));
2367 if (rctl & E1000_RCTL_LPE)
2368 vmolr |= E1000_VMOLR_LPE;
2369 if (adapter->num_rx_queues > 1)
2370 vmolr |= E1000_VMOLR_RSSE;
2371 wr32(E1000_VMOLR(adapter->vfs_allocated_count), vmolr);
2372 }
2373
2374 wr32(E1000_RCTL, rctl);
2375 }
2376
2377 /**
2378 * igb_rlpml_set - set maximum receive packet size
2379 * @adapter: board private structure
2380 *
2381 * Configure maximum receivable packet size.
2382 **/
2383 static void igb_rlpml_set(struct igb_adapter *adapter)
2384 {
2385 u32 max_frame_size = adapter->max_frame_size;
2386 struct e1000_hw *hw = &adapter->hw;
2387 u16 pf_id = adapter->vfs_allocated_count;
2388
2389 if (adapter->vlgrp)
2390 max_frame_size += VLAN_TAG_SIZE;
2391
2392 /* if vfs are enabled we set RLPML to the largest possible request
2393 * size and set the VMOLR RLPML to the size we need */
2394 if (pf_id) {
2395 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2396 max_frame_size = MAX_STD_JUMBO_FRAME_SIZE + VLAN_TAG_SIZE;
2397 }
2398
2399 wr32(E1000_RLPML, max_frame_size);
2400 }
2401
2402 /**
2403 * igb_configure_rx_ring - Configure a receive ring after Reset
2404 * @adapter: board private structure
2405 * @ring: receive ring to be configured
2406 *
2407 * Configure the Rx unit of the MAC after a reset.
2408 **/
2409 void igb_configure_rx_ring(struct igb_adapter *adapter,
2410 struct igb_ring *ring)
2411 {
2412 struct e1000_hw *hw = &adapter->hw;
2413 u64 rdba = ring->dma;
2414 int reg_idx = ring->reg_idx;
2415 u32 srrctl, rxdctl;
2416
2417 /* disable the queue */
2418 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2419 wr32(E1000_RXDCTL(reg_idx),
2420 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2421
2422 /* Set DMA base address registers */
2423 wr32(E1000_RDBAL(reg_idx),
2424 rdba & 0x00000000ffffffffULL);
2425 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2426 wr32(E1000_RDLEN(reg_idx),
2427 ring->count * sizeof(union e1000_adv_rx_desc));
2428
2429 /* initialize head and tail */
2430 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2431 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2432 writel(0, ring->head);
2433 writel(0, ring->tail);
2434
2435 /* set descriptor configuration */
2436 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2437 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2438 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2439 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2440 srrctl |= IGB_RXBUFFER_16384 >>
2441 E1000_SRRCTL_BSIZEPKT_SHIFT;
2442 #else
2443 srrctl |= (PAGE_SIZE / 2) >>
2444 E1000_SRRCTL_BSIZEPKT_SHIFT;
2445 #endif
2446 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2447 } else {
2448 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2449 E1000_SRRCTL_BSIZEPKT_SHIFT;
2450 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2451 }
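/*
 * Summary: buffers smaller than 1K use the header-split descriptor format
 * (headers land in the rx_buffer_len buffer, packet data in a half-page
 * buffer); larger buffers use the simple one-buffer format with the
 * packet buffer size given in 1KB units (BSIZEPKT).
 */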
2452
2453 wr32(E1000_SRRCTL(reg_idx), srrctl);
2454
2455 /* enable receive descriptor fetching */
2456 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2457 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2458 rxdctl &= 0xFFF00000;
2459 rxdctl |= IGB_RX_PTHRESH;
2460 rxdctl |= IGB_RX_HTHRESH << 8;
2461 rxdctl |= IGB_RX_WTHRESH << 16;
2462 wr32(E1000_RXDCTL(reg_idx), rxdctl);
2463 }
2464
2465 /**
2466 * igb_configure_rx - Configure receive Unit after Reset
2467 * @adapter: board private structure
2468 *
2469 * Configure the Rx unit of the MAC after a reset.
2470 **/
2471 static void igb_configure_rx(struct igb_adapter *adapter)
2472 {
2473 int i;
2474
2475 /* set UTA to appropriate mode */
2476 igb_set_uta(adapter);
2477
2478 /* set the correct pool for the PF default MAC address in entry 0 */
2479 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2480 adapter->vfs_allocated_count);
2481
2482 /* Setup the HW Rx Head and Tail Descriptor Pointers and
2483 * the Base and Length of the Rx Descriptor Ring */
2484 for (i = 0; i < adapter->num_rx_queues; i++)
2485 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2486 }
2487
2488 /**
2489 * igb_free_tx_resources - Free Tx Resources per Queue
2490 * @tx_ring: Tx descriptor ring for a specific queue
2491 *
2492 * Free all transmit software resources
2493 **/
2494 void igb_free_tx_resources(struct igb_ring *tx_ring)
2495 {
2496 igb_clean_tx_ring(tx_ring);
2497
2498 vfree(tx_ring->buffer_info);
2499 tx_ring->buffer_info = NULL;
2500
2501 pci_free_consistent(tx_ring->pdev, tx_ring->size,
2502 tx_ring->desc, tx_ring->dma);
2503
2504 tx_ring->desc = NULL;
2505 }
2506
2507 /**
2508 * igb_free_all_tx_resources - Free Tx Resources for All Queues
2509 * @adapter: board private structure
2510 *
2511 * Free all transmit software resources
2512 **/
2513 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2514 {
2515 int i;
2516
2517 for (i = 0; i < adapter->num_tx_queues; i++)
2518 igb_free_tx_resources(&adapter->tx_ring[i]);
2519 }
2520
2521 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2522 struct igb_buffer *buffer_info)
2523 {
2524 buffer_info->dma = 0;
2525 if (buffer_info->skb) {
2526 skb_dma_unmap(&tx_ring->pdev->dev,
2527 buffer_info->skb,
2528 DMA_TO_DEVICE);
2529 dev_kfree_skb_any(buffer_info->skb);
2530 buffer_info->skb = NULL;
2531 }
2532 buffer_info->time_stamp = 0;
2533 /* buffer_info must be completely set up in the transmit path */
2534 }
2535
2536 /**
2537 * igb_clean_tx_ring - Free Tx Buffers
2538 * @tx_ring: ring to be cleaned
2539 **/
2540 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2541 {
2542 struct igb_buffer *buffer_info;
2543 unsigned long size;
2544 unsigned int i;
2545
2546 if (!tx_ring->buffer_info)
2547 return;
2548 /* Free all the Tx ring sk_buffs */
2549
2550 for (i = 0; i < tx_ring->count; i++) {
2551 buffer_info = &tx_ring->buffer_info[i];
2552 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2553 }
2554
2555 size = sizeof(struct igb_buffer) * tx_ring->count;
2556 memset(tx_ring->buffer_info, 0, size);
2557
2558 /* Zero out the descriptor ring */
2559
2560 memset(tx_ring->desc, 0, tx_ring->size);
2561
2562 tx_ring->next_to_use = 0;
2563 tx_ring->next_to_clean = 0;
2564
2565 writel(0, tx_ring->head);
2566 writel(0, tx_ring->tail);
2567 }
2568
2569 /**
2570 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2571 * @adapter: board private structure
2572 **/
2573 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2574 {
2575 int i;
2576
2577 for (i = 0; i < adapter->num_tx_queues; i++)
2578 igb_clean_tx_ring(&adapter->tx_ring[i]);
2579 }
2580
2581 /**
2582 * igb_free_rx_resources - Free Rx Resources
2583 * @rx_ring: ring to clean the resources from
2584 *
2585 * Free all receive software resources
2586 **/
2587 void igb_free_rx_resources(struct igb_ring *rx_ring)
2588 {
2589 igb_clean_rx_ring(rx_ring);
2590
2591 vfree(rx_ring->buffer_info);
2592 rx_ring->buffer_info = NULL;
2593
2594 pci_free_consistent(rx_ring->pdev, rx_ring->size,
2595 rx_ring->desc, rx_ring->dma);
2596
2597 rx_ring->desc = NULL;
2598 }
2599
2600 /**
2601 * igb_free_all_rx_resources - Free Rx Resources for All Queues
2602 * @adapter: board private structure
2603 *
2604 * Free all receive software resources
2605 **/
2606 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2607 {
2608 int i;
2609
2610 for (i = 0; i < adapter->num_rx_queues; i++)
2611 igb_free_rx_resources(&adapter->rx_ring[i]);
2612 }
2613
2614 /**
2615 * igb_clean_rx_ring - Free Rx Buffers per Queue
2616 * @rx_ring: ring to free buffers from
2617 **/
2618 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2619 {
2620 struct igb_buffer *buffer_info;
2621 unsigned long size;
2622 unsigned int i;
2623
2624 if (!rx_ring->buffer_info)
2625 return;
2626 /* Free all the Rx ring sk_buffs */
2627 for (i = 0; i < rx_ring->count; i++) {
2628 buffer_info = &rx_ring->buffer_info[i];
2629 if (buffer_info->dma) {
2630 pci_unmap_single(rx_ring->pdev,
2631 buffer_info->dma,
2632 rx_ring->rx_buffer_len,
2633 PCI_DMA_FROMDEVICE);
2634 buffer_info->dma = 0;
2635 }
2636
2637 if (buffer_info->skb) {
2638 dev_kfree_skb(buffer_info->skb);
2639 buffer_info->skb = NULL;
2640 }
2641 if (buffer_info->page_dma) {
2642 pci_unmap_page(rx_ring->pdev,
2643 buffer_info->page_dma,
2644 PAGE_SIZE / 2,
2645 PCI_DMA_FROMDEVICE);
2646 buffer_info->page_dma = 0;
2647 }
2648 if (buffer_info->page) {
2649 put_page(buffer_info->page);
2650 buffer_info->page = NULL;
2651 buffer_info->page_offset = 0;
2652 }
2653 }
2654
2655 size = sizeof(struct igb_buffer) * rx_ring->count;
2656 memset(rx_ring->buffer_info, 0, size);
2657
2658 /* Zero out the descriptor ring */
2659 memset(rx_ring->desc, 0, rx_ring->size);
2660
2661 rx_ring->next_to_clean = 0;
2662 rx_ring->next_to_use = 0;
2663
2664 writel(0, rx_ring->head);
2665 writel(0, rx_ring->tail);
2666 }
2667
2668 /**
2669 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2670 * @adapter: board private structure
2671 **/
2672 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2673 {
2674 int i;
2675
2676 for (i = 0; i < adapter->num_rx_queues; i++)
2677 igb_clean_rx_ring(&adapter->rx_ring[i]);
2678 }
2679
2680 /**
2681 * igb_set_mac - Change the Ethernet Address of the NIC
2682 * @netdev: network interface device structure
2683 * @p: pointer to an address structure
2684 *
2685 * Returns 0 on success, negative on failure
2686 **/
2687 static int igb_set_mac(struct net_device *netdev, void *p)
2688 {
2689 struct igb_adapter *adapter = netdev_priv(netdev);
2690 struct e1000_hw *hw = &adapter->hw;
2691 struct sockaddr *addr = p;
2692
2693 if (!is_valid_ether_addr(addr->sa_data))
2694 return -EADDRNOTAVAIL;
2695
2696 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2697 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2698
2699 /* set the correct pool for the new PF MAC address in entry 0 */
2700 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2701 adapter->vfs_allocated_count);
2702
2703 return 0;
2704 }
2705
2706 /**
2707 * igb_write_mc_addr_list - write multicast addresses to MTA
2708 * @netdev: network interface device structure
2709 *
2710 * Writes multicast address list to the MTA hash table.
2711 * Returns: -ENOMEM on failure
2712 * 0 on no addresses written
2713 * X on writing X addresses to MTA
2714 **/
2715 static int igb_write_mc_addr_list(struct net_device *netdev)
2716 {
2717 struct igb_adapter *adapter = netdev_priv(netdev);
2718 struct e1000_hw *hw = &adapter->hw;
2719 struct dev_mc_list *mc_ptr = netdev->mc_list;
2720 u8 *mta_list;
2721 u32 vmolr = 0;
2722 int i;
2723
2724 if (!netdev->mc_count) {
2725 /* nothing to program, so clear mc list */
2726 igb_update_mc_addr_list(hw, NULL, 0);
2727 igb_restore_vf_multicasts(adapter);
2728 return 0;
2729 }
2730
2731 mta_list = kzalloc(netdev->mc_count * ETH_ALEN, GFP_ATOMIC);
2732 if (!mta_list)
2733 return -ENOMEM;
2734
2735 /* set vmolr receive overflow multicast bit */
2736 vmolr |= E1000_VMOLR_ROMPE;
2737
2738 /* The shared function expects a packed array of only addresses. */
2739 mc_ptr = netdev->mc_list;
2740
2741 for (i = 0; i < netdev->mc_count; i++) {
2742 if (!mc_ptr)
2743 break;
2744 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2745 mc_ptr = mc_ptr->next;
2746 }
2747 igb_update_mc_addr_list(hw, mta_list, i);
2748 kfree(mta_list);
2749
2750 return netdev->mc_count;
2751 }
2752
2753 /**
2754 * igb_write_uc_addr_list - write unicast addresses to RAR table
2755 * @netdev: network interface device structure
2756 *
2757 * Writes unicast address list to the RAR table.
2758 * Returns: -ENOMEM on failure/insufficient address space
2759 * 0 on no addresses written
2760 * X on writing X addresses to the RAR table
2761 **/
2762 static int igb_write_uc_addr_list(struct net_device *netdev)
2763 {
2764 struct igb_adapter *adapter = netdev_priv(netdev);
2765 struct e1000_hw *hw = &adapter->hw;
2766 unsigned int vfn = adapter->vfs_allocated_count;
2767 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2768 int count = 0;
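/*
 * rar_entries excludes entry 0 (the PF default MAC) and one entry per
 * allocated VF, leaving that many slots for the unicast address list.
 */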
2769
2770 /* return -ENOMEM to indicate insufficient space for the addresses */
2771 if (netdev->uc.count > rar_entries)
2772 return -ENOMEM;
2773
2774 if (netdev->uc.count && rar_entries) {
2775 struct netdev_hw_addr *ha;
2776 list_for_each_entry(ha, &netdev->uc.list, list) {
2777 if (!rar_entries)
2778 break;
2779 igb_rar_set_qsel(adapter, ha->addr,
2780 rar_entries--,
2781 vfn);
2782 count++;
2783 }
2784 }
2785 /* write the addresses in reverse order to avoid write combining */
2786 for (; rar_entries > 0 ; rar_entries--) {
2787 wr32(E1000_RAH(rar_entries), 0);
2788 wr32(E1000_RAL(rar_entries), 0);
2789 }
2790 wrfl();
2791
2792 return count;
2793 }
2794
2795 /**
2796 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2797 * @netdev: network interface device structure
2798 *
2799 * The set_rx_mode entry point is called whenever the unicast or multicast
2800 * address lists or the network interface flags are updated. This routine is
2801 * responsible for configuring the hardware for proper unicast, multicast,
2802 * promiscuous mode, and all-multi behavior.
2803 **/
2804 static void igb_set_rx_mode(struct net_device *netdev)
2805 {
2806 struct igb_adapter *adapter = netdev_priv(netdev);
2807 struct e1000_hw *hw = &adapter->hw;
2808 unsigned int vfn = adapter->vfs_allocated_count;
2809 u32 rctl, vmolr = 0;
2810 int count;
2811
2812 /* Check for Promiscuous and All Multicast modes */
2813 rctl = rd32(E1000_RCTL);
2814
2815 /* clear the affected bits */
2816 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2817
2818 if (netdev->flags & IFF_PROMISC) {
2819 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2820 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2821 } else {
2822 if (netdev->flags & IFF_ALLMULTI) {
2823 rctl |= E1000_RCTL_MPE;
2824 vmolr |= E1000_VMOLR_MPME;
2825 } else {
2826 /*
2827 * Write addresses to the MTA; if the attempt fails, just
2828 * turn on multicast promiscuous mode so that we can at
2829 * least receive multicast traffic
2830 */
2831 count = igb_write_mc_addr_list(netdev);
2832 if (count < 0) {
2833 rctl |= E1000_RCTL_MPE;
2834 vmolr |= E1000_VMOLR_MPME;
2835 } else if (count) {
2836 vmolr |= E1000_VMOLR_ROMPE;
2837 }
2838 }
2839 /*
2840 * Write addresses to the available RAR registers; if there is
2841 * not sufficient space to store all the addresses, enable
2842 * unicast promiscuous mode
2843 */
2844 count = igb_write_uc_addr_list(netdev);
2845 if (count < 0) {
2846 rctl |= E1000_RCTL_UPE;
2847 vmolr |= E1000_VMOLR_ROPE;
2848 }
2849 rctl |= E1000_RCTL_VFE;
2850 }
2851 wr32(E1000_RCTL, rctl);
2852
2853 /*
2854 * In order to support SR-IOV and eventually VMDq it is necessary to set
2855 * the VMOLR to enable the appropriate modes. Without this workaround
2856 * we will have issues with VLAN tag stripping not being done for frames
2857 * that are only arriving because we are the default pool
2858 */
2859 if (hw->mac.type < e1000_82576)
2860 return;
2861
2862 vmolr |= rd32(E1000_VMOLR(vfn)) &
2863 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
2864 wr32(E1000_VMOLR(vfn), vmolr);
2865 igb_restore_vf_multicasts(adapter);
2866 }
2867
2868 /* Need to wait a few seconds after link up to get diagnostic information from
2869 * the phy */
2870 static void igb_update_phy_info(unsigned long data)
2871 {
2872 struct igb_adapter *adapter = (struct igb_adapter *) data;
2873 igb_get_phy_info(&adapter->hw);
2874 }
2875
2876 /**
2877 * igb_has_link - check shared code for link and determine up/down
2878 * @adapter: pointer to driver private info
2879 **/
2880 static bool igb_has_link(struct igb_adapter *adapter)
2881 {
2882 struct e1000_hw *hw = &adapter->hw;
2883 bool link_active = false;
2884 s32 ret_val = 0;
2885
2886 /* get_link_status is set on LSC (link status) interrupt or
2887 * rx sequence error interrupt. get_link_status remains set
2888 * until check_for_link establishes link, so link is reported
2889 * down until then (copper adapters ONLY)
2890 */
2891 switch (hw->phy.media_type) {
2892 case e1000_media_type_copper:
2893 if (hw->mac.get_link_status) {
2894 ret_val = hw->mac.ops.check_for_link(hw);
2895 link_active = !hw->mac.get_link_status;
2896 } else {
2897 link_active = true;
2898 }
2899 break;
2900 case e1000_media_type_internal_serdes:
2901 ret_val = hw->mac.ops.check_for_link(hw);
2902 link_active = hw->mac.serdes_has_link;
2903 break;
2904 default:
2905 case e1000_media_type_unknown:
2906 break;
2907 }
2908
2909 return link_active;
2910 }
2911
2912 /**
2913 * igb_watchdog - Timer Call-back
2914 * @data: pointer to adapter cast into an unsigned long
2915 **/
2916 static void igb_watchdog(unsigned long data)
2917 {
2918 struct igb_adapter *adapter = (struct igb_adapter *)data;
2919 /* Do the rest outside of interrupt context */
2920 schedule_work(&adapter->watchdog_task);
2921 }
2922
2923 static void igb_watchdog_task(struct work_struct *work)
2924 {
2925 struct igb_adapter *adapter = container_of(work,
2926 struct igb_adapter, watchdog_task);
2927 struct e1000_hw *hw = &adapter->hw;
2928 struct net_device *netdev = adapter->netdev;
2929 struct igb_ring *tx_ring = adapter->tx_ring;
2930 u32 link;
2931 int i;
2932
2933 link = igb_has_link(adapter);
2934 if (netif_carrier_ok(netdev) && link)
2935 goto link_up;
2936
2937 if (link) {
2938 if (!netif_carrier_ok(netdev)) {
2939 u32 ctrl;
2940 hw->mac.ops.get_speed_and_duplex(&adapter->hw,
2941 &adapter->link_speed,
2942 &adapter->link_duplex);
2943
2944 ctrl = rd32(E1000_CTRL);
2945 /* Link status message must follow this format */
2946 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
2947 "Flow Control: %s\n",
2948 netdev->name,
2949 adapter->link_speed,
2950 adapter->link_duplex == FULL_DUPLEX ?
2951 "Full Duplex" : "Half Duplex",
2952 ((ctrl & E1000_CTRL_TFCE) && (ctrl &
2953 E1000_CTRL_RFCE)) ? "RX/TX" : ((ctrl &
2954 E1000_CTRL_RFCE) ? "RX" : ((ctrl &
2955 E1000_CTRL_TFCE) ? "TX" : "None")));
2956
2957 /* tweak tx_queue_len according to speed/duplex and
2958 * adjust the timeout factor */
2959 netdev->tx_queue_len = adapter->tx_queue_len;
2960 adapter->tx_timeout_factor = 1;
2961 switch (adapter->link_speed) {
2962 case SPEED_10:
2963 netdev->tx_queue_len = 10;
2964 adapter->tx_timeout_factor = 14;
2965 break;
2966 case SPEED_100:
2967 netdev->tx_queue_len = 100;
2968 /* maybe add some timeout factor ? */
2969 break;
2970 }
2971
2972 netif_carrier_on(netdev);
2973
2974 igb_ping_all_vfs(adapter);
2975
2976 /* link state has changed, schedule phy info update */
2977 if (!test_bit(__IGB_DOWN, &adapter->state))
2978 mod_timer(&adapter->phy_info_timer,
2979 round_jiffies(jiffies + 2 * HZ));
2980 }
2981 } else {
2982 if (netif_carrier_ok(netdev)) {
2983 adapter->link_speed = 0;
2984 adapter->link_duplex = 0;
2985 /* Link status message must follow this format */
2986 printk(KERN_INFO "igb: %s NIC Link is Down\n",
2987 netdev->name);
2988 netif_carrier_off(netdev);
2989
2990 igb_ping_all_vfs(adapter);
2991
2992 /* link state has changed, schedule phy info update */
2993 if (!test_bit(__IGB_DOWN, &adapter->state))
2994 mod_timer(&adapter->phy_info_timer,
2995 round_jiffies(jiffies + 2 * HZ));
2996 }
2997 }
2998
2999 link_up:
3000 igb_update_stats(adapter);
3001
3002 hw->mac.tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
3003 adapter->tpt_old = adapter->stats.tpt;
3004 hw->mac.collision_delta = adapter->stats.colc - adapter->colc_old;
3005 adapter->colc_old = adapter->stats.colc;
3006
3007 adapter->gorc = adapter->stats.gorc - adapter->gorc_old;
3008 adapter->gorc_old = adapter->stats.gorc;
3009 adapter->gotc = adapter->stats.gotc - adapter->gotc_old;
3010 adapter->gotc_old = adapter->stats.gotc;
3011
3012 igb_update_adaptive(&adapter->hw);
3013
3014 if (!netif_carrier_ok(netdev)) {
3015 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3016 /* We've lost link, so the controller stops DMA,
3017 * but we've got queued Tx work that's never going
3018 * to get done, so reset controller to flush Tx.
3019 * (Do the reset outside of interrupt context). */
3020 adapter->tx_timeout_count++;
3021 schedule_work(&adapter->reset_task);
3022 /* return immediately since reset is imminent */
3023 return;
3024 }
3025 }
3026
3027 /* Cause software interrupt to ensure rx ring is cleaned */
3028 if (adapter->msix_entries) {
3029 u32 eics = 0;
3030 for (i = 0; i < adapter->num_q_vectors; i++) {
3031 struct igb_q_vector *q_vector = adapter->q_vector[i];
3032 eics |= q_vector->eims_value;
3033 }
3034 wr32(E1000_EICS, eics);
3035 } else {
3036 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3037 }
3038
3039 /* Force detection of hung controller every watchdog period */
3040 tx_ring->detect_tx_hung = true;
3041
3042 /* Reset the timer */
3043 if (!test_bit(__IGB_DOWN, &adapter->state))
3044 mod_timer(&adapter->watchdog_timer,
3045 round_jiffies(jiffies + 2 * HZ));
3046 }
3047
3048 enum latency_range {
3049 lowest_latency = 0,
3050 low_latency = 1,
3051 bulk_latency = 2,
3052 latency_invalid = 255
3053 };
3054
3055
3056 /**
3057 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3058 * @q_vector: pointer to q_vector
3059 *
3060 * Stores a new ITR value based strictly on packet size. This
3061 * algorithm is less sophisticated than that used in igb_update_itr,
3062 * due to the difficulty of synchronizing statistics across multiple
3063 * receive rings. The divisors and thresholds used by this function
3064 * were determined based on theoretical maximum wire speed and testing
3065 * data, in order to minimize response time while increasing bulk
3066 * throughput.
3067 * This functionality is controlled by the InterruptThrottleRate module
3068 * parameter (see igb_param.c)
3069 * NOTE: This function is called only when operating in a multiqueue
3070 * receive environment.
3071 **/
3072 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3073 {
3074 int new_val = q_vector->itr_val;
3075 int avg_wire_size = 0;
3076 struct igb_adapter *adapter = q_vector->adapter;
3077
3078 /* For non-gigabit speeds, just fix the interrupt rate at roughly
3079 * 4000 ints/sec (ITR value of 976).
3080 */
3081 if (adapter->link_speed != SPEED_1000) {
3082 new_val = 976;
3083 goto set_itr_val;
3084 }
3085
3086 if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3087 struct igb_ring *ring = q_vector->rx_ring;
3088 avg_wire_size = ring->total_bytes / ring->total_packets;
3089 }
3090
3091 if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3092 struct igb_ring *ring = q_vector->tx_ring;
3093 avg_wire_size = max_t(u32, avg_wire_size,
3094 (ring->total_bytes /
3095 ring->total_packets));
3096 }
3097
3098 /* if avg_wire_size isn't set no work was done */
3099 if (!avg_wire_size)
3100 goto clear_counts;
3101
3102 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3103 avg_wire_size += 24;
3104
3105 /* Don't starve jumbo frames */
3106 avg_wire_size = min(avg_wire_size, 3000);
3107
3108 /* Give a little boost to mid-size frames */
3109 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3110 new_val = avg_wire_size / 3;
3111 else
3112 new_val = avg_wire_size / 2;
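/*
 * Worked example: full-size 1500-byte frames give (1500 + 24) / 2 = 762,
 * which on the scale used by igb_set_itr() (980 ~= 4,000 ints/sec)
 * corresponds to roughly 5,000 ints/sec.
 */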
3113
3114 set_itr_val:
3115 if (new_val != q_vector->itr_val) {
3116 q_vector->itr_val = new_val;
3117 q_vector->set_itr = 1;
3118 }
3119 clear_counts:
3120 if (q_vector->rx_ring) {
3121 q_vector->rx_ring->total_bytes = 0;
3122 q_vector->rx_ring->total_packets = 0;
3123 }
3124 if (q_vector->tx_ring) {
3125 q_vector->tx_ring->total_bytes = 0;
3126 q_vector->tx_ring->total_packets = 0;
3127 }
3128 }
3129
3130 /**
3131 * igb_update_itr - update the dynamic ITR value based on statistics
3132 * @adapter: pointer to adapter
3133 * @itr_setting: current q_vector->itr_val
3134 * @packets: the number of packets during this measurement interval
3135 * @bytes: the number of bytes during this measurement interval
3136 *
3137 * Stores a new ITR value based on packet and byte counts during the
3138 * last interrupt. The advantage of per-interrupt computation is faster
3139 * updates and a more accurate ITR for the current traffic pattern.
3140 * Constants in this function were computed based on theoretical maximum
3141 * wire speed; thresholds were set based on testing data, attempting to
3142 * minimize response time while increasing bulk throughput.
3143 * This functionality is controlled by the InterruptThrottleRate module
3144 * parameter (see igb_param.c)
3145 * NOTE: These calculations are only valid when operating in a single-
3146 * queue environment.
3147 **/
3148 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3149 int packets, int bytes)
3150 {
3151 unsigned int retval = itr_setting;
3152
3153 if (packets == 0)
3154 goto update_itr_done;
3155
3156 switch (itr_setting) {
3157 case lowest_latency:
3158 /* handle TSO and jumbo frames */
3159 if (bytes/packets > 8000)
3160 retval = bulk_latency;
3161 else if ((packets < 5) && (bytes > 512))
3162 retval = low_latency;
3163 break;
3164 case low_latency: /* 50 usec aka 20000 ints/s */
3165 if (bytes > 10000) {
3166 /* this if handles the TSO accounting */
3167 if (bytes/packets > 8000) {
3168 retval = bulk_latency;
3169 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3170 retval = bulk_latency;
3171 } else if (packets > 35) {
3172 retval = lowest_latency;
3173 }
3174 } else if (bytes/packets > 2000) {
3175 retval = bulk_latency;
3176 } else if (packets <= 2 && bytes < 512) {
3177 retval = lowest_latency;
3178 }
3179 break;
3180 case bulk_latency: /* 250 usec aka 4000 ints/s */
3181 if (bytes > 25000) {
3182 if (packets > 35)
3183 retval = low_latency;
3184 } else if (bytes < 1500) {
3185 retval = low_latency;
3186 }
3187 break;
3188 }
3189
3190 update_itr_done:
3191 return retval;
3192 }
3193
3194 static void igb_set_itr(struct igb_adapter *adapter)
3195 {
3196 struct igb_q_vector *q_vector = adapter->q_vector[0];
3197 u16 current_itr;
3198 u32 new_itr = q_vector->itr_val;
3199
3200 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3201 if (adapter->link_speed != SPEED_1000) {
3202 current_itr = 0;
3203 new_itr = 4000;
3204 goto set_itr_now;
3205 }
3206
3207 adapter->rx_itr = igb_update_itr(adapter,
3208 adapter->rx_itr,
3209 adapter->rx_ring->total_packets,
3210 adapter->rx_ring->total_bytes);
3211
3212 adapter->tx_itr = igb_update_itr(adapter,
3213 adapter->tx_itr,
3214 adapter->tx_ring->total_packets,
3215 adapter->tx_ring->total_bytes);
3216 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3217
3218 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3219 if (adapter->itr_setting == 3 && current_itr == lowest_latency)
3220 current_itr = low_latency;
3221
3222 switch (current_itr) {
3223 /* counts and packets in update_itr are dependent on these numbers */
3224 case lowest_latency:
3225 new_itr = 56; /* aka 70,000 ints/sec */
3226 break;
3227 case low_latency:
3228 new_itr = 196; /* aka 20,000 ints/sec */
3229 break;
3230 case bulk_latency:
3231 new_itr = 980; /* aka 4,000 ints/sec */
3232 break;
3233 default:
3234 break;
3235 }
3236
3237 set_itr_now:
3238 adapter->rx_ring->total_bytes = 0;
3239 adapter->rx_ring->total_packets = 0;
3240 adapter->tx_ring->total_bytes = 0;
3241 adapter->tx_ring->total_packets = 0;
3242
3243 if (new_itr != q_vector->itr_val) {
3244 /* this attempts to bias the interrupt rate towards Bulk
3245 * by adding intermediate steps when interrupt rate is
3246 * increasing */
3247 new_itr = new_itr > q_vector->itr_val ?
3248 max((new_itr * q_vector->itr_val) /
3249 (new_itr + (q_vector->itr_val >> 2)),
3250 new_itr) :
3251 new_itr;
3252 /* Don't write the value here; it resets the adapter's
3253 * internal timer, and causes us to delay far longer than
3254 * we should between interrupts. Instead, we write the ITR
3255 * value at the beginning of the next interrupt so the timing
3256 * ends up being correct.
3257 */
3258 q_vector->itr_val = new_itr;
3259 q_vector->set_itr = 1;
3260 }
3261
3262 return;
3263 }
3264
3265 #define IGB_TX_FLAGS_CSUM 0x00000001
3266 #define IGB_TX_FLAGS_VLAN 0x00000002
3267 #define IGB_TX_FLAGS_TSO 0x00000004
3268 #define IGB_TX_FLAGS_IPV4 0x00000008
3269 #define IGB_TX_FLAGS_TSTAMP 0x00000010
3270 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
3271 #define IGB_TX_FLAGS_VLAN_SHIFT 16
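/*
 * The upper 16 bits of tx_flags carry the 802.1Q tag when
 * IGB_TX_FLAGS_VLAN is set (see the MASK/SHIFT above); the context
 * descriptor setup copies them directly into vlan_macip_lens.
 */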
3272
3273 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3274 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3275 {
3276 struct e1000_adv_tx_context_desc *context_desc;
3277 unsigned int i;
3278 int err;
3279 struct igb_buffer *buffer_info;
3280 u32 info = 0, tu_cmd = 0;
3281 u32 mss_l4len_idx, l4len;
3282 *hdr_len = 0;
3283
3284 if (skb_header_cloned(skb)) {
3285 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3286 if (err)
3287 return err;
3288 }
3289
3290 l4len = tcp_hdrlen(skb);
3291 *hdr_len += l4len;
3292
3293 if (skb->protocol == htons(ETH_P_IP)) {
3294 struct iphdr *iph = ip_hdr(skb);
3295 iph->tot_len = 0;
3296 iph->check = 0;
3297 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3298 iph->daddr, 0,
3299 IPPROTO_TCP,
3300 0);
3301 } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
3302 ipv6_hdr(skb)->payload_len = 0;
3303 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3304 &ipv6_hdr(skb)->daddr,
3305 0, IPPROTO_TCP, 0);
3306 }
3307
3308 i = tx_ring->next_to_use;
3309
3310 buffer_info = &tx_ring->buffer_info[i];
3311 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3312 /* VLAN MACLEN IPLEN */
3313 if (tx_flags & IGB_TX_FLAGS_VLAN)
3314 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3315 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3316 *hdr_len += skb_network_offset(skb);
3317 info |= skb_network_header_len(skb);
3318 *hdr_len += skb_network_header_len(skb);
3319 context_desc->vlan_macip_lens = cpu_to_le32(info);
3320
3321 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3322 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3323
3324 if (skb->protocol == htons(ETH_P_IP))
3325 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3326 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3327
3328 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3329
3330 /* MSS L4LEN IDX */
3331 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3332 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3333
3334 /* For 82575, context index must be unique per ring. */
3335 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3336 mss_l4len_idx |= tx_ring->reg_idx << 4;
3337
3338 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3339 context_desc->seqnum_seed = 0;
3340
3341 buffer_info->time_stamp = jiffies;
3342 buffer_info->next_to_watch = i;
3343 buffer_info->dma = 0;
3344 i++;
3345 if (i == tx_ring->count)
3346 i = 0;
3347
3348 tx_ring->next_to_use = i;
3349
3350 return true;
3351 }
3352
3353 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3354 struct sk_buff *skb, u32 tx_flags)
3355 {
3356 struct e1000_adv_tx_context_desc *context_desc;
3357 struct pci_dev *pdev = tx_ring->pdev;
3358 struct igb_buffer *buffer_info;
3359 u32 info = 0, tu_cmd = 0;
3360 unsigned int i;
3361
3362 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3363 (tx_flags & IGB_TX_FLAGS_VLAN)) {
3364 i = tx_ring->next_to_use;
3365 buffer_info = &tx_ring->buffer_info[i];
3366 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3367
3368 if (tx_flags & IGB_TX_FLAGS_VLAN)
3369 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3370 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3371 if (skb->ip_summed == CHECKSUM_PARTIAL)
3372 info |= skb_network_header_len(skb);
3373
3374 context_desc->vlan_macip_lens = cpu_to_le32(info);
3375
3376 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3377
3378 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3379 __be16 protocol;
3380
3381 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3382 const struct vlan_ethhdr *vhdr =
3383 (const struct vlan_ethhdr*)skb->data;
3384
3385 protocol = vhdr->h_vlan_encapsulated_proto;
3386 } else {
3387 protocol = skb->protocol;
3388 }
3389
3390 switch (protocol) {
3391 case cpu_to_be16(ETH_P_IP):
3392 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3393 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3394 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3395 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3396 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3397 break;
3398 case cpu_to_be16(ETH_P_IPV6):
3399 /* XXX what about other V6 headers?? */
3400 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3401 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3402 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3403 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3404 break;
3405 default:
3406 if (unlikely(net_ratelimit()))
3407 dev_warn(&pdev->dev,
3408 "partial checksum but proto=%x!\n",
3409 ntohs(skb->protocol));
3410 break;
3411 }
3412 }
3413
3414 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3415 context_desc->seqnum_seed = 0;
3416 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3417 context_desc->mss_l4len_idx =
3418 cpu_to_le32(tx_ring->reg_idx << 4);
3419
3420 buffer_info->time_stamp = jiffies;
3421 buffer_info->next_to_watch = i;
3422 buffer_info->dma = 0;
3423
3424 i++;
3425 if (i == tx_ring->count)
3426 i = 0;
3427 tx_ring->next_to_use = i;
3428
3429 return true;
3430 }
3431 return false;
3432 }
3433
3434 #define IGB_MAX_TXD_PWR 16
3435 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
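/*
 * A single data descriptor can carry just under 64K
 * (IGB_MAX_DATA_PER_TXD); igb_tx_map_adv() BUG()s on any segment that
 * reaches this limit.
 */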
3436
3437 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3438 unsigned int first)
3439 {
3440 struct igb_buffer *buffer_info;
3441 struct pci_dev *pdev = tx_ring->pdev;
3442 unsigned int len = skb_headlen(skb);
3443 unsigned int count = 0, i;
3444 unsigned int f;
3445 dma_addr_t *map;
3446
3447 i = tx_ring->next_to_use;
3448
3449 if (skb_dma_map(&pdev->dev, skb, DMA_TO_DEVICE)) {
3450 dev_err(&pdev->dev, "TX DMA map failed\n");
3451 return 0;
3452 }
3453
3454 map = skb_shinfo(skb)->dma_maps;
3455
3456 buffer_info = &tx_ring->buffer_info[i];
3457 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3458 buffer_info->length = len;
3459 /* set time_stamp *before* dma to help avoid a possible race */
3460 buffer_info->time_stamp = jiffies;
3461 buffer_info->next_to_watch = i;
3462 buffer_info->dma = skb_shinfo(skb)->dma_head;
3463
3464 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3465 struct skb_frag_struct *frag;
3466
3467 i++;
3468 if (i == tx_ring->count)
3469 i = 0;
3470
3471 frag = &skb_shinfo(skb)->frags[f];
3472 len = frag->size;
3473
3474 buffer_info = &tx_ring->buffer_info[i];
3475 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3476 buffer_info->length = len;
3477 buffer_info->time_stamp = jiffies;
3478 buffer_info->next_to_watch = i;
3479 buffer_info->dma = map[count];
3480 count++;
3481 }
3482
3483 tx_ring->buffer_info[i].skb = skb;
3484 tx_ring->buffer_info[first].next_to_watch = i;
3485
3486 return count + 1;
3487 }
3488
3489 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3490 int tx_flags, int count, u32 paylen,
3491 u8 hdr_len)
3492 {
3493 union e1000_adv_tx_desc *tx_desc = NULL;
3494 struct igb_buffer *buffer_info;
3495 u32 olinfo_status = 0, cmd_type_len;
3496 unsigned int i;
3497
3498 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3499 E1000_ADVTXD_DCMD_DEXT);
3500
3501 if (tx_flags & IGB_TX_FLAGS_VLAN)
3502 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3503
3504 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3505 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3506
3507 if (tx_flags & IGB_TX_FLAGS_TSO) {
3508 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3509
3510 /* insert tcp checksum */
3511 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3512
3513 /* insert ip checksum */
3514 if (tx_flags & IGB_TX_FLAGS_IPV4)
3515 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3516
3517 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3518 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3519 }
3520
3521 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3522 (tx_flags & (IGB_TX_FLAGS_CSUM |
3523 IGB_TX_FLAGS_TSO |
3524 IGB_TX_FLAGS_VLAN)))
3525 olinfo_status |= tx_ring->reg_idx << 4;
3526
3527 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3528
3529 i = tx_ring->next_to_use;
3530 while (count--) {
3531 buffer_info = &tx_ring->buffer_info[i];
3532 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3533 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3534 tx_desc->read.cmd_type_len =
3535 cpu_to_le32(cmd_type_len | buffer_info->length);
3536 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3537 i++;
3538 if (i == tx_ring->count)
3539 i = 0;
3540 }
3541
3542 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3543 /* Force memory writes to complete before letting h/w
3544 * know there are new descriptors to fetch. (Only
3545 * applicable for weak-ordered memory model archs,
3546 * such as IA-64). */
3547 wmb();
3548
3549 tx_ring->next_to_use = i;
3550 writel(i, tx_ring->tail);
3551 /* we need this if more than one processor can write to our tail
3552 * at a time; it synchronizes IO on IA64/Altix systems */
3553 mmiowb();
3554 }
3555
3556 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3557 {
3558 struct net_device *netdev = tx_ring->netdev;
3559
3560 netif_stop_subqueue(netdev, tx_ring->queue_index);
3561
3562 /* Herbert's original patch had:
3563 * smp_mb__after_netif_stop_queue();
3564 * but since that doesn't exist yet, just open code it. */
3565 smp_mb();
3566
3567 /* We need to check again in case another CPU has just
3568 * made room available. */
3569 if (igb_desc_unused(tx_ring) < size)
3570 return -EBUSY;
3571
3572 /* A reprieve! */
3573 netif_wake_subqueue(netdev, tx_ring->queue_index);
3574 tx_ring->tx_stats.restart_queue++;
3575 return 0;
3576 }
3577
3578 static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3579 {
3580 if (igb_desc_unused(tx_ring) >= size)
3581 return 0;
3582 return __igb_maybe_stop_tx(tx_ring, size);
3583 }
3584
3585 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3586 struct igb_ring *tx_ring)
3587 {
3588 struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3589 unsigned int first;
3590 unsigned int tx_flags = 0;
3591 u8 hdr_len = 0;
3592 int count = 0;
3593 int tso = 0;
3594 union skb_shared_tx *shtx;
3595
3596 /* need: 1 descriptor per page,
3597 * + 2 desc gap to keep tail from touching head,
3598 * + 1 desc for skb->data,
3599 * + 1 desc for context descriptor,
3600 * otherwise try next time */
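/*
 * The same budget (MAX_SKB_FRAGS + 4 in the worst case) is reserved
 * again after queuing via igb_maybe_stop_tx() below, so the next
 * frame should not bounce with NETDEV_TX_BUSY.
 */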
3601 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3602 /* this is a hard error */
3603 return NETDEV_TX_BUSY;
3604 }
3605
3606 /*
3607 * TODO: check that there currently is no other packet with
3608 * time stamping in the queue
3609 *
3610 * When doing time stamping, keep the connection to the socket
3611 * a while longer: it is still needed by skb_hwtstamp_tx(),
3612 * called either in igb_tx_hwtstamp() or by our caller when
3613 * doing software time stamping.
3614 */
3615 shtx = skb_tx(skb);
3616 if (unlikely(shtx->hardware)) {
3617 shtx->in_progress = 1;
3618 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3619 }
3620
3621 if (adapter->vlgrp && vlan_tx_tag_present(skb)) {
3622 tx_flags |= IGB_TX_FLAGS_VLAN;
3623 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3624 }
3625
3626 if (skb->protocol == htons(ETH_P_IP))
3627 tx_flags |= IGB_TX_FLAGS_IPV4;
3628
3629 first = tx_ring->next_to_use;
3630 if (skb_is_gso(skb)) {
3631 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3632 if (tso < 0) {
3633 dev_kfree_skb_any(skb);
3634 return NETDEV_TX_OK;
3635 }
3636 }
3637
3638 if (tso)
3639 tx_flags |= IGB_TX_FLAGS_TSO;
3640 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3641 (skb->ip_summed == CHECKSUM_PARTIAL))
3642 tx_flags |= IGB_TX_FLAGS_CSUM;
3643
3644 /*
3645 * count reflects descriptors mapped; if 0, a mapping error
3646 * has occurred and we need to rewind the descriptor queue
3647 */
3648 count = igb_tx_map_adv(tx_ring, skb, first);
3649
3650 if (!count) {
3651 dev_kfree_skb_any(skb);
3652 tx_ring->buffer_info[first].time_stamp = 0;
3653 tx_ring->next_to_use = first;
3654 return NETDEV_TX_OK;
3655 }
3656
3657 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3658
3659 /* Make sure there is space in the ring for the next send. */
3660 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3661
3662 return NETDEV_TX_OK;
3663 }
3664
3665 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3666 struct net_device *netdev)
3667 {
3668 struct igb_adapter *adapter = netdev_priv(netdev);
3669 struct igb_ring *tx_ring;
3670 int r_idx = 0;
3671
3672 if (test_bit(__IGB_DOWN, &adapter->state)) {
3673 dev_kfree_skb_any(skb);
3674 return NETDEV_TX_OK;
3675 }
3676
3677 if (skb->len <= 0) {
3678 dev_kfree_skb_any(skb);
3679 return NETDEV_TX_OK;
3680 }
3681
3682 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3683 tx_ring = adapter->multi_tx_table[r_idx];
3684
3685 /* This goes back to the question of how to logically map a tx queue
3686 * to a flow. Right now, using multiple tx queues has a slight
3687 * negative performance impact. If the stack breaks away from a
3688 * single qdisc implementation, we can look at this again. */
3689 return igb_xmit_frame_ring_adv(skb, tx_ring);
3690 }
3691
3692 /**
3693 * igb_tx_timeout - Respond to a Tx Hang
3694 * @netdev: network interface device structure
3695 **/
3696 static void igb_tx_timeout(struct net_device *netdev)
3697 {
3698 struct igb_adapter *adapter = netdev_priv(netdev);
3699 struct e1000_hw *hw = &adapter->hw;
3700
3701 /* Do the reset outside of interrupt context */
3702 adapter->tx_timeout_count++;
3703 schedule_work(&adapter->reset_task);
3704 wr32(E1000_EICS,
3705 (adapter->eims_enable_mask & ~adapter->eims_other));
3706 }
3707
3708 static void igb_reset_task(struct work_struct *work)
3709 {
3710 struct igb_adapter *adapter;
3711 adapter = container_of(work, struct igb_adapter, reset_task);
3712
3713 igb_reinit_locked(adapter);
3714 }
3715
3716 /**
3717 * igb_get_stats - Get System Network Statistics
3718 * @netdev: network interface device structure
3719 *
3720 * Returns the address of the device statistics structure.
3721 * The statistics are actually updated from the timer callback.
3722 **/
3723 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3724 {
3725 /* only return the current stats */
3726 return &netdev->stats;
3727 }
3728
3729 /**
3730 * igb_change_mtu - Change the Maximum Transfer Unit
3731 * @netdev: network interface device structure
3732 * @new_mtu: new value for maximum frame size
3733 *
3734 * Returns 0 on success, negative on failure
3735 **/
3736 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3737 {
3738 struct igb_adapter *adapter = netdev_priv(netdev);
3739 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3740 u32 rx_buffer_len, i;
3741
3742 if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) ||
3743 (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3744 dev_err(&adapter->pdev->dev, "Invalid MTU setting\n");
3745 return -EINVAL;
3746 }
3747
3748 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3749 dev_err(&adapter->pdev->dev, "MTU > 9216 not supported.\n");
3750 return -EINVAL;
3751 }
3752
3753 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3754 msleep(1);
3755
3756 /* igb_down has a dependency on max_frame_size */
3757 adapter->max_frame_size = max_frame;
3758 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3759 * means we reserve 2 more; this pushes us to allocate from the next
3760 * larger slab size.
3761 * i.e. RXBUFFER_2048 --> size-4096 slab
3762 */
3763
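	/* Buffer sizing note: frames that fit in a 1K or VLAN-sized buffer use
	 * a single skb buffer of that size; anything larger uses packet split,
	 * where the small IGB_RXBUFFER_128 buffer receives only the header and
	 * the remainder of the frame lands in half-page fragments (see
	 * igb_alloc_rx_buffers_adv and igb_clean_rx_irq_adv below). */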
3764 if (max_frame <= IGB_RXBUFFER_1024)
3765 rx_buffer_len = IGB_RXBUFFER_1024;
3766 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3767 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3768 else
3769 rx_buffer_len = IGB_RXBUFFER_128;
3770
3771 if (netif_running(netdev))
3772 igb_down(adapter);
3773
3774 dev_info(&adapter->pdev->dev, "changing MTU from %d to %d\n",
3775 netdev->mtu, new_mtu);
3776 netdev->mtu = new_mtu;
3777
3778 for (i = 0; i < adapter->num_rx_queues; i++)
3779 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3780
3781 if (netif_running(netdev))
3782 igb_up(adapter);
3783 else
3784 igb_reset(adapter);
3785
3786 clear_bit(__IGB_RESETTING, &adapter->state);
3787
3788 return 0;
3789 }
3790
3791 /**
3792 * igb_update_stats - Update the board statistics counters
3793 * @adapter: board private structure
3794 **/
3795
3796 void igb_update_stats(struct igb_adapter *adapter)
3797 {
3798 struct net_device *netdev = adapter->netdev;
3799 struct e1000_hw *hw = &adapter->hw;
3800 struct pci_dev *pdev = adapter->pdev;
3801 u16 phy_tmp;
3802
3803 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3804
3805 /*
3806 * Prevent stats update while adapter is being reset, or if the pci
3807 * connection is down.
3808 */
3809 if (adapter->link_speed == 0)
3810 return;
3811 if (pci_channel_offline(pdev))
3812 return;
3813
3814 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3815 adapter->stats.gprc += rd32(E1000_GPRC);
3816 adapter->stats.gorc += rd32(E1000_GORCL);
3817 rd32(E1000_GORCH); /* clear GORCL */
3818 adapter->stats.bprc += rd32(E1000_BPRC);
3819 adapter->stats.mprc += rd32(E1000_MPRC);
3820 adapter->stats.roc += rd32(E1000_ROC);
3821
3822 adapter->stats.prc64 += rd32(E1000_PRC64);
3823 adapter->stats.prc127 += rd32(E1000_PRC127);
3824 adapter->stats.prc255 += rd32(E1000_PRC255);
3825 adapter->stats.prc511 += rd32(E1000_PRC511);
3826 adapter->stats.prc1023 += rd32(E1000_PRC1023);
3827 adapter->stats.prc1522 += rd32(E1000_PRC1522);
3828 adapter->stats.symerrs += rd32(E1000_SYMERRS);
3829 adapter->stats.sec += rd32(E1000_SEC);
3830
3831 adapter->stats.mpc += rd32(E1000_MPC);
3832 adapter->stats.scc += rd32(E1000_SCC);
3833 adapter->stats.ecol += rd32(E1000_ECOL);
3834 adapter->stats.mcc += rd32(E1000_MCC);
3835 adapter->stats.latecol += rd32(E1000_LATECOL);
3836 adapter->stats.dc += rd32(E1000_DC);
3837 adapter->stats.rlec += rd32(E1000_RLEC);
3838 adapter->stats.xonrxc += rd32(E1000_XONRXC);
3839 adapter->stats.xontxc += rd32(E1000_XONTXC);
3840 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
3841 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
3842 adapter->stats.fcruc += rd32(E1000_FCRUC);
3843 adapter->stats.gptc += rd32(E1000_GPTC);
3844 adapter->stats.gotc += rd32(E1000_GOTCL);
3845 rd32(E1000_GOTCH); /* clear GOTCL */
3846 adapter->stats.rnbc += rd32(E1000_RNBC);
3847 adapter->stats.ruc += rd32(E1000_RUC);
3848 adapter->stats.rfc += rd32(E1000_RFC);
3849 adapter->stats.rjc += rd32(E1000_RJC);
3850 adapter->stats.tor += rd32(E1000_TORH);
3851 adapter->stats.tot += rd32(E1000_TOTH);
3852 adapter->stats.tpr += rd32(E1000_TPR);
3853
3854 adapter->stats.ptc64 += rd32(E1000_PTC64);
3855 adapter->stats.ptc127 += rd32(E1000_PTC127);
3856 adapter->stats.ptc255 += rd32(E1000_PTC255);
3857 adapter->stats.ptc511 += rd32(E1000_PTC511);
3858 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
3859 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
3860
3861 adapter->stats.mptc += rd32(E1000_MPTC);
3862 adapter->stats.bptc += rd32(E1000_BPTC);
3863
3864 /* used for adaptive IFS */
3865
3866 hw->mac.tx_packet_delta = rd32(E1000_TPT);
3867 adapter->stats.tpt += hw->mac.tx_packet_delta;
3868 hw->mac.collision_delta = rd32(E1000_COLC);
3869 adapter->stats.colc += hw->mac.collision_delta;
3870
3871 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
3872 adapter->stats.rxerrc += rd32(E1000_RXERRC);
3873 adapter->stats.tncrs += rd32(E1000_TNCRS);
3874 adapter->stats.tsctc += rd32(E1000_TSCTC);
3875 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
3876
3877 adapter->stats.iac += rd32(E1000_IAC);
3878 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
3879 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
3880 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
3881 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
3882 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
3883 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
3884 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
3885 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
3886
3887 /* Fill out the OS statistics structure */
3888 netdev->stats.multicast = adapter->stats.mprc;
3889 netdev->stats.collisions = adapter->stats.colc;
3890
3891 /* Rx Errors */
3892
3893 if (hw->mac.type != e1000_82575) {
3894 u32 rqdpc_tmp;
3895 u64 rqdpc_total = 0;
3896 int i;
3897 /* Read out per-RX-queue drop stats. Note that the RQDPC (Receive
3898 * Queue Drop Packet Count) register only increments if the
3899 * DROP_EN bit is set (in the SRRCTL register for that
3900 * queue). If the DROP_EN bit is NOT set, then a somewhat
3901 * equivalent count is kept in RNBC (not on a per-queue basis).
3902 * Also note the drop count is due to a lack of available
3903 * descriptors.
3904 */
3905 for (i = 0; i < adapter->num_rx_queues; i++) {
3906 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0xFFF;
3907 adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
3908 rqdpc_total += adapter->rx_ring[i].rx_stats.drops;
3909 }
3910 netdev->stats.rx_fifo_errors = rqdpc_total;
3911 }
3912
3913 /* Note RNBC (Receive No Buffers Count) is not an exact
3914 * drop count, as the hardware FIFO might save the day. That is
3915 * one of the reasons for saving it in rx_fifo_errors, as it is
3916 * potentially not a true drop.
3917 */
3918 netdev->stats.rx_fifo_errors += adapter->stats.rnbc;
3919
3920 /* RLEC on some newer hardware can be incorrect so build
3921 * our own version based on RUC and ROC */
3922 netdev->stats.rx_errors = adapter->stats.rxerrc +
3923 adapter->stats.crcerrs + adapter->stats.algnerrc +
3924 adapter->stats.ruc + adapter->stats.roc +
3925 adapter->stats.cexterr;
3926 netdev->stats.rx_length_errors = adapter->stats.ruc +
3927 adapter->stats.roc;
3928 netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
3929 netdev->stats.rx_frame_errors = adapter->stats.algnerrc;
3930 netdev->stats.rx_missed_errors = adapter->stats.mpc;
3931
3932 /* Tx Errors */
3933 netdev->stats.tx_errors = adapter->stats.ecol +
3934 adapter->stats.latecol;
3935 netdev->stats.tx_aborted_errors = adapter->stats.ecol;
3936 netdev->stats.tx_window_errors = adapter->stats.latecol;
3937 netdev->stats.tx_carrier_errors = adapter->stats.tncrs;
3938
3939 /* Tx Dropped needs to be maintained elsewhere */
3940
3941 /* Phy Stats */
3942 if (hw->phy.media_type == e1000_media_type_copper) {
3943 if ((adapter->link_speed == SPEED_1000) &&
3944 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
3945 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
3946 adapter->phy_stats.idle_errors += phy_tmp;
3947 }
3948 }
3949
3950 /* Management Stats */
3951 adapter->stats.mgptc += rd32(E1000_MGTPTC);
3952 adapter->stats.mgprc += rd32(E1000_MGTPRC);
3953 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
3954 }
3955
3956 static irqreturn_t igb_msix_other(int irq, void *data)
3957 {
3958 struct igb_adapter *adapter = data;
3959 struct e1000_hw *hw = &adapter->hw;
3960 u32 icr = rd32(E1000_ICR);
3961 /* reading ICR causes bit 31 of EICR to be cleared */
3962
3963 if (icr & E1000_ICR_DOUTSYNC) {
3964 /* HW is reporting DMA is out of sync */
3965 adapter->stats.doosync++;
3966 }
3967
3968 /* Check for a mailbox event */
3969 if (icr & E1000_ICR_VMMB)
3970 igb_msg_task(adapter);
3971
3972 if (icr & E1000_ICR_LSC) {
3973 hw->mac.get_link_status = 1;
3974 /* guard against interrupt when we're going down */
3975 if (!test_bit(__IGB_DOWN, &adapter->state))
3976 mod_timer(&adapter->watchdog_timer, jiffies + 1);
3977 }
3978
3979 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_VMMB);
3980 wr32(E1000_EIMS, adapter->eims_other);
3981
3982 return IRQ_HANDLED;
3983 }
3984
3985 static void igb_write_itr(struct igb_q_vector *q_vector)
3986 {
3987 u32 itr_val = q_vector->itr_val & 0x7FFC;
3988
3989 if (!q_vector->set_itr)
3990 return;
3991
3992 if (!itr_val)
3993 itr_val = 0x4;
3994
3995 if (q_vector->itr_shift)
3996 itr_val |= itr_val << q_vector->itr_shift;
3997 else
3998 itr_val |= 0x8000000;
3999
4000 writel(itr_val, q_vector->itr_register);
4001 q_vector->set_itr = 0;
4002 }
4003
4004 static irqreturn_t igb_msix_ring(int irq, void *data)
4005 {
4006 struct igb_q_vector *q_vector = data;
4007
4008 /* Write the ITR value calculated from the previous interrupt. */
4009 igb_write_itr(q_vector);
4010
4011 napi_schedule(&q_vector->napi);
4012
4013 return IRQ_HANDLED;
4014 }
4015
4016 #ifdef CONFIG_IGB_DCA
4017 static void igb_update_dca(struct igb_q_vector *q_vector)
4018 {
4019 struct igb_adapter *adapter = q_vector->adapter;
4020 struct e1000_hw *hw = &adapter->hw;
4021 int cpu = get_cpu();
4022
4023 if (q_vector->cpu == cpu)
4024 goto out_no_update;
4025
4026 if (q_vector->tx_ring) {
4027 int q = q_vector->tx_ring->reg_idx;
4028 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4029 if (hw->mac.type == e1000_82575) {
4030 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4031 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4032 } else {
4033 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4034 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4035 E1000_DCA_TXCTRL_CPUID_SHIFT;
4036 }
4037 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4038 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4039 }
4040 if (q_vector->rx_ring) {
4041 int q = q_vector->rx_ring->reg_idx;
4042 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4043 if (hw->mac.type == e1000_82575) {
4044 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4045 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4046 } else {
4047 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4048 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4049 E1000_DCA_RXCTRL_CPUID_SHIFT;
4050 }
4051 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4052 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4053 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4054 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4055 }
4056 q_vector->cpu = cpu;
4057 out_no_update:
4058 put_cpu();
4059 }
4060
4061 static void igb_setup_dca(struct igb_adapter *adapter)
4062 {
4063 struct e1000_hw *hw = &adapter->hw;
4064 int i;
4065
4066 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4067 return;
4068
4069 /* Always use CB2 mode, difference is masked in the CB driver. */
4070 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4071
4072 for (i = 0; i < adapter->num_q_vectors; i++) {
4073 struct igb_q_vector *q_vector = adapter->q_vector[i];
4074 q_vector->cpu = -1;
4075 igb_update_dca(q_vector);
4076 }
4077 }
4078
4079 static int __igb_notify_dca(struct device *dev, void *data)
4080 {
4081 struct net_device *netdev = dev_get_drvdata(dev);
4082 struct igb_adapter *adapter = netdev_priv(netdev);
4083 struct e1000_hw *hw = &adapter->hw;
4084 unsigned long event = *(unsigned long *)data;
4085
4086 switch (event) {
4087 case DCA_PROVIDER_ADD:
4088 /* if already enabled, don't do it again */
4089 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4090 break;
4091 /* Always use CB2 mode, difference is masked
4092 * in the CB driver. */
4093 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4094 if (dca_add_requester(dev) == 0) {
4095 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4096 dev_info(&adapter->pdev->dev, "DCA enabled\n");
4097 igb_setup_dca(adapter);
4098 break;
4099 }
4100 /* Fall Through since DCA is disabled. */
4101 case DCA_PROVIDER_REMOVE:
4102 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4103 /* without this a class_device is left
4104 * hanging around in the sysfs model */
4105 dca_remove_requester(dev);
4106 dev_info(&adapter->pdev->dev, "DCA disabled\n");
4107 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4108 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4109 }
4110 break;
4111 }
4112
4113 return 0;
4114 }
4115
4116 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4117 void *p)
4118 {
4119 int ret_val;
4120
4121 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4122 __igb_notify_dca);
4123
4124 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4125 }
4126 #endif /* CONFIG_IGB_DCA */
4127
4128 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4129 {
4130 struct e1000_hw *hw = &adapter->hw;
4131 u32 ping;
4132 int i;
4133
4134 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4135 ping = E1000_PF_CONTROL_MSG;
4136 if (adapter->vf_data[i].clear_to_send)
4137 ping |= E1000_VT_MSGTYPE_CTS;
4138 igb_write_mbx(hw, &ping, 1, i);
4139 }
4140 }
4141
4142 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4143 u32 *msgbuf, u32 vf)
4144 {
4145 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4146 u16 *hash_list = (u16 *)&msgbuf[1];
4147 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4148 int i;
4149
4150 /* only up to 30 hash values supported */
4151 if (n > 30)
4152 n = 30;
4153
4154 /* salt away the number of multicast addresses assigned
4155 * to this VF for later use, to restore when the PF multicast
4156 * list changes
4157 */
4158 vf_data->num_vf_mc_hashes = n;
4159
4160 /* VFs are limited to using the MTA hash table for their multicast
4161 * addresses */
4162 for (i = 0; i < n; i++)
4163 vf_data->vf_mc_hashes[i] = hash_list[i];
4164
4165 /* Flush and reset the mta with the new values */
4166 igb_set_rx_mode(adapter->netdev);
4167
4168 return 0;
4169 }
4170
4171 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4172 {
4173 struct e1000_hw *hw = &adapter->hw;
4174 struct vf_data_storage *vf_data;
4175 int i, j;
4176
4177 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4178 vf_data = &adapter->vf_data[i];
4179 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4180 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4181 }
4182 }
4183
4184 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4185 {
4186 struct e1000_hw *hw = &adapter->hw;
4187 u32 pool_mask, reg, vid;
4188 int i;
4189
4190 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4191
4192 /* Find the vlan filter for this id */
4193 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4194 reg = rd32(E1000_VLVF(i));
4195
4196 /* remove the vf from the pool */
4197 reg &= ~pool_mask;
4198
4199 /* if pool is empty then remove entry from vfta */
4200 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4201 (reg & E1000_VLVF_VLANID_ENABLE)) {
4202 vid = reg & E1000_VLVF_VLANID_MASK;
4203 reg = 0;
4204 igb_vfta_set(hw, vid, false);
4205 }
4206
4207 wr32(E1000_VLVF(i), reg);
4208 }
4209
4210 adapter->vf_data[vf].vlans_enabled = 0;
4211 }
4212
4213 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4214 {
4215 struct e1000_hw *hw = &adapter->hw;
4216 u32 reg, i;
4217
4218 /* It is an error to call this function when VFs are not enabled */
4219 if (!adapter->vfs_allocated_count)
4220 return -1;
4221
4222 /* Find the vlan filter for this id */
4223 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4224 reg = rd32(E1000_VLVF(i));
4225 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4226 vid == (reg & E1000_VLVF_VLANID_MASK))
4227 break;
4228 }
4229
4230 if (add) {
4231 if (i == E1000_VLVF_ARRAY_SIZE) {
4232 /* Did not find a matching VLAN ID entry that was
4233 * enabled. Search for a free filter entry, i.e.
4234 * one without the enable bit set
4235 */
4236 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4237 reg = rd32(E1000_VLVF(i));
4238 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4239 break;
4240 }
4241 }
4242 if (i < E1000_VLVF_ARRAY_SIZE) {
4243 /* Found an enabled/available entry */
4244 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4245
4246 /* if !enabled we need to set this up in vfta */
4247 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4248 /* add VID to the filter table; if the bit is already
4249 * set, the PF must have added it outside of the table */
4250 if (igb_vfta_set(hw, vid, true))
4251 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT +
4252 adapter->vfs_allocated_count);
4253 reg |= E1000_VLVF_VLANID_ENABLE;
4254 }
4255 reg &= ~E1000_VLVF_VLANID_MASK;
4256 reg |= vid;
4257
4258 wr32(E1000_VLVF(i), reg);
4259
4260 /* do not modify RLPML for PF devices */
4261 if (vf >= adapter->vfs_allocated_count)
4262 return 0;
4263
4264 if (!adapter->vf_data[vf].vlans_enabled) {
4265 u32 size;
4266 reg = rd32(E1000_VMOLR(vf));
4267 size = reg & E1000_VMOLR_RLPML_MASK;
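				/* grow the pool's max RX packet length by the
				 * 4 bytes a VLAN tag adds */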
4268 size += 4;
4269 reg &= ~E1000_VMOLR_RLPML_MASK;
4270 reg |= size;
4271 wr32(E1000_VMOLR(vf), reg);
4272 }
4273 adapter->vf_data[vf].vlans_enabled++;
4274
4275 return 0;
4276 }
4277 } else {
4278 if (i < E1000_VLVF_ARRAY_SIZE) {
4279 /* remove vf from the pool */
4280 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4281 /* if pool is empty then remove entry from vfta */
4282 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4283 reg = 0;
4284 igb_vfta_set(hw, vid, false);
4285 }
4286 wr32(E1000_VLVF(i), reg);
4287
4288 /* do not modify RLPML for PF devices */
4289 if (vf >= adapter->vfs_allocated_count)
4290 return 0;
4291
4292 adapter->vf_data[vf].vlans_enabled--;
4293 if (!adapter->vf_data[vf].vlans_enabled) {
4294 u32 size;
4295 reg = rd32(E1000_VMOLR(vf));
4296 size = reg & E1000_VMOLR_RLPML_MASK;
4297 size -= 4;
4298 reg &= ~E1000_VMOLR_RLPML_MASK;
4299 reg |= size;
4300 wr32(E1000_VMOLR(vf), reg);
4301 }
4302 return 0;
4303 }
4304 }
4305 return -1;
4306 }
4307
4308 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4309 {
4310 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4311 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4312
4313 return igb_vlvf_set(adapter, vid, add, vf);
4314 }
4315
4316 static inline void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4317 {
4318 struct e1000_hw *hw = &adapter->hw;
4319
4320 /* disable mailbox functionality for vf */
4321 adapter->vf_data[vf].clear_to_send = false;
4322
4323 /* reset offloads to defaults */
4324 igb_set_vmolr(hw, vf);
4325
4326 /* reset vlans for device */
4327 igb_clear_vf_vfta(adapter, vf);
4328
4329 /* reset multicast table array for vf */
4330 adapter->vf_data[vf].num_vf_mc_hashes = 0;
4331
4332 /* Flush and reset the mta with the new values */
4333 igb_set_rx_mode(adapter->netdev);
4334 }
4335
4336 static inline void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4337 {
4338 struct e1000_hw *hw = &adapter->hw;
4339 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
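	/* VF MAC filters are allocated from the end of the RAR table, one
	 * entry per VF, so VF 0 gets the last entry and so on. */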
4340 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4341 u32 reg, msgbuf[3];
4342 u8 *addr = (u8 *)(&msgbuf[1]);
4343
4344 /* process all the same items cleared in a function level reset */
4345 igb_vf_reset_event(adapter, vf);
4346
4347 /* set vf mac address */
4348 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4349
4350 /* enable transmit and receive for vf */
4351 reg = rd32(E1000_VFTE);
4352 wr32(E1000_VFTE, reg | (1 << vf));
4353 reg = rd32(E1000_VFRE);
4354 wr32(E1000_VFRE, reg | (1 << vf));
4355
4356 /* enable mailbox functionality for vf */
4357 adapter->vf_data[vf].clear_to_send = true;
4358
4359 /* reply to reset with ack and vf mac address */
4360 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4361 memcpy(addr, vf_mac, 6);
4362 igb_write_mbx(hw, msgbuf, 3, vf);
4363 }
4364
4365 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4366 {
4367 unsigned char *addr = (char *)&msg[1];
4368 int err = -1;
4369
4370 if (is_valid_ether_addr(addr))
4371 err = igb_set_vf_mac(adapter, vf, addr);
4372
4373 return err;
4374
4375 }
4376
4377 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4378 {
4379 struct e1000_hw *hw = &adapter->hw;
4380 u32 msg = E1000_VT_MSGTYPE_NACK;
4381
4382 /* if device isn't clear to send it shouldn't be reading either */
4383 if (!adapter->vf_data[vf].clear_to_send)
4384 igb_write_mbx(hw, &msg, 1, vf);
4385 }
4386
4387
4388 static void igb_msg_task(struct igb_adapter *adapter)
4389 {
4390 struct e1000_hw *hw = &adapter->hw;
4391 u32 vf;
4392
4393 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4394 /* process any reset requests */
4395 if (!igb_check_for_rst(hw, vf)) {
4396 adapter->vf_data[vf].clear_to_send = false;
4397 igb_vf_reset_event(adapter, vf);
4398 }
4399
4400 /* process any messages pending */
4401 if (!igb_check_for_msg(hw, vf))
4402 igb_rcv_msg_from_vf(adapter, vf);
4403
4404 /* process any acks */
4405 if (!igb_check_for_ack(hw, vf))
4406 igb_rcv_ack_from_vf(adapter, vf);
4407
4408 }
4409 }
4410
4411 static int igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4412 {
4413 u32 mbx_size = E1000_VFMAILBOX_SIZE;
4414 u32 msgbuf[mbx_size];
4415 struct e1000_hw *hw = &adapter->hw;
4416 s32 retval;
4417
4418 retval = igb_read_mbx(hw, msgbuf, mbx_size, vf);
4419
4420 if (retval)
4421 dev_err(&adapter->pdev->dev,
4422 "Error receiving message from VF\n");
4423
4424 /* this is a message we already processed, do nothing */
4425 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4426 return retval;
4427
4428 /*
4429 * until the vf completes a reset it should not be
4430 * allowed to start any configuration.
4431 */
4432
4433 if (msgbuf[0] == E1000_VF_RESET) {
4434 igb_vf_reset_msg(adapter, vf);
4435
4436 return retval;
4437 }
4438
4439 if (!adapter->vf_data[vf].clear_to_send) {
4440 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4441 igb_write_mbx(hw, msgbuf, 1, vf);
4442 return retval;
4443 }
4444
4445 switch ((msgbuf[0] & 0xFFFF)) {
4446 case E1000_VF_SET_MAC_ADDR:
4447 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4448 break;
4449 case E1000_VF_SET_MULTICAST:
4450 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4451 break;
4452 case E1000_VF_SET_LPE:
4453 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4454 break;
4455 case E1000_VF_SET_VLAN:
4456 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4457 break;
4458 default:
4459 dev_err(&adapter->pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4460 retval = -1;
4461 break;
4462 }
4463
4464 /* notify the VF of the results of what it sent us */
4465 if (retval)
4466 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4467 else
4468 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4469
4470 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4471
4472 igb_write_mbx(hw, msgbuf, 1, vf);
4473
4474 return retval;
4475 }
4476
4477 /**
4478 * igb_set_uta - Set unicast filter table address
4479 * @adapter: board private structure
4480 *
4481 * The unicast table address is a register array of 32-bit registers.
4482 * The table is meant to be used in a way similar to how the MTA is used;
4483 * however, due to certain limitations in the hardware it is necessary to
4484 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4485 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
4486 **/
4487 static void igb_set_uta(struct igb_adapter *adapter)
4488 {
4489 struct e1000_hw *hw = &adapter->hw;
4490 int i;
4491
4492 /* The UTA table only exists on 82576 hardware and newer */
4493 if (hw->mac.type < e1000_82576)
4494 return;
4495
4496 /* we only need to do this if VMDq is enabled */
4497 if (!adapter->vfs_allocated_count)
4498 return;
4499
4500 for (i = 0; i < hw->mac.uta_reg_count; i++)
4501 array_wr32(E1000_UTA, i, ~0);
4502 }
4503
4504 /**
4505 * igb_intr_msi - Interrupt Handler
4506 * @irq: interrupt number
4507 * @data: pointer to a network interface device structure
4508 **/
4509 static irqreturn_t igb_intr_msi(int irq, void *data)
4510 {
4511 struct igb_adapter *adapter = data;
4512 struct igb_q_vector *q_vector = adapter->q_vector[0];
4513 struct e1000_hw *hw = &adapter->hw;
4514 /* read ICR disables interrupts using IAM */
4515 u32 icr = rd32(E1000_ICR);
4516
4517 igb_write_itr(q_vector);
4518
4519 if (icr & E1000_ICR_DOUTSYNC) {
4520 /* HW is reporting DMA is out of sync */
4521 adapter->stats.doosync++;
4522 }
4523
4524 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4525 hw->mac.get_link_status = 1;
4526 if (!test_bit(__IGB_DOWN, &adapter->state))
4527 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4528 }
4529
4530 napi_schedule(&q_vector->napi);
4531
4532 return IRQ_HANDLED;
4533 }
4534
4535 /**
4536 * igb_intr - Legacy Interrupt Handler
4537 * @irq: interrupt number
4538 * @data: pointer to a network interface device structure
4539 **/
4540 static irqreturn_t igb_intr(int irq, void *data)
4541 {
4542 struct igb_adapter *adapter = data;
4543 struct igb_q_vector *q_vector = adapter->q_vector[0];
4544 struct e1000_hw *hw = &adapter->hw;
4545 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
4546 * need for the IMC write */
4547 u32 icr = rd32(E1000_ICR);
4548 if (!icr)
4549 return IRQ_NONE; /* Not our interrupt */
4550
4551 igb_write_itr(q_vector);
4552
4553 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4554 * not set, then the adapter didn't send an interrupt */
4555 if (!(icr & E1000_ICR_INT_ASSERTED))
4556 return IRQ_NONE;
4557
4558 if (icr & E1000_ICR_DOUTSYNC) {
4559 /* HW is reporting DMA is out of sync */
4560 adapter->stats.doosync++;
4561 }
4562
4563 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4564 hw->mac.get_link_status = 1;
4565 /* guard against interrupt when we're going down */
4566 if (!test_bit(__IGB_DOWN, &adapter->state))
4567 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4568 }
4569
4570 napi_schedule(&q_vector->napi);
4571
4572 return IRQ_HANDLED;
4573 }
4574
4575 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4576 {
4577 struct igb_adapter *adapter = q_vector->adapter;
4578 struct e1000_hw *hw = &adapter->hw;
4579
4580 if (adapter->itr_setting & 3) {
4581 if (!adapter->msix_entries)
4582 igb_set_itr(adapter);
4583 else
4584 igb_update_ring_itr(q_vector);
4585 }
4586
4587 if (!test_bit(__IGB_DOWN, &adapter->state)) {
4588 if (adapter->msix_entries)
4589 wr32(E1000_EIMS, q_vector->eims_value);
4590 else
4591 igb_irq_enable(adapter);
4592 }
4593 }
4594
4595 /**
4596 * igb_poll - NAPI Rx polling callback
4597 * @napi: napi polling structure
4598 * @budget: count of how many packets we should handle
4599 **/
4600 static int igb_poll(struct napi_struct *napi, int budget)
4601 {
4602 struct igb_q_vector *q_vector = container_of(napi,
4603 struct igb_q_vector,
4604 napi);
4605 int tx_clean_complete = 1, work_done = 0;
4606
4607 #ifdef CONFIG_IGB_DCA
4608 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4609 igb_update_dca(q_vector);
4610 #endif
4611 if (q_vector->tx_ring)
4612 tx_clean_complete = igb_clean_tx_irq(q_vector);
4613
4614 if (q_vector->rx_ring)
4615 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4616
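	/* If the Tx ring was not fully cleaned, report the whole budget as
	 * consumed so NAPI keeps polling instead of completing. */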
4617 if (!tx_clean_complete)
4618 work_done = budget;
4619
4620 /* If not enough Rx work done, exit the polling mode */
4621 if (work_done < budget) {
4622 napi_complete(napi);
4623 igb_ring_irq_enable(q_vector);
4624 }
4625
4626 return work_done;
4627 }
4628
4629 /**
4630 * igb_tx_hwtstamp - utility function which checks for TX time stamp
4631 * @adapter: board private structure
4632 * @skb: packet that was just sent
4633 *
4634 * If we were asked to do hardware stamping and such a time stamp is
4635 * available, then it must have been for this skb here because we
4636 * allow only one such packet into the queue.
4637 */
4638 static void igb_tx_hwtstamp(struct igb_adapter *adapter, struct sk_buff *skb)
4639 {
4640 union skb_shared_tx *shtx = skb_tx(skb);
4641 struct e1000_hw *hw = &adapter->hw;
4642
4643 if (unlikely(shtx->hardware)) {
4644 u32 valid = rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID;
4645 if (valid) {
4646 u64 regval = rd32(E1000_TXSTMPL);
4647 u64 ns;
4648 struct skb_shared_hwtstamps shhwtstamps;
4649
4650 memset(&shhwtstamps, 0, sizeof(shhwtstamps));
4651 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4652 ns = timecounter_cyc2time(&adapter->clock,
4653 regval);
4654 timecompare_update(&adapter->compare, ns);
4655 shhwtstamps.hwtstamp = ns_to_ktime(ns);
4656 shhwtstamps.syststamp =
4657 timecompare_transform(&adapter->compare, ns);
4658 skb_tstamp_tx(skb, &shhwtstamps);
4659 }
4660 }
4661 }
4662
4663 /**
4664 * igb_clean_tx_irq - Reclaim resources after transmit completes
4665 * @q_vector: pointer to q_vector containing needed info
4666 * returns true if ring is completely cleaned
4667 **/
4668 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4669 {
4670 struct igb_adapter *adapter = q_vector->adapter;
4671 struct igb_ring *tx_ring = q_vector->tx_ring;
4672 struct net_device *netdev = tx_ring->netdev;
4673 struct e1000_hw *hw = &adapter->hw;
4674 struct igb_buffer *buffer_info;
4675 struct sk_buff *skb;
4676 union e1000_adv_tx_desc *tx_desc, *eop_desc;
4677 unsigned int total_bytes = 0, total_packets = 0;
4678 unsigned int i, eop, count = 0;
4679 bool cleaned = false;
4680
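	/* next_to_watch records the last descriptor of each transmitted frame;
	 * once its DD bit is set we can reclaim every buffer up to and
	 * including that end-of-packet descriptor. */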
4681 i = tx_ring->next_to_clean;
4682 eop = tx_ring->buffer_info[i].next_to_watch;
4683 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4684
4685 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
4686 (count < tx_ring->count)) {
4687 for (cleaned = false; !cleaned; count++) {
4688 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4689 buffer_info = &tx_ring->buffer_info[i];
4690 cleaned = (i == eop);
4691 skb = buffer_info->skb;
4692
4693 if (skb) {
4694 unsigned int segs, bytecount;
4695 /* gso_segs is currently only valid for tcp */
4696 segs = skb_shinfo(skb)->gso_segs ?: 1;
4697 /* multiply data chunks by size of headers */
4698 bytecount = ((segs - 1) * skb_headlen(skb)) +
4699 skb->len;
4700 total_packets += segs;
4701 total_bytes += bytecount;
4702
4703 igb_tx_hwtstamp(adapter, skb);
4704 }
4705
4706 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4707 tx_desc->wb.status = 0;
4708
4709 i++;
4710 if (i == tx_ring->count)
4711 i = 0;
4712 }
4713 eop = tx_ring->buffer_info[i].next_to_watch;
4714 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4715 }
4716
4717 tx_ring->next_to_clean = i;
4718
4719 if (unlikely(count &&
4720 netif_carrier_ok(netdev) &&
4721 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
4722 /* Make sure that anybody stopping the queue after this
4723 * sees the new next_to_clean.
4724 */
4725 smp_mb();
4726 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
4727 !(test_bit(__IGB_DOWN, &adapter->state))) {
4728 netif_wake_subqueue(netdev, tx_ring->queue_index);
4729 tx_ring->tx_stats.restart_queue++;
4730 }
4731 }
4732
4733 if (tx_ring->detect_tx_hung) {
4734 /* Detect a transmit hang in hardware; this serializes the
4735 * check with the clearing of time_stamp and movement of i */
4736 tx_ring->detect_tx_hung = false;
4737 if (tx_ring->buffer_info[i].time_stamp &&
4738 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
4739 (adapter->tx_timeout_factor * HZ))
4740 && !(rd32(E1000_STATUS) &
4741 E1000_STATUS_TXOFF)) {
4742
4743 /* detected Tx unit hang */
4744 dev_err(&tx_ring->pdev->dev,
4745 "Detected Tx Unit Hang\n"
4746 " Tx Queue <%d>\n"
4747 " TDH <%x>\n"
4748 " TDT <%x>\n"
4749 " next_to_use <%x>\n"
4750 " next_to_clean <%x>\n"
4751 "buffer_info[next_to_clean]\n"
4752 " time_stamp <%lx>\n"
4753 " next_to_watch <%x>\n"
4754 " jiffies <%lx>\n"
4755 " desc.status <%x>\n",
4756 tx_ring->queue_index,
4757 readl(tx_ring->head),
4758 readl(tx_ring->tail),
4759 tx_ring->next_to_use,
4760 tx_ring->next_to_clean,
4761 tx_ring->buffer_info[i].time_stamp,
4762 eop,
4763 jiffies,
4764 eop_desc->wb.status);
4765 netif_stop_subqueue(netdev, tx_ring->queue_index);
4766 }
4767 }
4768 tx_ring->total_bytes += total_bytes;
4769 tx_ring->total_packets += total_packets;
4770 tx_ring->tx_stats.bytes += total_bytes;
4771 tx_ring->tx_stats.packets += total_packets;
4772 netdev->stats.tx_bytes += total_bytes;
4773 netdev->stats.tx_packets += total_packets;
4774 return (count < tx_ring->count);
4775 }
4776
4777 /**
4778 * igb_receive_skb - helper function to handle rx indications
4779 * @q_vector: structure containing interrupt and ring information
4780 * @skb: packet to send up
4781 * @vlan_tag: vlan tag for packet
4782 **/
4783 static void igb_receive_skb(struct igb_q_vector *q_vector,
4784 struct sk_buff *skb,
4785 u16 vlan_tag)
4786 {
4787 struct igb_adapter *adapter = q_vector->adapter;
4788
4789 if (vlan_tag)
4790 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
4791 vlan_tag, skb);
4792 else
4793 napi_gro_receive(&q_vector->napi, skb);
4794 }
4795
4796 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
4797 u32 status_err, struct sk_buff *skb)
4798 {
4799 skb->ip_summed = CHECKSUM_NONE;
4800
4801 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
4802 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
4803 (status_err & E1000_RXD_STAT_IXSM))
4804 return;
4805
4806 /* TCP/UDP checksum error bit is set */
4807 if (status_err &
4808 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
4809 /*
4810 * work around errata with sctp packets where the TCPE aka
4811 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
4812 * packets (i.e. let the stack check the crc32c)
4813 */
4814 if ((skb->len == 60) &&
4815 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
4816 ring->rx_stats.csum_err++;
4817
4818 /* let the stack verify checksum errors */
4819 return;
4820 }
4821 /* It must be a TCP or UDP packet with a valid checksum */
4822 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
4823 skb->ip_summed = CHECKSUM_UNNECESSARY;
4824
4825 dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
4826 }
4827
4828 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
4829 union e1000_adv_rx_desc *rx_desc)
4830 {
4831 /* HW will not DMA in data larger than the given buffer, even if it
4832 * parses the (NFS, of course) header to be larger. In that case, it
4833 * fills the header buffer and spills the rest into the page.
4834 */
4835 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
4836 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
4837 if (hlen > rx_ring->rx_buffer_len)
4838 hlen = rx_ring->rx_buffer_len;
4839 return hlen;
4840 }
4841
4842 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
4843 int *work_done, int budget)
4844 {
4845 struct igb_adapter *adapter = q_vector->adapter;
4846 struct igb_ring *rx_ring = q_vector->rx_ring;
4847 struct net_device *netdev = rx_ring->netdev;
4848 struct e1000_hw *hw = &adapter->hw;
4849 struct pci_dev *pdev = rx_ring->pdev;
4850 union e1000_adv_rx_desc *rx_desc , *next_rxd;
4851 struct igb_buffer *buffer_info , *next_buffer;
4852 struct sk_buff *skb;
4853 bool cleaned = false;
4854 int cleaned_count = 0;
4855 unsigned int total_bytes = 0, total_packets = 0;
4856 unsigned int i;
4857 u32 staterr;
4858 u16 length;
4859 u16 vlan_tag;
4860
4861 i = rx_ring->next_to_clean;
4862 buffer_info = &rx_ring->buffer_info[i];
4863 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
4864 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4865
4866 while (staterr & E1000_RXD_STAT_DD) {
4867 if (*work_done >= budget)
4868 break;
4869 (*work_done)++;
4870
4871 skb = buffer_info->skb;
4872 prefetch(skb->data - NET_IP_ALIGN);
4873 buffer_info->skb = NULL;
4874
4875 i++;
4876 if (i == rx_ring->count)
4877 i = 0;
4878 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
4879 prefetch(next_rxd);
4880 next_buffer = &rx_ring->buffer_info[i];
4881
4882 length = le16_to_cpu(rx_desc->wb.upper.length);
4883 cleaned = true;
4884 cleaned_count++;
4885
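		/* packet-split reassembly: the header (or the whole frame for
		 * single-buffer rings) sits in the mapped skb data buffer; any
		 * remaining payload is attached below as a half-page fragment. */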
4886 if (buffer_info->dma) {
4887 pci_unmap_single(pdev, buffer_info->dma,
4888 rx_ring->rx_buffer_len,
4889 PCI_DMA_FROMDEVICE);
4890 buffer_info->dma = 0;
4891 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
4892 skb_put(skb, length);
4893 goto send_up;
4894 }
4895 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
4896 }
4897
4898 if (length) {
4899 pci_unmap_page(pdev, buffer_info->page_dma,
4900 PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
4901 buffer_info->page_dma = 0;
4902
4903 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
4904 buffer_info->page,
4905 buffer_info->page_offset,
4906 length);
4907
4908 if (page_count(buffer_info->page) != 1)
4909 buffer_info->page = NULL;
4910 else
4911 get_page(buffer_info->page);
4912
4913 skb->len += length;
4914 skb->data_len += length;
4915
4916 skb->truesize += length;
4917 }
4918
4919 if (!(staterr & E1000_RXD_STAT_EOP)) {
4920 buffer_info->skb = next_buffer->skb;
4921 buffer_info->dma = next_buffer->dma;
4922 next_buffer->skb = skb;
4923 next_buffer->dma = 0;
4924 goto next_desc;
4925 }
4926 send_up:
4927 /*
4928 * If this bit is set, then the RX registers contain
4929 * the time stamp. No other packet will be time
4930 * stamped until we read these registers, so read the
4931 * registers to make them available again. Because
4932 * only one packet can be time stamped at a time, we
4933 * know that the register values must belong to this
4934 * one here and therefore we don't need to compare
4935 * any of the additional attributes stored for it.
4936 *
4937 * If nothing went wrong, then it should have a
4938 * skb_shared_tx that we can turn into a
4939 * skb_shared_hwtstamps.
4940 *
4941 * TODO: can time stamping be triggered (thus locking
4942 * the registers) without the packet reaching this point
4943 * here? In that case RX time stamping would get stuck.
4944 *
4945 * TODO: in "time stamp all packets" mode this bit is
4946 * not set. Need a global flag for this mode and then
4947 * always read the registers. Cannot be done without
4948 * a race condition.
4949 */
4950 if (unlikely(staterr & E1000_RXD_STAT_TS)) {
4951 u64 regval;
4952 u64 ns;
4953 struct skb_shared_hwtstamps *shhwtstamps =
4954 skb_hwtstamps(skb);
4955
4956 WARN(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID),
4957 "igb: no RX time stamp available for time stamped packet");
4958 regval = rd32(E1000_RXSTMPL);
4959 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4960 ns = timecounter_cyc2time(&adapter->clock, regval);
4961 timecompare_update(&adapter->compare, ns);
4962 memset(shhwtstamps, 0, sizeof(*shhwtstamps));
4963 shhwtstamps->hwtstamp = ns_to_ktime(ns);
4964 shhwtstamps->syststamp =
4965 timecompare_transform(&adapter->compare, ns);
4966 }
4967
4968 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
4969 dev_kfree_skb_irq(skb);
4970 goto next_desc;
4971 }
4972
4973 total_bytes += skb->len;
4974 total_packets++;
4975
4976 igb_rx_checksum_adv(rx_ring, staterr, skb);
4977
4978 skb->protocol = eth_type_trans(skb, netdev);
4979 skb_record_rx_queue(skb, rx_ring->queue_index);
4980
4981 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
4982 le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
4983
4984 igb_receive_skb(q_vector, skb, vlan_tag);
4985
4986 next_desc:
4987 rx_desc->wb.upper.status_error = 0;
4988
4989 /* return some buffers to hardware, one at a time is too slow */
4990 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
4991 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
4992 cleaned_count = 0;
4993 }
4994
4995 /* use prefetched values */
4996 rx_desc = next_rxd;
4997 buffer_info = next_buffer;
4998 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4999 }
5000
5001 rx_ring->next_to_clean = i;
5002 cleaned_count = igb_desc_unused(rx_ring);
5003
5004 if (cleaned_count)
5005 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5006
5007 rx_ring->total_packets += total_packets;
5008 rx_ring->total_bytes += total_bytes;
5009 rx_ring->rx_stats.packets += total_packets;
5010 rx_ring->rx_stats.bytes += total_bytes;
5011 netdev->stats.rx_bytes += total_bytes;
5012 netdev->stats.rx_packets += total_packets;
5013 return cleaned;
5014 }
5015
5016 /**
5017 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5018 * @rx_ring: ring to place buffers on
5019 **/
5020 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5021 {
5022 struct net_device *netdev = rx_ring->netdev;
5023 union e1000_adv_rx_desc *rx_desc;
5024 struct igb_buffer *buffer_info;
5025 struct sk_buff *skb;
5026 unsigned int i;
5027 int bufsz;
5028
5029 i = rx_ring->next_to_use;
5030 buffer_info = &rx_ring->buffer_info[i];
5031
5032 bufsz = rx_ring->rx_buffer_len;
5033
5034 while (cleaned_count--) {
5035 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5036
5037 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5038 if (!buffer_info->page) {
5039 buffer_info->page = alloc_page(GFP_ATOMIC);
5040 if (!buffer_info->page) {
5041 rx_ring->rx_stats.alloc_failed++;
5042 goto no_buffers;
5043 }
5044 buffer_info->page_offset = 0;
5045 } else {
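			/* reuse the other half of the page on this refill;
			 * each page is handed to hardware in half-page chunks */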
5046 buffer_info->page_offset ^= PAGE_SIZE / 2;
5047 }
5048 buffer_info->page_dma =
5049 pci_map_page(rx_ring->pdev, buffer_info->page,
5050 buffer_info->page_offset,
5051 PAGE_SIZE / 2,
5052 PCI_DMA_FROMDEVICE);
5053 }
5054
5055 if (!buffer_info->skb) {
5056 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5057 if (!skb) {
5058 rx_ring->rx_stats.alloc_failed++;
5059 goto no_buffers;
5060 }
5061
5062 buffer_info->skb = skb;
5063 buffer_info->dma = pci_map_single(rx_ring->pdev,
5064 skb->data,
5065 bufsz,
5066 PCI_DMA_FROMDEVICE);
5067 }
5068 /* Refresh the desc even if buffer_addrs didn't change because
5069 * each write-back erases this info. */
5070 if (bufsz < IGB_RXBUFFER_1024) {
5071 rx_desc->read.pkt_addr =
5072 cpu_to_le64(buffer_info->page_dma);
5073 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5074 } else {
5075 rx_desc->read.pkt_addr =
5076 cpu_to_le64(buffer_info->dma);
5077 rx_desc->read.hdr_addr = 0;
5078 }
5079
5080 i++;
5081 if (i == rx_ring->count)
5082 i = 0;
5083 buffer_info = &rx_ring->buffer_info[i];
5084 }
5085
5086 no_buffers:
5087 if (rx_ring->next_to_use != i) {
5088 rx_ring->next_to_use = i;
5089 if (i == 0)
5090 i = (rx_ring->count - 1);
5091 else
5092 i--;
5093
5094 /* Force memory writes to complete before letting h/w
5095 * know there are new descriptors to fetch. (Only
5096 * applicable for weak-ordered memory model archs,
5097 * such as IA-64). */
5098 wmb();
5099 writel(i, rx_ring->tail);
5100 }
5101 }
5102
5103 /**
5104 * igb_mii_ioctl -
5105 * @netdev:
5106 * @ifreq:
5107 * @cmd:
5108 **/
5109 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5110 {
5111 struct igb_adapter *adapter = netdev_priv(netdev);
5112 struct mii_ioctl_data *data = if_mii(ifr);
5113
5114 if (adapter->hw.phy.media_type != e1000_media_type_copper)
5115 return -EOPNOTSUPP;
5116
5117 switch (cmd) {
5118 case SIOCGMIIPHY:
5119 data->phy_id = adapter->hw.phy.addr;
5120 break;
5121 case SIOCGMIIREG:
5122 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5123 &data->val_out))
5124 return -EIO;
5125 break;
5126 case SIOCSMIIREG:
5127 default:
5128 return -EOPNOTSUPP;
5129 }
5130 return 0;
5131 }
5132
5133 /**
5134 * igb_hwtstamp_ioctl - control hardware time stamping
5135 * @netdev:
5136 * @ifreq:
5137 * @cmd:
5138 *
5139 * Outgoing time stamping can be enabled and disabled. Play nice and
5140 * disable it when requested, although it shouldn't cause any overhead
5141 * when no packet needs it. At most one packet in the queue may be
5142 * marked for time stamping, otherwise it would be impossible to tell
5143 * for sure to which packet the hardware time stamp belongs.
5144 *
5145 * Incoming time stamping has to be configured via the hardware
5146 * filters. Not all combinations are supported, in particular event
5147 * type has to be specified. Matching the kind of event packet is
5148 * not supported, with the exception of "all V2 events regardless of
5149 * layer 2 or 4".
5150 *
5151 **/
5152 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5153 struct ifreq *ifr, int cmd)
5154 {
5155 struct igb_adapter *adapter = netdev_priv(netdev);
5156 struct e1000_hw *hw = &adapter->hw;
5157 struct hwtstamp_config config;
5158 u32 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
5159 u32 tsync_rx_ctl_bit = E1000_TSYNCRXCTL_ENABLED;
5160 u32 tsync_rx_ctl_type = 0;
5161 u32 tsync_rx_cfg = 0;
5162 int is_l4 = 0;
5163 int is_l2 = 0;
5164 short port = 319; /* PTP */
5165 u32 regval;
5166
5167 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5168 return -EFAULT;
5169
5170 /* reserved for future extensions */
5171 if (config.flags)
5172 return -EINVAL;
5173
5174 switch (config.tx_type) {
5175 case HWTSTAMP_TX_OFF:
5176 tsync_tx_ctl_bit = 0;
5177 break;
5178 case HWTSTAMP_TX_ON:
5179 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
5180 break;
5181 default:
5182 return -ERANGE;
5183 }
5184
5185 switch (config.rx_filter) {
5186 case HWTSTAMP_FILTER_NONE:
5187 tsync_rx_ctl_bit = 0;
5188 break;
5189 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5190 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5191 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5192 case HWTSTAMP_FILTER_ALL:
5193 /*
5194 * register TSYNCRXCFG must be set, therefore it is not
5195 * possible to time stamp both Sync and Delay_Req messages
5196 * => fall back to time stamping all packets
5197 */
5198 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_ALL;
5199 config.rx_filter = HWTSTAMP_FILTER_ALL;
5200 break;
5201 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5202 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
5203 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5204 is_l4 = 1;
5205 break;
5206 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5207 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
5208 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5209 is_l4 = 1;
5210 break;
5211 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5212 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5213 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5214 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5215 is_l2 = 1;
5216 is_l4 = 1;
5217 config.rx_filter = HWTSTAMP_FILTER_SOME;
5218 break;
5219 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5220 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5221 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5222 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5223 is_l2 = 1;
5224 is_l4 = 1;
5225 config.rx_filter = HWTSTAMP_FILTER_SOME;
5226 break;
5227 case HWTSTAMP_FILTER_PTP_V2_EVENT:
5228 case HWTSTAMP_FILTER_PTP_V2_SYNC:
5229 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5230 tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5231 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5232 is_l2 = 1;
5233 break;
5234 default:
5235 return -ERANGE;
5236 }
5237
5238 /* enable/disable TX */
5239 regval = rd32(E1000_TSYNCTXCTL);
5240 regval = (regval & ~E1000_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit;
5241 wr32(E1000_TSYNCTXCTL, regval);
5242
5243 /* enable/disable RX, define which PTP packets are time stamped */
5244 regval = rd32(E1000_TSYNCRXCTL);
5245 regval = (regval & ~E1000_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit;
5246 regval = (regval & ~0xE) | tsync_rx_ctl_type;
5247 wr32(E1000_TSYNCRXCTL, regval);
5248 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5249
5250 /*
5251 * Ethertype Filter Queue Filter[0][15:0] = 0x88F7
5252 * (Ethertype to filter on)
5253 * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
5254 * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
5255 */
5256 wr32(E1000_ETQF0, is_l2 ? 0x440088f7 : 0);
5257
5258 /* L4 Queue Filter[0]: only filter by source and destination port */
5259 wr32(E1000_SPQF0, htons(port));
5260 wr32(E1000_IMIREXT(0), is_l4 ?
5261 ((1<<12) | (1<<19) /* bypass size and control flags */) : 0);
5262 wr32(E1000_IMIR(0), is_l4 ?
5263 (htons(port)
5264 | (0<<16) /* immediate interrupt disabled */
5265 | 0 /* (1<<17) bit cleared: do not bypass
5266 destination port check */)
5267 : 0);
5268 wr32(E1000_FTQF0, is_l4 ?
5269 (0x11 /* UDP */
5270 | (1<<15) /* VF not compared */
5271 | (1<<27) /* Enable Timestamping */
5272 | (7<<28) /* only source port filter enabled,
5273 source/target address and protocol
5274 masked */)
5275 : ((1<<15) | (15<<28) /* all mask bits set = filter not
5276 enabled */));
5277
5278 wrfl();
5279
5280 adapter->hwtstamp_config = config;
5281
5282 /* clear TX/RX time stamp registers, just to be sure */
5283 regval = rd32(E1000_TXSTMPH);
5284 regval = rd32(E1000_RXSTMPH);
5285
5286 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5287 -EFAULT : 0;
5288 }
5289
5290 /**
5291 * igb_ioctl -
5292 * @netdev:
5293 * @ifreq:
5294 * @cmd:
5295 **/
5296 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5297 {
5298 switch (cmd) {
5299 case SIOCGMIIPHY:
5300 case SIOCGMIIREG:
5301 case SIOCSMIIREG:
5302 return igb_mii_ioctl(netdev, ifr, cmd);
5303 case SIOCSHWTSTAMP:
5304 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5305 default:
5306 return -EOPNOTSUPP;
5307 }
5308 }
5309
5310 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5311 {
5312 struct igb_adapter *adapter = hw->back;
5313 u16 cap_offset;
5314
5315 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5316 if (!cap_offset)
5317 return -E1000_ERR_CONFIG;
5318
5319 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5320
5321 return 0;
5322 }
5323
5324 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5325 {
5326 struct igb_adapter *adapter = hw->back;
5327 u16 cap_offset;
5328
5329 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5330 if (!cap_offset)
5331 return -E1000_ERR_CONFIG;
5332
5333 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5334
5335 return 0;
5336 }
5337
5338 static void igb_vlan_rx_register(struct net_device *netdev,
5339 struct vlan_group *grp)
5340 {
5341 struct igb_adapter *adapter = netdev_priv(netdev);
5342 struct e1000_hw *hw = &adapter->hw;
5343 u32 ctrl, rctl;
5344
5345 igb_irq_disable(adapter);
5346 adapter->vlgrp = grp;
5347
5348 if (grp) {
5349 /* enable VLAN tag insert/strip */
5350 ctrl = rd32(E1000_CTRL);
5351 ctrl |= E1000_CTRL_VME;
5352 wr32(E1000_CTRL, ctrl);
5353
5354 /* enable VLAN receive filtering */
5355 rctl = rd32(E1000_RCTL);
5356 rctl &= ~E1000_RCTL_CFIEN;
5357 wr32(E1000_RCTL, rctl);
5358 igb_update_mng_vlan(adapter);
5359 } else {
5360 /* disable VLAN tag insert/strip */
5361 ctrl = rd32(E1000_CTRL);
5362 ctrl &= ~E1000_CTRL_VME;
5363 wr32(E1000_CTRL, ctrl);
5364
5365 if (adapter->mng_vlan_id != (u16)IGB_MNG_VLAN_NONE) {
5366 igb_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
5367 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
5368 }
5369 }
5370
5371 igb_rlpml_set(adapter);
5372
5373 if (!test_bit(__IGB_DOWN, &adapter->state))
5374 igb_irq_enable(adapter);
5375 }
5376
5377 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5378 {
5379 struct igb_adapter *adapter = netdev_priv(netdev);
5380 struct e1000_hw *hw = &adapter->hw;
5381 int pf_id = adapter->vfs_allocated_count;
5382
5383 if ((hw->mng_cookie.status &
5384 E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
5385 (vid == adapter->mng_vlan_id))
5386 return;
5387
5388 /* add vid to vlvf if sr-iov is enabled;
5389 * if that fails, add directly to the filter table */
5390 if (igb_vlvf_set(adapter, vid, true, pf_id))
5391 igb_vfta_set(hw, vid, true);
5392
5393 }
5394
5395 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5396 {
5397 struct igb_adapter *adapter = netdev_priv(netdev);
5398 struct e1000_hw *hw = &adapter->hw;
5399 int pf_id = adapter->vfs_allocated_count;
5400
5401 igb_irq_disable(adapter);
5402 vlan_group_set_device(adapter->vlgrp, vid, NULL);
5403
5404 if (!test_bit(__IGB_DOWN, &adapter->state))
5405 igb_irq_enable(adapter);
5406
5407 if ((adapter->hw.mng_cookie.status &
5408 E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
5409 (vid == adapter->mng_vlan_id)) {
5410 /* release control to f/w */
5411 igb_release_hw_control(adapter);
5412 return;
5413 }
5414
5415 /* remove vid from vlvf if sr-iov is enabled;
5416 * if it is not in the vlvf, remove it from the vfta */
5417 if (igb_vlvf_set(adapter, vid, false, pf_id))
5418 igb_vfta_set(hw, vid, false);
5419 }
5420
5421 static void igb_restore_vlan(struct igb_adapter *adapter)
5422 {
5423 igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5424
5425 if (adapter->vlgrp) {
5426 u16 vid;
5427 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5428 if (!vlan_group_get_device(adapter->vlgrp, vid))
5429 continue;
5430 igb_vlan_rx_add_vid(adapter->netdev, vid);
5431 }
5432 }
5433 }
5434
5435 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5436 {
5437 struct e1000_mac_info *mac = &adapter->hw.mac;
5438
5439 mac->autoneg = 0;
5440
5441 switch (spddplx) {
5442 case SPEED_10 + DUPLEX_HALF:
5443 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5444 break;
5445 case SPEED_10 + DUPLEX_FULL:
5446 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5447 break;
5448 case SPEED_100 + DUPLEX_HALF:
5449 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5450 break;
5451 case SPEED_100 + DUPLEX_FULL:
5452 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5453 break;
5454 case SPEED_1000 + DUPLEX_FULL:
5455 mac->autoneg = 1;
5456 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5457 break;
5458 case SPEED_1000 + DUPLEX_HALF: /* not supported */
5459 default:
5460 dev_err(&adapter->pdev->dev,
5461 "Unsupported Speed/Duplex configuration\n");
5462 return -EINVAL;
5463 }
5464 return 0;
5465 }
5466
5467 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5468 {
5469 struct net_device *netdev = pci_get_drvdata(pdev);
5470 struct igb_adapter *adapter = netdev_priv(netdev);
5471 struct e1000_hw *hw = &adapter->hw;
5472 u32 ctrl, rctl, status;
5473 u32 wufc = adapter->wol;
5474 #ifdef CONFIG_PM
5475 int retval = 0;
5476 #endif
5477
5478 netif_device_detach(netdev);
5479
5480 if (netif_running(netdev))
5481 igb_close(netdev);
5482
5483 igb_clear_interrupt_scheme(adapter);
5484
5485 #ifdef CONFIG_PM
5486 retval = pci_save_state(pdev);
5487 if (retval)
5488 return retval;
5489 #endif
5490
5491 status = rd32(E1000_STATUS);
5492 if (status & E1000_STATUS_LU)
5493 wufc &= ~E1000_WUFC_LNKC;
5494
5495 if (wufc) {
5496 igb_setup_rctl(adapter);
5497 igb_set_rx_mode(netdev);
5498
5499 /* turn on all-multi mode if wake on multicast is enabled */
5500 if (wufc & E1000_WUFC_MC) {
5501 rctl = rd32(E1000_RCTL);
5502 rctl |= E1000_RCTL_MPE;
5503 wr32(E1000_RCTL, rctl);
5504 }
5505
5506 ctrl = rd32(E1000_CTRL);
5507 /* advertise wake from D3Cold */
5508 #define E1000_CTRL_ADVD3WUC 0x00100000
5509 /* phy power management enable */
5510 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5511 ctrl |= E1000_CTRL_ADVD3WUC;
5512 wr32(E1000_CTRL, ctrl);
5513
5514 /* Allow time for pending master requests to run */
5515 igb_disable_pcie_master(&adapter->hw);
5516
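/* enable PME assertion and arm the requested wake-up filters */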
5517 wr32(E1000_WUC, E1000_WUC_PME_EN);
5518 wr32(E1000_WUFC, wufc);
5519 } else {
5520 wr32(E1000_WUC, 0);
5521 wr32(E1000_WUFC, 0);
5522 }
5523
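/* wake stays enabled if any wake filter is armed or manageability
 * pass-through is active */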
5524 *enable_wake = wufc || adapter->en_mng_pt;
5525 if (!*enable_wake)
5526 igb_shutdown_serdes_link_82575(hw);
5527
5528 /* Release control of h/w to f/w. If f/w is AMT enabled, this
5529 * would have already happened in close and is redundant. */
5530 igb_release_hw_control(adapter);
5531
5532 pci_disable_device(pdev);
5533
5534 return 0;
5535 }
5536
5537 #ifdef CONFIG_PM
5538 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5539 {
5540 int retval;
5541 bool wake;
5542
5543 retval = __igb_shutdown(pdev, &wake);
5544 if (retval)
5545 return retval;
5546
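/* if wake is needed, let the PCI core pick the deepest wake-capable
 * state; otherwise disable wake and go straight to D3hot */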
5547 if (wake) {
5548 pci_prepare_to_sleep(pdev);
5549 } else {
5550 pci_wake_from_d3(pdev, false);
5551 pci_set_power_state(pdev, PCI_D3hot);
5552 }
5553
5554 return 0;
5555 }
5556
5557 static int igb_resume(struct pci_dev *pdev)
5558 {
5559 struct net_device *netdev = pci_get_drvdata(pdev);
5560 struct igb_adapter *adapter = netdev_priv(netdev);
5561 struct e1000_hw *hw = &adapter->hw;
5562 u32 err;
5563
5564 pci_set_power_state(pdev, PCI_D0);
5565 pci_restore_state(pdev);
5566
5567 err = pci_enable_device_mem(pdev);
5568 if (err) {
5569 dev_err(&pdev->dev,
5570 "igb: Cannot enable PCI device from suspend\n");
5571 return err;
5572 }
5573 pci_set_master(pdev);
5574
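/* clear any wake-up enables left over from suspend */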
5575 pci_enable_wake(pdev, PCI_D3hot, 0);
5576 pci_enable_wake(pdev, PCI_D3cold, 0);
5577
5578 if (igb_init_interrupt_scheme(adapter)) {
5579 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5580 return -ENOMEM;
5581 }
5582
5583 /* e1000_power_up_phy(adapter); */
5584
5585 igb_reset(adapter);
5586
5587 /* let the f/w know that the h/w is now under the control of the
5588 * driver. */
5589 igb_get_hw_control(adapter);
5590
5591 wr32(E1000_WUS, ~0);
5592
5593 if (netif_running(netdev)) {
5594 err = igb_open(netdev);
5595 if (err)
5596 return err;
5597 }
5598
5599 netif_device_attach(netdev);
5600
5601 return 0;
5602 }
5603 #endif
5604
5605 static void igb_shutdown(struct pci_dev *pdev)
5606 {
5607 bool wake;
5608
5609 __igb_shutdown(pdev, &wake);
5610
5611 if (system_state == SYSTEM_POWER_OFF) {
5612 pci_wake_from_d3(pdev, wake);
5613 pci_set_power_state(pdev, PCI_D3hot);
5614 }
5615 }
5616
5617 #ifdef CONFIG_NET_POLL_CONTROLLER
5618 /*
5619 * Polling 'interrupt' - used by things like netconsole to send skbs
5620 * without having to re-enable interrupts. It's not called while
5621 * the interrupt routine is executing.
5622 */
5623 static void igb_netpoll(struct net_device *netdev)
5624 {
5625 struct igb_adapter *adapter = netdev_priv(netdev);
5626 struct e1000_hw *hw = &adapter->hw;
5627 int i;
5628
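/* legacy/MSI mode: mask all interrupts and poll the single
 * queue vector */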
5629 if (!adapter->msix_entries) {
5630 struct igb_q_vector *q_vector = adapter->q_vector[0];
5631 igb_irq_disable(adapter);
5632 napi_schedule(&q_vector->napi);
5633 return;
5634 }
5635
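/* MSI-X mode: mask each vector via EIMC and schedule its NAPI poll */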
5636 for (i = 0; i < adapter->num_q_vectors; i++) {
5637 struct igb_q_vector *q_vector = adapter->q_vector[i];
5638 wr32(E1000_EIMC, q_vector->eims_value);
5639 napi_schedule(&q_vector->napi);
5640 }
5641 }
5642 #endif /* CONFIG_NET_POLL_CONTROLLER */
5643
5644 /**
5645 * igb_io_error_detected - called when PCI error is detected
5646 * @pdev: Pointer to PCI device
5647 * @state: The current pci connection state
5648 *
5649 * This function is called after a PCI bus error affecting
5650 * this device has been detected.
5651 */
5652 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5653 pci_channel_state_t state)
5654 {
5655 struct net_device *netdev = pci_get_drvdata(pdev);
5656 struct igb_adapter *adapter = netdev_priv(netdev);
5657
5658 netif_device_detach(netdev);
5659
5660 if (state == pci_channel_io_perm_failure)
5661 return PCI_ERS_RESULT_DISCONNECT;
5662
5663 if (netif_running(netdev))
5664 igb_down(adapter);
5665 pci_disable_device(pdev);
5666
5667 /* Request a slot reset. */
5668 return PCI_ERS_RESULT_NEED_RESET;
5669 }
5670
5671 /**
5672 * igb_io_slot_reset - called after the pci bus has been reset.
5673 * @pdev: Pointer to PCI device
5674 *
5675 * Restart the card from scratch, as if from a cold boot. Implementation
5676 * resembles the first half of the igb_resume routine.
5677 */
5678 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5679 {
5680 struct net_device *netdev = pci_get_drvdata(pdev);
5681 struct igb_adapter *adapter = netdev_priv(netdev);
5682 struct e1000_hw *hw = &adapter->hw;
5683 pci_ers_result_t result;
5684 int err;
5685
5686 if (pci_enable_device_mem(pdev)) {
5687 dev_err(&pdev->dev,
5688 "Cannot re-enable PCI device after reset.\n");
5689 result = PCI_ERS_RESULT_DISCONNECT;
5690 } else {
5691 pci_set_master(pdev);
5692 pci_restore_state(pdev);
5693
5694 pci_enable_wake(pdev, PCI_D3hot, 0);
5695 pci_enable_wake(pdev, PCI_D3cold, 0);
5696
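/* reset the hardware and clear any pending wake-up status bits */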
5697 igb_reset(adapter);
5698 wr32(E1000_WUS, ~0);
5699 result = PCI_ERS_RESULT_RECOVERED;
5700 }
5701
5702 err = pci_cleanup_aer_uncorrect_error_status(pdev);
5703 if (err) {
5704 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
5705 "failed 0x%0x\n", err);
5706 /* non-fatal, continue */
5707 }
5708
5709 return result;
5710 }
5711
5712 /**
5713 * igb_io_resume - called when traffic can start flowing again.
5714 * @pdev: Pointer to PCI device
5715 *
5716 * This callback is called when the error recovery driver tells us that
5717 * it's OK to resume normal operation. Implementation resembles the
5718 * second half of the igb_resume routine.
5719 */
5720 static void igb_io_resume(struct pci_dev *pdev)
5721 {
5722 struct net_device *netdev = pci_get_drvdata(pdev);
5723 struct igb_adapter *adapter = netdev_priv(netdev);
5724
5725 if (netif_running(netdev)) {
5726 if (igb_up(adapter)) {
5727 dev_err(&pdev->dev, "igb_up failed after reset\n");
5728 return;
5729 }
5730 }
5731
5732 netif_device_attach(netdev);
5733
5734 /* let the f/w know that the h/w is now under the control of the
5735 * driver. */
5736 igb_get_hw_control(adapter);
5737 }
5738
5739 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
5740 u8 qsel)
5741 {
5742 u32 rar_low, rar_high;
5743 struct e1000_hw *hw = &adapter->hw;
5744
5745 /* HW expects these in little endian so we reverse the byte order
5746 * from network order (big endian) to little endian
5747 */
5748 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
5749 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
5750 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
5751
5752 /* Indicate to hardware the Address is Valid. */
5753 rar_high |= E1000_RAH_AV;
5754
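/* 82575 encodes the pool selection as a multiple of the pool bit;
 * later parts shift the pool bit by qsel */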
5755 if (hw->mac.type == e1000_82575)
5756 rar_high |= E1000_RAH_POOL_1 * qsel;
5757 else
5758 rar_high |= E1000_RAH_POOL_1 << qsel;
5759
5760 wr32(E1000_RAL(index), rar_low);
5761 wrfl();
5762 wr32(E1000_RAH(index), rar_high);
5763 wrfl();
5764 }
5765
5766 static int igb_set_vf_mac(struct igb_adapter *adapter,
5767 int vf, unsigned char *mac_addr)
5768 {
5769 struct e1000_hw *hw = &adapter->hw;
5770 /* VF MAC addresses start at the end of the receive address registers
5771 * and move toward the first, so a collision should not be possible */
5772 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5773
5774 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
5775
5776 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
5777
5778 return 0;
5779 }
5780
5781 static void igb_vmm_control(struct igb_adapter *adapter)
5782 {
5783 struct e1000_hw *hw = &adapter->hw;
5784 u32 reg;
5785
5786 /* replication is not supported for 82575 */
5787 if (hw->mac.type == e1000_82575)
5788 return;
5789
5790 /* enable replication vlan tag stripping */
5791 reg = rd32(E1000_RPLOLR);
5792 reg |= E1000_RPLOLR_STRVLAN;
5793 wr32(E1000_RPLOLR, reg);
5794
5795 /* notify HW that the MAC is adding vlan tags */
5796 reg = rd32(E1000_DTXCTL);
5797 reg |= E1000_DTXCTL_VLAN_ADDED;
5798 wr32(E1000_DTXCTL, reg);
5799
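/* pool loopback and replication are only enabled when VFs
 * are allocated */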
5800 if (adapter->vfs_allocated_count) {
5801 igb_vmdq_set_loopback_pf(hw, true);
5802 igb_vmdq_set_replication_pf(hw, true);
5803 } else {
5804 igb_vmdq_set_loopback_pf(hw, false);
5805 igb_vmdq_set_replication_pf(hw, false);
5806 }
5807 }
5808
5809 /* igb_main.c */