Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wirel...
[deliverable/linux.git] / drivers / net / igb / igb_main.c
1 /*******************************************************************************
2
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2009 Intel Corporation.
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
9
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
14
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
21
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
/*
 * Driver identity strings.  The banner, version and copyright strings are
 * printed by igb_init_module(); igb_driver_name is the name handed to the
 * PCI core through igb_driver.
 */
#define DRV_VERSION "1.3.16-k2"
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
	"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] =
	"Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60 [board_82575] = &e1000_82575_info,
61 };
62
63 static struct pci_device_id igb_pci_tbl[] = {
64 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
65 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
66 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
67 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
68 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
69 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
70 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
71 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
72 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
73 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
74 /* required last entry */
75 {0, }
76 };
77
78 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
79
80 void igb_reset(struct igb_adapter *);
81 static int igb_setup_all_tx_resources(struct igb_adapter *);
82 static int igb_setup_all_rx_resources(struct igb_adapter *);
83 static void igb_free_all_tx_resources(struct igb_adapter *);
84 static void igb_free_all_rx_resources(struct igb_adapter *);
85 static void igb_setup_mrqc(struct igb_adapter *);
86 void igb_update_stats(struct igb_adapter *);
87 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
88 static void __devexit igb_remove(struct pci_dev *pdev);
89 static int igb_sw_init(struct igb_adapter *);
90 static int igb_open(struct net_device *);
91 static int igb_close(struct net_device *);
92 static void igb_configure_tx(struct igb_adapter *);
93 static void igb_configure_rx(struct igb_adapter *);
94 static void igb_clean_all_tx_rings(struct igb_adapter *);
95 static void igb_clean_all_rx_rings(struct igb_adapter *);
96 static void igb_clean_tx_ring(struct igb_ring *);
97 static void igb_clean_rx_ring(struct igb_ring *);
98 static void igb_set_rx_mode(struct net_device *);
99 static void igb_update_phy_info(unsigned long);
100 static void igb_watchdog(unsigned long);
101 static void igb_watchdog_task(struct work_struct *);
102 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
103 static struct net_device_stats *igb_get_stats(struct net_device *);
104 static int igb_change_mtu(struct net_device *, int);
105 static int igb_set_mac(struct net_device *, void *);
106 static void igb_set_uta(struct igb_adapter *adapter);
107 static irqreturn_t igb_intr(int irq, void *);
108 static irqreturn_t igb_intr_msi(int irq, void *);
109 static irqreturn_t igb_msix_other(int irq, void *);
110 static irqreturn_t igb_msix_ring(int irq, void *);
111 #ifdef CONFIG_IGB_DCA
112 static void igb_update_dca(struct igb_q_vector *);
113 static void igb_setup_dca(struct igb_adapter *);
114 #endif /* CONFIG_IGB_DCA */
115 static bool igb_clean_tx_irq(struct igb_q_vector *);
116 static int igb_poll(struct napi_struct *, int);
117 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
118 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
119 static void igb_tx_timeout(struct net_device *);
120 static void igb_reset_task(struct work_struct *);
121 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
122 static void igb_vlan_rx_add_vid(struct net_device *, u16);
123 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
124 static void igb_restore_vlan(struct igb_adapter *);
125 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
126 static void igb_ping_all_vfs(struct igb_adapter *);
127 static void igb_msg_task(struct igb_adapter *);
128 static void igb_vmm_control(struct igb_adapter *);
129 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
130 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
131
132 #ifdef CONFIG_PM
133 static int igb_suspend(struct pci_dev *, pm_message_t);
134 static int igb_resume(struct pci_dev *);
135 #endif
136 static void igb_shutdown(struct pci_dev *);
137 #ifdef CONFIG_IGB_DCA
138 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
139 static struct notifier_block dca_notifier = {
140 .notifier_call = igb_notify_dca,
141 .next = NULL,
142 .priority = 0
143 };
144 #endif
145 #ifdef CONFIG_NET_POLL_CONTROLLER
146 /* for netdump / net console */
147 static void igb_netpoll(struct net_device *);
148 #endif
#ifdef CONFIG_PCI_IOV
/* module parameter; statics are zero-initialized, so the default is 0 */
static unsigned int max_vfs;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
		 "per physical function");
#endif /* CONFIG_PCI_IOV */
155
156 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
157 pci_channel_state_t);
158 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
159 static void igb_io_resume(struct pci_dev *);
160
161 static struct pci_error_handlers igb_err_handler = {
162 .error_detected = igb_io_error_detected,
163 .slot_reset = igb_io_slot_reset,
164 .resume = igb_io_resume,
165 };
166
167
168 static struct pci_driver igb_driver = {
169 .name = igb_driver_name,
170 .id_table = igb_pci_tbl,
171 .probe = igb_probe,
172 .remove = __devexit_p(igb_remove),
173 #ifdef CONFIG_PM
174 /* Power Managment Hooks */
175 .suspend = igb_suspend,
176 .resume = igb_resume,
177 #endif
178 .shutdown = igb_shutdown,
179 .err_handler = &igb_err_handler
180 };
181
182 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
183 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
184 MODULE_LICENSE("GPL");
185 MODULE_VERSION(DRV_VERSION);
186
187 /**
188 * igb_read_clock - read raw cycle counter (to be used by time counter)
189 */
190 static cycle_t igb_read_clock(const struct cyclecounter *tc)
191 {
192 struct igb_adapter *adapter =
193 container_of(tc, struct igb_adapter, cycles);
194 struct e1000_hw *hw = &adapter->hw;
195 u64 stamp = 0;
196 int shift = 0;
197
198 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
199 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
200 return stamp;
201 }
202
203 #ifdef DEBUG
204 /**
205 * igb_get_hw_dev_name - return device name string
206 * used by hardware layer to print debugging information
207 **/
208 char *igb_get_hw_dev_name(struct e1000_hw *hw)
209 {
210 struct igb_adapter *adapter = hw->back;
211 return adapter->netdev->name;
212 }
213
214 /**
215 * igb_get_time_str - format current NIC and system time as string
216 */
217 static char *igb_get_time_str(struct igb_adapter *adapter,
218 char buffer[160])
219 {
220 cycle_t hw = adapter->cycles.read(&adapter->cycles);
221 struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
222 struct timespec sys;
223 struct timespec delta;
224 getnstimeofday(&sys);
225
226 delta = timespec_sub(nic, sys);
227
228 sprintf(buffer,
229 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
230 hw,
231 (long)nic.tv_sec, nic.tv_nsec,
232 (long)sys.tv_sec, sys.tv_nsec,
233 (long)delta.tv_sec, delta.tv_nsec);
234
235 return buffer;
236 }
237 #endif
238
239 /**
240 * igb_init_module - Driver Registration Routine
241 *
242 * igb_init_module is the first routine called when the driver is
243 * loaded. All it does is register with the PCI subsystem.
244 **/
245 static int __init igb_init_module(void)
246 {
247 int ret;
248 printk(KERN_INFO "%s - version %s\n",
249 igb_driver_string, igb_driver_version);
250
251 printk(KERN_INFO "%s\n", igb_copyright);
252
253 #ifdef CONFIG_IGB_DCA
254 dca_register_notify(&dca_notifier);
255 #endif
256 ret = pci_register_driver(&igb_driver);
257 return ret;
258 }
259
260 module_init(igb_init_module);
261
262 /**
263 * igb_exit_module - Driver Exit Cleanup Routine
264 *
265 * igb_exit_module is called just before the driver is removed
266 * from memory.
267 **/
268 static void __exit igb_exit_module(void)
269 {
270 #ifdef CONFIG_IGB_DCA
271 dca_unregister_notify(&dca_notifier);
272 #endif
273 pci_unregister_driver(&igb_driver);
274 }
275
276 module_exit(igb_exit_module);
277
278 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
279 /**
280 * igb_cache_ring_register - Descriptor ring to register mapping
281 * @adapter: board private structure to initialize
282 *
283 * Once we know the feature-set enabled for the device, we'll cache
284 * the register offset the descriptor ring is assigned to.
285 **/
286 static void igb_cache_ring_register(struct igb_adapter *adapter)
287 {
288 int i = 0, j = 0;
289 u32 rbase_offset = adapter->vfs_allocated_count;
290
291 switch (adapter->hw.mac.type) {
292 case e1000_82576:
293 /* The queues are allocated for virtualization such that VF 0
294 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
295 * In order to avoid collision we start at the first free queue
296 * and continue consuming queues in the same sequence
297 */
298 if (adapter->vfs_allocated_count) {
299 for (; i < adapter->num_rx_queues; i++)
300 adapter->rx_ring[i].reg_idx = rbase_offset +
301 Q_IDX_82576(i);
302 for (; j < adapter->num_tx_queues; j++)
303 adapter->tx_ring[j].reg_idx = rbase_offset +
304 Q_IDX_82576(j);
305 }
306 case e1000_82575:
307 default:
308 for (; i < adapter->num_rx_queues; i++)
309 adapter->rx_ring[i].reg_idx = rbase_offset + i;
310 for (; j < adapter->num_tx_queues; j++)
311 adapter->tx_ring[j].reg_idx = rbase_offset + j;
312 break;
313 }
314 }
315
316 static void igb_free_queues(struct igb_adapter *adapter)
317 {
318 kfree(adapter->tx_ring);
319 kfree(adapter->rx_ring);
320
321 adapter->tx_ring = NULL;
322 adapter->rx_ring = NULL;
323
324 adapter->num_rx_queues = 0;
325 adapter->num_tx_queues = 0;
326 }
327
328 /**
329 * igb_alloc_queues - Allocate memory for all rings
330 * @adapter: board private structure to initialize
331 *
332 * We allocate one ring per queue at run-time since we don't know the
333 * number of queues at compile-time.
334 **/
335 static int igb_alloc_queues(struct igb_adapter *adapter)
336 {
337 int i;
338
339 adapter->tx_ring = kcalloc(adapter->num_tx_queues,
340 sizeof(struct igb_ring), GFP_KERNEL);
341 if (!adapter->tx_ring)
342 goto err;
343
344 adapter->rx_ring = kcalloc(adapter->num_rx_queues,
345 sizeof(struct igb_ring), GFP_KERNEL);
346 if (!adapter->rx_ring)
347 goto err;
348
349 for (i = 0; i < adapter->num_tx_queues; i++) {
350 struct igb_ring *ring = &(adapter->tx_ring[i]);
351 ring->count = adapter->tx_ring_count;
352 ring->queue_index = i;
353 ring->pdev = adapter->pdev;
354 ring->netdev = adapter->netdev;
355 /* For 82575, context index must be unique per ring. */
356 if (adapter->hw.mac.type == e1000_82575)
357 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
358 }
359
360 for (i = 0; i < adapter->num_rx_queues; i++) {
361 struct igb_ring *ring = &(adapter->rx_ring[i]);
362 ring->count = adapter->rx_ring_count;
363 ring->queue_index = i;
364 ring->pdev = adapter->pdev;
365 ring->netdev = adapter->netdev;
366 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
367 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
368 /* set flag indicating ring supports SCTP checksum offload */
369 if (adapter->hw.mac.type >= e1000_82576)
370 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
371 }
372
373 igb_cache_ring_register(adapter);
374
375 return 0;
376
377 err:
378 igb_free_queues(adapter);
379
380 return -ENOMEM;
381 }
382
383 #define IGB_N0_QUEUE -1
384 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
385 {
386 u32 msixbm = 0;
387 struct igb_adapter *adapter = q_vector->adapter;
388 struct e1000_hw *hw = &adapter->hw;
389 u32 ivar, index;
390 int rx_queue = IGB_N0_QUEUE;
391 int tx_queue = IGB_N0_QUEUE;
392
393 if (q_vector->rx_ring)
394 rx_queue = q_vector->rx_ring->reg_idx;
395 if (q_vector->tx_ring)
396 tx_queue = q_vector->tx_ring->reg_idx;
397
398 switch (hw->mac.type) {
399 case e1000_82575:
400 /* The 82575 assigns vectors using a bitmask, which matches the
401 bitmask for the EICR/EIMS/EIMC registers. To assign one
402 or more queues to a vector, we write the appropriate bits
403 into the MSIXBM register for that vector. */
404 if (rx_queue > IGB_N0_QUEUE)
405 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
406 if (tx_queue > IGB_N0_QUEUE)
407 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
408 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
409 q_vector->eims_value = msixbm;
410 break;
411 case e1000_82576:
412 /* 82576 uses a table-based method for assigning vectors.
413 Each queue has a single entry in the table to which we write
414 a vector number along with a "valid" bit. Sadly, the layout
415 of the table is somewhat counterintuitive. */
416 if (rx_queue > IGB_N0_QUEUE) {
417 index = (rx_queue & 0x7);
418 ivar = array_rd32(E1000_IVAR0, index);
419 if (rx_queue < 8) {
420 /* vector goes into low byte of register */
421 ivar = ivar & 0xFFFFFF00;
422 ivar |= msix_vector | E1000_IVAR_VALID;
423 } else {
424 /* vector goes into third byte of register */
425 ivar = ivar & 0xFF00FFFF;
426 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
427 }
428 array_wr32(E1000_IVAR0, index, ivar);
429 }
430 if (tx_queue > IGB_N0_QUEUE) {
431 index = (tx_queue & 0x7);
432 ivar = array_rd32(E1000_IVAR0, index);
433 if (tx_queue < 8) {
434 /* vector goes into second byte of register */
435 ivar = ivar & 0xFFFF00FF;
436 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
437 } else {
438 /* vector goes into high byte of register */
439 ivar = ivar & 0x00FFFFFF;
440 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
441 }
442 array_wr32(E1000_IVAR0, index, ivar);
443 }
444 q_vector->eims_value = 1 << msix_vector;
445 break;
446 default:
447 BUG();
448 break;
449 }
450 }
451
452 /**
453 * igb_configure_msix - Configure MSI-X hardware
454 *
455 * igb_configure_msix sets up the hardware to properly
456 * generate MSI-X interrupts.
457 **/
458 static void igb_configure_msix(struct igb_adapter *adapter)
459 {
460 u32 tmp;
461 int i, vector = 0;
462 struct e1000_hw *hw = &adapter->hw;
463
464 adapter->eims_enable_mask = 0;
465
466 /* set vector for other causes, i.e. link changes */
467 switch (hw->mac.type) {
468 case e1000_82575:
469 tmp = rd32(E1000_CTRL_EXT);
470 /* enable MSI-X PBA support*/
471 tmp |= E1000_CTRL_EXT_PBA_CLR;
472
473 /* Auto-Mask interrupts upon ICR read. */
474 tmp |= E1000_CTRL_EXT_EIAME;
475 tmp |= E1000_CTRL_EXT_IRCA;
476
477 wr32(E1000_CTRL_EXT, tmp);
478
479 /* enable msix_other interrupt */
480 array_wr32(E1000_MSIXBM(0), vector++,
481 E1000_EIMS_OTHER);
482 adapter->eims_other = E1000_EIMS_OTHER;
483
484 break;
485
486 case e1000_82576:
487 /* Turn on MSI-X capability first, or our settings
488 * won't stick. And it will take days to debug. */
489 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
490 E1000_GPIE_PBA | E1000_GPIE_EIAME |
491 E1000_GPIE_NSICR);
492
493 /* enable msix_other interrupt */
494 adapter->eims_other = 1 << vector;
495 tmp = (vector++ | E1000_IVAR_VALID) << 8;
496
497 wr32(E1000_IVAR_MISC, tmp);
498 break;
499 default:
500 /* do nothing, since nothing else supports MSI-X */
501 break;
502 } /* switch (hw->mac.type) */
503
504 adapter->eims_enable_mask |= adapter->eims_other;
505
506 for (i = 0; i < adapter->num_q_vectors; i++) {
507 struct igb_q_vector *q_vector = adapter->q_vector[i];
508 igb_assign_vector(q_vector, vector++);
509 adapter->eims_enable_mask |= q_vector->eims_value;
510 }
511
512 wrfl();
513 }
514
515 /**
516 * igb_request_msix - Initialize MSI-X interrupts
517 *
518 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
519 * kernel.
520 **/
521 static int igb_request_msix(struct igb_adapter *adapter)
522 {
523 struct net_device *netdev = adapter->netdev;
524 struct e1000_hw *hw = &adapter->hw;
525 int i, err = 0, vector = 0;
526
527 err = request_irq(adapter->msix_entries[vector].vector,
528 &igb_msix_other, 0, netdev->name, adapter);
529 if (err)
530 goto out;
531 vector++;
532
533 for (i = 0; i < adapter->num_q_vectors; i++) {
534 struct igb_q_vector *q_vector = adapter->q_vector[i];
535
536 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
537
538 if (q_vector->rx_ring && q_vector->tx_ring)
539 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
540 q_vector->rx_ring->queue_index);
541 else if (q_vector->tx_ring)
542 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
543 q_vector->tx_ring->queue_index);
544 else if (q_vector->rx_ring)
545 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
546 q_vector->rx_ring->queue_index);
547 else
548 sprintf(q_vector->name, "%s-unused", netdev->name);
549
550 err = request_irq(adapter->msix_entries[vector].vector,
551 &igb_msix_ring, 0, q_vector->name,
552 q_vector);
553 if (err)
554 goto out;
555 vector++;
556 }
557
558 igb_configure_msix(adapter);
559 return 0;
560 out:
561 return err;
562 }
563
564 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
565 {
566 if (adapter->msix_entries) {
567 pci_disable_msix(adapter->pdev);
568 kfree(adapter->msix_entries);
569 adapter->msix_entries = NULL;
570 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
571 pci_disable_msi(adapter->pdev);
572 }
573 }
574
575 /**
576 * igb_free_q_vectors - Free memory allocated for interrupt vectors
577 * @adapter: board private structure to initialize
578 *
579 * This function frees the memory allocated to the q_vectors. In addition if
580 * NAPI is enabled it will delete any references to the NAPI struct prior
581 * to freeing the q_vector.
582 **/
583 static void igb_free_q_vectors(struct igb_adapter *adapter)
584 {
585 int v_idx;
586
587 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
588 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
589 adapter->q_vector[v_idx] = NULL;
590 netif_napi_del(&q_vector->napi);
591 kfree(q_vector);
592 }
593 adapter->num_q_vectors = 0;
594 }
595
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * Leaves the adapter with 0 rx queues, 0 tx queues, no q_vectors and
 * MSI-X/MSI disabled, by releasing the ring arrays, then the q_vectors,
 * then the interrupt capability.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);
	igb_reset_interrupt_capability(adapter);
}
608
609 /**
610 * igb_set_interrupt_capability - set MSI or MSI-X if supported
611 *
612 * Attempt to configure interrupts using the best available
613 * capabilities of the hardware and kernel.
614 **/
615 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
616 {
617 int err;
618 int numvecs, i;
619
620 /* Number of supported queues. */
621 adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
622 adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());
623
624 /* start with one vector for every rx queue */
625 numvecs = adapter->num_rx_queues;
626
627 /* if tx handler is seperate add 1 for every tx queue */
628 numvecs += adapter->num_tx_queues;
629
630 /* store the number of vectors reserved for queues */
631 adapter->num_q_vectors = numvecs;
632
633 /* add 1 vector for link status interrupts */
634 numvecs++;
635 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
636 GFP_KERNEL);
637 if (!adapter->msix_entries)
638 goto msi_only;
639
640 for (i = 0; i < numvecs; i++)
641 adapter->msix_entries[i].entry = i;
642
643 err = pci_enable_msix(adapter->pdev,
644 adapter->msix_entries,
645 numvecs);
646 if (err == 0)
647 goto out;
648
649 igb_reset_interrupt_capability(adapter);
650
651 /* If we can't do MSI-X, try MSI */
652 msi_only:
653 #ifdef CONFIG_PCI_IOV
654 /* disable SR-IOV for non MSI-X configurations */
655 if (adapter->vf_data) {
656 struct e1000_hw *hw = &adapter->hw;
657 /* disable iov and allow time for transactions to clear */
658 pci_disable_sriov(adapter->pdev);
659 msleep(500);
660
661 kfree(adapter->vf_data);
662 adapter->vf_data = NULL;
663 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
664 msleep(100);
665 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
666 }
667 #endif
668 adapter->vfs_allocated_count = 0;
669 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
670 adapter->num_rx_queues = 1;
671 adapter->num_tx_queues = 1;
672 adapter->num_q_vectors = 1;
673 if (!pci_enable_msi(adapter->pdev))
674 adapter->flags |= IGB_FLAG_HAS_MSI;
675 out:
676 /* Notify the stack of the (possibly) reduced Tx Queue count. */
677 adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
678 return;
679 }
680
681 /**
682 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
683 * @adapter: board private structure to initialize
684 *
685 * We allocate one q_vector per queue interrupt. If allocation fails we
686 * return -ENOMEM.
687 **/
688 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
689 {
690 struct igb_q_vector *q_vector;
691 struct e1000_hw *hw = &adapter->hw;
692 int v_idx;
693
694 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
695 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
696 if (!q_vector)
697 goto err_out;
698 q_vector->adapter = adapter;
699 q_vector->itr_shift = (hw->mac.type == e1000_82575) ? 16 : 0;
700 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
701 q_vector->itr_val = IGB_START_ITR;
702 q_vector->set_itr = 1;
703 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
704 adapter->q_vector[v_idx] = q_vector;
705 }
706 return 0;
707
708 err_out:
709 while (v_idx) {
710 v_idx--;
711 q_vector = adapter->q_vector[v_idx];
712 netif_napi_del(&q_vector->napi);
713 kfree(q_vector);
714 adapter->q_vector[v_idx] = NULL;
715 }
716 return -ENOMEM;
717 }
718
719 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
720 int ring_idx, int v_idx)
721 {
722 struct igb_q_vector *q_vector;
723
724 q_vector = adapter->q_vector[v_idx];
725 q_vector->rx_ring = &adapter->rx_ring[ring_idx];
726 q_vector->rx_ring->q_vector = q_vector;
727 q_vector->itr_val = adapter->rx_itr_setting;
728 if (q_vector->itr_val && q_vector->itr_val <= 3)
729 q_vector->itr_val = IGB_START_ITR;
730 }
731
732 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
733 int ring_idx, int v_idx)
734 {
735 struct igb_q_vector *q_vector;
736
737 q_vector = adapter->q_vector[v_idx];
738 q_vector->tx_ring = &adapter->tx_ring[ring_idx];
739 q_vector->tx_ring->q_vector = q_vector;
740 q_vector->itr_val = adapter->tx_itr_setting;
741 if (q_vector->itr_val && q_vector->itr_val <= 3)
742 q_vector->itr_val = IGB_START_ITR;
743 }
744
745 /**
746 * igb_map_ring_to_vector - maps allocated queues to vectors
747 *
748 * This function maps the recently allocated queues to vectors.
749 **/
750 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
751 {
752 int i;
753 int v_idx = 0;
754
755 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
756 (adapter->num_q_vectors < adapter->num_tx_queues))
757 return -ENOMEM;
758
759 if (adapter->num_q_vectors >=
760 (adapter->num_rx_queues + adapter->num_tx_queues)) {
761 for (i = 0; i < adapter->num_rx_queues; i++)
762 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
763 for (i = 0; i < adapter->num_tx_queues; i++)
764 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
765 } else {
766 for (i = 0; i < adapter->num_rx_queues; i++) {
767 if (i < adapter->num_tx_queues)
768 igb_map_tx_ring_to_vector(adapter, i, v_idx);
769 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
770 }
771 for (; i < adapter->num_tx_queues; i++)
772 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
773 }
774 return 0;
775 }
776
777 /**
778 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
779 *
780 * This function initializes the interrupts and allocates all of the queues.
781 **/
782 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
783 {
784 struct pci_dev *pdev = adapter->pdev;
785 int err;
786
787 igb_set_interrupt_capability(adapter);
788
789 err = igb_alloc_q_vectors(adapter);
790 if (err) {
791 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
792 goto err_alloc_q_vectors;
793 }
794
795 err = igb_alloc_queues(adapter);
796 if (err) {
797 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
798 goto err_alloc_queues;
799 }
800
801 err = igb_map_ring_to_vector(adapter);
802 if (err) {
803 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
804 goto err_map_queues;
805 }
806
807
808 return 0;
809 err_map_queues:
810 igb_free_queues(adapter);
811 err_alloc_queues:
812 igb_free_q_vectors(adapter);
813 err_alloc_q_vectors:
814 igb_reset_interrupt_capability(adapter);
815 return err;
816 }
817
818 /**
819 * igb_request_irq - initialize interrupts
820 *
821 * Attempts to configure interrupts using the best available
822 * capabilities of the hardware and kernel.
823 **/
824 static int igb_request_irq(struct igb_adapter *adapter)
825 {
826 struct net_device *netdev = adapter->netdev;
827 struct pci_dev *pdev = adapter->pdev;
828 struct e1000_hw *hw = &adapter->hw;
829 int err = 0;
830
831 if (adapter->msix_entries) {
832 err = igb_request_msix(adapter);
833 if (!err)
834 goto request_done;
835 /* fall back to MSI */
836 igb_clear_interrupt_scheme(adapter);
837 if (!pci_enable_msi(adapter->pdev))
838 adapter->flags |= IGB_FLAG_HAS_MSI;
839 igb_free_all_tx_resources(adapter);
840 igb_free_all_rx_resources(adapter);
841 adapter->num_tx_queues = 1;
842 adapter->num_rx_queues = 1;
843 adapter->num_q_vectors = 1;
844 err = igb_alloc_q_vectors(adapter);
845 if (err) {
846 dev_err(&pdev->dev,
847 "Unable to allocate memory for vectors\n");
848 goto request_done;
849 }
850 err = igb_alloc_queues(adapter);
851 if (err) {
852 dev_err(&pdev->dev,
853 "Unable to allocate memory for queues\n");
854 igb_free_q_vectors(adapter);
855 goto request_done;
856 }
857 igb_setup_all_tx_resources(adapter);
858 igb_setup_all_rx_resources(adapter);
859 } else {
860 switch (hw->mac.type) {
861 case e1000_82575:
862 wr32(E1000_MSIXBM(0),
863 (E1000_EICR_RX_QUEUE0 |
864 E1000_EICR_TX_QUEUE0 |
865 E1000_EIMS_OTHER));
866 break;
867 case e1000_82576:
868 wr32(E1000_IVAR0, E1000_IVAR_VALID);
869 break;
870 default:
871 break;
872 }
873 }
874
875 if (adapter->flags & IGB_FLAG_HAS_MSI) {
876 err = request_irq(adapter->pdev->irq, &igb_intr_msi, 0,
877 netdev->name, adapter);
878 if (!err)
879 goto request_done;
880
881 /* fall back to legacy interrupts */
882 igb_reset_interrupt_capability(adapter);
883 adapter->flags &= ~IGB_FLAG_HAS_MSI;
884 }
885
886 err = request_irq(adapter->pdev->irq, &igb_intr, IRQF_SHARED,
887 netdev->name, adapter);
888
889 if (err)
890 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
891 err);
892
893 request_done:
894 return err;
895 }
896
897 static void igb_free_irq(struct igb_adapter *adapter)
898 {
899 if (adapter->msix_entries) {
900 int vector = 0, i;
901
902 free_irq(adapter->msix_entries[vector++].vector, adapter);
903
904 for (i = 0; i < adapter->num_q_vectors; i++) {
905 struct igb_q_vector *q_vector = adapter->q_vector[i];
906 free_irq(adapter->msix_entries[vector++].vector,
907 q_vector);
908 }
909 } else {
910 free_irq(adapter->pdev->irq, adapter);
911 }
912 }
913
914 /**
915 * igb_irq_disable - Mask off interrupt generation on the NIC
916 * @adapter: board private structure
917 **/
918 static void igb_irq_disable(struct igb_adapter *adapter)
919 {
920 struct e1000_hw *hw = &adapter->hw;
921
922 /*
923 * we need to be careful when disabling interrupts. The VFs are also
924 * mapped into these registers and so clearing the bits can cause
925 * issues on the VF drivers so we only need to clear what we set
926 */
927 if (adapter->msix_entries) {
928 u32 regval = rd32(E1000_EIAM);
929 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
930 wr32(E1000_EIMC, adapter->eims_enable_mask);
931 regval = rd32(E1000_EIAC);
932 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
933 }
934
935 wr32(E1000_IAM, 0);
936 wr32(E1000_IMC, ~0);
937 wrfl();
938 synchronize_irq(adapter->pdev->irq);
939 }
940
941 /**
942 * igb_irq_enable - Enable default interrupt generation settings
943 * @adapter: board private structure
944 **/
945 static void igb_irq_enable(struct igb_adapter *adapter)
946 {
947 struct e1000_hw *hw = &adapter->hw;
948
949 if (adapter->msix_entries) {
950 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
951 u32 regval = rd32(E1000_EIAC);
952 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
953 regval = rd32(E1000_EIAM);
954 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
955 wr32(E1000_EIMS, adapter->eims_enable_mask);
956 if (adapter->vfs_allocated_count) {
957 wr32(E1000_MBVFIMR, 0xFF);
958 ims |= E1000_IMS_VMMB;
959 }
960 wr32(E1000_IMS, ims);
961 } else {
962 wr32(E1000_IMS, IMS_ENABLE_MASK);
963 wr32(E1000_IAM, IMS_ENABLE_MASK);
964 }
965 }
966
967 static void igb_update_mng_vlan(struct igb_adapter *adapter)
968 {
969 struct e1000_hw *hw = &adapter->hw;
970 u16 vid = adapter->hw.mng_cookie.vlan_id;
971 u16 old_vid = adapter->mng_vlan_id;
972
973 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
974 /* add VID to filter table */
975 igb_vfta_set(hw, vid, true);
976 adapter->mng_vlan_id = vid;
977 } else {
978 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
979 }
980
981 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
982 (vid != old_vid) &&
983 !vlan_group_get_device(adapter->vlgrp, old_vid)) {
984 /* remove VID from filter table */
985 igb_vfta_set(hw, old_vid, false);
986 }
987 }
988
989 /**
990 * igb_release_hw_control - release control of the h/w to f/w
991 * @adapter: address of board private structure
992 *
993 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
994 * For ASF and Pass Through versions of f/w this means that the
995 * driver is no longer loaded.
996 *
997 **/
998 static void igb_release_hw_control(struct igb_adapter *adapter)
999 {
1000 struct e1000_hw *hw = &adapter->hw;
1001 u32 ctrl_ext;
1002
1003 /* Let firmware take over control of h/w */
1004 ctrl_ext = rd32(E1000_CTRL_EXT);
1005 wr32(E1000_CTRL_EXT,
1006 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1007 }
1008
1009 /**
1010 * igb_get_hw_control - get control of the h/w from f/w
1011 * @adapter: address of board private structure
1012 *
1013 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1014 * For ASF and Pass Through versions of f/w this means that
1015 * the driver is loaded.
1016 *
1017 **/
1018 static void igb_get_hw_control(struct igb_adapter *adapter)
1019 {
1020 struct e1000_hw *hw = &adapter->hw;
1021 u32 ctrl_ext;
1022
1023 /* Let firmware know the driver has taken over */
1024 ctrl_ext = rd32(E1000_CTRL_EXT);
1025 wr32(E1000_CTRL_EXT,
1026 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1027 }
1028
1029 /**
1030 * igb_configure - configure the hardware for RX and TX
1031 * @adapter: private board structure
1032 **/
1033 static void igb_configure(struct igb_adapter *adapter)
1034 {
1035 struct net_device *netdev = adapter->netdev;
1036 int i;
1037
1038 igb_get_hw_control(adapter);
1039 igb_set_rx_mode(netdev);
1040
1041 igb_restore_vlan(adapter);
1042
1043 igb_setup_tctl(adapter);
1044 igb_setup_mrqc(adapter);
1045 igb_setup_rctl(adapter);
1046
1047 igb_configure_tx(adapter);
1048 igb_configure_rx(adapter);
1049
1050 igb_rx_fifo_flush_82575(&adapter->hw);
1051
1052 /* call igb_desc_unused which always leaves
1053 * at least 1 descriptor unused to make sure
1054 * next_to_use != next_to_clean */
1055 for (i = 0; i < adapter->num_rx_queues; i++) {
1056 struct igb_ring *ring = &adapter->rx_ring[i];
1057 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1058 }
1059
1060
1061 adapter->tx_queue_len = netdev->tx_queue_len;
1062 }
1063
1064
1065 /**
1066 * igb_up - Open the interface and prepare it to handle traffic
1067 * @adapter: board private structure
1068 **/
1069 int igb_up(struct igb_adapter *adapter)
1070 {
1071 struct e1000_hw *hw = &adapter->hw;
1072 int i;
1073
1074 /* hardware has been reset, we need to reload some things */
1075 igb_configure(adapter);
1076
1077 clear_bit(__IGB_DOWN, &adapter->state);
1078
1079 for (i = 0; i < adapter->num_q_vectors; i++) {
1080 struct igb_q_vector *q_vector = adapter->q_vector[i];
1081 napi_enable(&q_vector->napi);
1082 }
1083 if (adapter->msix_entries)
1084 igb_configure_msix(adapter);
1085
1086 /* Clear any pending interrupts. */
1087 rd32(E1000_ICR);
1088 igb_irq_enable(adapter);
1089
1090 /* notify VFs that reset has been completed */
1091 if (adapter->vfs_allocated_count) {
1092 u32 reg_data = rd32(E1000_CTRL_EXT);
1093 reg_data |= E1000_CTRL_EXT_PFRSTD;
1094 wr32(E1000_CTRL_EXT, reg_data);
1095 }
1096
1097 netif_tx_start_all_queues(adapter->netdev);
1098
1099 /* start the watchdog. */
1100 hw->mac.get_link_status = 1;
1101 schedule_work(&adapter->watchdog_task);
1102
1103 return 0;
1104 }
1105
1106 void igb_down(struct igb_adapter *adapter)
1107 {
1108 struct net_device *netdev = adapter->netdev;
1109 struct e1000_hw *hw = &adapter->hw;
1110 u32 tctl, rctl;
1111 int i;
1112
1113 /* signal that we're down so the interrupt handler does not
1114 * reschedule our watchdog timer */
1115 set_bit(__IGB_DOWN, &adapter->state);
1116
1117 /* disable receives in the hardware */
1118 rctl = rd32(E1000_RCTL);
1119 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1120 /* flush and sleep below */
1121
1122 netif_tx_stop_all_queues(netdev);
1123
1124 /* disable transmits in the hardware */
1125 tctl = rd32(E1000_TCTL);
1126 tctl &= ~E1000_TCTL_EN;
1127 wr32(E1000_TCTL, tctl);
1128 /* flush both disables and wait for them to finish */
1129 wrfl();
1130 msleep(10);
1131
1132 for (i = 0; i < adapter->num_q_vectors; i++) {
1133 struct igb_q_vector *q_vector = adapter->q_vector[i];
1134 napi_disable(&q_vector->napi);
1135 }
1136
1137 igb_irq_disable(adapter);
1138
1139 del_timer_sync(&adapter->watchdog_timer);
1140 del_timer_sync(&adapter->phy_info_timer);
1141
1142 netdev->tx_queue_len = adapter->tx_queue_len;
1143 netif_carrier_off(netdev);
1144
1145 /* record the stats before reset*/
1146 igb_update_stats(adapter);
1147
1148 adapter->link_speed = 0;
1149 adapter->link_duplex = 0;
1150
1151 if (!pci_channel_offline(adapter->pdev))
1152 igb_reset(adapter);
1153 igb_clean_all_tx_rings(adapter);
1154 igb_clean_all_rx_rings(adapter);
1155 #ifdef CONFIG_IGB_DCA
1156
1157 /* since we reset the hardware DCA settings were cleared */
1158 igb_setup_dca(adapter);
1159 #endif
1160 }
1161
1162 void igb_reinit_locked(struct igb_adapter *adapter)
1163 {
1164 WARN_ON(in_interrupt());
1165 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1166 msleep(1);
1167 igb_down(adapter);
1168 igb_up(adapter);
1169 clear_bit(__IGB_RESETTING, &adapter->state);
1170 }
1171
1172 void igb_reset(struct igb_adapter *adapter)
1173 {
1174 struct pci_dev *pdev = adapter->pdev;
1175 struct e1000_hw *hw = &adapter->hw;
1176 struct e1000_mac_info *mac = &hw->mac;
1177 struct e1000_fc_info *fc = &hw->fc;
1178 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1179 u16 hwm;
1180
1181 /* Repartition Pba for greater than 9k mtu
1182 * To take effect CTRL.RST is required.
1183 */
1184 switch (mac->type) {
1185 case e1000_82576:
1186 pba = rd32(E1000_RXPBS);
1187 pba &= E1000_RXPBS_SIZE_MASK_82576;
1188 break;
1189 case e1000_82575:
1190 default:
1191 pba = E1000_PBA_34K;
1192 break;
1193 }
1194
1195 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1196 (mac->type < e1000_82576)) {
1197 /* adjust PBA for jumbo frames */
1198 wr32(E1000_PBA, pba);
1199
1200 /* To maintain wire speed transmits, the Tx FIFO should be
1201 * large enough to accommodate two full transmit packets,
1202 * rounded up to the next 1KB and expressed in KB. Likewise,
1203 * the Rx FIFO should be large enough to accommodate at least
1204 * one full receive packet and is similarly rounded up and
1205 * expressed in KB. */
1206 pba = rd32(E1000_PBA);
1207 /* upper 16 bits has Tx packet buffer allocation size in KB */
1208 tx_space = pba >> 16;
1209 /* lower 16 bits has Rx packet buffer allocation size in KB */
1210 pba &= 0xffff;
1211 /* the tx fifo also stores 16 bytes of information about the tx
1212 * but don't include ethernet FCS because hardware appends it */
1213 min_tx_space = (adapter->max_frame_size +
1214 sizeof(union e1000_adv_tx_desc) -
1215 ETH_FCS_LEN) * 2;
1216 min_tx_space = ALIGN(min_tx_space, 1024);
1217 min_tx_space >>= 10;
1218 /* software strips receive CRC, so leave room for it */
1219 min_rx_space = adapter->max_frame_size;
1220 min_rx_space = ALIGN(min_rx_space, 1024);
1221 min_rx_space >>= 10;
1222
1223 /* If current Tx allocation is less than the min Tx FIFO size,
1224 * and the min Tx FIFO size is less than the current Rx FIFO
1225 * allocation, take space away from current Rx allocation */
1226 if (tx_space < min_tx_space &&
1227 ((min_tx_space - tx_space) < pba)) {
1228 pba = pba - (min_tx_space - tx_space);
1229
1230 /* if short on rx space, rx wins and must trump tx
1231 * adjustment */
1232 if (pba < min_rx_space)
1233 pba = min_rx_space;
1234 }
1235 wr32(E1000_PBA, pba);
1236 }
1237
1238 /* flow control settings */
1239 /* The high water mark must be low enough to fit one full frame
1240 * (or the size used for early receive) above it in the Rx FIFO.
1241 * Set it to the lower of:
1242 * - 90% of the Rx FIFO size, or
1243 * - the full Rx FIFO size minus one full frame */
1244 hwm = min(((pba << 10) * 9 / 10),
1245 ((pba << 10) - 2 * adapter->max_frame_size));
1246
1247 if (mac->type < e1000_82576) {
1248 fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */
1249 fc->low_water = fc->high_water - 8;
1250 } else {
1251 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1252 fc->low_water = fc->high_water - 16;
1253 }
1254 fc->pause_time = 0xFFFF;
1255 fc->send_xon = 1;
1256 fc->current_mode = fc->requested_mode;
1257
1258 /* disable receive for all VFs and wait one second */
1259 if (adapter->vfs_allocated_count) {
1260 int i;
1261 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1262 adapter->vf_data[i].flags = 0;
1263
1264 /* ping all the active vfs to let them know we are going down */
1265 igb_ping_all_vfs(adapter);
1266
1267 /* disable transmits and receives */
1268 wr32(E1000_VFRE, 0);
1269 wr32(E1000_VFTE, 0);
1270 }
1271
1272 /* Allow time for pending master requests to run */
1273 hw->mac.ops.reset_hw(hw);
1274 wr32(E1000_WUC, 0);
1275
1276 if (hw->mac.ops.init_hw(hw))
1277 dev_err(&pdev->dev, "Hardware Error\n");
1278
1279 igb_update_mng_vlan(adapter);
1280
1281 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1282 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1283
1284 igb_reset_adaptive(hw);
1285 igb_get_phy_info(hw);
1286 }
1287
1288 static const struct net_device_ops igb_netdev_ops = {
1289 .ndo_open = igb_open,
1290 .ndo_stop = igb_close,
1291 .ndo_start_xmit = igb_xmit_frame_adv,
1292 .ndo_get_stats = igb_get_stats,
1293 .ndo_set_rx_mode = igb_set_rx_mode,
1294 .ndo_set_multicast_list = igb_set_rx_mode,
1295 .ndo_set_mac_address = igb_set_mac,
1296 .ndo_change_mtu = igb_change_mtu,
1297 .ndo_do_ioctl = igb_ioctl,
1298 .ndo_tx_timeout = igb_tx_timeout,
1299 .ndo_validate_addr = eth_validate_addr,
1300 .ndo_vlan_rx_register = igb_vlan_rx_register,
1301 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1302 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1303 #ifdef CONFIG_NET_POLL_CONTROLLER
1304 .ndo_poll_controller = igb_netpoll,
1305 #endif
1306 };
1307
1308 /**
1309 * igb_probe - Device Initialization Routine
1310 * @pdev: PCI device information struct
1311 * @ent: entry in igb_pci_tbl
1312 *
1313 * Returns 0 on success, negative on failure
1314 *
1315 * igb_probe initializes an adapter identified by a pci_dev structure.
1316 * The OS initialization, configuring of the adapter private structure,
1317 * and a hardware reset occur.
1318 **/
1319 static int __devinit igb_probe(struct pci_dev *pdev,
1320 const struct pci_device_id *ent)
1321 {
1322 struct net_device *netdev;
1323 struct igb_adapter *adapter;
1324 struct e1000_hw *hw;
1325 u16 eeprom_data = 0;
1326 static int global_quad_port_a; /* global quad port a indication */
1327 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1328 unsigned long mmio_start, mmio_len;
1329 int err, pci_using_dac;
1330 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1331 u32 part_num;
1332
1333 err = pci_enable_device_mem(pdev);
1334 if (err)
1335 return err;
1336
1337 pci_using_dac = 0;
1338 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1339 if (!err) {
1340 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1341 if (!err)
1342 pci_using_dac = 1;
1343 } else {
1344 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1345 if (err) {
1346 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1347 if (err) {
1348 dev_err(&pdev->dev, "No usable DMA "
1349 "configuration, aborting\n");
1350 goto err_dma;
1351 }
1352 }
1353 }
1354
1355 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1356 IORESOURCE_MEM),
1357 igb_driver_name);
1358 if (err)
1359 goto err_pci_reg;
1360
1361 pci_enable_pcie_error_reporting(pdev);
1362
1363 pci_set_master(pdev);
1364 pci_save_state(pdev);
1365
1366 err = -ENOMEM;
1367 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1368 IGB_ABS_MAX_TX_QUEUES);
1369 if (!netdev)
1370 goto err_alloc_etherdev;
1371
1372 SET_NETDEV_DEV(netdev, &pdev->dev);
1373
1374 pci_set_drvdata(pdev, netdev);
1375 adapter = netdev_priv(netdev);
1376 adapter->netdev = netdev;
1377 adapter->pdev = pdev;
1378 hw = &adapter->hw;
1379 hw->back = adapter;
1380 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1381
1382 mmio_start = pci_resource_start(pdev, 0);
1383 mmio_len = pci_resource_len(pdev, 0);
1384
1385 err = -EIO;
1386 hw->hw_addr = ioremap(mmio_start, mmio_len);
1387 if (!hw->hw_addr)
1388 goto err_ioremap;
1389
1390 netdev->netdev_ops = &igb_netdev_ops;
1391 igb_set_ethtool_ops(netdev);
1392 netdev->watchdog_timeo = 5 * HZ;
1393
1394 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1395
1396 netdev->mem_start = mmio_start;
1397 netdev->mem_end = mmio_start + mmio_len;
1398
1399 /* PCI config space info */
1400 hw->vendor_id = pdev->vendor;
1401 hw->device_id = pdev->device;
1402 hw->revision_id = pdev->revision;
1403 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1404 hw->subsystem_device_id = pdev->subsystem_device;
1405
1406 /* Copy the default MAC, PHY and NVM function pointers */
1407 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1408 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1409 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1410 /* Initialize skew-specific constants */
1411 err = ei->get_invariants(hw);
1412 if (err)
1413 goto err_sw_init;
1414
1415 /* setup the private structure */
1416 err = igb_sw_init(adapter);
1417 if (err)
1418 goto err_sw_init;
1419
1420 igb_get_bus_info_pcie(hw);
1421
1422 hw->phy.autoneg_wait_to_complete = false;
1423 hw->mac.adaptive_ifs = true;
1424
1425 /* Copper options */
1426 if (hw->phy.media_type == e1000_media_type_copper) {
1427 hw->phy.mdix = AUTO_ALL_MODES;
1428 hw->phy.disable_polarity_correction = false;
1429 hw->phy.ms_type = e1000_ms_hw_default;
1430 }
1431
1432 if (igb_check_reset_block(hw))
1433 dev_info(&pdev->dev,
1434 "PHY reset is blocked due to SOL/IDER session.\n");
1435
1436 netdev->features = NETIF_F_SG |
1437 NETIF_F_IP_CSUM |
1438 NETIF_F_HW_VLAN_TX |
1439 NETIF_F_HW_VLAN_RX |
1440 NETIF_F_HW_VLAN_FILTER;
1441
1442 netdev->features |= NETIF_F_IPV6_CSUM;
1443 netdev->features |= NETIF_F_TSO;
1444 netdev->features |= NETIF_F_TSO6;
1445 netdev->features |= NETIF_F_GRO;
1446
1447 netdev->vlan_features |= NETIF_F_TSO;
1448 netdev->vlan_features |= NETIF_F_TSO6;
1449 netdev->vlan_features |= NETIF_F_IP_CSUM;
1450 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1451 netdev->vlan_features |= NETIF_F_SG;
1452
1453 if (pci_using_dac)
1454 netdev->features |= NETIF_F_HIGHDMA;
1455
1456 if (hw->mac.type >= e1000_82576)
1457 netdev->features |= NETIF_F_SCTP_CSUM;
1458
1459 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1460
1461 /* before reading the NVM, reset the controller to put the device in a
1462 * known good starting state */
1463 hw->mac.ops.reset_hw(hw);
1464
1465 /* make sure the NVM is good */
1466 if (igb_validate_nvm_checksum(hw) < 0) {
1467 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1468 err = -EIO;
1469 goto err_eeprom;
1470 }
1471
1472 /* copy the MAC address out of the NVM */
1473 if (hw->mac.ops.read_mac_addr(hw))
1474 dev_err(&pdev->dev, "NVM Read Error\n");
1475
1476 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1477 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1478
1479 if (!is_valid_ether_addr(netdev->perm_addr)) {
1480 dev_err(&pdev->dev, "Invalid MAC Address\n");
1481 err = -EIO;
1482 goto err_eeprom;
1483 }
1484
1485 setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1486 (unsigned long) adapter);
1487 setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1488 (unsigned long) adapter);
1489
1490 INIT_WORK(&adapter->reset_task, igb_reset_task);
1491 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1492
1493 /* Initialize link properties that are user-changeable */
1494 adapter->fc_autoneg = true;
1495 hw->mac.autoneg = true;
1496 hw->phy.autoneg_advertised = 0x2f;
1497
1498 hw->fc.requested_mode = e1000_fc_default;
1499 hw->fc.current_mode = e1000_fc_default;
1500
1501 igb_validate_mdi_setting(hw);
1502
1503 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
1504 * enable the ACPI Magic Packet filter
1505 */
1506
1507 if (hw->bus.func == 0)
1508 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1509 else if (hw->bus.func == 1)
1510 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1511
1512 if (eeprom_data & eeprom_apme_mask)
1513 adapter->eeprom_wol |= E1000_WUFC_MAG;
1514
1515 /* now that we have the eeprom settings, apply the special cases where
1516 * the eeprom may be wrong or the board simply won't support wake on
1517 * lan on a particular port */
1518 switch (pdev->device) {
1519 case E1000_DEV_ID_82575GB_QUAD_COPPER:
1520 adapter->eeprom_wol = 0;
1521 break;
1522 case E1000_DEV_ID_82575EB_FIBER_SERDES:
1523 case E1000_DEV_ID_82576_FIBER:
1524 case E1000_DEV_ID_82576_SERDES:
1525 /* Wake events only supported on port A for dual fiber
1526 * regardless of eeprom setting */
1527 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1528 adapter->eeprom_wol = 0;
1529 break;
1530 case E1000_DEV_ID_82576_QUAD_COPPER:
1531 /* if quad port adapter, disable WoL on all but port A */
1532 if (global_quad_port_a != 0)
1533 adapter->eeprom_wol = 0;
1534 else
1535 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1536 /* Reset for multiple quad port adapters */
1537 if (++global_quad_port_a == 4)
1538 global_quad_port_a = 0;
1539 break;
1540 }
1541
1542 /* initialize the wol settings based on the eeprom settings */
1543 adapter->wol = adapter->eeprom_wol;
1544 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1545
1546 /* reset the hardware with the new settings */
1547 igb_reset(adapter);
1548
1549 /* let the f/w know that the h/w is now under the control of the
1550 * driver. */
1551 igb_get_hw_control(adapter);
1552
1553 strcpy(netdev->name, "eth%d");
1554 err = register_netdev(netdev);
1555 if (err)
1556 goto err_register;
1557
1558 /* carrier off reporting is important to ethtool even BEFORE open */
1559 netif_carrier_off(netdev);
1560
1561 #ifdef CONFIG_IGB_DCA
1562 if (dca_add_requester(&pdev->dev) == 0) {
1563 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1564 dev_info(&pdev->dev, "DCA enabled\n");
1565 igb_setup_dca(adapter);
1566 }
1567
1568 #endif
1569 switch (hw->mac.type) {
1570 case e1000_82576:
1571 /*
1572 * Initialize hardware timer: we keep it running just in case
1573 * that some program needs it later on.
1574 */
1575 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1576 adapter->cycles.read = igb_read_clock;
1577 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1578 adapter->cycles.mult = 1;
1579 /**
1580 * Scale the NIC clock cycle by a large factor so that
1581 * relatively small clock corrections can be added or
1582 * substracted at each clock tick. The drawbacks of a large
1583 * factor are a) that the clock register overflows more quickly
1584 * (not such a big deal) and b) that the increment per tick has
1585 * to fit into 24 bits. As a result we need to use a shift of
1586 * 19 so we can fit a value of 16 into the TIMINCA register.
1587 */
1588 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1589 wr32(E1000_TIMINCA,
1590 (1 << E1000_TIMINCA_16NS_SHIFT) |
1591 (16 << IGB_82576_TSYNC_SHIFT));
1592
1593 /* Set registers so that rollover occurs soon to test this. */
1594 wr32(E1000_SYSTIML, 0x00000000);
1595 wr32(E1000_SYSTIMH, 0xFF800000);
1596 wrfl();
1597
1598 timecounter_init(&adapter->clock,
1599 &adapter->cycles,
1600 ktime_to_ns(ktime_get_real()));
1601 /*
1602 * Synchronize our NIC clock against system wall clock. NIC
1603 * time stamp reading requires ~3us per sample, each sample
1604 * was pretty stable even under load => only require 10
1605 * samples for each offset comparison.
1606 */
1607 memset(&adapter->compare, 0, sizeof(adapter->compare));
1608 adapter->compare.source = &adapter->clock;
1609 adapter->compare.target = ktime_get_real;
1610 adapter->compare.num_samples = 10;
1611 timecompare_update(&adapter->compare, 0);
1612 break;
1613 case e1000_82575:
1614 /* 82575 does not support timesync */
1615 default:
1616 break;
1617 }
1618
1619 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1620 /* print bus type/speed/width info */
1621 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1622 netdev->name,
1623 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1624 "unknown"),
1625 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1626 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1627 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1628 "unknown"),
1629 netdev->dev_addr);
1630
1631 igb_read_part_num(hw, &part_num);
1632 dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1633 (part_num >> 8), (part_num & 0xff));
1634
1635 dev_info(&pdev->dev,
1636 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1637 adapter->msix_entries ? "MSI-X" :
1638 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1639 adapter->num_rx_queues, adapter->num_tx_queues);
1640
1641 return 0;
1642
1643 err_register:
1644 igb_release_hw_control(adapter);
1645 err_eeprom:
1646 if (!igb_check_reset_block(hw))
1647 igb_reset_phy(hw);
1648
1649 if (hw->flash_address)
1650 iounmap(hw->flash_address);
1651 err_sw_init:
1652 igb_clear_interrupt_scheme(adapter);
1653 iounmap(hw->hw_addr);
1654 err_ioremap:
1655 free_netdev(netdev);
1656 err_alloc_etherdev:
1657 pci_release_selected_regions(pdev,
1658 pci_select_bars(pdev, IORESOURCE_MEM));
1659 err_pci_reg:
1660 err_dma:
1661 pci_disable_device(pdev);
1662 return err;
1663 }
1664
1665 /**
1666 * igb_remove - Device Removal Routine
1667 * @pdev: PCI device information struct
1668 *
1669 * igb_remove is called by the PCI subsystem to alert the driver
1670 * that it should release a PCI device. The could be caused by a
1671 * Hot-Plug event, or because the driver is going to be removed from
1672 * memory.
1673 **/
1674 static void __devexit igb_remove(struct pci_dev *pdev)
1675 {
1676 struct net_device *netdev = pci_get_drvdata(pdev);
1677 struct igb_adapter *adapter = netdev_priv(netdev);
1678 struct e1000_hw *hw = &adapter->hw;
1679
1680 /* flush_scheduled work may reschedule our watchdog task, so
1681 * explicitly disable watchdog tasks from being rescheduled */
1682 set_bit(__IGB_DOWN, &adapter->state);
1683 del_timer_sync(&adapter->watchdog_timer);
1684 del_timer_sync(&adapter->phy_info_timer);
1685
1686 flush_scheduled_work();
1687
1688 #ifdef CONFIG_IGB_DCA
1689 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1690 dev_info(&pdev->dev, "DCA disabled\n");
1691 dca_remove_requester(&pdev->dev);
1692 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1693 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1694 }
1695 #endif
1696
1697 /* Release control of h/w to f/w. If f/w is AMT enabled, this
1698 * would have already happened in close and is redundant. */
1699 igb_release_hw_control(adapter);
1700
1701 unregister_netdev(netdev);
1702
1703 if (!igb_check_reset_block(hw))
1704 igb_reset_phy(hw);
1705
1706 igb_clear_interrupt_scheme(adapter);
1707
1708 #ifdef CONFIG_PCI_IOV
1709 /* reclaim resources allocated to VFs */
1710 if (adapter->vf_data) {
1711 /* disable iov and allow time for transactions to clear */
1712 pci_disable_sriov(pdev);
1713 msleep(500);
1714
1715 kfree(adapter->vf_data);
1716 adapter->vf_data = NULL;
1717 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1718 msleep(100);
1719 dev_info(&pdev->dev, "IOV Disabled\n");
1720 }
1721 #endif
1722
1723 iounmap(hw->hw_addr);
1724 if (hw->flash_address)
1725 iounmap(hw->flash_address);
1726 pci_release_selected_regions(pdev,
1727 pci_select_bars(pdev, IORESOURCE_MEM));
1728
1729 free_netdev(netdev);
1730
1731 pci_disable_pcie_error_reporting(pdev);
1732
1733 pci_disable_device(pdev);
1734 }
1735
1736 /**
1737 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1738 * @adapter: board private structure to initialize
1739 *
1740 * This function initializes the vf specific data storage and then attempts to
1741 * allocate the VFs. The reason for ordering it this way is because it is much
1742 * mor expensive time wise to disable SR-IOV than it is to allocate and free
1743 * the memory for the VFs.
1744 **/
1745 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
1746 {
1747 #ifdef CONFIG_PCI_IOV
1748 struct pci_dev *pdev = adapter->pdev;
1749
1750 if (adapter->vfs_allocated_count > 7)
1751 adapter->vfs_allocated_count = 7;
1752
1753 if (adapter->vfs_allocated_count) {
1754 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1755 sizeof(struct vf_data_storage),
1756 GFP_KERNEL);
1757 /* if allocation failed then we do not support SR-IOV */
1758 if (!adapter->vf_data) {
1759 adapter->vfs_allocated_count = 0;
1760 dev_err(&pdev->dev, "Unable to allocate memory for VF "
1761 "Data Storage\n");
1762 }
1763 }
1764
1765 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1766 kfree(adapter->vf_data);
1767 adapter->vf_data = NULL;
1768 #endif /* CONFIG_PCI_IOV */
1769 adapter->vfs_allocated_count = 0;
1770 #ifdef CONFIG_PCI_IOV
1771 } else {
1772 unsigned char mac_addr[ETH_ALEN];
1773 int i;
1774 dev_info(&pdev->dev, "%d vfs allocated\n",
1775 adapter->vfs_allocated_count);
1776 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1777 random_ether_addr(mac_addr);
1778 igb_set_vf_mac(adapter, i, mac_addr);
1779 }
1780 }
1781 #endif /* CONFIG_PCI_IOV */
1782 }
1783
1784 /**
1785 * igb_sw_init - Initialize general software structures (struct igb_adapter)
1786 * @adapter: board private structure to initialize
1787 *
1788 * igb_sw_init initializes the Adapter private data structure.
1789 * Fields are initialized based on PCI device information and
1790 * OS network device settings (MTU size).
1791 **/
1792 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1793 {
1794 struct e1000_hw *hw = &adapter->hw;
1795 struct net_device *netdev = adapter->netdev;
1796 struct pci_dev *pdev = adapter->pdev;
1797
1798 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1799
1800 adapter->tx_ring_count = IGB_DEFAULT_TXD;
1801 adapter->rx_ring_count = IGB_DEFAULT_RXD;
1802 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1803 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1804
1805 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1806 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1807
1808 #ifdef CONFIG_PCI_IOV
1809 if (hw->mac.type == e1000_82576)
1810 adapter->vfs_allocated_count = max_vfs;
1811
1812 #endif /* CONFIG_PCI_IOV */
1813 /* This call may decrease the number of queues */
1814 if (igb_init_interrupt_scheme(adapter)) {
1815 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1816 return -ENOMEM;
1817 }
1818
1819 igb_probe_vfs(adapter);
1820
1821 /* Explicitly disable IRQ since the NIC can be in any state. */
1822 igb_irq_disable(adapter);
1823
1824 set_bit(__IGB_DOWN, &adapter->state);
1825 return 0;
1826 }
1827
1828 /**
1829 * igb_open - Called when a network interface is made active
1830 * @netdev: network interface device structure
1831 *
1832 * Returns 0 on success, negative value on failure
1833 *
1834 * The open entry point is called when a network interface is made
1835 * active by the system (IFF_UP). At this point all resources needed
1836 * for transmit and receive operations are allocated, the interrupt
1837 * handler is registered with the OS, the watchdog timer is started,
1838 * and the stack is notified that the interface is ready.
1839 **/
1840 static int igb_open(struct net_device *netdev)
1841 {
1842 struct igb_adapter *adapter = netdev_priv(netdev);
1843 struct e1000_hw *hw = &adapter->hw;
1844 int err;
1845 int i;
1846
1847 /* disallow open during test */
1848 if (test_bit(__IGB_TESTING, &adapter->state))
1849 return -EBUSY;
1850
1851 netif_carrier_off(netdev);
1852
1853 /* allocate transmit descriptors */
1854 err = igb_setup_all_tx_resources(adapter);
1855 if (err)
1856 goto err_setup_tx;
1857
1858 /* allocate receive descriptors */
1859 err = igb_setup_all_rx_resources(adapter);
1860 if (err)
1861 goto err_setup_rx;
1862
1863 /* e1000_power_up_phy(adapter); */
1864
1865 /* before we allocate an interrupt, we must be ready to handle it.
1866 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1867 * as soon as we call pci_request_irq, so we have to setup our
1868 * clean_rx handler before we do so. */
1869 igb_configure(adapter);
1870
1871 err = igb_request_irq(adapter);
1872 if (err)
1873 goto err_req_irq;
1874
1875 /* From here on the code is the same as igb_up() */
1876 clear_bit(__IGB_DOWN, &adapter->state);
1877
1878 for (i = 0; i < adapter->num_q_vectors; i++) {
1879 struct igb_q_vector *q_vector = adapter->q_vector[i];
1880 napi_enable(&q_vector->napi);
1881 }
1882
1883 /* Clear any pending interrupts. */
1884 rd32(E1000_ICR);
1885
1886 igb_irq_enable(adapter);
1887
1888 /* notify VFs that reset has been completed */
1889 if (adapter->vfs_allocated_count) {
1890 u32 reg_data = rd32(E1000_CTRL_EXT);
1891 reg_data |= E1000_CTRL_EXT_PFRSTD;
1892 wr32(E1000_CTRL_EXT, reg_data);
1893 }
1894
1895 netif_tx_start_all_queues(netdev);
1896
1897 /* start the watchdog. */
1898 hw->mac.get_link_status = 1;
1899 schedule_work(&adapter->watchdog_task);
1900
1901 return 0;
1902
1903 err_req_irq:
1904 igb_release_hw_control(adapter);
1905 /* e1000_power_down_phy(adapter); */
1906 igb_free_all_rx_resources(adapter);
1907 err_setup_rx:
1908 igb_free_all_tx_resources(adapter);
1909 err_setup_tx:
1910 igb_reset(adapter);
1911
1912 return err;
1913 }
1914
1915 /**
1916 * igb_close - Disables a network interface
1917 * @netdev: network interface device structure
1918 *
1919 * Returns 0, this is not allowed to fail
1920 *
1921 * The close entry point is called when an interface is de-activated
1922 * by the OS. The hardware is still under the driver's control, but
1923 * needs to be disabled. A global MAC reset is issued to stop the
1924 * hardware, and all transmit and receive resources are freed.
1925 **/
1926 static int igb_close(struct net_device *netdev)
1927 {
1928 struct igb_adapter *adapter = netdev_priv(netdev);
1929
1930 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
1931 igb_down(adapter);
1932
1933 igb_free_irq(adapter);
1934
1935 igb_free_all_tx_resources(adapter);
1936 igb_free_all_rx_resources(adapter);
1937
1938 return 0;
1939 }
1940
1941 /**
1942 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
1943 * @tx_ring: tx descriptor ring (for a specific queue) to setup
1944 *
1945 * Return 0 on success, negative on failure
1946 **/
1947 int igb_setup_tx_resources(struct igb_ring *tx_ring)
1948 {
1949 struct pci_dev *pdev = tx_ring->pdev;
1950 int size;
1951
1952 size = sizeof(struct igb_buffer) * tx_ring->count;
1953 tx_ring->buffer_info = vmalloc(size);
1954 if (!tx_ring->buffer_info)
1955 goto err;
1956 memset(tx_ring->buffer_info, 0, size);
1957
1958 /* round up to nearest 4K */
1959 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
1960 tx_ring->size = ALIGN(tx_ring->size, 4096);
1961
1962 tx_ring->desc = pci_alloc_consistent(pdev,
1963 tx_ring->size,
1964 &tx_ring->dma);
1965
1966 if (!tx_ring->desc)
1967 goto err;
1968
1969 tx_ring->next_to_use = 0;
1970 tx_ring->next_to_clean = 0;
1971 return 0;
1972
1973 err:
1974 vfree(tx_ring->buffer_info);
1975 dev_err(&pdev->dev,
1976 "Unable to allocate memory for the transmit descriptor ring\n");
1977 return -ENOMEM;
1978 }
1979
1980 /**
1981 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
1982 * (Descriptors) for all queues
1983 * @adapter: board private structure
1984 *
1985 * Return 0 on success, negative on failure
1986 **/
1987 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
1988 {
1989 struct pci_dev *pdev = adapter->pdev;
1990 int i, err = 0;
1991
1992 for (i = 0; i < adapter->num_tx_queues; i++) {
1993 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
1994 if (err) {
1995 dev_err(&pdev->dev,
1996 "Allocation for Tx Queue %u failed\n", i);
1997 for (i--; i >= 0; i--)
1998 igb_free_tx_resources(&adapter->tx_ring[i]);
1999 break;
2000 }
2001 }
2002
2003 for (i = 0; i < IGB_MAX_TX_QUEUES; i++) {
2004 int r_idx = i % adapter->num_tx_queues;
2005 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
2006 }
2007 return err;
2008 }
2009
2010 /**
2011 * igb_setup_tctl - configure the transmit control registers
2012 * @adapter: Board private structure
2013 **/
2014 void igb_setup_tctl(struct igb_adapter *adapter)
2015 {
2016 struct e1000_hw *hw = &adapter->hw;
2017 u32 tctl;
2018
2019 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2020 wr32(E1000_TXDCTL(0), 0);
2021
2022 /* Program the Transmit Control Register */
2023 tctl = rd32(E1000_TCTL);
2024 tctl &= ~E1000_TCTL_CT;
2025 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2026 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2027
2028 igb_config_collision_dist(hw);
2029
2030 /* Enable transmits */
2031 tctl |= E1000_TCTL_EN;
2032
2033 wr32(E1000_TCTL, tctl);
2034 }
2035
2036 /**
2037 * igb_configure_tx_ring - Configure transmit ring after Reset
2038 * @adapter: board private structure
2039 * @ring: tx ring to configure
2040 *
2041 * Configure a transmit ring after a reset.
2042 **/
2043 void igb_configure_tx_ring(struct igb_adapter *adapter,
2044 struct igb_ring *ring)
2045 {
2046 struct e1000_hw *hw = &adapter->hw;
2047 u32 txdctl;
2048 u64 tdba = ring->dma;
2049 int reg_idx = ring->reg_idx;
2050
2051 /* disable the queue */
2052 txdctl = rd32(E1000_TXDCTL(reg_idx));
2053 wr32(E1000_TXDCTL(reg_idx),
2054 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2055 wrfl();
2056 mdelay(10);
2057
2058 wr32(E1000_TDLEN(reg_idx),
2059 ring->count * sizeof(union e1000_adv_tx_desc));
2060 wr32(E1000_TDBAL(reg_idx),
2061 tdba & 0x00000000ffffffffULL);
2062 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2063
2064 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2065 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2066 writel(0, ring->head);
2067 writel(0, ring->tail);
2068
2069 txdctl |= IGB_TX_PTHRESH;
2070 txdctl |= IGB_TX_HTHRESH << 8;
2071 txdctl |= IGB_TX_WTHRESH << 16;
2072
2073 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2074 wr32(E1000_TXDCTL(reg_idx), txdctl);
2075 }
2076
2077 /**
2078 * igb_configure_tx - Configure transmit Unit after Reset
2079 * @adapter: board private structure
2080 *
2081 * Configure the Tx unit of the MAC after a reset.
2082 **/
2083 static void igb_configure_tx(struct igb_adapter *adapter)
2084 {
2085 int i;
2086
2087 for (i = 0; i < adapter->num_tx_queues; i++)
2088 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2089 }
2090
2091 /**
2092 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2093 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2094 *
2095 * Returns 0 on success, negative on failure
2096 **/
2097 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2098 {
2099 struct pci_dev *pdev = rx_ring->pdev;
2100 int size, desc_len;
2101
2102 size = sizeof(struct igb_buffer) * rx_ring->count;
2103 rx_ring->buffer_info = vmalloc(size);
2104 if (!rx_ring->buffer_info)
2105 goto err;
2106 memset(rx_ring->buffer_info, 0, size);
2107
2108 desc_len = sizeof(union e1000_adv_rx_desc);
2109
2110 /* Round up to nearest 4K */
2111 rx_ring->size = rx_ring->count * desc_len;
2112 rx_ring->size = ALIGN(rx_ring->size, 4096);
2113
2114 rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2115 &rx_ring->dma);
2116
2117 if (!rx_ring->desc)
2118 goto err;
2119
2120 rx_ring->next_to_clean = 0;
2121 rx_ring->next_to_use = 0;
2122
2123 return 0;
2124
2125 err:
2126 vfree(rx_ring->buffer_info);
2127 rx_ring->buffer_info = NULL;
2128 dev_err(&pdev->dev, "Unable to allocate memory for "
2129 "the receive descriptor ring\n");
2130 return -ENOMEM;
2131 }
2132
2133 /**
2134 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2135 * (Descriptors) for all queues
2136 * @adapter: board private structure
2137 *
2138 * Return 0 on success, negative on failure
2139 **/
2140 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2141 {
2142 struct pci_dev *pdev = adapter->pdev;
2143 int i, err = 0;
2144
2145 for (i = 0; i < adapter->num_rx_queues; i++) {
2146 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2147 if (err) {
2148 dev_err(&pdev->dev,
2149 "Allocation for Rx Queue %u failed\n", i);
2150 for (i--; i >= 0; i--)
2151 igb_free_rx_resources(&adapter->rx_ring[i]);
2152 break;
2153 }
2154 }
2155
2156 return err;
2157 }
2158
2159 /**
2160 * igb_setup_mrqc - configure the multiple receive queue control registers
2161 * @adapter: Board private structure
2162 **/
2163 static void igb_setup_mrqc(struct igb_adapter *adapter)
2164 {
2165 struct e1000_hw *hw = &adapter->hw;
2166 u32 mrqc, rxcsum;
2167 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2168 union e1000_reta {
2169 u32 dword;
2170 u8 bytes[4];
2171 } reta;
2172 static const u8 rsshash[40] = {
2173 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2174 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2175 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2176 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2177
2178 /* Fill out hash function seeds */
2179 for (j = 0; j < 10; j++) {
2180 u32 rsskey = rsshash[(j * 4)];
2181 rsskey |= rsshash[(j * 4) + 1] << 8;
2182 rsskey |= rsshash[(j * 4) + 2] << 16;
2183 rsskey |= rsshash[(j * 4) + 3] << 24;
2184 array_wr32(E1000_RSSRK(0), j, rsskey);
2185 }
2186
2187 num_rx_queues = adapter->num_rx_queues;
2188
2189 if (adapter->vfs_allocated_count) {
2190 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2191 switch (hw->mac.type) {
2192 case e1000_82576:
2193 shift = 3;
2194 num_rx_queues = 2;
2195 break;
2196 case e1000_82575:
2197 shift = 2;
2198 shift2 = 6;
2199 default:
2200 break;
2201 }
2202 } else {
2203 if (hw->mac.type == e1000_82575)
2204 shift = 6;
2205 }
2206
2207 for (j = 0; j < (32 * 4); j++) {
2208 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2209 if (shift2)
2210 reta.bytes[j & 3] |= num_rx_queues << shift2;
2211 if ((j & 3) == 3)
2212 wr32(E1000_RETA(j >> 2), reta.dword);
2213 }
2214
2215 /*
2216 * Disable raw packet checksumming so that RSS hash is placed in
2217 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2218 * offloads as they are enabled by default
2219 */
2220 rxcsum = rd32(E1000_RXCSUM);
2221 rxcsum |= E1000_RXCSUM_PCSD;
2222
2223 if (adapter->hw.mac.type >= e1000_82576)
2224 /* Enable Receive Checksum Offload for SCTP */
2225 rxcsum |= E1000_RXCSUM_CRCOFL;
2226
2227 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2228 wr32(E1000_RXCSUM, rxcsum);
2229
2230 /* If VMDq is enabled then we set the appropriate mode for that, else
2231 * we default to RSS so that an RSS hash is calculated per packet even
2232 * if we are only using one queue */
2233 if (adapter->vfs_allocated_count) {
2234 if (hw->mac.type > e1000_82575) {
2235 /* Set the default pool for the PF's first queue */
2236 u32 vtctl = rd32(E1000_VT_CTL);
2237 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2238 E1000_VT_CTL_DISABLE_DEF_POOL);
2239 vtctl |= adapter->vfs_allocated_count <<
2240 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2241 wr32(E1000_VT_CTL, vtctl);
2242 }
2243 if (adapter->num_rx_queues > 1)
2244 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2245 else
2246 mrqc = E1000_MRQC_ENABLE_VMDQ;
2247 } else {
2248 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2249 }
2250 igb_vmm_control(adapter);
2251
2252 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2253 E1000_MRQC_RSS_FIELD_IPV4_TCP);
2254 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2255 E1000_MRQC_RSS_FIELD_IPV6_TCP);
2256 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2257 E1000_MRQC_RSS_FIELD_IPV6_UDP);
2258 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2259 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2260
2261 wr32(E1000_MRQC, mrqc);
2262 }
2263
2264 /**
2265 * igb_setup_rctl - configure the receive control registers
2266 * @adapter: Board private structure
2267 **/
2268 void igb_setup_rctl(struct igb_adapter *adapter)
2269 {
2270 struct e1000_hw *hw = &adapter->hw;
2271 u32 rctl;
2272
2273 rctl = rd32(E1000_RCTL);
2274
2275 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2276 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2277
2278 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2279 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2280
2281 /*
2282 * enable stripping of CRC. It's unlikely this will break BMC
2283 * redirection as it did with e1000. Newer features require
2284 * that the HW strips the CRC.
2285 */
2286 rctl |= E1000_RCTL_SECRC;
2287
2288 /* disable store bad packets and clear size bits. */
2289 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2290
2291 /* enable LPE to prevent packets larger than max_frame_size */
2292 rctl |= E1000_RCTL_LPE;
2293
2294 /* disable queue 0 to prevent tail write w/o re-config */
2295 wr32(E1000_RXDCTL(0), 0);
2296
2297 /* Attention!!! For SR-IOV PF driver operations you must enable
2298 * queue drop for all VF and PF queues to prevent head of line blocking
2299 * if an un-trusted VF does not provide descriptors to hardware.
2300 */
2301 if (adapter->vfs_allocated_count) {
2302 /* set all queue drop enable bits */
2303 wr32(E1000_QDE, ALL_QUEUES);
2304 }
2305
2306 wr32(E1000_RCTL, rctl);
2307 }
2308
2309 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2310 int vfn)
2311 {
2312 struct e1000_hw *hw = &adapter->hw;
2313 u32 vmolr;
2314
2315 /* if it isn't the PF check to see if VFs are enabled and
2316 * increase the size to support vlan tags */
2317 if (vfn < adapter->vfs_allocated_count &&
2318 adapter->vf_data[vfn].vlans_enabled)
2319 size += VLAN_TAG_SIZE;
2320
2321 vmolr = rd32(E1000_VMOLR(vfn));
2322 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2323 vmolr |= size | E1000_VMOLR_LPE;
2324 wr32(E1000_VMOLR(vfn), vmolr);
2325
2326 return 0;
2327 }
2328
2329 /**
2330 * igb_rlpml_set - set maximum receive packet size
2331 * @adapter: board private structure
2332 *
2333 * Configure maximum receivable packet size.
2334 **/
2335 static void igb_rlpml_set(struct igb_adapter *adapter)
2336 {
2337 u32 max_frame_size = adapter->max_frame_size;
2338 struct e1000_hw *hw = &adapter->hw;
2339 u16 pf_id = adapter->vfs_allocated_count;
2340
2341 if (adapter->vlgrp)
2342 max_frame_size += VLAN_TAG_SIZE;
2343
2344 /* if vfs are enabled we set RLPML to the largest possible request
2345 * size and set the VMOLR RLPML to the size we need */
2346 if (pf_id) {
2347 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2348 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2349 }
2350
2351 wr32(E1000_RLPML, max_frame_size);
2352 }
2353
2354 static inline void igb_set_vmolr(struct igb_adapter *adapter, int vfn)
2355 {
2356 struct e1000_hw *hw = &adapter->hw;
2357 u32 vmolr;
2358
2359 /*
2360 * This register exists only on 82576 and newer so if we are older then
2361 * we should exit and do nothing
2362 */
2363 if (hw->mac.type < e1000_82576)
2364 return;
2365
2366 vmolr = rd32(E1000_VMOLR(vfn));
2367 vmolr |= E1000_VMOLR_AUPE | /* Accept untagged packets */
2368 E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2369
2370 /* clear all bits that might not be set */
2371 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2372
2373 if (adapter->num_rx_queues > 1 && vfn == adapter->vfs_allocated_count)
2374 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2375 /*
2376 * for VMDq only allow the VFs and pool 0 to accept broadcast and
2377 * multicast packets
2378 */
2379 if (vfn <= adapter->vfs_allocated_count)
2380 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
2381
2382 wr32(E1000_VMOLR(vfn), vmolr);
2383 }
2384
2385 /**
2386 * igb_configure_rx_ring - Configure a receive ring after Reset
2387 * @adapter: board private structure
2388 * @ring: receive ring to be configured
2389 *
2390 * Configure the Rx unit of the MAC after a reset.
2391 **/
2392 void igb_configure_rx_ring(struct igb_adapter *adapter,
2393 struct igb_ring *ring)
2394 {
2395 struct e1000_hw *hw = &adapter->hw;
2396 u64 rdba = ring->dma;
2397 int reg_idx = ring->reg_idx;
2398 u32 srrctl, rxdctl;
2399
2400 /* disable the queue */
2401 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2402 wr32(E1000_RXDCTL(reg_idx),
2403 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2404
2405 /* Set DMA base address registers */
2406 wr32(E1000_RDBAL(reg_idx),
2407 rdba & 0x00000000ffffffffULL);
2408 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2409 wr32(E1000_RDLEN(reg_idx),
2410 ring->count * sizeof(union e1000_adv_rx_desc));
2411
2412 /* initialize head and tail */
2413 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2414 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2415 writel(0, ring->head);
2416 writel(0, ring->tail);
2417
2418 /* set descriptor configuration */
2419 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2420 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2421 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2422 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2423 srrctl |= IGB_RXBUFFER_16384 >>
2424 E1000_SRRCTL_BSIZEPKT_SHIFT;
2425 #else
2426 srrctl |= (PAGE_SIZE / 2) >>
2427 E1000_SRRCTL_BSIZEPKT_SHIFT;
2428 #endif
2429 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2430 } else {
2431 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2432 E1000_SRRCTL_BSIZEPKT_SHIFT;
2433 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2434 }
2435
2436 wr32(E1000_SRRCTL(reg_idx), srrctl);
2437
2438 /* set filtering for VMDQ pools */
2439 igb_set_vmolr(adapter, reg_idx & 0x7);
2440
2441 /* enable receive descriptor fetching */
2442 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2443 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2444 rxdctl &= 0xFFF00000;
2445 rxdctl |= IGB_RX_PTHRESH;
2446 rxdctl |= IGB_RX_HTHRESH << 8;
2447 rxdctl |= IGB_RX_WTHRESH << 16;
2448 wr32(E1000_RXDCTL(reg_idx), rxdctl);
2449 }
2450
2451 /**
2452 * igb_configure_rx - Configure receive Unit after Reset
2453 * @adapter: board private structure
2454 *
2455 * Configure the Rx unit of the MAC after a reset.
2456 **/
2457 static void igb_configure_rx(struct igb_adapter *adapter)
2458 {
2459 int i;
2460
2461 /* set UTA to appropriate mode */
2462 igb_set_uta(adapter);
2463
2464 /* set the correct pool for the PF default MAC address in entry 0 */
2465 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2466 adapter->vfs_allocated_count);
2467
2468 /* Setup the HW Rx Head and Tail Descriptor Pointers and
2469 * the Base and Length of the Rx Descriptor Ring */
2470 for (i = 0; i < adapter->num_rx_queues; i++)
2471 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2472 }
2473
2474 /**
2475 * igb_free_tx_resources - Free Tx Resources per Queue
2476 * @tx_ring: Tx descriptor ring for a specific queue
2477 *
2478 * Free all transmit software resources
2479 **/
2480 void igb_free_tx_resources(struct igb_ring *tx_ring)
2481 {
2482 igb_clean_tx_ring(tx_ring);
2483
2484 vfree(tx_ring->buffer_info);
2485 tx_ring->buffer_info = NULL;
2486
2487 /* if not set, then don't free */
2488 if (!tx_ring->desc)
2489 return;
2490
2491 pci_free_consistent(tx_ring->pdev, tx_ring->size,
2492 tx_ring->desc, tx_ring->dma);
2493
2494 tx_ring->desc = NULL;
2495 }
2496
2497 /**
2498 * igb_free_all_tx_resources - Free Tx Resources for All Queues
2499 * @adapter: board private structure
2500 *
2501 * Free all transmit software resources
2502 **/
2503 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2504 {
2505 int i;
2506
2507 for (i = 0; i < adapter->num_tx_queues; i++)
2508 igb_free_tx_resources(&adapter->tx_ring[i]);
2509 }
2510
2511 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2512 struct igb_buffer *buffer_info)
2513 {
2514 buffer_info->dma = 0;
2515 if (buffer_info->skb) {
2516 skb_dma_unmap(&tx_ring->pdev->dev,
2517 buffer_info->skb,
2518 DMA_TO_DEVICE);
2519 dev_kfree_skb_any(buffer_info->skb);
2520 buffer_info->skb = NULL;
2521 }
2522 buffer_info->time_stamp = 0;
2523 /* buffer_info must be completely set up in the transmit path */
2524 }
2525
2526 /**
2527 * igb_clean_tx_ring - Free Tx Buffers
2528 * @tx_ring: ring to be cleaned
2529 **/
2530 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2531 {
2532 struct igb_buffer *buffer_info;
2533 unsigned long size;
2534 unsigned int i;
2535
2536 if (!tx_ring->buffer_info)
2537 return;
2538 /* Free all the Tx ring sk_buffs */
2539
2540 for (i = 0; i < tx_ring->count; i++) {
2541 buffer_info = &tx_ring->buffer_info[i];
2542 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2543 }
2544
2545 size = sizeof(struct igb_buffer) * tx_ring->count;
2546 memset(tx_ring->buffer_info, 0, size);
2547
2548 /* Zero out the descriptor ring */
2549 memset(tx_ring->desc, 0, tx_ring->size);
2550
2551 tx_ring->next_to_use = 0;
2552 tx_ring->next_to_clean = 0;
2553 }
2554
2555 /**
2556 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2557 * @adapter: board private structure
2558 **/
2559 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2560 {
2561 int i;
2562
2563 for (i = 0; i < adapter->num_tx_queues; i++)
2564 igb_clean_tx_ring(&adapter->tx_ring[i]);
2565 }
2566
2567 /**
2568 * igb_free_rx_resources - Free Rx Resources
2569 * @rx_ring: ring to clean the resources from
2570 *
2571 * Free all receive software resources
2572 **/
2573 void igb_free_rx_resources(struct igb_ring *rx_ring)
2574 {
2575 igb_clean_rx_ring(rx_ring);
2576
2577 vfree(rx_ring->buffer_info);
2578 rx_ring->buffer_info = NULL;
2579
2580 /* if not set, then don't free */
2581 if (!rx_ring->desc)
2582 return;
2583
2584 pci_free_consistent(rx_ring->pdev, rx_ring->size,
2585 rx_ring->desc, rx_ring->dma);
2586
2587 rx_ring->desc = NULL;
2588 }
2589
2590 /**
2591 * igb_free_all_rx_resources - Free Rx Resources for All Queues
2592 * @adapter: board private structure
2593 *
2594 * Free all receive software resources
2595 **/
2596 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2597 {
2598 int i;
2599
2600 for (i = 0; i < adapter->num_rx_queues; i++)
2601 igb_free_rx_resources(&adapter->rx_ring[i]);
2602 }
2603
2604 /**
2605 * igb_clean_rx_ring - Free Rx Buffers per Queue
2606 * @rx_ring: ring to free buffers from
2607 **/
2608 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2609 {
2610 struct igb_buffer *buffer_info;
2611 unsigned long size;
2612 unsigned int i;
2613
2614 if (!rx_ring->buffer_info)
2615 return;
2616
2617 /* Free all the Rx ring sk_buffs */
2618 for (i = 0; i < rx_ring->count; i++) {
2619 buffer_info = &rx_ring->buffer_info[i];
2620 if (buffer_info->dma) {
2621 pci_unmap_single(rx_ring->pdev,
2622 buffer_info->dma,
2623 rx_ring->rx_buffer_len,
2624 PCI_DMA_FROMDEVICE);
2625 buffer_info->dma = 0;
2626 }
2627
2628 if (buffer_info->skb) {
2629 dev_kfree_skb(buffer_info->skb);
2630 buffer_info->skb = NULL;
2631 }
2632 if (buffer_info->page_dma) {
2633 pci_unmap_page(rx_ring->pdev,
2634 buffer_info->page_dma,
2635 PAGE_SIZE / 2,
2636 PCI_DMA_FROMDEVICE);
2637 buffer_info->page_dma = 0;
2638 }
2639 if (buffer_info->page) {
2640 put_page(buffer_info->page);
2641 buffer_info->page = NULL;
2642 buffer_info->page_offset = 0;
2643 }
2644 }
2645
2646 size = sizeof(struct igb_buffer) * rx_ring->count;
2647 memset(rx_ring->buffer_info, 0, size);
2648
2649 /* Zero out the descriptor ring */
2650 memset(rx_ring->desc, 0, rx_ring->size);
2651
2652 rx_ring->next_to_clean = 0;
2653 rx_ring->next_to_use = 0;
2654 }
2655
2656 /**
2657 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2658 * @adapter: board private structure
2659 **/
2660 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2661 {
2662 int i;
2663
2664 for (i = 0; i < adapter->num_rx_queues; i++)
2665 igb_clean_rx_ring(&adapter->rx_ring[i]);
2666 }
2667
2668 /**
2669 * igb_set_mac - Change the Ethernet Address of the NIC
2670 * @netdev: network interface device structure
2671 * @p: pointer to an address structure
2672 *
2673 * Returns 0 on success, negative on failure
2674 **/
2675 static int igb_set_mac(struct net_device *netdev, void *p)
2676 {
2677 struct igb_adapter *adapter = netdev_priv(netdev);
2678 struct e1000_hw *hw = &adapter->hw;
2679 struct sockaddr *addr = p;
2680
2681 if (!is_valid_ether_addr(addr->sa_data))
2682 return -EADDRNOTAVAIL;
2683
2684 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2685 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2686
2687 /* set the correct pool for the new PF MAC address in entry 0 */
2688 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2689 adapter->vfs_allocated_count);
2690
2691 return 0;
2692 }
2693
2694 /**
2695 * igb_write_mc_addr_list - write multicast addresses to MTA
2696 * @netdev: network interface device structure
2697 *
2698 * Writes multicast address list to the MTA hash table.
2699 * Returns: -ENOMEM on failure
2700 * 0 on no addresses written
2701 * X on writing X addresses to MTA
2702 **/
2703 static int igb_write_mc_addr_list(struct net_device *netdev)
2704 {
2705 struct igb_adapter *adapter = netdev_priv(netdev);
2706 struct e1000_hw *hw = &adapter->hw;
2707 struct dev_mc_list *mc_ptr = netdev->mc_list;
2708 u8 *mta_list;
2709 u32 vmolr = 0;
2710 int i;
2711
2712 if (!netdev->mc_count) {
2713 /* nothing to program, so clear mc list */
2714 igb_update_mc_addr_list(hw, NULL, 0);
2715 igb_restore_vf_multicasts(adapter);
2716 return 0;
2717 }
2718
2719 mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC);
2720 if (!mta_list)
2721 return -ENOMEM;
2722
2723 /* set vmolr receive overflow multicast bit */
2724 vmolr |= E1000_VMOLR_ROMPE;
2725
2726 /* The shared function expects a packed array of only addresses. */
2727 mc_ptr = netdev->mc_list;
2728
2729 for (i = 0; i < netdev->mc_count; i++) {
2730 if (!mc_ptr)
2731 break;
2732 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2733 mc_ptr = mc_ptr->next;
2734 }
2735 igb_update_mc_addr_list(hw, mta_list, i);
2736 kfree(mta_list);
2737
2738 return netdev->mc_count;
2739 }
2740
2741 /**
2742 * igb_write_uc_addr_list - write unicast addresses to RAR table
2743 * @netdev: network interface device structure
2744 *
2745 * Writes unicast address list to the RAR table.
2746 * Returns: -ENOMEM on failure/insufficient address space
2747 * 0 on no addresses written
2748 * X on writing X addresses to the RAR table
2749 **/
2750 static int igb_write_uc_addr_list(struct net_device *netdev)
2751 {
2752 struct igb_adapter *adapter = netdev_priv(netdev);
2753 struct e1000_hw *hw = &adapter->hw;
2754 unsigned int vfn = adapter->vfs_allocated_count;
2755 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2756 int count = 0;
2757
2758 /* return ENOMEM indicating insufficient memory for addresses */
2759 if (netdev->uc.count > rar_entries)
2760 return -ENOMEM;
2761
2762 if (netdev->uc.count && rar_entries) {
2763 struct netdev_hw_addr *ha;
2764 list_for_each_entry(ha, &netdev->uc.list, list) {
2765 if (!rar_entries)
2766 break;
2767 igb_rar_set_qsel(adapter, ha->addr,
2768 rar_entries--,
2769 vfn);
2770 count++;
2771 }
2772 }
2773 /* write the addresses in reverse order to avoid write combining */
2774 for (; rar_entries > 0 ; rar_entries--) {
2775 wr32(E1000_RAH(rar_entries), 0);
2776 wr32(E1000_RAL(rar_entries), 0);
2777 }
2778 wrfl();
2779
2780 return count;
2781 }
2782
2783 /**
2784 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2785 * @netdev: network interface device structure
2786 *
2787 * The set_rx_mode entry point is called whenever the unicast or multicast
2788 * address lists or the network interface flags are updated. This routine is
2789 * responsible for configuring the hardware for proper unicast, multicast,
2790 * promiscuous mode, and all-multi behavior.
2791 **/
2792 static void igb_set_rx_mode(struct net_device *netdev)
2793 {
2794 struct igb_adapter *adapter = netdev_priv(netdev);
2795 struct e1000_hw *hw = &adapter->hw;
2796 unsigned int vfn = adapter->vfs_allocated_count;
2797 u32 rctl, vmolr = 0;
2798 int count;
2799
2800 /* Check for Promiscuous and All Multicast modes */
2801 rctl = rd32(E1000_RCTL);
2802
2803 /* clear the effected bits */
2804 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2805
2806 if (netdev->flags & IFF_PROMISC) {
2807 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2808 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2809 } else {
2810 if (netdev->flags & IFF_ALLMULTI) {
2811 rctl |= E1000_RCTL_MPE;
2812 vmolr |= E1000_VMOLR_MPME;
2813 } else {
2814 /*
2815 * Write addresses to the MTA, if the attempt fails
2816 * then we should just turn on promiscous mode so
2817 * that we can at least receive multicast traffic
2818 */
2819 count = igb_write_mc_addr_list(netdev);
2820 if (count < 0) {
2821 rctl |= E1000_RCTL_MPE;
2822 vmolr |= E1000_VMOLR_MPME;
2823 } else if (count) {
2824 vmolr |= E1000_VMOLR_ROMPE;
2825 }
2826 }
2827 /*
2828 * Write addresses to available RAR registers, if there is not
2829 * sufficient space to store all the addresses then enable
2830 * unicast promiscous mode
2831 */
2832 count = igb_write_uc_addr_list(netdev);
2833 if (count < 0) {
2834 rctl |= E1000_RCTL_UPE;
2835 vmolr |= E1000_VMOLR_ROPE;
2836 }
2837 rctl |= E1000_RCTL_VFE;
2838 }
2839 wr32(E1000_RCTL, rctl);
2840
2841 /*
2842 * In order to support SR-IOV and eventually VMDq it is necessary to set
2843 * the VMOLR to enable the appropriate modes. Without this workaround
2844 * we will have issues with VLAN tag stripping not being done for frames
2845 * that are only arriving because we are the default pool
2846 */
2847 if (hw->mac.type < e1000_82576)
2848 return;
2849
2850 vmolr |= rd32(E1000_VMOLR(vfn)) &
2851 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
2852 wr32(E1000_VMOLR(vfn), vmolr);
2853 igb_restore_vf_multicasts(adapter);
2854 }
2855
2856 /* Need to wait a few seconds after link up to get diagnostic information from
2857 * the phy */
2858 static void igb_update_phy_info(unsigned long data)
2859 {
2860 struct igb_adapter *adapter = (struct igb_adapter *) data;
2861 igb_get_phy_info(&adapter->hw);
2862 }
2863
2864 /**
2865 * igb_has_link - check shared code for link and determine up/down
2866 * @adapter: pointer to driver private info
2867 **/
2868 static bool igb_has_link(struct igb_adapter *adapter)
2869 {
2870 struct e1000_hw *hw = &adapter->hw;
2871 bool link_active = false;
2872 s32 ret_val = 0;
2873
2874 /* get_link_status is set on LSC (link status) interrupt or
2875 * rx sequence error interrupt. get_link_status will stay
2876 * false until the e1000_check_for_link establishes link
2877 * for copper adapters ONLY
2878 */
2879 switch (hw->phy.media_type) {
2880 case e1000_media_type_copper:
2881 if (hw->mac.get_link_status) {
2882 ret_val = hw->mac.ops.check_for_link(hw);
2883 link_active = !hw->mac.get_link_status;
2884 } else {
2885 link_active = true;
2886 }
2887 break;
2888 case e1000_media_type_internal_serdes:
2889 ret_val = hw->mac.ops.check_for_link(hw);
2890 link_active = hw->mac.serdes_has_link;
2891 break;
2892 default:
2893 case e1000_media_type_unknown:
2894 break;
2895 }
2896
2897 return link_active;
2898 }
2899
2900 /**
2901 * igb_watchdog - Timer Call-back
2902 * @data: pointer to adapter cast into an unsigned long
2903 **/
2904 static void igb_watchdog(unsigned long data)
2905 {
2906 struct igb_adapter *adapter = (struct igb_adapter *)data;
2907 /* Do the rest outside of interrupt context */
2908 schedule_work(&adapter->watchdog_task);
2909 }
2910
2911 static void igb_watchdog_task(struct work_struct *work)
2912 {
2913 struct igb_adapter *adapter = container_of(work,
2914 struct igb_adapter,
2915 watchdog_task);
2916 struct e1000_hw *hw = &adapter->hw;
2917 struct net_device *netdev = adapter->netdev;
2918 struct igb_ring *tx_ring = adapter->tx_ring;
2919 u32 link;
2920 int i;
2921
2922 link = igb_has_link(adapter);
2923 if (link) {
2924 if (!netif_carrier_ok(netdev)) {
2925 u32 ctrl;
2926 hw->mac.ops.get_speed_and_duplex(hw,
2927 &adapter->link_speed,
2928 &adapter->link_duplex);
2929
2930 ctrl = rd32(E1000_CTRL);
2931 /* Links status message must follow this format */
2932 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
2933 "Flow Control: %s\n",
2934 netdev->name,
2935 adapter->link_speed,
2936 adapter->link_duplex == FULL_DUPLEX ?
2937 "Full Duplex" : "Half Duplex",
2938 ((ctrl & E1000_CTRL_TFCE) &&
2939 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
2940 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
2941 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
2942
2943 /* tweak tx_queue_len according to speed/duplex and
2944 * adjust the timeout factor */
2945 netdev->tx_queue_len = adapter->tx_queue_len;
2946 adapter->tx_timeout_factor = 1;
2947 switch (adapter->link_speed) {
2948 case SPEED_10:
2949 netdev->tx_queue_len = 10;
2950 adapter->tx_timeout_factor = 14;
2951 break;
2952 case SPEED_100:
2953 netdev->tx_queue_len = 100;
2954 /* maybe add some timeout factor ? */
2955 break;
2956 }
2957
2958 netif_carrier_on(netdev);
2959
2960 igb_ping_all_vfs(adapter);
2961
2962 /* link state has changed, schedule phy info update */
2963 if (!test_bit(__IGB_DOWN, &adapter->state))
2964 mod_timer(&adapter->phy_info_timer,
2965 round_jiffies(jiffies + 2 * HZ));
2966 }
2967 } else {
2968 if (netif_carrier_ok(netdev)) {
2969 adapter->link_speed = 0;
2970 adapter->link_duplex = 0;
2971 /* Links status message must follow this format */
2972 printk(KERN_INFO "igb: %s NIC Link is Down\n",
2973 netdev->name);
2974 netif_carrier_off(netdev);
2975
2976 igb_ping_all_vfs(adapter);
2977
2978 /* link state has changed, schedule phy info update */
2979 if (!test_bit(__IGB_DOWN, &adapter->state))
2980 mod_timer(&adapter->phy_info_timer,
2981 round_jiffies(jiffies + 2 * HZ));
2982 }
2983 }
2984
2985 igb_update_stats(adapter);
2986 igb_update_adaptive(hw);
2987
2988 if (!netif_carrier_ok(netdev)) {
2989 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
2990 /* We've lost link, so the controller stops DMA,
2991 * but we've got queued Tx work that's never going
2992 * to get done, so reset controller to flush Tx.
2993 * (Do the reset outside of interrupt context). */
2994 adapter->tx_timeout_count++;
2995 schedule_work(&adapter->reset_task);
2996 /* return immediately since reset is imminent */
2997 return;
2998 }
2999 }
3000
3001 /* Force detection of hung controller every watchdog period */
3002 for (i = 0; i < adapter->num_tx_queues; i++)
3003 adapter->tx_ring[i].detect_tx_hung = true;
3004
3005 /* Cause software interrupt to ensure rx ring is cleaned */
3006 if (adapter->msix_entries) {
3007 u32 eics = 0;
3008 for (i = 0; i < adapter->num_q_vectors; i++) {
3009 struct igb_q_vector *q_vector = adapter->q_vector[i];
3010 eics |= q_vector->eims_value;
3011 }
3012 wr32(E1000_EICS, eics);
3013 } else {
3014 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3015 }
3016
3017 /* Reset the timer */
3018 if (!test_bit(__IGB_DOWN, &adapter->state))
3019 mod_timer(&adapter->watchdog_timer,
3020 round_jiffies(jiffies + 2 * HZ));
3021 }
3022
/* Latency classes; the first three count up from 0. */
enum latency_range {
	lowest_latency,
	low_latency,
	bulk_latency,
	latency_invalid = 255
};
3029
3030 /**
3031 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3032 *
3033 * Stores a new ITR value based on strictly on packet size. This
3034 * algorithm is less sophisticated than that used in igb_update_itr,
3035 * due to the difficulty of synchronizing statistics across multiple
3036 * receive rings. The divisors and thresholds used by this fuction
3037 * were determined based on theoretical maximum wire speed and testing
3038 * data, in order to minimize response time while increasing bulk
3039 * throughput.
3040 * This functionality is controlled by the InterruptThrottleRate module
3041 * parameter (see igb_param.c)
3042 * NOTE: This function is called only when operating in a multiqueue
3043 * receive environment.
3044 * @q_vector: pointer to q_vector
3045 **/
3046 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3047 {
3048 int new_val = q_vector->itr_val;
3049 int avg_wire_size = 0;
3050 struct igb_adapter *adapter = q_vector->adapter;
3051
3052 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3053 * ints/sec - ITR timer value of 120 ticks.
3054 */
3055 if (adapter->link_speed != SPEED_1000) {
3056 new_val = 976;
3057 goto set_itr_val;
3058 }
3059
3060 if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3061 struct igb_ring *ring = q_vector->rx_ring;
3062 avg_wire_size = ring->total_bytes / ring->total_packets;
3063 }
3064
3065 if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3066 struct igb_ring *ring = q_vector->tx_ring;
3067 avg_wire_size = max_t(u32, avg_wire_size,
3068 (ring->total_bytes /
3069 ring->total_packets));
3070 }
3071
3072 /* if avg_wire_size isn't set no work was done */
3073 if (!avg_wire_size)
3074 goto clear_counts;
3075
3076 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3077 avg_wire_size += 24;
3078
3079 /* Don't starve jumbo frames */
3080 avg_wire_size = min(avg_wire_size, 3000);
3081
3082 /* Give a little boost to mid-size frames */
3083 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3084 new_val = avg_wire_size / 3;
3085 else
3086 new_val = avg_wire_size / 2;
3087
3088 set_itr_val:
3089 if (new_val != q_vector->itr_val) {
3090 q_vector->itr_val = new_val;
3091 q_vector->set_itr = 1;
3092 }
3093 clear_counts:
3094 if (q_vector->rx_ring) {
3095 q_vector->rx_ring->total_bytes = 0;
3096 q_vector->rx_ring->total_packets = 0;
3097 }
3098 if (q_vector->tx_ring) {
3099 q_vector->tx_ring->total_bytes = 0;
3100 q_vector->tx_ring->total_packets = 0;
3101 }
3102 }
3103
3104 /**
3105 * igb_update_itr - update the dynamic ITR value based on statistics
3106 * Stores a new ITR value based on packets and byte
3107 * counts during the last interrupt. The advantage of per interrupt
3108 * computation is faster updates and more accurate ITR for the current
3109 * traffic pattern. Constants in this function were computed
3110 * based on theoretical maximum wire speed and thresholds were set based
3111 * on testing data as well as attempting to minimize response time
3112 * while increasing bulk throughput.
3113 * this functionality is controlled by the InterruptThrottleRate module
3114 * parameter (see igb_param.c)
3115 * NOTE: These calculations are only valid when operating in a single-
3116 * queue environment.
3117 * @adapter: pointer to adapter
3118 * @itr_setting: current q_vector->itr_val
3119 * @packets: the number of packets during this measurement interval
3120 * @bytes: the number of bytes during this measurement interval
3121 **/
3122 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3123 int packets, int bytes)
3124 {
3125 unsigned int retval = itr_setting;
3126
3127 if (packets == 0)
3128 goto update_itr_done;
3129
3130 switch (itr_setting) {
3131 case lowest_latency:
3132 /* handle TSO and jumbo frames */
3133 if (bytes/packets > 8000)
3134 retval = bulk_latency;
3135 else if ((packets < 5) && (bytes > 512))
3136 retval = low_latency;
3137 break;
3138 case low_latency: /* 50 usec aka 20000 ints/s */
3139 if (bytes > 10000) {
3140 /* this if handles the TSO accounting */
3141 if (bytes/packets > 8000) {
3142 retval = bulk_latency;
3143 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3144 retval = bulk_latency;
3145 } else if ((packets > 35)) {
3146 retval = lowest_latency;
3147 }
3148 } else if (bytes/packets > 2000) {
3149 retval = bulk_latency;
3150 } else if (packets <= 2 && bytes < 512) {
3151 retval = lowest_latency;
3152 }
3153 break;
3154 case bulk_latency: /* 250 usec aka 4000 ints/s */
3155 if (bytes > 25000) {
3156 if (packets > 35)
3157 retval = low_latency;
3158 } else if (bytes < 1500) {
3159 retval = low_latency;
3160 }
3161 break;
3162 }
3163
3164 update_itr_done:
3165 return retval;
3166 }
3167
3168 static void igb_set_itr(struct igb_adapter *adapter)
3169 {
3170 struct igb_q_vector *q_vector = adapter->q_vector[0];
3171 u16 current_itr;
3172 u32 new_itr = q_vector->itr_val;
3173
3174 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3175 if (adapter->link_speed != SPEED_1000) {
3176 current_itr = 0;
3177 new_itr = 4000;
3178 goto set_itr_now;
3179 }
3180
3181 adapter->rx_itr = igb_update_itr(adapter,
3182 adapter->rx_itr,
3183 adapter->rx_ring->total_packets,
3184 adapter->rx_ring->total_bytes);
3185
3186 adapter->tx_itr = igb_update_itr(adapter,
3187 adapter->tx_itr,
3188 adapter->tx_ring->total_packets,
3189 adapter->tx_ring->total_bytes);
3190 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3191
3192 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3193 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3194 current_itr = low_latency;
3195
3196 switch (current_itr) {
3197 /* counts and packets in update_itr are dependent on these numbers */
3198 case lowest_latency:
3199 new_itr = 56; /* aka 70,000 ints/sec */
3200 break;
3201 case low_latency:
3202 new_itr = 196; /* aka 20,000 ints/sec */
3203 break;
3204 case bulk_latency:
3205 new_itr = 980; /* aka 4,000 ints/sec */
3206 break;
3207 default:
3208 break;
3209 }
3210
3211 set_itr_now:
3212 adapter->rx_ring->total_bytes = 0;
3213 adapter->rx_ring->total_packets = 0;
3214 adapter->tx_ring->total_bytes = 0;
3215 adapter->tx_ring->total_packets = 0;
3216
3217 if (new_itr != q_vector->itr_val) {
3218 /* this attempts to bias the interrupt rate towards Bulk
3219 * by adding intermediate steps when interrupt rate is
3220 * increasing */
3221 new_itr = new_itr > q_vector->itr_val ?
3222 max((new_itr * q_vector->itr_val) /
3223 (new_itr + (q_vector->itr_val >> 2)),
3224 new_itr) :
3225 new_itr;
3226 /* Don't write the value here; it resets the adapter's
3227 * internal timer, and causes us to delay far longer than
3228 * we should between interrupts. Instead, we write the ITR
3229 * value at the beginning of the next interrupt so the timing
3230 * ends up being correct.
3231 */
3232 q_vector->itr_val = new_itr;
3233 q_vector->set_itr = 1;
3234 }
3235
3236 return;
3237 }
3238
/*
 * Per-packet transmit flags, built in igb_xmit_frame_ring_adv() and
 * consumed by the igb_*_adv() Tx helpers.  The low bits are booleans; the
 * upper 16 bits carry the VLAN tag.
 */
#define IGB_TX_FLAGS_CSUM		0x00000001	/* checksum offload context was set up */
#define IGB_TX_FLAGS_VLAN		0x00000002	/* VLAN tag present in the upper 16 bits */
#define IGB_TX_FLAGS_TSO		0x00000004	/* TSO context was set up */
#define IGB_TX_FLAGS_IPV4		0x00000008	/* skb->protocol is ETH_P_IP */
#define IGB_TX_FLAGS_TSTAMP		0x00000010	/* hardware Tx timestamp requested */
#define IGB_TX_FLAGS_VLAN_MASK	0xffff0000	/* bits holding the VLAN tag */
#define IGB_TX_FLAGS_VLAN_SHIFT	16		/* shift that places the tag in the mask */
3246
3247 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3248 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3249 {
3250 struct e1000_adv_tx_context_desc *context_desc;
3251 unsigned int i;
3252 int err;
3253 struct igb_buffer *buffer_info;
3254 u32 info = 0, tu_cmd = 0;
3255 u32 mss_l4len_idx, l4len;
3256 *hdr_len = 0;
3257
3258 if (skb_header_cloned(skb)) {
3259 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3260 if (err)
3261 return err;
3262 }
3263
3264 l4len = tcp_hdrlen(skb);
3265 *hdr_len += l4len;
3266
3267 if (skb->protocol == htons(ETH_P_IP)) {
3268 struct iphdr *iph = ip_hdr(skb);
3269 iph->tot_len = 0;
3270 iph->check = 0;
3271 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3272 iph->daddr, 0,
3273 IPPROTO_TCP,
3274 0);
3275 } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
3276 ipv6_hdr(skb)->payload_len = 0;
3277 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3278 &ipv6_hdr(skb)->daddr,
3279 0, IPPROTO_TCP, 0);
3280 }
3281
3282 i = tx_ring->next_to_use;
3283
3284 buffer_info = &tx_ring->buffer_info[i];
3285 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3286 /* VLAN MACLEN IPLEN */
3287 if (tx_flags & IGB_TX_FLAGS_VLAN)
3288 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3289 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3290 *hdr_len += skb_network_offset(skb);
3291 info |= skb_network_header_len(skb);
3292 *hdr_len += skb_network_header_len(skb);
3293 context_desc->vlan_macip_lens = cpu_to_le32(info);
3294
3295 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3296 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3297
3298 if (skb->protocol == htons(ETH_P_IP))
3299 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3300 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3301
3302 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3303
3304 /* MSS L4LEN IDX */
3305 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3306 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3307
3308 /* For 82575, context index must be unique per ring. */
3309 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3310 mss_l4len_idx |= tx_ring->reg_idx << 4;
3311
3312 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3313 context_desc->seqnum_seed = 0;
3314
3315 buffer_info->time_stamp = jiffies;
3316 buffer_info->next_to_watch = i;
3317 buffer_info->dma = 0;
3318 i++;
3319 if (i == tx_ring->count)
3320 i = 0;
3321
3322 tx_ring->next_to_use = i;
3323
3324 return true;
3325 }
3326
3327 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3328 struct sk_buff *skb, u32 tx_flags)
3329 {
3330 struct e1000_adv_tx_context_desc *context_desc;
3331 struct pci_dev *pdev = tx_ring->pdev;
3332 struct igb_buffer *buffer_info;
3333 u32 info = 0, tu_cmd = 0;
3334 unsigned int i;
3335
3336 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3337 (tx_flags & IGB_TX_FLAGS_VLAN)) {
3338 i = tx_ring->next_to_use;
3339 buffer_info = &tx_ring->buffer_info[i];
3340 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3341
3342 if (tx_flags & IGB_TX_FLAGS_VLAN)
3343 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3344
3345 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3346 if (skb->ip_summed == CHECKSUM_PARTIAL)
3347 info |= skb_network_header_len(skb);
3348
3349 context_desc->vlan_macip_lens = cpu_to_le32(info);
3350
3351 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3352
3353 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3354 __be16 protocol;
3355
3356 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3357 const struct vlan_ethhdr *vhdr =
3358 (const struct vlan_ethhdr*)skb->data;
3359
3360 protocol = vhdr->h_vlan_encapsulated_proto;
3361 } else {
3362 protocol = skb->protocol;
3363 }
3364
3365 switch (protocol) {
3366 case cpu_to_be16(ETH_P_IP):
3367 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3368 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3369 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3370 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3371 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3372 break;
3373 case cpu_to_be16(ETH_P_IPV6):
3374 /* XXX what about other V6 headers?? */
3375 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3376 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3377 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3378 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3379 break;
3380 default:
3381 if (unlikely(net_ratelimit()))
3382 dev_warn(&pdev->dev,
3383 "partial checksum but proto=%x!\n",
3384 skb->protocol);
3385 break;
3386 }
3387 }
3388
3389 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3390 context_desc->seqnum_seed = 0;
3391 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3392 context_desc->mss_l4len_idx =
3393 cpu_to_le32(tx_ring->reg_idx << 4);
3394
3395 buffer_info->time_stamp = jiffies;
3396 buffer_info->next_to_watch = i;
3397 buffer_info->dma = 0;
3398
3399 i++;
3400 if (i == tx_ring->count)
3401 i = 0;
3402 tx_ring->next_to_use = i;
3403
3404 return true;
3405 }
3406 return false;
3407 }
3408
/*
 * Upper bound on the number of bytes a single Tx data descriptor may
 * describe: 2^IGB_MAX_TXD_PWR.  igb_tx_map_adv() BUG()s on any buffer
 * whose length reaches this limit.
 */
#define IGB_MAX_TXD_PWR	16
#define IGB_MAX_DATA_PER_TXD	(1<<IGB_MAX_TXD_PWR)
3411
3412 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3413 unsigned int first)
3414 {
3415 struct igb_buffer *buffer_info;
3416 struct pci_dev *pdev = tx_ring->pdev;
3417 unsigned int len = skb_headlen(skb);
3418 unsigned int count = 0, i;
3419 unsigned int f;
3420 dma_addr_t *map;
3421
3422 i = tx_ring->next_to_use;
3423
3424 if (skb_dma_map(&pdev->dev, skb, DMA_TO_DEVICE)) {
3425 dev_err(&pdev->dev, "TX DMA map failed\n");
3426 return 0;
3427 }
3428
3429 map = skb_shinfo(skb)->dma_maps;
3430
3431 buffer_info = &tx_ring->buffer_info[i];
3432 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3433 buffer_info->length = len;
3434 /* set time_stamp *before* dma to help avoid a possible race */
3435 buffer_info->time_stamp = jiffies;
3436 buffer_info->next_to_watch = i;
3437 buffer_info->dma = skb_shinfo(skb)->dma_head;
3438
3439 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3440 struct skb_frag_struct *frag;
3441
3442 i++;
3443 if (i == tx_ring->count)
3444 i = 0;
3445
3446 frag = &skb_shinfo(skb)->frags[f];
3447 len = frag->size;
3448
3449 buffer_info = &tx_ring->buffer_info[i];
3450 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3451 buffer_info->length = len;
3452 buffer_info->time_stamp = jiffies;
3453 buffer_info->next_to_watch = i;
3454 buffer_info->dma = map[count];
3455 count++;
3456 }
3457
3458 tx_ring->buffer_info[i].skb = skb;
3459 tx_ring->buffer_info[first].next_to_watch = i;
3460
3461 return ++count;
3462 }
3463
3464 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3465 int tx_flags, int count, u32 paylen,
3466 u8 hdr_len)
3467 {
3468 union e1000_adv_tx_desc *tx_desc;
3469 struct igb_buffer *buffer_info;
3470 u32 olinfo_status = 0, cmd_type_len;
3471 unsigned int i = tx_ring->next_to_use;
3472
3473 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3474 E1000_ADVTXD_DCMD_DEXT);
3475
3476 if (tx_flags & IGB_TX_FLAGS_VLAN)
3477 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3478
3479 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3480 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3481
3482 if (tx_flags & IGB_TX_FLAGS_TSO) {
3483 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3484
3485 /* insert tcp checksum */
3486 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3487
3488 /* insert ip checksum */
3489 if (tx_flags & IGB_TX_FLAGS_IPV4)
3490 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3491
3492 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3493 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3494 }
3495
3496 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3497 (tx_flags & (IGB_TX_FLAGS_CSUM |
3498 IGB_TX_FLAGS_TSO |
3499 IGB_TX_FLAGS_VLAN)))
3500 olinfo_status |= tx_ring->reg_idx << 4;
3501
3502 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3503
3504 do {
3505 buffer_info = &tx_ring->buffer_info[i];
3506 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3507 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3508 tx_desc->read.cmd_type_len =
3509 cpu_to_le32(cmd_type_len | buffer_info->length);
3510 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3511 count--;
3512 i++;
3513 if (i == tx_ring->count)
3514 i = 0;
3515 } while (count > 0);
3516
3517 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3518 /* Force memory writes to complete before letting h/w
3519 * know there are new descriptors to fetch. (Only
3520 * applicable for weak-ordered memory model archs,
3521 * such as IA-64). */
3522 wmb();
3523
3524 tx_ring->next_to_use = i;
3525 writel(i, tx_ring->tail);
3526 /* we need this if more than one processor can write to our tail
3527 * at a time, it syncronizes IO on IA64/Altix systems */
3528 mmiowb();
3529 }
3530
3531 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3532 {
3533 struct net_device *netdev = tx_ring->netdev;
3534
3535 netif_stop_subqueue(netdev, tx_ring->queue_index);
3536
3537 /* Herbert's original patch had:
3538 * smp_mb__after_netif_stop_queue();
3539 * but since that doesn't exist yet, just open code it. */
3540 smp_mb();
3541
3542 /* We need to check again in a case another CPU has just
3543 * made room available. */
3544 if (igb_desc_unused(tx_ring) < size)
3545 return -EBUSY;
3546
3547 /* A reprieve! */
3548 netif_wake_subqueue(netdev, tx_ring->queue_index);
3549 tx_ring->tx_stats.restart_queue++;
3550 return 0;
3551 }
3552
/*
 * Make sure @size descriptors are free on @tx_ring, stopping the
 * subqueue if they are not.
 *
 * Returns 0 when enough descriptors are available, otherwise the result
 * of __igb_maybe_stop_tx().
 */
static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	/* only the short-of-room case needs the stop/re-check dance */
	if (igb_desc_unused(tx_ring) < size)
		return __igb_maybe_stop_tx(tx_ring, size);

	return 0;
}
3559
3560 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3561 struct igb_ring *tx_ring)
3562 {
3563 struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3564 unsigned int first;
3565 unsigned int tx_flags = 0;
3566 u8 hdr_len = 0;
3567 int tso = 0, count;
3568 union skb_shared_tx *shtx = skb_tx(skb);
3569
3570 /* need: 1 descriptor per page,
3571 * + 2 desc gap to keep tail from touching head,
3572 * + 1 desc for skb->data,
3573 * + 1 desc for context descriptor,
3574 * otherwise try next time */
3575 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3576 /* this is a hard error */
3577 return NETDEV_TX_BUSY;
3578 }
3579
3580 if (unlikely(shtx->hardware)) {
3581 shtx->in_progress = 1;
3582 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3583 }
3584
3585 if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3586 tx_flags |= IGB_TX_FLAGS_VLAN;
3587 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3588 }
3589
3590 if (skb->protocol == htons(ETH_P_IP))
3591 tx_flags |= IGB_TX_FLAGS_IPV4;
3592
3593 first = tx_ring->next_to_use;
3594 if (skb_is_gso(skb)) {
3595 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3596
3597 if (tso < 0) {
3598 dev_kfree_skb_any(skb);
3599 return NETDEV_TX_OK;
3600 }
3601 }
3602
3603 if (tso)
3604 tx_flags |= IGB_TX_FLAGS_TSO;
3605 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3606 (skb->ip_summed == CHECKSUM_PARTIAL))
3607 tx_flags |= IGB_TX_FLAGS_CSUM;
3608
3609 /*
3610 * count reflects descriptors mapped, if 0 or less then mapping error
3611 * has occured and we need to rewind the descriptor queue
3612 */
3613 count = igb_tx_map_adv(tx_ring, skb, first);
3614 if (count <= 0) {
3615 dev_kfree_skb_any(skb);
3616 tx_ring->buffer_info[first].time_stamp = 0;
3617 tx_ring->next_to_use = first;
3618 return NETDEV_TX_OK;
3619 }
3620
3621 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3622
3623 /* Make sure there is space in the ring for the next send. */
3624 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3625
3626 return NETDEV_TX_OK;
3627 }
3628
3629 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3630 struct net_device *netdev)
3631 {
3632 struct igb_adapter *adapter = netdev_priv(netdev);
3633 struct igb_ring *tx_ring;
3634 int r_idx = 0;
3635
3636 if (test_bit(__IGB_DOWN, &adapter->state)) {
3637 dev_kfree_skb_any(skb);
3638 return NETDEV_TX_OK;
3639 }
3640
3641 if (skb->len <= 0) {
3642 dev_kfree_skb_any(skb);
3643 return NETDEV_TX_OK;
3644 }
3645
3646 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3647 tx_ring = adapter->multi_tx_table[r_idx];
3648
3649 /* This goes back to the question of how to logically map a tx queue
3650 * to a flow. Right now, performance is impacted slightly negatively
3651 * if using multiple tx queues. If the stack breaks away from a
3652 * single qdisc implementation, we can look at this again. */
3653 return igb_xmit_frame_ring_adv(skb, tx_ring);
3654 }
3655
3656 /**
3657 * igb_tx_timeout - Respond to a Tx Hang
3658 * @netdev: network interface device structure
3659 **/
3660 static void igb_tx_timeout(struct net_device *netdev)
3661 {
3662 struct igb_adapter *adapter = netdev_priv(netdev);
3663 struct e1000_hw *hw = &adapter->hw;
3664
3665 /* Do the reset outside of interrupt context */
3666 adapter->tx_timeout_count++;
3667
3668 schedule_work(&adapter->reset_task);
3669 wr32(E1000_EICS,
3670 (adapter->eims_enable_mask & ~adapter->eims_other));
3671 }
3672
3673 static void igb_reset_task(struct work_struct *work)
3674 {
3675 struct igb_adapter *adapter;
3676 adapter = container_of(work, struct igb_adapter, reset_task);
3677
3678 igb_reinit_locked(adapter);
3679 }
3680
3681 /**
3682 * igb_get_stats - Get System Network Statistics
3683 * @netdev: network interface device structure
3684 *
3685 * Returns the address of the device statistics structure.
3686 * The statistics are actually updated from the timer callback.
3687 **/
3688 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3689 {
3690 /* only return the current stats */
3691 return &netdev->stats;
3692 }
3693
3694 /**
3695 * igb_change_mtu - Change the Maximum Transfer Unit
3696 * @netdev: network interface device structure
3697 * @new_mtu: new value for maximum frame size
3698 *
3699 * Returns 0 on success, negative on failure
3700 **/
3701 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3702 {
3703 struct igb_adapter *adapter = netdev_priv(netdev);
3704 struct pci_dev *pdev = adapter->pdev;
3705 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3706 u32 rx_buffer_len, i;
3707
3708 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3709 dev_err(&pdev->dev, "Invalid MTU setting\n");
3710 return -EINVAL;
3711 }
3712
3713 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3714 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3715 return -EINVAL;
3716 }
3717
3718 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3719 msleep(1);
3720
3721 /* igb_down has a dependency on max_frame_size */
3722 adapter->max_frame_size = max_frame;
3723
3724 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3725 * means we reserve 2 more, this pushes us to allocate from the next
3726 * larger slab size.
3727 * i.e. RXBUFFER_2048 --> size-4096 slab
3728 */
3729
3730 if (max_frame <= IGB_RXBUFFER_1024)
3731 rx_buffer_len = IGB_RXBUFFER_1024;
3732 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3733 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3734 else
3735 rx_buffer_len = IGB_RXBUFFER_128;
3736
3737 if (netif_running(netdev))
3738 igb_down(adapter);
3739
3740 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3741 netdev->mtu, new_mtu);
3742 netdev->mtu = new_mtu;
3743
3744 for (i = 0; i < adapter->num_rx_queues; i++)
3745 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3746
3747 if (netif_running(netdev))
3748 igb_up(adapter);
3749 else
3750 igb_reset(adapter);
3751
3752 clear_bit(__IGB_RESETTING, &adapter->state);
3753
3754 return 0;
3755 }
3756
/**
 * igb_update_stats - Update the board statistics counters
 * @adapter: board private structure
 *
 * Sums the per-ring software byte/packet counters into netdev->stats,
 * accumulates the hardware statistics registers into adapter->stats and
 * derives the error counters in netdev->stats from them.  Does nothing
 * while link_speed is 0 or the PCI channel is offline.
 **/

void igb_update_stats(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pdev = adapter->pdev;
	u32 rnbc;
	u16 phy_tmp;
	int i;
	u64 bytes, packets;

#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF

	/*
	 * Prevent stats update while adapter is being reset, or if the pci
	 * connection is down.
	 */
	if (adapter->link_speed == 0)
		return;
	if (pci_channel_offline(pdev))
		return;

	/* Rx totals come from the per-ring software counters; the per-queue
	 * drop count register is folded into both the ring's drop counter
	 * and rx_fifo_errors */
	bytes = 0;
	packets = 0;
	for (i = 0; i < adapter->num_rx_queues; i++) {
		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
		adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
		netdev->stats.rx_fifo_errors += rqdpc_tmp;
		bytes += adapter->rx_ring[i].rx_stats.bytes;
		packets += adapter->rx_ring[i].rx_stats.packets;
	}

	netdev->stats.rx_bytes = bytes;
	netdev->stats.rx_packets = packets;

	/* Tx totals likewise come from the per-ring software counters */
	bytes = 0;
	packets = 0;
	for (i = 0; i < adapter->num_tx_queues; i++) {
		bytes += adapter->tx_ring[i].tx_stats.bytes;
		packets += adapter->tx_ring[i].tx_stats.packets;
	}
	netdev->stats.tx_bytes = bytes;
	netdev->stats.tx_packets = packets;

	/* read stats registers; each value is added to a running total
	 * (presumably the registers clear on read - confirm against the
	 * datasheet) */
	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
	adapter->stats.gprc += rd32(E1000_GPRC);
	adapter->stats.gorc += rd32(E1000_GORCL);
	rd32(E1000_GORCH); /* clear GORCL */
	adapter->stats.bprc += rd32(E1000_BPRC);
	adapter->stats.mprc += rd32(E1000_MPRC);
	adapter->stats.roc += rd32(E1000_ROC);

	adapter->stats.prc64 += rd32(E1000_PRC64);
	adapter->stats.prc127 += rd32(E1000_PRC127);
	adapter->stats.prc255 += rd32(E1000_PRC255);
	adapter->stats.prc511 += rd32(E1000_PRC511);
	adapter->stats.prc1023 += rd32(E1000_PRC1023);
	adapter->stats.prc1522 += rd32(E1000_PRC1522);
	adapter->stats.symerrs += rd32(E1000_SYMERRS);
	adapter->stats.sec += rd32(E1000_SEC);

	adapter->stats.mpc += rd32(E1000_MPC);
	adapter->stats.scc += rd32(E1000_SCC);
	adapter->stats.ecol += rd32(E1000_ECOL);
	adapter->stats.mcc += rd32(E1000_MCC);
	adapter->stats.latecol += rd32(E1000_LATECOL);
	adapter->stats.dc += rd32(E1000_DC);
	adapter->stats.rlec += rd32(E1000_RLEC);
	adapter->stats.xonrxc += rd32(E1000_XONRXC);
	adapter->stats.xontxc += rd32(E1000_XONTXC);
	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
	adapter->stats.fcruc += rd32(E1000_FCRUC);
	adapter->stats.gptc += rd32(E1000_GPTC);
	adapter->stats.gotc += rd32(E1000_GOTCL);
	rd32(E1000_GOTCH); /* clear GOTCL */
	/* RNBC is counted both in adapter->stats and in rx_fifo_errors */
	rnbc = rd32(E1000_RNBC);
	adapter->stats.rnbc += rnbc;
	netdev->stats.rx_fifo_errors += rnbc;
	adapter->stats.ruc += rd32(E1000_RUC);
	adapter->stats.rfc += rd32(E1000_RFC);
	adapter->stats.rjc += rd32(E1000_RJC);
	/* NOTE(review): only the high dwords (TORH/TOTH) are read for
	 * tor/tot - confirm this is intended */
	adapter->stats.tor += rd32(E1000_TORH);
	adapter->stats.tot += rd32(E1000_TOTH);
	adapter->stats.tpr += rd32(E1000_TPR);

	adapter->stats.ptc64 += rd32(E1000_PTC64);
	adapter->stats.ptc127 += rd32(E1000_PTC127);
	adapter->stats.ptc255 += rd32(E1000_PTC255);
	adapter->stats.ptc511 += rd32(E1000_PTC511);
	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
	adapter->stats.ptc1522 += rd32(E1000_PTC1522);

	adapter->stats.mptc += rd32(E1000_MPTC);
	adapter->stats.bptc += rd32(E1000_BPTC);

	/* used for adaptive IFS */
	hw->mac.tx_packet_delta = rd32(E1000_TPT);
	adapter->stats.tpt += hw->mac.tx_packet_delta;
	hw->mac.collision_delta = rd32(E1000_COLC);
	adapter->stats.colc += hw->mac.collision_delta;

	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
	adapter->stats.rxerrc += rd32(E1000_RXERRC);
	adapter->stats.tncrs += rd32(E1000_TNCRS);
	adapter->stats.tsctc += rd32(E1000_TSCTC);
	adapter->stats.tsctfc += rd32(E1000_TSCTFC);

	adapter->stats.iac += rd32(E1000_IAC);
	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);

	/* Fill out the OS statistics structure */
	netdev->stats.multicast = adapter->stats.mprc;
	netdev->stats.collisions = adapter->stats.colc;

	/* Rx Errors */

	/* RLEC on some newer hardware can be incorrect so build
	 * our own version based on RUC and ROC */
	netdev->stats.rx_errors = adapter->stats.rxerrc +
		adapter->stats.crcerrs + adapter->stats.algnerrc +
		adapter->stats.ruc + adapter->stats.roc +
		adapter->stats.cexterr;
	netdev->stats.rx_length_errors = adapter->stats.ruc +
					 adapter->stats.roc;
	netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
	netdev->stats.rx_frame_errors = adapter->stats.algnerrc;
	netdev->stats.rx_missed_errors = adapter->stats.mpc;

	/* Tx Errors */
	netdev->stats.tx_errors = adapter->stats.ecol +
				  adapter->stats.latecol;
	netdev->stats.tx_aborted_errors = adapter->stats.ecol;
	netdev->stats.tx_window_errors = adapter->stats.latecol;
	netdev->stats.tx_carrier_errors = adapter->stats.tncrs;

	/* Tx Dropped needs to be maintained elsewhere */

	/* Phy Stats: idle errors are only collected on copper media at
	 * gigabit speed, and only when the PHY register read succeeds */
	if (hw->phy.media_type == e1000_media_type_copper) {
		if ((adapter->link_speed == SPEED_1000) &&
		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
			adapter->phy_stats.idle_errors += phy_tmp;
		}
	}

	/* Management Stats */
	adapter->stats.mgptc += rd32(E1000_MGTPTC);
	adapter->stats.mgprc += rd32(E1000_MGTPRC);
	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
}
3921
3922 static irqreturn_t igb_msix_other(int irq, void *data)
3923 {
3924 struct igb_adapter *adapter = data;
3925 struct e1000_hw *hw = &adapter->hw;
3926 u32 icr = rd32(E1000_ICR);
3927 /* reading ICR causes bit 31 of EICR to be cleared */
3928
3929 if (icr & E1000_ICR_DOUTSYNC) {
3930 /* HW is reporting DMA is out of sync */
3931 adapter->stats.doosync++;
3932 }
3933
3934 /* Check for a mailbox event */
3935 if (icr & E1000_ICR_VMMB)
3936 igb_msg_task(adapter);
3937
3938 if (icr & E1000_ICR_LSC) {
3939 hw->mac.get_link_status = 1;
3940 /* guard against interrupt when we're going down */
3941 if (!test_bit(__IGB_DOWN, &adapter->state))
3942 mod_timer(&adapter->watchdog_timer, jiffies + 1);
3943 }
3944
3945 if (adapter->vfs_allocated_count)
3946 wr32(E1000_IMS, E1000_IMS_LSC |
3947 E1000_IMS_VMMB |
3948 E1000_IMS_DOUTSYNC);
3949 else
3950 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
3951 wr32(E1000_EIMS, adapter->eims_other);
3952
3953 return IRQ_HANDLED;
3954 }
3955
3956 static void igb_write_itr(struct igb_q_vector *q_vector)
3957 {
3958 u32 itr_val = q_vector->itr_val & 0x7FFC;
3959
3960 if (!q_vector->set_itr)
3961 return;
3962
3963 if (!itr_val)
3964 itr_val = 0x4;
3965
3966 if (q_vector->itr_shift)
3967 itr_val |= itr_val << q_vector->itr_shift;
3968 else
3969 itr_val |= 0x8000000;
3970
3971 writel(itr_val, q_vector->itr_register);
3972 q_vector->set_itr = 0;
3973 }
3974
3975 static irqreturn_t igb_msix_ring(int irq, void *data)
3976 {
3977 struct igb_q_vector *q_vector = data;
3978
3979 /* Write the ITR value calculated from the previous interrupt. */
3980 igb_write_itr(q_vector);
3981
3982 napi_schedule(&q_vector->napi);
3983
3984 return IRQ_HANDLED;
3985 }
3986
3987 #ifdef CONFIG_IGB_DCA
3988 static void igb_update_dca(struct igb_q_vector *q_vector)
3989 {
3990 struct igb_adapter *adapter = q_vector->adapter;
3991 struct e1000_hw *hw = &adapter->hw;
3992 int cpu = get_cpu();
3993
3994 if (q_vector->cpu == cpu)
3995 goto out_no_update;
3996
3997 if (q_vector->tx_ring) {
3998 int q = q_vector->tx_ring->reg_idx;
3999 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4000 if (hw->mac.type == e1000_82575) {
4001 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4002 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4003 } else {
4004 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4005 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4006 E1000_DCA_TXCTRL_CPUID_SHIFT;
4007 }
4008 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4009 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4010 }
4011 if (q_vector->rx_ring) {
4012 int q = q_vector->rx_ring->reg_idx;
4013 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4014 if (hw->mac.type == e1000_82575) {
4015 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4016 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4017 } else {
4018 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4019 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4020 E1000_DCA_RXCTRL_CPUID_SHIFT;
4021 }
4022 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4023 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4024 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4025 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4026 }
4027 q_vector->cpu = cpu;
4028 out_no_update:
4029 put_cpu();
4030 }
4031
4032 static void igb_setup_dca(struct igb_adapter *adapter)
4033 {
4034 struct e1000_hw *hw = &adapter->hw;
4035 int i;
4036
4037 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4038 return;
4039
4040 /* Always use CB2 mode, difference is masked in the CB driver. */
4041 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4042
4043 for (i = 0; i < adapter->num_q_vectors; i++) {
4044 struct igb_q_vector *q_vector = adapter->q_vector[i];
4045 q_vector->cpu = -1;
4046 igb_update_dca(q_vector);
4047 }
4048 }
4049
4050 static int __igb_notify_dca(struct device *dev, void *data)
4051 {
4052 struct net_device *netdev = dev_get_drvdata(dev);
4053 struct igb_adapter *adapter = netdev_priv(netdev);
4054 struct pci_dev *pdev = adapter->pdev;
4055 struct e1000_hw *hw = &adapter->hw;
4056 unsigned long event = *(unsigned long *)data;
4057
4058 switch (event) {
4059 case DCA_PROVIDER_ADD:
4060 /* if already enabled, don't do it again */
4061 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4062 break;
4063 if (dca_add_requester(dev) == 0) {
4064 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4065 dev_info(&pdev->dev, "DCA enabled\n");
4066 igb_setup_dca(adapter);
4067 break;
4068 }
4069 /* Fall Through since DCA is disabled. */
4070 case DCA_PROVIDER_REMOVE:
4071 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4072 /* without this a class_device is left
4073 * hanging around in the sysfs model */
4074 dca_remove_requester(dev);
4075 dev_info(&pdev->dev, "DCA disabled\n");
4076 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4077 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4078 }
4079 break;
4080 }
4081
4082 return 0;
4083 }
4084
4085 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4086 void *p)
4087 {
4088 int ret_val;
4089
4090 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4091 __igb_notify_dca);
4092
4093 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4094 }
4095 #endif /* CONFIG_IGB_DCA */
4096
4097 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4098 {
4099 struct e1000_hw *hw = &adapter->hw;
4100 u32 ping;
4101 int i;
4102
4103 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4104 ping = E1000_PF_CONTROL_MSG;
4105 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4106 ping |= E1000_VT_MSGTYPE_CTS;
4107 igb_write_mbx(hw, &ping, 1, i);
4108 }
4109 }
4110
4111 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4112 {
4113 struct e1000_hw *hw = &adapter->hw;
4114 u32 vmolr = rd32(E1000_VMOLR(vf));
4115 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4116
4117 vf_data->flags |= ~(IGB_VF_FLAG_UNI_PROMISC |
4118 IGB_VF_FLAG_MULTI_PROMISC);
4119 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4120
4121 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4122 vmolr |= E1000_VMOLR_MPME;
4123 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4124 } else {
4125 /*
4126 * if we have hashes and we are clearing a multicast promisc
4127 * flag we need to write the hashes to the MTA as this step
4128 * was previously skipped
4129 */
4130 if (vf_data->num_vf_mc_hashes > 30) {
4131 vmolr |= E1000_VMOLR_MPME;
4132 } else if (vf_data->num_vf_mc_hashes) {
4133 int j;
4134 vmolr |= E1000_VMOLR_ROMPE;
4135 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4136 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4137 }
4138 }
4139
4140 wr32(E1000_VMOLR(vf), vmolr);
4141
4142 /* there are flags left unprocessed, likely not supported */
4143 if (*msgbuf & E1000_VT_MSGINFO_MASK)
4144 return -EINVAL;
4145
4146 return 0;
4147
4148 }
4149
4150 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4151 u32 *msgbuf, u32 vf)
4152 {
4153 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4154 u16 *hash_list = (u16 *)&msgbuf[1];
4155 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4156 int i;
4157
4158 /* salt away the number of multicast addresses assigned
4159 * to this VF for later use to restore when the PF multi cast
4160 * list changes
4161 */
4162 vf_data->num_vf_mc_hashes = n;
4163
4164 /* only up to 30 hash values supported */
4165 if (n > 30)
4166 n = 30;
4167
4168 /* store the hashes for later use */
4169 for (i = 0; i < n; i++)
4170 vf_data->vf_mc_hashes[i] = hash_list[i];
4171
4172 /* Flush and reset the mta with the new values */
4173 igb_set_rx_mode(adapter->netdev);
4174
4175 return 0;
4176 }
4177
4178 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4179 {
4180 struct e1000_hw *hw = &adapter->hw;
4181 struct vf_data_storage *vf_data;
4182 int i, j;
4183
4184 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4185 u32 vmolr = rd32(E1000_VMOLR(i));
4186 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4187
4188 vf_data = &adapter->vf_data[i];
4189
4190 if ((vf_data->num_vf_mc_hashes > 30) ||
4191 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4192 vmolr |= E1000_VMOLR_MPME;
4193 } else if (vf_data->num_vf_mc_hashes) {
4194 vmolr |= E1000_VMOLR_ROMPE;
4195 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4196 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4197 }
4198 wr32(E1000_VMOLR(i), vmolr);
4199 }
4200 }
4201
4202 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4203 {
4204 struct e1000_hw *hw = &adapter->hw;
4205 u32 pool_mask, reg, vid;
4206 int i;
4207
4208 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4209
4210 /* Find the vlan filter for this id */
4211 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4212 reg = rd32(E1000_VLVF(i));
4213
4214 /* remove the vf from the pool */
4215 reg &= ~pool_mask;
4216
4217 /* if pool is empty then remove entry from vfta */
4218 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4219 (reg & E1000_VLVF_VLANID_ENABLE)) {
4220 reg = 0;
4221 vid = reg & E1000_VLVF_VLANID_MASK;
4222 igb_vfta_set(hw, vid, false);
4223 }
4224
4225 wr32(E1000_VLVF(i), reg);
4226 }
4227
4228 adapter->vf_data[vf].vlans_enabled = 0;
4229 }
4230
4231 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4232 {
4233 struct e1000_hw *hw = &adapter->hw;
4234 u32 reg, i;
4235
4236 /* The vlvf table only exists on 82576 hardware and newer */
4237 if (hw->mac.type < e1000_82576)
4238 return -1;
4239
4240 /* we only need to do this if VMDq is enabled */
4241 if (!adapter->vfs_allocated_count)
4242 return -1;
4243
4244 /* Find the vlan filter for this id */
4245 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4246 reg = rd32(E1000_VLVF(i));
4247 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4248 vid == (reg & E1000_VLVF_VLANID_MASK))
4249 break;
4250 }
4251
4252 if (add) {
4253 if (i == E1000_VLVF_ARRAY_SIZE) {
4254 /* Did not find a matching VLAN ID entry that was
4255 * enabled. Search for a free filter entry, i.e.
4256 * one without the enable bit set
4257 */
4258 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4259 reg = rd32(E1000_VLVF(i));
4260 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4261 break;
4262 }
4263 }
4264 if (i < E1000_VLVF_ARRAY_SIZE) {
4265 /* Found an enabled/available entry */
4266 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4267
4268 /* if !enabled we need to set this up in vfta */
4269 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4270 /* add VID to filter table */
4271 igb_vfta_set(hw, vid, true);
4272 reg |= E1000_VLVF_VLANID_ENABLE;
4273 }
4274 reg &= ~E1000_VLVF_VLANID_MASK;
4275 reg |= vid;
4276 wr32(E1000_VLVF(i), reg);
4277
4278 /* do not modify RLPML for PF devices */
4279 if (vf >= adapter->vfs_allocated_count)
4280 return 0;
4281
4282 if (!adapter->vf_data[vf].vlans_enabled) {
4283 u32 size;
4284 reg = rd32(E1000_VMOLR(vf));
4285 size = reg & E1000_VMOLR_RLPML_MASK;
4286 size += 4;
4287 reg &= ~E1000_VMOLR_RLPML_MASK;
4288 reg |= size;
4289 wr32(E1000_VMOLR(vf), reg);
4290 }
4291
4292 adapter->vf_data[vf].vlans_enabled++;
4293 return 0;
4294 }
4295 } else {
4296 if (i < E1000_VLVF_ARRAY_SIZE) {
4297 /* remove vf from the pool */
4298 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4299 /* if pool is empty then remove entry from vfta */
4300 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4301 reg = 0;
4302 igb_vfta_set(hw, vid, false);
4303 }
4304 wr32(E1000_VLVF(i), reg);
4305
4306 /* do not modify RLPML for PF devices */
4307 if (vf >= adapter->vfs_allocated_count)
4308 return 0;
4309
4310 adapter->vf_data[vf].vlans_enabled--;
4311 if (!adapter->vf_data[vf].vlans_enabled) {
4312 u32 size;
4313 reg = rd32(E1000_VMOLR(vf));
4314 size = reg & E1000_VMOLR_RLPML_MASK;
4315 size -= 4;
4316 reg &= ~E1000_VMOLR_RLPML_MASK;
4317 reg |= size;
4318 wr32(E1000_VMOLR(vf), reg);
4319 }
4320 return 0;
4321 }
4322 }
4323 return -1;
4324 }
4325
4326 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4327 {
4328 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4329 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4330
4331 return igb_vlvf_set(adapter, vid, add, vf);
4332 }
4333
4334 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4335 {
4336 /* clear all flags */
4337 adapter->vf_data[vf].flags = 0;
4338 adapter->vf_data[vf].last_nack = jiffies;
4339
4340 /* reset offloads to defaults */
4341 igb_set_vmolr(adapter, vf);
4342
4343 /* reset vlans for device */
4344 igb_clear_vf_vfta(adapter, vf);
4345
4346 /* reset multicast table array for vf */
4347 adapter->vf_data[vf].num_vf_mc_hashes = 0;
4348
4349 /* Flush and reset the mta with the new values */
4350 igb_set_rx_mode(adapter->netdev);
4351 }
4352
4353 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4354 {
4355 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4356
4357 /* generate a new mac address as we were hotplug removed/added */
4358 random_ether_addr(vf_mac);
4359
4360 /* process remaining reset events */
4361 igb_vf_reset(adapter, vf);
4362 }
4363
4364 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4365 {
4366 struct e1000_hw *hw = &adapter->hw;
4367 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4368 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4369 u32 reg, msgbuf[3];
4370 u8 *addr = (u8 *)(&msgbuf[1]);
4371
4372 /* process all the same items cleared in a function level reset */
4373 igb_vf_reset(adapter, vf);
4374
4375 /* set vf mac address */
4376 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4377
4378 /* enable transmit and receive for vf */
4379 reg = rd32(E1000_VFTE);
4380 wr32(E1000_VFTE, reg | (1 << vf));
4381 reg = rd32(E1000_VFRE);
4382 wr32(E1000_VFRE, reg | (1 << vf));
4383
4384 adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4385
4386 /* reply to reset with ack and vf mac address */
4387 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4388 memcpy(addr, vf_mac, 6);
4389 igb_write_mbx(hw, msgbuf, 3, vf);
4390 }
4391
4392 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4393 {
4394 unsigned char *addr = (char *)&msg[1];
4395 int err = -1;
4396
4397 if (is_valid_ether_addr(addr))
4398 err = igb_set_vf_mac(adapter, vf, addr);
4399
4400 return err;
4401 }
4402
4403 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4404 {
4405 struct e1000_hw *hw = &adapter->hw;
4406 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4407 u32 msg = E1000_VT_MSGTYPE_NACK;
4408
4409 /* if device isn't clear to send it shouldn't be reading either */
4410 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4411 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4412 igb_write_mbx(hw, &msg, 1, vf);
4413 vf_data->last_nack = jiffies;
4414 }
4415 }
4416
4417 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4418 {
4419 struct pci_dev *pdev = adapter->pdev;
4420 u32 msgbuf[E1000_VFMAILBOX_SIZE];
4421 struct e1000_hw *hw = &adapter->hw;
4422 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4423 s32 retval;
4424
4425 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4426
4427 if (retval)
4428 dev_err(&pdev->dev, "Error receiving message from VF\n");
4429
4430 /* this is a message we already processed, do nothing */
4431 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4432 return;
4433
4434 /*
4435 * until the vf completes a reset it should not be
4436 * allowed to start any configuration.
4437 */
4438
4439 if (msgbuf[0] == E1000_VF_RESET) {
4440 igb_vf_reset_msg(adapter, vf);
4441 return;
4442 }
4443
4444 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4445 msgbuf[0] = E1000_VT_MSGTYPE_NACK;
4446 if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4447 igb_write_mbx(hw, msgbuf, 1, vf);
4448 vf_data->last_nack = jiffies;
4449 }
4450 return;
4451 }
4452
4453 switch ((msgbuf[0] & 0xFFFF)) {
4454 case E1000_VF_SET_MAC_ADDR:
4455 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4456 break;
4457 case E1000_VF_SET_PROMISC:
4458 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4459 break;
4460 case E1000_VF_SET_MULTICAST:
4461 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4462 break;
4463 case E1000_VF_SET_LPE:
4464 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4465 break;
4466 case E1000_VF_SET_VLAN:
4467 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4468 break;
4469 default:
4470 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4471 retval = -1;
4472 break;
4473 }
4474
4475 /* notify the VF of the results of what it sent us */
4476 if (retval)
4477 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4478 else
4479 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4480
4481 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4482
4483 igb_write_mbx(hw, msgbuf, 1, vf);
4484 }
4485
4486 static void igb_msg_task(struct igb_adapter *adapter)
4487 {
4488 struct e1000_hw *hw = &adapter->hw;
4489 u32 vf;
4490
4491 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4492 /* process any reset requests */
4493 if (!igb_check_for_rst(hw, vf))
4494 igb_vf_reset_event(adapter, vf);
4495
4496 /* process any messages pending */
4497 if (!igb_check_for_msg(hw, vf))
4498 igb_rcv_msg_from_vf(adapter, vf);
4499
4500 /* process any acks */
4501 if (!igb_check_for_ack(hw, vf))
4502 igb_rcv_ack_from_vf(adapter, vf);
4503 }
4504 }
4505
4506 /**
4507 * igb_set_uta - Set unicast filter table address
4508 * @adapter: board private structure
4509 *
4510 * The unicast table address is a register array of 32-bit registers.
4511 * The table is meant to be used in a way similar to how the MTA is used
4512 * however due to certain limitations in the hardware it is necessary to
4513 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscous
4514 * enable bit to allow vlan tag stripping when promiscous mode is enabled
4515 **/
4516 static void igb_set_uta(struct igb_adapter *adapter)
4517 {
4518 struct e1000_hw *hw = &adapter->hw;
4519 int i;
4520
4521 /* The UTA table only exists on 82576 hardware and newer */
4522 if (hw->mac.type < e1000_82576)
4523 return;
4524
4525 /* we only need to do this if VMDq is enabled */
4526 if (!adapter->vfs_allocated_count)
4527 return;
4528
4529 for (i = 0; i < hw->mac.uta_reg_count; i++)
4530 array_wr32(E1000_UTA, i, ~0);
4531 }
4532
4533 /**
4534 * igb_intr_msi - Interrupt Handler
4535 * @irq: interrupt number
4536 * @data: pointer to a network interface device structure
4537 **/
4538 static irqreturn_t igb_intr_msi(int irq, void *data)
4539 {
4540 struct igb_adapter *adapter = data;
4541 struct igb_q_vector *q_vector = adapter->q_vector[0];
4542 struct e1000_hw *hw = &adapter->hw;
4543 /* read ICR disables interrupts using IAM */
4544 u32 icr = rd32(E1000_ICR);
4545
4546 igb_write_itr(q_vector);
4547
4548 if (icr & E1000_ICR_DOUTSYNC) {
4549 /* HW is reporting DMA is out of sync */
4550 adapter->stats.doosync++;
4551 }
4552
4553 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4554 hw->mac.get_link_status = 1;
4555 if (!test_bit(__IGB_DOWN, &adapter->state))
4556 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4557 }
4558
4559 napi_schedule(&q_vector->napi);
4560
4561 return IRQ_HANDLED;
4562 }
4563
4564 /**
4565 * igb_intr - Legacy Interrupt Handler
4566 * @irq: interrupt number
4567 * @data: pointer to a network interface device structure
4568 **/
4569 static irqreturn_t igb_intr(int irq, void *data)
4570 {
4571 struct igb_adapter *adapter = data;
4572 struct igb_q_vector *q_vector = adapter->q_vector[0];
4573 struct e1000_hw *hw = &adapter->hw;
4574 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
4575 * need for the IMC write */
4576 u32 icr = rd32(E1000_ICR);
4577 if (!icr)
4578 return IRQ_NONE; /* Not our interrupt */
4579
4580 igb_write_itr(q_vector);
4581
4582 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4583 * not set, then the adapter didn't send an interrupt */
4584 if (!(icr & E1000_ICR_INT_ASSERTED))
4585 return IRQ_NONE;
4586
4587 if (icr & E1000_ICR_DOUTSYNC) {
4588 /* HW is reporting DMA is out of sync */
4589 adapter->stats.doosync++;
4590 }
4591
4592 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4593 hw->mac.get_link_status = 1;
4594 /* guard against interrupt when we're going down */
4595 if (!test_bit(__IGB_DOWN, &adapter->state))
4596 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4597 }
4598
4599 napi_schedule(&q_vector->napi);
4600
4601 return IRQ_HANDLED;
4602 }
4603
4604 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4605 {
4606 struct igb_adapter *adapter = q_vector->adapter;
4607 struct e1000_hw *hw = &adapter->hw;
4608
4609 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4610 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4611 if (!adapter->msix_entries)
4612 igb_set_itr(adapter);
4613 else
4614 igb_update_ring_itr(q_vector);
4615 }
4616
4617 if (!test_bit(__IGB_DOWN, &adapter->state)) {
4618 if (adapter->msix_entries)
4619 wr32(E1000_EIMS, q_vector->eims_value);
4620 else
4621 igb_irq_enable(adapter);
4622 }
4623 }
4624
4625 /**
4626 * igb_poll - NAPI Rx polling callback
4627 * @napi: napi polling structure
4628 * @budget: count of how many packets we should handle
4629 **/
4630 static int igb_poll(struct napi_struct *napi, int budget)
4631 {
4632 struct igb_q_vector *q_vector = container_of(napi,
4633 struct igb_q_vector,
4634 napi);
4635 int tx_clean_complete = 1, work_done = 0;
4636
4637 #ifdef CONFIG_IGB_DCA
4638 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4639 igb_update_dca(q_vector);
4640 #endif
4641 if (q_vector->tx_ring)
4642 tx_clean_complete = igb_clean_tx_irq(q_vector);
4643
4644 if (q_vector->rx_ring)
4645 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4646
4647 if (!tx_clean_complete)
4648 work_done = budget;
4649
4650 /* If not enough Rx work done, exit the polling mode */
4651 if (work_done < budget) {
4652 napi_complete(napi);
4653 igb_ring_irq_enable(q_vector);
4654 }
4655
4656 return work_done;
4657 }
4658
4659 /**
4660 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4661 * @adapter: board private structure
4662 * @shhwtstamps: timestamp structure to update
4663 * @regval: unsigned 64bit system time value.
4664 *
4665 * We need to convert the system time value stored in the RX/TXSTMP registers
4666 * into a hwtstamp which can be used by the upper level timestamping functions
4667 */
4668 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4669 struct skb_shared_hwtstamps *shhwtstamps,
4670 u64 regval)
4671 {
4672 u64 ns;
4673
4674 ns = timecounter_cyc2time(&adapter->clock, regval);
4675 timecompare_update(&adapter->compare, ns);
4676 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4677 shhwtstamps->hwtstamp = ns_to_ktime(ns);
4678 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4679 }
4680
4681 /**
4682 * igb_tx_hwtstamp - utility function which checks for TX time stamp
4683 * @q_vector: pointer to q_vector containing needed info
4684 * @skb: packet that was just sent
4685 *
4686 * If we were asked to do hardware stamping and such a time stamp is
4687 * available, then it must have been for this skb here because we only
4688 * allow only one such packet into the queue.
4689 */
4690 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4691 {
4692 struct igb_adapter *adapter = q_vector->adapter;
4693 union skb_shared_tx *shtx = skb_tx(skb);
4694 struct e1000_hw *hw = &adapter->hw;
4695 struct skb_shared_hwtstamps shhwtstamps;
4696 u64 regval;
4697
4698 /* if skb does not support hw timestamp or TX stamp not valid exit */
4699 if (likely(!shtx->hardware) ||
4700 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4701 return;
4702
4703 regval = rd32(E1000_TXSTMPL);
4704 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4705
4706 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4707 skb_tstamp_tx(skb, &shhwtstamps);
4708 }
4709
4710 /**
4711 * igb_clean_tx_irq - Reclaim resources after transmit completes
4712 * @q_vector: pointer to q_vector containing needed info
4713 * returns true if ring is completely cleaned
4714 **/
4715 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4716 {
4717 struct igb_adapter *adapter = q_vector->adapter;
4718 struct igb_ring *tx_ring = q_vector->tx_ring;
4719 struct net_device *netdev = tx_ring->netdev;
4720 struct e1000_hw *hw = &adapter->hw;
4721 struct igb_buffer *buffer_info;
4722 struct sk_buff *skb;
4723 union e1000_adv_tx_desc *tx_desc, *eop_desc;
4724 unsigned int total_bytes = 0, total_packets = 0;
4725 unsigned int i, eop, count = 0;
4726 bool cleaned = false;
4727
4728 i = tx_ring->next_to_clean;
4729 eop = tx_ring->buffer_info[i].next_to_watch;
4730 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4731
4732 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
4733 (count < tx_ring->count)) {
4734 for (cleaned = false; !cleaned; count++) {
4735 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4736 buffer_info = &tx_ring->buffer_info[i];
4737 cleaned = (i == eop);
4738 skb = buffer_info->skb;
4739
4740 if (skb) {
4741 unsigned int segs, bytecount;
4742 /* gso_segs is currently only valid for tcp */
4743 segs = skb_shinfo(skb)->gso_segs ?: 1;
4744 /* multiply data chunks by size of headers */
4745 bytecount = ((segs - 1) * skb_headlen(skb)) +
4746 skb->len;
4747 total_packets += segs;
4748 total_bytes += bytecount;
4749
4750 igb_tx_hwtstamp(q_vector, skb);
4751 }
4752
4753 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4754 tx_desc->wb.status = 0;
4755
4756 i++;
4757 if (i == tx_ring->count)
4758 i = 0;
4759 }
4760 eop = tx_ring->buffer_info[i].next_to_watch;
4761 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4762 }
4763
4764 tx_ring->next_to_clean = i;
4765
4766 if (unlikely(count &&
4767 netif_carrier_ok(netdev) &&
4768 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
4769 /* Make sure that anybody stopping the queue after this
4770 * sees the new next_to_clean.
4771 */
4772 smp_mb();
4773 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
4774 !(test_bit(__IGB_DOWN, &adapter->state))) {
4775 netif_wake_subqueue(netdev, tx_ring->queue_index);
4776 tx_ring->tx_stats.restart_queue++;
4777 }
4778 }
4779
4780 if (tx_ring->detect_tx_hung) {
4781 /* Detect a transmit hang in hardware, this serializes the
4782 * check with the clearing of time_stamp and movement of i */
4783 tx_ring->detect_tx_hung = false;
4784 if (tx_ring->buffer_info[i].time_stamp &&
4785 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
4786 (adapter->tx_timeout_factor * HZ))
4787 && !(rd32(E1000_STATUS) &
4788 E1000_STATUS_TXOFF)) {
4789
4790 /* detected Tx unit hang */
4791 dev_err(&tx_ring->pdev->dev,
4792 "Detected Tx Unit Hang\n"
4793 " Tx Queue <%d>\n"
4794 " TDH <%x>\n"
4795 " TDT <%x>\n"
4796 " next_to_use <%x>\n"
4797 " next_to_clean <%x>\n"
4798 "buffer_info[next_to_clean]\n"
4799 " time_stamp <%lx>\n"
4800 " next_to_watch <%x>\n"
4801 " jiffies <%lx>\n"
4802 " desc.status <%x>\n",
4803 tx_ring->queue_index,
4804 readl(tx_ring->head),
4805 readl(tx_ring->tail),
4806 tx_ring->next_to_use,
4807 tx_ring->next_to_clean,
4808 tx_ring->buffer_info[eop].time_stamp,
4809 eop,
4810 jiffies,
4811 eop_desc->wb.status);
4812 netif_stop_subqueue(netdev, tx_ring->queue_index);
4813 }
4814 }
4815 tx_ring->total_bytes += total_bytes;
4816 tx_ring->total_packets += total_packets;
4817 tx_ring->tx_stats.bytes += total_bytes;
4818 tx_ring->tx_stats.packets += total_packets;
4819 return (count < tx_ring->count);
4820 }
4821
4822 /**
4823 * igb_receive_skb - helper function to handle rx indications
4824 * @q_vector: structure containing interrupt and ring information
4825 * @skb: packet to send up
4826 * @vlan_tag: vlan tag for packet
4827 **/
4828 static void igb_receive_skb(struct igb_q_vector *q_vector,
4829 struct sk_buff *skb,
4830 u16 vlan_tag)
4831 {
4832 struct igb_adapter *adapter = q_vector->adapter;
4833
4834 if (vlan_tag)
4835 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
4836 vlan_tag, skb);
4837 else
4838 napi_gro_receive(&q_vector->napi, skb);
4839 }
4840
4841 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
4842 u32 status_err, struct sk_buff *skb)
4843 {
4844 skb->ip_summed = CHECKSUM_NONE;
4845
4846 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
4847 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
4848 (status_err & E1000_RXD_STAT_IXSM))
4849 return;
4850
4851 /* TCP/UDP checksum error bit is set */
4852 if (status_err &
4853 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
4854 /*
4855 * work around errata with sctp packets where the TCPE aka
4856 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
4857 * packets, (aka let the stack check the crc32c)
4858 */
4859 if ((skb->len == 60) &&
4860 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
4861 ring->rx_stats.csum_err++;
4862
4863 /* let the stack verify checksum errors */
4864 return;
4865 }
4866 /* It must be a TCP or UDP packet with a valid checksum */
4867 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
4868 skb->ip_summed = CHECKSUM_UNNECESSARY;
4869
4870 dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
4871 }
4872
4873 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
4874 struct sk_buff *skb)
4875 {
4876 struct igb_adapter *adapter = q_vector->adapter;
4877 struct e1000_hw *hw = &adapter->hw;
4878 u64 regval;
4879
4880 /*
4881 * If this bit is set, then the RX registers contain the time stamp. No
4882 * other packet will be time stamped until we read these registers, so
4883 * read the registers to make them available again. Because only one
4884 * packet can be time stamped at a time, we know that the register
4885 * values must belong to this one here and therefore we don't need to
4886 * compare any of the additional attributes stored for it.
4887 *
4888 * If nothing went wrong, then it should have a skb_shared_tx that we
4889 * can turn into a skb_shared_hwtstamps.
4890 */
4891 if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
4892 return;
4893 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
4894 return;
4895
4896 regval = rd32(E1000_RXSTMPL);
4897 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4898
4899 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
4900 }
4901 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
4902 union e1000_adv_rx_desc *rx_desc)
4903 {
4904 /* HW will not DMA in data larger than the given buffer, even if it
4905 * parses the (NFS, of course) header to be larger. In that case, it
4906 * fills the header buffer and spills the rest into the page.
4907 */
4908 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
4909 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
4910 if (hlen > rx_ring->rx_buffer_len)
4911 hlen = rx_ring->rx_buffer_len;
4912 return hlen;
4913 }
4914
4915 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
4916 int *work_done, int budget)
4917 {
4918 struct igb_ring *rx_ring = q_vector->rx_ring;
4919 struct net_device *netdev = rx_ring->netdev;
4920 struct pci_dev *pdev = rx_ring->pdev;
4921 union e1000_adv_rx_desc *rx_desc , *next_rxd;
4922 struct igb_buffer *buffer_info , *next_buffer;
4923 struct sk_buff *skb;
4924 bool cleaned = false;
4925 int cleaned_count = 0;
4926 unsigned int total_bytes = 0, total_packets = 0;
4927 unsigned int i;
4928 u32 staterr;
4929 u16 length;
4930 u16 vlan_tag;
4931
4932 i = rx_ring->next_to_clean;
4933 buffer_info = &rx_ring->buffer_info[i];
4934 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
4935 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4936
4937 while (staterr & E1000_RXD_STAT_DD) {
4938 if (*work_done >= budget)
4939 break;
4940 (*work_done)++;
4941
4942 skb = buffer_info->skb;
4943 prefetch(skb->data - NET_IP_ALIGN);
4944 buffer_info->skb = NULL;
4945
4946 i++;
4947 if (i == rx_ring->count)
4948 i = 0;
4949
4950 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
4951 prefetch(next_rxd);
4952 next_buffer = &rx_ring->buffer_info[i];
4953
4954 length = le16_to_cpu(rx_desc->wb.upper.length);
4955 cleaned = true;
4956 cleaned_count++;
4957
4958 if (buffer_info->dma) {
4959 pci_unmap_single(pdev, buffer_info->dma,
4960 rx_ring->rx_buffer_len,
4961 PCI_DMA_FROMDEVICE);
4962 buffer_info->dma = 0;
4963 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
4964 skb_put(skb, length);
4965 goto send_up;
4966 }
4967 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
4968 }
4969
4970 if (length) {
4971 pci_unmap_page(pdev, buffer_info->page_dma,
4972 PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
4973 buffer_info->page_dma = 0;
4974
4975 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
4976 buffer_info->page,
4977 buffer_info->page_offset,
4978 length);
4979
4980 if (page_count(buffer_info->page) != 1)
4981 buffer_info->page = NULL;
4982 else
4983 get_page(buffer_info->page);
4984
4985 skb->len += length;
4986 skb->data_len += length;
4987 skb->truesize += length;
4988 }
4989
4990 if (!(staterr & E1000_RXD_STAT_EOP)) {
4991 buffer_info->skb = next_buffer->skb;
4992 buffer_info->dma = next_buffer->dma;
4993 next_buffer->skb = skb;
4994 next_buffer->dma = 0;
4995 goto next_desc;
4996 }
4997 send_up:
4998 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
4999 dev_kfree_skb_irq(skb);
5000 goto next_desc;
5001 }
5002
5003 igb_rx_hwtstamp(q_vector, staterr, skb);
5004 total_bytes += skb->len;
5005 total_packets++;
5006
5007 igb_rx_checksum_adv(rx_ring, staterr, skb);
5008
5009 skb->protocol = eth_type_trans(skb, netdev);
5010 skb_record_rx_queue(skb, rx_ring->queue_index);
5011
5012 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5013 le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5014
5015 igb_receive_skb(q_vector, skb, vlan_tag);
5016
5017 next_desc:
5018 rx_desc->wb.upper.status_error = 0;
5019
5020 /* return some buffers to hardware, one at a time is too slow */
5021 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5022 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5023 cleaned_count = 0;
5024 }
5025
5026 /* use prefetched values */
5027 rx_desc = next_rxd;
5028 buffer_info = next_buffer;
5029 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5030 }
5031
5032 rx_ring->next_to_clean = i;
5033 cleaned_count = igb_desc_unused(rx_ring);
5034
5035 if (cleaned_count)
5036 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5037
5038 rx_ring->total_packets += total_packets;
5039 rx_ring->total_bytes += total_bytes;
5040 rx_ring->rx_stats.packets += total_packets;
5041 rx_ring->rx_stats.bytes += total_bytes;
5042 return cleaned;
5043 }
5044
5045 /**
5046 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5047 * @adapter: address of board private structure
5048 **/
5049 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5050 {
5051 struct net_device *netdev = rx_ring->netdev;
5052 union e1000_adv_rx_desc *rx_desc;
5053 struct igb_buffer *buffer_info;
5054 struct sk_buff *skb;
5055 unsigned int i;
5056 int bufsz;
5057
5058 i = rx_ring->next_to_use;
5059 buffer_info = &rx_ring->buffer_info[i];
5060
5061 bufsz = rx_ring->rx_buffer_len;
5062
5063 while (cleaned_count--) {
5064 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5065
5066 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5067 if (!buffer_info->page) {
5068 buffer_info->page = netdev_alloc_page(netdev);
5069 if (!buffer_info->page) {
5070 rx_ring->rx_stats.alloc_failed++;
5071 goto no_buffers;
5072 }
5073 buffer_info->page_offset = 0;
5074 } else {
5075 buffer_info->page_offset ^= PAGE_SIZE / 2;
5076 }
5077 buffer_info->page_dma =
5078 pci_map_page(rx_ring->pdev, buffer_info->page,
5079 buffer_info->page_offset,
5080 PAGE_SIZE / 2,
5081 PCI_DMA_FROMDEVICE);
5082 if (pci_dma_mapping_error(rx_ring->pdev,
5083 buffer_info->page_dma)) {
5084 buffer_info->page_dma = 0;
5085 rx_ring->rx_stats.alloc_failed++;
5086 goto no_buffers;
5087 }
5088 }
5089
5090 skb = buffer_info->skb;
5091 if (!skb) {
5092 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5093 if (!skb) {
5094 rx_ring->rx_stats.alloc_failed++;
5095 goto no_buffers;
5096 }
5097
5098 buffer_info->skb = skb;
5099 }
5100 if (!buffer_info->dma) {
5101 buffer_info->dma = pci_map_single(rx_ring->pdev,
5102 skb->data,
5103 bufsz,
5104 PCI_DMA_FROMDEVICE);
5105 if (pci_dma_mapping_error(rx_ring->pdev,
5106 buffer_info->dma)) {
5107 buffer_info->dma = 0;
5108 rx_ring->rx_stats.alloc_failed++;
5109 goto no_buffers;
5110 }
5111 }
5112 /* Refresh the desc even if buffer_addrs didn't change because
5113 * each write-back erases this info. */
5114 if (bufsz < IGB_RXBUFFER_1024) {
5115 rx_desc->read.pkt_addr =
5116 cpu_to_le64(buffer_info->page_dma);
5117 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5118 } else {
5119 rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5120 rx_desc->read.hdr_addr = 0;
5121 }
5122
5123 i++;
5124 if (i == rx_ring->count)
5125 i = 0;
5126 buffer_info = &rx_ring->buffer_info[i];
5127 }
5128
5129 no_buffers:
5130 if (rx_ring->next_to_use != i) {
5131 rx_ring->next_to_use = i;
5132 if (i == 0)
5133 i = (rx_ring->count - 1);
5134 else
5135 i--;
5136
5137 /* Force memory writes to complete before letting h/w
5138 * know there are new descriptors to fetch. (Only
5139 * applicable for weak-ordered memory model archs,
5140 * such as IA-64). */
5141 wmb();
5142 writel(i, rx_ring->tail);
5143 }
5144 }
5145
5146 /**
5147 * igb_mii_ioctl -
5148 * @netdev:
5149 * @ifreq:
5150 * @cmd:
5151 **/
5152 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5153 {
5154 struct igb_adapter *adapter = netdev_priv(netdev);
5155 struct mii_ioctl_data *data = if_mii(ifr);
5156
5157 if (adapter->hw.phy.media_type != e1000_media_type_copper)
5158 return -EOPNOTSUPP;
5159
5160 switch (cmd) {
5161 case SIOCGMIIPHY:
5162 data->phy_id = adapter->hw.phy.addr;
5163 break;
5164 case SIOCGMIIREG:
5165 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5166 &data->val_out))
5167 return -EIO;
5168 break;
5169 case SIOCSMIIREG:
5170 default:
5171 return -EOPNOTSUPP;
5172 }
5173 return 0;
5174 }
5175
5176 /**
5177 * igb_hwtstamp_ioctl - control hardware time stamping
5178 * @netdev:
5179 * @ifreq:
5180 * @cmd:
5181 *
5182 * Outgoing time stamping can be enabled and disabled. Play nice and
5183 * disable it when requested, although it shouldn't case any overhead
5184 * when no packet needs it. At most one packet in the queue may be
5185 * marked for time stamping, otherwise it would be impossible to tell
5186 * for sure to which packet the hardware time stamp belongs.
5187 *
5188 * Incoming time stamping has to be configured via the hardware
5189 * filters. Not all combinations are supported, in particular event
5190 * type has to be specified. Matching the kind of event packet is
5191 * not supported, with the exception of "all V2 events regardless of
5192 * level 2 or 4".
5193 *
5194 **/
5195 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5196 struct ifreq *ifr, int cmd)
5197 {
5198 struct igb_adapter *adapter = netdev_priv(netdev);
5199 struct e1000_hw *hw = &adapter->hw;
5200 struct hwtstamp_config config;
5201 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5202 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5203 u32 tsync_rx_cfg = 0;
5204 bool is_l4 = false;
5205 bool is_l2 = false;
5206 u32 regval;
5207
5208 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5209 return -EFAULT;
5210
5211 /* reserved for future extensions */
5212 if (config.flags)
5213 return -EINVAL;
5214
5215 switch (config.tx_type) {
5216 case HWTSTAMP_TX_OFF:
5217 tsync_tx_ctl = 0;
5218 case HWTSTAMP_TX_ON:
5219 break;
5220 default:
5221 return -ERANGE;
5222 }
5223
5224 switch (config.rx_filter) {
5225 case HWTSTAMP_FILTER_NONE:
5226 tsync_rx_ctl = 0;
5227 break;
5228 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5229 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5230 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5231 case HWTSTAMP_FILTER_ALL:
5232 /*
5233 * register TSYNCRXCFG must be set, therefore it is not
5234 * possible to time stamp both Sync and Delay_Req messages
5235 * => fall back to time stamping all packets
5236 */
5237 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5238 config.rx_filter = HWTSTAMP_FILTER_ALL;
5239 break;
5240 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5241 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5242 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5243 is_l4 = true;
5244 break;
5245 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5246 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5247 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5248 is_l4 = true;
5249 break;
5250 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5251 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5252 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5253 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5254 is_l2 = true;
5255 is_l4 = true;
5256 config.rx_filter = HWTSTAMP_FILTER_SOME;
5257 break;
5258 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5259 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5260 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5261 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5262 is_l2 = true;
5263 is_l4 = true;
5264 config.rx_filter = HWTSTAMP_FILTER_SOME;
5265 break;
5266 case HWTSTAMP_FILTER_PTP_V2_EVENT:
5267 case HWTSTAMP_FILTER_PTP_V2_SYNC:
5268 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5269 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5270 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5271 is_l2 = true;
5272 break;
5273 default:
5274 return -ERANGE;
5275 }
5276
5277 if (hw->mac.type == e1000_82575) {
5278 if (tsync_rx_ctl | tsync_tx_ctl)
5279 return -EINVAL;
5280 return 0;
5281 }
5282
5283 /* enable/disable TX */
5284 regval = rd32(E1000_TSYNCTXCTL);
5285 regval &= ~E1000_TSYNCTXCTL_ENABLED;
5286 regval |= tsync_tx_ctl;
5287 wr32(E1000_TSYNCTXCTL, regval);
5288
5289 /* enable/disable RX */
5290 regval = rd32(E1000_TSYNCRXCTL);
5291 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5292 regval |= tsync_rx_ctl;
5293 wr32(E1000_TSYNCRXCTL, regval);
5294
5295 /* define which PTP packets are time stamped */
5296 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5297
5298 /* define ethertype filter for timestamped packets */
5299 if (is_l2)
5300 wr32(E1000_ETQF(3),
5301 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5302 E1000_ETQF_1588 | /* enable timestamping */
5303 ETH_P_1588)); /* 1588 eth protocol type */
5304 else
5305 wr32(E1000_ETQF(3), 0);
5306
5307 #define PTP_PORT 319
5308 /* L4 Queue Filter[3]: filter by destination port and protocol */
5309 if (is_l4) {
5310 u32 ftqf = (IPPROTO_UDP /* UDP */
5311 | E1000_FTQF_VF_BP /* VF not compared */
5312 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5313 | E1000_FTQF_MASK); /* mask all inputs */
5314 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5315
5316 wr32(E1000_IMIR(3), htons(PTP_PORT));
5317 wr32(E1000_IMIREXT(3),
5318 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5319 if (hw->mac.type == e1000_82576) {
5320 /* enable source port check */
5321 wr32(E1000_SPQF(3), htons(PTP_PORT));
5322 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5323 }
5324 wr32(E1000_FTQF(3), ftqf);
5325 } else {
5326 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5327 }
5328 wrfl();
5329
5330 adapter->hwtstamp_config = config;
5331
5332 /* clear TX/RX time stamp registers, just to be sure */
5333 regval = rd32(E1000_TXSTMPH);
5334 regval = rd32(E1000_RXSTMPH);
5335
5336 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5337 -EFAULT : 0;
5338 }
5339
5340 /**
5341 * igb_ioctl -
5342 * @netdev:
5343 * @ifreq:
5344 * @cmd:
5345 **/
5346 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5347 {
5348 switch (cmd) {
5349 case SIOCGMIIPHY:
5350 case SIOCGMIIREG:
5351 case SIOCSMIIREG:
5352 return igb_mii_ioctl(netdev, ifr, cmd);
5353 case SIOCSHWTSTAMP:
5354 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5355 default:
5356 return -EOPNOTSUPP;
5357 }
5358 }
5359
5360 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5361 {
5362 struct igb_adapter *adapter = hw->back;
5363 u16 cap_offset;
5364
5365 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5366 if (!cap_offset)
5367 return -E1000_ERR_CONFIG;
5368
5369 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5370
5371 return 0;
5372 }
5373
5374 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5375 {
5376 struct igb_adapter *adapter = hw->back;
5377 u16 cap_offset;
5378
5379 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5380 if (!cap_offset)
5381 return -E1000_ERR_CONFIG;
5382
5383 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5384
5385 return 0;
5386 }
5387
5388 static void igb_vlan_rx_register(struct net_device *netdev,
5389 struct vlan_group *grp)
5390 {
5391 struct igb_adapter *adapter = netdev_priv(netdev);
5392 struct e1000_hw *hw = &adapter->hw;
5393 u32 ctrl, rctl;
5394
5395 igb_irq_disable(adapter);
5396 adapter->vlgrp = grp;
5397
5398 if (grp) {
5399 /* enable VLAN tag insert/strip */
5400 ctrl = rd32(E1000_CTRL);
5401 ctrl |= E1000_CTRL_VME;
5402 wr32(E1000_CTRL, ctrl);
5403
5404 /* Disable CFI check */
5405 rctl = rd32(E1000_RCTL);
5406 rctl &= ~E1000_RCTL_CFIEN;
5407 wr32(E1000_RCTL, rctl);
5408 } else {
5409 /* disable VLAN tag insert/strip */
5410 ctrl = rd32(E1000_CTRL);
5411 ctrl &= ~E1000_CTRL_VME;
5412 wr32(E1000_CTRL, ctrl);
5413 }
5414
5415 igb_rlpml_set(adapter);
5416
5417 if (!test_bit(__IGB_DOWN, &adapter->state))
5418 igb_irq_enable(adapter);
5419 }
5420
5421 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5422 {
5423 struct igb_adapter *adapter = netdev_priv(netdev);
5424 struct e1000_hw *hw = &adapter->hw;
5425 int pf_id = adapter->vfs_allocated_count;
5426
5427 /* attempt to add filter to vlvf array */
5428 igb_vlvf_set(adapter, vid, true, pf_id);
5429
5430 /* add the filter since PF can receive vlans w/o entry in vlvf */
5431 igb_vfta_set(hw, vid, true);
5432 }
5433
5434 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5435 {
5436 struct igb_adapter *adapter = netdev_priv(netdev);
5437 struct e1000_hw *hw = &adapter->hw;
5438 int pf_id = adapter->vfs_allocated_count;
5439 s32 err;
5440
5441 igb_irq_disable(adapter);
5442 vlan_group_set_device(adapter->vlgrp, vid, NULL);
5443
5444 if (!test_bit(__IGB_DOWN, &adapter->state))
5445 igb_irq_enable(adapter);
5446
5447 /* remove vlan from VLVF table array */
5448 err = igb_vlvf_set(adapter, vid, false, pf_id);
5449
5450 /* if vid was not present in VLVF just remove it from table */
5451 if (err)
5452 igb_vfta_set(hw, vid, false);
5453 }
5454
5455 static void igb_restore_vlan(struct igb_adapter *adapter)
5456 {
5457 igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5458
5459 if (adapter->vlgrp) {
5460 u16 vid;
5461 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5462 if (!vlan_group_get_device(adapter->vlgrp, vid))
5463 continue;
5464 igb_vlan_rx_add_vid(adapter->netdev, vid);
5465 }
5466 }
5467 }
5468
5469 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5470 {
5471 struct pci_dev *pdev = adapter->pdev;
5472 struct e1000_mac_info *mac = &adapter->hw.mac;
5473
5474 mac->autoneg = 0;
5475
5476 switch (spddplx) {
5477 case SPEED_10 + DUPLEX_HALF:
5478 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5479 break;
5480 case SPEED_10 + DUPLEX_FULL:
5481 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5482 break;
5483 case SPEED_100 + DUPLEX_HALF:
5484 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5485 break;
5486 case SPEED_100 + DUPLEX_FULL:
5487 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5488 break;
5489 case SPEED_1000 + DUPLEX_FULL:
5490 mac->autoneg = 1;
5491 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5492 break;
5493 case SPEED_1000 + DUPLEX_HALF: /* not supported */
5494 default:
5495 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5496 return -EINVAL;
5497 }
5498 return 0;
5499 }
5500
5501 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5502 {
5503 struct net_device *netdev = pci_get_drvdata(pdev);
5504 struct igb_adapter *adapter = netdev_priv(netdev);
5505 struct e1000_hw *hw = &adapter->hw;
5506 u32 ctrl, rctl, status;
5507 u32 wufc = adapter->wol;
5508 #ifdef CONFIG_PM
5509 int retval = 0;
5510 #endif
5511
5512 netif_device_detach(netdev);
5513
5514 if (netif_running(netdev))
5515 igb_close(netdev);
5516
5517 igb_clear_interrupt_scheme(adapter);
5518
5519 #ifdef CONFIG_PM
5520 retval = pci_save_state(pdev);
5521 if (retval)
5522 return retval;
5523 #endif
5524
5525 status = rd32(E1000_STATUS);
5526 if (status & E1000_STATUS_LU)
5527 wufc &= ~E1000_WUFC_LNKC;
5528
5529 if (wufc) {
5530 igb_setup_rctl(adapter);
5531 igb_set_rx_mode(netdev);
5532
5533 /* turn on all-multi mode if wake on multicast is enabled */
5534 if (wufc & E1000_WUFC_MC) {
5535 rctl = rd32(E1000_RCTL);
5536 rctl |= E1000_RCTL_MPE;
5537 wr32(E1000_RCTL, rctl);
5538 }
5539
5540 ctrl = rd32(E1000_CTRL);
5541 /* advertise wake from D3Cold */
5542 #define E1000_CTRL_ADVD3WUC 0x00100000
5543 /* phy power management enable */
5544 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5545 ctrl |= E1000_CTRL_ADVD3WUC;
5546 wr32(E1000_CTRL, ctrl);
5547
5548 /* Allow time for pending master requests to run */
5549 igb_disable_pcie_master(hw);
5550
5551 wr32(E1000_WUC, E1000_WUC_PME_EN);
5552 wr32(E1000_WUFC, wufc);
5553 } else {
5554 wr32(E1000_WUC, 0);
5555 wr32(E1000_WUFC, 0);
5556 }
5557
5558 *enable_wake = wufc || adapter->en_mng_pt;
5559 if (!*enable_wake)
5560 igb_shutdown_serdes_link_82575(hw);
5561
5562 /* Release control of h/w to f/w. If f/w is AMT enabled, this
5563 * would have already happened in close and is redundant. */
5564 igb_release_hw_control(adapter);
5565
5566 pci_disable_device(pdev);
5567
5568 return 0;
5569 }
5570
5571 #ifdef CONFIG_PM
5572 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5573 {
5574 int retval;
5575 bool wake;
5576
5577 retval = __igb_shutdown(pdev, &wake);
5578 if (retval)
5579 return retval;
5580
5581 if (wake) {
5582 pci_prepare_to_sleep(pdev);
5583 } else {
5584 pci_wake_from_d3(pdev, false);
5585 pci_set_power_state(pdev, PCI_D3hot);
5586 }
5587
5588 return 0;
5589 }
5590
5591 static int igb_resume(struct pci_dev *pdev)
5592 {
5593 struct net_device *netdev = pci_get_drvdata(pdev);
5594 struct igb_adapter *adapter = netdev_priv(netdev);
5595 struct e1000_hw *hw = &adapter->hw;
5596 u32 err;
5597
5598 pci_set_power_state(pdev, PCI_D0);
5599 pci_restore_state(pdev);
5600
5601 err = pci_enable_device_mem(pdev);
5602 if (err) {
5603 dev_err(&pdev->dev,
5604 "igb: Cannot enable PCI device from suspend\n");
5605 return err;
5606 }
5607 pci_set_master(pdev);
5608
5609 pci_enable_wake(pdev, PCI_D3hot, 0);
5610 pci_enable_wake(pdev, PCI_D3cold, 0);
5611
5612 if (igb_init_interrupt_scheme(adapter)) {
5613 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5614 return -ENOMEM;
5615 }
5616
5617 /* e1000_power_up_phy(adapter); */
5618
5619 igb_reset(adapter);
5620
5621 /* let the f/w know that the h/w is now under the control of the
5622 * driver. */
5623 igb_get_hw_control(adapter);
5624
5625 wr32(E1000_WUS, ~0);
5626
5627 if (netif_running(netdev)) {
5628 err = igb_open(netdev);
5629 if (err)
5630 return err;
5631 }
5632
5633 netif_device_attach(netdev);
5634
5635 return 0;
5636 }
5637 #endif
5638
5639 static void igb_shutdown(struct pci_dev *pdev)
5640 {
5641 bool wake;
5642
5643 __igb_shutdown(pdev, &wake);
5644
5645 if (system_state == SYSTEM_POWER_OFF) {
5646 pci_wake_from_d3(pdev, wake);
5647 pci_set_power_state(pdev, PCI_D3hot);
5648 }
5649 }
5650
5651 #ifdef CONFIG_NET_POLL_CONTROLLER
5652 /*
5653 * Polling 'interrupt' - used by things like netconsole to send skbs
5654 * without having to re-enable interrupts. It's not called while
5655 * the interrupt routine is executing.
5656 */
5657 static void igb_netpoll(struct net_device *netdev)
5658 {
5659 struct igb_adapter *adapter = netdev_priv(netdev);
5660 struct e1000_hw *hw = &adapter->hw;
5661 int i;
5662
5663 if (!adapter->msix_entries) {
5664 struct igb_q_vector *q_vector = adapter->q_vector[0];
5665 igb_irq_disable(adapter);
5666 napi_schedule(&q_vector->napi);
5667 return;
5668 }
5669
5670 for (i = 0; i < adapter->num_q_vectors; i++) {
5671 struct igb_q_vector *q_vector = adapter->q_vector[i];
5672 wr32(E1000_EIMC, q_vector->eims_value);
5673 napi_schedule(&q_vector->napi);
5674 }
5675 }
5676 #endif /* CONFIG_NET_POLL_CONTROLLER */
5677
5678 /**
5679 * igb_io_error_detected - called when PCI error is detected
5680 * @pdev: Pointer to PCI device
5681 * @state: The current pci connection state
5682 *
5683 * This function is called after a PCI bus error affecting
5684 * this device has been detected.
5685 */
5686 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5687 pci_channel_state_t state)
5688 {
5689 struct net_device *netdev = pci_get_drvdata(pdev);
5690 struct igb_adapter *adapter = netdev_priv(netdev);
5691
5692 netif_device_detach(netdev);
5693
5694 if (state == pci_channel_io_perm_failure)
5695 return PCI_ERS_RESULT_DISCONNECT;
5696
5697 if (netif_running(netdev))
5698 igb_down(adapter);
5699 pci_disable_device(pdev);
5700
5701 /* Request a slot slot reset. */
5702 return PCI_ERS_RESULT_NEED_RESET;
5703 }
5704
5705 /**
5706 * igb_io_slot_reset - called after the pci bus has been reset.
5707 * @pdev: Pointer to PCI device
5708 *
5709 * Restart the card from scratch, as if from a cold-boot. Implementation
5710 * resembles the first-half of the igb_resume routine.
5711 */
5712 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5713 {
5714 struct net_device *netdev = pci_get_drvdata(pdev);
5715 struct igb_adapter *adapter = netdev_priv(netdev);
5716 struct e1000_hw *hw = &adapter->hw;
5717 pci_ers_result_t result;
5718 int err;
5719
5720 if (pci_enable_device_mem(pdev)) {
5721 dev_err(&pdev->dev,
5722 "Cannot re-enable PCI device after reset.\n");
5723 result = PCI_ERS_RESULT_DISCONNECT;
5724 } else {
5725 pci_set_master(pdev);
5726 pci_restore_state(pdev);
5727
5728 pci_enable_wake(pdev, PCI_D3hot, 0);
5729 pci_enable_wake(pdev, PCI_D3cold, 0);
5730
5731 igb_reset(adapter);
5732 wr32(E1000_WUS, ~0);
5733 result = PCI_ERS_RESULT_RECOVERED;
5734 }
5735
5736 err = pci_cleanup_aer_uncorrect_error_status(pdev);
5737 if (err) {
5738 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
5739 "failed 0x%0x\n", err);
5740 /* non-fatal, continue */
5741 }
5742
5743 return result;
5744 }
5745
5746 /**
5747 * igb_io_resume - called when traffic can start flowing again.
5748 * @pdev: Pointer to PCI device
5749 *
5750 * This callback is called when the error recovery driver tells us that
5751 * its OK to resume normal operation. Implementation resembles the
5752 * second-half of the igb_resume routine.
5753 */
5754 static void igb_io_resume(struct pci_dev *pdev)
5755 {
5756 struct net_device *netdev = pci_get_drvdata(pdev);
5757 struct igb_adapter *adapter = netdev_priv(netdev);
5758
5759 if (netif_running(netdev)) {
5760 if (igb_up(adapter)) {
5761 dev_err(&pdev->dev, "igb_up failed after reset\n");
5762 return;
5763 }
5764 }
5765
5766 netif_device_attach(netdev);
5767
5768 /* let the f/w know that the h/w is now under the control of the
5769 * driver. */
5770 igb_get_hw_control(adapter);
5771 }
5772
5773 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
5774 u8 qsel)
5775 {
5776 u32 rar_low, rar_high;
5777 struct e1000_hw *hw = &adapter->hw;
5778
5779 /* HW expects these in little endian so we reverse the byte order
5780 * from network order (big endian) to little endian
5781 */
5782 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
5783 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
5784 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
5785
5786 /* Indicate to hardware the Address is Valid. */
5787 rar_high |= E1000_RAH_AV;
5788
5789 if (hw->mac.type == e1000_82575)
5790 rar_high |= E1000_RAH_POOL_1 * qsel;
5791 else
5792 rar_high |= E1000_RAH_POOL_1 << qsel;
5793
5794 wr32(E1000_RAL(index), rar_low);
5795 wrfl();
5796 wr32(E1000_RAH(index), rar_high);
5797 wrfl();
5798 }
5799
5800 static int igb_set_vf_mac(struct igb_adapter *adapter,
5801 int vf, unsigned char *mac_addr)
5802 {
5803 struct e1000_hw *hw = &adapter->hw;
5804 /* VF MAC addresses start at end of receive addresses and moves
5805 * torwards the first, as a result a collision should not be possible */
5806 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5807
5808 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
5809
5810 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
5811
5812 return 0;
5813 }
5814
5815 static void igb_vmm_control(struct igb_adapter *adapter)
5816 {
5817 struct e1000_hw *hw = &adapter->hw;
5818 u32 reg;
5819
5820 /* replication is not supported for 82575 */
5821 if (hw->mac.type == e1000_82575)
5822 return;
5823
5824 /* enable replication vlan tag stripping */
5825 reg = rd32(E1000_RPLOLR);
5826 reg |= E1000_RPLOLR_STRVLAN;
5827 wr32(E1000_RPLOLR, reg);
5828
5829 /* notify HW that the MAC is adding vlan tags */
5830 reg = rd32(E1000_DTXCTL);
5831 reg |= E1000_DTXCTL_VLAN_ADDED;
5832 wr32(E1000_DTXCTL, reg);
5833
5834 if (adapter->vfs_allocated_count) {
5835 igb_vmdq_set_loopback_pf(hw, true);
5836 igb_vmdq_set_replication_pf(hw, true);
5837 } else {
5838 igb_vmdq_set_loopback_pf(hw, false);
5839 igb_vmdq_set_replication_pf(hw, false);
5840 }
5841 }
5842
5843 /* igb_main.c */
This page took 0.23294 seconds and 6 git commands to generate.