igb: cleanup igb xmit frame path
[deliverable/linux.git] / drivers / net / igb / igb_main.c
... / ...
CommitLineData
1/*******************************************************************************
2
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2009 Intel Corporation.
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
9
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
14
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
21
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26*******************************************************************************/
27
28#include <linux/module.h>
29#include <linux/types.h>
30#include <linux/init.h>
31#include <linux/vmalloc.h>
32#include <linux/pagemap.h>
33#include <linux/netdevice.h>
34#include <linux/ipv6.h>
35#include <net/checksum.h>
36#include <net/ip6_checksum.h>
37#include <linux/net_tstamp.h>
38#include <linux/mii.h>
39#include <linux/ethtool.h>
40#include <linux/if_vlan.h>
41#include <linux/pci.h>
42#include <linux/pci-aspm.h>
43#include <linux/delay.h>
44#include <linux/interrupt.h>
45#include <linux/if_ether.h>
46#include <linux/aer.h>
47#ifdef CONFIG_IGB_DCA
48#include <linux/dca.h>
49#endif
50#include "igb.h"
51
/* Driver version string; also handed to MODULE_VERSION(). */
#define DRV_VERSION "1.3.16-k2"

/* Not static: these two identifiers are visible outside this file. */
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;

/* Banner strings printed by igb_init_module() when the module loads. */
static const char igb_driver_string[] =
	"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] =
	"Copyright (c) 2007-2009 Intel Corporation.";
58
59static const struct e1000_info *igb_info_tbl[] = {
60 [board_82575] = &e1000_82575_info,
61};
62
63static struct pci_device_id igb_pci_tbl[] = {
64 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
65 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
66 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
67 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
68 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
69 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
70 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
71 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
72 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
73 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
74 /* required last entry */
75 {0, }
76};
77
78MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
79
80void igb_reset(struct igb_adapter *);
81static int igb_setup_all_tx_resources(struct igb_adapter *);
82static int igb_setup_all_rx_resources(struct igb_adapter *);
83static void igb_free_all_tx_resources(struct igb_adapter *);
84static void igb_free_all_rx_resources(struct igb_adapter *);
85static void igb_setup_mrqc(struct igb_adapter *);
86void igb_update_stats(struct igb_adapter *);
87static int igb_probe(struct pci_dev *, const struct pci_device_id *);
88static void __devexit igb_remove(struct pci_dev *pdev);
89static int igb_sw_init(struct igb_adapter *);
90static int igb_open(struct net_device *);
91static int igb_close(struct net_device *);
92static void igb_configure_tx(struct igb_adapter *);
93static void igb_configure_rx(struct igb_adapter *);
94static void igb_clean_all_tx_rings(struct igb_adapter *);
95static void igb_clean_all_rx_rings(struct igb_adapter *);
96static void igb_clean_tx_ring(struct igb_ring *);
97static void igb_clean_rx_ring(struct igb_ring *);
98static void igb_set_rx_mode(struct net_device *);
99static void igb_update_phy_info(unsigned long);
100static void igb_watchdog(unsigned long);
101static void igb_watchdog_task(struct work_struct *);
102static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
103static struct net_device_stats *igb_get_stats(struct net_device *);
104static int igb_change_mtu(struct net_device *, int);
105static int igb_set_mac(struct net_device *, void *);
106static void igb_set_uta(struct igb_adapter *adapter);
107static irqreturn_t igb_intr(int irq, void *);
108static irqreturn_t igb_intr_msi(int irq, void *);
109static irqreturn_t igb_msix_other(int irq, void *);
110static irqreturn_t igb_msix_ring(int irq, void *);
111#ifdef CONFIG_IGB_DCA
112static void igb_update_dca(struct igb_q_vector *);
113static void igb_setup_dca(struct igb_adapter *);
114#endif /* CONFIG_IGB_DCA */
115static bool igb_clean_tx_irq(struct igb_q_vector *);
116static int igb_poll(struct napi_struct *, int);
117static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
118static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
119static void igb_tx_timeout(struct net_device *);
120static void igb_reset_task(struct work_struct *);
121static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
122static void igb_vlan_rx_add_vid(struct net_device *, u16);
123static void igb_vlan_rx_kill_vid(struct net_device *, u16);
124static void igb_restore_vlan(struct igb_adapter *);
125static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
126static void igb_ping_all_vfs(struct igb_adapter *);
127static void igb_msg_task(struct igb_adapter *);
128static void igb_vmm_control(struct igb_adapter *);
129static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
130static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
131
132#ifdef CONFIG_PM
133static int igb_suspend(struct pci_dev *, pm_message_t);
134static int igb_resume(struct pci_dev *);
135#endif
136static void igb_shutdown(struct pci_dev *);
137#ifdef CONFIG_IGB_DCA
138static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
139static struct notifier_block dca_notifier = {
140 .notifier_call = igb_notify_dca,
141 .next = NULL,
142 .priority = 0
143};
144#endif
145#ifdef CONFIG_NET_POLL_CONTROLLER
146/* for netdump / net console */
147static void igb_netpoll(struct net_device *);
148#endif
149#ifdef CONFIG_PCI_IOV
150static unsigned int max_vfs = 0;
151module_param(max_vfs, uint, 0);
152MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
153 "per physical function");
154#endif /* CONFIG_PCI_IOV */
155
156static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
157 pci_channel_state_t);
158static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
159static void igb_io_resume(struct pci_dev *);
160
161static struct pci_error_handlers igb_err_handler = {
162 .error_detected = igb_io_error_detected,
163 .slot_reset = igb_io_slot_reset,
164 .resume = igb_io_resume,
165};
166
167
168static struct pci_driver igb_driver = {
169 .name = igb_driver_name,
170 .id_table = igb_pci_tbl,
171 .probe = igb_probe,
172 .remove = __devexit_p(igb_remove),
173#ifdef CONFIG_PM
174 /* Power Managment Hooks */
175 .suspend = igb_suspend,
176 .resume = igb_resume,
177#endif
178 .shutdown = igb_shutdown,
179 .err_handler = &igb_err_handler
180};
181
182MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
183MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
184MODULE_LICENSE("GPL");
185MODULE_VERSION(DRV_VERSION);
186
187/**
188 * igb_read_clock - read raw cycle counter (to be used by time counter)
189 */
190static cycle_t igb_read_clock(const struct cyclecounter *tc)
191{
192 struct igb_adapter *adapter =
193 container_of(tc, struct igb_adapter, cycles);
194 struct e1000_hw *hw = &adapter->hw;
195 u64 stamp = 0;
196 int shift = 0;
197
198 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
199 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
200 return stamp;
201}
202
203#ifdef DEBUG
204/**
205 * igb_get_hw_dev_name - return device name string
206 * used by hardware layer to print debugging information
207 **/
208char *igb_get_hw_dev_name(struct e1000_hw *hw)
209{
210 struct igb_adapter *adapter = hw->back;
211 return adapter->netdev->name;
212}
213
214/**
215 * igb_get_time_str - format current NIC and system time as string
216 */
217static char *igb_get_time_str(struct igb_adapter *adapter,
218 char buffer[160])
219{
220 cycle_t hw = adapter->cycles.read(&adapter->cycles);
221 struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
222 struct timespec sys;
223 struct timespec delta;
224 getnstimeofday(&sys);
225
226 delta = timespec_sub(nic, sys);
227
228 sprintf(buffer,
229 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
230 hw,
231 (long)nic.tv_sec, nic.tv_nsec,
232 (long)sys.tv_sec, sys.tv_nsec,
233 (long)delta.tv_sec, delta.tv_nsec);
234
235 return buffer;
236}
237#endif
238
239/**
240 * igb_init_module - Driver Registration Routine
241 *
242 * igb_init_module is the first routine called when the driver is
243 * loaded. All it does is register with the PCI subsystem.
244 **/
245static int __init igb_init_module(void)
246{
247 int ret;
248 printk(KERN_INFO "%s - version %s\n",
249 igb_driver_string, igb_driver_version);
250
251 printk(KERN_INFO "%s\n", igb_copyright);
252
253#ifdef CONFIG_IGB_DCA
254 dca_register_notify(&dca_notifier);
255#endif
256 ret = pci_register_driver(&igb_driver);
257 return ret;
258}
259
260module_init(igb_init_module);
261
262/**
263 * igb_exit_module - Driver Exit Cleanup Routine
264 *
265 * igb_exit_module is called just before the driver is removed
266 * from memory.
267 **/
268static void __exit igb_exit_module(void)
269{
270#ifdef CONFIG_IGB_DCA
271 dca_unregister_notify(&dca_notifier);
272#endif
273 pci_unregister_driver(&igb_driver);
274}
275
276module_exit(igb_exit_module);
277
/*
 * Map a linear queue number to an 82576 queue register index: even
 * numbers land on i / 2, odd numbers on 8 + i / 2 (0->0, 1->8, 2->1, ...).
 * The argument is parenthesised so that expression arguments expand
 * correctly; it is still evaluated twice.
 */
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
279/**
280 * igb_cache_ring_register - Descriptor ring to register mapping
281 * @adapter: board private structure to initialize
282 *
283 * Once we know the feature-set enabled for the device, we'll cache
284 * the register offset the descriptor ring is assigned to.
285 **/
286static void igb_cache_ring_register(struct igb_adapter *adapter)
287{
288 int i = 0, j = 0;
289 u32 rbase_offset = adapter->vfs_allocated_count;
290
291 switch (adapter->hw.mac.type) {
292 case e1000_82576:
293 /* The queues are allocated for virtualization such that VF 0
294 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
295 * In order to avoid collision we start at the first free queue
296 * and continue consuming queues in the same sequence
297 */
298 if (adapter->vfs_allocated_count) {
299 for (; i < adapter->num_rx_queues; i++)
300 adapter->rx_ring[i].reg_idx = rbase_offset +
301 Q_IDX_82576(i);
302 for (; j < adapter->num_tx_queues; j++)
303 adapter->tx_ring[j].reg_idx = rbase_offset +
304 Q_IDX_82576(j);
305 }
306 case e1000_82575:
307 default:
308 for (; i < adapter->num_rx_queues; i++)
309 adapter->rx_ring[i].reg_idx = rbase_offset + i;
310 for (; j < adapter->num_tx_queues; j++)
311 adapter->tx_ring[j].reg_idx = rbase_offset + j;
312 break;
313 }
314}
315
316static void igb_free_queues(struct igb_adapter *adapter)
317{
318 kfree(adapter->tx_ring);
319 kfree(adapter->rx_ring);
320
321 adapter->tx_ring = NULL;
322 adapter->rx_ring = NULL;
323
324 adapter->num_rx_queues = 0;
325 adapter->num_tx_queues = 0;
326}
327
328/**
329 * igb_alloc_queues - Allocate memory for all rings
330 * @adapter: board private structure to initialize
331 *
332 * We allocate one ring per queue at run-time since we don't know the
333 * number of queues at compile-time.
334 **/
335static int igb_alloc_queues(struct igb_adapter *adapter)
336{
337 int i;
338
339 adapter->tx_ring = kcalloc(adapter->num_tx_queues,
340 sizeof(struct igb_ring), GFP_KERNEL);
341 if (!adapter->tx_ring)
342 goto err;
343
344 adapter->rx_ring = kcalloc(adapter->num_rx_queues,
345 sizeof(struct igb_ring), GFP_KERNEL);
346 if (!adapter->rx_ring)
347 goto err;
348
349 for (i = 0; i < adapter->num_tx_queues; i++) {
350 struct igb_ring *ring = &(adapter->tx_ring[i]);
351 ring->count = adapter->tx_ring_count;
352 ring->queue_index = i;
353 ring->pdev = adapter->pdev;
354 ring->netdev = adapter->netdev;
355 /* For 82575, context index must be unique per ring. */
356 if (adapter->hw.mac.type == e1000_82575)
357 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
358 }
359
360 for (i = 0; i < adapter->num_rx_queues; i++) {
361 struct igb_ring *ring = &(adapter->rx_ring[i]);
362 ring->count = adapter->rx_ring_count;
363 ring->queue_index = i;
364 ring->pdev = adapter->pdev;
365 ring->netdev = adapter->netdev;
366 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
367 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
368 /* set flag indicating ring supports SCTP checksum offload */
369 if (adapter->hw.mac.type >= e1000_82576)
370 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
371 }
372
373 igb_cache_ring_register(adapter);
374
375 return 0;
376
377err:
378 igb_free_queues(adapter);
379
380 return -ENOMEM;
381}
382
383#define IGB_N0_QUEUE -1
384static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
385{
386 u32 msixbm = 0;
387 struct igb_adapter *adapter = q_vector->adapter;
388 struct e1000_hw *hw = &adapter->hw;
389 u32 ivar, index;
390 int rx_queue = IGB_N0_QUEUE;
391 int tx_queue = IGB_N0_QUEUE;
392
393 if (q_vector->rx_ring)
394 rx_queue = q_vector->rx_ring->reg_idx;
395 if (q_vector->tx_ring)
396 tx_queue = q_vector->tx_ring->reg_idx;
397
398 switch (hw->mac.type) {
399 case e1000_82575:
400 /* The 82575 assigns vectors using a bitmask, which matches the
401 bitmask for the EICR/EIMS/EIMC registers. To assign one
402 or more queues to a vector, we write the appropriate bits
403 into the MSIXBM register for that vector. */
404 if (rx_queue > IGB_N0_QUEUE)
405 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
406 if (tx_queue > IGB_N0_QUEUE)
407 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
408 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
409 q_vector->eims_value = msixbm;
410 break;
411 case e1000_82576:
412 /* 82576 uses a table-based method for assigning vectors.
413 Each queue has a single entry in the table to which we write
414 a vector number along with a "valid" bit. Sadly, the layout
415 of the table is somewhat counterintuitive. */
416 if (rx_queue > IGB_N0_QUEUE) {
417 index = (rx_queue & 0x7);
418 ivar = array_rd32(E1000_IVAR0, index);
419 if (rx_queue < 8) {
420 /* vector goes into low byte of register */
421 ivar = ivar & 0xFFFFFF00;
422 ivar |= msix_vector | E1000_IVAR_VALID;
423 } else {
424 /* vector goes into third byte of register */
425 ivar = ivar & 0xFF00FFFF;
426 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
427 }
428 array_wr32(E1000_IVAR0, index, ivar);
429 }
430 if (tx_queue > IGB_N0_QUEUE) {
431 index = (tx_queue & 0x7);
432 ivar = array_rd32(E1000_IVAR0, index);
433 if (tx_queue < 8) {
434 /* vector goes into second byte of register */
435 ivar = ivar & 0xFFFF00FF;
436 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
437 } else {
438 /* vector goes into high byte of register */
439 ivar = ivar & 0x00FFFFFF;
440 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
441 }
442 array_wr32(E1000_IVAR0, index, ivar);
443 }
444 q_vector->eims_value = 1 << msix_vector;
445 break;
446 default:
447 BUG();
448 break;
449 }
450}
451
452/**
453 * igb_configure_msix - Configure MSI-X hardware
454 *
455 * igb_configure_msix sets up the hardware to properly
456 * generate MSI-X interrupts.
457 **/
458static void igb_configure_msix(struct igb_adapter *adapter)
459{
460 u32 tmp;
461 int i, vector = 0;
462 struct e1000_hw *hw = &adapter->hw;
463
464 adapter->eims_enable_mask = 0;
465
466 /* set vector for other causes, i.e. link changes */
467 switch (hw->mac.type) {
468 case e1000_82575:
469 tmp = rd32(E1000_CTRL_EXT);
470 /* enable MSI-X PBA support*/
471 tmp |= E1000_CTRL_EXT_PBA_CLR;
472
473 /* Auto-Mask interrupts upon ICR read. */
474 tmp |= E1000_CTRL_EXT_EIAME;
475 tmp |= E1000_CTRL_EXT_IRCA;
476
477 wr32(E1000_CTRL_EXT, tmp);
478
479 /* enable msix_other interrupt */
480 array_wr32(E1000_MSIXBM(0), vector++,
481 E1000_EIMS_OTHER);
482 adapter->eims_other = E1000_EIMS_OTHER;
483
484 break;
485
486 case e1000_82576:
487 /* Turn on MSI-X capability first, or our settings
488 * won't stick. And it will take days to debug. */
489 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
490 E1000_GPIE_PBA | E1000_GPIE_EIAME |
491 E1000_GPIE_NSICR);
492
493 /* enable msix_other interrupt */
494 adapter->eims_other = 1 << vector;
495 tmp = (vector++ | E1000_IVAR_VALID) << 8;
496
497 wr32(E1000_IVAR_MISC, tmp);
498 break;
499 default:
500 /* do nothing, since nothing else supports MSI-X */
501 break;
502 } /* switch (hw->mac.type) */
503
504 adapter->eims_enable_mask |= adapter->eims_other;
505
506 for (i = 0; i < adapter->num_q_vectors; i++) {
507 struct igb_q_vector *q_vector = adapter->q_vector[i];
508 igb_assign_vector(q_vector, vector++);
509 adapter->eims_enable_mask |= q_vector->eims_value;
510 }
511
512 wrfl();
513}
514
515/**
516 * igb_request_msix - Initialize MSI-X interrupts
517 *
518 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
519 * kernel.
520 **/
521static int igb_request_msix(struct igb_adapter *adapter)
522{
523 struct net_device *netdev = adapter->netdev;
524 struct e1000_hw *hw = &adapter->hw;
525 int i, err = 0, vector = 0;
526
527 err = request_irq(adapter->msix_entries[vector].vector,
528 &igb_msix_other, 0, netdev->name, adapter);
529 if (err)
530 goto out;
531 vector++;
532
533 for (i = 0; i < adapter->num_q_vectors; i++) {
534 struct igb_q_vector *q_vector = adapter->q_vector[i];
535
536 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
537
538 if (q_vector->rx_ring && q_vector->tx_ring)
539 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
540 q_vector->rx_ring->queue_index);
541 else if (q_vector->tx_ring)
542 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
543 q_vector->tx_ring->queue_index);
544 else if (q_vector->rx_ring)
545 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
546 q_vector->rx_ring->queue_index);
547 else
548 sprintf(q_vector->name, "%s-unused", netdev->name);
549
550 err = request_irq(adapter->msix_entries[vector].vector,
551 &igb_msix_ring, 0, q_vector->name,
552 q_vector);
553 if (err)
554 goto out;
555 vector++;
556 }
557
558 igb_configure_msix(adapter);
559 return 0;
560out:
561 return err;
562}
563
564static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
565{
566 if (adapter->msix_entries) {
567 pci_disable_msix(adapter->pdev);
568 kfree(adapter->msix_entries);
569 adapter->msix_entries = NULL;
570 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
571 pci_disable_msi(adapter->pdev);
572 }
573}
574
575/**
576 * igb_free_q_vectors - Free memory allocated for interrupt vectors
577 * @adapter: board private structure to initialize
578 *
579 * This function frees the memory allocated to the q_vectors. In addition if
580 * NAPI is enabled it will delete any references to the NAPI struct prior
581 * to freeing the q_vector.
582 **/
583static void igb_free_q_vectors(struct igb_adapter *adapter)
584{
585 int v_idx;
586
587 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
588 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
589 adapter->q_vector[v_idx] = NULL;
590 netif_napi_del(&q_vector->napi);
591 kfree(q_vector);
592 }
593 adapter->num_q_vectors = 0;
594}
595
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * Leaves the adapter with 0 rx queues, 0 tx queues, no q_vectors and no
 * MSI-X/MSI interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* ring arrays and queue counts */
	igb_free_queues(adapter);

	/* NAPI contexts and q_vector memory */
	igb_free_q_vectors(adapter);

	/* MSI-X table or MSI */
	igb_reset_interrupt_capability(adapter);
}
608
609/**
610 * igb_set_interrupt_capability - set MSI or MSI-X if supported
611 *
612 * Attempt to configure interrupts using the best available
613 * capabilities of the hardware and kernel.
614 **/
615static void igb_set_interrupt_capability(struct igb_adapter *adapter)
616{
617 int err;
618 int numvecs, i;
619
620 /* Number of supported queues. */
621 adapter->num_rx_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
622 adapter->num_tx_queues = min_t(u32, IGB_MAX_TX_QUEUES, num_online_cpus());
623
624 /* start with one vector for every rx queue */
625 numvecs = adapter->num_rx_queues;
626
627 /* if tx handler is seperate add 1 for every tx queue */
628 numvecs += adapter->num_tx_queues;
629
630 /* store the number of vectors reserved for queues */
631 adapter->num_q_vectors = numvecs;
632
633 /* add 1 vector for link status interrupts */
634 numvecs++;
635 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
636 GFP_KERNEL);
637 if (!adapter->msix_entries)
638 goto msi_only;
639
640 for (i = 0; i < numvecs; i++)
641 adapter->msix_entries[i].entry = i;
642
643 err = pci_enable_msix(adapter->pdev,
644 adapter->msix_entries,
645 numvecs);
646 if (err == 0)
647 goto out;
648
649 igb_reset_interrupt_capability(adapter);
650
651 /* If we can't do MSI-X, try MSI */
652msi_only:
653#ifdef CONFIG_PCI_IOV
654 /* disable SR-IOV for non MSI-X configurations */
655 if (adapter->vf_data) {
656 struct e1000_hw *hw = &adapter->hw;
657 /* disable iov and allow time for transactions to clear */
658 pci_disable_sriov(adapter->pdev);
659 msleep(500);
660
661 kfree(adapter->vf_data);
662 adapter->vf_data = NULL;
663 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
664 msleep(100);
665 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
666 }
667#endif
668 adapter->vfs_allocated_count = 0;
669 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
670 adapter->num_rx_queues = 1;
671 adapter->num_tx_queues = 1;
672 adapter->num_q_vectors = 1;
673 if (!pci_enable_msi(adapter->pdev))
674 adapter->flags |= IGB_FLAG_HAS_MSI;
675out:
676 /* Notify the stack of the (possibly) reduced Tx Queue count. */
677 adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
678 return;
679}
680
681/**
682 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
683 * @adapter: board private structure to initialize
684 *
685 * We allocate one q_vector per queue interrupt. If allocation fails we
686 * return -ENOMEM.
687 **/
688static int igb_alloc_q_vectors(struct igb_adapter *adapter)
689{
690 struct igb_q_vector *q_vector;
691 struct e1000_hw *hw = &adapter->hw;
692 int v_idx;
693
694 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
695 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
696 if (!q_vector)
697 goto err_out;
698 q_vector->adapter = adapter;
699 q_vector->itr_shift = (hw->mac.type == e1000_82575) ? 16 : 0;
700 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
701 q_vector->itr_val = IGB_START_ITR;
702 q_vector->set_itr = 1;
703 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
704 adapter->q_vector[v_idx] = q_vector;
705 }
706 return 0;
707
708err_out:
709 while (v_idx) {
710 v_idx--;
711 q_vector = adapter->q_vector[v_idx];
712 netif_napi_del(&q_vector->napi);
713 kfree(q_vector);
714 adapter->q_vector[v_idx] = NULL;
715 }
716 return -ENOMEM;
717}
718
719static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
720 int ring_idx, int v_idx)
721{
722 struct igb_q_vector *q_vector;
723
724 q_vector = adapter->q_vector[v_idx];
725 q_vector->rx_ring = &adapter->rx_ring[ring_idx];
726 q_vector->rx_ring->q_vector = q_vector;
727 q_vector->itr_val = adapter->rx_itr_setting;
728 if (q_vector->itr_val && q_vector->itr_val <= 3)
729 q_vector->itr_val = IGB_START_ITR;
730}
731
732static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
733 int ring_idx, int v_idx)
734{
735 struct igb_q_vector *q_vector;
736
737 q_vector = adapter->q_vector[v_idx];
738 q_vector->tx_ring = &adapter->tx_ring[ring_idx];
739 q_vector->tx_ring->q_vector = q_vector;
740 q_vector->itr_val = adapter->tx_itr_setting;
741 if (q_vector->itr_val && q_vector->itr_val <= 3)
742 q_vector->itr_val = IGB_START_ITR;
743}
744
745/**
746 * igb_map_ring_to_vector - maps allocated queues to vectors
747 *
748 * This function maps the recently allocated queues to vectors.
749 **/
750static int igb_map_ring_to_vector(struct igb_adapter *adapter)
751{
752 int i;
753 int v_idx = 0;
754
755 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
756 (adapter->num_q_vectors < adapter->num_tx_queues))
757 return -ENOMEM;
758
759 if (adapter->num_q_vectors >=
760 (adapter->num_rx_queues + adapter->num_tx_queues)) {
761 for (i = 0; i < adapter->num_rx_queues; i++)
762 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
763 for (i = 0; i < adapter->num_tx_queues; i++)
764 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
765 } else {
766 for (i = 0; i < adapter->num_rx_queues; i++) {
767 if (i < adapter->num_tx_queues)
768 igb_map_tx_ring_to_vector(adapter, i, v_idx);
769 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
770 }
771 for (; i < adapter->num_tx_queues; i++)
772 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
773 }
774 return 0;
775}
776
777/**
778 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
779 *
780 * This function initializes the interrupts and allocates all of the queues.
781 **/
782static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
783{
784 struct pci_dev *pdev = adapter->pdev;
785 int err;
786
787 igb_set_interrupt_capability(adapter);
788
789 err = igb_alloc_q_vectors(adapter);
790 if (err) {
791 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
792 goto err_alloc_q_vectors;
793 }
794
795 err = igb_alloc_queues(adapter);
796 if (err) {
797 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
798 goto err_alloc_queues;
799 }
800
801 err = igb_map_ring_to_vector(adapter);
802 if (err) {
803 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
804 goto err_map_queues;
805 }
806
807
808 return 0;
809err_map_queues:
810 igb_free_queues(adapter);
811err_alloc_queues:
812 igb_free_q_vectors(adapter);
813err_alloc_q_vectors:
814 igb_reset_interrupt_capability(adapter);
815 return err;
816}
817
818/**
819 * igb_request_irq - initialize interrupts
820 *
821 * Attempts to configure interrupts using the best available
822 * capabilities of the hardware and kernel.
823 **/
824static int igb_request_irq(struct igb_adapter *adapter)
825{
826 struct net_device *netdev = adapter->netdev;
827 struct pci_dev *pdev = adapter->pdev;
828 struct e1000_hw *hw = &adapter->hw;
829 int err = 0;
830
831 if (adapter->msix_entries) {
832 err = igb_request_msix(adapter);
833 if (!err)
834 goto request_done;
835 /* fall back to MSI */
836 igb_clear_interrupt_scheme(adapter);
837 if (!pci_enable_msi(adapter->pdev))
838 adapter->flags |= IGB_FLAG_HAS_MSI;
839 igb_free_all_tx_resources(adapter);
840 igb_free_all_rx_resources(adapter);
841 adapter->num_tx_queues = 1;
842 adapter->num_rx_queues = 1;
843 adapter->num_q_vectors = 1;
844 err = igb_alloc_q_vectors(adapter);
845 if (err) {
846 dev_err(&pdev->dev,
847 "Unable to allocate memory for vectors\n");
848 goto request_done;
849 }
850 err = igb_alloc_queues(adapter);
851 if (err) {
852 dev_err(&pdev->dev,
853 "Unable to allocate memory for queues\n");
854 igb_free_q_vectors(adapter);
855 goto request_done;
856 }
857 igb_setup_all_tx_resources(adapter);
858 igb_setup_all_rx_resources(adapter);
859 } else {
860 switch (hw->mac.type) {
861 case e1000_82575:
862 wr32(E1000_MSIXBM(0),
863 (E1000_EICR_RX_QUEUE0 |
864 E1000_EICR_TX_QUEUE0 |
865 E1000_EIMS_OTHER));
866 break;
867 case e1000_82576:
868 wr32(E1000_IVAR0, E1000_IVAR_VALID);
869 break;
870 default:
871 break;
872 }
873 }
874
875 if (adapter->flags & IGB_FLAG_HAS_MSI) {
876 err = request_irq(adapter->pdev->irq, &igb_intr_msi, 0,
877 netdev->name, adapter);
878 if (!err)
879 goto request_done;
880
881 /* fall back to legacy interrupts */
882 igb_reset_interrupt_capability(adapter);
883 adapter->flags &= ~IGB_FLAG_HAS_MSI;
884 }
885
886 err = request_irq(adapter->pdev->irq, &igb_intr, IRQF_SHARED,
887 netdev->name, adapter);
888
889 if (err)
890 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
891 err);
892
893request_done:
894 return err;
895}
896
897static void igb_free_irq(struct igb_adapter *adapter)
898{
899 if (adapter->msix_entries) {
900 int vector = 0, i;
901
902 free_irq(adapter->msix_entries[vector++].vector, adapter);
903
904 for (i = 0; i < adapter->num_q_vectors; i++) {
905 struct igb_q_vector *q_vector = adapter->q_vector[i];
906 free_irq(adapter->msix_entries[vector++].vector,
907 q_vector);
908 }
909 } else {
910 free_irq(adapter->pdev->irq, adapter);
911 }
912}
913
914/**
915 * igb_irq_disable - Mask off interrupt generation on the NIC
916 * @adapter: board private structure
917 **/
918static void igb_irq_disable(struct igb_adapter *adapter)
919{
920 struct e1000_hw *hw = &adapter->hw;
921
922 /*
923 * we need to be careful when disabling interrupts. The VFs are also
924 * mapped into these registers and so clearing the bits can cause
925 * issues on the VF drivers so we only need to clear what we set
926 */
927 if (adapter->msix_entries) {
928 u32 regval = rd32(E1000_EIAM);
929 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
930 wr32(E1000_EIMC, adapter->eims_enable_mask);
931 regval = rd32(E1000_EIAC);
932 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
933 }
934
935 wr32(E1000_IAM, 0);
936 wr32(E1000_IMC, ~0);
937 wrfl();
938 synchronize_irq(adapter->pdev->irq);
939}
940
941/**
942 * igb_irq_enable - Enable default interrupt generation settings
943 * @adapter: board private structure
944 **/
945static void igb_irq_enable(struct igb_adapter *adapter)
946{
947 struct e1000_hw *hw = &adapter->hw;
948
949 if (adapter->msix_entries) {
950 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
951 u32 regval = rd32(E1000_EIAC);
952 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
953 regval = rd32(E1000_EIAM);
954 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
955 wr32(E1000_EIMS, adapter->eims_enable_mask);
956 if (adapter->vfs_allocated_count) {
957 wr32(E1000_MBVFIMR, 0xFF);
958 ims |= E1000_IMS_VMMB;
959 }
960 wr32(E1000_IMS, ims);
961 } else {
962 wr32(E1000_IMS, IMS_ENABLE_MASK);
963 wr32(E1000_IAM, IMS_ENABLE_MASK);
964 }
965}
966
967static void igb_update_mng_vlan(struct igb_adapter *adapter)
968{
969 struct e1000_hw *hw = &adapter->hw;
970 u16 vid = adapter->hw.mng_cookie.vlan_id;
971 u16 old_vid = adapter->mng_vlan_id;
972
973 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
974 /* add VID to filter table */
975 igb_vfta_set(hw, vid, true);
976 adapter->mng_vlan_id = vid;
977 } else {
978 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
979 }
980
981 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
982 (vid != old_vid) &&
983 !vlan_group_get_device(adapter->vlgrp, old_vid)) {
984 /* remove VID from filter table */
985 igb_vfta_set(hw, old_vid, false);
986 }
987}
988
989/**
990 * igb_release_hw_control - release control of the h/w to f/w
991 * @adapter: address of board private structure
992 *
993 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
994 * For ASF and Pass Through versions of f/w this means that the
995 * driver is no longer loaded.
996 *
997 **/
998static void igb_release_hw_control(struct igb_adapter *adapter)
999{
1000 struct e1000_hw *hw = &adapter->hw;
1001 u32 ctrl_ext;
1002
1003 /* Let firmware take over control of h/w */
1004 ctrl_ext = rd32(E1000_CTRL_EXT);
1005 wr32(E1000_CTRL_EXT,
1006 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1007}
1008
1009
1010/**
1011 * igb_get_hw_control - get control of the h/w from f/w
1012 * @adapter: address of board private structure
1013 *
1014 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1015 * For ASF and Pass Through versions of f/w this means that
1016 * the driver is loaded.
1017 *
1018 **/
1019static void igb_get_hw_control(struct igb_adapter *adapter)
1020{
1021 struct e1000_hw *hw = &adapter->hw;
1022 u32 ctrl_ext;
1023
1024 /* Let firmware know the driver has taken over */
1025 ctrl_ext = rd32(E1000_CTRL_EXT);
1026 wr32(E1000_CTRL_EXT,
1027 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1028}
1029
1030/**
1031 * igb_configure - configure the hardware for RX and TX
1032 * @adapter: private board structure
1033 **/
1034static void igb_configure(struct igb_adapter *adapter)
1035{
1036 struct net_device *netdev = adapter->netdev;
1037 int i;
1038
1039 igb_get_hw_control(adapter);
1040 igb_set_rx_mode(netdev);
1041
1042 igb_restore_vlan(adapter);
1043
1044 igb_setup_tctl(adapter);
1045 igb_setup_mrqc(adapter);
1046 igb_setup_rctl(adapter);
1047
1048 igb_configure_tx(adapter);
1049 igb_configure_rx(adapter);
1050
1051 igb_rx_fifo_flush_82575(&adapter->hw);
1052
1053 /* call igb_desc_unused which always leaves
1054 * at least 1 descriptor unused to make sure
1055 * next_to_use != next_to_clean */
1056 for (i = 0; i < adapter->num_rx_queues; i++) {
1057 struct igb_ring *ring = &adapter->rx_ring[i];
1058 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1059 }
1060
1061
1062 adapter->tx_queue_len = netdev->tx_queue_len;
1063}
1064
1065
1066/**
1067 * igb_up - Open the interface and prepare it to handle traffic
1068 * @adapter: board private structure
1069 **/
1070
1071int igb_up(struct igb_adapter *adapter)
1072{
1073 struct e1000_hw *hw = &adapter->hw;
1074 int i;
1075
1076 /* hardware has been reset, we need to reload some things */
1077 igb_configure(adapter);
1078
1079 clear_bit(__IGB_DOWN, &adapter->state);
1080
1081 for (i = 0; i < adapter->num_q_vectors; i++) {
1082 struct igb_q_vector *q_vector = adapter->q_vector[i];
1083 napi_enable(&q_vector->napi);
1084 }
1085 if (adapter->msix_entries)
1086 igb_configure_msix(adapter);
1087
1088 /* Clear any pending interrupts. */
1089 rd32(E1000_ICR);
1090 igb_irq_enable(adapter);
1091
1092 /* notify VFs that reset has been completed */
1093 if (adapter->vfs_allocated_count) {
1094 u32 reg_data = rd32(E1000_CTRL_EXT);
1095 reg_data |= E1000_CTRL_EXT_PFRSTD;
1096 wr32(E1000_CTRL_EXT, reg_data);
1097 }
1098
1099 netif_tx_start_all_queues(adapter->netdev);
1100
1101 /* start the watchdog. */
1102 hw->mac.get_link_status = 1;
1103 schedule_work(&adapter->watchdog_task);
1104
1105 return 0;
1106}
1107
1108void igb_down(struct igb_adapter *adapter)
1109{
1110 struct e1000_hw *hw = &adapter->hw;
1111 struct net_device *netdev = adapter->netdev;
1112 u32 tctl, rctl;
1113 int i;
1114
1115 /* signal that we're down so the interrupt handler does not
1116 * reschedule our watchdog timer */
1117 set_bit(__IGB_DOWN, &adapter->state);
1118
1119 /* disable receives in the hardware */
1120 rctl = rd32(E1000_RCTL);
1121 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1122 /* flush and sleep below */
1123
1124 netif_tx_stop_all_queues(netdev);
1125
1126 /* disable transmits in the hardware */
1127 tctl = rd32(E1000_TCTL);
1128 tctl &= ~E1000_TCTL_EN;
1129 wr32(E1000_TCTL, tctl);
1130 /* flush both disables and wait for them to finish */
1131 wrfl();
1132 msleep(10);
1133
1134 for (i = 0; i < adapter->num_q_vectors; i++) {
1135 struct igb_q_vector *q_vector = adapter->q_vector[i];
1136 napi_disable(&q_vector->napi);
1137 }
1138
1139 igb_irq_disable(adapter);
1140
1141 del_timer_sync(&adapter->watchdog_timer);
1142 del_timer_sync(&adapter->phy_info_timer);
1143
1144 netdev->tx_queue_len = adapter->tx_queue_len;
1145 netif_carrier_off(netdev);
1146
1147 /* record the stats before reset*/
1148 igb_update_stats(adapter);
1149
1150 adapter->link_speed = 0;
1151 adapter->link_duplex = 0;
1152
1153 if (!pci_channel_offline(adapter->pdev))
1154 igb_reset(adapter);
1155 igb_clean_all_tx_rings(adapter);
1156 igb_clean_all_rx_rings(adapter);
1157#ifdef CONFIG_IGB_DCA
1158
1159 /* since we reset the hardware DCA settings were cleared */
1160 igb_setup_dca(adapter);
1161#endif
1162}
1163
1164void igb_reinit_locked(struct igb_adapter *adapter)
1165{
1166 WARN_ON(in_interrupt());
1167 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1168 msleep(1);
1169 igb_down(adapter);
1170 igb_up(adapter);
1171 clear_bit(__IGB_RESETTING, &adapter->state);
1172}
1173
1174void igb_reset(struct igb_adapter *adapter)
1175{
1176 struct e1000_hw *hw = &adapter->hw;
1177 struct e1000_mac_info *mac = &hw->mac;
1178 struct e1000_fc_info *fc = &hw->fc;
1179 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1180 u16 hwm;
1181
1182 /* Repartition Pba for greater than 9k mtu
1183 * To take effect CTRL.RST is required.
1184 */
1185 switch (mac->type) {
1186 case e1000_82576:
1187 pba = rd32(E1000_RXPBS);
1188 pba &= E1000_RXPBS_SIZE_MASK_82576;
1189 break;
1190 case e1000_82575:
1191 default:
1192 pba = E1000_PBA_34K;
1193 break;
1194 }
1195
1196 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1197 (mac->type < e1000_82576)) {
1198 /* adjust PBA for jumbo frames */
1199 wr32(E1000_PBA, pba);
1200
1201 /* To maintain wire speed transmits, the Tx FIFO should be
1202 * large enough to accommodate two full transmit packets,
1203 * rounded up to the next 1KB and expressed in KB. Likewise,
1204 * the Rx FIFO should be large enough to accommodate at least
1205 * one full receive packet and is similarly rounded up and
1206 * expressed in KB. */
1207 pba = rd32(E1000_PBA);
1208 /* upper 16 bits has Tx packet buffer allocation size in KB */
1209 tx_space = pba >> 16;
1210 /* lower 16 bits has Rx packet buffer allocation size in KB */
1211 pba &= 0xffff;
1212 /* the tx fifo also stores 16 bytes of information about the tx
1213 * but don't include ethernet FCS because hardware appends it */
1214 min_tx_space = (adapter->max_frame_size +
1215 sizeof(union e1000_adv_tx_desc) -
1216 ETH_FCS_LEN) * 2;
1217 min_tx_space = ALIGN(min_tx_space, 1024);
1218 min_tx_space >>= 10;
1219 /* software strips receive CRC, so leave room for it */
1220 min_rx_space = adapter->max_frame_size;
1221 min_rx_space = ALIGN(min_rx_space, 1024);
1222 min_rx_space >>= 10;
1223
1224 /* If current Tx allocation is less than the min Tx FIFO size,
1225 * and the min Tx FIFO size is less than the current Rx FIFO
1226 * allocation, take space away from current Rx allocation */
1227 if (tx_space < min_tx_space &&
1228 ((min_tx_space - tx_space) < pba)) {
1229 pba = pba - (min_tx_space - tx_space);
1230
1231 /* if short on rx space, rx wins and must trump tx
1232 * adjustment */
1233 if (pba < min_rx_space)
1234 pba = min_rx_space;
1235 }
1236 wr32(E1000_PBA, pba);
1237 }
1238
1239 /* flow control settings */
1240 /* The high water mark must be low enough to fit one full frame
1241 * (or the size used for early receive) above it in the Rx FIFO.
1242 * Set it to the lower of:
1243 * - 90% of the Rx FIFO size, or
1244 * - the full Rx FIFO size minus one full frame */
1245 hwm = min(((pba << 10) * 9 / 10),
1246 ((pba << 10) - 2 * adapter->max_frame_size));
1247
1248 if (mac->type < e1000_82576) {
1249 fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */
1250 fc->low_water = fc->high_water - 8;
1251 } else {
1252 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1253 fc->low_water = fc->high_water - 16;
1254 }
1255 fc->pause_time = 0xFFFF;
1256 fc->send_xon = 1;
1257 fc->current_mode = fc->requested_mode;
1258
1259 /* disable receive for all VFs and wait one second */
1260 if (adapter->vfs_allocated_count) {
1261 int i;
1262 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1263 adapter->vf_data[i].flags = 0;
1264
1265 /* ping all the active vfs to let them know we are going down */
1266 igb_ping_all_vfs(adapter);
1267
1268 /* disable transmits and receives */
1269 wr32(E1000_VFRE, 0);
1270 wr32(E1000_VFTE, 0);
1271 }
1272
1273 /* Allow time for pending master requests to run */
1274 adapter->hw.mac.ops.reset_hw(&adapter->hw);
1275 wr32(E1000_WUC, 0);
1276
1277 if (adapter->hw.mac.ops.init_hw(&adapter->hw))
1278 dev_err(&adapter->pdev->dev, "Hardware Error\n");
1279
1280 igb_update_mng_vlan(adapter);
1281
1282 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1283 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1284
1285 igb_reset_adaptive(&adapter->hw);
1286 igb_get_phy_info(&adapter->hw);
1287}
1288
1289static const struct net_device_ops igb_netdev_ops = {
1290 .ndo_open = igb_open,
1291 .ndo_stop = igb_close,
1292 .ndo_start_xmit = igb_xmit_frame_adv,
1293 .ndo_get_stats = igb_get_stats,
1294 .ndo_set_rx_mode = igb_set_rx_mode,
1295 .ndo_set_multicast_list = igb_set_rx_mode,
1296 .ndo_set_mac_address = igb_set_mac,
1297 .ndo_change_mtu = igb_change_mtu,
1298 .ndo_do_ioctl = igb_ioctl,
1299 .ndo_tx_timeout = igb_tx_timeout,
1300 .ndo_validate_addr = eth_validate_addr,
1301 .ndo_vlan_rx_register = igb_vlan_rx_register,
1302 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1303 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1304#ifdef CONFIG_NET_POLL_CONTROLLER
1305 .ndo_poll_controller = igb_netpoll,
1306#endif
1307};
1308
1309/**
1310 * igb_probe - Device Initialization Routine
1311 * @pdev: PCI device information struct
1312 * @ent: entry in igb_pci_tbl
1313 *
1314 * Returns 0 on success, negative on failure
1315 *
1316 * igb_probe initializes an adapter identified by a pci_dev structure.
1317 * The OS initialization, configuring of the adapter private structure,
1318 * and a hardware reset occur.
1319 **/
1320static int __devinit igb_probe(struct pci_dev *pdev,
1321 const struct pci_device_id *ent)
1322{
1323 struct net_device *netdev;
1324 struct igb_adapter *adapter;
1325 struct e1000_hw *hw;
1326 u16 eeprom_data = 0;
1327 static int global_quad_port_a; /* global quad port a indication */
1328 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1329 unsigned long mmio_start, mmio_len;
1330 int err, pci_using_dac;
1331 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1332 u32 part_num;
1333
1334 err = pci_enable_device_mem(pdev);
1335 if (err)
1336 return err;
1337
1338 pci_using_dac = 0;
1339 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1340 if (!err) {
1341 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1342 if (!err)
1343 pci_using_dac = 1;
1344 } else {
1345 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1346 if (err) {
1347 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1348 if (err) {
1349 dev_err(&pdev->dev, "No usable DMA "
1350 "configuration, aborting\n");
1351 goto err_dma;
1352 }
1353 }
1354 }
1355
1356 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1357 IORESOURCE_MEM),
1358 igb_driver_name);
1359 if (err)
1360 goto err_pci_reg;
1361
1362 pci_enable_pcie_error_reporting(pdev);
1363
1364 pci_set_master(pdev);
1365 pci_save_state(pdev);
1366
1367 err = -ENOMEM;
1368 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1369 IGB_ABS_MAX_TX_QUEUES);
1370 if (!netdev)
1371 goto err_alloc_etherdev;
1372
1373 SET_NETDEV_DEV(netdev, &pdev->dev);
1374
1375 pci_set_drvdata(pdev, netdev);
1376 adapter = netdev_priv(netdev);
1377 adapter->netdev = netdev;
1378 adapter->pdev = pdev;
1379 hw = &adapter->hw;
1380 hw->back = adapter;
1381 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1382
1383 mmio_start = pci_resource_start(pdev, 0);
1384 mmio_len = pci_resource_len(pdev, 0);
1385
1386 err = -EIO;
1387 hw->hw_addr = ioremap(mmio_start, mmio_len);
1388 if (!hw->hw_addr)
1389 goto err_ioremap;
1390
1391 netdev->netdev_ops = &igb_netdev_ops;
1392 igb_set_ethtool_ops(netdev);
1393 netdev->watchdog_timeo = 5 * HZ;
1394
1395 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1396
1397 netdev->mem_start = mmio_start;
1398 netdev->mem_end = mmio_start + mmio_len;
1399
1400 /* PCI config space info */
1401 hw->vendor_id = pdev->vendor;
1402 hw->device_id = pdev->device;
1403 hw->revision_id = pdev->revision;
1404 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1405 hw->subsystem_device_id = pdev->subsystem_device;
1406
1407 /* setup the private structure */
1408 hw->back = adapter;
1409 /* Copy the default MAC, PHY and NVM function pointers */
1410 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1411 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1412 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1413 /* Initialize skew-specific constants */
1414 err = ei->get_invariants(hw);
1415 if (err)
1416 goto err_sw_init;
1417
1418 /* setup the private structure */
1419 err = igb_sw_init(adapter);
1420 if (err)
1421 goto err_sw_init;
1422
1423 igb_get_bus_info_pcie(hw);
1424
1425 hw->phy.autoneg_wait_to_complete = false;
1426 hw->mac.adaptive_ifs = true;
1427
1428 /* Copper options */
1429 if (hw->phy.media_type == e1000_media_type_copper) {
1430 hw->phy.mdix = AUTO_ALL_MODES;
1431 hw->phy.disable_polarity_correction = false;
1432 hw->phy.ms_type = e1000_ms_hw_default;
1433 }
1434
1435 if (igb_check_reset_block(hw))
1436 dev_info(&pdev->dev,
1437 "PHY reset is blocked due to SOL/IDER session.\n");
1438
1439 netdev->features = NETIF_F_SG |
1440 NETIF_F_IP_CSUM |
1441 NETIF_F_HW_VLAN_TX |
1442 NETIF_F_HW_VLAN_RX |
1443 NETIF_F_HW_VLAN_FILTER;
1444
1445 netdev->features |= NETIF_F_IPV6_CSUM;
1446 netdev->features |= NETIF_F_TSO;
1447 netdev->features |= NETIF_F_TSO6;
1448
1449 netdev->features |= NETIF_F_GRO;
1450
1451 netdev->vlan_features |= NETIF_F_TSO;
1452 netdev->vlan_features |= NETIF_F_TSO6;
1453 netdev->vlan_features |= NETIF_F_IP_CSUM;
1454 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1455 netdev->vlan_features |= NETIF_F_SG;
1456
1457 if (pci_using_dac)
1458 netdev->features |= NETIF_F_HIGHDMA;
1459
1460 if (adapter->hw.mac.type == e1000_82576)
1461 netdev->features |= NETIF_F_SCTP_CSUM;
1462
1463 adapter->en_mng_pt = igb_enable_mng_pass_thru(&adapter->hw);
1464
1465 /* before reading the NVM, reset the controller to put the device in a
1466 * known good starting state */
1467 hw->mac.ops.reset_hw(hw);
1468
1469 /* make sure the NVM is good */
1470 if (igb_validate_nvm_checksum(hw) < 0) {
1471 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1472 err = -EIO;
1473 goto err_eeprom;
1474 }
1475
1476 /* copy the MAC address out of the NVM */
1477 if (hw->mac.ops.read_mac_addr(hw))
1478 dev_err(&pdev->dev, "NVM Read Error\n");
1479
1480 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1481 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1482
1483 if (!is_valid_ether_addr(netdev->perm_addr)) {
1484 dev_err(&pdev->dev, "Invalid MAC Address\n");
1485 err = -EIO;
1486 goto err_eeprom;
1487 }
1488
1489 setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1490 (unsigned long) adapter);
1491 setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1492 (unsigned long) adapter);
1493
1494 INIT_WORK(&adapter->reset_task, igb_reset_task);
1495 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1496
1497 /* Initialize link properties that are user-changeable */
1498 adapter->fc_autoneg = true;
1499 hw->mac.autoneg = true;
1500 hw->phy.autoneg_advertised = 0x2f;
1501
1502 hw->fc.requested_mode = e1000_fc_default;
1503 hw->fc.current_mode = e1000_fc_default;
1504
1505 igb_validate_mdi_setting(hw);
1506
1507 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
1508 * enable the ACPI Magic Packet filter
1509 */
1510
1511 if (hw->bus.func == 0)
1512 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1513 else if (hw->bus.func == 1)
1514 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1515
1516 if (eeprom_data & eeprom_apme_mask)
1517 adapter->eeprom_wol |= E1000_WUFC_MAG;
1518
1519 /* now that we have the eeprom settings, apply the special cases where
1520 * the eeprom may be wrong or the board simply won't support wake on
1521 * lan on a particular port */
1522 switch (pdev->device) {
1523 case E1000_DEV_ID_82575GB_QUAD_COPPER:
1524 adapter->eeprom_wol = 0;
1525 break;
1526 case E1000_DEV_ID_82575EB_FIBER_SERDES:
1527 case E1000_DEV_ID_82576_FIBER:
1528 case E1000_DEV_ID_82576_SERDES:
1529 /* Wake events only supported on port A for dual fiber
1530 * regardless of eeprom setting */
1531 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1532 adapter->eeprom_wol = 0;
1533 break;
1534 case E1000_DEV_ID_82576_QUAD_COPPER:
1535 /* if quad port adapter, disable WoL on all but port A */
1536 if (global_quad_port_a != 0)
1537 adapter->eeprom_wol = 0;
1538 else
1539 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1540 /* Reset for multiple quad port adapters */
1541 if (++global_quad_port_a == 4)
1542 global_quad_port_a = 0;
1543 break;
1544 }
1545
1546 /* initialize the wol settings based on the eeprom settings */
1547 adapter->wol = adapter->eeprom_wol;
1548 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1549
1550 /* reset the hardware with the new settings */
1551 igb_reset(adapter);
1552
1553 /* let the f/w know that the h/w is now under the control of the
1554 * driver. */
1555 igb_get_hw_control(adapter);
1556
1557 strcpy(netdev->name, "eth%d");
1558 err = register_netdev(netdev);
1559 if (err)
1560 goto err_register;
1561
1562 /* carrier off reporting is important to ethtool even BEFORE open */
1563 netif_carrier_off(netdev);
1564
1565#ifdef CONFIG_IGB_DCA
1566 if (dca_add_requester(&pdev->dev) == 0) {
1567 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1568 dev_info(&pdev->dev, "DCA enabled\n");
1569 igb_setup_dca(adapter);
1570 }
1571
1572#endif
1573
1574 switch (hw->mac.type) {
1575 case e1000_82576:
1576 /*
1577 * Initialize hardware timer: we keep it running just in case
1578 * that some program needs it later on.
1579 */
1580 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1581 adapter->cycles.read = igb_read_clock;
1582 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1583 adapter->cycles.mult = 1;
1584 /**
1585 * Scale the NIC clock cycle by a large factor so that
1586 * relatively small clock corrections can be added or
1587 * substracted at each clock tick. The drawbacks of a large
1588 * factor are a) that the clock register overflows more quickly
1589 * (not such a big deal) and b) that the increment per tick has
1590 * to fit into 24 bits. As a result we need to use a shift of
1591 * 19 so we can fit a value of 16 into the TIMINCA register.
1592 */
1593 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1594 wr32(E1000_TIMINCA,
1595 (1 << E1000_TIMINCA_16NS_SHIFT) |
1596 (16 << IGB_82576_TSYNC_SHIFT));
1597
1598 /* Set registers so that rollover occurs soon to test this. */
1599 wr32(E1000_SYSTIML, 0x00000000);
1600 wr32(E1000_SYSTIMH, 0xFF800000);
1601 wrfl();
1602
1603 timecounter_init(&adapter->clock,
1604 &adapter->cycles,
1605 ktime_to_ns(ktime_get_real()));
1606 /*
1607 * Synchronize our NIC clock against system wall clock. NIC
1608 * time stamp reading requires ~3us per sample, each sample
1609 * was pretty stable even under load => only require 10
1610 * samples for each offset comparison.
1611 */
1612 memset(&adapter->compare, 0, sizeof(adapter->compare));
1613 adapter->compare.source = &adapter->clock;
1614 adapter->compare.target = ktime_get_real;
1615 adapter->compare.num_samples = 10;
1616 timecompare_update(&adapter->compare, 0);
1617 break;
1618 case e1000_82575:
1619 /* 82575 does not support timesync */
1620 default:
1621 break;
1622 }
1623
1624 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1625 /* print bus type/speed/width info */
1626 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1627 netdev->name,
1628 ((hw->bus.speed == e1000_bus_speed_2500)
1629 ? "2.5Gb/s" : "unknown"),
1630 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1631 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1632 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1633 "unknown"),
1634 netdev->dev_addr);
1635
1636 igb_read_part_num(hw, &part_num);
1637 dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1638 (part_num >> 8), (part_num & 0xff));
1639
1640 dev_info(&pdev->dev,
1641 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1642 adapter->msix_entries ? "MSI-X" :
1643 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1644 adapter->num_rx_queues, adapter->num_tx_queues);
1645
1646 return 0;
1647
1648err_register:
1649 igb_release_hw_control(adapter);
1650err_eeprom:
1651 if (!igb_check_reset_block(hw))
1652 igb_reset_phy(hw);
1653
1654 if (hw->flash_address)
1655 iounmap(hw->flash_address);
1656err_sw_init:
1657 igb_clear_interrupt_scheme(adapter);
1658 iounmap(hw->hw_addr);
1659err_ioremap:
1660 free_netdev(netdev);
1661err_alloc_etherdev:
1662 pci_release_selected_regions(pdev, pci_select_bars(pdev,
1663 IORESOURCE_MEM));
1664err_pci_reg:
1665err_dma:
1666 pci_disable_device(pdev);
1667 return err;
1668}
1669
1670/**
1671 * igb_remove - Device Removal Routine
1672 * @pdev: PCI device information struct
1673 *
1674 * igb_remove is called by the PCI subsystem to alert the driver
1675 * that it should release a PCI device. The could be caused by a
1676 * Hot-Plug event, or because the driver is going to be removed from
1677 * memory.
1678 **/
1679static void __devexit igb_remove(struct pci_dev *pdev)
1680{
1681 struct net_device *netdev = pci_get_drvdata(pdev);
1682 struct igb_adapter *adapter = netdev_priv(netdev);
1683 struct e1000_hw *hw = &adapter->hw;
1684
1685 /* flush_scheduled work may reschedule our watchdog task, so
1686 * explicitly disable watchdog tasks from being rescheduled */
1687 set_bit(__IGB_DOWN, &adapter->state);
1688 del_timer_sync(&adapter->watchdog_timer);
1689 del_timer_sync(&adapter->phy_info_timer);
1690
1691 flush_scheduled_work();
1692
1693#ifdef CONFIG_IGB_DCA
1694 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1695 dev_info(&pdev->dev, "DCA disabled\n");
1696 dca_remove_requester(&pdev->dev);
1697 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1698 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1699 }
1700#endif
1701
1702 /* Release control of h/w to f/w. If f/w is AMT enabled, this
1703 * would have already happened in close and is redundant. */
1704 igb_release_hw_control(adapter);
1705
1706 unregister_netdev(netdev);
1707
1708 if (!igb_check_reset_block(&adapter->hw))
1709 igb_reset_phy(&adapter->hw);
1710
1711 igb_clear_interrupt_scheme(adapter);
1712
1713#ifdef CONFIG_PCI_IOV
1714 /* reclaim resources allocated to VFs */
1715 if (adapter->vf_data) {
1716 /* disable iov and allow time for transactions to clear */
1717 pci_disable_sriov(pdev);
1718 msleep(500);
1719
1720 kfree(adapter->vf_data);
1721 adapter->vf_data = NULL;
1722 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1723 msleep(100);
1724 dev_info(&pdev->dev, "IOV Disabled\n");
1725 }
1726#endif
1727 iounmap(hw->hw_addr);
1728 if (hw->flash_address)
1729 iounmap(hw->flash_address);
1730 pci_release_selected_regions(pdev, pci_select_bars(pdev,
1731 IORESOURCE_MEM));
1732
1733 free_netdev(netdev);
1734
1735 pci_disable_pcie_error_reporting(pdev);
1736
1737 pci_disable_device(pdev);
1738}
1739
1740/**
1741 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1742 * @adapter: board private structure to initialize
1743 *
1744 * This function initializes the vf specific data storage and then attempts to
1745 * allocate the VFs. The reason for ordering it this way is because it is much
1746 * mor expensive time wise to disable SR-IOV than it is to allocate and free
1747 * the memory for the VFs.
1748 **/
1749static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
1750{
1751#ifdef CONFIG_PCI_IOV
1752 struct pci_dev *pdev = adapter->pdev;
1753
1754 if (adapter->vfs_allocated_count > 7)
1755 adapter->vfs_allocated_count = 7;
1756
1757 if (adapter->vfs_allocated_count) {
1758 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1759 sizeof(struct vf_data_storage),
1760 GFP_KERNEL);
1761 /* if allocation failed then we do not support SR-IOV */
1762 if (!adapter->vf_data) {
1763 adapter->vfs_allocated_count = 0;
1764 dev_err(&pdev->dev, "Unable to allocate memory for VF "
1765 "Data Storage\n");
1766 }
1767 }
1768
1769 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1770 kfree(adapter->vf_data);
1771 adapter->vf_data = NULL;
1772#endif /* CONFIG_PCI_IOV */
1773 adapter->vfs_allocated_count = 0;
1774#ifdef CONFIG_PCI_IOV
1775 } else {
1776 unsigned char mac_addr[ETH_ALEN];
1777 int i;
1778 dev_info(&pdev->dev, "%d vfs allocated\n",
1779 adapter->vfs_allocated_count);
1780 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1781 random_ether_addr(mac_addr);
1782 igb_set_vf_mac(adapter, i, mac_addr);
1783 }
1784 }
1785#endif /* CONFIG_PCI_IOV */
1786}
1787
1788/**
1789 * igb_sw_init - Initialize general software structures (struct igb_adapter)
1790 * @adapter: board private structure to initialize
1791 *
1792 * igb_sw_init initializes the Adapter private data structure.
1793 * Fields are initialized based on PCI device information and
1794 * OS network device settings (MTU size).
1795 **/
1796static int __devinit igb_sw_init(struct igb_adapter *adapter)
1797{
1798 struct e1000_hw *hw = &adapter->hw;
1799 struct net_device *netdev = adapter->netdev;
1800 struct pci_dev *pdev = adapter->pdev;
1801
1802 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1803
1804 adapter->tx_ring_count = IGB_DEFAULT_TXD;
1805 adapter->rx_ring_count = IGB_DEFAULT_RXD;
1806 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1807 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1808
1809 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1810 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1811
1812#ifdef CONFIG_PCI_IOV
1813 if (hw->mac.type == e1000_82576)
1814 adapter->vfs_allocated_count = max_vfs;
1815
1816#endif /* CONFIG_PCI_IOV */
1817 /* This call may decrease the number of queues */
1818 if (igb_init_interrupt_scheme(adapter)) {
1819 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1820 return -ENOMEM;
1821 }
1822
1823 igb_probe_vfs(adapter);
1824
1825 /* Explicitly disable IRQ since the NIC can be in any state. */
1826 igb_irq_disable(adapter);
1827
1828 set_bit(__IGB_DOWN, &adapter->state);
1829 return 0;
1830}
1831
1832/**
1833 * igb_open - Called when a network interface is made active
1834 * @netdev: network interface device structure
1835 *
1836 * Returns 0 on success, negative value on failure
1837 *
1838 * The open entry point is called when a network interface is made
1839 * active by the system (IFF_UP). At this point all resources needed
1840 * for transmit and receive operations are allocated, the interrupt
1841 * handler is registered with the OS, the watchdog timer is started,
1842 * and the stack is notified that the interface is ready.
1843 **/
1844static int igb_open(struct net_device *netdev)
1845{
1846 struct igb_adapter *adapter = netdev_priv(netdev);
1847 struct e1000_hw *hw = &adapter->hw;
1848 int err;
1849 int i;
1850
1851 /* disallow open during test */
1852 if (test_bit(__IGB_TESTING, &adapter->state))
1853 return -EBUSY;
1854
1855 netif_carrier_off(netdev);
1856
1857 /* allocate transmit descriptors */
1858 err = igb_setup_all_tx_resources(adapter);
1859 if (err)
1860 goto err_setup_tx;
1861
1862 /* allocate receive descriptors */
1863 err = igb_setup_all_rx_resources(adapter);
1864 if (err)
1865 goto err_setup_rx;
1866
1867 /* e1000_power_up_phy(adapter); */
1868
1869 /* before we allocate an interrupt, we must be ready to handle it.
1870 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1871 * as soon as we call pci_request_irq, so we have to setup our
1872 * clean_rx handler before we do so. */
1873 igb_configure(adapter);
1874
1875 err = igb_request_irq(adapter);
1876 if (err)
1877 goto err_req_irq;
1878
1879 /* From here on the code is the same as igb_up() */
1880 clear_bit(__IGB_DOWN, &adapter->state);
1881
1882 for (i = 0; i < adapter->num_q_vectors; i++) {
1883 struct igb_q_vector *q_vector = adapter->q_vector[i];
1884 napi_enable(&q_vector->napi);
1885 }
1886
1887 /* Clear any pending interrupts. */
1888 rd32(E1000_ICR);
1889
1890 igb_irq_enable(adapter);
1891
1892 /* notify VFs that reset has been completed */
1893 if (adapter->vfs_allocated_count) {
1894 u32 reg_data = rd32(E1000_CTRL_EXT);
1895 reg_data |= E1000_CTRL_EXT_PFRSTD;
1896 wr32(E1000_CTRL_EXT, reg_data);
1897 }
1898
1899 netif_tx_start_all_queues(netdev);
1900
1901 /* start the watchdog. */
1902 hw->mac.get_link_status = 1;
1903 schedule_work(&adapter->watchdog_task);
1904
1905 return 0;
1906
1907err_req_irq:
1908 igb_release_hw_control(adapter);
1909 /* e1000_power_down_phy(adapter); */
1910 igb_free_all_rx_resources(adapter);
1911err_setup_rx:
1912 igb_free_all_tx_resources(adapter);
1913err_setup_tx:
1914 igb_reset(adapter);
1915
1916 return err;
1917}
1918
1919/**
1920 * igb_close - Disables a network interface
1921 * @netdev: network interface device structure
1922 *
1923 * Returns 0, this is not allowed to fail
1924 *
1925 * The close entry point is called when an interface is de-activated
1926 * by the OS. The hardware is still under the driver's control, but
1927 * needs to be disabled. A global MAC reset is issued to stop the
1928 * hardware, and all transmit and receive resources are freed.
1929 **/
1930static int igb_close(struct net_device *netdev)
1931{
1932 struct igb_adapter *adapter = netdev_priv(netdev);
1933
1934 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
1935 igb_down(adapter);
1936
1937 igb_free_irq(adapter);
1938
1939 igb_free_all_tx_resources(adapter);
1940 igb_free_all_rx_resources(adapter);
1941
1942 return 0;
1943}
1944
1945/**
1946 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
1947 * @tx_ring: tx descriptor ring (for a specific queue) to setup
1948 *
1949 * Return 0 on success, negative on failure
1950 **/
1951int igb_setup_tx_resources(struct igb_ring *tx_ring)
1952{
1953 struct pci_dev *pdev = tx_ring->pdev;
1954 int size;
1955
1956 size = sizeof(struct igb_buffer) * tx_ring->count;
1957 tx_ring->buffer_info = vmalloc(size);
1958 if (!tx_ring->buffer_info)
1959 goto err;
1960 memset(tx_ring->buffer_info, 0, size);
1961
1962 /* round up to nearest 4K */
1963 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
1964 tx_ring->size = ALIGN(tx_ring->size, 4096);
1965
1966 tx_ring->desc = pci_alloc_consistent(pdev,
1967 tx_ring->size,
1968 &tx_ring->dma);
1969
1970 if (!tx_ring->desc)
1971 goto err;
1972
1973 tx_ring->next_to_use = 0;
1974 tx_ring->next_to_clean = 0;
1975 return 0;
1976
1977err:
1978 vfree(tx_ring->buffer_info);
1979 dev_err(&pdev->dev,
1980 "Unable to allocate memory for the transmit descriptor ring\n");
1981 return -ENOMEM;
1982}
1983
1984/**
1985 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
1986 * (Descriptors) for all queues
1987 * @adapter: board private structure
1988 *
1989 * Return 0 on success, negative on failure
1990 **/
1991static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
1992{
1993 struct pci_dev *pdev = adapter->pdev;
1994 int i, err = 0;
1995
1996 for (i = 0; i < adapter->num_tx_queues; i++) {
1997 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
1998 if (err) {
1999 dev_err(&pdev->dev,
2000 "Allocation for Tx Queue %u failed\n", i);
2001 for (i--; i >= 0; i--)
2002 igb_free_tx_resources(&adapter->tx_ring[i]);
2003 break;
2004 }
2005 }
2006
2007 for (i = 0; i < IGB_MAX_TX_QUEUES; i++) {
2008 int r_idx = i % adapter->num_tx_queues;
2009 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
2010 }
2011 return err;
2012}
2013
2014/**
2015 * igb_setup_tctl - configure the transmit control registers
2016 * @adapter: Board private structure
2017 **/
2018void igb_setup_tctl(struct igb_adapter *adapter)
2019{
2020 struct e1000_hw *hw = &adapter->hw;
2021 u32 tctl;
2022
2023 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2024 wr32(E1000_TXDCTL(0), 0);
2025
2026 /* Program the Transmit Control Register */
2027 tctl = rd32(E1000_TCTL);
2028 tctl &= ~E1000_TCTL_CT;
2029 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2030 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2031
2032 igb_config_collision_dist(hw);
2033
2034 /* Enable transmits */
2035 tctl |= E1000_TCTL_EN;
2036
2037 wr32(E1000_TCTL, tctl);
2038}
2039
2040/**
2041 * igb_configure_tx_ring - Configure transmit ring after Reset
2042 * @adapter: board private structure
2043 * @ring: tx ring to configure
2044 *
2045 * Configure a transmit ring after a reset.
2046 **/
2047void igb_configure_tx_ring(struct igb_adapter *adapter,
2048 struct igb_ring *ring)
2049{
2050 struct e1000_hw *hw = &adapter->hw;
2051 u32 txdctl;
2052 u64 tdba = ring->dma;
2053 int reg_idx = ring->reg_idx;
2054
2055 /* disable the queue */
2056 txdctl = rd32(E1000_TXDCTL(reg_idx));
2057 wr32(E1000_TXDCTL(reg_idx),
2058 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2059 wrfl();
2060 mdelay(10);
2061
2062 wr32(E1000_TDLEN(reg_idx),
2063 ring->count * sizeof(union e1000_adv_tx_desc));
2064 wr32(E1000_TDBAL(reg_idx),
2065 tdba & 0x00000000ffffffffULL);
2066 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2067
2068 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2069 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2070 writel(0, ring->head);
2071 writel(0, ring->tail);
2072
2073 txdctl |= IGB_TX_PTHRESH;
2074 txdctl |= IGB_TX_HTHRESH << 8;
2075 txdctl |= IGB_TX_WTHRESH << 16;
2076
2077 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2078 wr32(E1000_TXDCTL(reg_idx), txdctl);
2079}
2080
2081/**
2082 * igb_configure_tx - Configure transmit Unit after Reset
2083 * @adapter: board private structure
2084 *
2085 * Configure the Tx unit of the MAC after a reset.
2086 **/
2087static void igb_configure_tx(struct igb_adapter *adapter)
2088{
2089 int i;
2090
2091 for (i = 0; i < adapter->num_tx_queues; i++)
2092 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2093}
2094
2095/**
2096 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2097 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2098 *
2099 * Returns 0 on success, negative on failure
2100 **/
2101int igb_setup_rx_resources(struct igb_ring *rx_ring)
2102{
2103 struct pci_dev *pdev = rx_ring->pdev;
2104 int size, desc_len;
2105
2106 size = sizeof(struct igb_buffer) * rx_ring->count;
2107 rx_ring->buffer_info = vmalloc(size);
2108 if (!rx_ring->buffer_info)
2109 goto err;
2110 memset(rx_ring->buffer_info, 0, size);
2111
2112 desc_len = sizeof(union e1000_adv_rx_desc);
2113
2114 /* Round up to nearest 4K */
2115 rx_ring->size = rx_ring->count * desc_len;
2116 rx_ring->size = ALIGN(rx_ring->size, 4096);
2117
2118 rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2119 &rx_ring->dma);
2120
2121 if (!rx_ring->desc)
2122 goto err;
2123
2124 rx_ring->next_to_clean = 0;
2125 rx_ring->next_to_use = 0;
2126
2127 return 0;
2128
2129err:
2130 vfree(rx_ring->buffer_info);
2131 rx_ring->buffer_info = NULL;
2132 dev_err(&pdev->dev, "Unable to allocate memory for "
2133 "the receive descriptor ring\n");
2134 return -ENOMEM;
2135}
2136
2137/**
2138 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2139 * (Descriptors) for all queues
2140 * @adapter: board private structure
2141 *
2142 * Return 0 on success, negative on failure
2143 **/
2144static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2145{
2146 struct pci_dev *pdev = adapter->pdev;
2147 int i, err = 0;
2148
2149 for (i = 0; i < adapter->num_rx_queues; i++) {
2150 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2151 if (err) {
2152 dev_err(&pdev->dev,
2153 "Allocation for Rx Queue %u failed\n", i);
2154 for (i--; i >= 0; i--)
2155 igb_free_rx_resources(&adapter->rx_ring[i]);
2156 break;
2157 }
2158 }
2159
2160 return err;
2161}
2162
2163/**
2164 * igb_setup_mrqc - configure the multiple receive queue control registers
2165 * @adapter: Board private structure
2166 **/
2167static void igb_setup_mrqc(struct igb_adapter *adapter)
2168{
2169 struct e1000_hw *hw = &adapter->hw;
2170 u32 mrqc, rxcsum;
2171 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2172 union e1000_reta {
2173 u32 dword;
2174 u8 bytes[4];
2175 } reta;
2176 static const u8 rsshash[40] = {
2177 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2178 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2179 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2180 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2181
2182 /* Fill out hash function seeds */
2183 for (j = 0; j < 10; j++) {
2184 u32 rsskey = rsshash[(j * 4)];
2185 rsskey |= rsshash[(j * 4) + 1] << 8;
2186 rsskey |= rsshash[(j * 4) + 2] << 16;
2187 rsskey |= rsshash[(j * 4) + 3] << 24;
2188 array_wr32(E1000_RSSRK(0), j, rsskey);
2189 }
2190
2191 num_rx_queues = adapter->num_rx_queues;
2192
2193 if (adapter->vfs_allocated_count) {
2194 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2195 switch (hw->mac.type) {
2196 case e1000_82576:
2197 shift = 3;
2198 num_rx_queues = 2;
2199 break;
2200 case e1000_82575:
2201 shift = 2;
2202 shift2 = 6;
2203 default:
2204 break;
2205 }
2206 } else {
2207 if (hw->mac.type == e1000_82575)
2208 shift = 6;
2209 }
2210
2211 for (j = 0; j < (32 * 4); j++) {
2212 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2213 if (shift2)
2214 reta.bytes[j & 3] |= num_rx_queues << shift2;
2215 if ((j & 3) == 3)
2216 wr32(E1000_RETA(j >> 2), reta.dword);
2217 }
2218
2219 /*
2220 * Disable raw packet checksumming so that RSS hash is placed in
2221 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2222 * offloads as they are enabled by default
2223 */
2224 rxcsum = rd32(E1000_RXCSUM);
2225 rxcsum |= E1000_RXCSUM_PCSD;
2226
2227 if (adapter->hw.mac.type >= e1000_82576)
2228 /* Enable Receive Checksum Offload for SCTP */
2229 rxcsum |= E1000_RXCSUM_CRCOFL;
2230
2231 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2232 wr32(E1000_RXCSUM, rxcsum);
2233
2234 /* If VMDq is enabled then we set the appropriate mode for that, else
2235 * we default to RSS so that an RSS hash is calculated per packet even
2236 * if we are only using one queue */
2237 if (adapter->vfs_allocated_count) {
2238 if (hw->mac.type > e1000_82575) {
2239 /* Set the default pool for the PF's first queue */
2240 u32 vtctl = rd32(E1000_VT_CTL);
2241 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2242 E1000_VT_CTL_DISABLE_DEF_POOL);
2243 vtctl |= adapter->vfs_allocated_count <<
2244 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2245 wr32(E1000_VT_CTL, vtctl);
2246 }
2247 if (adapter->num_rx_queues > 1)
2248 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2249 else
2250 mrqc = E1000_MRQC_ENABLE_VMDQ;
2251 } else {
2252 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2253 }
2254 igb_vmm_control(adapter);
2255
2256 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2257 E1000_MRQC_RSS_FIELD_IPV4_TCP);
2258 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2259 E1000_MRQC_RSS_FIELD_IPV6_TCP);
2260 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2261 E1000_MRQC_RSS_FIELD_IPV6_UDP);
2262 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2263 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2264
2265 wr32(E1000_MRQC, mrqc);
2266}
2267
2268/**
2269 * igb_setup_rctl - configure the receive control registers
2270 * @adapter: Board private structure
2271 **/
2272void igb_setup_rctl(struct igb_adapter *adapter)
2273{
2274 struct e1000_hw *hw = &adapter->hw;
2275 u32 rctl;
2276
2277 rctl = rd32(E1000_RCTL);
2278
2279 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2280 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2281
2282 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2283 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2284
2285 /*
2286 * enable stripping of CRC. It's unlikely this will break BMC
2287 * redirection as it did with e1000. Newer features require
2288 * that the HW strips the CRC.
2289 */
2290 rctl |= E1000_RCTL_SECRC;
2291
2292 /*
2293 * disable store bad packets and clear size bits.
2294 */
2295 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2296
2297 /* enable LPE to prevent packets larger than max_frame_size */
2298 rctl |= E1000_RCTL_LPE;
2299
2300 /* disable queue 0 to prevent tail write w/o re-config */
2301 wr32(E1000_RXDCTL(0), 0);
2302
2303 /* Attention!!! For SR-IOV PF driver operations you must enable
2304 * queue drop for all VF and PF queues to prevent head of line blocking
2305 * if an un-trusted VF does not provide descriptors to hardware.
2306 */
2307 if (adapter->vfs_allocated_count) {
2308 /* set all queue drop enable bits */
2309 wr32(E1000_QDE, ALL_QUEUES);
2310 }
2311
2312 wr32(E1000_RCTL, rctl);
2313}
2314
2315static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2316 int vfn)
2317{
2318 struct e1000_hw *hw = &adapter->hw;
2319 u32 vmolr;
2320
2321 /* if it isn't the PF check to see if VFs are enabled and
2322 * increase the size to support vlan tags */
2323 if (vfn < adapter->vfs_allocated_count &&
2324 adapter->vf_data[vfn].vlans_enabled)
2325 size += VLAN_TAG_SIZE;
2326
2327 vmolr = rd32(E1000_VMOLR(vfn));
2328 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2329 vmolr |= size | E1000_VMOLR_LPE;
2330 wr32(E1000_VMOLR(vfn), vmolr);
2331
2332 return 0;
2333}
2334
2335/**
2336 * igb_rlpml_set - set maximum receive packet size
2337 * @adapter: board private structure
2338 *
2339 * Configure maximum receivable packet size.
2340 **/
2341static void igb_rlpml_set(struct igb_adapter *adapter)
2342{
2343 u32 max_frame_size = adapter->max_frame_size;
2344 struct e1000_hw *hw = &adapter->hw;
2345 u16 pf_id = adapter->vfs_allocated_count;
2346
2347 if (adapter->vlgrp)
2348 max_frame_size += VLAN_TAG_SIZE;
2349
2350 /* if vfs are enabled we set RLPML to the largest possible request
2351 * size and set the VMOLR RLPML to the size we need */
2352 if (pf_id) {
2353 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2354 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2355 }
2356
2357 wr32(E1000_RLPML, max_frame_size);
2358}
2359
2360static inline void igb_set_vmolr(struct igb_adapter *adapter, int vfn)
2361{
2362 struct e1000_hw *hw = &adapter->hw;
2363 u32 vmolr;
2364
2365 /*
2366 * This register exists only on 82576 and newer so if we are older then
2367 * we should exit and do nothing
2368 */
2369 if (hw->mac.type < e1000_82576)
2370 return;
2371
2372 vmolr = rd32(E1000_VMOLR(vfn));
2373 vmolr |= E1000_VMOLR_AUPE | /* Accept untagged packets */
2374 E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2375
2376 /* clear all bits that might not be set */
2377 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2378
2379 if (adapter->num_rx_queues > 1 && vfn == adapter->vfs_allocated_count)
2380 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2381 /*
2382 * for VMDq only allow the VFs and pool 0 to accept broadcast and
2383 * multicast packets
2384 */
2385 if (vfn <= adapter->vfs_allocated_count)
2386 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
2387
2388 wr32(E1000_VMOLR(vfn), vmolr);
2389}
2390
2391/**
2392 * igb_configure_rx_ring - Configure a receive ring after Reset
2393 * @adapter: board private structure
2394 * @ring: receive ring to be configured
2395 *
2396 * Configure the Rx unit of the MAC after a reset.
2397 **/
2398void igb_configure_rx_ring(struct igb_adapter *adapter,
2399 struct igb_ring *ring)
2400{
2401 struct e1000_hw *hw = &adapter->hw;
2402 u64 rdba = ring->dma;
2403 int reg_idx = ring->reg_idx;
2404 u32 srrctl, rxdctl;
2405
2406 /* disable the queue */
2407 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2408 wr32(E1000_RXDCTL(reg_idx),
2409 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2410
2411 /* Set DMA base address registers */
2412 wr32(E1000_RDBAL(reg_idx),
2413 rdba & 0x00000000ffffffffULL);
2414 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2415 wr32(E1000_RDLEN(reg_idx),
2416 ring->count * sizeof(union e1000_adv_rx_desc));
2417
2418 /* initialize head and tail */
2419 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2420 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2421 writel(0, ring->head);
2422 writel(0, ring->tail);
2423
2424 /* set descriptor configuration */
2425 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2426 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2427 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2428#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2429 srrctl |= IGB_RXBUFFER_16384 >>
2430 E1000_SRRCTL_BSIZEPKT_SHIFT;
2431#else
2432 srrctl |= (PAGE_SIZE / 2) >>
2433 E1000_SRRCTL_BSIZEPKT_SHIFT;
2434#endif
2435 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2436 } else {
2437 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2438 E1000_SRRCTL_BSIZEPKT_SHIFT;
2439 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2440 }
2441
2442 wr32(E1000_SRRCTL(reg_idx), srrctl);
2443
2444 /* set filtering for VMDQ pools */
2445 igb_set_vmolr(adapter, reg_idx & 0x7);
2446
2447 /* enable receive descriptor fetching */
2448 rxdctl = rd32(E1000_RXDCTL(reg_idx));
2449 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2450 rxdctl &= 0xFFF00000;
2451 rxdctl |= IGB_RX_PTHRESH;
2452 rxdctl |= IGB_RX_HTHRESH << 8;
2453 rxdctl |= IGB_RX_WTHRESH << 16;
2454 wr32(E1000_RXDCTL(reg_idx), rxdctl);
2455}
2456
2457/**
2458 * igb_configure_rx - Configure receive Unit after Reset
2459 * @adapter: board private structure
2460 *
2461 * Configure the Rx unit of the MAC after a reset.
2462 **/
2463static void igb_configure_rx(struct igb_adapter *adapter)
2464{
2465 int i;
2466
2467 /* set UTA to appropriate mode */
2468 igb_set_uta(adapter);
2469
2470 /* set the correct pool for the PF default MAC address in entry 0 */
2471 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2472 adapter->vfs_allocated_count);
2473
2474 /* Setup the HW Rx Head and Tail Descriptor Pointers and
2475 * the Base and Length of the Rx Descriptor Ring */
2476 for (i = 0; i < adapter->num_rx_queues; i++)
2477 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2478}
2479
2480/**
2481 * igb_free_tx_resources - Free Tx Resources per Queue
2482 * @tx_ring: Tx descriptor ring for a specific queue
2483 *
2484 * Free all transmit software resources
2485 **/
2486void igb_free_tx_resources(struct igb_ring *tx_ring)
2487{
2488 igb_clean_tx_ring(tx_ring);
2489
2490 vfree(tx_ring->buffer_info);
2491 tx_ring->buffer_info = NULL;
2492
2493 /* if not set, then don't free */
2494 if (!tx_ring->desc)
2495 return;
2496
2497 pci_free_consistent(tx_ring->pdev, tx_ring->size,
2498 tx_ring->desc, tx_ring->dma);
2499
2500 tx_ring->desc = NULL;
2501}
2502
2503/**
2504 * igb_free_all_tx_resources - Free Tx Resources for All Queues
2505 * @adapter: board private structure
2506 *
2507 * Free all transmit software resources
2508 **/
2509static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2510{
2511 int i;
2512
2513 for (i = 0; i < adapter->num_tx_queues; i++)
2514 igb_free_tx_resources(&adapter->tx_ring[i]);
2515}
2516
2517void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2518 struct igb_buffer *buffer_info)
2519{
2520 buffer_info->dma = 0;
2521 if (buffer_info->skb) {
2522 skb_dma_unmap(&tx_ring->pdev->dev,
2523 buffer_info->skb,
2524 DMA_TO_DEVICE);
2525 dev_kfree_skb_any(buffer_info->skb);
2526 buffer_info->skb = NULL;
2527 }
2528 buffer_info->time_stamp = 0;
2529 /* buffer_info must be completely set up in the transmit path */
2530}
2531
2532/**
2533 * igb_clean_tx_ring - Free Tx Buffers
2534 * @tx_ring: ring to be cleaned
2535 **/
2536static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2537{
2538 struct igb_buffer *buffer_info;
2539 unsigned long size;
2540 unsigned int i;
2541
2542 if (!tx_ring->buffer_info)
2543 return;
2544 /* Free all the Tx ring sk_buffs */
2545
2546 for (i = 0; i < tx_ring->count; i++) {
2547 buffer_info = &tx_ring->buffer_info[i];
2548 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2549 }
2550
2551 size = sizeof(struct igb_buffer) * tx_ring->count;
2552 memset(tx_ring->buffer_info, 0, size);
2553
2554 /* Zero out the descriptor ring */
2555 memset(tx_ring->desc, 0, tx_ring->size);
2556
2557 tx_ring->next_to_use = 0;
2558 tx_ring->next_to_clean = 0;
2559}
2560
2561/**
2562 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2563 * @adapter: board private structure
2564 **/
2565static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2566{
2567 int i;
2568
2569 for (i = 0; i < adapter->num_tx_queues; i++)
2570 igb_clean_tx_ring(&adapter->tx_ring[i]);
2571}
2572
2573/**
2574 * igb_free_rx_resources - Free Rx Resources
2575 * @rx_ring: ring to clean the resources from
2576 *
2577 * Free all receive software resources
2578 **/
2579void igb_free_rx_resources(struct igb_ring *rx_ring)
2580{
2581 igb_clean_rx_ring(rx_ring);
2582
2583 vfree(rx_ring->buffer_info);
2584 rx_ring->buffer_info = NULL;
2585
2586 /* if not set, then don't free */
2587 if (!rx_ring->desc)
2588 return;
2589
2590 pci_free_consistent(rx_ring->pdev, rx_ring->size,
2591 rx_ring->desc, rx_ring->dma);
2592
2593 rx_ring->desc = NULL;
2594}
2595
2596/**
2597 * igb_free_all_rx_resources - Free Rx Resources for All Queues
2598 * @adapter: board private structure
2599 *
2600 * Free all receive software resources
2601 **/
2602static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2603{
2604 int i;
2605
2606 for (i = 0; i < adapter->num_rx_queues; i++)
2607 igb_free_rx_resources(&adapter->rx_ring[i]);
2608}
2609
2610/**
2611 * igb_clean_rx_ring - Free Rx Buffers per Queue
2612 * @rx_ring: ring to free buffers from
2613 **/
2614static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2615{
2616 struct igb_buffer *buffer_info;
2617 unsigned long size;
2618 unsigned int i;
2619
2620 if (!rx_ring->buffer_info)
2621 return;
2622
2623 /* Free all the Rx ring sk_buffs */
2624 for (i = 0; i < rx_ring->count; i++) {
2625 buffer_info = &rx_ring->buffer_info[i];
2626 if (buffer_info->dma) {
2627 pci_unmap_single(rx_ring->pdev,
2628 buffer_info->dma,
2629 rx_ring->rx_buffer_len,
2630 PCI_DMA_FROMDEVICE);
2631 buffer_info->dma = 0;
2632 }
2633
2634 if (buffer_info->skb) {
2635 dev_kfree_skb(buffer_info->skb);
2636 buffer_info->skb = NULL;
2637 }
2638 if (buffer_info->page_dma) {
2639 pci_unmap_page(rx_ring->pdev,
2640 buffer_info->page_dma,
2641 PAGE_SIZE / 2,
2642 PCI_DMA_FROMDEVICE);
2643 buffer_info->page_dma = 0;
2644 }
2645 if (buffer_info->page) {
2646 put_page(buffer_info->page);
2647 buffer_info->page = NULL;
2648 buffer_info->page_offset = 0;
2649 }
2650 }
2651
2652 size = sizeof(struct igb_buffer) * rx_ring->count;
2653 memset(rx_ring->buffer_info, 0, size);
2654
2655 /* Zero out the descriptor ring */
2656 memset(rx_ring->desc, 0, rx_ring->size);
2657
2658 rx_ring->next_to_clean = 0;
2659 rx_ring->next_to_use = 0;
2660}
2661
2662/**
2663 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2664 * @adapter: board private structure
2665 **/
2666static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2667{
2668 int i;
2669
2670 for (i = 0; i < adapter->num_rx_queues; i++)
2671 igb_clean_rx_ring(&adapter->rx_ring[i]);
2672}
2673
2674/**
2675 * igb_set_mac - Change the Ethernet Address of the NIC
2676 * @netdev: network interface device structure
2677 * @p: pointer to an address structure
2678 *
2679 * Returns 0 on success, negative on failure
2680 **/
2681static int igb_set_mac(struct net_device *netdev, void *p)
2682{
2683 struct igb_adapter *adapter = netdev_priv(netdev);
2684 struct e1000_hw *hw = &adapter->hw;
2685 struct sockaddr *addr = p;
2686
2687 if (!is_valid_ether_addr(addr->sa_data))
2688 return -EADDRNOTAVAIL;
2689
2690 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2691 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2692
2693 /* set the correct pool for the new PF MAC address in entry 0 */
2694 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2695 adapter->vfs_allocated_count);
2696
2697 return 0;
2698}
2699
2700/**
2701 * igb_write_mc_addr_list - write multicast addresses to MTA
2702 * @netdev: network interface device structure
2703 *
2704 * Writes multicast address list to the MTA hash table.
2705 * Returns: -ENOMEM on failure
2706 * 0 on no addresses written
2707 * X on writing X addresses to MTA
2708 **/
2709static int igb_write_mc_addr_list(struct net_device *netdev)
2710{
2711 struct igb_adapter *adapter = netdev_priv(netdev);
2712 struct e1000_hw *hw = &adapter->hw;
2713 struct dev_mc_list *mc_ptr = netdev->mc_list;
2714 u8 *mta_list;
2715 u32 vmolr = 0;
2716 int i;
2717
2718 if (!netdev->mc_count) {
2719 /* nothing to program, so clear mc list */
2720 igb_update_mc_addr_list(hw, NULL, 0);
2721 igb_restore_vf_multicasts(adapter);
2722 return 0;
2723 }
2724
2725 mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC);
2726 if (!mta_list)
2727 return -ENOMEM;
2728
2729 /* set vmolr receive overflow multicast bit */
2730 vmolr |= E1000_VMOLR_ROMPE;
2731
2732 /* The shared function expects a packed array of only addresses. */
2733 mc_ptr = netdev->mc_list;
2734
2735 for (i = 0; i < netdev->mc_count; i++) {
2736 if (!mc_ptr)
2737 break;
2738 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2739 mc_ptr = mc_ptr->next;
2740 }
2741 igb_update_mc_addr_list(hw, mta_list, i);
2742 kfree(mta_list);
2743
2744 return netdev->mc_count;
2745}
2746
2747/**
2748 * igb_write_uc_addr_list - write unicast addresses to RAR table
2749 * @netdev: network interface device structure
2750 *
2751 * Writes unicast address list to the RAR table.
2752 * Returns: -ENOMEM on failure/insufficient address space
2753 * 0 on no addresses written
2754 * X on writing X addresses to the RAR table
2755 **/
2756static int igb_write_uc_addr_list(struct net_device *netdev)
2757{
2758 struct igb_adapter *adapter = netdev_priv(netdev);
2759 struct e1000_hw *hw = &adapter->hw;
2760 unsigned int vfn = adapter->vfs_allocated_count;
2761 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2762 int count = 0;
2763
2764 /* return ENOMEM indicating insufficient memory for addresses */
2765 if (netdev->uc.count > rar_entries)
2766 return -ENOMEM;
2767
2768 if (netdev->uc.count && rar_entries) {
2769 struct netdev_hw_addr *ha;
2770 list_for_each_entry(ha, &netdev->uc.list, list) {
2771 if (!rar_entries)
2772 break;
2773 igb_rar_set_qsel(adapter, ha->addr,
2774 rar_entries--,
2775 vfn);
2776 count++;
2777 }
2778 }
2779 /* write the addresses in reverse order to avoid write combining */
2780 for (; rar_entries > 0 ; rar_entries--) {
2781 wr32(E1000_RAH(rar_entries), 0);
2782 wr32(E1000_RAL(rar_entries), 0);
2783 }
2784 wrfl();
2785
2786 return count;
2787}
2788
2789/**
2790 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2791 * @netdev: network interface device structure
2792 *
2793 * The set_rx_mode entry point is called whenever the unicast or multicast
2794 * address lists or the network interface flags are updated. This routine is
2795 * responsible for configuring the hardware for proper unicast, multicast,
2796 * promiscuous mode, and all-multi behavior.
2797 **/
2798static void igb_set_rx_mode(struct net_device *netdev)
2799{
2800 struct igb_adapter *adapter = netdev_priv(netdev);
2801 struct e1000_hw *hw = &adapter->hw;
2802 unsigned int vfn = adapter->vfs_allocated_count;
2803 u32 rctl, vmolr = 0;
2804 int count;
2805
2806 /* Check for Promiscuous and All Multicast modes */
2807 rctl = rd32(E1000_RCTL);
2808
2809 /* clear the effected bits */
2810 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2811
2812 if (netdev->flags & IFF_PROMISC) {
2813 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2814 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2815 } else {
2816 if (netdev->flags & IFF_ALLMULTI) {
2817 rctl |= E1000_RCTL_MPE;
2818 vmolr |= E1000_VMOLR_MPME;
2819 } else {
2820 /*
2821 * Write addresses to the MTA, if the attempt fails
2822 * then we should just turn on promiscous mode so
2823 * that we can at least receive multicast traffic
2824 */
2825 count = igb_write_mc_addr_list(netdev);
2826 if (count < 0) {
2827 rctl |= E1000_RCTL_MPE;
2828 vmolr |= E1000_VMOLR_MPME;
2829 } else if (count) {
2830 vmolr |= E1000_VMOLR_ROMPE;
2831 }
2832 }
2833 /*
2834 * Write addresses to available RAR registers, if there is not
2835 * sufficient space to store all the addresses then enable
2836 * unicast promiscous mode
2837 */
2838 count = igb_write_uc_addr_list(netdev);
2839 if (count < 0) {
2840 rctl |= E1000_RCTL_UPE;
2841 vmolr |= E1000_VMOLR_ROPE;
2842 }
2843 rctl |= E1000_RCTL_VFE;
2844 }
2845 wr32(E1000_RCTL, rctl);
2846
2847 /*
2848 * In order to support SR-IOV and eventually VMDq it is necessary to set
2849 * the VMOLR to enable the appropriate modes. Without this workaround
2850 * we will have issues with VLAN tag stripping not being done for frames
2851 * that are only arriving because we are the default pool
2852 */
2853 if (hw->mac.type < e1000_82576)
2854 return;
2855
2856 vmolr |= rd32(E1000_VMOLR(vfn)) &
2857 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
2858 wr32(E1000_VMOLR(vfn), vmolr);
2859 igb_restore_vf_multicasts(adapter);
2860}
2861
2862/* Need to wait a few seconds after link up to get diagnostic information from
2863 * the phy */
2864static void igb_update_phy_info(unsigned long data)
2865{
2866 struct igb_adapter *adapter = (struct igb_adapter *) data;
2867 igb_get_phy_info(&adapter->hw);
2868}
2869
2870/**
2871 * igb_has_link - check shared code for link and determine up/down
2872 * @adapter: pointer to driver private info
2873 **/
2874static bool igb_has_link(struct igb_adapter *adapter)
2875{
2876 struct e1000_hw *hw = &adapter->hw;
2877 bool link_active = false;
2878 s32 ret_val = 0;
2879
2880 /* get_link_status is set on LSC (link status) interrupt or
2881 * rx sequence error interrupt. get_link_status will stay
2882 * false until the e1000_check_for_link establishes link
2883 * for copper adapters ONLY
2884 */
2885 switch (hw->phy.media_type) {
2886 case e1000_media_type_copper:
2887 if (hw->mac.get_link_status) {
2888 ret_val = hw->mac.ops.check_for_link(hw);
2889 link_active = !hw->mac.get_link_status;
2890 } else {
2891 link_active = true;
2892 }
2893 break;
2894 case e1000_media_type_internal_serdes:
2895 ret_val = hw->mac.ops.check_for_link(hw);
2896 link_active = hw->mac.serdes_has_link;
2897 break;
2898 default:
2899 case e1000_media_type_unknown:
2900 break;
2901 }
2902
2903 return link_active;
2904}
2905
2906/**
2907 * igb_watchdog - Timer Call-back
2908 * @data: pointer to adapter cast into an unsigned long
2909 **/
2910static void igb_watchdog(unsigned long data)
2911{
2912 struct igb_adapter *adapter = (struct igb_adapter *)data;
2913 /* Do the rest outside of interrupt context */
2914 schedule_work(&adapter->watchdog_task);
2915}
2916
2917static void igb_watchdog_task(struct work_struct *work)
2918{
2919 struct igb_adapter *adapter = container_of(work,
2920 struct igb_adapter, watchdog_task);
2921 struct e1000_hw *hw = &adapter->hw;
2922 struct net_device *netdev = adapter->netdev;
2923 struct igb_ring *tx_ring = adapter->tx_ring;
2924 u32 link;
2925 int i;
2926
2927 link = igb_has_link(adapter);
2928 if (link) {
2929 if (!netif_carrier_ok(netdev)) {
2930 u32 ctrl;
2931 hw->mac.ops.get_speed_and_duplex(&adapter->hw,
2932 &adapter->link_speed,
2933 &adapter->link_duplex);
2934
2935 ctrl = rd32(E1000_CTRL);
2936 /* Links status message must follow this format */
2937 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
2938 "Flow Control: %s\n",
2939 netdev->name,
2940 adapter->link_speed,
2941 adapter->link_duplex == FULL_DUPLEX ?
2942 "Full Duplex" : "Half Duplex",
2943 ((ctrl & E1000_CTRL_TFCE) && (ctrl &
2944 E1000_CTRL_RFCE)) ? "RX/TX" : ((ctrl &
2945 E1000_CTRL_RFCE) ? "RX" : ((ctrl &
2946 E1000_CTRL_TFCE) ? "TX" : "None")));
2947
2948 /* tweak tx_queue_len according to speed/duplex and
2949 * adjust the timeout factor */
2950 netdev->tx_queue_len = adapter->tx_queue_len;
2951 adapter->tx_timeout_factor = 1;
2952 switch (adapter->link_speed) {
2953 case SPEED_10:
2954 netdev->tx_queue_len = 10;
2955 adapter->tx_timeout_factor = 14;
2956 break;
2957 case SPEED_100:
2958 netdev->tx_queue_len = 100;
2959 /* maybe add some timeout factor ? */
2960 break;
2961 }
2962
2963 netif_carrier_on(netdev);
2964
2965 igb_ping_all_vfs(adapter);
2966
2967 /* link state has changed, schedule phy info update */
2968 if (!test_bit(__IGB_DOWN, &adapter->state))
2969 mod_timer(&adapter->phy_info_timer,
2970 round_jiffies(jiffies + 2 * HZ));
2971 }
2972 } else {
2973 if (netif_carrier_ok(netdev)) {
2974 adapter->link_speed = 0;
2975 adapter->link_duplex = 0;
2976 /* Links status message must follow this format */
2977 printk(KERN_INFO "igb: %s NIC Link is Down\n",
2978 netdev->name);
2979 netif_carrier_off(netdev);
2980
2981 igb_ping_all_vfs(adapter);
2982
2983 /* link state has changed, schedule phy info update */
2984 if (!test_bit(__IGB_DOWN, &adapter->state))
2985 mod_timer(&adapter->phy_info_timer,
2986 round_jiffies(jiffies + 2 * HZ));
2987 }
2988 }
2989
2990 igb_update_stats(adapter);
2991 igb_update_adaptive(hw);
2992
2993 if (!netif_carrier_ok(netdev)) {
2994 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
2995 /* We've lost link, so the controller stops DMA,
2996 * but we've got queued Tx work that's never going
2997 * to get done, so reset controller to flush Tx.
2998 * (Do the reset outside of interrupt context). */
2999 adapter->tx_timeout_count++;
3000 schedule_work(&adapter->reset_task);
3001 /* return immediately since reset is imminent */
3002 return;
3003 }
3004 }
3005
3006 /* Force detection of hung controller every watchdog period */
3007 for (i = 0; i < adapter->num_tx_queues; i++)
3008 adapter->tx_ring[i].detect_tx_hung = true;
3009
3010 /* Cause software interrupt to ensure rx ring is cleaned */
3011 if (adapter->msix_entries) {
3012 u32 eics = 0;
3013 for (i = 0; i < adapter->num_q_vectors; i++) {
3014 struct igb_q_vector *q_vector = adapter->q_vector[i];
3015 eics |= q_vector->eims_value;
3016 }
3017 wr32(E1000_EICS, eics);
3018 } else {
3019 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3020 }
3021
3022 /* Reset the timer */
3023 if (!test_bit(__IGB_DOWN, &adapter->state))
3024 mod_timer(&adapter->watchdog_timer,
3025 round_jiffies(jiffies + 2 * HZ));
3026}
3027
/* Latency classes used as ITR settings by igb_update_itr(). */
enum latency_range {
	lowest_latency	= 0,
	low_latency	= 1,
	bulk_latency	= 2,
	latency_invalid	= 255	/* sentinel outside the valid classes */
};
3034
3035/**
3036 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3037 *
3038 * Stores a new ITR value based on strictly on packet size. This
3039 * algorithm is less sophisticated than that used in igb_update_itr,
3040 * due to the difficulty of synchronizing statistics across multiple
3041 * receive rings. The divisors and thresholds used by this fuction
3042 * were determined based on theoretical maximum wire speed and testing
3043 * data, in order to minimize response time while increasing bulk
3044 * throughput.
3045 * This functionality is controlled by the InterruptThrottleRate module
3046 * parameter (see igb_param.c)
3047 * NOTE: This function is called only when operating in a multiqueue
3048 * receive environment.
3049 * @q_vector: pointer to q_vector
3050 **/
3051static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3052{
3053 int new_val = q_vector->itr_val;
3054 int avg_wire_size = 0;
3055 struct igb_adapter *adapter = q_vector->adapter;
3056
3057 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3058 * ints/sec - ITR timer value of 120 ticks.
3059 */
3060 if (adapter->link_speed != SPEED_1000) {
3061 new_val = 976;
3062 goto set_itr_val;
3063 }
3064
3065 if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3066 struct igb_ring *ring = q_vector->rx_ring;
3067 avg_wire_size = ring->total_bytes / ring->total_packets;
3068 }
3069
3070 if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3071 struct igb_ring *ring = q_vector->tx_ring;
3072 avg_wire_size = max_t(u32, avg_wire_size,
3073 (ring->total_bytes /
3074 ring->total_packets));
3075 }
3076
3077 /* if avg_wire_size isn't set no work was done */
3078 if (!avg_wire_size)
3079 goto clear_counts;
3080
3081 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3082 avg_wire_size += 24;
3083
3084 /* Don't starve jumbo frames */
3085 avg_wire_size = min(avg_wire_size, 3000);
3086
3087 /* Give a little boost to mid-size frames */
3088 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3089 new_val = avg_wire_size / 3;
3090 else
3091 new_val = avg_wire_size / 2;
3092
3093set_itr_val:
3094 if (new_val != q_vector->itr_val) {
3095 q_vector->itr_val = new_val;
3096 q_vector->set_itr = 1;
3097 }
3098clear_counts:
3099 if (q_vector->rx_ring) {
3100 q_vector->rx_ring->total_bytes = 0;
3101 q_vector->rx_ring->total_packets = 0;
3102 }
3103 if (q_vector->tx_ring) {
3104 q_vector->tx_ring->total_bytes = 0;
3105 q_vector->tx_ring->total_packets = 0;
3106 }
3107}
3108
3109/**
3110 * igb_update_itr - update the dynamic ITR value based on statistics
3111 * Stores a new ITR value based on packets and byte
3112 * counts during the last interrupt. The advantage of per interrupt
3113 * computation is faster updates and more accurate ITR for the current
3114 * traffic pattern. Constants in this function were computed
3115 * based on theoretical maximum wire speed and thresholds were set based
3116 * on testing data as well as attempting to minimize response time
3117 * while increasing bulk throughput.
3118 * this functionality is controlled by the InterruptThrottleRate module
3119 * parameter (see igb_param.c)
3120 * NOTE: These calculations are only valid when operating in a single-
3121 * queue environment.
3122 * @adapter: pointer to adapter
3123 * @itr_setting: current q_vector->itr_val
3124 * @packets: the number of packets during this measurement interval
3125 * @bytes: the number of bytes during this measurement interval
3126 **/
3127static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3128 int packets, int bytes)
3129{
3130 unsigned int retval = itr_setting;
3131
3132 if (packets == 0)
3133 goto update_itr_done;
3134
3135 switch (itr_setting) {
3136 case lowest_latency:
3137 /* handle TSO and jumbo frames */
3138 if (bytes/packets > 8000)
3139 retval = bulk_latency;
3140 else if ((packets < 5) && (bytes > 512))
3141 retval = low_latency;
3142 break;
3143 case low_latency: /* 50 usec aka 20000 ints/s */
3144 if (bytes > 10000) {
3145 /* this if handles the TSO accounting */
3146 if (bytes/packets > 8000) {
3147 retval = bulk_latency;
3148 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3149 retval = bulk_latency;
3150 } else if ((packets > 35)) {
3151 retval = lowest_latency;
3152 }
3153 } else if (bytes/packets > 2000) {
3154 retval = bulk_latency;
3155 } else if (packets <= 2 && bytes < 512) {
3156 retval = lowest_latency;
3157 }
3158 break;
3159 case bulk_latency: /* 250 usec aka 4000 ints/s */
3160 if (bytes > 25000) {
3161 if (packets > 35)
3162 retval = low_latency;
3163 } else if (bytes < 1500) {
3164 retval = low_latency;
3165 }
3166 break;
3167 }
3168
3169update_itr_done:
3170 return retval;
3171}
3172
3173static void igb_set_itr(struct igb_adapter *adapter)
3174{
3175 struct igb_q_vector *q_vector = adapter->q_vector[0];
3176 u16 current_itr;
3177 u32 new_itr = q_vector->itr_val;
3178
3179 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3180 if (adapter->link_speed != SPEED_1000) {
3181 current_itr = 0;
3182 new_itr = 4000;
3183 goto set_itr_now;
3184 }
3185
3186 adapter->rx_itr = igb_update_itr(adapter,
3187 adapter->rx_itr,
3188 adapter->rx_ring->total_packets,
3189 adapter->rx_ring->total_bytes);
3190
3191 adapter->tx_itr = igb_update_itr(adapter,
3192 adapter->tx_itr,
3193 adapter->tx_ring->total_packets,
3194 adapter->tx_ring->total_bytes);
3195 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3196
3197 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3198 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3199 current_itr = low_latency;
3200
3201 switch (current_itr) {
3202 /* counts and packets in update_itr are dependent on these numbers */
3203 case lowest_latency:
3204 new_itr = 56; /* aka 70,000 ints/sec */
3205 break;
3206 case low_latency:
3207 new_itr = 196; /* aka 20,000 ints/sec */
3208 break;
3209 case bulk_latency:
3210 new_itr = 980; /* aka 4,000 ints/sec */
3211 break;
3212 default:
3213 break;
3214 }
3215
3216set_itr_now:
3217 adapter->rx_ring->total_bytes = 0;
3218 adapter->rx_ring->total_packets = 0;
3219 adapter->tx_ring->total_bytes = 0;
3220 adapter->tx_ring->total_packets = 0;
3221
3222 if (new_itr != q_vector->itr_val) {
3223 /* this attempts to bias the interrupt rate towards Bulk
3224 * by adding intermediate steps when interrupt rate is
3225 * increasing */
3226 new_itr = new_itr > q_vector->itr_val ?
3227 max((new_itr * q_vector->itr_val) /
3228 (new_itr + (q_vector->itr_val >> 2)),
3229 new_itr) :
3230 new_itr;
3231 /* Don't write the value here; it resets the adapter's
3232 * internal timer, and causes us to delay far longer than
3233 * we should between interrupts. Instead, we write the ITR
3234 * value at the beginning of the next interrupt so the timing
3235 * ends up being correct.
3236 */
3237 q_vector->itr_val = new_itr;
3238 q_vector->set_itr = 1;
3239 }
3240
3241 return;
3242}
3243
3244#define IGB_TX_FLAGS_CSUM 0x00000001
3245#define IGB_TX_FLAGS_VLAN 0x00000002
3246#define IGB_TX_FLAGS_TSO 0x00000004
3247#define IGB_TX_FLAGS_IPV4 0x00000008
3248#define IGB_TX_FLAGS_TSTAMP 0x00000010
3249#define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
3250#define IGB_TX_FLAGS_VLAN_SHIFT 16
3251
3252static inline int igb_tso_adv(struct igb_ring *tx_ring,
3253 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3254{
3255 struct e1000_adv_tx_context_desc *context_desc;
3256 unsigned int i;
3257 int err;
3258 struct igb_buffer *buffer_info;
3259 u32 info = 0, tu_cmd = 0;
3260 u32 mss_l4len_idx, l4len;
3261 *hdr_len = 0;
3262
3263 if (skb_header_cloned(skb)) {
3264 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3265 if (err)
3266 return err;
3267 }
3268
3269 l4len = tcp_hdrlen(skb);
3270 *hdr_len += l4len;
3271
3272 if (skb->protocol == htons(ETH_P_IP)) {
3273 struct iphdr *iph = ip_hdr(skb);
3274 iph->tot_len = 0;
3275 iph->check = 0;
3276 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3277 iph->daddr, 0,
3278 IPPROTO_TCP,
3279 0);
3280 } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
3281 ipv6_hdr(skb)->payload_len = 0;
3282 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3283 &ipv6_hdr(skb)->daddr,
3284 0, IPPROTO_TCP, 0);
3285 }
3286
3287 i = tx_ring->next_to_use;
3288
3289 buffer_info = &tx_ring->buffer_info[i];
3290 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3291 /* VLAN MACLEN IPLEN */
3292 if (tx_flags & IGB_TX_FLAGS_VLAN)
3293 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3294 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3295 *hdr_len += skb_network_offset(skb);
3296 info |= skb_network_header_len(skb);
3297 *hdr_len += skb_network_header_len(skb);
3298 context_desc->vlan_macip_lens = cpu_to_le32(info);
3299
3300 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3301 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3302
3303 if (skb->protocol == htons(ETH_P_IP))
3304 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3305 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3306
3307 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3308
3309 /* MSS L4LEN IDX */
3310 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3311 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3312
3313 /* For 82575, context index must be unique per ring. */
3314 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3315 mss_l4len_idx |= tx_ring->reg_idx << 4;
3316
3317 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3318 context_desc->seqnum_seed = 0;
3319
3320 buffer_info->time_stamp = jiffies;
3321 buffer_info->next_to_watch = i;
3322 buffer_info->dma = 0;
3323 i++;
3324 if (i == tx_ring->count)
3325 i = 0;
3326
3327 tx_ring->next_to_use = i;
3328
3329 return true;
3330}
3331
3332static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3333 struct sk_buff *skb, u32 tx_flags)
3334{
3335 struct e1000_adv_tx_context_desc *context_desc;
3336 struct pci_dev *pdev = tx_ring->pdev;
3337 struct igb_buffer *buffer_info;
3338 u32 info = 0, tu_cmd = 0;
3339 unsigned int i;
3340
3341 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3342 (tx_flags & IGB_TX_FLAGS_VLAN)) {
3343 i = tx_ring->next_to_use;
3344 buffer_info = &tx_ring->buffer_info[i];
3345 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3346
3347 if (tx_flags & IGB_TX_FLAGS_VLAN)
3348 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3349
3350 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3351 if (skb->ip_summed == CHECKSUM_PARTIAL)
3352 info |= skb_network_header_len(skb);
3353
3354 context_desc->vlan_macip_lens = cpu_to_le32(info);
3355
3356 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3357
3358 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3359 __be16 protocol;
3360
3361 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3362 const struct vlan_ethhdr *vhdr =
3363 (const struct vlan_ethhdr*)skb->data;
3364
3365 protocol = vhdr->h_vlan_encapsulated_proto;
3366 } else {
3367 protocol = skb->protocol;
3368 }
3369
3370 switch (protocol) {
3371 case cpu_to_be16(ETH_P_IP):
3372 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3373 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3374 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3375 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3376 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3377 break;
3378 case cpu_to_be16(ETH_P_IPV6):
3379 /* XXX what about other V6 headers?? */
3380 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3381 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3382 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3383 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3384 break;
3385 default:
3386 if (unlikely(net_ratelimit()))
3387 dev_warn(&pdev->dev,
3388 "partial checksum but proto=%x!\n",
3389 skb->protocol);
3390 break;
3391 }
3392 }
3393
3394 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3395 context_desc->seqnum_seed = 0;
3396 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3397 context_desc->mss_l4len_idx =
3398 cpu_to_le32(tx_ring->reg_idx << 4);
3399
3400 buffer_info->time_stamp = jiffies;
3401 buffer_info->next_to_watch = i;
3402 buffer_info->dma = 0;
3403
3404 i++;
3405 if (i == tx_ring->count)
3406 i = 0;
3407 tx_ring->next_to_use = i;
3408
3409 return true;
3410 }
3411 return false;
3412}
3413
3414#define IGB_MAX_TXD_PWR 16
3415#define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
3416
3417static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3418 unsigned int first)
3419{
3420 struct igb_buffer *buffer_info;
3421 struct pci_dev *pdev = tx_ring->pdev;
3422 unsigned int len = skb_headlen(skb);
3423 unsigned int count = 0, i;
3424 unsigned int f;
3425 dma_addr_t *map;
3426
3427 i = tx_ring->next_to_use;
3428
3429 if (skb_dma_map(&pdev->dev, skb, DMA_TO_DEVICE)) {
3430 dev_err(&pdev->dev, "TX DMA map failed\n");
3431 return 0;
3432 }
3433
3434 map = skb_shinfo(skb)->dma_maps;
3435
3436 buffer_info = &tx_ring->buffer_info[i];
3437 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3438 buffer_info->length = len;
3439 /* set time_stamp *before* dma to help avoid a possible race */
3440 buffer_info->time_stamp = jiffies;
3441 buffer_info->next_to_watch = i;
3442 buffer_info->dma = skb_shinfo(skb)->dma_head;
3443
3444 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3445 struct skb_frag_struct *frag;
3446
3447 i++;
3448 if (i == tx_ring->count)
3449 i = 0;
3450
3451 frag = &skb_shinfo(skb)->frags[f];
3452 len = frag->size;
3453
3454 buffer_info = &tx_ring->buffer_info[i];
3455 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3456 buffer_info->length = len;
3457 buffer_info->time_stamp = jiffies;
3458 buffer_info->next_to_watch = i;
3459 buffer_info->dma = map[count];
3460 count++;
3461 }
3462
3463 tx_ring->buffer_info[i].skb = skb;
3464 tx_ring->buffer_info[first].next_to_watch = i;
3465
3466 return ++count;
3467}
3468
3469static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3470 int tx_flags, int count, u32 paylen,
3471 u8 hdr_len)
3472{
3473 union e1000_adv_tx_desc *tx_desc;
3474 struct igb_buffer *buffer_info;
3475 u32 olinfo_status = 0, cmd_type_len;
3476 unsigned int i = tx_ring->next_to_use;
3477
3478 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3479 E1000_ADVTXD_DCMD_DEXT);
3480
3481 if (tx_flags & IGB_TX_FLAGS_VLAN)
3482 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3483
3484 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3485 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3486
3487 if (tx_flags & IGB_TX_FLAGS_TSO) {
3488 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3489
3490 /* insert tcp checksum */
3491 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3492
3493 /* insert ip checksum */
3494 if (tx_flags & IGB_TX_FLAGS_IPV4)
3495 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3496
3497 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3498 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3499 }
3500
3501 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3502 (tx_flags & (IGB_TX_FLAGS_CSUM |
3503 IGB_TX_FLAGS_TSO |
3504 IGB_TX_FLAGS_VLAN)))
3505 olinfo_status |= tx_ring->reg_idx << 4;
3506
3507 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3508
3509 do {
3510 buffer_info = &tx_ring->buffer_info[i];
3511 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3512 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3513 tx_desc->read.cmd_type_len =
3514 cpu_to_le32(cmd_type_len | buffer_info->length);
3515 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3516 count--;
3517 i++;
3518 if (i == tx_ring->count)
3519 i = 0;
3520 } while (count > 0);
3521
3522 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3523 /* Force memory writes to complete before letting h/w
3524 * know there are new descriptors to fetch. (Only
3525 * applicable for weak-ordered memory model archs,
3526 * such as IA-64). */
3527 wmb();
3528
3529 tx_ring->next_to_use = i;
3530 writel(i, tx_ring->tail);
3531 /* we need this if more than one processor can write to our tail
3532 * at a time, it syncronizes IO on IA64/Altix systems */
3533 mmiowb();
3534}
3535
3536static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3537{
3538 struct net_device *netdev = tx_ring->netdev;
3539
3540 netif_stop_subqueue(netdev, tx_ring->queue_index);
3541
3542 /* Herbert's original patch had:
3543 * smp_mb__after_netif_stop_queue();
3544 * but since that doesn't exist yet, just open code it. */
3545 smp_mb();
3546
3547 /* We need to check again in a case another CPU has just
3548 * made room available. */
3549 if (igb_desc_unused(tx_ring) < size)
3550 return -EBUSY;
3551
3552 /* A reprieve! */
3553 netif_wake_subqueue(netdev, tx_ring->queue_index);
3554 tx_ring->tx_stats.restart_queue++;
3555 return 0;
3556}
3557
/*
 * igb_maybe_stop_tx - make sure @size descriptors are free on the ring
 *
 * Returns 0 when enough descriptors are available; otherwise defers to
 * __igb_maybe_stop_tx() and returns its result.
 */
static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	/* not enough room: take the slow path that stops the queue */
	if (igb_desc_unused(tx_ring) < size)
		return __igb_maybe_stop_tx(tx_ring, size);

	return 0;
}
3564
3565netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3566 struct igb_ring *tx_ring)
3567{
3568 struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3569 unsigned int first;
3570 unsigned int tx_flags = 0;
3571 u8 hdr_len = 0;
3572 int tso = 0, count;
3573 union skb_shared_tx *shtx = skb_tx(skb);
3574
3575 /* need: 1 descriptor per page,
3576 * + 2 desc gap to keep tail from touching head,
3577 * + 1 desc for skb->data,
3578 * + 1 desc for context descriptor,
3579 * otherwise try next time */
3580 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3581 /* this is a hard error */
3582 return NETDEV_TX_BUSY;
3583 }
3584
3585 if (unlikely(shtx->hardware)) {
3586 shtx->in_progress = 1;
3587 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3588 }
3589
3590 if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3591 tx_flags |= IGB_TX_FLAGS_VLAN;
3592 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3593 }
3594
3595 if (skb->protocol == htons(ETH_P_IP))
3596 tx_flags |= IGB_TX_FLAGS_IPV4;
3597
3598 first = tx_ring->next_to_use;
3599 if (skb_is_gso(skb)) {
3600 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3601
3602 if (tso < 0) {
3603 dev_kfree_skb_any(skb);
3604 return NETDEV_TX_OK;
3605 }
3606 }
3607
3608 if (tso)
3609 tx_flags |= IGB_TX_FLAGS_TSO;
3610 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3611 (skb->ip_summed == CHECKSUM_PARTIAL))
3612 tx_flags |= IGB_TX_FLAGS_CSUM;
3613
3614 /*
3615 * count reflects descriptors mapped, if 0 or less then mapping error
3616 * has occured and we need to rewind the descriptor queue
3617 */
3618 count = igb_tx_map_adv(tx_ring, skb, first);
3619 if (count <= 0) {
3620 dev_kfree_skb_any(skb);
3621 tx_ring->buffer_info[first].time_stamp = 0;
3622 tx_ring->next_to_use = first;
3623 return NETDEV_TX_OK;
3624 }
3625
3626 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3627
3628 /* Make sure there is space in the ring for the next send. */
3629 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3630
3631 return NETDEV_TX_OK;
3632}
3633
3634static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3635 struct net_device *netdev)
3636{
3637 struct igb_adapter *adapter = netdev_priv(netdev);
3638 struct igb_ring *tx_ring;
3639 int r_idx = 0;
3640
3641 if (test_bit(__IGB_DOWN, &adapter->state)) {
3642 dev_kfree_skb_any(skb);
3643 return NETDEV_TX_OK;
3644 }
3645
3646 if (skb->len <= 0) {
3647 dev_kfree_skb_any(skb);
3648 return NETDEV_TX_OK;
3649 }
3650
3651 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3652 tx_ring = adapter->multi_tx_table[r_idx];
3653
3654 /* This goes back to the question of how to logically map a tx queue
3655 * to a flow. Right now, performance is impacted slightly negatively
3656 * if using multiple tx queues. If the stack breaks away from a
3657 * single qdisc implementation, we can look at this again. */
3658 return igb_xmit_frame_ring_adv(skb, tx_ring);
3659}
3660
3661/**
3662 * igb_tx_timeout - Respond to a Tx Hang
3663 * @netdev: network interface device structure
3664 **/
3665static void igb_tx_timeout(struct net_device *netdev)
3666{
3667 struct igb_adapter *adapter = netdev_priv(netdev);
3668 struct e1000_hw *hw = &adapter->hw;
3669
3670 /* Do the reset outside of interrupt context */
3671 adapter->tx_timeout_count++;
3672
3673 schedule_work(&adapter->reset_task);
3674 wr32(E1000_EICS,
3675 (adapter->eims_enable_mask & ~adapter->eims_other));
3676}
3677
3678static void igb_reset_task(struct work_struct *work)
3679{
3680 struct igb_adapter *adapter;
3681 adapter = container_of(work, struct igb_adapter, reset_task);
3682
3683 igb_reinit_locked(adapter);
3684}
3685
3686/**
3687 * igb_get_stats - Get System Network Statistics
3688 * @netdev: network interface device structure
3689 *
3690 * Returns the address of the device statistics structure.
3691 * The statistics are actually updated from the timer callback.
3692 **/
3693static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3694{
3695 /* only return the current stats */
3696 return &netdev->stats;
3697}
3698
3699/**
3700 * igb_change_mtu - Change the Maximum Transfer Unit
3701 * @netdev: network interface device structure
3702 * @new_mtu: new value for maximum frame size
3703 *
3704 * Returns 0 on success, negative on failure
3705 **/
3706static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3707{
3708 struct igb_adapter *adapter = netdev_priv(netdev);
3709 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3710 u32 rx_buffer_len, i;
3711
3712 if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) ||
3713 (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3714 dev_err(&adapter->pdev->dev, "Invalid MTU setting\n");
3715 return -EINVAL;
3716 }
3717
3718 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3719 dev_err(&adapter->pdev->dev, "MTU > 9216 not supported.\n");
3720 return -EINVAL;
3721 }
3722
3723 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3724 msleep(1);
3725
3726 /* igb_down has a dependency on max_frame_size */
3727 adapter->max_frame_size = max_frame;
3728 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3729 * means we reserve 2 more, this pushes us to allocate from the next
3730 * larger slab size.
3731 * i.e. RXBUFFER_2048 --> size-4096 slab
3732 */
3733
3734 if (max_frame <= IGB_RXBUFFER_1024)
3735 rx_buffer_len = IGB_RXBUFFER_1024;
3736 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3737 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3738 else
3739 rx_buffer_len = IGB_RXBUFFER_128;
3740
3741 if (netif_running(netdev))
3742 igb_down(adapter);
3743
3744 dev_info(&adapter->pdev->dev, "changing MTU from %d to %d\n",
3745 netdev->mtu, new_mtu);
3746 netdev->mtu = new_mtu;
3747
3748 for (i = 0; i < adapter->num_rx_queues; i++)
3749 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3750
3751 if (netif_running(netdev))
3752 igb_up(adapter);
3753 else
3754 igb_reset(adapter);
3755
3756 clear_bit(__IGB_RESETTING, &adapter->state);
3757
3758 return 0;
3759}
3760
3761/**
3762 * igb_update_stats - Update the board statistics counters
3763 * @adapter: board private structure
3764 **/
3765
3766void igb_update_stats(struct igb_adapter *adapter)
3767{
3768 struct net_device *netdev = adapter->netdev;
3769 struct e1000_hw *hw = &adapter->hw;
3770 struct pci_dev *pdev = adapter->pdev;
3771 u32 rnbc;
3772 u16 phy_tmp;
3773 int i;
3774 u64 bytes, packets;
3775
3776#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3777
3778 /*
3779 * Prevent stats update while adapter is being reset, or if the pci
3780 * connection is down.
3781 */
3782 if (adapter->link_speed == 0)
3783 return;
3784 if (pci_channel_offline(pdev))
3785 return;
3786
3787 bytes = 0;
3788 packets = 0;
3789 for (i = 0; i < adapter->num_rx_queues; i++) {
3790 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3791 adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
3792 netdev->stats.rx_fifo_errors += rqdpc_tmp;
3793 bytes += adapter->rx_ring[i].rx_stats.bytes;
3794 packets += adapter->rx_ring[i].rx_stats.packets;
3795 }
3796
3797 netdev->stats.rx_bytes = bytes;
3798 netdev->stats.rx_packets = packets;
3799
3800 bytes = 0;
3801 packets = 0;
3802 for (i = 0; i < adapter->num_tx_queues; i++) {
3803 bytes += adapter->tx_ring[i].tx_stats.bytes;
3804 packets += adapter->tx_ring[i].tx_stats.packets;
3805 }
3806 netdev->stats.tx_bytes = bytes;
3807 netdev->stats.tx_packets = packets;
3808
3809 /* read stats registers */
3810 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3811 adapter->stats.gprc += rd32(E1000_GPRC);
3812 adapter->stats.gorc += rd32(E1000_GORCL);
3813 rd32(E1000_GORCH); /* clear GORCL */
3814 adapter->stats.bprc += rd32(E1000_BPRC);
3815 adapter->stats.mprc += rd32(E1000_MPRC);
3816 adapter->stats.roc += rd32(E1000_ROC);
3817
3818 adapter->stats.prc64 += rd32(E1000_PRC64);
3819 adapter->stats.prc127 += rd32(E1000_PRC127);
3820 adapter->stats.prc255 += rd32(E1000_PRC255);
3821 adapter->stats.prc511 += rd32(E1000_PRC511);
3822 adapter->stats.prc1023 += rd32(E1000_PRC1023);
3823 adapter->stats.prc1522 += rd32(E1000_PRC1522);
3824 adapter->stats.symerrs += rd32(E1000_SYMERRS);
3825 adapter->stats.sec += rd32(E1000_SEC);
3826
3827 adapter->stats.mpc += rd32(E1000_MPC);
3828 adapter->stats.scc += rd32(E1000_SCC);
3829 adapter->stats.ecol += rd32(E1000_ECOL);
3830 adapter->stats.mcc += rd32(E1000_MCC);
3831 adapter->stats.latecol += rd32(E1000_LATECOL);
3832 adapter->stats.dc += rd32(E1000_DC);
3833 adapter->stats.rlec += rd32(E1000_RLEC);
3834 adapter->stats.xonrxc += rd32(E1000_XONRXC);
3835 adapter->stats.xontxc += rd32(E1000_XONTXC);
3836 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
3837 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
3838 adapter->stats.fcruc += rd32(E1000_FCRUC);
3839 adapter->stats.gptc += rd32(E1000_GPTC);
3840 adapter->stats.gotc += rd32(E1000_GOTCL);
3841 rd32(E1000_GOTCH); /* clear GOTCL */
3842 rnbc = rd32(E1000_RNBC);
3843 adapter->stats.rnbc += rnbc;
3844 netdev->stats.rx_fifo_errors += rnbc;
3845 adapter->stats.ruc += rd32(E1000_RUC);
3846 adapter->stats.rfc += rd32(E1000_RFC);
3847 adapter->stats.rjc += rd32(E1000_RJC);
3848 adapter->stats.tor += rd32(E1000_TORH);
3849 adapter->stats.tot += rd32(E1000_TOTH);
3850 adapter->stats.tpr += rd32(E1000_TPR);
3851
3852 adapter->stats.ptc64 += rd32(E1000_PTC64);
3853 adapter->stats.ptc127 += rd32(E1000_PTC127);
3854 adapter->stats.ptc255 += rd32(E1000_PTC255);
3855 adapter->stats.ptc511 += rd32(E1000_PTC511);
3856 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
3857 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
3858
3859 adapter->stats.mptc += rd32(E1000_MPTC);
3860 adapter->stats.bptc += rd32(E1000_BPTC);
3861
3862 /* used for adaptive IFS */
3863 hw->mac.tx_packet_delta = rd32(E1000_TPT);
3864 adapter->stats.tpt += hw->mac.tx_packet_delta;
3865 hw->mac.collision_delta = rd32(E1000_COLC);
3866 adapter->stats.colc += hw->mac.collision_delta;
3867
3868 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
3869 adapter->stats.rxerrc += rd32(E1000_RXERRC);
3870 adapter->stats.tncrs += rd32(E1000_TNCRS);
3871 adapter->stats.tsctc += rd32(E1000_TSCTC);
3872 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
3873
3874 adapter->stats.iac += rd32(E1000_IAC);
3875 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
3876 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
3877 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
3878 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
3879 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
3880 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
3881 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
3882 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
3883
3884 /* Fill out the OS statistics structure */
3885 netdev->stats.multicast = adapter->stats.mprc;
3886 netdev->stats.collisions = adapter->stats.colc;
3887
3888 /* Rx Errors */
3889
3890 /* RLEC on some newer hardware can be incorrect so build
3891 * our own version based on RUC and ROC */
3892 netdev->stats.rx_errors = adapter->stats.rxerrc +
3893 adapter->stats.crcerrs + adapter->stats.algnerrc +
3894 adapter->stats.ruc + adapter->stats.roc +
3895 adapter->stats.cexterr;
3896 netdev->stats.rx_length_errors = adapter->stats.ruc +
3897 adapter->stats.roc;
3898 netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
3899 netdev->stats.rx_frame_errors = adapter->stats.algnerrc;
3900 netdev->stats.rx_missed_errors = adapter->stats.mpc;
3901
3902 /* Tx Errors */
3903 netdev->stats.tx_errors = adapter->stats.ecol +
3904 adapter->stats.latecol;
3905 netdev->stats.tx_aborted_errors = adapter->stats.ecol;
3906 netdev->stats.tx_window_errors = adapter->stats.latecol;
3907 netdev->stats.tx_carrier_errors = adapter->stats.tncrs;
3908
3909 /* Tx Dropped needs to be maintained elsewhere */
3910
3911 /* Phy Stats */
3912 if (hw->phy.media_type == e1000_media_type_copper) {
3913 if ((adapter->link_speed == SPEED_1000) &&
3914 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
3915 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
3916 adapter->phy_stats.idle_errors += phy_tmp;
3917 }
3918 }
3919
3920 /* Management Stats */
3921 adapter->stats.mgptc += rd32(E1000_MGTPTC);
3922 adapter->stats.mgprc += rd32(E1000_MGTPRC);
3923 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
3924}
3925
3926static irqreturn_t igb_msix_other(int irq, void *data)
3927{
3928 struct igb_adapter *adapter = data;
3929 struct e1000_hw *hw = &adapter->hw;
3930 u32 icr = rd32(E1000_ICR);
3931 /* reading ICR causes bit 31 of EICR to be cleared */
3932
3933 if (icr & E1000_ICR_DOUTSYNC) {
3934 /* HW is reporting DMA is out of sync */
3935 adapter->stats.doosync++;
3936 }
3937
3938 /* Check for a mailbox event */
3939 if (icr & E1000_ICR_VMMB)
3940 igb_msg_task(adapter);
3941
3942 if (icr & E1000_ICR_LSC) {
3943 hw->mac.get_link_status = 1;
3944 /* guard against interrupt when we're going down */
3945 if (!test_bit(__IGB_DOWN, &adapter->state))
3946 mod_timer(&adapter->watchdog_timer, jiffies + 1);
3947 }
3948
3949 if (adapter->vfs_allocated_count)
3950 wr32(E1000_IMS, E1000_IMS_LSC |
3951 E1000_IMS_VMMB |
3952 E1000_IMS_DOUTSYNC);
3953 else
3954 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
3955 wr32(E1000_EIMS, adapter->eims_other);
3956
3957 return IRQ_HANDLED;
3958}
3959
3960static void igb_write_itr(struct igb_q_vector *q_vector)
3961{
3962 u32 itr_val = q_vector->itr_val & 0x7FFC;
3963
3964 if (!q_vector->set_itr)
3965 return;
3966
3967 if (!itr_val)
3968 itr_val = 0x4;
3969
3970 if (q_vector->itr_shift)
3971 itr_val |= itr_val << q_vector->itr_shift;
3972 else
3973 itr_val |= 0x8000000;
3974
3975 writel(itr_val, q_vector->itr_register);
3976 q_vector->set_itr = 0;
3977}
3978
3979static irqreturn_t igb_msix_ring(int irq, void *data)
3980{
3981 struct igb_q_vector *q_vector = data;
3982
3983 /* Write the ITR value calculated from the previous interrupt. */
3984 igb_write_itr(q_vector);
3985
3986 napi_schedule(&q_vector->napi);
3987
3988 return IRQ_HANDLED;
3989}
3990
3991#ifdef CONFIG_IGB_DCA
3992static void igb_update_dca(struct igb_q_vector *q_vector)
3993{
3994 struct igb_adapter *adapter = q_vector->adapter;
3995 struct e1000_hw *hw = &adapter->hw;
3996 int cpu = get_cpu();
3997
3998 if (q_vector->cpu == cpu)
3999 goto out_no_update;
4000
4001 if (q_vector->tx_ring) {
4002 int q = q_vector->tx_ring->reg_idx;
4003 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4004 if (hw->mac.type == e1000_82575) {
4005 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4006 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4007 } else {
4008 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4009 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4010 E1000_DCA_TXCTRL_CPUID_SHIFT;
4011 }
4012 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4013 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4014 }
4015 if (q_vector->rx_ring) {
4016 int q = q_vector->rx_ring->reg_idx;
4017 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4018 if (hw->mac.type == e1000_82575) {
4019 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4020 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4021 } else {
4022 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4023 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4024 E1000_DCA_RXCTRL_CPUID_SHIFT;
4025 }
4026 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4027 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4028 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4029 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4030 }
4031 q_vector->cpu = cpu;
4032out_no_update:
4033 put_cpu();
4034}
4035
4036static void igb_setup_dca(struct igb_adapter *adapter)
4037{
4038 struct e1000_hw *hw = &adapter->hw;
4039 int i;
4040
4041 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4042 return;
4043
4044 /* Always use CB2 mode, difference is masked in the CB driver. */
4045 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4046
4047 for (i = 0; i < adapter->num_q_vectors; i++) {
4048 struct igb_q_vector *q_vector = adapter->q_vector[i];
4049 q_vector->cpu = -1;
4050 igb_update_dca(q_vector);
4051 }
4052}
4053
4054static int __igb_notify_dca(struct device *dev, void *data)
4055{
4056 struct net_device *netdev = dev_get_drvdata(dev);
4057 struct igb_adapter *adapter = netdev_priv(netdev);
4058 struct e1000_hw *hw = &adapter->hw;
4059 unsigned long event = *(unsigned long *)data;
4060
4061 switch (event) {
4062 case DCA_PROVIDER_ADD:
4063 /* if already enabled, don't do it again */
4064 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4065 break;
4066 /* Always use CB2 mode, difference is masked
4067 * in the CB driver. */
4068 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4069 if (dca_add_requester(dev) == 0) {
4070 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4071 dev_info(&adapter->pdev->dev, "DCA enabled\n");
4072 igb_setup_dca(adapter);
4073 break;
4074 }
4075 /* Fall Through since DCA is disabled. */
4076 case DCA_PROVIDER_REMOVE:
4077 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4078 /* without this a class_device is left
4079 * hanging around in the sysfs model */
4080 dca_remove_requester(dev);
4081 dev_info(&adapter->pdev->dev, "DCA disabled\n");
4082 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4083 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4084 }
4085 break;
4086 }
4087
4088 return 0;
4089}
4090
4091static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4092 void *p)
4093{
4094 int ret_val;
4095
4096 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4097 __igb_notify_dca);
4098
4099 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4100}
4101#endif /* CONFIG_IGB_DCA */
4102
4103static void igb_ping_all_vfs(struct igb_adapter *adapter)
4104{
4105 struct e1000_hw *hw = &adapter->hw;
4106 u32 ping;
4107 int i;
4108
4109 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4110 ping = E1000_PF_CONTROL_MSG;
4111 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4112 ping |= E1000_VT_MSGTYPE_CTS;
4113 igb_write_mbx(hw, &ping, 1, i);
4114 }
4115}
4116
4117static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4118{
4119 struct e1000_hw *hw = &adapter->hw;
4120 u32 vmolr = rd32(E1000_VMOLR(vf));
4121 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4122
4123 vf_data->flags |= ~(IGB_VF_FLAG_UNI_PROMISC |
4124 IGB_VF_FLAG_MULTI_PROMISC);
4125 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4126
4127 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4128 vmolr |= E1000_VMOLR_MPME;
4129 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4130 } else {
4131 /*
4132 * if we have hashes and we are clearing a multicast promisc
4133 * flag we need to write the hashes to the MTA as this step
4134 * was previously skipped
4135 */
4136 if (vf_data->num_vf_mc_hashes > 30) {
4137 vmolr |= E1000_VMOLR_MPME;
4138 } else if (vf_data->num_vf_mc_hashes) {
4139 int j;
4140 vmolr |= E1000_VMOLR_ROMPE;
4141 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4142 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4143 }
4144 }
4145
4146 wr32(E1000_VMOLR(vf), vmolr);
4147
4148 /* there are flags left unprocessed, likely not supported */
4149 if (*msgbuf & E1000_VT_MSGINFO_MASK)
4150 return -EINVAL;
4151
4152 return 0;
4153
4154}
4155
4156static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4157 u32 *msgbuf, u32 vf)
4158{
4159 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4160 u16 *hash_list = (u16 *)&msgbuf[1];
4161 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4162 int i;
4163
4164 /* salt away the number of multicast addresses assigned
4165 * to this VF for later use to restore when the PF multi cast
4166 * list changes
4167 */
4168 vf_data->num_vf_mc_hashes = n;
4169
4170 /* only up to 30 hash values supported */
4171 if (n > 30)
4172 n = 30;
4173
4174 /* store the hashes for later use */
4175 for (i = 0; i < n; i++)
4176 vf_data->vf_mc_hashes[i] = hash_list[i];
4177
4178 /* Flush and reset the mta with the new values */
4179 igb_set_rx_mode(adapter->netdev);
4180
4181 return 0;
4182}
4183
4184static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4185{
4186 struct e1000_hw *hw = &adapter->hw;
4187 struct vf_data_storage *vf_data;
4188 int i, j;
4189
4190 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4191 u32 vmolr = rd32(E1000_VMOLR(i));
4192 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4193
4194 vf_data = &adapter->vf_data[i];
4195
4196 if ((vf_data->num_vf_mc_hashes > 30) ||
4197 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4198 vmolr |= E1000_VMOLR_MPME;
4199 } else if (vf_data->num_vf_mc_hashes) {
4200 vmolr |= E1000_VMOLR_ROMPE;
4201 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4202 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4203 }
4204 wr32(E1000_VMOLR(i), vmolr);
4205 }
4206}
4207
4208static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4209{
4210 struct e1000_hw *hw = &adapter->hw;
4211 u32 pool_mask, reg, vid;
4212 int i;
4213
4214 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4215
4216 /* Find the vlan filter for this id */
4217 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4218 reg = rd32(E1000_VLVF(i));
4219
4220 /* remove the vf from the pool */
4221 reg &= ~pool_mask;
4222
4223 /* if pool is empty then remove entry from vfta */
4224 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4225 (reg & E1000_VLVF_VLANID_ENABLE)) {
4226 reg = 0;
4227 vid = reg & E1000_VLVF_VLANID_MASK;
4228 igb_vfta_set(hw, vid, false);
4229 }
4230
4231 wr32(E1000_VLVF(i), reg);
4232 }
4233
4234 adapter->vf_data[vf].vlans_enabled = 0;
4235}
4236
4237static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4238{
4239 struct e1000_hw *hw = &adapter->hw;
4240 u32 reg, i;
4241
4242 /* The vlvf table only exists on 82576 hardware and newer */
4243 if (hw->mac.type < e1000_82576)
4244 return -1;
4245
4246 /* we only need to do this if VMDq is enabled */
4247 if (!adapter->vfs_allocated_count)
4248 return -1;
4249
4250 /* Find the vlan filter for this id */
4251 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4252 reg = rd32(E1000_VLVF(i));
4253 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4254 vid == (reg & E1000_VLVF_VLANID_MASK))
4255 break;
4256 }
4257
4258 if (add) {
4259 if (i == E1000_VLVF_ARRAY_SIZE) {
4260 /* Did not find a matching VLAN ID entry that was
4261 * enabled. Search for a free filter entry, i.e.
4262 * one without the enable bit set
4263 */
4264 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4265 reg = rd32(E1000_VLVF(i));
4266 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4267 break;
4268 }
4269 }
4270 if (i < E1000_VLVF_ARRAY_SIZE) {
4271 /* Found an enabled/available entry */
4272 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4273
4274 /* if !enabled we need to set this up in vfta */
4275 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4276 /* add VID to filter table */
4277 igb_vfta_set(hw, vid, true);
4278 reg |= E1000_VLVF_VLANID_ENABLE;
4279 }
4280 reg &= ~E1000_VLVF_VLANID_MASK;
4281 reg |= vid;
4282 wr32(E1000_VLVF(i), reg);
4283
4284 /* do not modify RLPML for PF devices */
4285 if (vf >= adapter->vfs_allocated_count)
4286 return 0;
4287
4288 if (!adapter->vf_data[vf].vlans_enabled) {
4289 u32 size;
4290 reg = rd32(E1000_VMOLR(vf));
4291 size = reg & E1000_VMOLR_RLPML_MASK;
4292 size += 4;
4293 reg &= ~E1000_VMOLR_RLPML_MASK;
4294 reg |= size;
4295 wr32(E1000_VMOLR(vf), reg);
4296 }
4297
4298 adapter->vf_data[vf].vlans_enabled++;
4299 return 0;
4300 }
4301 } else {
4302 if (i < E1000_VLVF_ARRAY_SIZE) {
4303 /* remove vf from the pool */
4304 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4305 /* if pool is empty then remove entry from vfta */
4306 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4307 reg = 0;
4308 igb_vfta_set(hw, vid, false);
4309 }
4310 wr32(E1000_VLVF(i), reg);
4311
4312 /* do not modify RLPML for PF devices */
4313 if (vf >= adapter->vfs_allocated_count)
4314 return 0;
4315
4316 adapter->vf_data[vf].vlans_enabled--;
4317 if (!adapter->vf_data[vf].vlans_enabled) {
4318 u32 size;
4319 reg = rd32(E1000_VMOLR(vf));
4320 size = reg & E1000_VMOLR_RLPML_MASK;
4321 size -= 4;
4322 reg &= ~E1000_VMOLR_RLPML_MASK;
4323 reg |= size;
4324 wr32(E1000_VMOLR(vf), reg);
4325 }
4326 return 0;
4327 }
4328 }
4329 return -1;
4330}
4331
4332static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4333{
4334 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4335 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4336
4337 return igb_vlvf_set(adapter, vid, add, vf);
4338}
4339
4340static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4341{
4342 /* clear all flags */
4343 adapter->vf_data[vf].flags = 0;
4344 adapter->vf_data[vf].last_nack = jiffies;
4345
4346 /* reset offloads to defaults */
4347 igb_set_vmolr(adapter, vf);
4348
4349 /* reset vlans for device */
4350 igb_clear_vf_vfta(adapter, vf);
4351
4352 /* reset multicast table array for vf */
4353 adapter->vf_data[vf].num_vf_mc_hashes = 0;
4354
4355 /* Flush and reset the mta with the new values */
4356 igb_set_rx_mode(adapter->netdev);
4357}
4358
4359static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4360{
4361 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4362
4363 /* generate a new mac address as we were hotplug removed/added */
4364 random_ether_addr(vf_mac);
4365
4366 /* process remaining reset events */
4367 igb_vf_reset(adapter, vf);
4368}
4369
4370static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4371{
4372 struct e1000_hw *hw = &adapter->hw;
4373 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4374 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4375 u32 reg, msgbuf[3];
4376 u8 *addr = (u8 *)(&msgbuf[1]);
4377
4378 /* process all the same items cleared in a function level reset */
4379 igb_vf_reset(adapter, vf);
4380
4381 /* set vf mac address */
4382 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4383
4384 /* enable transmit and receive for vf */
4385 reg = rd32(E1000_VFTE);
4386 wr32(E1000_VFTE, reg | (1 << vf));
4387 reg = rd32(E1000_VFRE);
4388 wr32(E1000_VFRE, reg | (1 << vf));
4389
4390 adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4391
4392 /* reply to reset with ack and vf mac address */
4393 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4394 memcpy(addr, vf_mac, 6);
4395 igb_write_mbx(hw, msgbuf, 3, vf);
4396}
4397
4398static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4399{
4400 unsigned char *addr = (char *)&msg[1];
4401 int err = -1;
4402
4403 if (is_valid_ether_addr(addr))
4404 err = igb_set_vf_mac(adapter, vf, addr);
4405
4406 return err;
4407}
4408
4409static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4410{
4411 struct e1000_hw *hw = &adapter->hw;
4412 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4413 u32 msg = E1000_VT_MSGTYPE_NACK;
4414
4415 /* if device isn't clear to send it shouldn't be reading either */
4416 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4417 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4418 igb_write_mbx(hw, &msg, 1, vf);
4419 vf_data->last_nack = jiffies;
4420 }
4421}
4422
4423static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4424{
4425 struct pci_dev *pdev = adapter->pdev;
4426 u32 msgbuf[E1000_VFMAILBOX_SIZE];
4427 struct e1000_hw *hw = &adapter->hw;
4428 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4429 s32 retval;
4430
4431 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4432
4433 if (retval)
4434 dev_err(&pdev->dev, "Error receiving message from VF\n");
4435
4436 /* this is a message we already processed, do nothing */
4437 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4438 return;
4439
4440 /*
4441 * until the vf completes a reset it should not be
4442 * allowed to start any configuration.
4443 */
4444
4445 if (msgbuf[0] == E1000_VF_RESET) {
4446 igb_vf_reset_msg(adapter, vf);
4447 return;
4448 }
4449
4450 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4451 msgbuf[0] = E1000_VT_MSGTYPE_NACK;
4452 if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4453 igb_write_mbx(hw, msgbuf, 1, vf);
4454 vf_data->last_nack = jiffies;
4455 }
4456 return;
4457 }
4458
4459 switch ((msgbuf[0] & 0xFFFF)) {
4460 case E1000_VF_SET_MAC_ADDR:
4461 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4462 break;
4463 case E1000_VF_SET_PROMISC:
4464 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4465 break;
4466 case E1000_VF_SET_MULTICAST:
4467 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4468 break;
4469 case E1000_VF_SET_LPE:
4470 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4471 break;
4472 case E1000_VF_SET_VLAN:
4473 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4474 break;
4475 default:
4476 dev_err(&adapter->pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4477 retval = -1;
4478 break;
4479 }
4480
4481 /* notify the VF of the results of what it sent us */
4482 if (retval)
4483 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4484 else
4485 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4486
4487 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4488
4489 igb_write_mbx(hw, msgbuf, 1, vf);
4490}
4491
4492static void igb_msg_task(struct igb_adapter *adapter)
4493{
4494 struct e1000_hw *hw = &adapter->hw;
4495 u32 vf;
4496
4497 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4498 /* process any reset requests */
4499 if (!igb_check_for_rst(hw, vf))
4500 igb_vf_reset_event(adapter, vf);
4501
4502 /* process any messages pending */
4503 if (!igb_check_for_msg(hw, vf))
4504 igb_rcv_msg_from_vf(adapter, vf);
4505
4506 /* process any acks */
4507 if (!igb_check_for_ack(hw, vf))
4508 igb_rcv_ack_from_vf(adapter, vf);
4509 }
4510}
4511
4512/**
4513 * igb_set_uta - Set unicast filter table address
4514 * @adapter: board private structure
4515 *
4516 * The unicast table address is a register array of 32-bit registers.
4517 * The table is meant to be used in a way similar to how the MTA is used
4518 * however due to certain limitations in the hardware it is necessary to
4519 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscous
4520 * enable bit to allow vlan tag stripping when promiscous mode is enabled
4521 **/
4522static void igb_set_uta(struct igb_adapter *adapter)
4523{
4524 struct e1000_hw *hw = &adapter->hw;
4525 int i;
4526
4527 /* The UTA table only exists on 82576 hardware and newer */
4528 if (hw->mac.type < e1000_82576)
4529 return;
4530
4531 /* we only need to do this if VMDq is enabled */
4532 if (!adapter->vfs_allocated_count)
4533 return;
4534
4535 for (i = 0; i < hw->mac.uta_reg_count; i++)
4536 array_wr32(E1000_UTA, i, ~0);
4537}
4538
4539/**
4540 * igb_intr_msi - Interrupt Handler
4541 * @irq: interrupt number
4542 * @data: pointer to a network interface device structure
4543 **/
4544static irqreturn_t igb_intr_msi(int irq, void *data)
4545{
4546 struct igb_adapter *adapter = data;
4547 struct igb_q_vector *q_vector = adapter->q_vector[0];
4548 struct e1000_hw *hw = &adapter->hw;
4549 /* read ICR disables interrupts using IAM */
4550 u32 icr = rd32(E1000_ICR);
4551
4552 igb_write_itr(q_vector);
4553
4554 if (icr & E1000_ICR_DOUTSYNC) {
4555 /* HW is reporting DMA is out of sync */
4556 adapter->stats.doosync++;
4557 }
4558
4559 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4560 hw->mac.get_link_status = 1;
4561 if (!test_bit(__IGB_DOWN, &adapter->state))
4562 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4563 }
4564
4565 napi_schedule(&q_vector->napi);
4566
4567 return IRQ_HANDLED;
4568}
4569
4570/**
4571 * igb_intr - Legacy Interrupt Handler
4572 * @irq: interrupt number
4573 * @data: pointer to a network interface device structure
4574 **/
4575static irqreturn_t igb_intr(int irq, void *data)
4576{
4577 struct igb_adapter *adapter = data;
4578 struct igb_q_vector *q_vector = adapter->q_vector[0];
4579 struct e1000_hw *hw = &adapter->hw;
4580 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
4581 * need for the IMC write */
4582 u32 icr = rd32(E1000_ICR);
4583 if (!icr)
4584 return IRQ_NONE; /* Not our interrupt */
4585
4586 igb_write_itr(q_vector);
4587
4588 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4589 * not set, then the adapter didn't send an interrupt */
4590 if (!(icr & E1000_ICR_INT_ASSERTED))
4591 return IRQ_NONE;
4592
4593 if (icr & E1000_ICR_DOUTSYNC) {
4594 /* HW is reporting DMA is out of sync */
4595 adapter->stats.doosync++;
4596 }
4597
4598 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4599 hw->mac.get_link_status = 1;
4600 /* guard against interrupt when we're going down */
4601 if (!test_bit(__IGB_DOWN, &adapter->state))
4602 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4603 }
4604
4605 napi_schedule(&q_vector->napi);
4606
4607 return IRQ_HANDLED;
4608}
4609
4610static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4611{
4612 struct igb_adapter *adapter = q_vector->adapter;
4613 struct e1000_hw *hw = &adapter->hw;
4614
4615 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4616 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4617 if (!adapter->msix_entries)
4618 igb_set_itr(adapter);
4619 else
4620 igb_update_ring_itr(q_vector);
4621 }
4622
4623 if (!test_bit(__IGB_DOWN, &adapter->state)) {
4624 if (adapter->msix_entries)
4625 wr32(E1000_EIMS, q_vector->eims_value);
4626 else
4627 igb_irq_enable(adapter);
4628 }
4629}
4630
4631/**
4632 * igb_poll - NAPI Rx polling callback
4633 * @napi: napi polling structure
4634 * @budget: count of how many packets we should handle
4635 **/
4636static int igb_poll(struct napi_struct *napi, int budget)
4637{
4638 struct igb_q_vector *q_vector = container_of(napi,
4639 struct igb_q_vector,
4640 napi);
4641 int tx_clean_complete = 1, work_done = 0;
4642
4643#ifdef CONFIG_IGB_DCA
4644 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4645 igb_update_dca(q_vector);
4646#endif
4647 if (q_vector->tx_ring)
4648 tx_clean_complete = igb_clean_tx_irq(q_vector);
4649
4650 if (q_vector->rx_ring)
4651 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4652
4653 if (!tx_clean_complete)
4654 work_done = budget;
4655
4656 /* If not enough Rx work done, exit the polling mode */
4657 if (work_done < budget) {
4658 napi_complete(napi);
4659 igb_ring_irq_enable(q_vector);
4660 }
4661
4662 return work_done;
4663}
4664
4665/**
4666 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4667 * @adapter: board private structure
4668 * @shhwtstamps: timestamp structure to update
4669 * @regval: unsigned 64bit system time value.
4670 *
4671 * We need to convert the system time value stored in the RX/TXSTMP registers
4672 * into a hwtstamp which can be used by the upper level timestamping functions
4673 */
4674static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4675 struct skb_shared_hwtstamps *shhwtstamps,
4676 u64 regval)
4677{
4678 u64 ns;
4679
4680 ns = timecounter_cyc2time(&adapter->clock, regval);
4681 timecompare_update(&adapter->compare, ns);
4682 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4683 shhwtstamps->hwtstamp = ns_to_ktime(ns);
4684 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4685}
4686
4687/**
4688 * igb_tx_hwtstamp - utility function which checks for TX time stamp
4689 * @q_vector: pointer to q_vector containing needed info
4690 * @skb: packet that was just sent
4691 *
4692 * If we were asked to do hardware stamping and such a time stamp is
4693 * available, then it must have been for this skb here because we only
4694 * allow only one such packet into the queue.
4695 */
4696static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4697{
4698 struct igb_adapter *adapter = q_vector->adapter;
4699 union skb_shared_tx *shtx = skb_tx(skb);
4700 struct e1000_hw *hw = &adapter->hw;
4701 struct skb_shared_hwtstamps shhwtstamps;
4702 u64 regval;
4703
4704 /* if skb does not support hw timestamp or TX stamp not valid exit */
4705 if (likely(!shtx->hardware) ||
4706 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4707 return;
4708
4709 regval = rd32(E1000_TXSTMPL);
4710 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4711
4712 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4713 skb_tstamp_tx(skb, &shhwtstamps);
4714}
4715
4716/**
4717 * igb_clean_tx_irq - Reclaim resources after transmit completes
4718 * @q_vector: pointer to q_vector containing needed info
4719 * returns true if ring is completely cleaned
4720 **/
4721static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4722{
4723 struct igb_adapter *adapter = q_vector->adapter;
4724 struct igb_ring *tx_ring = q_vector->tx_ring;
4725 struct net_device *netdev = tx_ring->netdev;
4726 struct e1000_hw *hw = &adapter->hw;
4727 struct igb_buffer *buffer_info;
4728 struct sk_buff *skb;
4729 union e1000_adv_tx_desc *tx_desc, *eop_desc;
4730 unsigned int total_bytes = 0, total_packets = 0;
4731 unsigned int i, eop, count = 0;
4732 bool cleaned = false;
4733
4734 i = tx_ring->next_to_clean;
4735 eop = tx_ring->buffer_info[i].next_to_watch;
4736 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4737
4738 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
4739 (count < tx_ring->count)) {
4740 for (cleaned = false; !cleaned; count++) {
4741 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4742 buffer_info = &tx_ring->buffer_info[i];
4743 cleaned = (i == eop);
4744 skb = buffer_info->skb;
4745
4746 if (skb) {
4747 unsigned int segs, bytecount;
4748 /* gso_segs is currently only valid for tcp */
4749 segs = skb_shinfo(skb)->gso_segs ?: 1;
4750 /* multiply data chunks by size of headers */
4751 bytecount = ((segs - 1) * skb_headlen(skb)) +
4752 skb->len;
4753 total_packets += segs;
4754 total_bytes += bytecount;
4755
4756 igb_tx_hwtstamp(q_vector, skb);
4757 }
4758
4759 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4760 tx_desc->wb.status = 0;
4761
4762 i++;
4763 if (i == tx_ring->count)
4764 i = 0;
4765 }
4766 eop = tx_ring->buffer_info[i].next_to_watch;
4767 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4768 }
4769
4770 tx_ring->next_to_clean = i;
4771
4772 if (unlikely(count &&
4773 netif_carrier_ok(netdev) &&
4774 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
4775 /* Make sure that anybody stopping the queue after this
4776 * sees the new next_to_clean.
4777 */
4778 smp_mb();
4779 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
4780 !(test_bit(__IGB_DOWN, &adapter->state))) {
4781 netif_wake_subqueue(netdev, tx_ring->queue_index);
4782 tx_ring->tx_stats.restart_queue++;
4783 }
4784 }
4785
4786 if (tx_ring->detect_tx_hung) {
4787 /* Detect a transmit hang in hardware, this serializes the
4788 * check with the clearing of time_stamp and movement of i */
4789 tx_ring->detect_tx_hung = false;
4790 if (tx_ring->buffer_info[i].time_stamp &&
4791 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
4792 (adapter->tx_timeout_factor * HZ))
4793 && !(rd32(E1000_STATUS) &
4794 E1000_STATUS_TXOFF)) {
4795
4796 /* detected Tx unit hang */
4797 dev_err(&tx_ring->pdev->dev,
4798 "Detected Tx Unit Hang\n"
4799 " Tx Queue <%d>\n"
4800 " TDH <%x>\n"
4801 " TDT <%x>\n"
4802 " next_to_use <%x>\n"
4803 " next_to_clean <%x>\n"
4804 "buffer_info[next_to_clean]\n"
4805 " time_stamp <%lx>\n"
4806 " next_to_watch <%x>\n"
4807 " jiffies <%lx>\n"
4808 " desc.status <%x>\n",
4809 tx_ring->queue_index,
4810 readl(tx_ring->head),
4811 readl(tx_ring->tail),
4812 tx_ring->next_to_use,
4813 tx_ring->next_to_clean,
4814 tx_ring->buffer_info[eop].time_stamp,
4815 eop,
4816 jiffies,
4817 eop_desc->wb.status);
4818 netif_stop_subqueue(netdev, tx_ring->queue_index);
4819 }
4820 }
4821 tx_ring->total_bytes += total_bytes;
4822 tx_ring->total_packets += total_packets;
4823 tx_ring->tx_stats.bytes += total_bytes;
4824 tx_ring->tx_stats.packets += total_packets;
4825 return (count < tx_ring->count);
4826}
4827
4828/**
4829 * igb_receive_skb - helper function to handle rx indications
4830 * @q_vector: structure containing interrupt and ring information
4831 * @skb: packet to send up
4832 * @vlan_tag: vlan tag for packet
4833 **/
4834static void igb_receive_skb(struct igb_q_vector *q_vector,
4835 struct sk_buff *skb,
4836 u16 vlan_tag)
4837{
4838 struct igb_adapter *adapter = q_vector->adapter;
4839
4840 if (vlan_tag)
4841 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
4842 vlan_tag, skb);
4843 else
4844 napi_gro_receive(&q_vector->napi, skb);
4845}
4846
4847static inline void igb_rx_checksum_adv(struct igb_ring *ring,
4848 u32 status_err, struct sk_buff *skb)
4849{
4850 skb->ip_summed = CHECKSUM_NONE;
4851
4852 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
4853 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
4854 (status_err & E1000_RXD_STAT_IXSM))
4855 return;
4856
4857 /* TCP/UDP checksum error bit is set */
4858 if (status_err &
4859 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
4860 /*
4861 * work around errata with sctp packets where the TCPE aka
4862 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
4863 * packets, (aka let the stack check the crc32c)
4864 */
4865 if ((skb->len == 60) &&
4866 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
4867 ring->rx_stats.csum_err++;
4868
4869 /* let the stack verify checksum errors */
4870 return;
4871 }
4872 /* It must be a TCP or UDP packet with a valid checksum */
4873 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
4874 skb->ip_summed = CHECKSUM_UNNECESSARY;
4875
4876 dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
4877}
4878
4879static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
4880 struct sk_buff *skb)
4881{
4882 struct igb_adapter *adapter = q_vector->adapter;
4883 struct e1000_hw *hw = &adapter->hw;
4884 u64 regval;
4885
4886 /*
4887 * If this bit is set, then the RX registers contain the time stamp. No
4888 * other packet will be time stamped until we read these registers, so
4889 * read the registers to make them available again. Because only one
4890 * packet can be time stamped at a time, we know that the register
4891 * values must belong to this one here and therefore we don't need to
4892 * compare any of the additional attributes stored for it.
4893 *
4894 * If nothing went wrong, then it should have a skb_shared_tx that we
4895 * can turn into a skb_shared_hwtstamps.
4896 */
4897 if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
4898 return;
4899 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
4900 return;
4901
4902 regval = rd32(E1000_RXSTMPL);
4903 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4904
4905 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
4906}
4907static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
4908 union e1000_adv_rx_desc *rx_desc)
4909{
4910 /* HW will not DMA in data larger than the given buffer, even if it
4911 * parses the (NFS, of course) header to be larger. In that case, it
4912 * fills the header buffer and spills the rest into the page.
4913 */
4914 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
4915 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
4916 if (hlen > rx_ring->rx_buffer_len)
4917 hlen = rx_ring->rx_buffer_len;
4918 return hlen;
4919}
4920
4921static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
4922 int *work_done, int budget)
4923{
4924 struct igb_ring *rx_ring = q_vector->rx_ring;
4925 struct net_device *netdev = rx_ring->netdev;
4926 struct pci_dev *pdev = rx_ring->pdev;
4927 union e1000_adv_rx_desc *rx_desc , *next_rxd;
4928 struct igb_buffer *buffer_info , *next_buffer;
4929 struct sk_buff *skb;
4930 bool cleaned = false;
4931 int cleaned_count = 0;
4932 unsigned int total_bytes = 0, total_packets = 0;
4933 unsigned int i;
4934 u32 staterr;
4935 u16 length;
4936 u16 vlan_tag;
4937
4938 i = rx_ring->next_to_clean;
4939 buffer_info = &rx_ring->buffer_info[i];
4940 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
4941 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4942
4943 while (staterr & E1000_RXD_STAT_DD) {
4944 if (*work_done >= budget)
4945 break;
4946 (*work_done)++;
4947
4948 skb = buffer_info->skb;
4949 prefetch(skb->data - NET_IP_ALIGN);
4950 buffer_info->skb = NULL;
4951
4952 i++;
4953 if (i == rx_ring->count)
4954 i = 0;
4955 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
4956 prefetch(next_rxd);
4957 next_buffer = &rx_ring->buffer_info[i];
4958
4959 length = le16_to_cpu(rx_desc->wb.upper.length);
4960 cleaned = true;
4961 cleaned_count++;
4962
4963 if (buffer_info->dma) {
4964 pci_unmap_single(pdev, buffer_info->dma,
4965 rx_ring->rx_buffer_len,
4966 PCI_DMA_FROMDEVICE);
4967 buffer_info->dma = 0;
4968 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
4969 skb_put(skb, length);
4970 goto send_up;
4971 }
4972 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
4973 }
4974
4975 if (length) {
4976 pci_unmap_page(pdev, buffer_info->page_dma,
4977 PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
4978 buffer_info->page_dma = 0;
4979
4980 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
4981 buffer_info->page,
4982 buffer_info->page_offset,
4983 length);
4984
4985 if (page_count(buffer_info->page) != 1)
4986 buffer_info->page = NULL;
4987 else
4988 get_page(buffer_info->page);
4989
4990 skb->len += length;
4991 skb->data_len += length;
4992
4993 skb->truesize += length;
4994 }
4995
4996 if (!(staterr & E1000_RXD_STAT_EOP)) {
4997 buffer_info->skb = next_buffer->skb;
4998 buffer_info->dma = next_buffer->dma;
4999 next_buffer->skb = skb;
5000 next_buffer->dma = 0;
5001 goto next_desc;
5002 }
5003send_up:
5004 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5005 dev_kfree_skb_irq(skb);
5006 goto next_desc;
5007 }
5008
5009 igb_rx_hwtstamp(q_vector, staterr, skb);
5010 total_bytes += skb->len;
5011 total_packets++;
5012
5013 igb_rx_checksum_adv(rx_ring, staterr, skb);
5014
5015 skb->protocol = eth_type_trans(skb, netdev);
5016 skb_record_rx_queue(skb, rx_ring->queue_index);
5017
5018 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5019 le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5020
5021 igb_receive_skb(q_vector, skb, vlan_tag);
5022
5023next_desc:
5024 rx_desc->wb.upper.status_error = 0;
5025
5026 /* return some buffers to hardware, one at a time is too slow */
5027 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5028 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5029 cleaned_count = 0;
5030 }
5031
5032 /* use prefetched values */
5033 rx_desc = next_rxd;
5034 buffer_info = next_buffer;
5035 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5036 }
5037
5038 rx_ring->next_to_clean = i;
5039 cleaned_count = igb_desc_unused(rx_ring);
5040
5041 if (cleaned_count)
5042 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5043
5044 rx_ring->total_packets += total_packets;
5045 rx_ring->total_bytes += total_bytes;
5046 rx_ring->rx_stats.packets += total_packets;
5047 rx_ring->rx_stats.bytes += total_bytes;
5048 return cleaned;
5049}
5050
5051/**
5052 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5053 * @adapter: address of board private structure
5054 **/
5055void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5056{
5057 struct net_device *netdev = rx_ring->netdev;
5058 union e1000_adv_rx_desc *rx_desc;
5059 struct igb_buffer *buffer_info;
5060 struct sk_buff *skb;
5061 unsigned int i;
5062 int bufsz;
5063
5064 i = rx_ring->next_to_use;
5065 buffer_info = &rx_ring->buffer_info[i];
5066
5067 bufsz = rx_ring->rx_buffer_len;
5068
5069 while (cleaned_count--) {
5070 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5071
5072 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5073 if (!buffer_info->page) {
5074 buffer_info->page = alloc_page(GFP_ATOMIC);
5075 if (!buffer_info->page) {
5076 rx_ring->rx_stats.alloc_failed++;
5077 goto no_buffers;
5078 }
5079 buffer_info->page_offset = 0;
5080 } else {
5081 buffer_info->page_offset ^= PAGE_SIZE / 2;
5082 }
5083 buffer_info->page_dma =
5084 pci_map_page(rx_ring->pdev, buffer_info->page,
5085 buffer_info->page_offset,
5086 PAGE_SIZE / 2,
5087 PCI_DMA_FROMDEVICE);
5088 }
5089
5090 if (!buffer_info->skb) {
5091 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5092 if (!skb) {
5093 rx_ring->rx_stats.alloc_failed++;
5094 goto no_buffers;
5095 }
5096
5097 buffer_info->skb = skb;
5098 buffer_info->dma = pci_map_single(rx_ring->pdev,
5099 skb->data,
5100 bufsz,
5101 PCI_DMA_FROMDEVICE);
5102 }
5103 /* Refresh the desc even if buffer_addrs didn't change because
5104 * each write-back erases this info. */
5105 if (bufsz < IGB_RXBUFFER_1024) {
5106 rx_desc->read.pkt_addr =
5107 cpu_to_le64(buffer_info->page_dma);
5108 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5109 } else {
5110 rx_desc->read.pkt_addr =
5111 cpu_to_le64(buffer_info->dma);
5112 rx_desc->read.hdr_addr = 0;
5113 }
5114
5115 i++;
5116 if (i == rx_ring->count)
5117 i = 0;
5118 buffer_info = &rx_ring->buffer_info[i];
5119 }
5120
5121no_buffers:
5122 if (rx_ring->next_to_use != i) {
5123 rx_ring->next_to_use = i;
5124 if (i == 0)
5125 i = (rx_ring->count - 1);
5126 else
5127 i--;
5128
5129 /* Force memory writes to complete before letting h/w
5130 * know there are new descriptors to fetch. (Only
5131 * applicable for weak-ordered memory model archs,
5132 * such as IA-64). */
5133 wmb();
5134 writel(i, rx_ring->tail);
5135 }
5136}
5137
5138/**
5139 * igb_mii_ioctl -
5140 * @netdev:
5141 * @ifreq:
5142 * @cmd:
5143 **/
5144static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5145{
5146 struct igb_adapter *adapter = netdev_priv(netdev);
5147 struct mii_ioctl_data *data = if_mii(ifr);
5148
5149 if (adapter->hw.phy.media_type != e1000_media_type_copper)
5150 return -EOPNOTSUPP;
5151
5152 switch (cmd) {
5153 case SIOCGMIIPHY:
5154 data->phy_id = adapter->hw.phy.addr;
5155 break;
5156 case SIOCGMIIREG:
5157 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5158 &data->val_out))
5159 return -EIO;
5160 break;
5161 case SIOCSMIIREG:
5162 default:
5163 return -EOPNOTSUPP;
5164 }
5165 return 0;
5166}
5167
5168/**
5169 * igb_hwtstamp_ioctl - control hardware time stamping
5170 * @netdev:
5171 * @ifreq:
5172 * @cmd:
5173 *
5174 * Outgoing time stamping can be enabled and disabled. Play nice and
5175 * disable it when requested, although it shouldn't case any overhead
5176 * when no packet needs it. At most one packet in the queue may be
5177 * marked for time stamping, otherwise it would be impossible to tell
5178 * for sure to which packet the hardware time stamp belongs.
5179 *
5180 * Incoming time stamping has to be configured via the hardware
5181 * filters. Not all combinations are supported, in particular event
5182 * type has to be specified. Matching the kind of event packet is
5183 * not supported, with the exception of "all V2 events regardless of
5184 * level 2 or 4".
5185 *
5186 **/
5187static int igb_hwtstamp_ioctl(struct net_device *netdev,
5188 struct ifreq *ifr, int cmd)
5189{
5190 struct igb_adapter *adapter = netdev_priv(netdev);
5191 struct e1000_hw *hw = &adapter->hw;
5192 struct hwtstamp_config config;
5193 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5194 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5195 u32 tsync_rx_cfg = 0;
5196 bool is_l4 = false;
5197 bool is_l2 = false;
5198 u32 regval;
5199
5200 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5201 return -EFAULT;
5202
5203 /* reserved for future extensions */
5204 if (config.flags)
5205 return -EINVAL;
5206
5207 switch (config.tx_type) {
5208 case HWTSTAMP_TX_OFF:
5209 tsync_tx_ctl = 0;
5210 case HWTSTAMP_TX_ON:
5211 break;
5212 default:
5213 return -ERANGE;
5214 }
5215
5216 switch (config.rx_filter) {
5217 case HWTSTAMP_FILTER_NONE:
5218 tsync_rx_ctl = 0;
5219 break;
5220 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5221 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5222 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5223 case HWTSTAMP_FILTER_ALL:
5224 /*
5225 * register TSYNCRXCFG must be set, therefore it is not
5226 * possible to time stamp both Sync and Delay_Req messages
5227 * => fall back to time stamping all packets
5228 */
5229 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5230 config.rx_filter = HWTSTAMP_FILTER_ALL;
5231 break;
5232 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5233 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5234 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5235 is_l4 = true;
5236 break;
5237 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5238 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5239 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5240 is_l4 = true;
5241 break;
5242 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5243 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5244 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5245 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5246 is_l2 = true;
5247 is_l4 = true;
5248 config.rx_filter = HWTSTAMP_FILTER_SOME;
5249 break;
5250 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5251 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5252 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5253 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5254 is_l2 = true;
5255 is_l4 = true;
5256 config.rx_filter = HWTSTAMP_FILTER_SOME;
5257 break;
5258 case HWTSTAMP_FILTER_PTP_V2_EVENT:
5259 case HWTSTAMP_FILTER_PTP_V2_SYNC:
5260 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5261 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5262 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5263 is_l2 = true;
5264 break;
5265 default:
5266 return -ERANGE;
5267 }
5268
5269 if (hw->mac.type == e1000_82575) {
5270 if (tsync_rx_ctl | tsync_tx_ctl)
5271 return -EINVAL;
5272 return 0;
5273 }
5274
5275 /* enable/disable TX */
5276 regval = rd32(E1000_TSYNCTXCTL);
5277 regval &= ~E1000_TSYNCTXCTL_ENABLED;
5278 regval |= tsync_tx_ctl;
5279 wr32(E1000_TSYNCTXCTL, regval);
5280
5281 /* enable/disable RX */
5282 regval = rd32(E1000_TSYNCRXCTL);
5283 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5284 regval |= tsync_rx_ctl;
5285 wr32(E1000_TSYNCRXCTL, regval);
5286
5287 /* define which PTP packets are time stamped */
5288 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5289
5290 /* define ethertype filter for timestamped packets */
5291 if (is_l2)
5292 wr32(E1000_ETQF(3),
5293 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5294 E1000_ETQF_1588 | /* enable timestamping */
5295 ETH_P_1588)); /* 1588 eth protocol type */
5296 else
5297 wr32(E1000_ETQF(3), 0);
5298
5299#define PTP_PORT 319
5300 /* L4 Queue Filter[3]: filter by destination port and protocol */
5301 if (is_l4) {
5302 u32 ftqf = (IPPROTO_UDP /* UDP */
5303 | E1000_FTQF_VF_BP /* VF not compared */
5304 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5305 | E1000_FTQF_MASK); /* mask all inputs */
5306 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5307
5308 wr32(E1000_IMIR(3), htons(PTP_PORT));
5309 wr32(E1000_IMIREXT(3),
5310 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5311 if (hw->mac.type == e1000_82576) {
5312 /* enable source port check */
5313 wr32(E1000_SPQF(3), htons(PTP_PORT));
5314 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5315 }
5316 wr32(E1000_FTQF(3), ftqf);
5317 } else {
5318 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5319 }
5320 wrfl();
5321
5322 adapter->hwtstamp_config = config;
5323
5324 /* clear TX/RX time stamp registers, just to be sure */
5325 regval = rd32(E1000_TXSTMPH);
5326 regval = rd32(E1000_RXSTMPH);
5327
5328 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5329 -EFAULT : 0;
5330}
5331
5332/**
5333 * igb_ioctl -
5334 * @netdev:
5335 * @ifreq:
5336 * @cmd:
5337 **/
5338static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5339{
5340 switch (cmd) {
5341 case SIOCGMIIPHY:
5342 case SIOCGMIIREG:
5343 case SIOCSMIIREG:
5344 return igb_mii_ioctl(netdev, ifr, cmd);
5345 case SIOCSHWTSTAMP:
5346 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5347 default:
5348 return -EOPNOTSUPP;
5349 }
5350}
5351
5352s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5353{
5354 struct igb_adapter *adapter = hw->back;
5355 u16 cap_offset;
5356
5357 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5358 if (!cap_offset)
5359 return -E1000_ERR_CONFIG;
5360
5361 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5362
5363 return 0;
5364}
5365
5366s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5367{
5368 struct igb_adapter *adapter = hw->back;
5369 u16 cap_offset;
5370
5371 cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5372 if (!cap_offset)
5373 return -E1000_ERR_CONFIG;
5374
5375 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5376
5377 return 0;
5378}
5379
5380static void igb_vlan_rx_register(struct net_device *netdev,
5381 struct vlan_group *grp)
5382{
5383 struct igb_adapter *adapter = netdev_priv(netdev);
5384 struct e1000_hw *hw = &adapter->hw;
5385 u32 ctrl, rctl;
5386
5387 igb_irq_disable(adapter);
5388 adapter->vlgrp = grp;
5389
5390 if (grp) {
5391 /* enable VLAN tag insert/strip */
5392 ctrl = rd32(E1000_CTRL);
5393 ctrl |= E1000_CTRL_VME;
5394 wr32(E1000_CTRL, ctrl);
5395
5396 /* Disable CFI check */
5397 rctl = rd32(E1000_RCTL);
5398 rctl &= ~E1000_RCTL_CFIEN;
5399 wr32(E1000_RCTL, rctl);
5400 } else {
5401 /* disable VLAN tag insert/strip */
5402 ctrl = rd32(E1000_CTRL);
5403 ctrl &= ~E1000_CTRL_VME;
5404 wr32(E1000_CTRL, ctrl);
5405 }
5406
5407 igb_rlpml_set(adapter);
5408
5409 if (!test_bit(__IGB_DOWN, &adapter->state))
5410 igb_irq_enable(adapter);
5411}
5412
5413static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5414{
5415 struct igb_adapter *adapter = netdev_priv(netdev);
5416 struct e1000_hw *hw = &adapter->hw;
5417 int pf_id = adapter->vfs_allocated_count;
5418
5419 /* attempt to add filter to vlvf array */
5420 igb_vlvf_set(adapter, vid, true, pf_id);
5421
5422 /* add the filter since PF can receive vlans w/o entry in vlvf */
5423 igb_vfta_set(hw, vid, true);
5424}
5425
5426static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5427{
5428 struct igb_adapter *adapter = netdev_priv(netdev);
5429 struct e1000_hw *hw = &adapter->hw;
5430 int pf_id = adapter->vfs_allocated_count;
5431 s32 err;
5432
5433 igb_irq_disable(adapter);
5434 vlan_group_set_device(adapter->vlgrp, vid, NULL);
5435
5436 if (!test_bit(__IGB_DOWN, &adapter->state))
5437 igb_irq_enable(adapter);
5438
5439 /* remove vlan from VLVF table array */
5440 err = igb_vlvf_set(adapter, vid, false, pf_id);
5441
5442 /* if vid was not present in VLVF just remove it from table */
5443 if (err)
5444 igb_vfta_set(hw, vid, false);
5445}
5446
5447static void igb_restore_vlan(struct igb_adapter *adapter)
5448{
5449 igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5450
5451 if (adapter->vlgrp) {
5452 u16 vid;
5453 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5454 if (!vlan_group_get_device(adapter->vlgrp, vid))
5455 continue;
5456 igb_vlan_rx_add_vid(adapter->netdev, vid);
5457 }
5458 }
5459}
5460
5461int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5462{
5463 struct e1000_mac_info *mac = &adapter->hw.mac;
5464
5465 mac->autoneg = 0;
5466
5467 switch (spddplx) {
5468 case SPEED_10 + DUPLEX_HALF:
5469 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5470 break;
5471 case SPEED_10 + DUPLEX_FULL:
5472 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5473 break;
5474 case SPEED_100 + DUPLEX_HALF:
5475 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5476 break;
5477 case SPEED_100 + DUPLEX_FULL:
5478 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5479 break;
5480 case SPEED_1000 + DUPLEX_FULL:
5481 mac->autoneg = 1;
5482 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5483 break;
5484 case SPEED_1000 + DUPLEX_HALF: /* not supported */
5485 default:
5486 dev_err(&adapter->pdev->dev,
5487 "Unsupported Speed/Duplex configuration\n");
5488 return -EINVAL;
5489 }
5490 return 0;
5491}
5492
5493static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5494{
5495 struct net_device *netdev = pci_get_drvdata(pdev);
5496 struct igb_adapter *adapter = netdev_priv(netdev);
5497 struct e1000_hw *hw = &adapter->hw;
5498 u32 ctrl, rctl, status;
5499 u32 wufc = adapter->wol;
5500#ifdef CONFIG_PM
5501 int retval = 0;
5502#endif
5503
5504 netif_device_detach(netdev);
5505
5506 if (netif_running(netdev))
5507 igb_close(netdev);
5508
5509 igb_clear_interrupt_scheme(adapter);
5510
5511#ifdef CONFIG_PM
5512 retval = pci_save_state(pdev);
5513 if (retval)
5514 return retval;
5515#endif
5516
5517 status = rd32(E1000_STATUS);
5518 if (status & E1000_STATUS_LU)
5519 wufc &= ~E1000_WUFC_LNKC;
5520
5521 if (wufc) {
5522 igb_setup_rctl(adapter);
5523 igb_set_rx_mode(netdev);
5524
5525 /* turn on all-multi mode if wake on multicast is enabled */
5526 if (wufc & E1000_WUFC_MC) {
5527 rctl = rd32(E1000_RCTL);
5528 rctl |= E1000_RCTL_MPE;
5529 wr32(E1000_RCTL, rctl);
5530 }
5531
5532 ctrl = rd32(E1000_CTRL);
5533 /* advertise wake from D3Cold */
5534 #define E1000_CTRL_ADVD3WUC 0x00100000
5535 /* phy power management enable */
5536 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5537 ctrl |= E1000_CTRL_ADVD3WUC;
5538 wr32(E1000_CTRL, ctrl);
5539
5540 /* Allow time for pending master requests to run */
5541 igb_disable_pcie_master(&adapter->hw);
5542
5543 wr32(E1000_WUC, E1000_WUC_PME_EN);
5544 wr32(E1000_WUFC, wufc);
5545 } else {
5546 wr32(E1000_WUC, 0);
5547 wr32(E1000_WUFC, 0);
5548 }
5549
5550 *enable_wake = wufc || adapter->en_mng_pt;
5551 if (!*enable_wake)
5552 igb_shutdown_serdes_link_82575(hw);
5553
5554 /* Release control of h/w to f/w. If f/w is AMT enabled, this
5555 * would have already happened in close and is redundant. */
5556 igb_release_hw_control(adapter);
5557
5558 pci_disable_device(pdev);
5559
5560 return 0;
5561}
5562
5563#ifdef CONFIG_PM
5564static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5565{
5566 int retval;
5567 bool wake;
5568
5569 retval = __igb_shutdown(pdev, &wake);
5570 if (retval)
5571 return retval;
5572
5573 if (wake) {
5574 pci_prepare_to_sleep(pdev);
5575 } else {
5576 pci_wake_from_d3(pdev, false);
5577 pci_set_power_state(pdev, PCI_D3hot);
5578 }
5579
5580 return 0;
5581}
5582
5583static int igb_resume(struct pci_dev *pdev)
5584{
5585 struct net_device *netdev = pci_get_drvdata(pdev);
5586 struct igb_adapter *adapter = netdev_priv(netdev);
5587 struct e1000_hw *hw = &adapter->hw;
5588 u32 err;
5589
5590 pci_set_power_state(pdev, PCI_D0);
5591 pci_restore_state(pdev);
5592
5593 err = pci_enable_device_mem(pdev);
5594 if (err) {
5595 dev_err(&pdev->dev,
5596 "igb: Cannot enable PCI device from suspend\n");
5597 return err;
5598 }
5599 pci_set_master(pdev);
5600
5601 pci_enable_wake(pdev, PCI_D3hot, 0);
5602 pci_enable_wake(pdev, PCI_D3cold, 0);
5603
5604 if (igb_init_interrupt_scheme(adapter)) {
5605 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5606 return -ENOMEM;
5607 }
5608
5609 /* e1000_power_up_phy(adapter); */
5610
5611 igb_reset(adapter);
5612
5613 /* let the f/w know that the h/w is now under the control of the
5614 * driver. */
5615 igb_get_hw_control(adapter);
5616
5617 wr32(E1000_WUS, ~0);
5618
5619 if (netif_running(netdev)) {
5620 err = igb_open(netdev);
5621 if (err)
5622 return err;
5623 }
5624
5625 netif_device_attach(netdev);
5626
5627 return 0;
5628}
5629#endif
5630
5631static void igb_shutdown(struct pci_dev *pdev)
5632{
5633 bool wake;
5634
5635 __igb_shutdown(pdev, &wake);
5636
5637 if (system_state == SYSTEM_POWER_OFF) {
5638 pci_wake_from_d3(pdev, wake);
5639 pci_set_power_state(pdev, PCI_D3hot);
5640 }
5641}
5642
5643#ifdef CONFIG_NET_POLL_CONTROLLER
5644/*
5645 * Polling 'interrupt' - used by things like netconsole to send skbs
5646 * without having to re-enable interrupts. It's not called while
5647 * the interrupt routine is executing.
5648 */
5649static void igb_netpoll(struct net_device *netdev)
5650{
5651 struct igb_adapter *adapter = netdev_priv(netdev);
5652 struct e1000_hw *hw = &adapter->hw;
5653 int i;
5654
5655 if (!adapter->msix_entries) {
5656 struct igb_q_vector *q_vector = adapter->q_vector[0];
5657 igb_irq_disable(adapter);
5658 napi_schedule(&q_vector->napi);
5659 return;
5660 }
5661
5662 for (i = 0; i < adapter->num_q_vectors; i++) {
5663 struct igb_q_vector *q_vector = adapter->q_vector[i];
5664 wr32(E1000_EIMC, q_vector->eims_value);
5665 napi_schedule(&q_vector->napi);
5666 }
5667}
5668#endif /* CONFIG_NET_POLL_CONTROLLER */
5669
5670/**
5671 * igb_io_error_detected - called when PCI error is detected
5672 * @pdev: Pointer to PCI device
5673 * @state: The current pci connection state
5674 *
5675 * This function is called after a PCI bus error affecting
5676 * this device has been detected.
5677 */
5678static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5679 pci_channel_state_t state)
5680{
5681 struct net_device *netdev = pci_get_drvdata(pdev);
5682 struct igb_adapter *adapter = netdev_priv(netdev);
5683
5684 netif_device_detach(netdev);
5685
5686 if (state == pci_channel_io_perm_failure)
5687 return PCI_ERS_RESULT_DISCONNECT;
5688
5689 if (netif_running(netdev))
5690 igb_down(adapter);
5691 pci_disable_device(pdev);
5692
5693 /* Request a slot slot reset. */
5694 return PCI_ERS_RESULT_NEED_RESET;
5695}
5696
5697/**
5698 * igb_io_slot_reset - called after the pci bus has been reset.
5699 * @pdev: Pointer to PCI device
5700 *
5701 * Restart the card from scratch, as if from a cold-boot. Implementation
5702 * resembles the first-half of the igb_resume routine.
5703 */
5704static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5705{
5706 struct net_device *netdev = pci_get_drvdata(pdev);
5707 struct igb_adapter *adapter = netdev_priv(netdev);
5708 struct e1000_hw *hw = &adapter->hw;
5709 pci_ers_result_t result;
5710 int err;
5711
5712 if (pci_enable_device_mem(pdev)) {
5713 dev_err(&pdev->dev,
5714 "Cannot re-enable PCI device after reset.\n");
5715 result = PCI_ERS_RESULT_DISCONNECT;
5716 } else {
5717 pci_set_master(pdev);
5718 pci_restore_state(pdev);
5719
5720 pci_enable_wake(pdev, PCI_D3hot, 0);
5721 pci_enable_wake(pdev, PCI_D3cold, 0);
5722
5723 igb_reset(adapter);
5724 wr32(E1000_WUS, ~0);
5725 result = PCI_ERS_RESULT_RECOVERED;
5726 }
5727
5728 err = pci_cleanup_aer_uncorrect_error_status(pdev);
5729 if (err) {
5730 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
5731 "failed 0x%0x\n", err);
5732 /* non-fatal, continue */
5733 }
5734
5735 return result;
5736}
5737
5738/**
5739 * igb_io_resume - called when traffic can start flowing again.
5740 * @pdev: Pointer to PCI device
5741 *
5742 * This callback is called when the error recovery driver tells us that
5743 * its OK to resume normal operation. Implementation resembles the
5744 * second-half of the igb_resume routine.
5745 */
5746static void igb_io_resume(struct pci_dev *pdev)
5747{
5748 struct net_device *netdev = pci_get_drvdata(pdev);
5749 struct igb_adapter *adapter = netdev_priv(netdev);
5750
5751 if (netif_running(netdev)) {
5752 if (igb_up(adapter)) {
5753 dev_err(&pdev->dev, "igb_up failed after reset\n");
5754 return;
5755 }
5756 }
5757
5758 netif_device_attach(netdev);
5759
5760 /* let the f/w know that the h/w is now under the control of the
5761 * driver. */
5762 igb_get_hw_control(adapter);
5763}
5764
5765static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
5766 u8 qsel)
5767{
5768 u32 rar_low, rar_high;
5769 struct e1000_hw *hw = &adapter->hw;
5770
5771 /* HW expects these in little endian so we reverse the byte order
5772 * from network order (big endian) to little endian
5773 */
5774 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
5775 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
5776 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
5777
5778 /* Indicate to hardware the Address is Valid. */
5779 rar_high |= E1000_RAH_AV;
5780
5781 if (hw->mac.type == e1000_82575)
5782 rar_high |= E1000_RAH_POOL_1 * qsel;
5783 else
5784 rar_high |= E1000_RAH_POOL_1 << qsel;
5785
5786 wr32(E1000_RAL(index), rar_low);
5787 wrfl();
5788 wr32(E1000_RAH(index), rar_high);
5789 wrfl();
5790}
5791
5792static int igb_set_vf_mac(struct igb_adapter *adapter,
5793 int vf, unsigned char *mac_addr)
5794{
5795 struct e1000_hw *hw = &adapter->hw;
5796 /* VF MAC addresses start at end of receive addresses and moves
5797 * torwards the first, as a result a collision should not be possible */
5798 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5799
5800 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
5801
5802 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
5803
5804 return 0;
5805}
5806
5807static void igb_vmm_control(struct igb_adapter *adapter)
5808{
5809 struct e1000_hw *hw = &adapter->hw;
5810 u32 reg;
5811
5812 /* replication is not supported for 82575 */
5813 if (hw->mac.type == e1000_82575)
5814 return;
5815
5816 /* enable replication vlan tag stripping */
5817 reg = rd32(E1000_RPLOLR);
5818 reg |= E1000_RPLOLR_STRVLAN;
5819 wr32(E1000_RPLOLR, reg);
5820
5821 /* notify HW that the MAC is adding vlan tags */
5822 reg = rd32(E1000_DTXCTL);
5823 reg |= E1000_DTXCTL_VLAN_ADDED;
5824 wr32(E1000_DTXCTL, reg);
5825
5826 if (adapter->vfs_allocated_count) {
5827 igb_vmdq_set_loopback_pf(hw, true);
5828 igb_vmdq_set_replication_pf(hw, true);
5829 } else {
5830 igb_vmdq_set_loopback_pf(hw, false);
5831 igb_vmdq_set_replication_pf(hw, false);
5832 }
5833}
5834
5835/* igb_main.c */
This page took 0.052058 seconds and 5 git commands to generate.