igb: streamline Rx buffer allocation and cleanup
[deliverable/linux.git] / drivers / net / ethernet / intel / igb / igb_main.c
1 /*******************************************************************************
2
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2011 Intel Corporation.
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
9
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
14
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
21
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/bitops.h>
32 #include <linux/vmalloc.h>
33 #include <linux/pagemap.h>
34 #include <linux/netdevice.h>
35 #include <linux/ipv6.h>
36 #include <linux/slab.h>
37 #include <net/checksum.h>
38 #include <net/ip6_checksum.h>
39 #include <linux/net_tstamp.h>
40 #include <linux/mii.h>
41 #include <linux/ethtool.h>
42 #include <linux/if.h>
43 #include <linux/if_vlan.h>
44 #include <linux/pci.h>
45 #include <linux/pci-aspm.h>
46 #include <linux/delay.h>
47 #include <linux/interrupt.h>
48 #include <linux/if_ether.h>
49 #include <linux/aer.h>
50 #include <linux/prefetch.h>
51 #ifdef CONFIG_IGB_DCA
52 #include <linux/dca.h>
53 #endif
54 #include "igb.h"
55
56 #define MAJ 3
57 #define MIN 0
58 #define BUILD 6
59 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
60 __stringify(BUILD) "-k"
61 char igb_driver_name[] = "igb";
62 char igb_driver_version[] = DRV_VERSION;
63 static const char igb_driver_string[] =
64 "Intel(R) Gigabit Ethernet Network Driver";
65 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
66
/*
 * Table of hardware description structures, indexed by board id.
 * The only entry defined here is board_82575, which points at
 * e1000_82575_info.
 */
67 static const struct e1000_info *igb_info_tbl[] = {
68 [board_82575] = &e1000_82575_info,
69 };
70
/*
 * PCI device IDs handled by this driver.  Every listed Intel device ID
 * carries board_82575 as its driver data.  The table is exported through
 * MODULE_DEVICE_TABLE() and referenced by igb_driver.id_table; it must stay
 * terminated by the all-zero entry.
 */
71 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
72 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
73 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
74 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
75 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
76 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
77 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
96 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
97 /* required last entry */
98 {0, }
99 };
100
101 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
102
103 void igb_reset(struct igb_adapter *);
104 static int igb_setup_all_tx_resources(struct igb_adapter *);
105 static int igb_setup_all_rx_resources(struct igb_adapter *);
106 static void igb_free_all_tx_resources(struct igb_adapter *);
107 static void igb_free_all_rx_resources(struct igb_adapter *);
108 static void igb_setup_mrqc(struct igb_adapter *);
109 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
110 static void __devexit igb_remove(struct pci_dev *pdev);
111 static void igb_init_hw_timer(struct igb_adapter *adapter);
112 static int igb_sw_init(struct igb_adapter *);
113 static int igb_open(struct net_device *);
114 static int igb_close(struct net_device *);
115 static void igb_configure_tx(struct igb_adapter *);
116 static void igb_configure_rx(struct igb_adapter *);
117 static void igb_clean_all_tx_rings(struct igb_adapter *);
118 static void igb_clean_all_rx_rings(struct igb_adapter *);
119 static void igb_clean_tx_ring(struct igb_ring *);
120 static void igb_clean_rx_ring(struct igb_ring *);
121 static void igb_set_rx_mode(struct net_device *);
122 static void igb_update_phy_info(unsigned long);
123 static void igb_watchdog(unsigned long);
124 static void igb_watchdog_task(struct work_struct *);
125 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
126 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
127 struct rtnl_link_stats64 *stats);
128 static int igb_change_mtu(struct net_device *, int);
129 static int igb_set_mac(struct net_device *, void *);
130 static void igb_set_uta(struct igb_adapter *adapter);
131 static irqreturn_t igb_intr(int irq, void *);
132 static irqreturn_t igb_intr_msi(int irq, void *);
133 static irqreturn_t igb_msix_other(int irq, void *);
134 static irqreturn_t igb_msix_ring(int irq, void *);
135 #ifdef CONFIG_IGB_DCA
136 static void igb_update_dca(struct igb_q_vector *);
137 static void igb_setup_dca(struct igb_adapter *);
138 #endif /* CONFIG_IGB_DCA */
139 static bool igb_clean_tx_irq(struct igb_q_vector *);
140 static int igb_poll(struct napi_struct *, int);
141 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
142 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
143 static void igb_tx_timeout(struct net_device *);
144 static void igb_reset_task(struct work_struct *);
145 static void igb_vlan_mode(struct net_device *netdev, u32 features);
146 static void igb_vlan_rx_add_vid(struct net_device *, u16);
147 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
148 static void igb_restore_vlan(struct igb_adapter *);
149 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
150 static void igb_ping_all_vfs(struct igb_adapter *);
151 static void igb_msg_task(struct igb_adapter *);
152 static void igb_vmm_control(struct igb_adapter *);
153 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
154 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
155 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
156 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
157 int vf, u16 vlan, u8 qos);
158 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
159 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
160 struct ifla_vf_info *ivi);
161 static void igb_check_vf_rate_limit(struct igb_adapter *);
162
163 #ifdef CONFIG_PM
164 static int igb_suspend(struct pci_dev *, pm_message_t);
165 static int igb_resume(struct pci_dev *);
166 #endif
167 static void igb_shutdown(struct pci_dev *);
168 #ifdef CONFIG_IGB_DCA
169 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
/*
 * DCA notifier block: registered with dca_register_notify() in
 * igb_init_module() and removed with dca_unregister_notify() in
 * igb_exit_module().  Notifications are delivered to igb_notify_dca().
 */
170 static struct notifier_block dca_notifier = {
171 .notifier_call = igb_notify_dca,
172 .next = NULL,
173 .priority = 0
174 };
175 #endif
176 #ifdef CONFIG_NET_POLL_CONTROLLER
177 /* for netdump / net console */
178 static void igb_netpoll(struct net_device *);
179 #endif
180 #ifdef CONFIG_PCI_IOV
181 static unsigned int max_vfs = 0;
182 module_param(max_vfs, uint, 0);
183 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
184 "per physical function");
185 #endif /* CONFIG_PCI_IOV */
186
187 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
188 pci_channel_state_t);
189 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
190 static void igb_io_resume(struct pci_dev *);
191
/*
 * PCI error-handling callbacks for this driver; hooked into the PCI core
 * through igb_driver.err_handler.
 */
192 static struct pci_error_handlers igb_err_handler = {
193 .error_detected = igb_io_error_detected,
194 .slot_reset = igb_io_slot_reset,
195 .resume = igb_io_resume,
196 };
197
198
/*
 * PCI driver description handed to pci_register_driver() in
 * igb_init_module().  The suspend/resume hooks are only present when
 * CONFIG_PM is enabled.
 */
199 static struct pci_driver igb_driver = {
200 .name = igb_driver_name,
201 .id_table = igb_pci_tbl,
202 .probe = igb_probe,
203 .remove = __devexit_p(igb_remove),
204 #ifdef CONFIG_PM
205 /* Power Management Hooks */
206 .suspend = igb_suspend,
207 .resume = igb_resume,
208 #endif
209 .shutdown = igb_shutdown,
210 .err_handler = &igb_err_handler
211 };
212
213 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
214 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
215 MODULE_LICENSE("GPL");
216 MODULE_VERSION(DRV_VERSION);
217
218 struct igb_reg_info {
219 u32 ofs;
220 char *name;
221 };
222
/*
 * Registers printed by igb_dump() through igb_regdump(), in print order.
 * For the per-queue ring registers only the queue 0 offset is listed;
 * igb_regdump() expands those entries to queues 0-3.  The empty last entry
 * (name == NULL) terminates the walk in igb_dump().
 */
223 static const struct igb_reg_info igb_reg_info_tbl[] = {
224
225 /* General Registers */
226 {E1000_CTRL, "CTRL"},
227 {E1000_STATUS, "STATUS"},
228 {E1000_CTRL_EXT, "CTRL_EXT"},
229
230 /* Interrupt Registers */
231 {E1000_ICR, "ICR"},
232
233 /* RX Registers */
234 {E1000_RCTL, "RCTL"},
235 {E1000_RDLEN(0), "RDLEN"},
236 {E1000_RDH(0), "RDH"},
237 {E1000_RDT(0), "RDT"},
238 {E1000_RXDCTL(0), "RXDCTL"},
239 {E1000_RDBAL(0), "RDBAL"},
240 {E1000_RDBAH(0), "RDBAH"},
241
242 /* TX Registers */
243 {E1000_TCTL, "TCTL"},
244 {E1000_TDBAL(0), "TDBAL"},
245 {E1000_TDBAH(0), "TDBAH"},
246 {E1000_TDLEN(0), "TDLEN"},
247 {E1000_TDH(0), "TDH"},
248 {E1000_TDT(0), "TDT"},
249 {E1000_TXDCTL(0), "TXDCTL"},
250 {E1000_TDFH, "TDFH"},
251 {E1000_TDFT, "TDFT"},
252 {E1000_TDFHS, "TDFHS"},
253 {E1000_TDFPC, "TDFPC"},
254
255 /* List Terminator */
256 {}
257 };
258
259 /*
260 * igb_regdump - register printout routine
261 */
262 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
263 {
264 int n = 0;
265 char rname[16];
266 u32 regs[8];
267
268 switch (reginfo->ofs) {
269 case E1000_RDLEN(0):
270 for (n = 0; n < 4; n++)
271 regs[n] = rd32(E1000_RDLEN(n));
272 break;
273 case E1000_RDH(0):
274 for (n = 0; n < 4; n++)
275 regs[n] = rd32(E1000_RDH(n));
276 break;
277 case E1000_RDT(0):
278 for (n = 0; n < 4; n++)
279 regs[n] = rd32(E1000_RDT(n));
280 break;
281 case E1000_RXDCTL(0):
282 for (n = 0; n < 4; n++)
283 regs[n] = rd32(E1000_RXDCTL(n));
284 break;
285 case E1000_RDBAL(0):
286 for (n = 0; n < 4; n++)
287 regs[n] = rd32(E1000_RDBAL(n));
288 break;
289 case E1000_RDBAH(0):
290 for (n = 0; n < 4; n++)
291 regs[n] = rd32(E1000_RDBAH(n));
292 break;
293 case E1000_TDBAL(0):
294 for (n = 0; n < 4; n++)
295 regs[n] = rd32(E1000_RDBAL(n));
296 break;
297 case E1000_TDBAH(0):
298 for (n = 0; n < 4; n++)
299 regs[n] = rd32(E1000_TDBAH(n));
300 break;
301 case E1000_TDLEN(0):
302 for (n = 0; n < 4; n++)
303 regs[n] = rd32(E1000_TDLEN(n));
304 break;
305 case E1000_TDH(0):
306 for (n = 0; n < 4; n++)
307 regs[n] = rd32(E1000_TDH(n));
308 break;
309 case E1000_TDT(0):
310 for (n = 0; n < 4; n++)
311 regs[n] = rd32(E1000_TDT(n));
312 break;
313 case E1000_TXDCTL(0):
314 for (n = 0; n < 4; n++)
315 regs[n] = rd32(E1000_TXDCTL(n));
316 break;
317 default:
318 printk(KERN_INFO "%-15s %08x\n",
319 reginfo->name, rd32(reginfo->ofs));
320 return;
321 }
322
323 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
324 printk(KERN_INFO "%-15s ", rname);
325 for (n = 0; n < 4; n++)
326 printk(KERN_CONT "%08x ", regs[n]);
327 printk(KERN_CONT "\n");
328 }
329
330 /*
331 * igb_dump - Print registers, tx-rings and rx-rings
332 */
333 static void igb_dump(struct igb_adapter *adapter)
334 {
335 struct net_device *netdev = adapter->netdev;
336 struct e1000_hw *hw = &adapter->hw;
337 struct igb_reg_info *reginfo;
338 int n = 0;
339 struct igb_ring *tx_ring;
340 union e1000_adv_tx_desc *tx_desc;
341 struct my_u0 { u64 a; u64 b; } *u0;
342 struct igb_buffer *buffer_info;
343 struct igb_ring *rx_ring;
344 union e1000_adv_rx_desc *rx_desc;
345 u32 staterr;
346 int i = 0;
347
348 if (!netif_msg_hw(adapter))
349 return;
350
351 /* Print netdevice Info */
352 if (netdev) {
353 dev_info(&adapter->pdev->dev, "Net device Info\n");
354 printk(KERN_INFO "Device Name state "
355 "trans_start last_rx\n");
356 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
357 netdev->name,
358 netdev->state,
359 netdev->trans_start,
360 netdev->last_rx);
361 }
362
363 /* Print Registers */
364 dev_info(&adapter->pdev->dev, "Register Dump\n");
365 printk(KERN_INFO " Register Name Value\n");
366 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
367 reginfo->name; reginfo++) {
368 igb_regdump(hw, reginfo);
369 }
370
371 /* Print TX Ring Summary */
372 if (!netdev || !netif_running(netdev))
373 goto exit;
374
375 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
376 printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma ]"
377 " leng ntw timestamp\n");
378 for (n = 0; n < adapter->num_tx_queues; n++) {
379 tx_ring = adapter->tx_ring[n];
380 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
381 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
382 n, tx_ring->next_to_use, tx_ring->next_to_clean,
383 (u64)buffer_info->dma,
384 buffer_info->length,
385 buffer_info->next_to_watch,
386 (u64)buffer_info->time_stamp);
387 }
388
389 /* Print TX Rings */
390 if (!netif_msg_tx_done(adapter))
391 goto rx_ring_summary;
392
393 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
394
395 /* Transmit Descriptor Formats
396 *
397 * Advanced Transmit Descriptor
398 * +--------------------------------------------------------------+
399 * 0 | Buffer Address [63:0] |
400 * +--------------------------------------------------------------+
401 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
402 * +--------------------------------------------------------------+
403 * 63 46 45 40 39 38 36 35 32 31 24 15 0
404 */
405
406 for (n = 0; n < adapter->num_tx_queues; n++) {
407 tx_ring = adapter->tx_ring[n];
408 printk(KERN_INFO "------------------------------------\n");
409 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
410 printk(KERN_INFO "------------------------------------\n");
411 printk(KERN_INFO "T [desc] [address 63:0 ] "
412 "[PlPOCIStDDM Ln] [bi->dma ] "
413 "leng ntw timestamp bi->skb\n");
414
415 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
416 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
417 buffer_info = &tx_ring->buffer_info[i];
418 u0 = (struct my_u0 *)tx_desc;
419 printk(KERN_INFO "T [0x%03X] %016llX %016llX %016llX"
420 " %04X %3X %016llX %p", i,
421 le64_to_cpu(u0->a),
422 le64_to_cpu(u0->b),
423 (u64)buffer_info->dma,
424 buffer_info->length,
425 buffer_info->next_to_watch,
426 (u64)buffer_info->time_stamp,
427 buffer_info->skb);
428 if (i == tx_ring->next_to_use &&
429 i == tx_ring->next_to_clean)
430 printk(KERN_CONT " NTC/U\n");
431 else if (i == tx_ring->next_to_use)
432 printk(KERN_CONT " NTU\n");
433 else if (i == tx_ring->next_to_clean)
434 printk(KERN_CONT " NTC\n");
435 else
436 printk(KERN_CONT "\n");
437
438 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
439 print_hex_dump(KERN_INFO, "",
440 DUMP_PREFIX_ADDRESS,
441 16, 1, phys_to_virt(buffer_info->dma),
442 buffer_info->length, true);
443 }
444 }
445
446 /* Print RX Rings Summary */
447 rx_ring_summary:
448 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
449 printk(KERN_INFO "Queue [NTU] [NTC]\n");
450 for (n = 0; n < adapter->num_rx_queues; n++) {
451 rx_ring = adapter->rx_ring[n];
452 printk(KERN_INFO " %5d %5X %5X\n", n,
453 rx_ring->next_to_use, rx_ring->next_to_clean);
454 }
455
456 /* Print RX Rings */
457 if (!netif_msg_rx_status(adapter))
458 goto exit;
459
460 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
461
462 /* Advanced Receive Descriptor (Read) Format
463 * 63 1 0
464 * +-----------------------------------------------------+
465 * 0 | Packet Buffer Address [63:1] |A0/NSE|
466 * +----------------------------------------------+------+
467 * 8 | Header Buffer Address [63:1] | DD |
468 * +-----------------------------------------------------+
469 *
470 *
471 * Advanced Receive Descriptor (Write-Back) Format
472 *
473 * 63 48 47 32 31 30 21 20 17 16 4 3 0
474 * +------------------------------------------------------+
475 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
476 * | Checksum Ident | | | | Type | Type |
477 * +------------------------------------------------------+
478 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
479 * +------------------------------------------------------+
480 * 63 48 47 32 31 20 19 0
481 */
482
483 for (n = 0; n < adapter->num_rx_queues; n++) {
484 rx_ring = adapter->rx_ring[n];
485 printk(KERN_INFO "------------------------------------\n");
486 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
487 printk(KERN_INFO "------------------------------------\n");
488 printk(KERN_INFO "R [desc] [ PktBuf A0] "
489 "[ HeadBuf DD] [bi->dma ] [bi->skb] "
490 "<-- Adv Rx Read format\n");
491 printk(KERN_INFO "RWB[desc] [PcsmIpSHl PtRs] "
492 "[vl er S cks ln] ---------------- [bi->skb] "
493 "<-- Adv Rx Write-Back format\n");
494
495 for (i = 0; i < rx_ring->count; i++) {
496 buffer_info = &rx_ring->buffer_info[i];
497 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
498 u0 = (struct my_u0 *)rx_desc;
499 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
500 if (staterr & E1000_RXD_STAT_DD) {
501 /* Descriptor Done */
502 printk(KERN_INFO "RWB[0x%03X] %016llX "
503 "%016llX ---------------- %p", i,
504 le64_to_cpu(u0->a),
505 le64_to_cpu(u0->b),
506 buffer_info->skb);
507 } else {
508 printk(KERN_INFO "R [0x%03X] %016llX "
509 "%016llX %016llX %p", i,
510 le64_to_cpu(u0->a),
511 le64_to_cpu(u0->b),
512 (u64)buffer_info->dma,
513 buffer_info->skb);
514
515 if (netif_msg_pktdata(adapter)) {
516 print_hex_dump(KERN_INFO, "",
517 DUMP_PREFIX_ADDRESS,
518 16, 1,
519 phys_to_virt(buffer_info->dma),
520 IGB_RX_HDR_LEN, true);
521 print_hex_dump(KERN_INFO, "",
522 DUMP_PREFIX_ADDRESS,
523 16, 1,
524 phys_to_virt(
525 buffer_info->page_dma +
526 buffer_info->page_offset),
527 PAGE_SIZE/2, true);
528 }
529 }
530
531 if (i == rx_ring->next_to_use)
532 printk(KERN_CONT " NTU\n");
533 else if (i == rx_ring->next_to_clean)
534 printk(KERN_CONT " NTC\n");
535 else
536 printk(KERN_CONT "\n");
537
538 }
539 }
540
541 exit:
542 return;
543 }
544
545
546 /**
547 * igb_read_clock - read raw cycle counter (to be used by time counter)
548 */
549 static cycle_t igb_read_clock(const struct cyclecounter *tc)
550 {
551 struct igb_adapter *adapter =
552 container_of(tc, struct igb_adapter, cycles);
553 struct e1000_hw *hw = &adapter->hw;
554 u64 stamp = 0;
555 int shift = 0;
556
557 /*
558 * The timestamp latches on lowest register read. For the 82580
559 * the lowest register is SYSTIMR instead of SYSTIML. However we never
560 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
561 */
562 if (hw->mac.type == e1000_82580) {
563 stamp = rd32(E1000_SYSTIMR) >> 8;
564 shift = IGB_82580_TSYNC_SHIFT;
565 }
566
567 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
568 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
569 return stamp;
570 }
571
572 /**
573 * igb_get_hw_dev - return device
574 * used by hardware layer to print debugging information
575 **/
576 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
577 {
578 struct igb_adapter *adapter = hw->back;
579 return adapter->netdev;
580 }
581
582 /**
583 * igb_init_module - Driver Registration Routine
584 *
585 * igb_init_module is the first routine called when the driver is
586 * loaded. All it does is register with the PCI subsystem.
587 **/
588 static int __init igb_init_module(void)
589 {
590 int ret;
591 printk(KERN_INFO "%s - version %s\n",
592 igb_driver_string, igb_driver_version);
593
594 printk(KERN_INFO "%s\n", igb_copyright);
595
596 #ifdef CONFIG_IGB_DCA
597 dca_register_notify(&dca_notifier);
598 #endif
599 ret = pci_register_driver(&igb_driver);
600 return ret;
601 }
602
603 module_init(igb_init_module);
604
605 /**
606 * igb_exit_module - Driver Exit Cleanup Routine
607 *
608 * igb_exit_module is called just before the driver is removed
609 * from memory.
610 **/
611 static void __exit igb_exit_module(void)
612 {
613 #ifdef CONFIG_IGB_DCA
614 dca_unregister_notify(&dca_notifier);
615 #endif
616 pci_unregister_driver(&igb_driver);
617 }
618
619 module_exit(igb_exit_module);
620
/*
 * Map a sequential queue number to its 82576 queue index: even numbers map
 * to i / 2, odd numbers to 8 + i / 2 (0, 8, 1, 9, ...).  The argument is
 * parenthesized so that expression arguments expand correctly.
 */
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
622 /**
623 * igb_cache_ring_register - Descriptor ring to register mapping
624 * @adapter: board private structure to initialize
625 *
626 * Once we know the feature-set enabled for the device, we'll cache
627 * the register offset the descriptor ring is assigned to.
628 **/
629 static void igb_cache_ring_register(struct igb_adapter *adapter)
630 {
631 int i = 0, j = 0;
632 u32 rbase_offset = adapter->vfs_allocated_count;
633
634 switch (adapter->hw.mac.type) {
635 case e1000_82576:
636 /* The queues are allocated for virtualization such that VF 0
637 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
638 * In order to avoid collision we start at the first free queue
639 * and continue consuming queues in the same sequence
640 */
641 if (adapter->vfs_allocated_count) {
642 for (; i < adapter->rss_queues; i++)
643 adapter->rx_ring[i]->reg_idx = rbase_offset +
644 Q_IDX_82576(i);
645 }
646 case e1000_82575:
647 case e1000_82580:
648 case e1000_i350:
649 default:
650 for (; i < adapter->num_rx_queues; i++)
651 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
652 for (; j < adapter->num_tx_queues; j++)
653 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
654 break;
655 }
656 }
657
658 static void igb_free_queues(struct igb_adapter *adapter)
659 {
660 int i;
661
662 for (i = 0; i < adapter->num_tx_queues; i++) {
663 kfree(adapter->tx_ring[i]);
664 adapter->tx_ring[i] = NULL;
665 }
666 for (i = 0; i < adapter->num_rx_queues; i++) {
667 kfree(adapter->rx_ring[i]);
668 adapter->rx_ring[i] = NULL;
669 }
670 adapter->num_rx_queues = 0;
671 adapter->num_tx_queues = 0;
672 }
673
674 /**
675 * igb_alloc_queues - Allocate memory for all rings
676 * @adapter: board private structure to initialize
677 *
678 * We allocate one ring per queue at run-time since we don't know the
679 * number of queues at compile-time.
680 **/
681 static int igb_alloc_queues(struct igb_adapter *adapter)
682 {
683 struct igb_ring *ring;
684 int i;
685
686 for (i = 0; i < adapter->num_tx_queues; i++) {
687 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
688 if (!ring)
689 goto err;
690 ring->count = adapter->tx_ring_count;
691 ring->queue_index = i;
692 ring->dev = &adapter->pdev->dev;
693 ring->netdev = adapter->netdev;
694 /* For 82575, context index must be unique per ring. */
695 if (adapter->hw.mac.type == e1000_82575)
696 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
697 adapter->tx_ring[i] = ring;
698 }
699
700 for (i = 0; i < adapter->num_rx_queues; i++) {
701 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
702 if (!ring)
703 goto err;
704 ring->count = adapter->rx_ring_count;
705 ring->queue_index = i;
706 ring->dev = &adapter->pdev->dev;
707 ring->netdev = adapter->netdev;
708 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
709 /* set flag indicating ring supports SCTP checksum offload */
710 if (adapter->hw.mac.type >= e1000_82576)
711 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
712 adapter->rx_ring[i] = ring;
713 }
714
715 igb_cache_ring_register(adapter);
716
717 return 0;
718
719 err:
720 igb_free_queues(adapter);
721
722 return -ENOMEM;
723 }
724
725 #define IGB_N0_QUEUE -1
726 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
727 {
728 u32 msixbm = 0;
729 struct igb_adapter *adapter = q_vector->adapter;
730 struct e1000_hw *hw = &adapter->hw;
731 u32 ivar, index;
732 int rx_queue = IGB_N0_QUEUE;
733 int tx_queue = IGB_N0_QUEUE;
734
735 if (q_vector->rx_ring)
736 rx_queue = q_vector->rx_ring->reg_idx;
737 if (q_vector->tx_ring)
738 tx_queue = q_vector->tx_ring->reg_idx;
739
740 switch (hw->mac.type) {
741 case e1000_82575:
742 /* The 82575 assigns vectors using a bitmask, which matches the
743 bitmask for the EICR/EIMS/EIMC registers. To assign one
744 or more queues to a vector, we write the appropriate bits
745 into the MSIXBM register for that vector. */
746 if (rx_queue > IGB_N0_QUEUE)
747 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
748 if (tx_queue > IGB_N0_QUEUE)
749 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
750 if (!adapter->msix_entries && msix_vector == 0)
751 msixbm |= E1000_EIMS_OTHER;
752 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
753 q_vector->eims_value = msixbm;
754 break;
755 case e1000_82576:
756 /* 82576 uses a table-based method for assigning vectors.
757 Each queue has a single entry in the table to which we write
758 a vector number along with a "valid" bit. Sadly, the layout
759 of the table is somewhat counterintuitive. */
760 if (rx_queue > IGB_N0_QUEUE) {
761 index = (rx_queue & 0x7);
762 ivar = array_rd32(E1000_IVAR0, index);
763 if (rx_queue < 8) {
764 /* vector goes into low byte of register */
765 ivar = ivar & 0xFFFFFF00;
766 ivar |= msix_vector | E1000_IVAR_VALID;
767 } else {
768 /* vector goes into third byte of register */
769 ivar = ivar & 0xFF00FFFF;
770 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
771 }
772 array_wr32(E1000_IVAR0, index, ivar);
773 }
774 if (tx_queue > IGB_N0_QUEUE) {
775 index = (tx_queue & 0x7);
776 ivar = array_rd32(E1000_IVAR0, index);
777 if (tx_queue < 8) {
778 /* vector goes into second byte of register */
779 ivar = ivar & 0xFFFF00FF;
780 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
781 } else {
782 /* vector goes into high byte of register */
783 ivar = ivar & 0x00FFFFFF;
784 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
785 }
786 array_wr32(E1000_IVAR0, index, ivar);
787 }
788 q_vector->eims_value = 1 << msix_vector;
789 break;
790 case e1000_82580:
791 case e1000_i350:
792 /* 82580 uses the same table-based approach as 82576 but has fewer
793 entries as a result we carry over for queues greater than 4. */
794 if (rx_queue > IGB_N0_QUEUE) {
795 index = (rx_queue >> 1);
796 ivar = array_rd32(E1000_IVAR0, index);
797 if (rx_queue & 0x1) {
798 /* vector goes into third byte of register */
799 ivar = ivar & 0xFF00FFFF;
800 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
801 } else {
802 /* vector goes into low byte of register */
803 ivar = ivar & 0xFFFFFF00;
804 ivar |= msix_vector | E1000_IVAR_VALID;
805 }
806 array_wr32(E1000_IVAR0, index, ivar);
807 }
808 if (tx_queue > IGB_N0_QUEUE) {
809 index = (tx_queue >> 1);
810 ivar = array_rd32(E1000_IVAR0, index);
811 if (tx_queue & 0x1) {
812 /* vector goes into high byte of register */
813 ivar = ivar & 0x00FFFFFF;
814 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
815 } else {
816 /* vector goes into second byte of register */
817 ivar = ivar & 0xFFFF00FF;
818 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
819 }
820 array_wr32(E1000_IVAR0, index, ivar);
821 }
822 q_vector->eims_value = 1 << msix_vector;
823 break;
824 default:
825 BUG();
826 break;
827 }
828
829 /* add q_vector eims value to global eims_enable_mask */
830 adapter->eims_enable_mask |= q_vector->eims_value;
831
832 /* configure q_vector to set itr on first interrupt */
833 q_vector->set_itr = 1;
834 }
835
836 /**
837 * igb_configure_msix - Configure MSI-X hardware
838 *
839 * igb_configure_msix sets up the hardware to properly
840 * generate MSI-X interrupts.
841 **/
842 static void igb_configure_msix(struct igb_adapter *adapter)
843 {
844 u32 tmp;
845 int i, vector = 0;
846 struct e1000_hw *hw = &adapter->hw;
847
848 adapter->eims_enable_mask = 0;
849
850 /* set vector for other causes, i.e. link changes */
851 switch (hw->mac.type) {
852 case e1000_82575:
853 tmp = rd32(E1000_CTRL_EXT);
854 /* enable MSI-X PBA support*/
855 tmp |= E1000_CTRL_EXT_PBA_CLR;
856
857 /* Auto-Mask interrupts upon ICR read. */
858 tmp |= E1000_CTRL_EXT_EIAME;
859 tmp |= E1000_CTRL_EXT_IRCA;
860
861 wr32(E1000_CTRL_EXT, tmp);
862
863 /* enable msix_other interrupt */
864 array_wr32(E1000_MSIXBM(0), vector++,
865 E1000_EIMS_OTHER);
866 adapter->eims_other = E1000_EIMS_OTHER;
867
868 break;
869
870 case e1000_82576:
871 case e1000_82580:
872 case e1000_i350:
873 /* Turn on MSI-X capability first, or our settings
874 * won't stick. And it will take days to debug. */
875 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
876 E1000_GPIE_PBA | E1000_GPIE_EIAME |
877 E1000_GPIE_NSICR);
878
879 /* enable msix_other interrupt */
880 adapter->eims_other = 1 << vector;
881 tmp = (vector++ | E1000_IVAR_VALID) << 8;
882
883 wr32(E1000_IVAR_MISC, tmp);
884 break;
885 default:
886 /* do nothing, since nothing else supports MSI-X */
887 break;
888 } /* switch (hw->mac.type) */
889
890 adapter->eims_enable_mask |= adapter->eims_other;
891
892 for (i = 0; i < adapter->num_q_vectors; i++)
893 igb_assign_vector(adapter->q_vector[i], vector++);
894
895 wrfl();
896 }
897
898 /**
899 * igb_request_msix - Initialize MSI-X interrupts
900 *
901 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
902 * kernel.
903 **/
904 static int igb_request_msix(struct igb_adapter *adapter)
905 {
906 struct net_device *netdev = adapter->netdev;
907 struct e1000_hw *hw = &adapter->hw;
908 int i, err = 0, vector = 0;
909
910 err = request_irq(adapter->msix_entries[vector].vector,
911 igb_msix_other, 0, netdev->name, adapter);
912 if (err)
913 goto out;
914 vector++;
915
916 for (i = 0; i < adapter->num_q_vectors; i++) {
917 struct igb_q_vector *q_vector = adapter->q_vector[i];
918
919 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
920
921 if (q_vector->rx_ring && q_vector->tx_ring)
922 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
923 q_vector->rx_ring->queue_index);
924 else if (q_vector->tx_ring)
925 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
926 q_vector->tx_ring->queue_index);
927 else if (q_vector->rx_ring)
928 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
929 q_vector->rx_ring->queue_index);
930 else
931 sprintf(q_vector->name, "%s-unused", netdev->name);
932
933 err = request_irq(adapter->msix_entries[vector].vector,
934 igb_msix_ring, 0, q_vector->name,
935 q_vector);
936 if (err)
937 goto out;
938 vector++;
939 }
940
941 igb_configure_msix(adapter);
942 return 0;
943 out:
944 return err;
945 }
946
947 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
948 {
949 if (adapter->msix_entries) {
950 pci_disable_msix(adapter->pdev);
951 kfree(adapter->msix_entries);
952 adapter->msix_entries = NULL;
953 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
954 pci_disable_msi(adapter->pdev);
955 }
956 }
957
958 /**
959 * igb_free_q_vectors - Free memory allocated for interrupt vectors
960 * @adapter: board private structure to initialize
961 *
962 * This function frees the memory allocated to the q_vectors. In addition if
963 * NAPI is enabled it will delete any references to the NAPI struct prior
964 * to freeing the q_vector.
965 **/
966 static void igb_free_q_vectors(struct igb_adapter *adapter)
967 {
968 int v_idx;
969
970 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
971 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
972 adapter->q_vector[v_idx] = NULL;
973 if (!q_vector)
974 continue;
975 netif_napi_del(&q_vector->napi);
976 kfree(q_vector);
977 }
978 adapter->num_q_vectors = 0;
979 }
980
/**
 * igb_clear_interrupt_scheme - tear down queues, vectors and MSI/MSI-X
 * @adapter: board private structure
 *
 * Undoes the interrupt scheme in three steps, in this order: the queues are
 * freed, then the q_vectors, and finally MSI-X/MSI is switched off via
 * igb_reset_interrupt_capability().
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* rings first, then the vectors that referenced them */
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);

	/* finally give the MSI-X/MSI resources back */
	igb_reset_interrupt_capability(adapter);
}
993
994 /**
995 * igb_set_interrupt_capability - set MSI or MSI-X if supported
996 *
997 * Attempt to configure interrupts using the best available
998 * capabilities of the hardware and kernel.
999 **/
1000 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1001 {
1002 int err;
1003 int numvecs, i;
1004
1005 /* Number of supported queues. */
1006 adapter->num_rx_queues = adapter->rss_queues;
1007 if (adapter->vfs_allocated_count)
1008 adapter->num_tx_queues = 1;
1009 else
1010 adapter->num_tx_queues = adapter->rss_queues;
1011
1012 /* start with one vector for every rx queue */
1013 numvecs = adapter->num_rx_queues;
1014
1015 /* if tx handler is separate add 1 for every tx queue */
1016 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1017 numvecs += adapter->num_tx_queues;
1018
1019 /* store the number of vectors reserved for queues */
1020 adapter->num_q_vectors = numvecs;
1021
1022 /* add 1 vector for link status interrupts */
1023 numvecs++;
1024 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1025 GFP_KERNEL);
1026 if (!adapter->msix_entries)
1027 goto msi_only;
1028
1029 for (i = 0; i < numvecs; i++)
1030 adapter->msix_entries[i].entry = i;
1031
1032 err = pci_enable_msix(adapter->pdev,
1033 adapter->msix_entries,
1034 numvecs);
1035 if (err == 0)
1036 goto out;
1037
1038 igb_reset_interrupt_capability(adapter);
1039
1040 /* If we can't do MSI-X, try MSI */
1041 msi_only:
1042 #ifdef CONFIG_PCI_IOV
1043 /* disable SR-IOV for non MSI-X configurations */
1044 if (adapter->vf_data) {
1045 struct e1000_hw *hw = &adapter->hw;
1046 /* disable iov and allow time for transactions to clear */
1047 pci_disable_sriov(adapter->pdev);
1048 msleep(500);
1049
1050 kfree(adapter->vf_data);
1051 adapter->vf_data = NULL;
1052 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1053 wrfl();
1054 msleep(100);
1055 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1056 }
1057 #endif
1058 adapter->vfs_allocated_count = 0;
1059 adapter->rss_queues = 1;
1060 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1061 adapter->num_rx_queues = 1;
1062 adapter->num_tx_queues = 1;
1063 adapter->num_q_vectors = 1;
1064 if (!pci_enable_msi(adapter->pdev))
1065 adapter->flags |= IGB_FLAG_HAS_MSI;
1066 out:
1067 /* Notify the stack of the (possibly) reduced queue counts. */
1068 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1069 return netif_set_real_num_rx_queues(adapter->netdev,
1070 adapter->num_rx_queues);
1071 }
1072
1073 /**
1074 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1075 * @adapter: board private structure to initialize
1076 *
1077 * We allocate one q_vector per queue interrupt. If allocation fails we
1078 * return -ENOMEM.
1079 **/
1080 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1081 {
1082 struct igb_q_vector *q_vector;
1083 struct e1000_hw *hw = &adapter->hw;
1084 int v_idx;
1085
1086 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1087 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1088 if (!q_vector)
1089 goto err_out;
1090 q_vector->adapter = adapter;
1091 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1092 q_vector->itr_val = IGB_START_ITR;
1093 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1094 adapter->q_vector[v_idx] = q_vector;
1095 }
1096 return 0;
1097
1098 err_out:
1099 igb_free_q_vectors(adapter);
1100 return -ENOMEM;
1101 }
1102
1103 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1104 int ring_idx, int v_idx)
1105 {
1106 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1107
1108 q_vector->rx_ring = adapter->rx_ring[ring_idx];
1109 q_vector->rx_ring->q_vector = q_vector;
1110 q_vector->itr_val = adapter->rx_itr_setting;
1111 if (q_vector->itr_val && q_vector->itr_val <= 3)
1112 q_vector->itr_val = IGB_START_ITR;
1113 }
1114
1115 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1116 int ring_idx, int v_idx)
1117 {
1118 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1119
1120 q_vector->tx_ring = adapter->tx_ring[ring_idx];
1121 q_vector->tx_ring->q_vector = q_vector;
1122 q_vector->itr_val = adapter->tx_itr_setting;
1123 if (q_vector->itr_val && q_vector->itr_val <= 3)
1124 q_vector->itr_val = IGB_START_ITR;
1125 }
1126
1127 /**
1128 * igb_map_ring_to_vector - maps allocated queues to vectors
1129 *
1130 * This function maps the recently allocated queues to vectors.
1131 **/
1132 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1133 {
1134 int i;
1135 int v_idx = 0;
1136
1137 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1138 (adapter->num_q_vectors < adapter->num_tx_queues))
1139 return -ENOMEM;
1140
1141 if (adapter->num_q_vectors >=
1142 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1143 for (i = 0; i < adapter->num_rx_queues; i++)
1144 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1145 for (i = 0; i < adapter->num_tx_queues; i++)
1146 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1147 } else {
1148 for (i = 0; i < adapter->num_rx_queues; i++) {
1149 if (i < adapter->num_tx_queues)
1150 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1151 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1152 }
1153 for (; i < adapter->num_tx_queues; i++)
1154 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1155 }
1156 return 0;
1157 }
1158
1159 /**
1160 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1161 *
1162 * This function initializes the interrupts and allocates all of the queues.
1163 **/
1164 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1165 {
1166 struct pci_dev *pdev = adapter->pdev;
1167 int err;
1168
1169 err = igb_set_interrupt_capability(adapter);
1170 if (err)
1171 return err;
1172
1173 err = igb_alloc_q_vectors(adapter);
1174 if (err) {
1175 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1176 goto err_alloc_q_vectors;
1177 }
1178
1179 err = igb_alloc_queues(adapter);
1180 if (err) {
1181 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1182 goto err_alloc_queues;
1183 }
1184
1185 err = igb_map_ring_to_vector(adapter);
1186 if (err) {
1187 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1188 goto err_map_queues;
1189 }
1190
1191
1192 return 0;
1193 err_map_queues:
1194 igb_free_queues(adapter);
1195 err_alloc_queues:
1196 igb_free_q_vectors(adapter);
1197 err_alloc_q_vectors:
1198 igb_reset_interrupt_capability(adapter);
1199 return err;
1200 }
1201
1202 /**
1203 * igb_request_irq - initialize interrupts
1204 *
1205 * Attempts to configure interrupts using the best available
1206 * capabilities of the hardware and kernel.
1207 **/
1208 static int igb_request_irq(struct igb_adapter *adapter)
1209 {
1210 struct net_device *netdev = adapter->netdev;
1211 struct pci_dev *pdev = adapter->pdev;
1212 int err = 0;
1213
1214 if (adapter->msix_entries) {
1215 err = igb_request_msix(adapter);
1216 if (!err)
1217 goto request_done;
1218 /* fall back to MSI */
1219 igb_clear_interrupt_scheme(adapter);
1220 if (!pci_enable_msi(adapter->pdev))
1221 adapter->flags |= IGB_FLAG_HAS_MSI;
1222 igb_free_all_tx_resources(adapter);
1223 igb_free_all_rx_resources(adapter);
1224 adapter->num_tx_queues = 1;
1225 adapter->num_rx_queues = 1;
1226 adapter->num_q_vectors = 1;
1227 err = igb_alloc_q_vectors(adapter);
1228 if (err) {
1229 dev_err(&pdev->dev,
1230 "Unable to allocate memory for vectors\n");
1231 goto request_done;
1232 }
1233 err = igb_alloc_queues(adapter);
1234 if (err) {
1235 dev_err(&pdev->dev,
1236 "Unable to allocate memory for queues\n");
1237 igb_free_q_vectors(adapter);
1238 goto request_done;
1239 }
1240 igb_setup_all_tx_resources(adapter);
1241 igb_setup_all_rx_resources(adapter);
1242 } else {
1243 igb_assign_vector(adapter->q_vector[0], 0);
1244 }
1245
1246 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1247 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1248 netdev->name, adapter);
1249 if (!err)
1250 goto request_done;
1251
1252 /* fall back to legacy interrupts */
1253 igb_reset_interrupt_capability(adapter);
1254 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1255 }
1256
1257 err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1258 netdev->name, adapter);
1259
1260 if (err)
1261 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1262 err);
1263
1264 request_done:
1265 return err;
1266 }
1267
1268 static void igb_free_irq(struct igb_adapter *adapter)
1269 {
1270 if (adapter->msix_entries) {
1271 int vector = 0, i;
1272
1273 free_irq(adapter->msix_entries[vector++].vector, adapter);
1274
1275 for (i = 0; i < adapter->num_q_vectors; i++) {
1276 struct igb_q_vector *q_vector = adapter->q_vector[i];
1277 free_irq(adapter->msix_entries[vector++].vector,
1278 q_vector);
1279 }
1280 } else {
1281 free_irq(adapter->pdev->irq, adapter);
1282 }
1283 }
1284
1285 /**
1286 * igb_irq_disable - Mask off interrupt generation on the NIC
1287 * @adapter: board private structure
1288 **/
1289 static void igb_irq_disable(struct igb_adapter *adapter)
1290 {
1291 struct e1000_hw *hw = &adapter->hw;
1292
1293 /*
1294 * we need to be careful when disabling interrupts. The VFs are also
1295 * mapped into these registers and so clearing the bits can cause
1296 * issues on the VF drivers so we only need to clear what we set
1297 */
1298 if (adapter->msix_entries) {
1299 u32 regval = rd32(E1000_EIAM);
1300 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1301 wr32(E1000_EIMC, adapter->eims_enable_mask);
1302 regval = rd32(E1000_EIAC);
1303 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1304 }
1305
1306 wr32(E1000_IAM, 0);
1307 wr32(E1000_IMC, ~0);
1308 wrfl();
1309 if (adapter->msix_entries) {
1310 int i;
1311 for (i = 0; i < adapter->num_q_vectors; i++)
1312 synchronize_irq(adapter->msix_entries[i].vector);
1313 } else {
1314 synchronize_irq(adapter->pdev->irq);
1315 }
1316 }
1317
1318 /**
1319 * igb_irq_enable - Enable default interrupt generation settings
1320 * @adapter: board private structure
1321 **/
1322 static void igb_irq_enable(struct igb_adapter *adapter)
1323 {
1324 struct e1000_hw *hw = &adapter->hw;
1325
1326 if (adapter->msix_entries) {
1327 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1328 u32 regval = rd32(E1000_EIAC);
1329 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1330 regval = rd32(E1000_EIAM);
1331 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1332 wr32(E1000_EIMS, adapter->eims_enable_mask);
1333 if (adapter->vfs_allocated_count) {
1334 wr32(E1000_MBVFIMR, 0xFF);
1335 ims |= E1000_IMS_VMMB;
1336 }
1337 if (adapter->hw.mac.type == e1000_82580)
1338 ims |= E1000_IMS_DRSTA;
1339
1340 wr32(E1000_IMS, ims);
1341 } else {
1342 wr32(E1000_IMS, IMS_ENABLE_MASK |
1343 E1000_IMS_DRSTA);
1344 wr32(E1000_IAM, IMS_ENABLE_MASK |
1345 E1000_IMS_DRSTA);
1346 }
1347 }
1348
1349 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1350 {
1351 struct e1000_hw *hw = &adapter->hw;
1352 u16 vid = adapter->hw.mng_cookie.vlan_id;
1353 u16 old_vid = adapter->mng_vlan_id;
1354
1355 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1356 /* add VID to filter table */
1357 igb_vfta_set(hw, vid, true);
1358 adapter->mng_vlan_id = vid;
1359 } else {
1360 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1361 }
1362
1363 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1364 (vid != old_vid) &&
1365 !test_bit(old_vid, adapter->active_vlans)) {
1366 /* remove VID from filter table */
1367 igb_vfta_set(hw, old_vid, false);
1368 }
1369 }
1370
1371 /**
1372 * igb_release_hw_control - release control of the h/w to f/w
1373 * @adapter: address of board private structure
1374 *
1375 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1376 * For ASF and Pass Through versions of f/w this means that the
1377 * driver is no longer loaded.
1378 *
1379 **/
1380 static void igb_release_hw_control(struct igb_adapter *adapter)
1381 {
1382 struct e1000_hw *hw = &adapter->hw;
1383 u32 ctrl_ext;
1384
1385 /* Let firmware take over control of h/w */
1386 ctrl_ext = rd32(E1000_CTRL_EXT);
1387 wr32(E1000_CTRL_EXT,
1388 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1389 }
1390
1391 /**
1392 * igb_get_hw_control - get control of the h/w from f/w
1393 * @adapter: address of board private structure
1394 *
1395 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1396 * For ASF and Pass Through versions of f/w this means that
1397 * the driver is loaded.
1398 *
1399 **/
1400 static void igb_get_hw_control(struct igb_adapter *adapter)
1401 {
1402 struct e1000_hw *hw = &adapter->hw;
1403 u32 ctrl_ext;
1404
1405 /* Let firmware know the driver has taken over */
1406 ctrl_ext = rd32(E1000_CTRL_EXT);
1407 wr32(E1000_CTRL_EXT,
1408 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1409 }
1410
1411 /**
1412 * igb_configure - configure the hardware for RX and TX
1413 * @adapter: private board structure
1414 **/
1415 static void igb_configure(struct igb_adapter *adapter)
1416 {
1417 struct net_device *netdev = adapter->netdev;
1418 int i;
1419
1420 igb_get_hw_control(adapter);
1421 igb_set_rx_mode(netdev);
1422
1423 igb_restore_vlan(adapter);
1424
1425 igb_setup_tctl(adapter);
1426 igb_setup_mrqc(adapter);
1427 igb_setup_rctl(adapter);
1428
1429 igb_configure_tx(adapter);
1430 igb_configure_rx(adapter);
1431
1432 igb_rx_fifo_flush_82575(&adapter->hw);
1433
1434 /* call igb_desc_unused which always leaves
1435 * at least 1 descriptor unused to make sure
1436 * next_to_use != next_to_clean */
1437 for (i = 0; i < adapter->num_rx_queues; i++) {
1438 struct igb_ring *ring = adapter->rx_ring[i];
1439 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1440 }
1441 }
1442
1443 /**
1444 * igb_power_up_link - Power up the phy/serdes link
1445 * @adapter: address of board private structure
1446 **/
1447 void igb_power_up_link(struct igb_adapter *adapter)
1448 {
1449 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1450 igb_power_up_phy_copper(&adapter->hw);
1451 else
1452 igb_power_up_serdes_link_82575(&adapter->hw);
1453 }
1454
1455 /**
1456 * igb_power_down_link - Power down the phy/serdes link
1457 * @adapter: address of board private structure
1458 */
1459 static void igb_power_down_link(struct igb_adapter *adapter)
1460 {
1461 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1462 igb_power_down_phy_copper_82575(&adapter->hw);
1463 else
1464 igb_shutdown_serdes_link_82575(&adapter->hw);
1465 }
1466
1467 /**
1468 * igb_up - Open the interface and prepare it to handle traffic
1469 * @adapter: board private structure
1470 **/
1471 int igb_up(struct igb_adapter *adapter)
1472 {
1473 struct e1000_hw *hw = &adapter->hw;
1474 int i;
1475
1476 /* hardware has been reset, we need to reload some things */
1477 igb_configure(adapter);
1478
1479 clear_bit(__IGB_DOWN, &adapter->state);
1480
1481 for (i = 0; i < adapter->num_q_vectors; i++) {
1482 struct igb_q_vector *q_vector = adapter->q_vector[i];
1483 napi_enable(&q_vector->napi);
1484 }
1485 if (adapter->msix_entries)
1486 igb_configure_msix(adapter);
1487 else
1488 igb_assign_vector(adapter->q_vector[0], 0);
1489
1490 /* Clear any pending interrupts. */
1491 rd32(E1000_ICR);
1492 igb_irq_enable(adapter);
1493
1494 /* notify VFs that reset has been completed */
1495 if (adapter->vfs_allocated_count) {
1496 u32 reg_data = rd32(E1000_CTRL_EXT);
1497 reg_data |= E1000_CTRL_EXT_PFRSTD;
1498 wr32(E1000_CTRL_EXT, reg_data);
1499 }
1500
1501 netif_tx_start_all_queues(adapter->netdev);
1502
1503 /* start the watchdog. */
1504 hw->mac.get_link_status = 1;
1505 schedule_work(&adapter->watchdog_task);
1506
1507 return 0;
1508 }
1509
1510 void igb_down(struct igb_adapter *adapter)
1511 {
1512 struct net_device *netdev = adapter->netdev;
1513 struct e1000_hw *hw = &adapter->hw;
1514 u32 tctl, rctl;
1515 int i;
1516
1517 /* signal that we're down so the interrupt handler does not
1518 * reschedule our watchdog timer */
1519 set_bit(__IGB_DOWN, &adapter->state);
1520
1521 /* disable receives in the hardware */
1522 rctl = rd32(E1000_RCTL);
1523 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1524 /* flush and sleep below */
1525
1526 netif_tx_stop_all_queues(netdev);
1527
1528 /* disable transmits in the hardware */
1529 tctl = rd32(E1000_TCTL);
1530 tctl &= ~E1000_TCTL_EN;
1531 wr32(E1000_TCTL, tctl);
1532 /* flush both disables and wait for them to finish */
1533 wrfl();
1534 msleep(10);
1535
1536 for (i = 0; i < adapter->num_q_vectors; i++) {
1537 struct igb_q_vector *q_vector = adapter->q_vector[i];
1538 napi_disable(&q_vector->napi);
1539 }
1540
1541 igb_irq_disable(adapter);
1542
1543 del_timer_sync(&adapter->watchdog_timer);
1544 del_timer_sync(&adapter->phy_info_timer);
1545
1546 netif_carrier_off(netdev);
1547
1548 /* record the stats before reset*/
1549 spin_lock(&adapter->stats64_lock);
1550 igb_update_stats(adapter, &adapter->stats64);
1551 spin_unlock(&adapter->stats64_lock);
1552
1553 adapter->link_speed = 0;
1554 adapter->link_duplex = 0;
1555
1556 if (!pci_channel_offline(adapter->pdev))
1557 igb_reset(adapter);
1558 igb_clean_all_tx_rings(adapter);
1559 igb_clean_all_rx_rings(adapter);
1560 #ifdef CONFIG_IGB_DCA
1561
1562 /* since we reset the hardware DCA settings were cleared */
1563 igb_setup_dca(adapter);
1564 #endif
1565 }
1566
1567 void igb_reinit_locked(struct igb_adapter *adapter)
1568 {
1569 WARN_ON(in_interrupt());
1570 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1571 msleep(1);
1572 igb_down(adapter);
1573 igb_up(adapter);
1574 clear_bit(__IGB_RESETTING, &adapter->state);
1575 }
1576
1577 void igb_reset(struct igb_adapter *adapter)
1578 {
1579 struct pci_dev *pdev = adapter->pdev;
1580 struct e1000_hw *hw = &adapter->hw;
1581 struct e1000_mac_info *mac = &hw->mac;
1582 struct e1000_fc_info *fc = &hw->fc;
1583 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1584 u16 hwm;
1585
1586 /* Repartition Pba for greater than 9k mtu
1587 * To take effect CTRL.RST is required.
1588 */
1589 switch (mac->type) {
1590 case e1000_i350:
1591 case e1000_82580:
1592 pba = rd32(E1000_RXPBS);
1593 pba = igb_rxpbs_adjust_82580(pba);
1594 break;
1595 case e1000_82576:
1596 pba = rd32(E1000_RXPBS);
1597 pba &= E1000_RXPBS_SIZE_MASK_82576;
1598 break;
1599 case e1000_82575:
1600 default:
1601 pba = E1000_PBA_34K;
1602 break;
1603 }
1604
1605 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1606 (mac->type < e1000_82576)) {
1607 /* adjust PBA for jumbo frames */
1608 wr32(E1000_PBA, pba);
1609
1610 /* To maintain wire speed transmits, the Tx FIFO should be
1611 * large enough to accommodate two full transmit packets,
1612 * rounded up to the next 1KB and expressed in KB. Likewise,
1613 * the Rx FIFO should be large enough to accommodate at least
1614 * one full receive packet and is similarly rounded up and
1615 * expressed in KB. */
1616 pba = rd32(E1000_PBA);
1617 /* upper 16 bits has Tx packet buffer allocation size in KB */
1618 tx_space = pba >> 16;
1619 /* lower 16 bits has Rx packet buffer allocation size in KB */
1620 pba &= 0xffff;
1621 /* the tx fifo also stores 16 bytes of information about the tx
1622 * but don't include ethernet FCS because hardware appends it */
1623 min_tx_space = (adapter->max_frame_size +
1624 sizeof(union e1000_adv_tx_desc) -
1625 ETH_FCS_LEN) * 2;
1626 min_tx_space = ALIGN(min_tx_space, 1024);
1627 min_tx_space >>= 10;
1628 /* software strips receive CRC, so leave room for it */
1629 min_rx_space = adapter->max_frame_size;
1630 min_rx_space = ALIGN(min_rx_space, 1024);
1631 min_rx_space >>= 10;
1632
1633 /* If current Tx allocation is less than the min Tx FIFO size,
1634 * and the min Tx FIFO size is less than the current Rx FIFO
1635 * allocation, take space away from current Rx allocation */
1636 if (tx_space < min_tx_space &&
1637 ((min_tx_space - tx_space) < pba)) {
1638 pba = pba - (min_tx_space - tx_space);
1639
1640 /* if short on rx space, rx wins and must trump tx
1641 * adjustment */
1642 if (pba < min_rx_space)
1643 pba = min_rx_space;
1644 }
1645 wr32(E1000_PBA, pba);
1646 }
1647
1648 /* flow control settings */
1649 /* The high water mark must be low enough to fit one full frame
1650 * (or the size used for early receive) above it in the Rx FIFO.
1651 * Set it to the lower of:
1652 * - 90% of the Rx FIFO size, or
1653 * - the full Rx FIFO size minus one full frame */
1654 hwm = min(((pba << 10) * 9 / 10),
1655 ((pba << 10) - 2 * adapter->max_frame_size));
1656
1657 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1658 fc->low_water = fc->high_water - 16;
1659 fc->pause_time = 0xFFFF;
1660 fc->send_xon = 1;
1661 fc->current_mode = fc->requested_mode;
1662
1663 /* disable receive for all VFs and wait one second */
1664 if (adapter->vfs_allocated_count) {
1665 int i;
1666 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1667 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1668
1669 /* ping all the active vfs to let them know we are going down */
1670 igb_ping_all_vfs(adapter);
1671
1672 /* disable transmits and receives */
1673 wr32(E1000_VFRE, 0);
1674 wr32(E1000_VFTE, 0);
1675 }
1676
1677 /* Allow time for pending master requests to run */
1678 hw->mac.ops.reset_hw(hw);
1679 wr32(E1000_WUC, 0);
1680
1681 if (hw->mac.ops.init_hw(hw))
1682 dev_err(&pdev->dev, "Hardware Error\n");
1683 if (hw->mac.type > e1000_82580) {
1684 if (adapter->flags & IGB_FLAG_DMAC) {
1685 u32 reg;
1686
1687 /*
1688 * DMA Coalescing high water mark needs to be higher
1689 * than * the * Rx threshold. The Rx threshold is
1690 * currently * pba - 6, so we * should use a high water
1691 * mark of pba * - 4. */
1692 hwm = (pba - 4) << 10;
1693
1694 reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1695 & E1000_DMACR_DMACTHR_MASK);
1696
1697 /* transition to L0x or L1 if available..*/
1698 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1699
1700 /* watchdog timer= +-1000 usec in 32usec intervals */
1701 reg |= (1000 >> 5);
1702 wr32(E1000_DMACR, reg);
1703
1704 /* no lower threshold to disable coalescing(smart fifb)
1705 * -UTRESH=0*/
1706 wr32(E1000_DMCRTRH, 0);
1707
1708 /* set hwm to PBA - 2 * max frame size */
1709 wr32(E1000_FCRTC, hwm);
1710
1711 /*
1712 * This sets the time to wait before requesting tran-
1713 * sition to * low power state to number of usecs needed
1714 * to receive 1 512 * byte frame at gigabit line rate
1715 */
1716 reg = rd32(E1000_DMCTLX);
1717 reg |= IGB_DMCTLX_DCFLUSH_DIS;
1718
1719 /* Delay 255 usec before entering Lx state. */
1720 reg |= 0xFF;
1721 wr32(E1000_DMCTLX, reg);
1722
1723 /* free space in Tx packet buffer to wake from DMAC */
1724 wr32(E1000_DMCTXTH,
1725 (IGB_MIN_TXPBSIZE -
1726 (IGB_TX_BUF_4096 + adapter->max_frame_size))
1727 >> 6);
1728
1729 /* make low power state decision controlled by DMAC */
1730 reg = rd32(E1000_PCIEMISC);
1731 reg |= E1000_PCIEMISC_LX_DECISION;
1732 wr32(E1000_PCIEMISC, reg);
1733 } /* end if IGB_FLAG_DMAC set */
1734 }
1735 if (hw->mac.type == e1000_82580) {
1736 u32 reg = rd32(E1000_PCIEMISC);
1737 wr32(E1000_PCIEMISC,
1738 reg & ~E1000_PCIEMISC_LX_DECISION);
1739 }
1740 if (!netif_running(adapter->netdev))
1741 igb_power_down_link(adapter);
1742
1743 igb_update_mng_vlan(adapter);
1744
1745 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1746 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1747
1748 igb_get_phy_info(hw);
1749 }
1750
1751 static u32 igb_fix_features(struct net_device *netdev, u32 features)
1752 {
1753 /*
1754 * Since there is no support for separate rx/tx vlan accel
1755 * enable/disable make sure tx flag is always in same state as rx.
1756 */
1757 if (features & NETIF_F_HW_VLAN_RX)
1758 features |= NETIF_F_HW_VLAN_TX;
1759 else
1760 features &= ~NETIF_F_HW_VLAN_TX;
1761
1762 return features;
1763 }
1764
1765 static int igb_set_features(struct net_device *netdev, u32 features)
1766 {
1767 struct igb_adapter *adapter = netdev_priv(netdev);
1768 int i;
1769 u32 changed = netdev->features ^ features;
1770
1771 for (i = 0; i < adapter->num_rx_queues; i++) {
1772 if (features & NETIF_F_RXCSUM)
1773 adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
1774 else
1775 adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
1776 }
1777
1778 if (changed & NETIF_F_HW_VLAN_RX)
1779 igb_vlan_mode(netdev, features);
1780
1781 return 0;
1782 }
1783
1784 static const struct net_device_ops igb_netdev_ops = {
1785 .ndo_open = igb_open,
1786 .ndo_stop = igb_close,
1787 .ndo_start_xmit = igb_xmit_frame_adv,
1788 .ndo_get_stats64 = igb_get_stats64,
1789 .ndo_set_rx_mode = igb_set_rx_mode,
1790 .ndo_set_mac_address = igb_set_mac,
1791 .ndo_change_mtu = igb_change_mtu,
1792 .ndo_do_ioctl = igb_ioctl,
1793 .ndo_tx_timeout = igb_tx_timeout,
1794 .ndo_validate_addr = eth_validate_addr,
1795 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1796 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1797 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1798 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1799 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1800 .ndo_get_vf_config = igb_ndo_get_vf_config,
1801 #ifdef CONFIG_NET_POLL_CONTROLLER
1802 .ndo_poll_controller = igb_netpoll,
1803 #endif
1804 .ndo_fix_features = igb_fix_features,
1805 .ndo_set_features = igb_set_features,
1806 };
1807
1808 /**
1809 * igb_probe - Device Initialization Routine
1810 * @pdev: PCI device information struct
1811 * @ent: entry in igb_pci_tbl
1812 *
1813 * Returns 0 on success, negative on failure
1814 *
1815 * igb_probe initializes an adapter identified by a pci_dev structure.
1816 * The OS initialization, configuring of the adapter private structure,
1817 * and a hardware reset occur.
1818 **/
1819 static int __devinit igb_probe(struct pci_dev *pdev,
1820 const struct pci_device_id *ent)
1821 {
1822 struct net_device *netdev;
1823 struct igb_adapter *adapter;
1824 struct e1000_hw *hw;
1825 u16 eeprom_data = 0;
1826 s32 ret_val;
1827 static int global_quad_port_a; /* global quad port a indication */
1828 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1829 unsigned long mmio_start, mmio_len;
1830 int err, pci_using_dac;
1831 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1832 u8 part_str[E1000_PBANUM_LENGTH];
1833
1834 /* Catch broken hardware that put the wrong VF device ID in
1835 * the PCIe SR-IOV capability.
1836 */
1837 if (pdev->is_virtfn) {
1838 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1839 pci_name(pdev), pdev->vendor, pdev->device);
1840 return -EINVAL;
1841 }
1842
1843 err = pci_enable_device_mem(pdev);
1844 if (err)
1845 return err;
1846
1847 pci_using_dac = 0;
1848 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1849 if (!err) {
1850 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1851 if (!err)
1852 pci_using_dac = 1;
1853 } else {
1854 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1855 if (err) {
1856 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1857 if (err) {
1858 dev_err(&pdev->dev, "No usable DMA "
1859 "configuration, aborting\n");
1860 goto err_dma;
1861 }
1862 }
1863 }
1864
1865 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1866 IORESOURCE_MEM),
1867 igb_driver_name);
1868 if (err)
1869 goto err_pci_reg;
1870
1871 pci_enable_pcie_error_reporting(pdev);
1872
1873 pci_set_master(pdev);
1874 pci_save_state(pdev);
1875
1876 err = -ENOMEM;
1877 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1878 IGB_ABS_MAX_TX_QUEUES);
1879 if (!netdev)
1880 goto err_alloc_etherdev;
1881
1882 SET_NETDEV_DEV(netdev, &pdev->dev);
1883
1884 pci_set_drvdata(pdev, netdev);
1885 adapter = netdev_priv(netdev);
1886 adapter->netdev = netdev;
1887 adapter->pdev = pdev;
1888 hw = &adapter->hw;
1889 hw->back = adapter;
1890 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1891
1892 mmio_start = pci_resource_start(pdev, 0);
1893 mmio_len = pci_resource_len(pdev, 0);
1894
1895 err = -EIO;
1896 hw->hw_addr = ioremap(mmio_start, mmio_len);
1897 if (!hw->hw_addr)
1898 goto err_ioremap;
1899
1900 netdev->netdev_ops = &igb_netdev_ops;
1901 igb_set_ethtool_ops(netdev);
1902 netdev->watchdog_timeo = 5 * HZ;
1903
1904 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1905
1906 netdev->mem_start = mmio_start;
1907 netdev->mem_end = mmio_start + mmio_len;
1908
1909 /* PCI config space info */
1910 hw->vendor_id = pdev->vendor;
1911 hw->device_id = pdev->device;
1912 hw->revision_id = pdev->revision;
1913 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1914 hw->subsystem_device_id = pdev->subsystem_device;
1915
1916 /* Copy the default MAC, PHY and NVM function pointers */
1917 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1918 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1919 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1920 /* Initialize skew-specific constants */
1921 err = ei->get_invariants(hw);
1922 if (err)
1923 goto err_sw_init;
1924
1925 /* setup the private structure */
1926 err = igb_sw_init(adapter);
1927 if (err)
1928 goto err_sw_init;
1929
1930 igb_get_bus_info_pcie(hw);
1931
1932 hw->phy.autoneg_wait_to_complete = false;
1933
1934 /* Copper options */
1935 if (hw->phy.media_type == e1000_media_type_copper) {
1936 hw->phy.mdix = AUTO_ALL_MODES;
1937 hw->phy.disable_polarity_correction = false;
1938 hw->phy.ms_type = e1000_ms_hw_default;
1939 }
1940
1941 if (igb_check_reset_block(hw))
1942 dev_info(&pdev->dev,
1943 "PHY reset is blocked due to SOL/IDER session.\n");
1944
1945 netdev->hw_features = NETIF_F_SG |
1946 NETIF_F_IP_CSUM |
1947 NETIF_F_IPV6_CSUM |
1948 NETIF_F_TSO |
1949 NETIF_F_TSO6 |
1950 NETIF_F_RXCSUM |
1951 NETIF_F_HW_VLAN_RX;
1952
1953 netdev->features = netdev->hw_features |
1954 NETIF_F_HW_VLAN_TX |
1955 NETIF_F_HW_VLAN_FILTER;
1956
1957 netdev->vlan_features |= NETIF_F_TSO;
1958 netdev->vlan_features |= NETIF_F_TSO6;
1959 netdev->vlan_features |= NETIF_F_IP_CSUM;
1960 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1961 netdev->vlan_features |= NETIF_F_SG;
1962
1963 if (pci_using_dac) {
1964 netdev->features |= NETIF_F_HIGHDMA;
1965 netdev->vlan_features |= NETIF_F_HIGHDMA;
1966 }
1967
1968 if (hw->mac.type >= e1000_82576) {
1969 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1970 netdev->features |= NETIF_F_SCTP_CSUM;
1971 }
1972
1973 netdev->priv_flags |= IFF_UNICAST_FLT;
1974
1975 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1976
1977 /* before reading the NVM, reset the controller to put the device in a
1978 * known good starting state */
1979 hw->mac.ops.reset_hw(hw);
1980
1981 /* make sure the NVM is good */
1982 if (hw->nvm.ops.validate(hw) < 0) {
1983 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1984 err = -EIO;
1985 goto err_eeprom;
1986 }
1987
1988 /* copy the MAC address out of the NVM */
1989 if (hw->mac.ops.read_mac_addr(hw))
1990 dev_err(&pdev->dev, "NVM Read Error\n");
1991
1992 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1993 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1994
1995 if (!is_valid_ether_addr(netdev->perm_addr)) {
1996 dev_err(&pdev->dev, "Invalid MAC Address\n");
1997 err = -EIO;
1998 goto err_eeprom;
1999 }
2000
2001 setup_timer(&adapter->watchdog_timer, igb_watchdog,
2002 (unsigned long) adapter);
2003 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2004 (unsigned long) adapter);
2005
2006 INIT_WORK(&adapter->reset_task, igb_reset_task);
2007 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2008
2009 /* Initialize link properties that are user-changeable */
2010 adapter->fc_autoneg = true;
2011 hw->mac.autoneg = true;
2012 hw->phy.autoneg_advertised = 0x2f;
2013
2014 hw->fc.requested_mode = e1000_fc_default;
2015 hw->fc.current_mode = e1000_fc_default;
2016
2017 igb_validate_mdi_setting(hw);
2018
2019 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2020 * enable the ACPI Magic Packet filter
2021 */
2022
2023 if (hw->bus.func == 0)
2024 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2025 else if (hw->mac.type >= e1000_82580)
2026 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2027 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2028 &eeprom_data);
2029 else if (hw->bus.func == 1)
2030 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2031
2032 if (eeprom_data & eeprom_apme_mask)
2033 adapter->eeprom_wol |= E1000_WUFC_MAG;
2034
2035 /* now that we have the eeprom settings, apply the special cases where
2036 * the eeprom may be wrong or the board simply won't support wake on
2037 * lan on a particular port */
2038 switch (pdev->device) {
2039 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2040 adapter->eeprom_wol = 0;
2041 break;
2042 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2043 case E1000_DEV_ID_82576_FIBER:
2044 case E1000_DEV_ID_82576_SERDES:
2045 /* Wake events only supported on port A for dual fiber
2046 * regardless of eeprom setting */
2047 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2048 adapter->eeprom_wol = 0;
2049 break;
2050 case E1000_DEV_ID_82576_QUAD_COPPER:
2051 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2052 /* if quad port adapter, disable WoL on all but port A */
2053 if (global_quad_port_a != 0)
2054 adapter->eeprom_wol = 0;
2055 else
2056 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2057 /* Reset for multiple quad port adapters */
2058 if (++global_quad_port_a == 4)
2059 global_quad_port_a = 0;
2060 break;
2061 }
2062
2063 /* initialize the wol settings based on the eeprom settings */
2064 adapter->wol = adapter->eeprom_wol;
2065 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2066
2067 /* reset the hardware with the new settings */
2068 igb_reset(adapter);
2069
2070 /* let the f/w know that the h/w is now under the control of the
2071 * driver. */
2072 igb_get_hw_control(adapter);
2073
2074 strcpy(netdev->name, "eth%d");
2075 err = register_netdev(netdev);
2076 if (err)
2077 goto err_register;
2078
2079 igb_vlan_mode(netdev, netdev->features);
2080
2081 /* carrier off reporting is important to ethtool even BEFORE open */
2082 netif_carrier_off(netdev);
2083
2084 #ifdef CONFIG_IGB_DCA
2085 if (dca_add_requester(&pdev->dev) == 0) {
2086 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2087 dev_info(&pdev->dev, "DCA enabled\n");
2088 igb_setup_dca(adapter);
2089 }
2090
2091 #endif
2092 /* do hw tstamp init after resetting */
2093 igb_init_hw_timer(adapter);
2094
2095 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2096 /* print bus type/speed/width info */
2097 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2098 netdev->name,
2099 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2100 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2101 "unknown"),
2102 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2103 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2104 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2105 "unknown"),
2106 netdev->dev_addr);
2107
2108 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2109 if (ret_val)
2110 strcpy(part_str, "Unknown");
2111 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2112 dev_info(&pdev->dev,
2113 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2114 adapter->msix_entries ? "MSI-X" :
2115 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2116 adapter->num_rx_queues, adapter->num_tx_queues);
2117 switch (hw->mac.type) {
2118 case e1000_i350:
2119 igb_set_eee_i350(hw);
2120 break;
2121 default:
2122 break;
2123 }
2124 return 0;
2125
2126 err_register:
2127 igb_release_hw_control(adapter);
2128 err_eeprom:
2129 if (!igb_check_reset_block(hw))
2130 igb_reset_phy(hw);
2131
2132 if (hw->flash_address)
2133 iounmap(hw->flash_address);
2134 err_sw_init:
2135 igb_clear_interrupt_scheme(adapter);
2136 iounmap(hw->hw_addr);
2137 err_ioremap:
2138 free_netdev(netdev);
2139 err_alloc_etherdev:
2140 pci_release_selected_regions(pdev,
2141 pci_select_bars(pdev, IORESOURCE_MEM));
2142 err_pci_reg:
2143 err_dma:
2144 pci_disable_device(pdev);
2145 return err;
2146 }
2147
2148 /**
2149 * igb_remove - Device Removal Routine
2150 * @pdev: PCI device information struct
2151 *
2152 * igb_remove is called by the PCI subsystem to alert the driver
2153 * that it should release a PCI device. The could be caused by a
2154 * Hot-Plug event, or because the driver is going to be removed from
2155 * memory.
2156 **/
2157 static void __devexit igb_remove(struct pci_dev *pdev)
2158 {
2159 struct net_device *netdev = pci_get_drvdata(pdev);
2160 struct igb_adapter *adapter = netdev_priv(netdev);
2161 struct e1000_hw *hw = &adapter->hw;
2162
2163 /*
2164 * The watchdog timer may be rescheduled, so explicitly
2165 * disable watchdog from being rescheduled.
2166 */
2167 set_bit(__IGB_DOWN, &adapter->state);
2168 del_timer_sync(&adapter->watchdog_timer);
2169 del_timer_sync(&adapter->phy_info_timer);
2170
2171 cancel_work_sync(&adapter->reset_task);
2172 cancel_work_sync(&adapter->watchdog_task);
2173
2174 #ifdef CONFIG_IGB_DCA
2175 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2176 dev_info(&pdev->dev, "DCA disabled\n");
2177 dca_remove_requester(&pdev->dev);
2178 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2179 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2180 }
2181 #endif
2182
2183 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2184 * would have already happened in close and is redundant. */
2185 igb_release_hw_control(adapter);
2186
2187 unregister_netdev(netdev);
2188
2189 igb_clear_interrupt_scheme(adapter);
2190
2191 #ifdef CONFIG_PCI_IOV
2192 /* reclaim resources allocated to VFs */
2193 if (adapter->vf_data) {
2194 /* disable iov and allow time for transactions to clear */
2195 pci_disable_sriov(pdev);
2196 msleep(500);
2197
2198 kfree(adapter->vf_data);
2199 adapter->vf_data = NULL;
2200 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2201 wrfl();
2202 msleep(100);
2203 dev_info(&pdev->dev, "IOV Disabled\n");
2204 }
2205 #endif
2206
2207 iounmap(hw->hw_addr);
2208 if (hw->flash_address)
2209 iounmap(hw->flash_address);
2210 pci_release_selected_regions(pdev,
2211 pci_select_bars(pdev, IORESOURCE_MEM));
2212
2213 free_netdev(netdev);
2214
2215 pci_disable_pcie_error_reporting(pdev);
2216
2217 pci_disable_device(pdev);
2218 }
2219
2220 /**
2221 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2222 * @adapter: board private structure to initialize
2223 *
2224 * This function initializes the vf specific data storage and then attempts to
2225 * allocate the VFs. The reason for ordering it this way is because it is much
2226 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2227 * the memory for the VFs.
2228 **/
2229 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2230 {
2231 #ifdef CONFIG_PCI_IOV
2232 struct pci_dev *pdev = adapter->pdev;
2233
2234 if (adapter->vfs_allocated_count) {
2235 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2236 sizeof(struct vf_data_storage),
2237 GFP_KERNEL);
2238 /* if allocation failed then we do not support SR-IOV */
2239 if (!adapter->vf_data) {
2240 adapter->vfs_allocated_count = 0;
2241 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2242 "Data Storage\n");
2243 }
2244 }
2245
2246 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2247 kfree(adapter->vf_data);
2248 adapter->vf_data = NULL;
2249 #endif /* CONFIG_PCI_IOV */
2250 adapter->vfs_allocated_count = 0;
2251 #ifdef CONFIG_PCI_IOV
2252 } else {
2253 unsigned char mac_addr[ETH_ALEN];
2254 int i;
2255 dev_info(&pdev->dev, "%d vfs allocated\n",
2256 adapter->vfs_allocated_count);
2257 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2258 random_ether_addr(mac_addr);
2259 igb_set_vf_mac(adapter, i, mac_addr);
2260 }
2261 /* DMA Coalescing is not supported in IOV mode. */
2262 if (adapter->flags & IGB_FLAG_DMAC)
2263 adapter->flags &= ~IGB_FLAG_DMAC;
2264 }
2265 #endif /* CONFIG_PCI_IOV */
2266 }
2267
2268
2269 /**
2270 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2271 * @adapter: board private structure to initialize
2272 *
2273 * igb_init_hw_timer initializes the function pointer and values for the hw
2274 * timer found in hardware.
2275 **/
2276 static void igb_init_hw_timer(struct igb_adapter *adapter)
2277 {
2278 struct e1000_hw *hw = &adapter->hw;
2279
2280 switch (hw->mac.type) {
2281 case e1000_i350:
2282 case e1000_82580:
2283 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2284 adapter->cycles.read = igb_read_clock;
2285 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2286 adapter->cycles.mult = 1;
2287 /*
2288 * The 82580 timesync updates the system timer every 8ns by 8ns
2289 * and the value cannot be shifted. Instead we need to shift
2290 * the registers to generate a 64bit timer value. As a result
2291 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2292 * 24 in order to generate a larger value for synchronization.
2293 */
2294 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2295 /* disable system timer temporarily by setting bit 31 */
2296 wr32(E1000_TSAUXC, 0x80000000);
2297 wrfl();
2298
2299 /* Set registers so that rollover occurs soon to test this. */
2300 wr32(E1000_SYSTIMR, 0x00000000);
2301 wr32(E1000_SYSTIML, 0x80000000);
2302 wr32(E1000_SYSTIMH, 0x000000FF);
2303 wrfl();
2304
2305 /* enable system timer by clearing bit 31 */
2306 wr32(E1000_TSAUXC, 0x0);
2307 wrfl();
2308
2309 timecounter_init(&adapter->clock,
2310 &adapter->cycles,
2311 ktime_to_ns(ktime_get_real()));
2312 /*
2313 * Synchronize our NIC clock against system wall clock. NIC
2314 * time stamp reading requires ~3us per sample, each sample
2315 * was pretty stable even under load => only require 10
2316 * samples for each offset comparison.
2317 */
2318 memset(&adapter->compare, 0, sizeof(adapter->compare));
2319 adapter->compare.source = &adapter->clock;
2320 adapter->compare.target = ktime_get_real;
2321 adapter->compare.num_samples = 10;
2322 timecompare_update(&adapter->compare, 0);
2323 break;
2324 case e1000_82576:
2325 /*
2326 * Initialize hardware timer: we keep it running just in case
2327 * that some program needs it later on.
2328 */
2329 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2330 adapter->cycles.read = igb_read_clock;
2331 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2332 adapter->cycles.mult = 1;
2333 /**
2334 * Scale the NIC clock cycle by a large factor so that
2335 * relatively small clock corrections can be added or
2336 * subtracted at each clock tick. The drawbacks of a large
2337 * factor are a) that the clock register overflows more quickly
2338 * (not such a big deal) and b) that the increment per tick has
2339 * to fit into 24 bits. As a result we need to use a shift of
2340 * 19 so we can fit a value of 16 into the TIMINCA register.
2341 */
2342 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2343 wr32(E1000_TIMINCA,
2344 (1 << E1000_TIMINCA_16NS_SHIFT) |
2345 (16 << IGB_82576_TSYNC_SHIFT));
2346
2347 /* Set registers so that rollover occurs soon to test this. */
2348 wr32(E1000_SYSTIML, 0x00000000);
2349 wr32(E1000_SYSTIMH, 0xFF800000);
2350 wrfl();
2351
2352 timecounter_init(&adapter->clock,
2353 &adapter->cycles,
2354 ktime_to_ns(ktime_get_real()));
2355 /*
2356 * Synchronize our NIC clock against system wall clock. NIC
2357 * time stamp reading requires ~3us per sample, each sample
2358 * was pretty stable even under load => only require 10
2359 * samples for each offset comparison.
2360 */
2361 memset(&adapter->compare, 0, sizeof(adapter->compare));
2362 adapter->compare.source = &adapter->clock;
2363 adapter->compare.target = ktime_get_real;
2364 adapter->compare.num_samples = 10;
2365 timecompare_update(&adapter->compare, 0);
2366 break;
2367 case e1000_82575:
2368 /* 82575 does not support timesync */
2369 default:
2370 break;
2371 }
2372
2373 }
2374
2375 /**
2376 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2377 * @adapter: board private structure to initialize
2378 *
2379 * igb_sw_init initializes the Adapter private data structure.
2380 * Fields are initialized based on PCI device information and
2381 * OS network device settings (MTU size).
2382 **/
2383 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2384 {
2385 struct e1000_hw *hw = &adapter->hw;
2386 struct net_device *netdev = adapter->netdev;
2387 struct pci_dev *pdev = adapter->pdev;
2388
2389 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2390
2391 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2392 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2393 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2394 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2395
2396 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2397 VLAN_HLEN;
2398 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2399
2400 spin_lock_init(&adapter->stats64_lock);
2401 #ifdef CONFIG_PCI_IOV
2402 switch (hw->mac.type) {
2403 case e1000_82576:
2404 case e1000_i350:
2405 if (max_vfs > 7) {
2406 dev_warn(&pdev->dev,
2407 "Maximum of 7 VFs per PF, using max\n");
2408 adapter->vfs_allocated_count = 7;
2409 } else
2410 adapter->vfs_allocated_count = max_vfs;
2411 break;
2412 default:
2413 break;
2414 }
2415 #endif /* CONFIG_PCI_IOV */
2416 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2417 /* i350 cannot do RSS and SR-IOV at the same time */
2418 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2419 adapter->rss_queues = 1;
2420
2421 /*
2422 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2423 * then we should combine the queues into a queue pair in order to
2424 * conserve interrupts due to limited supply
2425 */
2426 if ((adapter->rss_queues > 4) ||
2427 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2428 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2429
2430 /* This call may decrease the number of queues */
2431 if (igb_init_interrupt_scheme(adapter)) {
2432 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2433 return -ENOMEM;
2434 }
2435
2436 igb_probe_vfs(adapter);
2437
2438 /* Explicitly disable IRQ since the NIC can be in any state. */
2439 igb_irq_disable(adapter);
2440
2441 if (hw->mac.type == e1000_i350)
2442 adapter->flags &= ~IGB_FLAG_DMAC;
2443
2444 set_bit(__IGB_DOWN, &adapter->state);
2445 return 0;
2446 }
2447
2448 /**
2449 * igb_open - Called when a network interface is made active
2450 * @netdev: network interface device structure
2451 *
2452 * Returns 0 on success, negative value on failure
2453 *
2454 * The open entry point is called when a network interface is made
2455 * active by the system (IFF_UP). At this point all resources needed
2456 * for transmit and receive operations are allocated, the interrupt
2457 * handler is registered with the OS, the watchdog timer is started,
2458 * and the stack is notified that the interface is ready.
2459 **/
2460 static int igb_open(struct net_device *netdev)
2461 {
2462 struct igb_adapter *adapter = netdev_priv(netdev);
2463 struct e1000_hw *hw = &adapter->hw;
2464 int err;
2465 int i;
2466
2467 /* disallow open during test */
2468 if (test_bit(__IGB_TESTING, &adapter->state))
2469 return -EBUSY;
2470
2471 netif_carrier_off(netdev);
2472
2473 /* allocate transmit descriptors */
2474 err = igb_setup_all_tx_resources(adapter);
2475 if (err)
2476 goto err_setup_tx;
2477
2478 /* allocate receive descriptors */
2479 err = igb_setup_all_rx_resources(adapter);
2480 if (err)
2481 goto err_setup_rx;
2482
2483 igb_power_up_link(adapter);
2484
2485 /* before we allocate an interrupt, we must be ready to handle it.
2486 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2487 * as soon as we call pci_request_irq, so we have to setup our
2488 * clean_rx handler before we do so. */
2489 igb_configure(adapter);
2490
2491 err = igb_request_irq(adapter);
2492 if (err)
2493 goto err_req_irq;
2494
2495 /* From here on the code is the same as igb_up() */
2496 clear_bit(__IGB_DOWN, &adapter->state);
2497
2498 for (i = 0; i < adapter->num_q_vectors; i++) {
2499 struct igb_q_vector *q_vector = adapter->q_vector[i];
2500 napi_enable(&q_vector->napi);
2501 }
2502
2503 /* Clear any pending interrupts. */
2504 rd32(E1000_ICR);
2505
2506 igb_irq_enable(adapter);
2507
2508 /* notify VFs that reset has been completed */
2509 if (adapter->vfs_allocated_count) {
2510 u32 reg_data = rd32(E1000_CTRL_EXT);
2511 reg_data |= E1000_CTRL_EXT_PFRSTD;
2512 wr32(E1000_CTRL_EXT, reg_data);
2513 }
2514
2515 netif_tx_start_all_queues(netdev);
2516
2517 /* start the watchdog. */
2518 hw->mac.get_link_status = 1;
2519 schedule_work(&adapter->watchdog_task);
2520
2521 return 0;
2522
2523 err_req_irq:
2524 igb_release_hw_control(adapter);
2525 igb_power_down_link(adapter);
2526 igb_free_all_rx_resources(adapter);
2527 err_setup_rx:
2528 igb_free_all_tx_resources(adapter);
2529 err_setup_tx:
2530 igb_reset(adapter);
2531
2532 return err;
2533 }
2534
2535 /**
2536 * igb_close - Disables a network interface
2537 * @netdev: network interface device structure
2538 *
2539 * Returns 0, this is not allowed to fail
2540 *
2541 * The close entry point is called when an interface is de-activated
2542 * by the OS. The hardware is still under the driver's control, but
2543 * needs to be disabled. A global MAC reset is issued to stop the
2544 * hardware, and all transmit and receive resources are freed.
2545 **/
2546 static int igb_close(struct net_device *netdev)
2547 {
2548 struct igb_adapter *adapter = netdev_priv(netdev);
2549
2550 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2551 igb_down(adapter);
2552
2553 igb_free_irq(adapter);
2554
2555 igb_free_all_tx_resources(adapter);
2556 igb_free_all_rx_resources(adapter);
2557
2558 return 0;
2559 }
2560
2561 /**
2562 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2563 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2564 *
2565 * Return 0 on success, negative on failure
2566 **/
2567 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2568 {
2569 struct device *dev = tx_ring->dev;
2570 int size;
2571
2572 size = sizeof(struct igb_buffer) * tx_ring->count;
2573 tx_ring->buffer_info = vzalloc(size);
2574 if (!tx_ring->buffer_info)
2575 goto err;
2576
2577 /* round up to nearest 4K */
2578 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2579 tx_ring->size = ALIGN(tx_ring->size, 4096);
2580
2581 tx_ring->desc = dma_alloc_coherent(dev,
2582 tx_ring->size,
2583 &tx_ring->dma,
2584 GFP_KERNEL);
2585
2586 if (!tx_ring->desc)
2587 goto err;
2588
2589 tx_ring->next_to_use = 0;
2590 tx_ring->next_to_clean = 0;
2591 return 0;
2592
2593 err:
2594 vfree(tx_ring->buffer_info);
2595 dev_err(dev,
2596 "Unable to allocate memory for the transmit descriptor ring\n");
2597 return -ENOMEM;
2598 }
2599
2600 /**
2601 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2602 * (Descriptors) for all queues
2603 * @adapter: board private structure
2604 *
2605 * Return 0 on success, negative on failure
2606 **/
2607 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2608 {
2609 struct pci_dev *pdev = adapter->pdev;
2610 int i, err = 0;
2611
2612 for (i = 0; i < adapter->num_tx_queues; i++) {
2613 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2614 if (err) {
2615 dev_err(&pdev->dev,
2616 "Allocation for Tx Queue %u failed\n", i);
2617 for (i--; i >= 0; i--)
2618 igb_free_tx_resources(adapter->tx_ring[i]);
2619 break;
2620 }
2621 }
2622
2623 for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2624 int r_idx = i % adapter->num_tx_queues;
2625 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2626 }
2627 return err;
2628 }
2629
2630 /**
2631 * igb_setup_tctl - configure the transmit control registers
2632 * @adapter: Board private structure
2633 **/
2634 void igb_setup_tctl(struct igb_adapter *adapter)
2635 {
2636 struct e1000_hw *hw = &adapter->hw;
2637 u32 tctl;
2638
2639 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2640 wr32(E1000_TXDCTL(0), 0);
2641
2642 /* Program the Transmit Control Register */
2643 tctl = rd32(E1000_TCTL);
2644 tctl &= ~E1000_TCTL_CT;
2645 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2646 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2647
2648 igb_config_collision_dist(hw);
2649
2650 /* Enable transmits */
2651 tctl |= E1000_TCTL_EN;
2652
2653 wr32(E1000_TCTL, tctl);
2654 }
2655
2656 /**
2657 * igb_configure_tx_ring - Configure transmit ring after Reset
2658 * @adapter: board private structure
2659 * @ring: tx ring to configure
2660 *
2661 * Configure a transmit ring after a reset.
2662 **/
2663 void igb_configure_tx_ring(struct igb_adapter *adapter,
2664 struct igb_ring *ring)
2665 {
2666 struct e1000_hw *hw = &adapter->hw;
2667 u32 txdctl = 0;
2668 u64 tdba = ring->dma;
2669 int reg_idx = ring->reg_idx;
2670
2671 /* disable the queue */
2672 wr32(E1000_TXDCTL(reg_idx), 0);
2673 wrfl();
2674 mdelay(10);
2675
2676 wr32(E1000_TDLEN(reg_idx),
2677 ring->count * sizeof(union e1000_adv_tx_desc));
2678 wr32(E1000_TDBAL(reg_idx),
2679 tdba & 0x00000000ffffffffULL);
2680 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2681
2682 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2683 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2684 wr32(E1000_TDH(reg_idx), 0);
2685 writel(0, ring->tail);
2686
2687 txdctl |= IGB_TX_PTHRESH;
2688 txdctl |= IGB_TX_HTHRESH << 8;
2689 txdctl |= IGB_TX_WTHRESH << 16;
2690
2691 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2692 wr32(E1000_TXDCTL(reg_idx), txdctl);
2693 }
2694
2695 /**
2696 * igb_configure_tx - Configure transmit Unit after Reset
2697 * @adapter: board private structure
2698 *
2699 * Configure the Tx unit of the MAC after a reset.
2700 **/
2701 static void igb_configure_tx(struct igb_adapter *adapter)
2702 {
2703 int i;
2704
2705 for (i = 0; i < adapter->num_tx_queues; i++)
2706 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2707 }
2708
2709 /**
2710 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2711 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2712 *
2713 * Returns 0 on success, negative on failure
2714 **/
2715 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2716 {
2717 struct device *dev = rx_ring->dev;
2718 int size, desc_len;
2719
2720 size = sizeof(struct igb_buffer) * rx_ring->count;
2721 rx_ring->buffer_info = vzalloc(size);
2722 if (!rx_ring->buffer_info)
2723 goto err;
2724
2725 desc_len = sizeof(union e1000_adv_rx_desc);
2726
2727 /* Round up to nearest 4K */
2728 rx_ring->size = rx_ring->count * desc_len;
2729 rx_ring->size = ALIGN(rx_ring->size, 4096);
2730
2731 rx_ring->desc = dma_alloc_coherent(dev,
2732 rx_ring->size,
2733 &rx_ring->dma,
2734 GFP_KERNEL);
2735
2736 if (!rx_ring->desc)
2737 goto err;
2738
2739 rx_ring->next_to_clean = 0;
2740 rx_ring->next_to_use = 0;
2741
2742 return 0;
2743
2744 err:
2745 vfree(rx_ring->buffer_info);
2746 rx_ring->buffer_info = NULL;
2747 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2748 " ring\n");
2749 return -ENOMEM;
2750 }
2751
2752 /**
2753 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2754 * (Descriptors) for all queues
2755 * @adapter: board private structure
2756 *
2757 * Return 0 on success, negative on failure
2758 **/
2759 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2760 {
2761 struct pci_dev *pdev = adapter->pdev;
2762 int i, err = 0;
2763
2764 for (i = 0; i < adapter->num_rx_queues; i++) {
2765 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2766 if (err) {
2767 dev_err(&pdev->dev,
2768 "Allocation for Rx Queue %u failed\n", i);
2769 for (i--; i >= 0; i--)
2770 igb_free_rx_resources(adapter->rx_ring[i]);
2771 break;
2772 }
2773 }
2774
2775 return err;
2776 }
2777
2778 /**
2779 * igb_setup_mrqc - configure the multiple receive queue control registers
2780 * @adapter: Board private structure
2781 **/
2782 static void igb_setup_mrqc(struct igb_adapter *adapter)
2783 {
2784 struct e1000_hw *hw = &adapter->hw;
2785 u32 mrqc, rxcsum;
2786 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2787 union e1000_reta {
2788 u32 dword;
2789 u8 bytes[4];
2790 } reta;
2791 static const u8 rsshash[40] = {
2792 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2793 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2794 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2795 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2796
2797 /* Fill out hash function seeds */
2798 for (j = 0; j < 10; j++) {
2799 u32 rsskey = rsshash[(j * 4)];
2800 rsskey |= rsshash[(j * 4) + 1] << 8;
2801 rsskey |= rsshash[(j * 4) + 2] << 16;
2802 rsskey |= rsshash[(j * 4) + 3] << 24;
2803 array_wr32(E1000_RSSRK(0), j, rsskey);
2804 }
2805
2806 num_rx_queues = adapter->rss_queues;
2807
2808 if (adapter->vfs_allocated_count) {
2809 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2810 switch (hw->mac.type) {
2811 case e1000_i350:
2812 case e1000_82580:
2813 num_rx_queues = 1;
2814 shift = 0;
2815 break;
2816 case e1000_82576:
2817 shift = 3;
2818 num_rx_queues = 2;
2819 break;
2820 case e1000_82575:
2821 shift = 2;
2822 shift2 = 6;
2823 default:
2824 break;
2825 }
2826 } else {
2827 if (hw->mac.type == e1000_82575)
2828 shift = 6;
2829 }
2830
2831 for (j = 0; j < (32 * 4); j++) {
2832 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2833 if (shift2)
2834 reta.bytes[j & 3] |= num_rx_queues << shift2;
2835 if ((j & 3) == 3)
2836 wr32(E1000_RETA(j >> 2), reta.dword);
2837 }
2838
2839 /*
2840 * Disable raw packet checksumming so that RSS hash is placed in
2841 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2842 * offloads as they are enabled by default
2843 */
2844 rxcsum = rd32(E1000_RXCSUM);
2845 rxcsum |= E1000_RXCSUM_PCSD;
2846
2847 if (adapter->hw.mac.type >= e1000_82576)
2848 /* Enable Receive Checksum Offload for SCTP */
2849 rxcsum |= E1000_RXCSUM_CRCOFL;
2850
2851 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2852 wr32(E1000_RXCSUM, rxcsum);
2853
2854 /* If VMDq is enabled then we set the appropriate mode for that, else
2855 * we default to RSS so that an RSS hash is calculated per packet even
2856 * if we are only using one queue */
2857 if (adapter->vfs_allocated_count) {
2858 if (hw->mac.type > e1000_82575) {
2859 /* Set the default pool for the PF's first queue */
2860 u32 vtctl = rd32(E1000_VT_CTL);
2861 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2862 E1000_VT_CTL_DISABLE_DEF_POOL);
2863 vtctl |= adapter->vfs_allocated_count <<
2864 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2865 wr32(E1000_VT_CTL, vtctl);
2866 }
2867 if (adapter->rss_queues > 1)
2868 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2869 else
2870 mrqc = E1000_MRQC_ENABLE_VMDQ;
2871 } else {
2872 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2873 }
2874 igb_vmm_control(adapter);
2875
2876 /*
2877 * Generate RSS hash based on TCP port numbers and/or
2878 * IPv4/v6 src and dst addresses since UDP cannot be
2879 * hashed reliably due to IP fragmentation
2880 */
2881 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2882 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2883 E1000_MRQC_RSS_FIELD_IPV6 |
2884 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2885 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2886
2887 wr32(E1000_MRQC, mrqc);
2888 }
2889
2890 /**
2891 * igb_setup_rctl - configure the receive control registers
2892 * @adapter: Board private structure
2893 **/
2894 void igb_setup_rctl(struct igb_adapter *adapter)
2895 {
2896 struct e1000_hw *hw = &adapter->hw;
2897 u32 rctl;
2898
2899 rctl = rd32(E1000_RCTL);
2900
2901 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2902 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2903
2904 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2905 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2906
2907 /*
2908 * enable stripping of CRC. It's unlikely this will break BMC
2909 * redirection as it did with e1000. Newer features require
2910 * that the HW strips the CRC.
2911 */
2912 rctl |= E1000_RCTL_SECRC;
2913
2914 /* disable store bad packets and clear size bits. */
2915 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2916
2917 /* enable LPE to prevent packets larger than max_frame_size */
2918 rctl |= E1000_RCTL_LPE;
2919
2920 /* disable queue 0 to prevent tail write w/o re-config */
2921 wr32(E1000_RXDCTL(0), 0);
2922
2923 /* Attention!!! For SR-IOV PF driver operations you must enable
2924 * queue drop for all VF and PF queues to prevent head of line blocking
2925 * if an un-trusted VF does not provide descriptors to hardware.
2926 */
2927 if (adapter->vfs_allocated_count) {
2928 /* set all queue drop enable bits */
2929 wr32(E1000_QDE, ALL_QUEUES);
2930 }
2931
2932 wr32(E1000_RCTL, rctl);
2933 }
2934
2935 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2936 int vfn)
2937 {
2938 struct e1000_hw *hw = &adapter->hw;
2939 u32 vmolr;
2940
2941 /* if it isn't the PF check to see if VFs are enabled and
2942 * increase the size to support vlan tags */
2943 if (vfn < adapter->vfs_allocated_count &&
2944 adapter->vf_data[vfn].vlans_enabled)
2945 size += VLAN_TAG_SIZE;
2946
2947 vmolr = rd32(E1000_VMOLR(vfn));
2948 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2949 vmolr |= size | E1000_VMOLR_LPE;
2950 wr32(E1000_VMOLR(vfn), vmolr);
2951
2952 return 0;
2953 }
2954
2955 /**
2956 * igb_rlpml_set - set maximum receive packet size
2957 * @adapter: board private structure
2958 *
2959 * Configure maximum receivable packet size.
2960 **/
2961 static void igb_rlpml_set(struct igb_adapter *adapter)
2962 {
2963 u32 max_frame_size = adapter->max_frame_size;
2964 struct e1000_hw *hw = &adapter->hw;
2965 u16 pf_id = adapter->vfs_allocated_count;
2966
2967 if (pf_id) {
2968 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2969 /*
2970 * If we're in VMDQ or SR-IOV mode, then set global RLPML
2971 * to our max jumbo frame size, in case we need to enable
2972 * jumbo frames on one of the rings later.
2973 * This will not pass over-length frames into the default
2974 * queue because it's gated by the VMOLR.RLPML.
2975 */
2976 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2977 }
2978
2979 wr32(E1000_RLPML, max_frame_size);
2980 }
2981
2982 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2983 int vfn, bool aupe)
2984 {
2985 struct e1000_hw *hw = &adapter->hw;
2986 u32 vmolr;
2987
2988 /*
2989 * This register exists only on 82576 and newer so if we are older then
2990 * we should exit and do nothing
2991 */
2992 if (hw->mac.type < e1000_82576)
2993 return;
2994
2995 vmolr = rd32(E1000_VMOLR(vfn));
2996 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2997 if (aupe)
2998 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
2999 else
3000 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3001
3002 /* clear all bits that might not be set */
3003 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3004
3005 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3006 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3007 /*
3008 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3009 * multicast packets
3010 */
3011 if (vfn <= adapter->vfs_allocated_count)
3012 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3013
3014 wr32(E1000_VMOLR(vfn), vmolr);
3015 }
3016
3017 /**
3018 * igb_configure_rx_ring - Configure a receive ring after Reset
3019 * @adapter: board private structure
3020 * @ring: receive ring to be configured
3021 *
3022 * Configure the Rx unit of the MAC after a reset.
3023 **/
3024 void igb_configure_rx_ring(struct igb_adapter *adapter,
3025 struct igb_ring *ring)
3026 {
3027 struct e1000_hw *hw = &adapter->hw;
3028 u64 rdba = ring->dma;
3029 int reg_idx = ring->reg_idx;
3030 u32 srrctl = 0, rxdctl = 0;
3031
3032 /* disable the queue */
3033 wr32(E1000_RXDCTL(reg_idx), 0);
3034
3035 /* Set DMA base address registers */
3036 wr32(E1000_RDBAL(reg_idx),
3037 rdba & 0x00000000ffffffffULL);
3038 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3039 wr32(E1000_RDLEN(reg_idx),
3040 ring->count * sizeof(union e1000_adv_rx_desc));
3041
3042 /* initialize head and tail */
3043 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
3044 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3045 wr32(E1000_RDH(reg_idx), 0);
3046 writel(0, ring->tail);
3047
3048 /* set descriptor configuration */
3049 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3050 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3051 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3052 #else
3053 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3054 #endif
3055 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3056 if (hw->mac.type == e1000_82580)
3057 srrctl |= E1000_SRRCTL_TIMESTAMP;
3058 /* Only set Drop Enable if we are supporting multiple queues */
3059 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3060 srrctl |= E1000_SRRCTL_DROP_EN;
3061
3062 wr32(E1000_SRRCTL(reg_idx), srrctl);
3063
3064 /* set filtering for VMDQ pools */
3065 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3066
3067 rxdctl |= IGB_RX_PTHRESH;
3068 rxdctl |= IGB_RX_HTHRESH << 8;
3069 rxdctl |= IGB_RX_WTHRESH << 16;
3070
3071 /* enable receive descriptor fetching */
3072 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3073 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3074 }
3075
3076 /**
3077 * igb_configure_rx - Configure receive Unit after Reset
3078 * @adapter: board private structure
3079 *
3080 * Configure the Rx unit of the MAC after a reset.
3081 **/
3082 static void igb_configure_rx(struct igb_adapter *adapter)
3083 {
3084 int i;
3085
3086 /* set UTA to appropriate mode */
3087 igb_set_uta(adapter);
3088
3089 /* set the correct pool for the PF default MAC address in entry 0 */
3090 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3091 adapter->vfs_allocated_count);
3092
3093 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3094 * the Base and Length of the Rx Descriptor Ring */
3095 for (i = 0; i < adapter->num_rx_queues; i++)
3096 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3097 }
3098
3099 /**
3100 * igb_free_tx_resources - Free Tx Resources per Queue
3101 * @tx_ring: Tx descriptor ring for a specific queue
3102 *
3103 * Free all transmit software resources
3104 **/
3105 void igb_free_tx_resources(struct igb_ring *tx_ring)
3106 {
3107 igb_clean_tx_ring(tx_ring);
3108
3109 vfree(tx_ring->buffer_info);
3110 tx_ring->buffer_info = NULL;
3111
3112 /* if not set, then don't free */
3113 if (!tx_ring->desc)
3114 return;
3115
3116 dma_free_coherent(tx_ring->dev, tx_ring->size,
3117 tx_ring->desc, tx_ring->dma);
3118
3119 tx_ring->desc = NULL;
3120 }
3121
3122 /**
3123 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3124 * @adapter: board private structure
3125 *
3126 * Free all transmit software resources
3127 **/
3128 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3129 {
3130 int i;
3131
3132 for (i = 0; i < adapter->num_tx_queues; i++)
3133 igb_free_tx_resources(adapter->tx_ring[i]);
3134 }
3135
3136 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3137 struct igb_buffer *buffer_info)
3138 {
3139 if (buffer_info->dma) {
3140 if (buffer_info->mapped_as_page)
3141 dma_unmap_page(tx_ring->dev,
3142 buffer_info->dma,
3143 buffer_info->length,
3144 DMA_TO_DEVICE);
3145 else
3146 dma_unmap_single(tx_ring->dev,
3147 buffer_info->dma,
3148 buffer_info->length,
3149 DMA_TO_DEVICE);
3150 buffer_info->dma = 0;
3151 }
3152 if (buffer_info->skb) {
3153 dev_kfree_skb_any(buffer_info->skb);
3154 buffer_info->skb = NULL;
3155 }
3156 buffer_info->time_stamp = 0;
3157 buffer_info->length = 0;
3158 buffer_info->next_to_watch = 0;
3159 buffer_info->mapped_as_page = false;
3160 }
3161
3162 /**
3163 * igb_clean_tx_ring - Free Tx Buffers
3164 * @tx_ring: ring to be cleaned
3165 **/
3166 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3167 {
3168 struct igb_buffer *buffer_info;
3169 unsigned long size;
3170 unsigned int i;
3171
3172 if (!tx_ring->buffer_info)
3173 return;
3174 /* Free all the Tx ring sk_buffs */
3175
3176 for (i = 0; i < tx_ring->count; i++) {
3177 buffer_info = &tx_ring->buffer_info[i];
3178 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3179 }
3180
3181 size = sizeof(struct igb_buffer) * tx_ring->count;
3182 memset(tx_ring->buffer_info, 0, size);
3183
3184 /* Zero out the descriptor ring */
3185 memset(tx_ring->desc, 0, tx_ring->size);
3186
3187 tx_ring->next_to_use = 0;
3188 tx_ring->next_to_clean = 0;
3189 }
3190
3191 /**
3192 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3193 * @adapter: board private structure
3194 **/
3195 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3196 {
3197 int i;
3198
3199 for (i = 0; i < adapter->num_tx_queues; i++)
3200 igb_clean_tx_ring(adapter->tx_ring[i]);
3201 }
3202
3203 /**
3204 * igb_free_rx_resources - Free Rx Resources
3205 * @rx_ring: ring to clean the resources from
3206 *
3207 * Free all receive software resources
3208 **/
3209 void igb_free_rx_resources(struct igb_ring *rx_ring)
3210 {
3211 igb_clean_rx_ring(rx_ring);
3212
3213 vfree(rx_ring->buffer_info);
3214 rx_ring->buffer_info = NULL;
3215
3216 /* if not set, then don't free */
3217 if (!rx_ring->desc)
3218 return;
3219
3220 dma_free_coherent(rx_ring->dev, rx_ring->size,
3221 rx_ring->desc, rx_ring->dma);
3222
3223 rx_ring->desc = NULL;
3224 }
3225
3226 /**
3227 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3228 * @adapter: board private structure
3229 *
3230 * Free all receive software resources
3231 **/
3232 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3233 {
3234 int i;
3235
3236 for (i = 0; i < adapter->num_rx_queues; i++)
3237 igb_free_rx_resources(adapter->rx_ring[i]);
3238 }
3239
3240 /**
3241 * igb_clean_rx_ring - Free Rx Buffers per Queue
3242 * @rx_ring: ring to free buffers from
3243 **/
3244 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3245 {
3246 unsigned long size;
3247 u16 i;
3248
3249 if (!rx_ring->buffer_info)
3250 return;
3251
3252 /* Free all the Rx ring sk_buffs */
3253 for (i = 0; i < rx_ring->count; i++) {
3254 struct igb_buffer *buffer_info = &rx_ring->buffer_info[i];
3255 if (buffer_info->dma) {
3256 dma_unmap_single(rx_ring->dev,
3257 buffer_info->dma,
3258 IGB_RX_HDR_LEN,
3259 DMA_FROM_DEVICE);
3260 buffer_info->dma = 0;
3261 }
3262
3263 if (buffer_info->skb) {
3264 dev_kfree_skb(buffer_info->skb);
3265 buffer_info->skb = NULL;
3266 }
3267 if (buffer_info->page_dma) {
3268 dma_unmap_page(rx_ring->dev,
3269 buffer_info->page_dma,
3270 PAGE_SIZE / 2,
3271 DMA_FROM_DEVICE);
3272 buffer_info->page_dma = 0;
3273 }
3274 if (buffer_info->page) {
3275 put_page(buffer_info->page);
3276 buffer_info->page = NULL;
3277 buffer_info->page_offset = 0;
3278 }
3279 }
3280
3281 size = sizeof(struct igb_buffer) * rx_ring->count;
3282 memset(rx_ring->buffer_info, 0, size);
3283
3284 /* Zero out the descriptor ring */
3285 memset(rx_ring->desc, 0, rx_ring->size);
3286
3287 rx_ring->next_to_clean = 0;
3288 rx_ring->next_to_use = 0;
3289 }
3290
3291 /**
3292 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3293 * @adapter: board private structure
3294 **/
3295 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3296 {
3297 int i;
3298
3299 for (i = 0; i < adapter->num_rx_queues; i++)
3300 igb_clean_rx_ring(adapter->rx_ring[i]);
3301 }
3302
3303 /**
3304 * igb_set_mac - Change the Ethernet Address of the NIC
3305 * @netdev: network interface device structure
3306 * @p: pointer to an address structure
3307 *
3308 * Returns 0 on success, negative on failure
3309 **/
3310 static int igb_set_mac(struct net_device *netdev, void *p)
3311 {
3312 struct igb_adapter *adapter = netdev_priv(netdev);
3313 struct e1000_hw *hw = &adapter->hw;
3314 struct sockaddr *addr = p;
3315
3316 if (!is_valid_ether_addr(addr->sa_data))
3317 return -EADDRNOTAVAIL;
3318
3319 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3320 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3321
3322 /* set the correct pool for the new PF MAC address in entry 0 */
3323 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3324 adapter->vfs_allocated_count);
3325
3326 return 0;
3327 }
3328
3329 /**
3330 * igb_write_mc_addr_list - write multicast addresses to MTA
3331 * @netdev: network interface device structure
3332 *
3333 * Writes multicast address list to the MTA hash table.
3334 * Returns: -ENOMEM on failure
3335 * 0 on no addresses written
3336 * X on writing X addresses to MTA
3337 **/
3338 static int igb_write_mc_addr_list(struct net_device *netdev)
3339 {
3340 struct igb_adapter *adapter = netdev_priv(netdev);
3341 struct e1000_hw *hw = &adapter->hw;
3342 struct netdev_hw_addr *ha;
3343 u8 *mta_list;
3344 int i;
3345
3346 if (netdev_mc_empty(netdev)) {
3347 /* nothing to program, so clear mc list */
3348 igb_update_mc_addr_list(hw, NULL, 0);
3349 igb_restore_vf_multicasts(adapter);
3350 return 0;
3351 }
3352
3353 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3354 if (!mta_list)
3355 return -ENOMEM;
3356
3357 /* The shared function expects a packed array of only addresses. */
3358 i = 0;
3359 netdev_for_each_mc_addr(ha, netdev)
3360 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3361
3362 igb_update_mc_addr_list(hw, mta_list, i);
3363 kfree(mta_list);
3364
3365 return netdev_mc_count(netdev);
3366 }
3367
3368 /**
3369 * igb_write_uc_addr_list - write unicast addresses to RAR table
3370 * @netdev: network interface device structure
3371 *
3372 * Writes unicast address list to the RAR table.
3373 * Returns: -ENOMEM on failure/insufficient address space
3374 * 0 on no addresses written
3375 * X on writing X addresses to the RAR table
3376 **/
3377 static int igb_write_uc_addr_list(struct net_device *netdev)
3378 {
3379 struct igb_adapter *adapter = netdev_priv(netdev);
3380 struct e1000_hw *hw = &adapter->hw;
3381 unsigned int vfn = adapter->vfs_allocated_count;
3382 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3383 int count = 0;
3384
3385 /* return ENOMEM indicating insufficient memory for addresses */
3386 if (netdev_uc_count(netdev) > rar_entries)
3387 return -ENOMEM;
3388
3389 if (!netdev_uc_empty(netdev) && rar_entries) {
3390 struct netdev_hw_addr *ha;
3391
3392 netdev_for_each_uc_addr(ha, netdev) {
3393 if (!rar_entries)
3394 break;
3395 igb_rar_set_qsel(adapter, ha->addr,
3396 rar_entries--,
3397 vfn);
3398 count++;
3399 }
3400 }
3401 /* write the addresses in reverse order to avoid write combining */
3402 for (; rar_entries > 0 ; rar_entries--) {
3403 wr32(E1000_RAH(rar_entries), 0);
3404 wr32(E1000_RAL(rar_entries), 0);
3405 }
3406 wrfl();
3407
3408 return count;
3409 }
3410
3411 /**
3412 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3413 * @netdev: network interface device structure
3414 *
3415 * The set_rx_mode entry point is called whenever the unicast or multicast
3416 * address lists or the network interface flags are updated. This routine is
3417 * responsible for configuring the hardware for proper unicast, multicast,
3418 * promiscuous mode, and all-multi behavior.
3419 **/
3420 static void igb_set_rx_mode(struct net_device *netdev)
3421 {
3422 struct igb_adapter *adapter = netdev_priv(netdev);
3423 struct e1000_hw *hw = &adapter->hw;
3424 unsigned int vfn = adapter->vfs_allocated_count;
3425 u32 rctl, vmolr = 0;
3426 int count;
3427
3428 /* Check for Promiscuous and All Multicast modes */
3429 rctl = rd32(E1000_RCTL);
3430
3431 /* clear the effected bits */
3432 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3433
3434 if (netdev->flags & IFF_PROMISC) {
3435 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3436 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3437 } else {
3438 if (netdev->flags & IFF_ALLMULTI) {
3439 rctl |= E1000_RCTL_MPE;
3440 vmolr |= E1000_VMOLR_MPME;
3441 } else {
3442 /*
3443 * Write addresses to the MTA, if the attempt fails
3444 * then we should just turn on promiscuous mode so
3445 * that we can at least receive multicast traffic
3446 */
3447 count = igb_write_mc_addr_list(netdev);
3448 if (count < 0) {
3449 rctl |= E1000_RCTL_MPE;
3450 vmolr |= E1000_VMOLR_MPME;
3451 } else if (count) {
3452 vmolr |= E1000_VMOLR_ROMPE;
3453 }
3454 }
3455 /*
3456 * Write addresses to available RAR registers, if there is not
3457 * sufficient space to store all the addresses then enable
3458 * unicast promiscuous mode
3459 */
3460 count = igb_write_uc_addr_list(netdev);
3461 if (count < 0) {
3462 rctl |= E1000_RCTL_UPE;
3463 vmolr |= E1000_VMOLR_ROPE;
3464 }
3465 rctl |= E1000_RCTL_VFE;
3466 }
3467 wr32(E1000_RCTL, rctl);
3468
3469 /*
3470 * In order to support SR-IOV and eventually VMDq it is necessary to set
3471 * the VMOLR to enable the appropriate modes. Without this workaround
3472 * we will have issues with VLAN tag stripping not being done for frames
3473 * that are only arriving because we are the default pool
3474 */
3475 if (hw->mac.type < e1000_82576)
3476 return;
3477
3478 vmolr |= rd32(E1000_VMOLR(vfn)) &
3479 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3480 wr32(E1000_VMOLR(vfn), vmolr);
3481 igb_restore_vf_multicasts(adapter);
3482 }
3483
3484 static void igb_check_wvbr(struct igb_adapter *adapter)
3485 {
3486 struct e1000_hw *hw = &adapter->hw;
3487 u32 wvbr = 0;
3488
3489 switch (hw->mac.type) {
3490 case e1000_82576:
3491 case e1000_i350:
3492 if (!(wvbr = rd32(E1000_WVBR)))
3493 return;
3494 break;
3495 default:
3496 break;
3497 }
3498
3499 adapter->wvbr |= wvbr;
3500 }
3501
3502 #define IGB_STAGGERED_QUEUE_OFFSET 8
3503
3504 static void igb_spoof_check(struct igb_adapter *adapter)
3505 {
3506 int j;
3507
3508 if (!adapter->wvbr)
3509 return;
3510
3511 for(j = 0; j < adapter->vfs_allocated_count; j++) {
3512 if (adapter->wvbr & (1 << j) ||
3513 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3514 dev_warn(&adapter->pdev->dev,
3515 "Spoof event(s) detected on VF %d\n", j);
3516 adapter->wvbr &=
3517 ~((1 << j) |
3518 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3519 }
3520 }
3521 }
3522
3523 /* Need to wait a few seconds after link up to get diagnostic information from
3524 * the phy */
3525 static void igb_update_phy_info(unsigned long data)
3526 {
3527 struct igb_adapter *adapter = (struct igb_adapter *) data;
3528 igb_get_phy_info(&adapter->hw);
3529 }
3530
3531 /**
3532 * igb_has_link - check shared code for link and determine up/down
3533 * @adapter: pointer to driver private info
3534 **/
3535 bool igb_has_link(struct igb_adapter *adapter)
3536 {
3537 struct e1000_hw *hw = &adapter->hw;
3538 bool link_active = false;
3539 s32 ret_val = 0;
3540
3541 /* get_link_status is set on LSC (link status) interrupt or
3542 * rx sequence error interrupt. get_link_status will stay
3543 * false until the e1000_check_for_link establishes link
3544 * for copper adapters ONLY
3545 */
3546 switch (hw->phy.media_type) {
3547 case e1000_media_type_copper:
3548 if (hw->mac.get_link_status) {
3549 ret_val = hw->mac.ops.check_for_link(hw);
3550 link_active = !hw->mac.get_link_status;
3551 } else {
3552 link_active = true;
3553 }
3554 break;
3555 case e1000_media_type_internal_serdes:
3556 ret_val = hw->mac.ops.check_for_link(hw);
3557 link_active = hw->mac.serdes_has_link;
3558 break;
3559 default:
3560 case e1000_media_type_unknown:
3561 break;
3562 }
3563
3564 return link_active;
3565 }
3566
3567 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3568 {
3569 bool ret = false;
3570 u32 ctrl_ext, thstat;
3571
3572 /* check for thermal sensor event on i350, copper only */
3573 if (hw->mac.type == e1000_i350) {
3574 thstat = rd32(E1000_THSTAT);
3575 ctrl_ext = rd32(E1000_CTRL_EXT);
3576
3577 if ((hw->phy.media_type == e1000_media_type_copper) &&
3578 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3579 ret = !!(thstat & event);
3580 }
3581 }
3582
3583 return ret;
3584 }
3585
3586 /**
3587 * igb_watchdog - Timer Call-back
3588 * @data: pointer to adapter cast into an unsigned long
3589 **/
3590 static void igb_watchdog(unsigned long data)
3591 {
3592 struct igb_adapter *adapter = (struct igb_adapter *)data;
3593 /* Do the rest outside of interrupt context */
3594 schedule_work(&adapter->watchdog_task);
3595 }
3596
3597 static void igb_watchdog_task(struct work_struct *work)
3598 {
3599 struct igb_adapter *adapter = container_of(work,
3600 struct igb_adapter,
3601 watchdog_task);
3602 struct e1000_hw *hw = &adapter->hw;
3603 struct net_device *netdev = adapter->netdev;
3604 u32 link;
3605 int i;
3606
3607 link = igb_has_link(adapter);
3608 if (link) {
3609 if (!netif_carrier_ok(netdev)) {
3610 u32 ctrl;
3611 hw->mac.ops.get_speed_and_duplex(hw,
3612 &adapter->link_speed,
3613 &adapter->link_duplex);
3614
3615 ctrl = rd32(E1000_CTRL);
3616 /* Links status message must follow this format */
3617 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3618 "Flow Control: %s\n",
3619 netdev->name,
3620 adapter->link_speed,
3621 adapter->link_duplex == FULL_DUPLEX ?
3622 "Full Duplex" : "Half Duplex",
3623 ((ctrl & E1000_CTRL_TFCE) &&
3624 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3625 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3626 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3627
3628 /* check for thermal sensor event */
3629 if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3630 printk(KERN_INFO "igb: %s The network adapter "
3631 "link speed was downshifted "
3632 "because it overheated.\n",
3633 netdev->name);
3634 }
3635
3636 /* adjust timeout factor according to speed/duplex */
3637 adapter->tx_timeout_factor = 1;
3638 switch (adapter->link_speed) {
3639 case SPEED_10:
3640 adapter->tx_timeout_factor = 14;
3641 break;
3642 case SPEED_100:
3643 /* maybe add some timeout factor ? */
3644 break;
3645 }
3646
3647 netif_carrier_on(netdev);
3648
3649 igb_ping_all_vfs(adapter);
3650 igb_check_vf_rate_limit(adapter);
3651
3652 /* link state has changed, schedule phy info update */
3653 if (!test_bit(__IGB_DOWN, &adapter->state))
3654 mod_timer(&adapter->phy_info_timer,
3655 round_jiffies(jiffies + 2 * HZ));
3656 }
3657 } else {
3658 if (netif_carrier_ok(netdev)) {
3659 adapter->link_speed = 0;
3660 adapter->link_duplex = 0;
3661
3662 /* check for thermal sensor event */
3663 if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3664 printk(KERN_ERR "igb: %s The network adapter "
3665 "was stopped because it "
3666 "overheated.\n",
3667 netdev->name);
3668 }
3669
3670 /* Links status message must follow this format */
3671 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3672 netdev->name);
3673 netif_carrier_off(netdev);
3674
3675 igb_ping_all_vfs(adapter);
3676
3677 /* link state has changed, schedule phy info update */
3678 if (!test_bit(__IGB_DOWN, &adapter->state))
3679 mod_timer(&adapter->phy_info_timer,
3680 round_jiffies(jiffies + 2 * HZ));
3681 }
3682 }
3683
3684 spin_lock(&adapter->stats64_lock);
3685 igb_update_stats(adapter, &adapter->stats64);
3686 spin_unlock(&adapter->stats64_lock);
3687
3688 for (i = 0; i < adapter->num_tx_queues; i++) {
3689 struct igb_ring *tx_ring = adapter->tx_ring[i];
3690 if (!netif_carrier_ok(netdev)) {
3691 /* We've lost link, so the controller stops DMA,
3692 * but we've got queued Tx work that's never going
3693 * to get done, so reset controller to flush Tx.
3694 * (Do the reset outside of interrupt context). */
3695 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3696 adapter->tx_timeout_count++;
3697 schedule_work(&adapter->reset_task);
3698 /* return immediately since reset is imminent */
3699 return;
3700 }
3701 }
3702
3703 /* Force detection of hung controller every watchdog period */
3704 tx_ring->detect_tx_hung = true;
3705 }
3706
3707 /* Cause software interrupt to ensure rx ring is cleaned */
3708 if (adapter->msix_entries) {
3709 u32 eics = 0;
3710 for (i = 0; i < adapter->num_q_vectors; i++) {
3711 struct igb_q_vector *q_vector = adapter->q_vector[i];
3712 eics |= q_vector->eims_value;
3713 }
3714 wr32(E1000_EICS, eics);
3715 } else {
3716 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3717 }
3718
3719 igb_spoof_check(adapter);
3720
3721 /* Reset the timer */
3722 if (!test_bit(__IGB_DOWN, &adapter->state))
3723 mod_timer(&adapter->watchdog_timer,
3724 round_jiffies(jiffies + 2 * HZ));
3725 }
3726
/*
 * Latency classes used by the dynamic ITR code.  The first three values are
 * consecutive starting at zero; latency_invalid is a distinct sentinel.
 */
enum latency_range {
	lowest_latency = 0,
	low_latency,		/* 1 */
	bulk_latency,		/* 2 */
	latency_invalid = 255
};
3733
3734 /**
3735 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3736 *
3737 * Stores a new ITR value based on strictly on packet size. This
3738 * algorithm is less sophisticated than that used in igb_update_itr,
3739 * due to the difficulty of synchronizing statistics across multiple
3740 * receive rings. The divisors and thresholds used by this function
3741 * were determined based on theoretical maximum wire speed and testing
3742 * data, in order to minimize response time while increasing bulk
3743 * throughput.
3744 * This functionality is controlled by the InterruptThrottleRate module
3745 * parameter (see igb_param.c)
3746 * NOTE: This function is called only when operating in a multiqueue
3747 * receive environment.
3748 * @q_vector: pointer to q_vector
3749 **/
3750 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3751 {
3752 int new_val = q_vector->itr_val;
3753 int avg_wire_size = 0;
3754 struct igb_adapter *adapter = q_vector->adapter;
3755 struct igb_ring *ring;
3756 unsigned int packets;
3757
3758 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3759 * ints/sec - ITR timer value of 120 ticks.
3760 */
3761 if (adapter->link_speed != SPEED_1000) {
3762 new_val = 976;
3763 goto set_itr_val;
3764 }
3765
3766 ring = q_vector->rx_ring;
3767 if (ring) {
3768 packets = ACCESS_ONCE(ring->total_packets);
3769
3770 if (packets)
3771 avg_wire_size = ring->total_bytes / packets;
3772 }
3773
3774 ring = q_vector->tx_ring;
3775 if (ring) {
3776 packets = ACCESS_ONCE(ring->total_packets);
3777
3778 if (packets)
3779 avg_wire_size = max_t(u32, avg_wire_size,
3780 ring->total_bytes / packets);
3781 }
3782
3783 /* if avg_wire_size isn't set no work was done */
3784 if (!avg_wire_size)
3785 goto clear_counts;
3786
3787 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3788 avg_wire_size += 24;
3789
3790 /* Don't starve jumbo frames */
3791 avg_wire_size = min(avg_wire_size, 3000);
3792
3793 /* Give a little boost to mid-size frames */
3794 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3795 new_val = avg_wire_size / 3;
3796 else
3797 new_val = avg_wire_size / 2;
3798
3799 /* when in itr mode 3 do not exceed 20K ints/sec */
3800 if (adapter->rx_itr_setting == 3 && new_val < 196)
3801 new_val = 196;
3802
3803 set_itr_val:
3804 if (new_val != q_vector->itr_val) {
3805 q_vector->itr_val = new_val;
3806 q_vector->set_itr = 1;
3807 }
3808 clear_counts:
3809 if (q_vector->rx_ring) {
3810 q_vector->rx_ring->total_bytes = 0;
3811 q_vector->rx_ring->total_packets = 0;
3812 }
3813 if (q_vector->tx_ring) {
3814 q_vector->tx_ring->total_bytes = 0;
3815 q_vector->tx_ring->total_packets = 0;
3816 }
3817 }
3818
3819 /**
3820 * igb_update_itr - update the dynamic ITR value based on statistics
3821 * Stores a new ITR value based on packets and byte
3822 * counts during the last interrupt. The advantage of per interrupt
3823 * computation is faster updates and more accurate ITR for the current
3824 * traffic pattern. Constants in this function were computed
3825 * based on theoretical maximum wire speed and thresholds were set based
3826 * on testing data as well as attempting to minimize response time
3827 * while increasing bulk throughput.
3828 * this functionality is controlled by the InterruptThrottleRate module
3829 * parameter (see igb_param.c)
3830 * NOTE: These calculations are only valid when operating in a single-
3831 * queue environment.
3832 * @adapter: pointer to adapter
3833 * @itr_setting: current q_vector->itr_val
3834 * @packets: the number of packets during this measurement interval
3835 * @bytes: the number of bytes during this measurement interval
3836 **/
3837 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3838 int packets, int bytes)
3839 {
3840 unsigned int retval = itr_setting;
3841
3842 if (packets == 0)
3843 goto update_itr_done;
3844
3845 switch (itr_setting) {
3846 case lowest_latency:
3847 /* handle TSO and jumbo frames */
3848 if (bytes/packets > 8000)
3849 retval = bulk_latency;
3850 else if ((packets < 5) && (bytes > 512))
3851 retval = low_latency;
3852 break;
3853 case low_latency: /* 50 usec aka 20000 ints/s */
3854 if (bytes > 10000) {
3855 /* this if handles the TSO accounting */
3856 if (bytes/packets > 8000) {
3857 retval = bulk_latency;
3858 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3859 retval = bulk_latency;
3860 } else if ((packets > 35)) {
3861 retval = lowest_latency;
3862 }
3863 } else if (bytes/packets > 2000) {
3864 retval = bulk_latency;
3865 } else if (packets <= 2 && bytes < 512) {
3866 retval = lowest_latency;
3867 }
3868 break;
3869 case bulk_latency: /* 250 usec aka 4000 ints/s */
3870 if (bytes > 25000) {
3871 if (packets > 35)
3872 retval = low_latency;
3873 } else if (bytes < 1500) {
3874 retval = low_latency;
3875 }
3876 break;
3877 }
3878
3879 update_itr_done:
3880 return retval;
3881 }
3882
3883 static void igb_set_itr(struct igb_adapter *adapter)
3884 {
3885 struct igb_q_vector *q_vector = adapter->q_vector[0];
3886 u16 current_itr;
3887 u32 new_itr = q_vector->itr_val;
3888
3889 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3890 if (adapter->link_speed != SPEED_1000) {
3891 current_itr = 0;
3892 new_itr = 4000;
3893 goto set_itr_now;
3894 }
3895
3896 adapter->rx_itr = igb_update_itr(adapter,
3897 adapter->rx_itr,
3898 q_vector->rx_ring->total_packets,
3899 q_vector->rx_ring->total_bytes);
3900
3901 adapter->tx_itr = igb_update_itr(adapter,
3902 adapter->tx_itr,
3903 q_vector->tx_ring->total_packets,
3904 q_vector->tx_ring->total_bytes);
3905 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3906
3907 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3908 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3909 current_itr = low_latency;
3910
3911 switch (current_itr) {
3912 /* counts and packets in update_itr are dependent on these numbers */
3913 case lowest_latency:
3914 new_itr = 56; /* aka 70,000 ints/sec */
3915 break;
3916 case low_latency:
3917 new_itr = 196; /* aka 20,000 ints/sec */
3918 break;
3919 case bulk_latency:
3920 new_itr = 980; /* aka 4,000 ints/sec */
3921 break;
3922 default:
3923 break;
3924 }
3925
3926 set_itr_now:
3927 q_vector->rx_ring->total_bytes = 0;
3928 q_vector->rx_ring->total_packets = 0;
3929 q_vector->tx_ring->total_bytes = 0;
3930 q_vector->tx_ring->total_packets = 0;
3931
3932 if (new_itr != q_vector->itr_val) {
3933 /* this attempts to bias the interrupt rate towards Bulk
3934 * by adding intermediate steps when interrupt rate is
3935 * increasing */
3936 new_itr = new_itr > q_vector->itr_val ?
3937 max((new_itr * q_vector->itr_val) /
3938 (new_itr + (q_vector->itr_val >> 2)),
3939 new_itr) :
3940 new_itr;
3941 /* Don't write the value here; it resets the adapter's
3942 * internal timer, and causes us to delay far longer than
3943 * we should between interrupts. Instead, we write the ITR
3944 * value at the beginning of the next interrupt so the timing
3945 * ends up being correct.
3946 */
3947 q_vector->itr_val = new_itr;
3948 q_vector->set_itr = 1;
3949 }
3950 }
3951
/*
 * Per-packet Tx flags.  The low bits are individual feature flags; when
 * IGB_TX_FLAGS_VLAN is set, the bits selected by IGB_TX_FLAGS_VLAN_MASK
 * (the upper 16 bits, see IGB_TX_FLAGS_VLAN_SHIFT) are copied into the
 * context descriptor's vlan_macip_lens field.
 */
#define IGB_TX_FLAGS_CSUM		0x00000001
#define IGB_TX_FLAGS_VLAN		0x00000002
#define IGB_TX_FLAGS_TSO		0x00000004
#define IGB_TX_FLAGS_IPV4		0x00000008
#define IGB_TX_FLAGS_TSTAMP		0x00000010
#define IGB_TX_FLAGS_VLAN_MASK		0xffff0000
#define IGB_TX_FLAGS_VLAN_SHIFT		16
3959
3960 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3961 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3962 {
3963 struct e1000_adv_tx_context_desc *context_desc;
3964 unsigned int i;
3965 int err;
3966 struct igb_buffer *buffer_info;
3967 u32 info = 0, tu_cmd = 0;
3968 u32 mss_l4len_idx;
3969 u8 l4len;
3970
3971 if (skb_header_cloned(skb)) {
3972 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3973 if (err)
3974 return err;
3975 }
3976
3977 l4len = tcp_hdrlen(skb);
3978 *hdr_len += l4len;
3979
3980 if (skb->protocol == htons(ETH_P_IP)) {
3981 struct iphdr *iph = ip_hdr(skb);
3982 iph->tot_len = 0;
3983 iph->check = 0;
3984 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3985 iph->daddr, 0,
3986 IPPROTO_TCP,
3987 0);
3988 } else if (skb_is_gso_v6(skb)) {
3989 ipv6_hdr(skb)->payload_len = 0;
3990 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3991 &ipv6_hdr(skb)->daddr,
3992 0, IPPROTO_TCP, 0);
3993 }
3994
3995 i = tx_ring->next_to_use;
3996
3997 buffer_info = &tx_ring->buffer_info[i];
3998 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3999 /* VLAN MACLEN IPLEN */
4000 if (tx_flags & IGB_TX_FLAGS_VLAN)
4001 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4002 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4003 *hdr_len += skb_network_offset(skb);
4004 info |= skb_network_header_len(skb);
4005 *hdr_len += skb_network_header_len(skb);
4006 context_desc->vlan_macip_lens = cpu_to_le32(info);
4007
4008 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4009 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4010
4011 if (skb->protocol == htons(ETH_P_IP))
4012 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4013 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4014
4015 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4016
4017 /* MSS L4LEN IDX */
4018 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
4019 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
4020
4021 /* For 82575, context index must be unique per ring. */
4022 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4023 mss_l4len_idx |= tx_ring->reg_idx << 4;
4024
4025 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4026 context_desc->seqnum_seed = 0;
4027
4028 buffer_info->time_stamp = jiffies;
4029 buffer_info->next_to_watch = i;
4030 buffer_info->dma = 0;
4031 i++;
4032 if (i == tx_ring->count)
4033 i = 0;
4034
4035 tx_ring->next_to_use = i;
4036
4037 return true;
4038 }
4039
4040 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4041 struct sk_buff *skb, u32 tx_flags)
4042 {
4043 struct e1000_adv_tx_context_desc *context_desc;
4044 struct device *dev = tx_ring->dev;
4045 struct igb_buffer *buffer_info;
4046 u32 info = 0, tu_cmd = 0;
4047 unsigned int i;
4048
4049 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4050 (tx_flags & IGB_TX_FLAGS_VLAN)) {
4051 i = tx_ring->next_to_use;
4052 buffer_info = &tx_ring->buffer_info[i];
4053 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4054
4055 if (tx_flags & IGB_TX_FLAGS_VLAN)
4056 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4057
4058 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4059 if (skb->ip_summed == CHECKSUM_PARTIAL)
4060 info |= skb_network_header_len(skb);
4061
4062 context_desc->vlan_macip_lens = cpu_to_le32(info);
4063
4064 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4065
4066 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4067 __be16 protocol;
4068
4069 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4070 const struct vlan_ethhdr *vhdr =
4071 (const struct vlan_ethhdr*)skb->data;
4072
4073 protocol = vhdr->h_vlan_encapsulated_proto;
4074 } else {
4075 protocol = skb->protocol;
4076 }
4077
4078 switch (protocol) {
4079 case cpu_to_be16(ETH_P_IP):
4080 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4081 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4082 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4083 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4084 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4085 break;
4086 case cpu_to_be16(ETH_P_IPV6):
4087 /* XXX what about other V6 headers?? */
4088 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4089 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4090 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4091 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4092 break;
4093 default:
4094 if (unlikely(net_ratelimit()))
4095 dev_warn(dev,
4096 "partial checksum but proto=%x!\n",
4097 skb->protocol);
4098 break;
4099 }
4100 }
4101
4102 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4103 context_desc->seqnum_seed = 0;
4104 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4105 context_desc->mss_l4len_idx =
4106 cpu_to_le32(tx_ring->reg_idx << 4);
4107
4108 buffer_info->time_stamp = jiffies;
4109 buffer_info->next_to_watch = i;
4110 buffer_info->dma = 0;
4111
4112 i++;
4113 if (i == tx_ring->count)
4114 i = 0;
4115 tx_ring->next_to_use = i;
4116
4117 return true;
4118 }
4119 return false;
4120 }
4121
4122 #define IGB_MAX_TXD_PWR 16
4123 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
4124
4125 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4126 unsigned int first)
4127 {
4128 struct igb_buffer *buffer_info;
4129 struct device *dev = tx_ring->dev;
4130 unsigned int hlen = skb_headlen(skb);
4131 unsigned int count = 0, i;
4132 unsigned int f;
4133 u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4134
4135 i = tx_ring->next_to_use;
4136
4137 buffer_info = &tx_ring->buffer_info[i];
4138 BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4139 buffer_info->length = hlen;
4140 /* set time_stamp *before* dma to help avoid a possible race */
4141 buffer_info->time_stamp = jiffies;
4142 buffer_info->next_to_watch = i;
4143 buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4144 DMA_TO_DEVICE);
4145 if (dma_mapping_error(dev, buffer_info->dma))
4146 goto dma_error;
4147
4148 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4149 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4150 unsigned int len = frag->size;
4151
4152 count++;
4153 i++;
4154 if (i == tx_ring->count)
4155 i = 0;
4156
4157 buffer_info = &tx_ring->buffer_info[i];
4158 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4159 buffer_info->length = len;
4160 buffer_info->time_stamp = jiffies;
4161 buffer_info->next_to_watch = i;
4162 buffer_info->mapped_as_page = true;
4163 buffer_info->dma = skb_frag_dma_map(dev, frag, 0, len,
4164 DMA_TO_DEVICE);
4165 if (dma_mapping_error(dev, buffer_info->dma))
4166 goto dma_error;
4167
4168 }
4169
4170 tx_ring->buffer_info[i].skb = skb;
4171 tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4172 /* multiply data chunks by size of headers */
4173 tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4174 tx_ring->buffer_info[i].gso_segs = gso_segs;
4175 tx_ring->buffer_info[first].next_to_watch = i;
4176
4177 return ++count;
4178
4179 dma_error:
4180 dev_err(dev, "TX DMA map failed\n");
4181
4182 /* clear timestamp and dma mappings for failed buffer_info mapping */
4183 buffer_info->dma = 0;
4184 buffer_info->time_stamp = 0;
4185 buffer_info->length = 0;
4186 buffer_info->next_to_watch = 0;
4187 buffer_info->mapped_as_page = false;
4188
4189 /* clear timestamp and dma mappings for remaining portion of packet */
4190 while (count--) {
4191 if (i == 0)
4192 i = tx_ring->count;
4193 i--;
4194 buffer_info = &tx_ring->buffer_info[i];
4195 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4196 }
4197
4198 return 0;
4199 }
4200
4201 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4202 u32 tx_flags, int count, u32 paylen,
4203 u8 hdr_len)
4204 {
4205 union e1000_adv_tx_desc *tx_desc;
4206 struct igb_buffer *buffer_info;
4207 u32 olinfo_status = 0, cmd_type_len;
4208 unsigned int i = tx_ring->next_to_use;
4209
4210 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4211 E1000_ADVTXD_DCMD_DEXT);
4212
4213 if (tx_flags & IGB_TX_FLAGS_VLAN)
4214 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4215
4216 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4217 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4218
4219 if (tx_flags & IGB_TX_FLAGS_TSO) {
4220 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4221
4222 /* insert tcp checksum */
4223 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4224
4225 /* insert ip checksum */
4226 if (tx_flags & IGB_TX_FLAGS_IPV4)
4227 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4228
4229 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4230 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4231 }
4232
4233 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4234 (tx_flags & (IGB_TX_FLAGS_CSUM |
4235 IGB_TX_FLAGS_TSO |
4236 IGB_TX_FLAGS_VLAN)))
4237 olinfo_status |= tx_ring->reg_idx << 4;
4238
4239 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4240
4241 do {
4242 buffer_info = &tx_ring->buffer_info[i];
4243 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4244 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4245 tx_desc->read.cmd_type_len =
4246 cpu_to_le32(cmd_type_len | buffer_info->length);
4247 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4248 count--;
4249 i++;
4250 if (i == tx_ring->count)
4251 i = 0;
4252 } while (count > 0);
4253
4254 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4255 /* Force memory writes to complete before letting h/w
4256 * know there are new descriptors to fetch. (Only
4257 * applicable for weak-ordered memory model archs,
4258 * such as IA-64). */
4259 wmb();
4260
4261 tx_ring->next_to_use = i;
4262 writel(i, tx_ring->tail);
4263 /* we need this if more than one processor can write to our tail
4264 * at a time, it syncronizes IO on IA64/Altix systems */
4265 mmiowb();
4266 }
4267
4268 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4269 {
4270 struct net_device *netdev = tx_ring->netdev;
4271
4272 netif_stop_subqueue(netdev, tx_ring->queue_index);
4273
4274 /* Herbert's original patch had:
4275 * smp_mb__after_netif_stop_queue();
4276 * but since that doesn't exist yet, just open code it. */
4277 smp_mb();
4278
4279 /* We need to check again in a case another CPU has just
4280 * made room available. */
4281 if (igb_desc_unused(tx_ring) < size)
4282 return -EBUSY;
4283
4284 /* A reprieve! */
4285 netif_wake_subqueue(netdev, tx_ring->queue_index);
4286
4287 u64_stats_update_begin(&tx_ring->tx_syncp2);
4288 tx_ring->tx_stats.restart_queue2++;
4289 u64_stats_update_end(&tx_ring->tx_syncp2);
4290
4291 return 0;
4292 }
4293
/*
 * Fast path: returns 0 when at least @size descriptors are unused,
 * otherwise defers to __igb_maybe_stop_tx() and returns its result.
 */
static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	if (igb_desc_unused(tx_ring) < size)
		return __igb_maybe_stop_tx(tx_ring, size);

	return 0;
}
4300
4301 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4302 struct igb_ring *tx_ring)
4303 {
4304 int tso = 0, count;
4305 u32 tx_flags = 0;
4306 u16 first;
4307 u8 hdr_len = 0;
4308
4309 /* need: 1 descriptor per page,
4310 * + 2 desc gap to keep tail from touching head,
4311 * + 1 desc for skb->data,
4312 * + 1 desc for context descriptor,
4313 * otherwise try next time */
4314 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4315 /* this is a hard error */
4316 return NETDEV_TX_BUSY;
4317 }
4318
4319 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4320 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4321 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4322 }
4323
4324 if (vlan_tx_tag_present(skb)) {
4325 tx_flags |= IGB_TX_FLAGS_VLAN;
4326 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4327 }
4328
4329 if (skb->protocol == htons(ETH_P_IP))
4330 tx_flags |= IGB_TX_FLAGS_IPV4;
4331
4332 first = tx_ring->next_to_use;
4333 if (skb_is_gso(skb)) {
4334 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4335
4336 if (tso < 0) {
4337 dev_kfree_skb_any(skb);
4338 return NETDEV_TX_OK;
4339 }
4340 }
4341
4342 if (tso)
4343 tx_flags |= IGB_TX_FLAGS_TSO;
4344 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4345 (skb->ip_summed == CHECKSUM_PARTIAL))
4346 tx_flags |= IGB_TX_FLAGS_CSUM;
4347
4348 /*
4349 * count reflects descriptors mapped, if 0 or less then mapping error
4350 * has occurred and we need to rewind the descriptor queue
4351 */
4352 count = igb_tx_map_adv(tx_ring, skb, first);
4353 if (!count) {
4354 dev_kfree_skb_any(skb);
4355 tx_ring->buffer_info[first].time_stamp = 0;
4356 tx_ring->next_to_use = first;
4357 return NETDEV_TX_OK;
4358 }
4359
4360 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4361
4362 /* Make sure there is space in the ring for the next send. */
4363 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4364
4365 return NETDEV_TX_OK;
4366 }
4367
4368 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4369 struct net_device *netdev)
4370 {
4371 struct igb_adapter *adapter = netdev_priv(netdev);
4372 struct igb_ring *tx_ring;
4373 int r_idx = 0;
4374
4375 if (test_bit(__IGB_DOWN, &adapter->state)) {
4376 dev_kfree_skb_any(skb);
4377 return NETDEV_TX_OK;
4378 }
4379
4380 if (skb->len <= 0) {
4381 dev_kfree_skb_any(skb);
4382 return NETDEV_TX_OK;
4383 }
4384
4385 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4386 tx_ring = adapter->multi_tx_table[r_idx];
4387
4388 /* This goes back to the question of how to logically map a tx queue
4389 * to a flow. Right now, performance is impacted slightly negatively
4390 * if using multiple tx queues. If the stack breaks away from a
4391 * single qdisc implementation, we can look at this again. */
4392 return igb_xmit_frame_ring_adv(skb, tx_ring);
4393 }
4394
4395 /**
4396 * igb_tx_timeout - Respond to a Tx Hang
4397 * @netdev: network interface device structure
4398 **/
4399 static void igb_tx_timeout(struct net_device *netdev)
4400 {
4401 struct igb_adapter *adapter = netdev_priv(netdev);
4402 struct e1000_hw *hw = &adapter->hw;
4403
4404 /* Do the reset outside of interrupt context */
4405 adapter->tx_timeout_count++;
4406
4407 if (hw->mac.type == e1000_82580)
4408 hw->dev_spec._82575.global_device_reset = true;
4409
4410 schedule_work(&adapter->reset_task);
4411 wr32(E1000_EICS,
4412 (adapter->eims_enable_mask & ~adapter->eims_other));
4413 }
4414
4415 static void igb_reset_task(struct work_struct *work)
4416 {
4417 struct igb_adapter *adapter;
4418 adapter = container_of(work, struct igb_adapter, reset_task);
4419
4420 igb_dump(adapter);
4421 netdev_err(adapter->netdev, "Reset adapter\n");
4422 igb_reinit_locked(adapter);
4423 }
4424
4425 /**
4426 * igb_get_stats64 - Get System Network Statistics
4427 * @netdev: network interface device structure
4428 * @stats: rtnl_link_stats64 pointer
4429 *
4430 **/
4431 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4432 struct rtnl_link_stats64 *stats)
4433 {
4434 struct igb_adapter *adapter = netdev_priv(netdev);
4435
4436 spin_lock(&adapter->stats64_lock);
4437 igb_update_stats(adapter, &adapter->stats64);
4438 memcpy(stats, &adapter->stats64, sizeof(*stats));
4439 spin_unlock(&adapter->stats64_lock);
4440
4441 return stats;
4442 }
4443
4444 /**
4445 * igb_change_mtu - Change the Maximum Transfer Unit
4446 * @netdev: network interface device structure
4447 * @new_mtu: new value for maximum frame size
4448 *
4449 * Returns 0 on success, negative on failure
4450 **/
4451 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4452 {
4453 struct igb_adapter *adapter = netdev_priv(netdev);
4454 struct pci_dev *pdev = adapter->pdev;
4455 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4456
4457 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4458 dev_err(&pdev->dev, "Invalid MTU setting\n");
4459 return -EINVAL;
4460 }
4461
4462 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4463 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4464 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4465 return -EINVAL;
4466 }
4467
4468 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4469 msleep(1);
4470
4471 /* igb_down has a dependency on max_frame_size */
4472 adapter->max_frame_size = max_frame;
4473
4474 if (netif_running(netdev))
4475 igb_down(adapter);
4476
4477 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4478 netdev->mtu, new_mtu);
4479 netdev->mtu = new_mtu;
4480
4481 if (netif_running(netdev))
4482 igb_up(adapter);
4483 else
4484 igb_reset(adapter);
4485
4486 clear_bit(__IGB_RESETTING, &adapter->state);
4487
4488 return 0;
4489 }
4490
4491 /**
4492 * igb_update_stats - Update the board statistics counters
4493 * @adapter: board private structure
4494 **/
4495
4496 void igb_update_stats(struct igb_adapter *adapter,
4497 struct rtnl_link_stats64 *net_stats)
4498 {
4499 struct e1000_hw *hw = &adapter->hw;
4500 struct pci_dev *pdev = adapter->pdev;
4501 u32 reg, mpc;
4502 u16 phy_tmp;
4503 int i;
4504 u64 bytes, packets;
4505 unsigned int start;
4506 u64 _bytes, _packets;
4507
4508 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4509
4510 /*
4511 * Prevent stats update while adapter is being reset, or if the pci
4512 * connection is down.
4513 */
4514 if (adapter->link_speed == 0)
4515 return;
4516 if (pci_channel_offline(pdev))
4517 return;
4518
4519 bytes = 0;
4520 packets = 0;
4521 for (i = 0; i < adapter->num_rx_queues; i++) {
4522 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4523 struct igb_ring *ring = adapter->rx_ring[i];
4524
4525 ring->rx_stats.drops += rqdpc_tmp;
4526 net_stats->rx_fifo_errors += rqdpc_tmp;
4527
4528 do {
4529 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4530 _bytes = ring->rx_stats.bytes;
4531 _packets = ring->rx_stats.packets;
4532 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4533 bytes += _bytes;
4534 packets += _packets;
4535 }
4536
4537 net_stats->rx_bytes = bytes;
4538 net_stats->rx_packets = packets;
4539
4540 bytes = 0;
4541 packets = 0;
4542 for (i = 0; i < adapter->num_tx_queues; i++) {
4543 struct igb_ring *ring = adapter->tx_ring[i];
4544 do {
4545 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4546 _bytes = ring->tx_stats.bytes;
4547 _packets = ring->tx_stats.packets;
4548 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4549 bytes += _bytes;
4550 packets += _packets;
4551 }
4552 net_stats->tx_bytes = bytes;
4553 net_stats->tx_packets = packets;
4554
4555 /* read stats registers */
4556 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4557 adapter->stats.gprc += rd32(E1000_GPRC);
4558 adapter->stats.gorc += rd32(E1000_GORCL);
4559 rd32(E1000_GORCH); /* clear GORCL */
4560 adapter->stats.bprc += rd32(E1000_BPRC);
4561 adapter->stats.mprc += rd32(E1000_MPRC);
4562 adapter->stats.roc += rd32(E1000_ROC);
4563
4564 adapter->stats.prc64 += rd32(E1000_PRC64);
4565 adapter->stats.prc127 += rd32(E1000_PRC127);
4566 adapter->stats.prc255 += rd32(E1000_PRC255);
4567 adapter->stats.prc511 += rd32(E1000_PRC511);
4568 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4569 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4570 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4571 adapter->stats.sec += rd32(E1000_SEC);
4572
4573 mpc = rd32(E1000_MPC);
4574 adapter->stats.mpc += mpc;
4575 net_stats->rx_fifo_errors += mpc;
4576 adapter->stats.scc += rd32(E1000_SCC);
4577 adapter->stats.ecol += rd32(E1000_ECOL);
4578 adapter->stats.mcc += rd32(E1000_MCC);
4579 adapter->stats.latecol += rd32(E1000_LATECOL);
4580 adapter->stats.dc += rd32(E1000_DC);
4581 adapter->stats.rlec += rd32(E1000_RLEC);
4582 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4583 adapter->stats.xontxc += rd32(E1000_XONTXC);
4584 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4585 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4586 adapter->stats.fcruc += rd32(E1000_FCRUC);
4587 adapter->stats.gptc += rd32(E1000_GPTC);
4588 adapter->stats.gotc += rd32(E1000_GOTCL);
4589 rd32(E1000_GOTCH); /* clear GOTCL */
4590 adapter->stats.rnbc += rd32(E1000_RNBC);
4591 adapter->stats.ruc += rd32(E1000_RUC);
4592 adapter->stats.rfc += rd32(E1000_RFC);
4593 adapter->stats.rjc += rd32(E1000_RJC);
4594 adapter->stats.tor += rd32(E1000_TORH);
4595 adapter->stats.tot += rd32(E1000_TOTH);
4596 adapter->stats.tpr += rd32(E1000_TPR);
4597
4598 adapter->stats.ptc64 += rd32(E1000_PTC64);
4599 adapter->stats.ptc127 += rd32(E1000_PTC127);
4600 adapter->stats.ptc255 += rd32(E1000_PTC255);
4601 adapter->stats.ptc511 += rd32(E1000_PTC511);
4602 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4603 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4604
4605 adapter->stats.mptc += rd32(E1000_MPTC);
4606 adapter->stats.bptc += rd32(E1000_BPTC);
4607
4608 adapter->stats.tpt += rd32(E1000_TPT);
4609 adapter->stats.colc += rd32(E1000_COLC);
4610
4611 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4612 /* read internal phy specific stats */
4613 reg = rd32(E1000_CTRL_EXT);
4614 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4615 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4616 adapter->stats.tncrs += rd32(E1000_TNCRS);
4617 }
4618
4619 adapter->stats.tsctc += rd32(E1000_TSCTC);
4620 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4621
4622 adapter->stats.iac += rd32(E1000_IAC);
4623 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4624 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4625 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4626 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4627 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4628 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4629 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4630 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4631
4632 /* Fill out the OS statistics structure */
4633 net_stats->multicast = adapter->stats.mprc;
4634 net_stats->collisions = adapter->stats.colc;
4635
4636 /* Rx Errors */
4637
4638 /* RLEC on some newer hardware can be incorrect so build
4639 * our own version based on RUC and ROC */
4640 net_stats->rx_errors = adapter->stats.rxerrc +
4641 adapter->stats.crcerrs + adapter->stats.algnerrc +
4642 adapter->stats.ruc + adapter->stats.roc +
4643 adapter->stats.cexterr;
4644 net_stats->rx_length_errors = adapter->stats.ruc +
4645 adapter->stats.roc;
4646 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4647 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4648 net_stats->rx_missed_errors = adapter->stats.mpc;
4649
4650 /* Tx Errors */
4651 net_stats->tx_errors = adapter->stats.ecol +
4652 adapter->stats.latecol;
4653 net_stats->tx_aborted_errors = adapter->stats.ecol;
4654 net_stats->tx_window_errors = adapter->stats.latecol;
4655 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4656
4657 /* Tx Dropped needs to be maintained elsewhere */
4658
4659 /* Phy Stats */
4660 if (hw->phy.media_type == e1000_media_type_copper) {
4661 if ((adapter->link_speed == SPEED_1000) &&
4662 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4663 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4664 adapter->phy_stats.idle_errors += phy_tmp;
4665 }
4666 }
4667
4668 /* Management Stats */
4669 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4670 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4671 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4672
4673 /* OS2BMC Stats */
4674 reg = rd32(E1000_MANC);
4675 if (reg & E1000_MANC_EN_BMC2OS) {
4676 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4677 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4678 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4679 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4680 }
4681 }
4682
4683 static irqreturn_t igb_msix_other(int irq, void *data)
4684 {
4685 struct igb_adapter *adapter = data;
4686 struct e1000_hw *hw = &adapter->hw;
4687 u32 icr = rd32(E1000_ICR);
4688 /* reading ICR causes bit 31 of EICR to be cleared */
4689
4690 if (icr & E1000_ICR_DRSTA)
4691 schedule_work(&adapter->reset_task);
4692
4693 if (icr & E1000_ICR_DOUTSYNC) {
4694 /* HW is reporting DMA is out of sync */
4695 adapter->stats.doosync++;
4696 /* The DMA Out of Sync is also indication of a spoof event
4697 * in IOV mode. Check the Wrong VM Behavior register to
4698 * see if it is really a spoof event. */
4699 igb_check_wvbr(adapter);
4700 }
4701
4702 /* Check for a mailbox event */
4703 if (icr & E1000_ICR_VMMB)
4704 igb_msg_task(adapter);
4705
4706 if (icr & E1000_ICR_LSC) {
4707 hw->mac.get_link_status = 1;
4708 /* guard against interrupt when we're going down */
4709 if (!test_bit(__IGB_DOWN, &adapter->state))
4710 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4711 }
4712
4713 if (adapter->vfs_allocated_count)
4714 wr32(E1000_IMS, E1000_IMS_LSC |
4715 E1000_IMS_VMMB |
4716 E1000_IMS_DOUTSYNC);
4717 else
4718 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4719 wr32(E1000_EIMS, adapter->eims_other);
4720
4721 return IRQ_HANDLED;
4722 }
4723
4724 static void igb_write_itr(struct igb_q_vector *q_vector)
4725 {
4726 struct igb_adapter *adapter = q_vector->adapter;
4727 u32 itr_val = q_vector->itr_val & 0x7FFC;
4728
4729 if (!q_vector->set_itr)
4730 return;
4731
4732 if (!itr_val)
4733 itr_val = 0x4;
4734
4735 if (adapter->hw.mac.type == e1000_82575)
4736 itr_val |= itr_val << 16;
4737 else
4738 itr_val |= 0x8000000;
4739
4740 writel(itr_val, q_vector->itr_register);
4741 q_vector->set_itr = 0;
4742 }
4743
4744 static irqreturn_t igb_msix_ring(int irq, void *data)
4745 {
4746 struct igb_q_vector *q_vector = data;
4747
4748 /* Write the ITR value calculated from the previous interrupt. */
4749 igb_write_itr(q_vector);
4750
4751 napi_schedule(&q_vector->napi);
4752
4753 return IRQ_HANDLED;
4754 }
4755
4756 #ifdef CONFIG_IGB_DCA
4757 static void igb_update_dca(struct igb_q_vector *q_vector)
4758 {
4759 struct igb_adapter *adapter = q_vector->adapter;
4760 struct e1000_hw *hw = &adapter->hw;
4761 int cpu = get_cpu();
4762
4763 if (q_vector->cpu == cpu)
4764 goto out_no_update;
4765
4766 if (q_vector->tx_ring) {
4767 int q = q_vector->tx_ring->reg_idx;
4768 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4769 if (hw->mac.type == e1000_82575) {
4770 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4771 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4772 } else {
4773 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4774 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4775 E1000_DCA_TXCTRL_CPUID_SHIFT;
4776 }
4777 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4778 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4779 }
4780 if (q_vector->rx_ring) {
4781 int q = q_vector->rx_ring->reg_idx;
4782 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4783 if (hw->mac.type == e1000_82575) {
4784 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4785 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4786 } else {
4787 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4788 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4789 E1000_DCA_RXCTRL_CPUID_SHIFT;
4790 }
4791 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4792 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4793 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4794 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4795 }
4796 q_vector->cpu = cpu;
4797 out_no_update:
4798 put_cpu();
4799 }
4800
4801 static void igb_setup_dca(struct igb_adapter *adapter)
4802 {
4803 struct e1000_hw *hw = &adapter->hw;
4804 int i;
4805
4806 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4807 return;
4808
4809 /* Always use CB2 mode, difference is masked in the CB driver. */
4810 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4811
4812 for (i = 0; i < adapter->num_q_vectors; i++) {
4813 adapter->q_vector[i]->cpu = -1;
4814 igb_update_dca(adapter->q_vector[i]);
4815 }
4816 }
4817
4818 static int __igb_notify_dca(struct device *dev, void *data)
4819 {
4820 struct net_device *netdev = dev_get_drvdata(dev);
4821 struct igb_adapter *adapter = netdev_priv(netdev);
4822 struct pci_dev *pdev = adapter->pdev;
4823 struct e1000_hw *hw = &adapter->hw;
4824 unsigned long event = *(unsigned long *)data;
4825
4826 switch (event) {
4827 case DCA_PROVIDER_ADD:
4828 /* if already enabled, don't do it again */
4829 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4830 break;
4831 if (dca_add_requester(dev) == 0) {
4832 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4833 dev_info(&pdev->dev, "DCA enabled\n");
4834 igb_setup_dca(adapter);
4835 break;
4836 }
4837 /* Fall Through since DCA is disabled. */
4838 case DCA_PROVIDER_REMOVE:
4839 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4840 /* without this a class_device is left
4841 * hanging around in the sysfs model */
4842 dca_remove_requester(dev);
4843 dev_info(&pdev->dev, "DCA disabled\n");
4844 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4845 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4846 }
4847 break;
4848 }
4849
4850 return 0;
4851 }
4852
4853 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4854 void *p)
4855 {
4856 int ret_val;
4857
4858 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4859 __igb_notify_dca);
4860
4861 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4862 }
4863 #endif /* CONFIG_IGB_DCA */
4864
4865 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4866 {
4867 struct e1000_hw *hw = &adapter->hw;
4868 u32 ping;
4869 int i;
4870
4871 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4872 ping = E1000_PF_CONTROL_MSG;
4873 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4874 ping |= E1000_VT_MSGTYPE_CTS;
4875 igb_write_mbx(hw, &ping, 1, i);
4876 }
4877 }
4878
4879 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4880 {
4881 struct e1000_hw *hw = &adapter->hw;
4882 u32 vmolr = rd32(E1000_VMOLR(vf));
4883 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4884
4885 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4886 IGB_VF_FLAG_MULTI_PROMISC);
4887 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4888
4889 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4890 vmolr |= E1000_VMOLR_MPME;
4891 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4892 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4893 } else {
4894 /*
4895 * if we have hashes and we are clearing a multicast promisc
4896 * flag we need to write the hashes to the MTA as this step
4897 * was previously skipped
4898 */
4899 if (vf_data->num_vf_mc_hashes > 30) {
4900 vmolr |= E1000_VMOLR_MPME;
4901 } else if (vf_data->num_vf_mc_hashes) {
4902 int j;
4903 vmolr |= E1000_VMOLR_ROMPE;
4904 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4905 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4906 }
4907 }
4908
4909 wr32(E1000_VMOLR(vf), vmolr);
4910
4911 /* there are flags left unprocessed, likely not supported */
4912 if (*msgbuf & E1000_VT_MSGINFO_MASK)
4913 return -EINVAL;
4914
4915 return 0;
4916
4917 }
4918
4919 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4920 u32 *msgbuf, u32 vf)
4921 {
4922 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4923 u16 *hash_list = (u16 *)&msgbuf[1];
4924 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4925 int i;
4926
4927 /* salt away the number of multicast addresses assigned
4928 * to this VF for later use to restore when the PF multi cast
4929 * list changes
4930 */
4931 vf_data->num_vf_mc_hashes = n;
4932
4933 /* only up to 30 hash values supported */
4934 if (n > 30)
4935 n = 30;
4936
4937 /* store the hashes for later use */
4938 for (i = 0; i < n; i++)
4939 vf_data->vf_mc_hashes[i] = hash_list[i];
4940
4941 /* Flush and reset the mta with the new values */
4942 igb_set_rx_mode(adapter->netdev);
4943
4944 return 0;
4945 }
4946
4947 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4948 {
4949 struct e1000_hw *hw = &adapter->hw;
4950 struct vf_data_storage *vf_data;
4951 int i, j;
4952
4953 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4954 u32 vmolr = rd32(E1000_VMOLR(i));
4955 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4956
4957 vf_data = &adapter->vf_data[i];
4958
4959 if ((vf_data->num_vf_mc_hashes > 30) ||
4960 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4961 vmolr |= E1000_VMOLR_MPME;
4962 } else if (vf_data->num_vf_mc_hashes) {
4963 vmolr |= E1000_VMOLR_ROMPE;
4964 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4965 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4966 }
4967 wr32(E1000_VMOLR(i), vmolr);
4968 }
4969 }
4970
4971 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4972 {
4973 struct e1000_hw *hw = &adapter->hw;
4974 u32 pool_mask, reg, vid;
4975 int i;
4976
4977 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4978
4979 /* Find the vlan filter for this id */
4980 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4981 reg = rd32(E1000_VLVF(i));
4982
4983 /* remove the vf from the pool */
4984 reg &= ~pool_mask;
4985
4986 /* if pool is empty then remove entry from vfta */
4987 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4988 (reg & E1000_VLVF_VLANID_ENABLE)) {
4989 reg = 0;
4990 vid = reg & E1000_VLVF_VLANID_MASK;
4991 igb_vfta_set(hw, vid, false);
4992 }
4993
4994 wr32(E1000_VLVF(i), reg);
4995 }
4996
4997 adapter->vf_data[vf].vlans_enabled = 0;
4998 }
4999
5000 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5001 {
5002 struct e1000_hw *hw = &adapter->hw;
5003 u32 reg, i;
5004
5005 /* The vlvf table only exists on 82576 hardware and newer */
5006 if (hw->mac.type < e1000_82576)
5007 return -1;
5008
5009 /* we only need to do this if VMDq is enabled */
5010 if (!adapter->vfs_allocated_count)
5011 return -1;
5012
5013 /* Find the vlan filter for this id */
5014 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5015 reg = rd32(E1000_VLVF(i));
5016 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5017 vid == (reg & E1000_VLVF_VLANID_MASK))
5018 break;
5019 }
5020
5021 if (add) {
5022 if (i == E1000_VLVF_ARRAY_SIZE) {
5023 /* Did not find a matching VLAN ID entry that was
5024 * enabled. Search for a free filter entry, i.e.
5025 * one without the enable bit set
5026 */
5027 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5028 reg = rd32(E1000_VLVF(i));
5029 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5030 break;
5031 }
5032 }
5033 if (i < E1000_VLVF_ARRAY_SIZE) {
5034 /* Found an enabled/available entry */
5035 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5036
5037 /* if !enabled we need to set this up in vfta */
5038 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5039 /* add VID to filter table */
5040 igb_vfta_set(hw, vid, true);
5041 reg |= E1000_VLVF_VLANID_ENABLE;
5042 }
5043 reg &= ~E1000_VLVF_VLANID_MASK;
5044 reg |= vid;
5045 wr32(E1000_VLVF(i), reg);
5046
5047 /* do not modify RLPML for PF devices */
5048 if (vf >= adapter->vfs_allocated_count)
5049 return 0;
5050
5051 if (!adapter->vf_data[vf].vlans_enabled) {
5052 u32 size;
5053 reg = rd32(E1000_VMOLR(vf));
5054 size = reg & E1000_VMOLR_RLPML_MASK;
5055 size += 4;
5056 reg &= ~E1000_VMOLR_RLPML_MASK;
5057 reg |= size;
5058 wr32(E1000_VMOLR(vf), reg);
5059 }
5060
5061 adapter->vf_data[vf].vlans_enabled++;
5062 return 0;
5063 }
5064 } else {
5065 if (i < E1000_VLVF_ARRAY_SIZE) {
5066 /* remove vf from the pool */
5067 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5068 /* if pool is empty then remove entry from vfta */
5069 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5070 reg = 0;
5071 igb_vfta_set(hw, vid, false);
5072 }
5073 wr32(E1000_VLVF(i), reg);
5074
5075 /* do not modify RLPML for PF devices */
5076 if (vf >= adapter->vfs_allocated_count)
5077 return 0;
5078
5079 adapter->vf_data[vf].vlans_enabled--;
5080 if (!adapter->vf_data[vf].vlans_enabled) {
5081 u32 size;
5082 reg = rd32(E1000_VMOLR(vf));
5083 size = reg & E1000_VMOLR_RLPML_MASK;
5084 size -= 4;
5085 reg &= ~E1000_VMOLR_RLPML_MASK;
5086 reg |= size;
5087 wr32(E1000_VMOLR(vf), reg);
5088 }
5089 }
5090 }
5091 return 0;
5092 }
5093
5094 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5095 {
5096 struct e1000_hw *hw = &adapter->hw;
5097
5098 if (vid)
5099 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5100 else
5101 wr32(E1000_VMVIR(vf), 0);
5102 }
5103
5104 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5105 int vf, u16 vlan, u8 qos)
5106 {
5107 int err = 0;
5108 struct igb_adapter *adapter = netdev_priv(netdev);
5109
5110 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5111 return -EINVAL;
5112 if (vlan || qos) {
5113 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5114 if (err)
5115 goto out;
5116 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5117 igb_set_vmolr(adapter, vf, !vlan);
5118 adapter->vf_data[vf].pf_vlan = vlan;
5119 adapter->vf_data[vf].pf_qos = qos;
5120 dev_info(&adapter->pdev->dev,
5121 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5122 if (test_bit(__IGB_DOWN, &adapter->state)) {
5123 dev_warn(&adapter->pdev->dev,
5124 "The VF VLAN has been set,"
5125 " but the PF device is not up.\n");
5126 dev_warn(&adapter->pdev->dev,
5127 "Bring the PF device up before"
5128 " attempting to use the VF device.\n");
5129 }
5130 } else {
5131 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5132 false, vf);
5133 igb_set_vmvir(adapter, vlan, vf);
5134 igb_set_vmolr(adapter, vf, true);
5135 adapter->vf_data[vf].pf_vlan = 0;
5136 adapter->vf_data[vf].pf_qos = 0;
5137 }
5138 out:
5139 return err;
5140 }
5141
5142 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5143 {
5144 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5145 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5146
5147 return igb_vlvf_set(adapter, vid, add, vf);
5148 }
5149
5150 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5151 {
5152 /* clear flags - except flag that indicates PF has set the MAC */
5153 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5154 adapter->vf_data[vf].last_nack = jiffies;
5155
5156 /* reset offloads to defaults */
5157 igb_set_vmolr(adapter, vf, true);
5158
5159 /* reset vlans for device */
5160 igb_clear_vf_vfta(adapter, vf);
5161 if (adapter->vf_data[vf].pf_vlan)
5162 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5163 adapter->vf_data[vf].pf_vlan,
5164 adapter->vf_data[vf].pf_qos);
5165 else
5166 igb_clear_vf_vfta(adapter, vf);
5167
5168 /* reset multicast table array for vf */
5169 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5170
5171 /* Flush and reset the mta with the new values */
5172 igb_set_rx_mode(adapter->netdev);
5173 }
5174
5175 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5176 {
5177 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5178
5179 /* generate a new mac address as we were hotplug removed/added */
5180 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5181 random_ether_addr(vf_mac);
5182
5183 /* process remaining reset events */
5184 igb_vf_reset(adapter, vf);
5185 }
5186
5187 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5188 {
5189 struct e1000_hw *hw = &adapter->hw;
5190 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5191 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5192 u32 reg, msgbuf[3];
5193 u8 *addr = (u8 *)(&msgbuf[1]);
5194
5195 /* process all the same items cleared in a function level reset */
5196 igb_vf_reset(adapter, vf);
5197
5198 /* set vf mac address */
5199 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5200
5201 /* enable transmit and receive for vf */
5202 reg = rd32(E1000_VFTE);
5203 wr32(E1000_VFTE, reg | (1 << vf));
5204 reg = rd32(E1000_VFRE);
5205 wr32(E1000_VFRE, reg | (1 << vf));
5206
5207 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5208
5209 /* reply to reset with ack and vf mac address */
5210 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5211 memcpy(addr, vf_mac, 6);
5212 igb_write_mbx(hw, msgbuf, 3, vf);
5213 }
5214
5215 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5216 {
5217 /*
5218 * The VF MAC Address is stored in a packed array of bytes
5219 * starting at the second 32 bit word of the msg array
5220 */
5221 unsigned char *addr = (char *)&msg[1];
5222 int err = -1;
5223
5224 if (is_valid_ether_addr(addr))
5225 err = igb_set_vf_mac(adapter, vf, addr);
5226
5227 return err;
5228 }
5229
5230 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5231 {
5232 struct e1000_hw *hw = &adapter->hw;
5233 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5234 u32 msg = E1000_VT_MSGTYPE_NACK;
5235
5236 /* if device isn't clear to send it shouldn't be reading either */
5237 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5238 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5239 igb_write_mbx(hw, &msg, 1, vf);
5240 vf_data->last_nack = jiffies;
5241 }
5242 }
5243
5244 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5245 {
5246 struct pci_dev *pdev = adapter->pdev;
5247 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5248 struct e1000_hw *hw = &adapter->hw;
5249 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5250 s32 retval;
5251
5252 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5253
5254 if (retval) {
5255 /* if receive failed revoke VF CTS stats and restart init */
5256 dev_err(&pdev->dev, "Error receiving message from VF\n");
5257 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5258 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5259 return;
5260 goto out;
5261 }
5262
5263 /* this is a message we already processed, do nothing */
5264 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5265 return;
5266
5267 /*
5268 * until the vf completes a reset it should not be
5269 * allowed to start any configuration.
5270 */
5271
5272 if (msgbuf[0] == E1000_VF_RESET) {
5273 igb_vf_reset_msg(adapter, vf);
5274 return;
5275 }
5276
5277 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5278 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5279 return;
5280 retval = -1;
5281 goto out;
5282 }
5283
5284 switch ((msgbuf[0] & 0xFFFF)) {
5285 case E1000_VF_SET_MAC_ADDR:
5286 retval = -EINVAL;
5287 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5288 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5289 else
5290 dev_warn(&pdev->dev,
5291 "VF %d attempted to override administratively "
5292 "set MAC address\nReload the VF driver to "
5293 "resume operations\n", vf);
5294 break;
5295 case E1000_VF_SET_PROMISC:
5296 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5297 break;
5298 case E1000_VF_SET_MULTICAST:
5299 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5300 break;
5301 case E1000_VF_SET_LPE:
5302 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5303 break;
5304 case E1000_VF_SET_VLAN:
5305 retval = -1;
5306 if (vf_data->pf_vlan)
5307 dev_warn(&pdev->dev,
5308 "VF %d attempted to override administratively "
5309 "set VLAN tag\nReload the VF driver to "
5310 "resume operations\n", vf);
5311 else
5312 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5313 break;
5314 default:
5315 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5316 retval = -1;
5317 break;
5318 }
5319
5320 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5321 out:
5322 /* notify the VF of the results of what it sent us */
5323 if (retval)
5324 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5325 else
5326 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5327
5328 igb_write_mbx(hw, msgbuf, 1, vf);
5329 }
5330
5331 static void igb_msg_task(struct igb_adapter *adapter)
5332 {
5333 struct e1000_hw *hw = &adapter->hw;
5334 u32 vf;
5335
5336 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5337 /* process any reset requests */
5338 if (!igb_check_for_rst(hw, vf))
5339 igb_vf_reset_event(adapter, vf);
5340
5341 /* process any messages pending */
5342 if (!igb_check_for_msg(hw, vf))
5343 igb_rcv_msg_from_vf(adapter, vf);
5344
5345 /* process any acks */
5346 if (!igb_check_for_ack(hw, vf))
5347 igb_rcv_ack_from_vf(adapter, vf);
5348 }
5349 }
5350
5351 /**
5352 * igb_set_uta - Set unicast filter table address
5353 * @adapter: board private structure
5354 *
5355 * The unicast table address is a register array of 32-bit registers.
5356 * The table is meant to be used in a way similar to how the MTA is used
5357 * however due to certain limitations in the hardware it is necessary to
5358 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5359 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5360 **/
5361 static void igb_set_uta(struct igb_adapter *adapter)
5362 {
5363 struct e1000_hw *hw = &adapter->hw;
5364 int i;
5365
5366 /* The UTA table only exists on 82576 hardware and newer */
5367 if (hw->mac.type < e1000_82576)
5368 return;
5369
5370 /* we only need to do this if VMDq is enabled */
5371 if (!adapter->vfs_allocated_count)
5372 return;
5373
5374 for (i = 0; i < hw->mac.uta_reg_count; i++)
5375 array_wr32(E1000_UTA, i, ~0);
5376 }
5377
5378 /**
5379 * igb_intr_msi - Interrupt Handler
5380 * @irq: interrupt number
5381 * @data: pointer to a network interface device structure
5382 **/
5383 static irqreturn_t igb_intr_msi(int irq, void *data)
5384 {
5385 struct igb_adapter *adapter = data;
5386 struct igb_q_vector *q_vector = adapter->q_vector[0];
5387 struct e1000_hw *hw = &adapter->hw;
5388 /* read ICR disables interrupts using IAM */
5389 u32 icr = rd32(E1000_ICR);
5390
5391 igb_write_itr(q_vector);
5392
5393 if (icr & E1000_ICR_DRSTA)
5394 schedule_work(&adapter->reset_task);
5395
5396 if (icr & E1000_ICR_DOUTSYNC) {
5397 /* HW is reporting DMA is out of sync */
5398 adapter->stats.doosync++;
5399 }
5400
5401 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5402 hw->mac.get_link_status = 1;
5403 if (!test_bit(__IGB_DOWN, &adapter->state))
5404 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5405 }
5406
5407 napi_schedule(&q_vector->napi);
5408
5409 return IRQ_HANDLED;
5410 }
5411
5412 /**
5413 * igb_intr - Legacy Interrupt Handler
5414 * @irq: interrupt number
5415 * @data: pointer to a network interface device structure
5416 **/
5417 static irqreturn_t igb_intr(int irq, void *data)
5418 {
5419 struct igb_adapter *adapter = data;
5420 struct igb_q_vector *q_vector = adapter->q_vector[0];
5421 struct e1000_hw *hw = &adapter->hw;
5422 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5423 * need for the IMC write */
5424 u32 icr = rd32(E1000_ICR);
5425 if (!icr)
5426 return IRQ_NONE; /* Not our interrupt */
5427
5428 igb_write_itr(q_vector);
5429
5430 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5431 * not set, then the adapter didn't send an interrupt */
5432 if (!(icr & E1000_ICR_INT_ASSERTED))
5433 return IRQ_NONE;
5434
5435 if (icr & E1000_ICR_DRSTA)
5436 schedule_work(&adapter->reset_task);
5437
5438 if (icr & E1000_ICR_DOUTSYNC) {
5439 /* HW is reporting DMA is out of sync */
5440 adapter->stats.doosync++;
5441 }
5442
5443 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5444 hw->mac.get_link_status = 1;
5445 /* guard against interrupt when we're going down */
5446 if (!test_bit(__IGB_DOWN, &adapter->state))
5447 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5448 }
5449
5450 napi_schedule(&q_vector->napi);
5451
5452 return IRQ_HANDLED;
5453 }
5454
5455 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5456 {
5457 struct igb_adapter *adapter = q_vector->adapter;
5458 struct e1000_hw *hw = &adapter->hw;
5459
5460 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5461 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5462 if (!adapter->msix_entries)
5463 igb_set_itr(adapter);
5464 else
5465 igb_update_ring_itr(q_vector);
5466 }
5467
5468 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5469 if (adapter->msix_entries)
5470 wr32(E1000_EIMS, q_vector->eims_value);
5471 else
5472 igb_irq_enable(adapter);
5473 }
5474 }
5475
5476 /**
5477 * igb_poll - NAPI Rx polling callback
5478 * @napi: napi polling structure
5479 * @budget: count of how many packets we should handle
5480 **/
5481 static int igb_poll(struct napi_struct *napi, int budget)
5482 {
5483 struct igb_q_vector *q_vector = container_of(napi,
5484 struct igb_q_vector,
5485 napi);
5486 int tx_clean_complete = 1, work_done = 0;
5487
5488 #ifdef CONFIG_IGB_DCA
5489 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5490 igb_update_dca(q_vector);
5491 #endif
5492 if (q_vector->tx_ring)
5493 tx_clean_complete = igb_clean_tx_irq(q_vector);
5494
5495 if (q_vector->rx_ring)
5496 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5497
5498 if (!tx_clean_complete)
5499 work_done = budget;
5500
5501 /* If not enough Rx work done, exit the polling mode */
5502 if (work_done < budget) {
5503 napi_complete(napi);
5504 igb_ring_irq_enable(q_vector);
5505 }
5506
5507 return work_done;
5508 }
5509
5510 /**
5511 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5512 * @adapter: board private structure
5513 * @shhwtstamps: timestamp structure to update
5514 * @regval: unsigned 64bit system time value.
5515 *
5516 * We need to convert the system time value stored in the RX/TXSTMP registers
5517 * into a hwtstamp which can be used by the upper level timestamping functions
5518 */
5519 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5520 struct skb_shared_hwtstamps *shhwtstamps,
5521 u64 regval)
5522 {
5523 u64 ns;
5524
5525 /*
5526 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5527 * 24 to match clock shift we setup earlier.
5528 */
5529 if (adapter->hw.mac.type == e1000_82580)
5530 regval <<= IGB_82580_TSYNC_SHIFT;
5531
5532 ns = timecounter_cyc2time(&adapter->clock, regval);
5533 timecompare_update(&adapter->compare, ns);
5534 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5535 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5536 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5537 }
5538
5539 /**
5540 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5541 * @q_vector: pointer to q_vector containing needed info
5542 * @buffer: pointer to igb_buffer structure
5543 *
5544 * If we were asked to do hardware stamping and such a time stamp is
5545 * available, then it must have been for this skb here because we only
5546 * allow only one such packet into the queue.
5547 */
5548 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5549 {
5550 struct igb_adapter *adapter = q_vector->adapter;
5551 struct e1000_hw *hw = &adapter->hw;
5552 struct skb_shared_hwtstamps shhwtstamps;
5553 u64 regval;
5554
5555 /* if skb does not support hw timestamp or TX stamp not valid exit */
5556 if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5557 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5558 return;
5559
5560 regval = rd32(E1000_TXSTMPL);
5561 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5562
5563 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5564 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5565 }
5566
5567 /**
5568 * igb_clean_tx_irq - Reclaim resources after transmit completes
5569 * @q_vector: pointer to q_vector containing needed info
5570 * returns true if ring is completely cleaned
5571 **/
5572 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5573 {
5574 struct igb_adapter *adapter = q_vector->adapter;
5575 struct igb_ring *tx_ring = q_vector->tx_ring;
5576 struct net_device *netdev = tx_ring->netdev;
5577 struct e1000_hw *hw = &adapter->hw;
5578 struct igb_buffer *buffer_info;
5579 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5580 unsigned int total_bytes = 0, total_packets = 0;
5581 unsigned int i, eop, count = 0;
5582 bool cleaned = false;
5583
5584 i = tx_ring->next_to_clean;
5585 eop = tx_ring->buffer_info[i].next_to_watch;
5586 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5587
5588 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5589 (count < tx_ring->count)) {
5590 rmb(); /* read buffer_info after eop_desc status */
5591 for (cleaned = false; !cleaned; count++) {
5592 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5593 buffer_info = &tx_ring->buffer_info[i];
5594 cleaned = (i == eop);
5595
5596 if (buffer_info->skb) {
5597 total_bytes += buffer_info->bytecount;
5598 /* gso_segs is currently only valid for tcp */
5599 total_packets += buffer_info->gso_segs;
5600 igb_tx_hwtstamp(q_vector, buffer_info);
5601 }
5602
5603 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5604 tx_desc->wb.status = 0;
5605
5606 i++;
5607 if (i == tx_ring->count)
5608 i = 0;
5609 }
5610 eop = tx_ring->buffer_info[i].next_to_watch;
5611 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5612 }
5613
5614 tx_ring->next_to_clean = i;
5615
5616 if (unlikely(count &&
5617 netif_carrier_ok(netdev) &&
5618 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5619 /* Make sure that anybody stopping the queue after this
5620 * sees the new next_to_clean.
5621 */
5622 smp_mb();
5623 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5624 !(test_bit(__IGB_DOWN, &adapter->state))) {
5625 netif_wake_subqueue(netdev, tx_ring->queue_index);
5626
5627 u64_stats_update_begin(&tx_ring->tx_syncp);
5628 tx_ring->tx_stats.restart_queue++;
5629 u64_stats_update_end(&tx_ring->tx_syncp);
5630 }
5631 }
5632
5633 if (tx_ring->detect_tx_hung) {
5634 /* Detect a transmit hang in hardware, this serializes the
5635 * check with the clearing of time_stamp and movement of i */
5636 tx_ring->detect_tx_hung = false;
5637 if (tx_ring->buffer_info[i].time_stamp &&
5638 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5639 (adapter->tx_timeout_factor * HZ)) &&
5640 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5641
5642 /* detected Tx unit hang */
5643 dev_err(tx_ring->dev,
5644 "Detected Tx Unit Hang\n"
5645 " Tx Queue <%d>\n"
5646 " TDH <%x>\n"
5647 " TDT <%x>\n"
5648 " next_to_use <%x>\n"
5649 " next_to_clean <%x>\n"
5650 "buffer_info[next_to_clean]\n"
5651 " time_stamp <%lx>\n"
5652 " next_to_watch <%x>\n"
5653 " jiffies <%lx>\n"
5654 " desc.status <%x>\n",
5655 tx_ring->queue_index,
5656 readl(tx_ring->head),
5657 readl(tx_ring->tail),
5658 tx_ring->next_to_use,
5659 tx_ring->next_to_clean,
5660 tx_ring->buffer_info[eop].time_stamp,
5661 eop,
5662 jiffies,
5663 eop_desc->wb.status);
5664 netif_stop_subqueue(netdev, tx_ring->queue_index);
5665 }
5666 }
5667 tx_ring->total_bytes += total_bytes;
5668 tx_ring->total_packets += total_packets;
5669 u64_stats_update_begin(&tx_ring->tx_syncp);
5670 tx_ring->tx_stats.bytes += total_bytes;
5671 tx_ring->tx_stats.packets += total_packets;
5672 u64_stats_update_end(&tx_ring->tx_syncp);
5673 return count < tx_ring->count;
5674 }
5675
5676 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5677 u32 status_err, struct sk_buff *skb)
5678 {
5679 skb_checksum_none_assert(skb);
5680
5681 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5682 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5683 (status_err & E1000_RXD_STAT_IXSM))
5684 return;
5685
5686 /* TCP/UDP checksum error bit is set */
5687 if (status_err &
5688 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5689 /*
5690 * work around errata with sctp packets where the TCPE aka
5691 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5692 * packets, (aka let the stack check the crc32c)
5693 */
5694 if ((skb->len == 60) &&
5695 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5696 u64_stats_update_begin(&ring->rx_syncp);
5697 ring->rx_stats.csum_err++;
5698 u64_stats_update_end(&ring->rx_syncp);
5699 }
5700 /* let the stack verify checksum errors */
5701 return;
5702 }
5703 /* It must be a TCP or UDP packet with a valid checksum */
5704 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5705 skb->ip_summed = CHECKSUM_UNNECESSARY;
5706
5707 dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5708 }
5709
5710 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5711 struct sk_buff *skb)
5712 {
5713 struct igb_adapter *adapter = q_vector->adapter;
5714 struct e1000_hw *hw = &adapter->hw;
5715 u64 regval;
5716
5717 /*
5718 * If this bit is set, then the RX registers contain the time stamp. No
5719 * other packet will be time stamped until we read these registers, so
5720 * read the registers to make them available again. Because only one
5721 * packet can be time stamped at a time, we know that the register
5722 * values must belong to this one here and therefore we don't need to
5723 * compare any of the additional attributes stored for it.
5724 *
5725 * If nothing went wrong, then it should have a shared tx_flags that we
5726 * can turn into a skb_shared_hwtstamps.
5727 */
5728 if (staterr & E1000_RXDADV_STAT_TSIP) {
5729 u32 *stamp = (u32 *)skb->data;
5730 regval = le32_to_cpu(*(stamp + 2));
5731 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5732 skb_pull(skb, IGB_TS_HDR_LEN);
5733 } else {
5734 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5735 return;
5736
5737 regval = rd32(E1000_RXSTMPL);
5738 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5739 }
5740
5741 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5742 }
5743 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
5744 {
5745 /* HW will not DMA in data larger than the given buffer, even if it
5746 * parses the (NFS, of course) header to be larger. In that case, it
5747 * fills the header buffer and spills the rest into the page.
5748 */
5749 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5750 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5751 if (hlen > IGB_RX_HDR_LEN)
5752 hlen = IGB_RX_HDR_LEN;
5753 return hlen;
5754 }
5755
5756 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5757 int *work_done, int budget)
5758 {
5759 struct igb_ring *rx_ring = q_vector->rx_ring;
5760 struct net_device *netdev = rx_ring->netdev;
5761 struct device *dev = rx_ring->dev;
5762 union e1000_adv_rx_desc *rx_desc , *next_rxd;
5763 struct igb_buffer *buffer_info , *next_buffer;
5764 struct sk_buff *skb;
5765 bool cleaned = false;
5766 u16 cleaned_count = igb_desc_unused(rx_ring);
5767 int current_node = numa_node_id();
5768 unsigned int total_bytes = 0, total_packets = 0;
5769 unsigned int i;
5770 u32 staterr;
5771 u16 length;
5772
5773 i = rx_ring->next_to_clean;
5774 buffer_info = &rx_ring->buffer_info[i];
5775 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5776 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5777
5778 while (staterr & E1000_RXD_STAT_DD) {
5779 if (*work_done >= budget)
5780 break;
5781 (*work_done)++;
5782 rmb(); /* read descriptor and rx_buffer_info after status DD */
5783
5784 skb = buffer_info->skb;
5785 prefetch(skb->data - NET_IP_ALIGN);
5786 buffer_info->skb = NULL;
5787
5788 i++;
5789 if (i == rx_ring->count)
5790 i = 0;
5791
5792 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5793 prefetch(next_rxd);
5794 next_buffer = &rx_ring->buffer_info[i];
5795
5796 length = le16_to_cpu(rx_desc->wb.upper.length);
5797 cleaned = true;
5798 cleaned_count++;
5799
5800 if (buffer_info->dma) {
5801 dma_unmap_single(dev, buffer_info->dma,
5802 IGB_RX_HDR_LEN,
5803 DMA_FROM_DEVICE);
5804 buffer_info->dma = 0;
5805 skb_put(skb, igb_get_hlen(rx_desc));
5806 }
5807
5808 if (length) {
5809 dma_unmap_page(dev, buffer_info->page_dma,
5810 PAGE_SIZE / 2, DMA_FROM_DEVICE);
5811 buffer_info->page_dma = 0;
5812
5813 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5814 buffer_info->page,
5815 buffer_info->page_offset,
5816 length);
5817
5818 if ((page_count(buffer_info->page) != 1) ||
5819 (page_to_nid(buffer_info->page) != current_node))
5820 buffer_info->page = NULL;
5821 else
5822 get_page(buffer_info->page);
5823
5824 skb->len += length;
5825 skb->data_len += length;
5826 skb->truesize += length;
5827 }
5828
5829 if (!(staterr & E1000_RXD_STAT_EOP)) {
5830 buffer_info->skb = next_buffer->skb;
5831 buffer_info->dma = next_buffer->dma;
5832 next_buffer->skb = skb;
5833 next_buffer->dma = 0;
5834 goto next_desc;
5835 }
5836
5837 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5838 dev_kfree_skb_irq(skb);
5839 goto next_desc;
5840 }
5841
5842 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5843 igb_rx_hwtstamp(q_vector, staterr, skb);
5844 total_bytes += skb->len;
5845 total_packets++;
5846
5847 igb_rx_checksum_adv(rx_ring, staterr, skb);
5848
5849 skb->protocol = eth_type_trans(skb, netdev);
5850
5851 if (staterr & E1000_RXD_STAT_VP) {
5852 u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5853
5854 __vlan_hwaccel_put_tag(skb, vid);
5855 }
5856 napi_gro_receive(&q_vector->napi, skb);
5857
5858 next_desc:
5859 /* return some buffers to hardware, one at a time is too slow */
5860 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5861 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5862 cleaned_count = 0;
5863 }
5864
5865 /* use prefetched values */
5866 rx_desc = next_rxd;
5867 buffer_info = next_buffer;
5868 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5869 }
5870
5871 rx_ring->next_to_clean = i;
5872 u64_stats_update_begin(&rx_ring->rx_syncp);
5873 rx_ring->rx_stats.packets += total_packets;
5874 rx_ring->rx_stats.bytes += total_bytes;
5875 u64_stats_update_end(&rx_ring->rx_syncp);
5876 rx_ring->total_packets += total_packets;
5877 rx_ring->total_bytes += total_bytes;
5878
5879 if (cleaned_count)
5880 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5881
5882 return cleaned;
5883 }
5884
5885 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
5886 struct igb_buffer *bi)
5887 {
5888 struct sk_buff *skb = bi->skb;
5889 dma_addr_t dma = bi->dma;
5890
5891 if (dma)
5892 return true;
5893
5894 if (likely(!skb)) {
5895 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
5896 IGB_RX_HDR_LEN);
5897 bi->skb = skb;
5898 if (!skb) {
5899 rx_ring->rx_stats.alloc_failed++;
5900 return false;
5901 }
5902
5903 /* initialize skb for ring */
5904 skb_record_rx_queue(skb, rx_ring->queue_index);
5905 }
5906
5907 dma = dma_map_single(rx_ring->dev, skb->data,
5908 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
5909
5910 if (dma_mapping_error(rx_ring->dev, dma)) {
5911 rx_ring->rx_stats.alloc_failed++;
5912 return false;
5913 }
5914
5915 bi->dma = dma;
5916 return true;
5917 }
5918
5919 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
5920 struct igb_buffer *bi)
5921 {
5922 struct page *page = bi->page;
5923 dma_addr_t page_dma = bi->page_dma;
5924 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
5925
5926 if (page_dma)
5927 return true;
5928
5929 if (!page) {
5930 page = netdev_alloc_page(rx_ring->netdev);
5931 bi->page = page;
5932 if (unlikely(!page)) {
5933 rx_ring->rx_stats.alloc_failed++;
5934 return false;
5935 }
5936 }
5937
5938 page_dma = dma_map_page(rx_ring->dev, page,
5939 page_offset, PAGE_SIZE / 2,
5940 DMA_FROM_DEVICE);
5941
5942 if (dma_mapping_error(rx_ring->dev, page_dma)) {
5943 rx_ring->rx_stats.alloc_failed++;
5944 return false;
5945 }
5946
5947 bi->page_dma = page_dma;
5948 bi->page_offset = page_offset;
5949 return true;
5950 }
5951
5952 /**
5953 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5954 * @adapter: address of board private structure
5955 **/
5956 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, u16 cleaned_count)
5957 {
5958 union e1000_adv_rx_desc *rx_desc;
5959 struct igb_buffer *bi;
5960 u16 i = rx_ring->next_to_use;
5961
5962 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5963 bi = &rx_ring->buffer_info[i];
5964 i -= rx_ring->count;
5965
5966 while (cleaned_count--) {
5967 if (!igb_alloc_mapped_skb(rx_ring, bi))
5968 break;
5969
5970 /* Refresh the desc even if buffer_addrs didn't change
5971 * because each write-back erases this info. */
5972 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
5973
5974 if (!igb_alloc_mapped_page(rx_ring, bi))
5975 break;
5976
5977 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
5978
5979 rx_desc++;
5980 bi++;
5981 i++;
5982 if (unlikely(!i)) {
5983 rx_desc = E1000_RX_DESC_ADV(*rx_ring, 0);
5984 bi = rx_ring->buffer_info;
5985 i -= rx_ring->count;
5986 }
5987
5988 /* clear the hdr_addr for the next_to_use descriptor */
5989 rx_desc->read.hdr_addr = 0;
5990 }
5991
5992 i += rx_ring->count;
5993
5994 if (rx_ring->next_to_use != i) {
5995 rx_ring->next_to_use = i;
5996
5997 /* Force memory writes to complete before letting h/w
5998 * know there are new descriptors to fetch. (Only
5999 * applicable for weak-ordered memory model archs,
6000 * such as IA-64). */
6001 wmb();
6002 writel(i, rx_ring->tail);
6003 }
6004 }
6005
6006 /**
6007 * igb_mii_ioctl -
6008 * @netdev:
6009 * @ifreq:
6010 * @cmd:
6011 **/
6012 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6013 {
6014 struct igb_adapter *adapter = netdev_priv(netdev);
6015 struct mii_ioctl_data *data = if_mii(ifr);
6016
6017 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6018 return -EOPNOTSUPP;
6019
6020 switch (cmd) {
6021 case SIOCGMIIPHY:
6022 data->phy_id = adapter->hw.phy.addr;
6023 break;
6024 case SIOCGMIIREG:
6025 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6026 &data->val_out))
6027 return -EIO;
6028 break;
6029 case SIOCSMIIREG:
6030 default:
6031 return -EOPNOTSUPP;
6032 }
6033 return 0;
6034 }
6035
6036 /**
6037 * igb_hwtstamp_ioctl - control hardware time stamping
6038 * @netdev:
6039 * @ifreq:
6040 * @cmd:
6041 *
6042 * Outgoing time stamping can be enabled and disabled. Play nice and
6043 * disable it when requested, although it shouldn't case any overhead
6044 * when no packet needs it. At most one packet in the queue may be
6045 * marked for time stamping, otherwise it would be impossible to tell
6046 * for sure to which packet the hardware time stamp belongs.
6047 *
6048 * Incoming time stamping has to be configured via the hardware
6049 * filters. Not all combinations are supported, in particular event
6050 * type has to be specified. Matching the kind of event packet is
6051 * not supported, with the exception of "all V2 events regardless of
6052 * level 2 or 4".
6053 *
6054 **/
6055 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6056 struct ifreq *ifr, int cmd)
6057 {
6058 struct igb_adapter *adapter = netdev_priv(netdev);
6059 struct e1000_hw *hw = &adapter->hw;
6060 struct hwtstamp_config config;
6061 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6062 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6063 u32 tsync_rx_cfg = 0;
6064 bool is_l4 = false;
6065 bool is_l2 = false;
6066 u32 regval;
6067
6068 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6069 return -EFAULT;
6070
6071 /* reserved for future extensions */
6072 if (config.flags)
6073 return -EINVAL;
6074
6075 switch (config.tx_type) {
6076 case HWTSTAMP_TX_OFF:
6077 tsync_tx_ctl = 0;
6078 case HWTSTAMP_TX_ON:
6079 break;
6080 default:
6081 return -ERANGE;
6082 }
6083
6084 switch (config.rx_filter) {
6085 case HWTSTAMP_FILTER_NONE:
6086 tsync_rx_ctl = 0;
6087 break;
6088 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6089 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6090 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6091 case HWTSTAMP_FILTER_ALL:
6092 /*
6093 * register TSYNCRXCFG must be set, therefore it is not
6094 * possible to time stamp both Sync and Delay_Req messages
6095 * => fall back to time stamping all packets
6096 */
6097 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6098 config.rx_filter = HWTSTAMP_FILTER_ALL;
6099 break;
6100 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6101 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6102 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6103 is_l4 = true;
6104 break;
6105 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6106 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6107 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6108 is_l4 = true;
6109 break;
6110 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6111 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6112 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6113 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6114 is_l2 = true;
6115 is_l4 = true;
6116 config.rx_filter = HWTSTAMP_FILTER_SOME;
6117 break;
6118 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6119 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6120 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6121 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6122 is_l2 = true;
6123 is_l4 = true;
6124 config.rx_filter = HWTSTAMP_FILTER_SOME;
6125 break;
6126 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6127 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6128 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6129 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6130 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6131 is_l2 = true;
6132 break;
6133 default:
6134 return -ERANGE;
6135 }
6136
6137 if (hw->mac.type == e1000_82575) {
6138 if (tsync_rx_ctl | tsync_tx_ctl)
6139 return -EINVAL;
6140 return 0;
6141 }
6142
6143 /*
6144 * Per-packet timestamping only works if all packets are
6145 * timestamped, so enable timestamping in all packets as
6146 * long as one rx filter was configured.
6147 */
6148 if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6149 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6150 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6151 }
6152
6153 /* enable/disable TX */
6154 regval = rd32(E1000_TSYNCTXCTL);
6155 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6156 regval |= tsync_tx_ctl;
6157 wr32(E1000_TSYNCTXCTL, regval);
6158
6159 /* enable/disable RX */
6160 regval = rd32(E1000_TSYNCRXCTL);
6161 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6162 regval |= tsync_rx_ctl;
6163 wr32(E1000_TSYNCRXCTL, regval);
6164
6165 /* define which PTP packets are time stamped */
6166 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6167
6168 /* define ethertype filter for timestamped packets */
6169 if (is_l2)
6170 wr32(E1000_ETQF(3),
6171 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6172 E1000_ETQF_1588 | /* enable timestamping */
6173 ETH_P_1588)); /* 1588 eth protocol type */
6174 else
6175 wr32(E1000_ETQF(3), 0);
6176
6177 #define PTP_PORT 319
6178 /* L4 Queue Filter[3]: filter by destination port and protocol */
6179 if (is_l4) {
6180 u32 ftqf = (IPPROTO_UDP /* UDP */
6181 | E1000_FTQF_VF_BP /* VF not compared */
6182 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6183 | E1000_FTQF_MASK); /* mask all inputs */
6184 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6185
6186 wr32(E1000_IMIR(3), htons(PTP_PORT));
6187 wr32(E1000_IMIREXT(3),
6188 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6189 if (hw->mac.type == e1000_82576) {
6190 /* enable source port check */
6191 wr32(E1000_SPQF(3), htons(PTP_PORT));
6192 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6193 }
6194 wr32(E1000_FTQF(3), ftqf);
6195 } else {
6196 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6197 }
6198 wrfl();
6199
6200 adapter->hwtstamp_config = config;
6201
6202 /* clear TX/RX time stamp registers, just to be sure */
6203 regval = rd32(E1000_TXSTMPH);
6204 regval = rd32(E1000_RXSTMPH);
6205
6206 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6207 -EFAULT : 0;
6208 }
6209
6210 /**
6211 * igb_ioctl -
6212 * @netdev:
6213 * @ifreq:
6214 * @cmd:
6215 **/
6216 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6217 {
6218 switch (cmd) {
6219 case SIOCGMIIPHY:
6220 case SIOCGMIIREG:
6221 case SIOCSMIIREG:
6222 return igb_mii_ioctl(netdev, ifr, cmd);
6223 case SIOCSHWTSTAMP:
6224 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6225 default:
6226 return -EOPNOTSUPP;
6227 }
6228 }
6229
6230 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6231 {
6232 struct igb_adapter *adapter = hw->back;
6233 u16 cap_offset;
6234
6235 cap_offset = adapter->pdev->pcie_cap;
6236 if (!cap_offset)
6237 return -E1000_ERR_CONFIG;
6238
6239 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6240
6241 return 0;
6242 }
6243
6244 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6245 {
6246 struct igb_adapter *adapter = hw->back;
6247 u16 cap_offset;
6248
6249 cap_offset = adapter->pdev->pcie_cap;
6250 if (!cap_offset)
6251 return -E1000_ERR_CONFIG;
6252
6253 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6254
6255 return 0;
6256 }
6257
6258 static void igb_vlan_mode(struct net_device *netdev, u32 features)
6259 {
6260 struct igb_adapter *adapter = netdev_priv(netdev);
6261 struct e1000_hw *hw = &adapter->hw;
6262 u32 ctrl, rctl;
6263
6264 igb_irq_disable(adapter);
6265
6266 if (features & NETIF_F_HW_VLAN_RX) {
6267 /* enable VLAN tag insert/strip */
6268 ctrl = rd32(E1000_CTRL);
6269 ctrl |= E1000_CTRL_VME;
6270 wr32(E1000_CTRL, ctrl);
6271
6272 /* Disable CFI check */
6273 rctl = rd32(E1000_RCTL);
6274 rctl &= ~E1000_RCTL_CFIEN;
6275 wr32(E1000_RCTL, rctl);
6276 } else {
6277 /* disable VLAN tag insert/strip */
6278 ctrl = rd32(E1000_CTRL);
6279 ctrl &= ~E1000_CTRL_VME;
6280 wr32(E1000_CTRL, ctrl);
6281 }
6282
6283 igb_rlpml_set(adapter);
6284
6285 if (!test_bit(__IGB_DOWN, &adapter->state))
6286 igb_irq_enable(adapter);
6287 }
6288
6289 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6290 {
6291 struct igb_adapter *adapter = netdev_priv(netdev);
6292 struct e1000_hw *hw = &adapter->hw;
6293 int pf_id = adapter->vfs_allocated_count;
6294
6295 /* attempt to add filter to vlvf array */
6296 igb_vlvf_set(adapter, vid, true, pf_id);
6297
6298 /* add the filter since PF can receive vlans w/o entry in vlvf */
6299 igb_vfta_set(hw, vid, true);
6300
6301 set_bit(vid, adapter->active_vlans);
6302 }
6303
6304 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6305 {
6306 struct igb_adapter *adapter = netdev_priv(netdev);
6307 struct e1000_hw *hw = &adapter->hw;
6308 int pf_id = adapter->vfs_allocated_count;
6309 s32 err;
6310
6311 igb_irq_disable(adapter);
6312
6313 if (!test_bit(__IGB_DOWN, &adapter->state))
6314 igb_irq_enable(adapter);
6315
6316 /* remove vlan from VLVF table array */
6317 err = igb_vlvf_set(adapter, vid, false, pf_id);
6318
6319 /* if vid was not present in VLVF just remove it from table */
6320 if (err)
6321 igb_vfta_set(hw, vid, false);
6322
6323 clear_bit(vid, adapter->active_vlans);
6324 }
6325
6326 static void igb_restore_vlan(struct igb_adapter *adapter)
6327 {
6328 u16 vid;
6329
6330 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6331 igb_vlan_rx_add_vid(adapter->netdev, vid);
6332 }
6333
6334 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6335 {
6336 struct pci_dev *pdev = adapter->pdev;
6337 struct e1000_mac_info *mac = &adapter->hw.mac;
6338
6339 mac->autoneg = 0;
6340
6341 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6342 * for the switch() below to work */
6343 if ((spd & 1) || (dplx & ~1))
6344 goto err_inval;
6345
6346 /* Fiber NIC's only allow 1000 Gbps Full duplex */
6347 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6348 spd != SPEED_1000 &&
6349 dplx != DUPLEX_FULL)
6350 goto err_inval;
6351
6352 switch (spd + dplx) {
6353 case SPEED_10 + DUPLEX_HALF:
6354 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6355 break;
6356 case SPEED_10 + DUPLEX_FULL:
6357 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6358 break;
6359 case SPEED_100 + DUPLEX_HALF:
6360 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6361 break;
6362 case SPEED_100 + DUPLEX_FULL:
6363 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6364 break;
6365 case SPEED_1000 + DUPLEX_FULL:
6366 mac->autoneg = 1;
6367 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6368 break;
6369 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6370 default:
6371 goto err_inval;
6372 }
6373 return 0;
6374
6375 err_inval:
6376 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6377 return -EINVAL;
6378 }
6379
6380 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6381 {
6382 struct net_device *netdev = pci_get_drvdata(pdev);
6383 struct igb_adapter *adapter = netdev_priv(netdev);
6384 struct e1000_hw *hw = &adapter->hw;
6385 u32 ctrl, rctl, status;
6386 u32 wufc = adapter->wol;
6387 #ifdef CONFIG_PM
6388 int retval = 0;
6389 #endif
6390
6391 netif_device_detach(netdev);
6392
6393 if (netif_running(netdev))
6394 igb_close(netdev);
6395
6396 igb_clear_interrupt_scheme(adapter);
6397
6398 #ifdef CONFIG_PM
6399 retval = pci_save_state(pdev);
6400 if (retval)
6401 return retval;
6402 #endif
6403
6404 status = rd32(E1000_STATUS);
6405 if (status & E1000_STATUS_LU)
6406 wufc &= ~E1000_WUFC_LNKC;
6407
6408 if (wufc) {
6409 igb_setup_rctl(adapter);
6410 igb_set_rx_mode(netdev);
6411
6412 /* turn on all-multi mode if wake on multicast is enabled */
6413 if (wufc & E1000_WUFC_MC) {
6414 rctl = rd32(E1000_RCTL);
6415 rctl |= E1000_RCTL_MPE;
6416 wr32(E1000_RCTL, rctl);
6417 }
6418
6419 ctrl = rd32(E1000_CTRL);
6420 /* advertise wake from D3Cold */
6421 #define E1000_CTRL_ADVD3WUC 0x00100000
6422 /* phy power management enable */
6423 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6424 ctrl |= E1000_CTRL_ADVD3WUC;
6425 wr32(E1000_CTRL, ctrl);
6426
6427 /* Allow time for pending master requests to run */
6428 igb_disable_pcie_master(hw);
6429
6430 wr32(E1000_WUC, E1000_WUC_PME_EN);
6431 wr32(E1000_WUFC, wufc);
6432 } else {
6433 wr32(E1000_WUC, 0);
6434 wr32(E1000_WUFC, 0);
6435 }
6436
6437 *enable_wake = wufc || adapter->en_mng_pt;
6438 if (!*enable_wake)
6439 igb_power_down_link(adapter);
6440 else
6441 igb_power_up_link(adapter);
6442
6443 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6444 * would have already happened in close and is redundant. */
6445 igb_release_hw_control(adapter);
6446
6447 pci_disable_device(pdev);
6448
6449 return 0;
6450 }
6451
6452 #ifdef CONFIG_PM
6453 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6454 {
6455 int retval;
6456 bool wake;
6457
6458 retval = __igb_shutdown(pdev, &wake);
6459 if (retval)
6460 return retval;
6461
6462 if (wake) {
6463 pci_prepare_to_sleep(pdev);
6464 } else {
6465 pci_wake_from_d3(pdev, false);
6466 pci_set_power_state(pdev, PCI_D3hot);
6467 }
6468
6469 return 0;
6470 }
6471
6472 static int igb_resume(struct pci_dev *pdev)
6473 {
6474 struct net_device *netdev = pci_get_drvdata(pdev);
6475 struct igb_adapter *adapter = netdev_priv(netdev);
6476 struct e1000_hw *hw = &adapter->hw;
6477 u32 err;
6478
6479 pci_set_power_state(pdev, PCI_D0);
6480 pci_restore_state(pdev);
6481 pci_save_state(pdev);
6482
6483 err = pci_enable_device_mem(pdev);
6484 if (err) {
6485 dev_err(&pdev->dev,
6486 "igb: Cannot enable PCI device from suspend\n");
6487 return err;
6488 }
6489 pci_set_master(pdev);
6490
6491 pci_enable_wake(pdev, PCI_D3hot, 0);
6492 pci_enable_wake(pdev, PCI_D3cold, 0);
6493
6494 if (igb_init_interrupt_scheme(adapter)) {
6495 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6496 return -ENOMEM;
6497 }
6498
6499 igb_reset(adapter);
6500
6501 /* let the f/w know that the h/w is now under the control of the
6502 * driver. */
6503 igb_get_hw_control(adapter);
6504
6505 wr32(E1000_WUS, ~0);
6506
6507 if (netif_running(netdev)) {
6508 err = igb_open(netdev);
6509 if (err)
6510 return err;
6511 }
6512
6513 netif_device_attach(netdev);
6514
6515 return 0;
6516 }
6517 #endif
6518
6519 static void igb_shutdown(struct pci_dev *pdev)
6520 {
6521 bool wake;
6522
6523 __igb_shutdown(pdev, &wake);
6524
6525 if (system_state == SYSTEM_POWER_OFF) {
6526 pci_wake_from_d3(pdev, wake);
6527 pci_set_power_state(pdev, PCI_D3hot);
6528 }
6529 }
6530
6531 #ifdef CONFIG_NET_POLL_CONTROLLER
6532 /*
6533 * Polling 'interrupt' - used by things like netconsole to send skbs
6534 * without having to re-enable interrupts. It's not called while
6535 * the interrupt routine is executing.
6536 */
6537 static void igb_netpoll(struct net_device *netdev)
6538 {
6539 struct igb_adapter *adapter = netdev_priv(netdev);
6540 struct e1000_hw *hw = &adapter->hw;
6541 int i;
6542
6543 if (!adapter->msix_entries) {
6544 struct igb_q_vector *q_vector = adapter->q_vector[0];
6545 igb_irq_disable(adapter);
6546 napi_schedule(&q_vector->napi);
6547 return;
6548 }
6549
6550 for (i = 0; i < adapter->num_q_vectors; i++) {
6551 struct igb_q_vector *q_vector = adapter->q_vector[i];
6552 wr32(E1000_EIMC, q_vector->eims_value);
6553 napi_schedule(&q_vector->napi);
6554 }
6555 }
6556 #endif /* CONFIG_NET_POLL_CONTROLLER */
6557
6558 /**
6559 * igb_io_error_detected - called when PCI error is detected
6560 * @pdev: Pointer to PCI device
6561 * @state: The current pci connection state
6562 *
6563 * This function is called after a PCI bus error affecting
6564 * this device has been detected.
6565 */
6566 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6567 pci_channel_state_t state)
6568 {
6569 struct net_device *netdev = pci_get_drvdata(pdev);
6570 struct igb_adapter *adapter = netdev_priv(netdev);
6571
6572 netif_device_detach(netdev);
6573
6574 if (state == pci_channel_io_perm_failure)
6575 return PCI_ERS_RESULT_DISCONNECT;
6576
6577 if (netif_running(netdev))
6578 igb_down(adapter);
6579 pci_disable_device(pdev);
6580
6581 /* Request a slot slot reset. */
6582 return PCI_ERS_RESULT_NEED_RESET;
6583 }
6584
6585 /**
6586 * igb_io_slot_reset - called after the pci bus has been reset.
6587 * @pdev: Pointer to PCI device
6588 *
6589 * Restart the card from scratch, as if from a cold-boot. Implementation
6590 * resembles the first-half of the igb_resume routine.
6591 */
6592 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6593 {
6594 struct net_device *netdev = pci_get_drvdata(pdev);
6595 struct igb_adapter *adapter = netdev_priv(netdev);
6596 struct e1000_hw *hw = &adapter->hw;
6597 pci_ers_result_t result;
6598 int err;
6599
6600 if (pci_enable_device_mem(pdev)) {
6601 dev_err(&pdev->dev,
6602 "Cannot re-enable PCI device after reset.\n");
6603 result = PCI_ERS_RESULT_DISCONNECT;
6604 } else {
6605 pci_set_master(pdev);
6606 pci_restore_state(pdev);
6607 pci_save_state(pdev);
6608
6609 pci_enable_wake(pdev, PCI_D3hot, 0);
6610 pci_enable_wake(pdev, PCI_D3cold, 0);
6611
6612 igb_reset(adapter);
6613 wr32(E1000_WUS, ~0);
6614 result = PCI_ERS_RESULT_RECOVERED;
6615 }
6616
6617 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6618 if (err) {
6619 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6620 "failed 0x%0x\n", err);
6621 /* non-fatal, continue */
6622 }
6623
6624 return result;
6625 }
6626
6627 /**
6628 * igb_io_resume - called when traffic can start flowing again.
6629 * @pdev: Pointer to PCI device
6630 *
6631 * This callback is called when the error recovery driver tells us that
6632 * its OK to resume normal operation. Implementation resembles the
6633 * second-half of the igb_resume routine.
6634 */
6635 static void igb_io_resume(struct pci_dev *pdev)
6636 {
6637 struct net_device *netdev = pci_get_drvdata(pdev);
6638 struct igb_adapter *adapter = netdev_priv(netdev);
6639
6640 if (netif_running(netdev)) {
6641 if (igb_up(adapter)) {
6642 dev_err(&pdev->dev, "igb_up failed after reset\n");
6643 return;
6644 }
6645 }
6646
6647 netif_device_attach(netdev);
6648
6649 /* let the f/w know that the h/w is now under the control of the
6650 * driver. */
6651 igb_get_hw_control(adapter);
6652 }
6653
6654 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6655 u8 qsel)
6656 {
6657 u32 rar_low, rar_high;
6658 struct e1000_hw *hw = &adapter->hw;
6659
6660 /* HW expects these in little endian so we reverse the byte order
6661 * from network order (big endian) to little endian
6662 */
6663 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6664 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6665 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6666
6667 /* Indicate to hardware the Address is Valid. */
6668 rar_high |= E1000_RAH_AV;
6669
6670 if (hw->mac.type == e1000_82575)
6671 rar_high |= E1000_RAH_POOL_1 * qsel;
6672 else
6673 rar_high |= E1000_RAH_POOL_1 << qsel;
6674
6675 wr32(E1000_RAL(index), rar_low);
6676 wrfl();
6677 wr32(E1000_RAH(index), rar_high);
6678 wrfl();
6679 }
6680
6681 static int igb_set_vf_mac(struct igb_adapter *adapter,
6682 int vf, unsigned char *mac_addr)
6683 {
6684 struct e1000_hw *hw = &adapter->hw;
6685 /* VF MAC addresses start at end of receive addresses and moves
6686 * torwards the first, as a result a collision should not be possible */
6687 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6688
6689 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6690
6691 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6692
6693 return 0;
6694 }
6695
6696 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6697 {
6698 struct igb_adapter *adapter = netdev_priv(netdev);
6699 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6700 return -EINVAL;
6701 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6702 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6703 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6704 " change effective.");
6705 if (test_bit(__IGB_DOWN, &adapter->state)) {
6706 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6707 " but the PF device is not up.\n");
6708 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6709 " attempting to use the VF device.\n");
6710 }
6711 return igb_set_vf_mac(adapter, vf, mac);
6712 }
6713
6714 static int igb_link_mbps(int internal_link_speed)
6715 {
6716 switch (internal_link_speed) {
6717 case SPEED_100:
6718 return 100;
6719 case SPEED_1000:
6720 return 1000;
6721 default:
6722 return 0;
6723 }
6724 }
6725
6726 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6727 int link_speed)
6728 {
6729 int rf_dec, rf_int;
6730 u32 bcnrc_val;
6731
6732 if (tx_rate != 0) {
6733 /* Calculate the rate factor values to set */
6734 rf_int = link_speed / tx_rate;
6735 rf_dec = (link_speed - (rf_int * tx_rate));
6736 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6737
6738 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6739 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6740 E1000_RTTBCNRC_RF_INT_MASK);
6741 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6742 } else {
6743 bcnrc_val = 0;
6744 }
6745
6746 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6747 wr32(E1000_RTTBCNRC, bcnrc_val);
6748 }
6749
6750 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6751 {
6752 int actual_link_speed, i;
6753 bool reset_rate = false;
6754
6755 /* VF TX rate limit was not set or not supported */
6756 if ((adapter->vf_rate_link_speed == 0) ||
6757 (adapter->hw.mac.type != e1000_82576))
6758 return;
6759
6760 actual_link_speed = igb_link_mbps(adapter->link_speed);
6761 if (actual_link_speed != adapter->vf_rate_link_speed) {
6762 reset_rate = true;
6763 adapter->vf_rate_link_speed = 0;
6764 dev_info(&adapter->pdev->dev,
6765 "Link speed has been changed. VF Transmit "
6766 "rate is disabled\n");
6767 }
6768
6769 for (i = 0; i < adapter->vfs_allocated_count; i++) {
6770 if (reset_rate)
6771 adapter->vf_data[i].tx_rate = 0;
6772
6773 igb_set_vf_rate_limit(&adapter->hw, i,
6774 adapter->vf_data[i].tx_rate,
6775 actual_link_speed);
6776 }
6777 }
6778
6779 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6780 {
6781 struct igb_adapter *adapter = netdev_priv(netdev);
6782 struct e1000_hw *hw = &adapter->hw;
6783 int actual_link_speed;
6784
6785 if (hw->mac.type != e1000_82576)
6786 return -EOPNOTSUPP;
6787
6788 actual_link_speed = igb_link_mbps(adapter->link_speed);
6789 if ((vf >= adapter->vfs_allocated_count) ||
6790 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6791 (tx_rate < 0) || (tx_rate > actual_link_speed))
6792 return -EINVAL;
6793
6794 adapter->vf_rate_link_speed = actual_link_speed;
6795 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6796 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6797
6798 return 0;
6799 }
6800
6801 static int igb_ndo_get_vf_config(struct net_device *netdev,
6802 int vf, struct ifla_vf_info *ivi)
6803 {
6804 struct igb_adapter *adapter = netdev_priv(netdev);
6805 if (vf >= adapter->vfs_allocated_count)
6806 return -EINVAL;
6807 ivi->vf = vf;
6808 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6809 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6810 ivi->vlan = adapter->vf_data[vf].pf_vlan;
6811 ivi->qos = adapter->vf_data[vf].pf_qos;
6812 return 0;
6813 }
6814
6815 static void igb_vmm_control(struct igb_adapter *adapter)
6816 {
6817 struct e1000_hw *hw = &adapter->hw;
6818 u32 reg;
6819
6820 switch (hw->mac.type) {
6821 case e1000_82575:
6822 default:
6823 /* replication is not supported for 82575 */
6824 return;
6825 case e1000_82576:
6826 /* notify HW that the MAC is adding vlan tags */
6827 reg = rd32(E1000_DTXCTL);
6828 reg |= E1000_DTXCTL_VLAN_ADDED;
6829 wr32(E1000_DTXCTL, reg);
6830 case e1000_82580:
6831 /* enable replication vlan tag stripping */
6832 reg = rd32(E1000_RPLOLR);
6833 reg |= E1000_RPLOLR_STRVLAN;
6834 wr32(E1000_RPLOLR, reg);
6835 case e1000_i350:
6836 /* none of the above registers are supported by i350 */
6837 break;
6838 }
6839
6840 if (adapter->vfs_allocated_count) {
6841 igb_vmdq_set_loopback_pf(hw, true);
6842 igb_vmdq_set_replication_pf(hw, true);
6843 igb_vmdq_set_anti_spoofing_pf(hw, true,
6844 adapter->vfs_allocated_count);
6845 } else {
6846 igb_vmdq_set_loopback_pf(hw, false);
6847 igb_vmdq_set_replication_pf(hw, false);
6848 }
6849 }
6850
6851 /* igb_main.c */
This page took 0.18231 seconds and 6 git commands to generate.