ixgbe: dcb: BIT_APP_UPCHG not set by ixgbe_copy_dcb_cfg()
[deliverable/linux.git] / drivers / net / ethernet / intel / igb / igb_main.c
1 /*******************************************************************************
2
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2012 Intel Corporation.
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
9
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
14
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
21
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29
30 #include <linux/module.h>
31 #include <linux/types.h>
32 #include <linux/init.h>
33 #include <linux/bitops.h>
34 #include <linux/vmalloc.h>
35 #include <linux/pagemap.h>
36 #include <linux/netdevice.h>
37 #include <linux/ipv6.h>
38 #include <linux/slab.h>
39 #include <net/checksum.h>
40 #include <net/ip6_checksum.h>
41 #include <linux/net_tstamp.h>
42 #include <linux/mii.h>
43 #include <linux/ethtool.h>
44 #include <linux/if.h>
45 #include <linux/if_vlan.h>
46 #include <linux/pci.h>
47 #include <linux/pci-aspm.h>
48 #include <linux/delay.h>
49 #include <linux/interrupt.h>
50 #include <linux/ip.h>
51 #include <linux/tcp.h>
52 #include <linux/sctp.h>
53 #include <linux/if_ether.h>
54 #include <linux/aer.h>
55 #include <linux/prefetch.h>
56 #include <linux/pm_runtime.h>
57 #ifdef CONFIG_IGB_DCA
58 #include <linux/dca.h>
59 #endif
60 #include "igb.h"
61
62 #define MAJ 3
63 #define MIN 2
64 #define BUILD 10
65 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66 __stringify(BUILD) "-k"
67 char igb_driver_name[] = "igb";
68 char igb_driver_version[] = DRV_VERSION;
69 static const char igb_driver_string[] =
70 "Intel(R) Gigabit Ethernet Network Driver";
71 static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
72
73 static const struct e1000_info *igb_info_tbl[] = {
74 [board_82575] = &e1000_82575_info,
75 };
76
77 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
96 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
97 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
98 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
99 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
100 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
101 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
102 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
103 /* required last entry */
104 {0, }
105 };
106
107 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
108
109 void igb_reset(struct igb_adapter *);
110 static int igb_setup_all_tx_resources(struct igb_adapter *);
111 static int igb_setup_all_rx_resources(struct igb_adapter *);
112 static void igb_free_all_tx_resources(struct igb_adapter *);
113 static void igb_free_all_rx_resources(struct igb_adapter *);
114 static void igb_setup_mrqc(struct igb_adapter *);
115 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
116 static void __devexit igb_remove(struct pci_dev *pdev);
117 static void igb_init_hw_timer(struct igb_adapter *adapter);
118 static int igb_sw_init(struct igb_adapter *);
119 static int igb_open(struct net_device *);
120 static int igb_close(struct net_device *);
121 static void igb_configure_tx(struct igb_adapter *);
122 static void igb_configure_rx(struct igb_adapter *);
123 static void igb_clean_all_tx_rings(struct igb_adapter *);
124 static void igb_clean_all_rx_rings(struct igb_adapter *);
125 static void igb_clean_tx_ring(struct igb_ring *);
126 static void igb_clean_rx_ring(struct igb_ring *);
127 static void igb_set_rx_mode(struct net_device *);
128 static void igb_update_phy_info(unsigned long);
129 static void igb_watchdog(unsigned long);
130 static void igb_watchdog_task(struct work_struct *);
131 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
132 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
133 struct rtnl_link_stats64 *stats);
134 static int igb_change_mtu(struct net_device *, int);
135 static int igb_set_mac(struct net_device *, void *);
136 static void igb_set_uta(struct igb_adapter *adapter);
137 static irqreturn_t igb_intr(int irq, void *);
138 static irqreturn_t igb_intr_msi(int irq, void *);
139 static irqreturn_t igb_msix_other(int irq, void *);
140 static irqreturn_t igb_msix_ring(int irq, void *);
141 #ifdef CONFIG_IGB_DCA
142 static void igb_update_dca(struct igb_q_vector *);
143 static void igb_setup_dca(struct igb_adapter *);
144 #endif /* CONFIG_IGB_DCA */
145 static int igb_poll(struct napi_struct *, int);
146 static bool igb_clean_tx_irq(struct igb_q_vector *);
147 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
148 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
149 static void igb_tx_timeout(struct net_device *);
150 static void igb_reset_task(struct work_struct *);
151 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
152 static int igb_vlan_rx_add_vid(struct net_device *, u16);
153 static int igb_vlan_rx_kill_vid(struct net_device *, u16);
154 static void igb_restore_vlan(struct igb_adapter *);
155 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
156 static void igb_ping_all_vfs(struct igb_adapter *);
157 static void igb_msg_task(struct igb_adapter *);
158 static void igb_vmm_control(struct igb_adapter *);
159 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
160 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
161 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
162 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
163 int vf, u16 vlan, u8 qos);
164 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
165 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
166 struct ifla_vf_info *ivi);
167 static void igb_check_vf_rate_limit(struct igb_adapter *);
168
169 #ifdef CONFIG_PCI_IOV
170 static int igb_vf_configure(struct igb_adapter *adapter, int vf);
171 static int igb_find_enabled_vfs(struct igb_adapter *adapter);
172 static int igb_check_vf_assignment(struct igb_adapter *adapter);
173 #endif
174
#ifdef CONFIG_PM
/* System sleep callbacks; igb_suspend is declared only with PM_SLEEP */
#ifdef CONFIG_PM_SLEEP
static int igb_suspend(struct device *dev);
#endif /* CONFIG_PM_SLEEP */
static int igb_resume(struct device *dev);

/* Runtime PM callbacks */
#ifdef CONFIG_PM_RUNTIME
static int igb_runtime_suspend(struct device *dev);
static int igb_runtime_resume(struct device *dev);
static int igb_runtime_idle(struct device *dev);
#endif /* CONFIG_PM_RUNTIME */

/* Power-management operations hooked into igb_driver via .driver.pm */
static const struct dev_pm_ops igb_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
	SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
			   igb_runtime_idle)
};
#endif /* CONFIG_PM */
static void igb_shutdown(struct pci_dev *pdev);

#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
			  void *p);

/*
 * DCA notifier, registered at module init and unregistered at module exit.
 * .next and .priority rely on static zero-initialisation (NULL and 0).
 */
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
};
#endif /* CONFIG_IGB_DCA */

#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *netdev);
#endif /* CONFIG_NET_POLL_CONTROLLER */
#ifdef CONFIG_PCI_IOV
/* Load-time module parameter; defaults to 0 via static initialisation */
static unsigned int max_vfs;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs,
		 "Maximum number of virtual functions to allocate per physical function");
#endif /* CONFIG_PCI_IOV */
210
211 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
212 pci_channel_state_t);
213 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
214 static void igb_io_resume(struct pci_dev *);
215
216 static struct pci_error_handlers igb_err_handler = {
217 .error_detected = igb_io_error_detected,
218 .slot_reset = igb_io_slot_reset,
219 .resume = igb_io_resume,
220 };
221
222 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
223
224 static struct pci_driver igb_driver = {
225 .name = igb_driver_name,
226 .id_table = igb_pci_tbl,
227 .probe = igb_probe,
228 .remove = __devexit_p(igb_remove),
229 #ifdef CONFIG_PM
230 .driver.pm = &igb_pm_ops,
231 #endif
232 .shutdown = igb_shutdown,
233 .err_handler = &igb_err_handler
234 };
235
236 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
237 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
238 MODULE_LICENSE("GPL");
239 MODULE_VERSION(DRV_VERSION);
240
241 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
242 static int debug = -1;
243 module_param(debug, int, 0);
244 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
245
246 struct igb_reg_info {
247 u32 ofs;
248 char *name;
249 };
250
251 static const struct igb_reg_info igb_reg_info_tbl[] = {
252
253 /* General Registers */
254 {E1000_CTRL, "CTRL"},
255 {E1000_STATUS, "STATUS"},
256 {E1000_CTRL_EXT, "CTRL_EXT"},
257
258 /* Interrupt Registers */
259 {E1000_ICR, "ICR"},
260
261 /* RX Registers */
262 {E1000_RCTL, "RCTL"},
263 {E1000_RDLEN(0), "RDLEN"},
264 {E1000_RDH(0), "RDH"},
265 {E1000_RDT(0), "RDT"},
266 {E1000_RXDCTL(0), "RXDCTL"},
267 {E1000_RDBAL(0), "RDBAL"},
268 {E1000_RDBAH(0), "RDBAH"},
269
270 /* TX Registers */
271 {E1000_TCTL, "TCTL"},
272 {E1000_TDBAL(0), "TDBAL"},
273 {E1000_TDBAH(0), "TDBAH"},
274 {E1000_TDLEN(0), "TDLEN"},
275 {E1000_TDH(0), "TDH"},
276 {E1000_TDT(0), "TDT"},
277 {E1000_TXDCTL(0), "TXDCTL"},
278 {E1000_TDFH, "TDFH"},
279 {E1000_TDFT, "TDFT"},
280 {E1000_TDFHS, "TDFHS"},
281 {E1000_TDFPC, "TDFPC"},
282
283 /* List Terminator */
284 {}
285 };
286
287 /*
288 * igb_regdump - register printout routine
289 */
290 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
291 {
292 int n = 0;
293 char rname[16];
294 u32 regs[8];
295
296 switch (reginfo->ofs) {
297 case E1000_RDLEN(0):
298 for (n = 0; n < 4; n++)
299 regs[n] = rd32(E1000_RDLEN(n));
300 break;
301 case E1000_RDH(0):
302 for (n = 0; n < 4; n++)
303 regs[n] = rd32(E1000_RDH(n));
304 break;
305 case E1000_RDT(0):
306 for (n = 0; n < 4; n++)
307 regs[n] = rd32(E1000_RDT(n));
308 break;
309 case E1000_RXDCTL(0):
310 for (n = 0; n < 4; n++)
311 regs[n] = rd32(E1000_RXDCTL(n));
312 break;
313 case E1000_RDBAL(0):
314 for (n = 0; n < 4; n++)
315 regs[n] = rd32(E1000_RDBAL(n));
316 break;
317 case E1000_RDBAH(0):
318 for (n = 0; n < 4; n++)
319 regs[n] = rd32(E1000_RDBAH(n));
320 break;
321 case E1000_TDBAL(0):
322 for (n = 0; n < 4; n++)
323 regs[n] = rd32(E1000_RDBAL(n));
324 break;
325 case E1000_TDBAH(0):
326 for (n = 0; n < 4; n++)
327 regs[n] = rd32(E1000_TDBAH(n));
328 break;
329 case E1000_TDLEN(0):
330 for (n = 0; n < 4; n++)
331 regs[n] = rd32(E1000_TDLEN(n));
332 break;
333 case E1000_TDH(0):
334 for (n = 0; n < 4; n++)
335 regs[n] = rd32(E1000_TDH(n));
336 break;
337 case E1000_TDT(0):
338 for (n = 0; n < 4; n++)
339 regs[n] = rd32(E1000_TDT(n));
340 break;
341 case E1000_TXDCTL(0):
342 for (n = 0; n < 4; n++)
343 regs[n] = rd32(E1000_TXDCTL(n));
344 break;
345 default:
346 pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
347 return;
348 }
349
350 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
351 pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
352 regs[2], regs[3]);
353 }
354
355 /*
356 * igb_dump - Print registers, tx-rings and rx-rings
357 */
358 static void igb_dump(struct igb_adapter *adapter)
359 {
360 struct net_device *netdev = adapter->netdev;
361 struct e1000_hw *hw = &adapter->hw;
362 struct igb_reg_info *reginfo;
363 struct igb_ring *tx_ring;
364 union e1000_adv_tx_desc *tx_desc;
365 struct my_u0 { u64 a; u64 b; } *u0;
366 struct igb_ring *rx_ring;
367 union e1000_adv_rx_desc *rx_desc;
368 u32 staterr;
369 u16 i, n;
370
371 if (!netif_msg_hw(adapter))
372 return;
373
374 /* Print netdevice Info */
375 if (netdev) {
376 dev_info(&adapter->pdev->dev, "Net device Info\n");
377 pr_info("Device Name state trans_start "
378 "last_rx\n");
379 pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
380 netdev->state, netdev->trans_start, netdev->last_rx);
381 }
382
383 /* Print Registers */
384 dev_info(&adapter->pdev->dev, "Register Dump\n");
385 pr_info(" Register Name Value\n");
386 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
387 reginfo->name; reginfo++) {
388 igb_regdump(hw, reginfo);
389 }
390
391 /* Print TX Ring Summary */
392 if (!netdev || !netif_running(netdev))
393 goto exit;
394
395 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
396 pr_info("Queue [NTU] [NTC] [bi(ntc)->dma ] leng ntw timestamp\n");
397 for (n = 0; n < adapter->num_tx_queues; n++) {
398 struct igb_tx_buffer *buffer_info;
399 tx_ring = adapter->tx_ring[n];
400 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
401 pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
402 n, tx_ring->next_to_use, tx_ring->next_to_clean,
403 (u64)buffer_info->dma,
404 buffer_info->length,
405 buffer_info->next_to_watch,
406 (u64)buffer_info->time_stamp);
407 }
408
409 /* Print TX Rings */
410 if (!netif_msg_tx_done(adapter))
411 goto rx_ring_summary;
412
413 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
414
415 /* Transmit Descriptor Formats
416 *
417 * Advanced Transmit Descriptor
418 * +--------------------------------------------------------------+
419 * 0 | Buffer Address [63:0] |
420 * +--------------------------------------------------------------+
421 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
422 * +--------------------------------------------------------------+
423 * 63 46 45 40 39 38 36 35 32 31 24 15 0
424 */
425
426 for (n = 0; n < adapter->num_tx_queues; n++) {
427 tx_ring = adapter->tx_ring[n];
428 pr_info("------------------------------------\n");
429 pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
430 pr_info("------------------------------------\n");
431 pr_info("T [desc] [address 63:0 ] [PlPOCIStDDM Ln] "
432 "[bi->dma ] leng ntw timestamp "
433 "bi->skb\n");
434
435 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
436 const char *next_desc;
437 struct igb_tx_buffer *buffer_info;
438 tx_desc = IGB_TX_DESC(tx_ring, i);
439 buffer_info = &tx_ring->tx_buffer_info[i];
440 u0 = (struct my_u0 *)tx_desc;
441 if (i == tx_ring->next_to_use &&
442 i == tx_ring->next_to_clean)
443 next_desc = " NTC/U";
444 else if (i == tx_ring->next_to_use)
445 next_desc = " NTU";
446 else if (i == tx_ring->next_to_clean)
447 next_desc = " NTC";
448 else
449 next_desc = "";
450
451 pr_info("T [0x%03X] %016llX %016llX %016llX"
452 " %04X %p %016llX %p%s\n", i,
453 le64_to_cpu(u0->a),
454 le64_to_cpu(u0->b),
455 (u64)buffer_info->dma,
456 buffer_info->length,
457 buffer_info->next_to_watch,
458 (u64)buffer_info->time_stamp,
459 buffer_info->skb, next_desc);
460
461 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
462 print_hex_dump(KERN_INFO, "",
463 DUMP_PREFIX_ADDRESS,
464 16, 1, phys_to_virt(buffer_info->dma),
465 buffer_info->length, true);
466 }
467 }
468
469 /* Print RX Rings Summary */
470 rx_ring_summary:
471 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
472 pr_info("Queue [NTU] [NTC]\n");
473 for (n = 0; n < adapter->num_rx_queues; n++) {
474 rx_ring = adapter->rx_ring[n];
475 pr_info(" %5d %5X %5X\n",
476 n, rx_ring->next_to_use, rx_ring->next_to_clean);
477 }
478
479 /* Print RX Rings */
480 if (!netif_msg_rx_status(adapter))
481 goto exit;
482
483 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
484
485 /* Advanced Receive Descriptor (Read) Format
486 * 63 1 0
487 * +-----------------------------------------------------+
488 * 0 | Packet Buffer Address [63:1] |A0/NSE|
489 * +----------------------------------------------+------+
490 * 8 | Header Buffer Address [63:1] | DD |
491 * +-----------------------------------------------------+
492 *
493 *
494 * Advanced Receive Descriptor (Write-Back) Format
495 *
496 * 63 48 47 32 31 30 21 20 17 16 4 3 0
497 * +------------------------------------------------------+
498 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
499 * | Checksum Ident | | | | Type | Type |
500 * +------------------------------------------------------+
501 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
502 * +------------------------------------------------------+
503 * 63 48 47 32 31 20 19 0
504 */
505
506 for (n = 0; n < adapter->num_rx_queues; n++) {
507 rx_ring = adapter->rx_ring[n];
508 pr_info("------------------------------------\n");
509 pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
510 pr_info("------------------------------------\n");
511 pr_info("R [desc] [ PktBuf A0] [ HeadBuf DD] "
512 "[bi->dma ] [bi->skb] <-- Adv Rx Read format\n");
513 pr_info("RWB[desc] [PcsmIpSHl PtRs] [vl er S cks ln] -----"
514 "----------- [bi->skb] <-- Adv Rx Write-Back format\n");
515
516 for (i = 0; i < rx_ring->count; i++) {
517 const char *next_desc;
518 struct igb_rx_buffer *buffer_info;
519 buffer_info = &rx_ring->rx_buffer_info[i];
520 rx_desc = IGB_RX_DESC(rx_ring, i);
521 u0 = (struct my_u0 *)rx_desc;
522 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
523
524 if (i == rx_ring->next_to_use)
525 next_desc = " NTU";
526 else if (i == rx_ring->next_to_clean)
527 next_desc = " NTC";
528 else
529 next_desc = "";
530
531 if (staterr & E1000_RXD_STAT_DD) {
532 /* Descriptor Done */
533 pr_info("%s[0x%03X] %016llX %016llX -------"
534 "--------- %p%s\n", "RWB", i,
535 le64_to_cpu(u0->a),
536 le64_to_cpu(u0->b),
537 buffer_info->skb, next_desc);
538 } else {
539 pr_info("%s[0x%03X] %016llX %016llX %016llX"
540 " %p%s\n", "R ", i,
541 le64_to_cpu(u0->a),
542 le64_to_cpu(u0->b),
543 (u64)buffer_info->dma,
544 buffer_info->skb, next_desc);
545
546 if (netif_msg_pktdata(adapter)) {
547 print_hex_dump(KERN_INFO, "",
548 DUMP_PREFIX_ADDRESS,
549 16, 1,
550 phys_to_virt(buffer_info->dma),
551 IGB_RX_HDR_LEN, true);
552 print_hex_dump(KERN_INFO, "",
553 DUMP_PREFIX_ADDRESS,
554 16, 1,
555 phys_to_virt(
556 buffer_info->page_dma +
557 buffer_info->page_offset),
558 PAGE_SIZE/2, true);
559 }
560 }
561 }
562 }
563
564 exit:
565 return;
566 }
567
568
569 /**
570 * igb_read_clock - read raw cycle counter (to be used by time counter)
571 */
572 static cycle_t igb_read_clock(const struct cyclecounter *tc)
573 {
574 struct igb_adapter *adapter =
575 container_of(tc, struct igb_adapter, cycles);
576 struct e1000_hw *hw = &adapter->hw;
577 u64 stamp = 0;
578 int shift = 0;
579
580 /*
581 * The timestamp latches on lowest register read. For the 82580
582 * the lowest register is SYSTIMR instead of SYSTIML. However we never
583 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
584 */
585 if (hw->mac.type >= e1000_82580) {
586 stamp = rd32(E1000_SYSTIMR) >> 8;
587 shift = IGB_82580_TSYNC_SHIFT;
588 }
589
590 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
591 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
592 return stamp;
593 }
594
595 /**
596 * igb_get_hw_dev - return device
597 * used by hardware layer to print debugging information
598 **/
599 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
600 {
601 struct igb_adapter *adapter = hw->back;
602 return adapter->netdev;
603 }
604
605 /**
606 * igb_init_module - Driver Registration Routine
607 *
608 * igb_init_module is the first routine called when the driver is
609 * loaded. All it does is register with the PCI subsystem.
610 **/
611 static int __init igb_init_module(void)
612 {
613 int ret;
614 pr_info("%s - version %s\n",
615 igb_driver_string, igb_driver_version);
616
617 pr_info("%s\n", igb_copyright);
618
619 #ifdef CONFIG_IGB_DCA
620 dca_register_notify(&dca_notifier);
621 #endif
622 ret = pci_register_driver(&igb_driver);
623 return ret;
624 }
625
626 module_init(igb_init_module);
627
628 /**
629 * igb_exit_module - Driver Exit Cleanup Routine
630 *
631 * igb_exit_module is called just before the driver is removed
632 * from memory.
633 **/
634 static void __exit igb_exit_module(void)
635 {
636 #ifdef CONFIG_IGB_DCA
637 dca_unregister_notify(&dca_notifier);
638 #endif
639 pci_unregister_driver(&igb_driver);
640 }
641
642 module_exit(igb_exit_module);
643
/*
 * Map ring index i to the 82576 queue register index: bit 0 of i moves to
 * bit 3 and the remaining bits shift down by one (0, 8, 1, 9, 2, 10, ...).
 * The argument is parenthesised so that expressions such as (a | b) expand
 * correctly.
 */
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
645 /**
646 * igb_cache_ring_register - Descriptor ring to register mapping
647 * @adapter: board private structure to initialize
648 *
649 * Once we know the feature-set enabled for the device, we'll cache
650 * the register offset the descriptor ring is assigned to.
651 **/
652 static void igb_cache_ring_register(struct igb_adapter *adapter)
653 {
654 int i = 0, j = 0;
655 u32 rbase_offset = adapter->vfs_allocated_count;
656
657 switch (adapter->hw.mac.type) {
658 case e1000_82576:
659 /* The queues are allocated for virtualization such that VF 0
660 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
661 * In order to avoid collision we start at the first free queue
662 * and continue consuming queues in the same sequence
663 */
664 if (adapter->vfs_allocated_count) {
665 for (; i < adapter->rss_queues; i++)
666 adapter->rx_ring[i]->reg_idx = rbase_offset +
667 Q_IDX_82576(i);
668 }
669 case e1000_82575:
670 case e1000_82580:
671 case e1000_i350:
672 default:
673 for (; i < adapter->num_rx_queues; i++)
674 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
675 for (; j < adapter->num_tx_queues; j++)
676 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
677 break;
678 }
679 }
680
681 static void igb_free_queues(struct igb_adapter *adapter)
682 {
683 int i;
684
685 for (i = 0; i < adapter->num_tx_queues; i++) {
686 kfree(adapter->tx_ring[i]);
687 adapter->tx_ring[i] = NULL;
688 }
689 for (i = 0; i < adapter->num_rx_queues; i++) {
690 kfree(adapter->rx_ring[i]);
691 adapter->rx_ring[i] = NULL;
692 }
693 adapter->num_rx_queues = 0;
694 adapter->num_tx_queues = 0;
695 }
696
697 /**
698 * igb_alloc_queues - Allocate memory for all rings
699 * @adapter: board private structure to initialize
700 *
701 * We allocate one ring per queue at run-time since we don't know the
702 * number of queues at compile-time.
703 **/
704 static int igb_alloc_queues(struct igb_adapter *adapter)
705 {
706 struct igb_ring *ring;
707 int i;
708 int orig_node = adapter->node;
709
710 for (i = 0; i < adapter->num_tx_queues; i++) {
711 if (orig_node == -1) {
712 int cur_node = next_online_node(adapter->node);
713 if (cur_node == MAX_NUMNODES)
714 cur_node = first_online_node;
715 adapter->node = cur_node;
716 }
717 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
718 adapter->node);
719 if (!ring)
720 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
721 if (!ring)
722 goto err;
723 ring->count = adapter->tx_ring_count;
724 ring->queue_index = i;
725 ring->dev = &adapter->pdev->dev;
726 ring->netdev = adapter->netdev;
727 ring->numa_node = adapter->node;
728 /* For 82575, context index must be unique per ring. */
729 if (adapter->hw.mac.type == e1000_82575)
730 set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
731 adapter->tx_ring[i] = ring;
732 }
733 /* Restore the adapter's original node */
734 adapter->node = orig_node;
735
736 for (i = 0; i < adapter->num_rx_queues; i++) {
737 if (orig_node == -1) {
738 int cur_node = next_online_node(adapter->node);
739 if (cur_node == MAX_NUMNODES)
740 cur_node = first_online_node;
741 adapter->node = cur_node;
742 }
743 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
744 adapter->node);
745 if (!ring)
746 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
747 if (!ring)
748 goto err;
749 ring->count = adapter->rx_ring_count;
750 ring->queue_index = i;
751 ring->dev = &adapter->pdev->dev;
752 ring->netdev = adapter->netdev;
753 ring->numa_node = adapter->node;
754 /* set flag indicating ring supports SCTP checksum offload */
755 if (adapter->hw.mac.type >= e1000_82576)
756 set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
757
758 /* On i350, loopback VLAN packets have the tag byte-swapped. */
759 if (adapter->hw.mac.type == e1000_i350)
760 set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
761
762 adapter->rx_ring[i] = ring;
763 }
764 /* Restore the adapter's original node */
765 adapter->node = orig_node;
766
767 igb_cache_ring_register(adapter);
768
769 return 0;
770
771 err:
772 /* Restore the adapter's original node */
773 adapter->node = orig_node;
774 igb_free_queues(adapter);
775
776 return -ENOMEM;
777 }
778
779 /**
780 * igb_write_ivar - configure ivar for given MSI-X vector
781 * @hw: pointer to the HW structure
782 * @msix_vector: vector number we are allocating to a given ring
783 * @index: row index of IVAR register to write within IVAR table
784 * @offset: column offset of in IVAR, should be multiple of 8
785 *
786 * This function is intended to handle the writing of the IVAR register
787 * for adapters 82576 and newer. The IVAR table consists of 2 columns,
788 * each containing an cause allocation for an Rx and Tx ring, and a
789 * variable number of rows depending on the number of queues supported.
790 **/
791 static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
792 int index, int offset)
793 {
794 u32 ivar = array_rd32(E1000_IVAR0, index);
795
796 /* clear any bits that are currently set */
797 ivar &= ~((u32)0xFF << offset);
798
799 /* write vector and valid bit */
800 ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
801
802 array_wr32(E1000_IVAR0, index, ivar);
803 }
804
805 #define IGB_N0_QUEUE -1
806 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
807 {
808 struct igb_adapter *adapter = q_vector->adapter;
809 struct e1000_hw *hw = &adapter->hw;
810 int rx_queue = IGB_N0_QUEUE;
811 int tx_queue = IGB_N0_QUEUE;
812 u32 msixbm = 0;
813
814 if (q_vector->rx.ring)
815 rx_queue = q_vector->rx.ring->reg_idx;
816 if (q_vector->tx.ring)
817 tx_queue = q_vector->tx.ring->reg_idx;
818
819 switch (hw->mac.type) {
820 case e1000_82575:
821 /* The 82575 assigns vectors using a bitmask, which matches the
822 bitmask for the EICR/EIMS/EIMC registers. To assign one
823 or more queues to a vector, we write the appropriate bits
824 into the MSIXBM register for that vector. */
825 if (rx_queue > IGB_N0_QUEUE)
826 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
827 if (tx_queue > IGB_N0_QUEUE)
828 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
829 if (!adapter->msix_entries && msix_vector == 0)
830 msixbm |= E1000_EIMS_OTHER;
831 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
832 q_vector->eims_value = msixbm;
833 break;
834 case e1000_82576:
835 /*
836 * 82576 uses a table that essentially consists of 2 columns
837 * with 8 rows. The ordering is column-major so we use the
838 * lower 3 bits as the row index, and the 4th bit as the
839 * column offset.
840 */
841 if (rx_queue > IGB_N0_QUEUE)
842 igb_write_ivar(hw, msix_vector,
843 rx_queue & 0x7,
844 (rx_queue & 0x8) << 1);
845 if (tx_queue > IGB_N0_QUEUE)
846 igb_write_ivar(hw, msix_vector,
847 tx_queue & 0x7,
848 ((tx_queue & 0x8) << 1) + 8);
849 q_vector->eims_value = 1 << msix_vector;
850 break;
851 case e1000_82580:
852 case e1000_i350:
853 /*
854 * On 82580 and newer adapters the scheme is similar to 82576
855 * however instead of ordering column-major we have things
856 * ordered row-major. So we traverse the table by using
857 * bit 0 as the column offset, and the remaining bits as the
858 * row index.
859 */
860 if (rx_queue > IGB_N0_QUEUE)
861 igb_write_ivar(hw, msix_vector,
862 rx_queue >> 1,
863 (rx_queue & 0x1) << 4);
864 if (tx_queue > IGB_N0_QUEUE)
865 igb_write_ivar(hw, msix_vector,
866 tx_queue >> 1,
867 ((tx_queue & 0x1) << 4) + 8);
868 q_vector->eims_value = 1 << msix_vector;
869 break;
870 default:
871 BUG();
872 break;
873 }
874
875 /* add q_vector eims value to global eims_enable_mask */
876 adapter->eims_enable_mask |= q_vector->eims_value;
877
878 /* configure q_vector to set itr on first interrupt */
879 q_vector->set_itr = 1;
880 }
881
882 /**
883 * igb_configure_msix - Configure MSI-X hardware
884 *
885 * igb_configure_msix sets up the hardware to properly
886 * generate MSI-X interrupts.
887 **/
888 static void igb_configure_msix(struct igb_adapter *adapter)
889 {
890 u32 tmp;
891 int i, vector = 0;
892 struct e1000_hw *hw = &adapter->hw;
893
894 adapter->eims_enable_mask = 0;
895
896 /* set vector for other causes, i.e. link changes */
897 switch (hw->mac.type) {
898 case e1000_82575:
899 tmp = rd32(E1000_CTRL_EXT);
900 /* enable MSI-X PBA support*/
901 tmp |= E1000_CTRL_EXT_PBA_CLR;
902
903 /* Auto-Mask interrupts upon ICR read. */
904 tmp |= E1000_CTRL_EXT_EIAME;
905 tmp |= E1000_CTRL_EXT_IRCA;
906
907 wr32(E1000_CTRL_EXT, tmp);
908
909 /* enable msix_other interrupt */
910 array_wr32(E1000_MSIXBM(0), vector++,
911 E1000_EIMS_OTHER);
912 adapter->eims_other = E1000_EIMS_OTHER;
913
914 break;
915
916 case e1000_82576:
917 case e1000_82580:
918 case e1000_i350:
919 /* Turn on MSI-X capability first, or our settings
920 * won't stick. And it will take days to debug. */
921 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
922 E1000_GPIE_PBA | E1000_GPIE_EIAME |
923 E1000_GPIE_NSICR);
924
925 /* enable msix_other interrupt */
926 adapter->eims_other = 1 << vector;
927 tmp = (vector++ | E1000_IVAR_VALID) << 8;
928
929 wr32(E1000_IVAR_MISC, tmp);
930 break;
931 default:
932 /* do nothing, since nothing else supports MSI-X */
933 break;
934 } /* switch (hw->mac.type) */
935
936 adapter->eims_enable_mask |= adapter->eims_other;
937
938 for (i = 0; i < adapter->num_q_vectors; i++)
939 igb_assign_vector(adapter->q_vector[i], vector++);
940
941 wrfl();
942 }
943
944 /**
945 * igb_request_msix - Initialize MSI-X interrupts
946 *
947 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
948 * kernel.
949 **/
950 static int igb_request_msix(struct igb_adapter *adapter)
951 {
952 struct net_device *netdev = adapter->netdev;
953 struct e1000_hw *hw = &adapter->hw;
954 int i, err = 0, vector = 0;
955
956 err = request_irq(adapter->msix_entries[vector].vector,
957 igb_msix_other, 0, netdev->name, adapter);
958 if (err)
959 goto out;
960 vector++;
961
962 for (i = 0; i < adapter->num_q_vectors; i++) {
963 struct igb_q_vector *q_vector = adapter->q_vector[i];
964
965 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
966
967 if (q_vector->rx.ring && q_vector->tx.ring)
968 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
969 q_vector->rx.ring->queue_index);
970 else if (q_vector->tx.ring)
971 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
972 q_vector->tx.ring->queue_index);
973 else if (q_vector->rx.ring)
974 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
975 q_vector->rx.ring->queue_index);
976 else
977 sprintf(q_vector->name, "%s-unused", netdev->name);
978
979 err = request_irq(adapter->msix_entries[vector].vector,
980 igb_msix_ring, 0, q_vector->name,
981 q_vector);
982 if (err)
983 goto out;
984 vector++;
985 }
986
987 igb_configure_msix(adapter);
988 return 0;
989 out:
990 return err;
991 }
992
993 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
994 {
995 if (adapter->msix_entries) {
996 pci_disable_msix(adapter->pdev);
997 kfree(adapter->msix_entries);
998 adapter->msix_entries = NULL;
999 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
1000 pci_disable_msi(adapter->pdev);
1001 }
1002 }
1003
1004 /**
1005 * igb_free_q_vectors - Free memory allocated for interrupt vectors
1006 * @adapter: board private structure to initialize
1007 *
1008 * This function frees the memory allocated to the q_vectors. In addition if
1009 * NAPI is enabled it will delete any references to the NAPI struct prior
1010 * to freeing the q_vector.
1011 **/
1012 static void igb_free_q_vectors(struct igb_adapter *adapter)
1013 {
1014 int v_idx;
1015
1016 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1017 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1018 adapter->q_vector[v_idx] = NULL;
1019 if (!q_vector)
1020 continue;
1021 netif_napi_del(&q_vector->napi);
1022 kfree(q_vector);
1023 }
1024 adapter->num_q_vectors = 0;
1025 }
1026
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * Tears the interrupt scheme down in three steps: the queues are freed, then
 * the q_vectors, and finally MSI-X/MSI is switched off.  Afterwards the
 * device has no rx queues, tx queues or MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* rings first ... */
	igb_free_queues(adapter);

	/* ... then the vectors that serviced them ... */
	igb_free_q_vectors(adapter);

	/* ... and last the PCI interrupt capability itself */
	igb_reset_interrupt_capability(adapter);
}
1039
1040 /**
1041 * igb_set_interrupt_capability - set MSI or MSI-X if supported
1042 *
1043 * Attempt to configure interrupts using the best available
1044 * capabilities of the hardware and kernel.
1045 **/
1046 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1047 {
1048 int err;
1049 int numvecs, i;
1050
1051 /* Number of supported queues. */
1052 adapter->num_rx_queues = adapter->rss_queues;
1053 if (adapter->vfs_allocated_count)
1054 adapter->num_tx_queues = 1;
1055 else
1056 adapter->num_tx_queues = adapter->rss_queues;
1057
1058 /* start with one vector for every rx queue */
1059 numvecs = adapter->num_rx_queues;
1060
1061 /* if tx handler is separate add 1 for every tx queue */
1062 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1063 numvecs += adapter->num_tx_queues;
1064
1065 /* store the number of vectors reserved for queues */
1066 adapter->num_q_vectors = numvecs;
1067
1068 /* add 1 vector for link status interrupts */
1069 numvecs++;
1070 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1071 GFP_KERNEL);
1072 if (!adapter->msix_entries)
1073 goto msi_only;
1074
1075 for (i = 0; i < numvecs; i++)
1076 adapter->msix_entries[i].entry = i;
1077
1078 err = pci_enable_msix(adapter->pdev,
1079 adapter->msix_entries,
1080 numvecs);
1081 if (err == 0)
1082 goto out;
1083
1084 igb_reset_interrupt_capability(adapter);
1085
1086 /* If we can't do MSI-X, try MSI */
1087 msi_only:
1088 #ifdef CONFIG_PCI_IOV
1089 /* disable SR-IOV for non MSI-X configurations */
1090 if (adapter->vf_data) {
1091 struct e1000_hw *hw = &adapter->hw;
1092 /* disable iov and allow time for transactions to clear */
1093 pci_disable_sriov(adapter->pdev);
1094 msleep(500);
1095
1096 kfree(adapter->vf_data);
1097 adapter->vf_data = NULL;
1098 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1099 wrfl();
1100 msleep(100);
1101 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1102 }
1103 #endif
1104 adapter->vfs_allocated_count = 0;
1105 adapter->rss_queues = 1;
1106 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1107 adapter->num_rx_queues = 1;
1108 adapter->num_tx_queues = 1;
1109 adapter->num_q_vectors = 1;
1110 if (!pci_enable_msi(adapter->pdev))
1111 adapter->flags |= IGB_FLAG_HAS_MSI;
1112 out:
1113 /* Notify the stack of the (possibly) reduced queue counts. */
1114 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1115 return netif_set_real_num_rx_queues(adapter->netdev,
1116 adapter->num_rx_queues);
1117 }
1118
1119 /**
1120 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1121 * @adapter: board private structure to initialize
1122 *
1123 * We allocate one q_vector per queue interrupt. If allocation fails we
1124 * return -ENOMEM.
1125 **/
1126 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1127 {
1128 struct igb_q_vector *q_vector;
1129 struct e1000_hw *hw = &adapter->hw;
1130 int v_idx;
1131 int orig_node = adapter->node;
1132
1133 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1134 if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1135 adapter->num_tx_queues)) &&
1136 (adapter->num_rx_queues == v_idx))
1137 adapter->node = orig_node;
1138 if (orig_node == -1) {
1139 int cur_node = next_online_node(adapter->node);
1140 if (cur_node == MAX_NUMNODES)
1141 cur_node = first_online_node;
1142 adapter->node = cur_node;
1143 }
1144 q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1145 adapter->node);
1146 if (!q_vector)
1147 q_vector = kzalloc(sizeof(struct igb_q_vector),
1148 GFP_KERNEL);
1149 if (!q_vector)
1150 goto err_out;
1151 q_vector->adapter = adapter;
1152 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1153 q_vector->itr_val = IGB_START_ITR;
1154 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1155 adapter->q_vector[v_idx] = q_vector;
1156 }
1157 /* Restore the adapter's original node */
1158 adapter->node = orig_node;
1159
1160 return 0;
1161
1162 err_out:
1163 /* Restore the adapter's original node */
1164 adapter->node = orig_node;
1165 igb_free_q_vectors(adapter);
1166 return -ENOMEM;
1167 }
1168
1169 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1170 int ring_idx, int v_idx)
1171 {
1172 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1173
1174 q_vector->rx.ring = adapter->rx_ring[ring_idx];
1175 q_vector->rx.ring->q_vector = q_vector;
1176 q_vector->rx.count++;
1177 q_vector->itr_val = adapter->rx_itr_setting;
1178 if (q_vector->itr_val && q_vector->itr_val <= 3)
1179 q_vector->itr_val = IGB_START_ITR;
1180 }
1181
1182 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1183 int ring_idx, int v_idx)
1184 {
1185 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1186
1187 q_vector->tx.ring = adapter->tx_ring[ring_idx];
1188 q_vector->tx.ring->q_vector = q_vector;
1189 q_vector->tx.count++;
1190 q_vector->itr_val = adapter->tx_itr_setting;
1191 q_vector->tx.work_limit = adapter->tx_work_limit;
1192 if (q_vector->itr_val && q_vector->itr_val <= 3)
1193 q_vector->itr_val = IGB_START_ITR;
1194 }
1195
1196 /**
1197 * igb_map_ring_to_vector - maps allocated queues to vectors
1198 *
1199 * This function maps the recently allocated queues to vectors.
1200 **/
1201 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1202 {
1203 int i;
1204 int v_idx = 0;
1205
1206 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1207 (adapter->num_q_vectors < adapter->num_tx_queues))
1208 return -ENOMEM;
1209
1210 if (adapter->num_q_vectors >=
1211 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1212 for (i = 0; i < adapter->num_rx_queues; i++)
1213 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1214 for (i = 0; i < adapter->num_tx_queues; i++)
1215 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1216 } else {
1217 for (i = 0; i < adapter->num_rx_queues; i++) {
1218 if (i < adapter->num_tx_queues)
1219 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1220 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1221 }
1222 for (; i < adapter->num_tx_queues; i++)
1223 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1224 }
1225 return 0;
1226 }
1227
1228 /**
1229 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1230 *
1231 * This function initializes the interrupts and allocates all of the queues.
1232 **/
1233 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1234 {
1235 struct pci_dev *pdev = adapter->pdev;
1236 int err;
1237
1238 err = igb_set_interrupt_capability(adapter);
1239 if (err)
1240 return err;
1241
1242 err = igb_alloc_q_vectors(adapter);
1243 if (err) {
1244 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1245 goto err_alloc_q_vectors;
1246 }
1247
1248 err = igb_alloc_queues(adapter);
1249 if (err) {
1250 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1251 goto err_alloc_queues;
1252 }
1253
1254 err = igb_map_ring_to_vector(adapter);
1255 if (err) {
1256 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1257 goto err_map_queues;
1258 }
1259
1260
1261 return 0;
1262 err_map_queues:
1263 igb_free_queues(adapter);
1264 err_alloc_queues:
1265 igb_free_q_vectors(adapter);
1266 err_alloc_q_vectors:
1267 igb_reset_interrupt_capability(adapter);
1268 return err;
1269 }
1270
1271 /**
1272 * igb_request_irq - initialize interrupts
1273 *
1274 * Attempts to configure interrupts using the best available
1275 * capabilities of the hardware and kernel.
1276 **/
1277 static int igb_request_irq(struct igb_adapter *adapter)
1278 {
1279 struct net_device *netdev = adapter->netdev;
1280 struct pci_dev *pdev = adapter->pdev;
1281 int err = 0;
1282
1283 if (adapter->msix_entries) {
1284 err = igb_request_msix(adapter);
1285 if (!err)
1286 goto request_done;
1287 /* fall back to MSI */
1288 igb_clear_interrupt_scheme(adapter);
1289 if (!pci_enable_msi(pdev))
1290 adapter->flags |= IGB_FLAG_HAS_MSI;
1291 igb_free_all_tx_resources(adapter);
1292 igb_free_all_rx_resources(adapter);
1293 adapter->num_tx_queues = 1;
1294 adapter->num_rx_queues = 1;
1295 adapter->num_q_vectors = 1;
1296 err = igb_alloc_q_vectors(adapter);
1297 if (err) {
1298 dev_err(&pdev->dev,
1299 "Unable to allocate memory for vectors\n");
1300 goto request_done;
1301 }
1302 err = igb_alloc_queues(adapter);
1303 if (err) {
1304 dev_err(&pdev->dev,
1305 "Unable to allocate memory for queues\n");
1306 igb_free_q_vectors(adapter);
1307 goto request_done;
1308 }
1309 igb_setup_all_tx_resources(adapter);
1310 igb_setup_all_rx_resources(adapter);
1311 }
1312
1313 igb_assign_vector(adapter->q_vector[0], 0);
1314
1315 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1316 err = request_irq(pdev->irq, igb_intr_msi, 0,
1317 netdev->name, adapter);
1318 if (!err)
1319 goto request_done;
1320
1321 /* fall back to legacy interrupts */
1322 igb_reset_interrupt_capability(adapter);
1323 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1324 }
1325
1326 err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1327 netdev->name, adapter);
1328
1329 if (err)
1330 dev_err(&pdev->dev, "Error %d getting interrupt\n",
1331 err);
1332
1333 request_done:
1334 return err;
1335 }
1336
1337 static void igb_free_irq(struct igb_adapter *adapter)
1338 {
1339 if (adapter->msix_entries) {
1340 int vector = 0, i;
1341
1342 free_irq(adapter->msix_entries[vector++].vector, adapter);
1343
1344 for (i = 0; i < adapter->num_q_vectors; i++)
1345 free_irq(adapter->msix_entries[vector++].vector,
1346 adapter->q_vector[i]);
1347 } else {
1348 free_irq(adapter->pdev->irq, adapter);
1349 }
1350 }
1351
1352 /**
1353 * igb_irq_disable - Mask off interrupt generation on the NIC
1354 * @adapter: board private structure
1355 **/
1356 static void igb_irq_disable(struct igb_adapter *adapter)
1357 {
1358 struct e1000_hw *hw = &adapter->hw;
1359
1360 /*
1361 * we need to be careful when disabling interrupts. The VFs are also
1362 * mapped into these registers and so clearing the bits can cause
1363 * issues on the VF drivers so we only need to clear what we set
1364 */
1365 if (adapter->msix_entries) {
1366 u32 regval = rd32(E1000_EIAM);
1367 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1368 wr32(E1000_EIMC, adapter->eims_enable_mask);
1369 regval = rd32(E1000_EIAC);
1370 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1371 }
1372
1373 wr32(E1000_IAM, 0);
1374 wr32(E1000_IMC, ~0);
1375 wrfl();
1376 if (adapter->msix_entries) {
1377 int i;
1378 for (i = 0; i < adapter->num_q_vectors; i++)
1379 synchronize_irq(adapter->msix_entries[i].vector);
1380 } else {
1381 synchronize_irq(adapter->pdev->irq);
1382 }
1383 }
1384
1385 /**
1386 * igb_irq_enable - Enable default interrupt generation settings
1387 * @adapter: board private structure
1388 **/
1389 static void igb_irq_enable(struct igb_adapter *adapter)
1390 {
1391 struct e1000_hw *hw = &adapter->hw;
1392
1393 if (adapter->msix_entries) {
1394 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1395 u32 regval = rd32(E1000_EIAC);
1396 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1397 regval = rd32(E1000_EIAM);
1398 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1399 wr32(E1000_EIMS, adapter->eims_enable_mask);
1400 if (adapter->vfs_allocated_count) {
1401 wr32(E1000_MBVFIMR, 0xFF);
1402 ims |= E1000_IMS_VMMB;
1403 }
1404 wr32(E1000_IMS, ims);
1405 } else {
1406 wr32(E1000_IMS, IMS_ENABLE_MASK |
1407 E1000_IMS_DRSTA);
1408 wr32(E1000_IAM, IMS_ENABLE_MASK |
1409 E1000_IMS_DRSTA);
1410 }
1411 }
1412
1413 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1414 {
1415 struct e1000_hw *hw = &adapter->hw;
1416 u16 vid = adapter->hw.mng_cookie.vlan_id;
1417 u16 old_vid = adapter->mng_vlan_id;
1418
1419 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1420 /* add VID to filter table */
1421 igb_vfta_set(hw, vid, true);
1422 adapter->mng_vlan_id = vid;
1423 } else {
1424 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1425 }
1426
1427 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1428 (vid != old_vid) &&
1429 !test_bit(old_vid, adapter->active_vlans)) {
1430 /* remove VID from filter table */
1431 igb_vfta_set(hw, old_vid, false);
1432 }
1433 }
1434
1435 /**
1436 * igb_release_hw_control - release control of the h/w to f/w
1437 * @adapter: address of board private structure
1438 *
1439 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1440 * For ASF and Pass Through versions of f/w this means that the
1441 * driver is no longer loaded.
1442 *
1443 **/
1444 static void igb_release_hw_control(struct igb_adapter *adapter)
1445 {
1446 struct e1000_hw *hw = &adapter->hw;
1447 u32 ctrl_ext;
1448
1449 /* Let firmware take over control of h/w */
1450 ctrl_ext = rd32(E1000_CTRL_EXT);
1451 wr32(E1000_CTRL_EXT,
1452 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1453 }
1454
1455 /**
1456 * igb_get_hw_control - get control of the h/w from f/w
1457 * @adapter: address of board private structure
1458 *
1459 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1460 * For ASF and Pass Through versions of f/w this means that
1461 * the driver is loaded.
1462 *
1463 **/
1464 static void igb_get_hw_control(struct igb_adapter *adapter)
1465 {
1466 struct e1000_hw *hw = &adapter->hw;
1467 u32 ctrl_ext;
1468
1469 /* Let firmware know the driver has taken over */
1470 ctrl_ext = rd32(E1000_CTRL_EXT);
1471 wr32(E1000_CTRL_EXT,
1472 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1473 }
1474
1475 /**
1476 * igb_configure - configure the hardware for RX and TX
1477 * @adapter: private board structure
1478 **/
1479 static void igb_configure(struct igb_adapter *adapter)
1480 {
1481 struct net_device *netdev = adapter->netdev;
1482 int i;
1483
1484 igb_get_hw_control(adapter);
1485 igb_set_rx_mode(netdev);
1486
1487 igb_restore_vlan(adapter);
1488
1489 igb_setup_tctl(adapter);
1490 igb_setup_mrqc(adapter);
1491 igb_setup_rctl(adapter);
1492
1493 igb_configure_tx(adapter);
1494 igb_configure_rx(adapter);
1495
1496 igb_rx_fifo_flush_82575(&adapter->hw);
1497
1498 /* call igb_desc_unused which always leaves
1499 * at least 1 descriptor unused to make sure
1500 * next_to_use != next_to_clean */
1501 for (i = 0; i < adapter->num_rx_queues; i++) {
1502 struct igb_ring *ring = adapter->rx_ring[i];
1503 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1504 }
1505 }
1506
1507 /**
1508 * igb_power_up_link - Power up the phy/serdes link
1509 * @adapter: address of board private structure
1510 **/
1511 void igb_power_up_link(struct igb_adapter *adapter)
1512 {
1513 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1514 igb_power_up_phy_copper(&adapter->hw);
1515 else
1516 igb_power_up_serdes_link_82575(&adapter->hw);
1517 igb_reset_phy(&adapter->hw);
1518 }
1519
1520 /**
1521 * igb_power_down_link - Power down the phy/serdes link
1522 * @adapter: address of board private structure
1523 */
1524 static void igb_power_down_link(struct igb_adapter *adapter)
1525 {
1526 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1527 igb_power_down_phy_copper_82575(&adapter->hw);
1528 else
1529 igb_shutdown_serdes_link_82575(&adapter->hw);
1530 }
1531
1532 /**
1533 * igb_up - Open the interface and prepare it to handle traffic
1534 * @adapter: board private structure
1535 **/
1536 int igb_up(struct igb_adapter *adapter)
1537 {
1538 struct e1000_hw *hw = &adapter->hw;
1539 int i;
1540
1541 /* hardware has been reset, we need to reload some things */
1542 igb_configure(adapter);
1543
1544 clear_bit(__IGB_DOWN, &adapter->state);
1545
1546 for (i = 0; i < adapter->num_q_vectors; i++)
1547 napi_enable(&(adapter->q_vector[i]->napi));
1548
1549 if (adapter->msix_entries)
1550 igb_configure_msix(adapter);
1551 else
1552 igb_assign_vector(adapter->q_vector[0], 0);
1553
1554 /* Clear any pending interrupts. */
1555 rd32(E1000_ICR);
1556 igb_irq_enable(adapter);
1557
1558 /* notify VFs that reset has been completed */
1559 if (adapter->vfs_allocated_count) {
1560 u32 reg_data = rd32(E1000_CTRL_EXT);
1561 reg_data |= E1000_CTRL_EXT_PFRSTD;
1562 wr32(E1000_CTRL_EXT, reg_data);
1563 }
1564
1565 netif_tx_start_all_queues(adapter->netdev);
1566
1567 /* start the watchdog. */
1568 hw->mac.get_link_status = 1;
1569 schedule_work(&adapter->watchdog_task);
1570
1571 return 0;
1572 }
1573
1574 void igb_down(struct igb_adapter *adapter)
1575 {
1576 struct net_device *netdev = adapter->netdev;
1577 struct e1000_hw *hw = &adapter->hw;
1578 u32 tctl, rctl;
1579 int i;
1580
1581 /* signal that we're down so the interrupt handler does not
1582 * reschedule our watchdog timer */
1583 set_bit(__IGB_DOWN, &adapter->state);
1584
1585 /* disable receives in the hardware */
1586 rctl = rd32(E1000_RCTL);
1587 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1588 /* flush and sleep below */
1589
1590 netif_tx_stop_all_queues(netdev);
1591
1592 /* disable transmits in the hardware */
1593 tctl = rd32(E1000_TCTL);
1594 tctl &= ~E1000_TCTL_EN;
1595 wr32(E1000_TCTL, tctl);
1596 /* flush both disables and wait for them to finish */
1597 wrfl();
1598 msleep(10);
1599
1600 for (i = 0; i < adapter->num_q_vectors; i++)
1601 napi_disable(&(adapter->q_vector[i]->napi));
1602
1603 igb_irq_disable(adapter);
1604
1605 del_timer_sync(&adapter->watchdog_timer);
1606 del_timer_sync(&adapter->phy_info_timer);
1607
1608 netif_carrier_off(netdev);
1609
1610 /* record the stats before reset*/
1611 spin_lock(&adapter->stats64_lock);
1612 igb_update_stats(adapter, &adapter->stats64);
1613 spin_unlock(&adapter->stats64_lock);
1614
1615 adapter->link_speed = 0;
1616 adapter->link_duplex = 0;
1617
1618 if (!pci_channel_offline(adapter->pdev))
1619 igb_reset(adapter);
1620 igb_clean_all_tx_rings(adapter);
1621 igb_clean_all_rx_rings(adapter);
1622 #ifdef CONFIG_IGB_DCA
1623
1624 /* since we reset the hardware DCA settings were cleared */
1625 igb_setup_dca(adapter);
1626 #endif
1627 }
1628
1629 void igb_reinit_locked(struct igb_adapter *adapter)
1630 {
1631 WARN_ON(in_interrupt());
1632 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1633 msleep(1);
1634 igb_down(adapter);
1635 igb_up(adapter);
1636 clear_bit(__IGB_RESETTING, &adapter->state);
1637 }
1638
1639 void igb_reset(struct igb_adapter *adapter)
1640 {
1641 struct pci_dev *pdev = adapter->pdev;
1642 struct e1000_hw *hw = &adapter->hw;
1643 struct e1000_mac_info *mac = &hw->mac;
1644 struct e1000_fc_info *fc = &hw->fc;
1645 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1646 u16 hwm;
1647
1648 /* Repartition Pba for greater than 9k mtu
1649 * To take effect CTRL.RST is required.
1650 */
1651 switch (mac->type) {
1652 case e1000_i350:
1653 case e1000_82580:
1654 pba = rd32(E1000_RXPBS);
1655 pba = igb_rxpbs_adjust_82580(pba);
1656 break;
1657 case e1000_82576:
1658 pba = rd32(E1000_RXPBS);
1659 pba &= E1000_RXPBS_SIZE_MASK_82576;
1660 break;
1661 case e1000_82575:
1662 default:
1663 pba = E1000_PBA_34K;
1664 break;
1665 }
1666
1667 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1668 (mac->type < e1000_82576)) {
1669 /* adjust PBA for jumbo frames */
1670 wr32(E1000_PBA, pba);
1671
1672 /* To maintain wire speed transmits, the Tx FIFO should be
1673 * large enough to accommodate two full transmit packets,
1674 * rounded up to the next 1KB and expressed in KB. Likewise,
1675 * the Rx FIFO should be large enough to accommodate at least
1676 * one full receive packet and is similarly rounded up and
1677 * expressed in KB. */
1678 pba = rd32(E1000_PBA);
1679 /* upper 16 bits has Tx packet buffer allocation size in KB */
1680 tx_space = pba >> 16;
1681 /* lower 16 bits has Rx packet buffer allocation size in KB */
1682 pba &= 0xffff;
1683 /* the tx fifo also stores 16 bytes of information about the tx
1684 * but don't include ethernet FCS because hardware appends it */
1685 min_tx_space = (adapter->max_frame_size +
1686 sizeof(union e1000_adv_tx_desc) -
1687 ETH_FCS_LEN) * 2;
1688 min_tx_space = ALIGN(min_tx_space, 1024);
1689 min_tx_space >>= 10;
1690 /* software strips receive CRC, so leave room for it */
1691 min_rx_space = adapter->max_frame_size;
1692 min_rx_space = ALIGN(min_rx_space, 1024);
1693 min_rx_space >>= 10;
1694
1695 /* If current Tx allocation is less than the min Tx FIFO size,
1696 * and the min Tx FIFO size is less than the current Rx FIFO
1697 * allocation, take space away from current Rx allocation */
1698 if (tx_space < min_tx_space &&
1699 ((min_tx_space - tx_space) < pba)) {
1700 pba = pba - (min_tx_space - tx_space);
1701
1702 /* if short on rx space, rx wins and must trump tx
1703 * adjustment */
1704 if (pba < min_rx_space)
1705 pba = min_rx_space;
1706 }
1707 wr32(E1000_PBA, pba);
1708 }
1709
1710 /* flow control settings */
1711 /* The high water mark must be low enough to fit one full frame
1712 * (or the size used for early receive) above it in the Rx FIFO.
1713 * Set it to the lower of:
1714 * - 90% of the Rx FIFO size, or
1715 * - the full Rx FIFO size minus one full frame */
1716 hwm = min(((pba << 10) * 9 / 10),
1717 ((pba << 10) - 2 * adapter->max_frame_size));
1718
1719 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1720 fc->low_water = fc->high_water - 16;
1721 fc->pause_time = 0xFFFF;
1722 fc->send_xon = 1;
1723 fc->current_mode = fc->requested_mode;
1724
1725 /* disable receive for all VFs and wait one second */
1726 if (adapter->vfs_allocated_count) {
1727 int i;
1728 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1729 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1730
1731 /* ping all the active vfs to let them know we are going down */
1732 igb_ping_all_vfs(adapter);
1733
1734 /* disable transmits and receives */
1735 wr32(E1000_VFRE, 0);
1736 wr32(E1000_VFTE, 0);
1737 }
1738
1739 /* Allow time for pending master requests to run */
1740 hw->mac.ops.reset_hw(hw);
1741 wr32(E1000_WUC, 0);
1742
1743 if (hw->mac.ops.init_hw(hw))
1744 dev_err(&pdev->dev, "Hardware Error\n");
1745
1746 igb_init_dmac(adapter, pba);
1747 if (!netif_running(adapter->netdev))
1748 igb_power_down_link(adapter);
1749
1750 igb_update_mng_vlan(adapter);
1751
1752 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1753 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1754
1755 igb_get_phy_info(hw);
1756 }
1757
1758 static netdev_features_t igb_fix_features(struct net_device *netdev,
1759 netdev_features_t features)
1760 {
1761 /*
1762 * Since there is no support for separate rx/tx vlan accel
1763 * enable/disable make sure tx flag is always in same state as rx.
1764 */
1765 if (features & NETIF_F_HW_VLAN_RX)
1766 features |= NETIF_F_HW_VLAN_TX;
1767 else
1768 features &= ~NETIF_F_HW_VLAN_TX;
1769
1770 return features;
1771 }
1772
1773 static int igb_set_features(struct net_device *netdev,
1774 netdev_features_t features)
1775 {
1776 netdev_features_t changed = netdev->features ^ features;
1777 struct igb_adapter *adapter = netdev_priv(netdev);
1778
1779 if (changed & NETIF_F_HW_VLAN_RX)
1780 igb_vlan_mode(netdev, features);
1781
1782 if (!(changed & NETIF_F_RXALL))
1783 return 0;
1784
1785 netdev->features = features;
1786
1787 if (netif_running(netdev))
1788 igb_reinit_locked(adapter);
1789 else
1790 igb_reset(adapter);
1791
1792 return 0;
1793 }
1794
1795 static const struct net_device_ops igb_netdev_ops = {
1796 .ndo_open = igb_open,
1797 .ndo_stop = igb_close,
1798 .ndo_start_xmit = igb_xmit_frame,
1799 .ndo_get_stats64 = igb_get_stats64,
1800 .ndo_set_rx_mode = igb_set_rx_mode,
1801 .ndo_set_mac_address = igb_set_mac,
1802 .ndo_change_mtu = igb_change_mtu,
1803 .ndo_do_ioctl = igb_ioctl,
1804 .ndo_tx_timeout = igb_tx_timeout,
1805 .ndo_validate_addr = eth_validate_addr,
1806 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1807 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1808 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1809 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1810 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1811 .ndo_get_vf_config = igb_ndo_get_vf_config,
1812 #ifdef CONFIG_NET_POLL_CONTROLLER
1813 .ndo_poll_controller = igb_netpoll,
1814 #endif
1815 .ndo_fix_features = igb_fix_features,
1816 .ndo_set_features = igb_set_features,
1817 };
1818
1819 /**
1820 * igb_probe - Device Initialization Routine
1821 * @pdev: PCI device information struct
1822 * @ent: entry in igb_pci_tbl
1823 *
1824 * Returns 0 on success, negative on failure
1825 *
1826 * igb_probe initializes an adapter identified by a pci_dev structure.
1827 * The OS initialization, configuring of the adapter private structure,
1828 * and a hardware reset occur.
1829 **/
1830 static int __devinit igb_probe(struct pci_dev *pdev,
1831 const struct pci_device_id *ent)
1832 {
1833 struct net_device *netdev;
1834 struct igb_adapter *adapter;
1835 struct e1000_hw *hw;
1836 u16 eeprom_data = 0;
1837 s32 ret_val;
1838 static int global_quad_port_a; /* global quad port a indication */
1839 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1840 unsigned long mmio_start, mmio_len;
1841 int err, pci_using_dac;
1842 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1843 u8 part_str[E1000_PBANUM_LENGTH];
1844
1845 /* Catch broken hardware that put the wrong VF device ID in
1846 * the PCIe SR-IOV capability.
1847 */
1848 if (pdev->is_virtfn) {
1849 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1850 pci_name(pdev), pdev->vendor, pdev->device);
1851 return -EINVAL;
1852 }
1853
1854 err = pci_enable_device_mem(pdev);
1855 if (err)
1856 return err;
1857
1858 pci_using_dac = 0;
1859 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1860 if (!err) {
1861 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1862 if (!err)
1863 pci_using_dac = 1;
1864 } else {
1865 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1866 if (err) {
1867 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1868 if (err) {
1869 dev_err(&pdev->dev, "No usable DMA "
1870 "configuration, aborting\n");
1871 goto err_dma;
1872 }
1873 }
1874 }
1875
1876 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1877 IORESOURCE_MEM),
1878 igb_driver_name);
1879 if (err)
1880 goto err_pci_reg;
1881
1882 pci_enable_pcie_error_reporting(pdev);
1883
1884 pci_set_master(pdev);
1885 pci_save_state(pdev);
1886
1887 err = -ENOMEM;
1888 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1889 IGB_MAX_TX_QUEUES);
1890 if (!netdev)
1891 goto err_alloc_etherdev;
1892
1893 SET_NETDEV_DEV(netdev, &pdev->dev);
1894
1895 pci_set_drvdata(pdev, netdev);
1896 adapter = netdev_priv(netdev);
1897 adapter->netdev = netdev;
1898 adapter->pdev = pdev;
1899 hw = &adapter->hw;
1900 hw->back = adapter;
1901 adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
1902
1903 mmio_start = pci_resource_start(pdev, 0);
1904 mmio_len = pci_resource_len(pdev, 0);
1905
1906 err = -EIO;
1907 hw->hw_addr = ioremap(mmio_start, mmio_len);
1908 if (!hw->hw_addr)
1909 goto err_ioremap;
1910
1911 netdev->netdev_ops = &igb_netdev_ops;
1912 igb_set_ethtool_ops(netdev);
1913 netdev->watchdog_timeo = 5 * HZ;
1914
1915 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1916
1917 netdev->mem_start = mmio_start;
1918 netdev->mem_end = mmio_start + mmio_len;
1919
1920 /* PCI config space info */
1921 hw->vendor_id = pdev->vendor;
1922 hw->device_id = pdev->device;
1923 hw->revision_id = pdev->revision;
1924 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1925 hw->subsystem_device_id = pdev->subsystem_device;
1926
1927 /* Copy the default MAC, PHY and NVM function pointers */
1928 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1929 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1930 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1931 /* Initialize skew-specific constants */
1932 err = ei->get_invariants(hw);
1933 if (err)
1934 goto err_sw_init;
1935
1936 /* setup the private structure */
1937 err = igb_sw_init(adapter);
1938 if (err)
1939 goto err_sw_init;
1940
1941 igb_get_bus_info_pcie(hw);
1942
1943 hw->phy.autoneg_wait_to_complete = false;
1944
1945 /* Copper options */
1946 if (hw->phy.media_type == e1000_media_type_copper) {
1947 hw->phy.mdix = AUTO_ALL_MODES;
1948 hw->phy.disable_polarity_correction = false;
1949 hw->phy.ms_type = e1000_ms_hw_default;
1950 }
1951
1952 if (igb_check_reset_block(hw))
1953 dev_info(&pdev->dev,
1954 "PHY reset is blocked due to SOL/IDER session.\n");
1955
1956 /*
1957 * features is initialized to 0 in allocation, it might have bits
1958 * set by igb_sw_init so we should use an or instead of an
1959 * assignment.
1960 */
1961 netdev->features |= NETIF_F_SG |
1962 NETIF_F_IP_CSUM |
1963 NETIF_F_IPV6_CSUM |
1964 NETIF_F_TSO |
1965 NETIF_F_TSO6 |
1966 NETIF_F_RXHASH |
1967 NETIF_F_RXCSUM |
1968 NETIF_F_HW_VLAN_RX |
1969 NETIF_F_HW_VLAN_TX;
1970
1971 /* copy netdev features into list of user selectable features */
1972 netdev->hw_features |= netdev->features;
1973 netdev->hw_features |= NETIF_F_RXALL;
1974
1975 /* set this bit last since it cannot be part of hw_features */
1976 netdev->features |= NETIF_F_HW_VLAN_FILTER;
1977
1978 netdev->vlan_features |= NETIF_F_TSO |
1979 NETIF_F_TSO6 |
1980 NETIF_F_IP_CSUM |
1981 NETIF_F_IPV6_CSUM |
1982 NETIF_F_SG;
1983
1984 netdev->priv_flags |= IFF_SUPP_NOFCS;
1985
1986 if (pci_using_dac) {
1987 netdev->features |= NETIF_F_HIGHDMA;
1988 netdev->vlan_features |= NETIF_F_HIGHDMA;
1989 }
1990
1991 if (hw->mac.type >= e1000_82576) {
1992 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1993 netdev->features |= NETIF_F_SCTP_CSUM;
1994 }
1995
1996 netdev->priv_flags |= IFF_UNICAST_FLT;
1997
1998 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1999
2000 /* before reading the NVM, reset the controller to put the device in a
2001 * known good starting state */
2002 hw->mac.ops.reset_hw(hw);
2003
2004 /* make sure the NVM is good */
2005 if (hw->nvm.ops.validate(hw) < 0) {
2006 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2007 err = -EIO;
2008 goto err_eeprom;
2009 }
2010
2011 /* copy the MAC address out of the NVM */
2012 if (hw->mac.ops.read_mac_addr(hw))
2013 dev_err(&pdev->dev, "NVM Read Error\n");
2014
2015 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2016 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2017
2018 if (!is_valid_ether_addr(netdev->perm_addr)) {
2019 dev_err(&pdev->dev, "Invalid MAC Address\n");
2020 err = -EIO;
2021 goto err_eeprom;
2022 }
2023
2024 setup_timer(&adapter->watchdog_timer, igb_watchdog,
2025 (unsigned long) adapter);
2026 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2027 (unsigned long) adapter);
2028
2029 INIT_WORK(&adapter->reset_task, igb_reset_task);
2030 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2031
2032 /* Initialize link properties that are user-changeable */
2033 adapter->fc_autoneg = true;
2034 hw->mac.autoneg = true;
2035 hw->phy.autoneg_advertised = 0x2f;
2036
2037 hw->fc.requested_mode = e1000_fc_default;
2038 hw->fc.current_mode = e1000_fc_default;
2039
2040 igb_validate_mdi_setting(hw);
2041
2042 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2043 * enable the ACPI Magic Packet filter
2044 */
2045
2046 if (hw->bus.func == 0)
2047 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2048 else if (hw->mac.type >= e1000_82580)
2049 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2050 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2051 &eeprom_data);
2052 else if (hw->bus.func == 1)
2053 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2054
2055 if (eeprom_data & eeprom_apme_mask)
2056 adapter->eeprom_wol |= E1000_WUFC_MAG;
2057
2058 /* now that we have the eeprom settings, apply the special cases where
2059 * the eeprom may be wrong or the board simply won't support wake on
2060 * lan on a particular port */
2061 switch (pdev->device) {
2062 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2063 adapter->eeprom_wol = 0;
2064 break;
2065 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2066 case E1000_DEV_ID_82576_FIBER:
2067 case E1000_DEV_ID_82576_SERDES:
2068 /* Wake events only supported on port A for dual fiber
2069 * regardless of eeprom setting */
2070 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2071 adapter->eeprom_wol = 0;
2072 break;
2073 case E1000_DEV_ID_82576_QUAD_COPPER:
2074 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2075 /* if quad port adapter, disable WoL on all but port A */
2076 if (global_quad_port_a != 0)
2077 adapter->eeprom_wol = 0;
2078 else
2079 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2080 /* Reset for multiple quad port adapters */
2081 if (++global_quad_port_a == 4)
2082 global_quad_port_a = 0;
2083 break;
2084 }
2085
2086 /* initialize the wol settings based on the eeprom settings */
2087 adapter->wol = adapter->eeprom_wol;
2088 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2089
2090 /* reset the hardware with the new settings */
2091 igb_reset(adapter);
2092
2093 /* let the f/w know that the h/w is now under the control of the
2094 * driver. */
2095 igb_get_hw_control(adapter);
2096
2097 strcpy(netdev->name, "eth%d");
2098 err = register_netdev(netdev);
2099 if (err)
2100 goto err_register;
2101
2102 /* carrier off reporting is important to ethtool even BEFORE open */
2103 netif_carrier_off(netdev);
2104
2105 #ifdef CONFIG_IGB_DCA
2106 if (dca_add_requester(&pdev->dev) == 0) {
2107 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2108 dev_info(&pdev->dev, "DCA enabled\n");
2109 igb_setup_dca(adapter);
2110 }
2111
2112 #endif
2113 /* do hw tstamp init after resetting */
2114 igb_init_hw_timer(adapter);
2115
2116 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2117 /* print bus type/speed/width info */
2118 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2119 netdev->name,
2120 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2121 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2122 "unknown"),
2123 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2124 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2125 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2126 "unknown"),
2127 netdev->dev_addr);
2128
2129 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2130 if (ret_val)
2131 strcpy(part_str, "Unknown");
2132 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2133 dev_info(&pdev->dev,
2134 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2135 adapter->msix_entries ? "MSI-X" :
2136 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2137 adapter->num_rx_queues, adapter->num_tx_queues);
2138 switch (hw->mac.type) {
2139 case e1000_i350:
2140 igb_set_eee_i350(hw);
2141 break;
2142 default:
2143 break;
2144 }
2145
2146 pm_runtime_put_noidle(&pdev->dev);
2147 return 0;
2148
2149 err_register:
2150 igb_release_hw_control(adapter);
2151 err_eeprom:
2152 if (!igb_check_reset_block(hw))
2153 igb_reset_phy(hw);
2154
2155 if (hw->flash_address)
2156 iounmap(hw->flash_address);
2157 err_sw_init:
2158 igb_clear_interrupt_scheme(adapter);
2159 iounmap(hw->hw_addr);
2160 err_ioremap:
2161 free_netdev(netdev);
2162 err_alloc_etherdev:
2163 pci_release_selected_regions(pdev,
2164 pci_select_bars(pdev, IORESOURCE_MEM));
2165 err_pci_reg:
2166 err_dma:
2167 pci_disable_device(pdev);
2168 return err;
2169 }
2170
2171 /**
2172 * igb_remove - Device Removal Routine
2173 * @pdev: PCI device information struct
2174 *
2175 * igb_remove is called by the PCI subsystem to alert the driver
2176 * that it should release a PCI device. The could be caused by a
2177 * Hot-Plug event, or because the driver is going to be removed from
2178 * memory.
2179 **/
2180 static void __devexit igb_remove(struct pci_dev *pdev)
2181 {
2182 struct net_device *netdev = pci_get_drvdata(pdev);
2183 struct igb_adapter *adapter = netdev_priv(netdev);
2184 struct e1000_hw *hw = &adapter->hw;
2185
2186 pm_runtime_get_noresume(&pdev->dev);
2187
2188 /*
2189 * The watchdog timer may be rescheduled, so explicitly
2190 * disable watchdog from being rescheduled.
2191 */
2192 set_bit(__IGB_DOWN, &adapter->state);
2193 del_timer_sync(&adapter->watchdog_timer);
2194 del_timer_sync(&adapter->phy_info_timer);
2195
2196 cancel_work_sync(&adapter->reset_task);
2197 cancel_work_sync(&adapter->watchdog_task);
2198
2199 #ifdef CONFIG_IGB_DCA
2200 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2201 dev_info(&pdev->dev, "DCA disabled\n");
2202 dca_remove_requester(&pdev->dev);
2203 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2204 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2205 }
2206 #endif
2207
2208 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2209 * would have already happened in close and is redundant. */
2210 igb_release_hw_control(adapter);
2211
2212 unregister_netdev(netdev);
2213
2214 igb_clear_interrupt_scheme(adapter);
2215
2216 #ifdef CONFIG_PCI_IOV
2217 /* reclaim resources allocated to VFs */
2218 if (adapter->vf_data) {
2219 /* disable iov and allow time for transactions to clear */
2220 if (!igb_check_vf_assignment(adapter)) {
2221 pci_disable_sriov(pdev);
2222 msleep(500);
2223 } else {
2224 dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2225 }
2226
2227 kfree(adapter->vf_data);
2228 adapter->vf_data = NULL;
2229 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2230 wrfl();
2231 msleep(100);
2232 dev_info(&pdev->dev, "IOV Disabled\n");
2233 }
2234 #endif
2235
2236 iounmap(hw->hw_addr);
2237 if (hw->flash_address)
2238 iounmap(hw->flash_address);
2239 pci_release_selected_regions(pdev,
2240 pci_select_bars(pdev, IORESOURCE_MEM));
2241
2242 kfree(adapter->shadow_vfta);
2243 free_netdev(netdev);
2244
2245 pci_disable_pcie_error_reporting(pdev);
2246
2247 pci_disable_device(pdev);
2248 }
2249
2250 /**
2251 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2252 * @adapter: board private structure to initialize
2253 *
2254 * This function initializes the vf specific data storage and then attempts to
2255 * allocate the VFs. The reason for ordering it this way is because it is much
2256 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2257 * the memory for the VFs.
2258 **/
2259 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2260 {
2261 #ifdef CONFIG_PCI_IOV
2262 struct pci_dev *pdev = adapter->pdev;
2263 int old_vfs = igb_find_enabled_vfs(adapter);
2264 int i;
2265
2266 if (old_vfs) {
2267 dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2268 "max_vfs setting of %d\n", old_vfs, max_vfs);
2269 adapter->vfs_allocated_count = old_vfs;
2270 }
2271
2272 if (!adapter->vfs_allocated_count)
2273 return;
2274
2275 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2276 sizeof(struct vf_data_storage), GFP_KERNEL);
2277 /* if allocation failed then we do not support SR-IOV */
2278 if (!adapter->vf_data) {
2279 adapter->vfs_allocated_count = 0;
2280 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2281 "Data Storage\n");
2282 goto out;
2283 }
2284
2285 if (!old_vfs) {
2286 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2287 goto err_out;
2288 }
2289 dev_info(&pdev->dev, "%d VFs allocated\n",
2290 adapter->vfs_allocated_count);
2291 for (i = 0; i < adapter->vfs_allocated_count; i++)
2292 igb_vf_configure(adapter, i);
2293
2294 /* DMA Coalescing is not supported in IOV mode. */
2295 adapter->flags &= ~IGB_FLAG_DMAC;
2296 goto out;
2297 err_out:
2298 kfree(adapter->vf_data);
2299 adapter->vf_data = NULL;
2300 adapter->vfs_allocated_count = 0;
2301 out:
2302 return;
2303 #endif /* CONFIG_PCI_IOV */
2304 }
2305
2306 /**
2307 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2308 * @adapter: board private structure to initialize
2309 *
2310 * igb_init_hw_timer initializes the function pointer and values for the hw
2311 * timer found in hardware.
2312 **/
2313 static void igb_init_hw_timer(struct igb_adapter *adapter)
2314 {
2315 struct e1000_hw *hw = &adapter->hw;
2316
2317 switch (hw->mac.type) {
2318 case e1000_i350:
2319 case e1000_82580:
2320 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2321 adapter->cycles.read = igb_read_clock;
2322 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2323 adapter->cycles.mult = 1;
2324 /*
2325 * The 82580 timesync updates the system timer every 8ns by 8ns
2326 * and the value cannot be shifted. Instead we need to shift
2327 * the registers to generate a 64bit timer value. As a result
2328 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2329 * 24 in order to generate a larger value for synchronization.
2330 */
2331 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2332 /* disable system timer temporarily by setting bit 31 */
2333 wr32(E1000_TSAUXC, 0x80000000);
2334 wrfl();
2335
2336 /* Set registers so that rollover occurs soon to test this. */
2337 wr32(E1000_SYSTIMR, 0x00000000);
2338 wr32(E1000_SYSTIML, 0x80000000);
2339 wr32(E1000_SYSTIMH, 0x000000FF);
2340 wrfl();
2341
2342 /* enable system timer by clearing bit 31 */
2343 wr32(E1000_TSAUXC, 0x0);
2344 wrfl();
2345
2346 timecounter_init(&adapter->clock,
2347 &adapter->cycles,
2348 ktime_to_ns(ktime_get_real()));
2349 /*
2350 * Synchronize our NIC clock against system wall clock. NIC
2351 * time stamp reading requires ~3us per sample, each sample
2352 * was pretty stable even under load => only require 10
2353 * samples for each offset comparison.
2354 */
2355 memset(&adapter->compare, 0, sizeof(adapter->compare));
2356 adapter->compare.source = &adapter->clock;
2357 adapter->compare.target = ktime_get_real;
2358 adapter->compare.num_samples = 10;
2359 timecompare_update(&adapter->compare, 0);
2360 break;
2361 case e1000_82576:
2362 /*
2363 * Initialize hardware timer: we keep it running just in case
2364 * that some program needs it later on.
2365 */
2366 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2367 adapter->cycles.read = igb_read_clock;
2368 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2369 adapter->cycles.mult = 1;
2370 /**
2371 * Scale the NIC clock cycle by a large factor so that
2372 * relatively small clock corrections can be added or
2373 * subtracted at each clock tick. The drawbacks of a large
2374 * factor are a) that the clock register overflows more quickly
2375 * (not such a big deal) and b) that the increment per tick has
2376 * to fit into 24 bits. As a result we need to use a shift of
2377 * 19 so we can fit a value of 16 into the TIMINCA register.
2378 */
2379 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2380 wr32(E1000_TIMINCA,
2381 (1 << E1000_TIMINCA_16NS_SHIFT) |
2382 (16 << IGB_82576_TSYNC_SHIFT));
2383
2384 /* Set registers so that rollover occurs soon to test this. */
2385 wr32(E1000_SYSTIML, 0x00000000);
2386 wr32(E1000_SYSTIMH, 0xFF800000);
2387 wrfl();
2388
2389 timecounter_init(&adapter->clock,
2390 &adapter->cycles,
2391 ktime_to_ns(ktime_get_real()));
2392 /*
2393 * Synchronize our NIC clock against system wall clock. NIC
2394 * time stamp reading requires ~3us per sample, each sample
2395 * was pretty stable even under load => only require 10
2396 * samples for each offset comparison.
2397 */
2398 memset(&adapter->compare, 0, sizeof(adapter->compare));
2399 adapter->compare.source = &adapter->clock;
2400 adapter->compare.target = ktime_get_real;
2401 adapter->compare.num_samples = 10;
2402 timecompare_update(&adapter->compare, 0);
2403 break;
2404 case e1000_82575:
2405 /* 82575 does not support timesync */
2406 default:
2407 break;
2408 }
2409
2410 }
2411
2412 /**
2413 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2414 * @adapter: board private structure to initialize
2415 *
2416 * igb_sw_init initializes the Adapter private data structure.
2417 * Fields are initialized based on PCI device information and
2418 * OS network device settings (MTU size).
2419 **/
2420 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2421 {
2422 struct e1000_hw *hw = &adapter->hw;
2423 struct net_device *netdev = adapter->netdev;
2424 struct pci_dev *pdev = adapter->pdev;
2425
2426 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2427
2428 /* set default ring sizes */
2429 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2430 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2431
2432 /* set default ITR values */
2433 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2434 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2435
2436 /* set default work limits */
2437 adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2438
2439 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2440 VLAN_HLEN;
2441 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2442
2443 adapter->node = -1;
2444
2445 spin_lock_init(&adapter->stats64_lock);
2446 #ifdef CONFIG_PCI_IOV
2447 switch (hw->mac.type) {
2448 case e1000_82576:
2449 case e1000_i350:
2450 if (max_vfs > 7) {
2451 dev_warn(&pdev->dev,
2452 "Maximum of 7 VFs per PF, using max\n");
2453 adapter->vfs_allocated_count = 7;
2454 } else
2455 adapter->vfs_allocated_count = max_vfs;
2456 break;
2457 default:
2458 break;
2459 }
2460 #endif /* CONFIG_PCI_IOV */
2461 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2462 /* i350 cannot do RSS and SR-IOV at the same time */
2463 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2464 adapter->rss_queues = 1;
2465
2466 /*
2467 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2468 * then we should combine the queues into a queue pair in order to
2469 * conserve interrupts due to limited supply
2470 */
2471 if ((adapter->rss_queues > 4) ||
2472 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2473 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2474
2475 /* Setup and initialize a copy of the hw vlan table array */
2476 adapter->shadow_vfta = kzalloc(sizeof(u32) *
2477 E1000_VLAN_FILTER_TBL_SIZE,
2478 GFP_ATOMIC);
2479
2480 /* This call may decrease the number of queues */
2481 if (igb_init_interrupt_scheme(adapter)) {
2482 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2483 return -ENOMEM;
2484 }
2485
2486 igb_probe_vfs(adapter);
2487
2488 /* Explicitly disable IRQ since the NIC can be in any state. */
2489 igb_irq_disable(adapter);
2490
2491 if (hw->mac.type == e1000_i350)
2492 adapter->flags &= ~IGB_FLAG_DMAC;
2493
2494 set_bit(__IGB_DOWN, &adapter->state);
2495 return 0;
2496 }
2497
2498 /**
2499 * igb_open - Called when a network interface is made active
2500 * @netdev: network interface device structure
2501 *
2502 * Returns 0 on success, negative value on failure
2503 *
2504 * The open entry point is called when a network interface is made
2505 * active by the system (IFF_UP). At this point all resources needed
2506 * for transmit and receive operations are allocated, the interrupt
2507 * handler is registered with the OS, the watchdog timer is started,
2508 * and the stack is notified that the interface is ready.
2509 **/
2510 static int __igb_open(struct net_device *netdev, bool resuming)
2511 {
2512 struct igb_adapter *adapter = netdev_priv(netdev);
2513 struct e1000_hw *hw = &adapter->hw;
2514 struct pci_dev *pdev = adapter->pdev;
2515 int err;
2516 int i;
2517
2518 /* disallow open during test */
2519 if (test_bit(__IGB_TESTING, &adapter->state)) {
2520 WARN_ON(resuming);
2521 return -EBUSY;
2522 }
2523
2524 if (!resuming)
2525 pm_runtime_get_sync(&pdev->dev);
2526
2527 netif_carrier_off(netdev);
2528
2529 /* allocate transmit descriptors */
2530 err = igb_setup_all_tx_resources(adapter);
2531 if (err)
2532 goto err_setup_tx;
2533
2534 /* allocate receive descriptors */
2535 err = igb_setup_all_rx_resources(adapter);
2536 if (err)
2537 goto err_setup_rx;
2538
2539 igb_power_up_link(adapter);
2540
2541 /* before we allocate an interrupt, we must be ready to handle it.
2542 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2543 * as soon as we call pci_request_irq, so we have to setup our
2544 * clean_rx handler before we do so. */
2545 igb_configure(adapter);
2546
2547 err = igb_request_irq(adapter);
2548 if (err)
2549 goto err_req_irq;
2550
2551 /* From here on the code is the same as igb_up() */
2552 clear_bit(__IGB_DOWN, &adapter->state);
2553
2554 for (i = 0; i < adapter->num_q_vectors; i++)
2555 napi_enable(&(adapter->q_vector[i]->napi));
2556
2557 /* Clear any pending interrupts. */
2558 rd32(E1000_ICR);
2559
2560 igb_irq_enable(adapter);
2561
2562 /* notify VFs that reset has been completed */
2563 if (adapter->vfs_allocated_count) {
2564 u32 reg_data = rd32(E1000_CTRL_EXT);
2565 reg_data |= E1000_CTRL_EXT_PFRSTD;
2566 wr32(E1000_CTRL_EXT, reg_data);
2567 }
2568
2569 netif_tx_start_all_queues(netdev);
2570
2571 if (!resuming)
2572 pm_runtime_put(&pdev->dev);
2573
2574 /* start the watchdog. */
2575 hw->mac.get_link_status = 1;
2576 schedule_work(&adapter->watchdog_task);
2577
2578 return 0;
2579
2580 err_req_irq:
2581 igb_release_hw_control(adapter);
2582 igb_power_down_link(adapter);
2583 igb_free_all_rx_resources(adapter);
2584 err_setup_rx:
2585 igb_free_all_tx_resources(adapter);
2586 err_setup_tx:
2587 igb_reset(adapter);
2588 if (!resuming)
2589 pm_runtime_put(&pdev->dev);
2590
2591 return err;
2592 }
2593
2594 static int igb_open(struct net_device *netdev)
2595 {
2596 return __igb_open(netdev, false);
2597 }
2598
2599 /**
2600 * igb_close - Disables a network interface
2601 * @netdev: network interface device structure
2602 *
2603 * Returns 0, this is not allowed to fail
2604 *
2605 * The close entry point is called when an interface is de-activated
2606 * by the OS. The hardware is still under the driver's control, but
2607 * needs to be disabled. A global MAC reset is issued to stop the
2608 * hardware, and all transmit and receive resources are freed.
2609 **/
2610 static int __igb_close(struct net_device *netdev, bool suspending)
2611 {
2612 struct igb_adapter *adapter = netdev_priv(netdev);
2613 struct pci_dev *pdev = adapter->pdev;
2614
2615 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2616
2617 if (!suspending)
2618 pm_runtime_get_sync(&pdev->dev);
2619
2620 igb_down(adapter);
2621 igb_free_irq(adapter);
2622
2623 igb_free_all_tx_resources(adapter);
2624 igb_free_all_rx_resources(adapter);
2625
2626 if (!suspending)
2627 pm_runtime_put_sync(&pdev->dev);
2628 return 0;
2629 }
2630
2631 static int igb_close(struct net_device *netdev)
2632 {
2633 return __igb_close(netdev, false);
2634 }
2635
2636 /**
2637 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2638 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2639 *
2640 * Return 0 on success, negative on failure
2641 **/
2642 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2643 {
2644 struct device *dev = tx_ring->dev;
2645 int orig_node = dev_to_node(dev);
2646 int size;
2647
2648 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
2649 tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2650 if (!tx_ring->tx_buffer_info)
2651 tx_ring->tx_buffer_info = vzalloc(size);
2652 if (!tx_ring->tx_buffer_info)
2653 goto err;
2654
2655 /* round up to nearest 4K */
2656 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2657 tx_ring->size = ALIGN(tx_ring->size, 4096);
2658
2659 set_dev_node(dev, tx_ring->numa_node);
2660 tx_ring->desc = dma_alloc_coherent(dev,
2661 tx_ring->size,
2662 &tx_ring->dma,
2663 GFP_KERNEL);
2664 set_dev_node(dev, orig_node);
2665 if (!tx_ring->desc)
2666 tx_ring->desc = dma_alloc_coherent(dev,
2667 tx_ring->size,
2668 &tx_ring->dma,
2669 GFP_KERNEL);
2670
2671 if (!tx_ring->desc)
2672 goto err;
2673
2674 tx_ring->next_to_use = 0;
2675 tx_ring->next_to_clean = 0;
2676
2677 return 0;
2678
2679 err:
2680 vfree(tx_ring->tx_buffer_info);
2681 dev_err(dev,
2682 "Unable to allocate memory for the transmit descriptor ring\n");
2683 return -ENOMEM;
2684 }
2685
2686 /**
2687 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2688 * (Descriptors) for all queues
2689 * @adapter: board private structure
2690 *
2691 * Return 0 on success, negative on failure
2692 **/
2693 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2694 {
2695 struct pci_dev *pdev = adapter->pdev;
2696 int i, err = 0;
2697
2698 for (i = 0; i < adapter->num_tx_queues; i++) {
2699 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2700 if (err) {
2701 dev_err(&pdev->dev,
2702 "Allocation for Tx Queue %u failed\n", i);
2703 for (i--; i >= 0; i--)
2704 igb_free_tx_resources(adapter->tx_ring[i]);
2705 break;
2706 }
2707 }
2708
2709 return err;
2710 }
2711
2712 /**
2713 * igb_setup_tctl - configure the transmit control registers
2714 * @adapter: Board private structure
2715 **/
2716 void igb_setup_tctl(struct igb_adapter *adapter)
2717 {
2718 struct e1000_hw *hw = &adapter->hw;
2719 u32 tctl;
2720
2721 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2722 wr32(E1000_TXDCTL(0), 0);
2723
2724 /* Program the Transmit Control Register */
2725 tctl = rd32(E1000_TCTL);
2726 tctl &= ~E1000_TCTL_CT;
2727 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2728 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2729
2730 igb_config_collision_dist(hw);
2731
2732 /* Enable transmits */
2733 tctl |= E1000_TCTL_EN;
2734
2735 wr32(E1000_TCTL, tctl);
2736 }
2737
2738 /**
2739 * igb_configure_tx_ring - Configure transmit ring after Reset
2740 * @adapter: board private structure
2741 * @ring: tx ring to configure
2742 *
2743 * Configure a transmit ring after a reset.
2744 **/
2745 void igb_configure_tx_ring(struct igb_adapter *adapter,
2746 struct igb_ring *ring)
2747 {
2748 struct e1000_hw *hw = &adapter->hw;
2749 u32 txdctl = 0;
2750 u64 tdba = ring->dma;
2751 int reg_idx = ring->reg_idx;
2752
2753 /* disable the queue */
2754 wr32(E1000_TXDCTL(reg_idx), 0);
2755 wrfl();
2756 mdelay(10);
2757
2758 wr32(E1000_TDLEN(reg_idx),
2759 ring->count * sizeof(union e1000_adv_tx_desc));
2760 wr32(E1000_TDBAL(reg_idx),
2761 tdba & 0x00000000ffffffffULL);
2762 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2763
2764 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2765 wr32(E1000_TDH(reg_idx), 0);
2766 writel(0, ring->tail);
2767
2768 txdctl |= IGB_TX_PTHRESH;
2769 txdctl |= IGB_TX_HTHRESH << 8;
2770 txdctl |= IGB_TX_WTHRESH << 16;
2771
2772 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2773 wr32(E1000_TXDCTL(reg_idx), txdctl);
2774
2775 netdev_tx_reset_queue(txring_txq(ring));
2776 }
2777
2778 /**
2779 * igb_configure_tx - Configure transmit Unit after Reset
2780 * @adapter: board private structure
2781 *
2782 * Configure the Tx unit of the MAC after a reset.
2783 **/
2784 static void igb_configure_tx(struct igb_adapter *adapter)
2785 {
2786 int i;
2787
2788 for (i = 0; i < adapter->num_tx_queues; i++)
2789 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2790 }
2791
2792 /**
2793 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2794 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2795 *
2796 * Returns 0 on success, negative on failure
2797 **/
2798 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2799 {
2800 struct device *dev = rx_ring->dev;
2801 int orig_node = dev_to_node(dev);
2802 int size, desc_len;
2803
2804 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2805 rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2806 if (!rx_ring->rx_buffer_info)
2807 rx_ring->rx_buffer_info = vzalloc(size);
2808 if (!rx_ring->rx_buffer_info)
2809 goto err;
2810
2811 desc_len = sizeof(union e1000_adv_rx_desc);
2812
2813 /* Round up to nearest 4K */
2814 rx_ring->size = rx_ring->count * desc_len;
2815 rx_ring->size = ALIGN(rx_ring->size, 4096);
2816
2817 set_dev_node(dev, rx_ring->numa_node);
2818 rx_ring->desc = dma_alloc_coherent(dev,
2819 rx_ring->size,
2820 &rx_ring->dma,
2821 GFP_KERNEL);
2822 set_dev_node(dev, orig_node);
2823 if (!rx_ring->desc)
2824 rx_ring->desc = dma_alloc_coherent(dev,
2825 rx_ring->size,
2826 &rx_ring->dma,
2827 GFP_KERNEL);
2828
2829 if (!rx_ring->desc)
2830 goto err;
2831
2832 rx_ring->next_to_clean = 0;
2833 rx_ring->next_to_use = 0;
2834
2835 return 0;
2836
2837 err:
2838 vfree(rx_ring->rx_buffer_info);
2839 rx_ring->rx_buffer_info = NULL;
2840 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2841 " ring\n");
2842 return -ENOMEM;
2843 }
2844
2845 /**
2846 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2847 * (Descriptors) for all queues
2848 * @adapter: board private structure
2849 *
2850 * Return 0 on success, negative on failure
2851 **/
2852 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2853 {
2854 struct pci_dev *pdev = adapter->pdev;
2855 int i, err = 0;
2856
2857 for (i = 0; i < adapter->num_rx_queues; i++) {
2858 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2859 if (err) {
2860 dev_err(&pdev->dev,
2861 "Allocation for Rx Queue %u failed\n", i);
2862 for (i--; i >= 0; i--)
2863 igb_free_rx_resources(adapter->rx_ring[i]);
2864 break;
2865 }
2866 }
2867
2868 return err;
2869 }
2870
2871 /**
2872 * igb_setup_mrqc - configure the multiple receive queue control registers
2873 * @adapter: Board private structure
2874 **/
2875 static void igb_setup_mrqc(struct igb_adapter *adapter)
2876 {
2877 struct e1000_hw *hw = &adapter->hw;
2878 u32 mrqc, rxcsum;
2879 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2880 union e1000_reta {
2881 u32 dword;
2882 u8 bytes[4];
2883 } reta;
2884 static const u8 rsshash[40] = {
2885 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2886 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2887 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2888 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2889
2890 /* Fill out hash function seeds */
2891 for (j = 0; j < 10; j++) {
2892 u32 rsskey = rsshash[(j * 4)];
2893 rsskey |= rsshash[(j * 4) + 1] << 8;
2894 rsskey |= rsshash[(j * 4) + 2] << 16;
2895 rsskey |= rsshash[(j * 4) + 3] << 24;
2896 array_wr32(E1000_RSSRK(0), j, rsskey);
2897 }
2898
2899 num_rx_queues = adapter->rss_queues;
2900
2901 if (adapter->vfs_allocated_count) {
2902 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2903 switch (hw->mac.type) {
2904 case e1000_i350:
2905 case e1000_82580:
2906 num_rx_queues = 1;
2907 shift = 0;
2908 break;
2909 case e1000_82576:
2910 shift = 3;
2911 num_rx_queues = 2;
2912 break;
2913 case e1000_82575:
2914 shift = 2;
2915 shift2 = 6;
2916 default:
2917 break;
2918 }
2919 } else {
2920 if (hw->mac.type == e1000_82575)
2921 shift = 6;
2922 }
2923
2924 for (j = 0; j < (32 * 4); j++) {
2925 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2926 if (shift2)
2927 reta.bytes[j & 3] |= num_rx_queues << shift2;
2928 if ((j & 3) == 3)
2929 wr32(E1000_RETA(j >> 2), reta.dword);
2930 }
2931
2932 /*
2933 * Disable raw packet checksumming so that RSS hash is placed in
2934 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2935 * offloads as they are enabled by default
2936 */
2937 rxcsum = rd32(E1000_RXCSUM);
2938 rxcsum |= E1000_RXCSUM_PCSD;
2939
2940 if (adapter->hw.mac.type >= e1000_82576)
2941 /* Enable Receive Checksum Offload for SCTP */
2942 rxcsum |= E1000_RXCSUM_CRCOFL;
2943
2944 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2945 wr32(E1000_RXCSUM, rxcsum);
2946
2947 /* If VMDq is enabled then we set the appropriate mode for that, else
2948 * we default to RSS so that an RSS hash is calculated per packet even
2949 * if we are only using one queue */
2950 if (adapter->vfs_allocated_count) {
2951 if (hw->mac.type > e1000_82575) {
2952 /* Set the default pool for the PF's first queue */
2953 u32 vtctl = rd32(E1000_VT_CTL);
2954 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2955 E1000_VT_CTL_DISABLE_DEF_POOL);
2956 vtctl |= adapter->vfs_allocated_count <<
2957 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2958 wr32(E1000_VT_CTL, vtctl);
2959 }
2960 if (adapter->rss_queues > 1)
2961 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2962 else
2963 mrqc = E1000_MRQC_ENABLE_VMDQ;
2964 } else {
2965 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2966 }
2967 igb_vmm_control(adapter);
2968
2969 /*
2970 * Generate RSS hash based on TCP port numbers and/or
2971 * IPv4/v6 src and dst addresses since UDP cannot be
2972 * hashed reliably due to IP fragmentation
2973 */
2974 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2975 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2976 E1000_MRQC_RSS_FIELD_IPV6 |
2977 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2978 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2979
2980 wr32(E1000_MRQC, mrqc);
2981 }
2982
2983 /**
2984 * igb_setup_rctl - configure the receive control registers
2985 * @adapter: Board private structure
2986 **/
2987 void igb_setup_rctl(struct igb_adapter *adapter)
2988 {
2989 struct e1000_hw *hw = &adapter->hw;
2990 u32 rctl;
2991
2992 rctl = rd32(E1000_RCTL);
2993
2994 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2995 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2996
2997 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2998 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2999
3000 /*
3001 * enable stripping of CRC. It's unlikely this will break BMC
3002 * redirection as it did with e1000. Newer features require
3003 * that the HW strips the CRC.
3004 */
3005 rctl |= E1000_RCTL_SECRC;
3006
3007 /* disable store bad packets and clear size bits. */
3008 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
3009
3010 /* enable LPE to prevent packets larger than max_frame_size */
3011 rctl |= E1000_RCTL_LPE;
3012
3013 /* disable queue 0 to prevent tail write w/o re-config */
3014 wr32(E1000_RXDCTL(0), 0);
3015
3016 /* Attention!!! For SR-IOV PF driver operations you must enable
3017 * queue drop for all VF and PF queues to prevent head of line blocking
3018 * if an un-trusted VF does not provide descriptors to hardware.
3019 */
3020 if (adapter->vfs_allocated_count) {
3021 /* set all queue drop enable bits */
3022 wr32(E1000_QDE, ALL_QUEUES);
3023 }
3024
3025 /* This is useful for sniffing bad packets. */
3026 if (adapter->netdev->features & NETIF_F_RXALL) {
3027 /* UPE and MPE will be handled by normal PROMISC logic
3028 * in e1000e_set_rx_mode */
3029 rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
3030 E1000_RCTL_BAM | /* RX All Bcast Pkts */
3031 E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
3032
3033 rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
3034 E1000_RCTL_DPF | /* Allow filtered pause */
3035 E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
3036 /* Do not mess with E1000_CTRL_VME, it affects transmit as well,
3037 * and that breaks VLANs.
3038 */
3039 }
3040
3041 wr32(E1000_RCTL, rctl);
3042 }
3043
3044 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3045 int vfn)
3046 {
3047 struct e1000_hw *hw = &adapter->hw;
3048 u32 vmolr;
3049
3050 /* if it isn't the PF check to see if VFs are enabled and
3051 * increase the size to support vlan tags */
3052 if (vfn < adapter->vfs_allocated_count &&
3053 adapter->vf_data[vfn].vlans_enabled)
3054 size += VLAN_TAG_SIZE;
3055
3056 vmolr = rd32(E1000_VMOLR(vfn));
3057 vmolr &= ~E1000_VMOLR_RLPML_MASK;
3058 vmolr |= size | E1000_VMOLR_LPE;
3059 wr32(E1000_VMOLR(vfn), vmolr);
3060
3061 return 0;
3062 }
3063
3064 /**
3065 * igb_rlpml_set - set maximum receive packet size
3066 * @adapter: board private structure
3067 *
3068 * Configure maximum receivable packet size.
3069 **/
3070 static void igb_rlpml_set(struct igb_adapter *adapter)
3071 {
3072 u32 max_frame_size = adapter->max_frame_size;
3073 struct e1000_hw *hw = &adapter->hw;
3074 u16 pf_id = adapter->vfs_allocated_count;
3075
3076 if (pf_id) {
3077 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3078 /*
3079 * If we're in VMDQ or SR-IOV mode, then set global RLPML
3080 * to our max jumbo frame size, in case we need to enable
3081 * jumbo frames on one of the rings later.
3082 * This will not pass over-length frames into the default
3083 * queue because it's gated by the VMOLR.RLPML.
3084 */
3085 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3086 }
3087
3088 wr32(E1000_RLPML, max_frame_size);
3089 }
3090
3091 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3092 int vfn, bool aupe)
3093 {
3094 struct e1000_hw *hw = &adapter->hw;
3095 u32 vmolr;
3096
3097 /*
3098 * This register exists only on 82576 and newer so if we are older then
3099 * we should exit and do nothing
3100 */
3101 if (hw->mac.type < e1000_82576)
3102 return;
3103
3104 vmolr = rd32(E1000_VMOLR(vfn));
3105 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
3106 if (aupe)
3107 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
3108 else
3109 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3110
3111 /* clear all bits that might not be set */
3112 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3113
3114 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3115 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3116 /*
3117 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3118 * multicast packets
3119 */
3120 if (vfn <= adapter->vfs_allocated_count)
3121 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3122
3123 wr32(E1000_VMOLR(vfn), vmolr);
3124 }
3125
3126 /**
3127 * igb_configure_rx_ring - Configure a receive ring after Reset
3128 * @adapter: board private structure
3129 * @ring: receive ring to be configured
3130 *
3131 * Configure the Rx unit of the MAC after a reset.
3132 **/
3133 void igb_configure_rx_ring(struct igb_adapter *adapter,
3134 struct igb_ring *ring)
3135 {
3136 struct e1000_hw *hw = &adapter->hw;
3137 u64 rdba = ring->dma;
3138 int reg_idx = ring->reg_idx;
3139 u32 srrctl = 0, rxdctl = 0;
3140
3141 /* disable the queue */
3142 wr32(E1000_RXDCTL(reg_idx), 0);
3143
3144 /* Set DMA base address registers */
3145 wr32(E1000_RDBAL(reg_idx),
3146 rdba & 0x00000000ffffffffULL);
3147 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3148 wr32(E1000_RDLEN(reg_idx),
3149 ring->count * sizeof(union e1000_adv_rx_desc));
3150
3151 /* initialize head and tail */
3152 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3153 wr32(E1000_RDH(reg_idx), 0);
3154 writel(0, ring->tail);
3155
3156 /* set descriptor configuration */
3157 srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3158 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3159 srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3160 #else
3161 srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3162 #endif
3163 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3164 if (hw->mac.type >= e1000_82580)
3165 srrctl |= E1000_SRRCTL_TIMESTAMP;
3166 /* Only set Drop Enable if we are supporting multiple queues */
3167 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3168 srrctl |= E1000_SRRCTL_DROP_EN;
3169
3170 wr32(E1000_SRRCTL(reg_idx), srrctl);
3171
3172 /* set filtering for VMDQ pools */
3173 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3174
3175 rxdctl |= IGB_RX_PTHRESH;
3176 rxdctl |= IGB_RX_HTHRESH << 8;
3177 rxdctl |= IGB_RX_WTHRESH << 16;
3178
3179 /* enable receive descriptor fetching */
3180 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3181 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3182 }
3183
3184 /**
3185 * igb_configure_rx - Configure receive Unit after Reset
3186 * @adapter: board private structure
3187 *
3188 * Configure the Rx unit of the MAC after a reset.
3189 **/
3190 static void igb_configure_rx(struct igb_adapter *adapter)
3191 {
3192 int i;
3193
3194 /* set UTA to appropriate mode */
3195 igb_set_uta(adapter);
3196
3197 /* set the correct pool for the PF default MAC address in entry 0 */
3198 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3199 adapter->vfs_allocated_count);
3200
3201 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3202 * the Base and Length of the Rx Descriptor Ring */
3203 for (i = 0; i < adapter->num_rx_queues; i++)
3204 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3205 }
3206
3207 /**
3208 * igb_free_tx_resources - Free Tx Resources per Queue
3209 * @tx_ring: Tx descriptor ring for a specific queue
3210 *
3211 * Free all transmit software resources
3212 **/
3213 void igb_free_tx_resources(struct igb_ring *tx_ring)
3214 {
3215 igb_clean_tx_ring(tx_ring);
3216
3217 vfree(tx_ring->tx_buffer_info);
3218 tx_ring->tx_buffer_info = NULL;
3219
3220 /* if not set, then don't free */
3221 if (!tx_ring->desc)
3222 return;
3223
3224 dma_free_coherent(tx_ring->dev, tx_ring->size,
3225 tx_ring->desc, tx_ring->dma);
3226
3227 tx_ring->desc = NULL;
3228 }
3229
3230 /**
3231 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3232 * @adapter: board private structure
3233 *
3234 * Free all transmit software resources
3235 **/
3236 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3237 {
3238 int i;
3239
3240 for (i = 0; i < adapter->num_tx_queues; i++)
3241 igb_free_tx_resources(adapter->tx_ring[i]);
3242 }
3243
3244 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3245 struct igb_tx_buffer *tx_buffer)
3246 {
3247 if (tx_buffer->skb) {
3248 dev_kfree_skb_any(tx_buffer->skb);
3249 if (tx_buffer->dma)
3250 dma_unmap_single(ring->dev,
3251 tx_buffer->dma,
3252 tx_buffer->length,
3253 DMA_TO_DEVICE);
3254 } else if (tx_buffer->dma) {
3255 dma_unmap_page(ring->dev,
3256 tx_buffer->dma,
3257 tx_buffer->length,
3258 DMA_TO_DEVICE);
3259 }
3260 tx_buffer->next_to_watch = NULL;
3261 tx_buffer->skb = NULL;
3262 tx_buffer->dma = 0;
3263 /* buffer_info must be completely set up in the transmit path */
3264 }
3265
3266 /**
3267 * igb_clean_tx_ring - Free Tx Buffers
3268 * @tx_ring: ring to be cleaned
3269 **/
3270 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3271 {
3272 struct igb_tx_buffer *buffer_info;
3273 unsigned long size;
3274 u16 i;
3275
3276 if (!tx_ring->tx_buffer_info)
3277 return;
3278 /* Free all the Tx ring sk_buffs */
3279
3280 for (i = 0; i < tx_ring->count; i++) {
3281 buffer_info = &tx_ring->tx_buffer_info[i];
3282 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3283 }
3284
3285 size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3286 memset(tx_ring->tx_buffer_info, 0, size);
3287
3288 /* Zero out the descriptor ring */
3289 memset(tx_ring->desc, 0, tx_ring->size);
3290
3291 tx_ring->next_to_use = 0;
3292 tx_ring->next_to_clean = 0;
3293 }
3294
3295 /**
3296 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3297 * @adapter: board private structure
3298 **/
3299 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3300 {
3301 int i;
3302
3303 for (i = 0; i < adapter->num_tx_queues; i++)
3304 igb_clean_tx_ring(adapter->tx_ring[i]);
3305 }
3306
3307 /**
3308 * igb_free_rx_resources - Free Rx Resources
3309 * @rx_ring: ring to clean the resources from
3310 *
3311 * Free all receive software resources
3312 **/
3313 void igb_free_rx_resources(struct igb_ring *rx_ring)
3314 {
3315 igb_clean_rx_ring(rx_ring);
3316
3317 vfree(rx_ring->rx_buffer_info);
3318 rx_ring->rx_buffer_info = NULL;
3319
3320 /* if not set, then don't free */
3321 if (!rx_ring->desc)
3322 return;
3323
3324 dma_free_coherent(rx_ring->dev, rx_ring->size,
3325 rx_ring->desc, rx_ring->dma);
3326
3327 rx_ring->desc = NULL;
3328 }
3329
3330 /**
3331 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3332 * @adapter: board private structure
3333 *
3334 * Free all receive software resources
3335 **/
3336 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3337 {
3338 int i;
3339
3340 for (i = 0; i < adapter->num_rx_queues; i++)
3341 igb_free_rx_resources(adapter->rx_ring[i]);
3342 }
3343
3344 /**
3345 * igb_clean_rx_ring - Free Rx Buffers per Queue
3346 * @rx_ring: ring to free buffers from
3347 **/
3348 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3349 {
3350 unsigned long size;
3351 u16 i;
3352
3353 if (!rx_ring->rx_buffer_info)
3354 return;
3355
3356 /* Free all the Rx ring sk_buffs */
3357 for (i = 0; i < rx_ring->count; i++) {
3358 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3359 if (buffer_info->dma) {
3360 dma_unmap_single(rx_ring->dev,
3361 buffer_info->dma,
3362 IGB_RX_HDR_LEN,
3363 DMA_FROM_DEVICE);
3364 buffer_info->dma = 0;
3365 }
3366
3367 if (buffer_info->skb) {
3368 dev_kfree_skb(buffer_info->skb);
3369 buffer_info->skb = NULL;
3370 }
3371 if (buffer_info->page_dma) {
3372 dma_unmap_page(rx_ring->dev,
3373 buffer_info->page_dma,
3374 PAGE_SIZE / 2,
3375 DMA_FROM_DEVICE);
3376 buffer_info->page_dma = 0;
3377 }
3378 if (buffer_info->page) {
3379 put_page(buffer_info->page);
3380 buffer_info->page = NULL;
3381 buffer_info->page_offset = 0;
3382 }
3383 }
3384
3385 size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3386 memset(rx_ring->rx_buffer_info, 0, size);
3387
3388 /* Zero out the descriptor ring */
3389 memset(rx_ring->desc, 0, rx_ring->size);
3390
3391 rx_ring->next_to_clean = 0;
3392 rx_ring->next_to_use = 0;
3393 }
3394
3395 /**
3396 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3397 * @adapter: board private structure
3398 **/
3399 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3400 {
3401 int i;
3402
3403 for (i = 0; i < adapter->num_rx_queues; i++)
3404 igb_clean_rx_ring(adapter->rx_ring[i]);
3405 }
3406
3407 /**
3408 * igb_set_mac - Change the Ethernet Address of the NIC
3409 * @netdev: network interface device structure
3410 * @p: pointer to an address structure
3411 *
3412 * Returns 0 on success, negative on failure
3413 **/
3414 static int igb_set_mac(struct net_device *netdev, void *p)
3415 {
3416 struct igb_adapter *adapter = netdev_priv(netdev);
3417 struct e1000_hw *hw = &adapter->hw;
3418 struct sockaddr *addr = p;
3419
3420 if (!is_valid_ether_addr(addr->sa_data))
3421 return -EADDRNOTAVAIL;
3422
3423 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3424 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3425
3426 /* set the correct pool for the new PF MAC address in entry 0 */
3427 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3428 adapter->vfs_allocated_count);
3429
3430 return 0;
3431 }
3432
3433 /**
3434 * igb_write_mc_addr_list - write multicast addresses to MTA
3435 * @netdev: network interface device structure
3436 *
3437 * Writes multicast address list to the MTA hash table.
3438 * Returns: -ENOMEM on failure
3439 * 0 on no addresses written
3440 * X on writing X addresses to MTA
3441 **/
3442 static int igb_write_mc_addr_list(struct net_device *netdev)
3443 {
3444 struct igb_adapter *adapter = netdev_priv(netdev);
3445 struct e1000_hw *hw = &adapter->hw;
3446 struct netdev_hw_addr *ha;
3447 u8 *mta_list;
3448 int i;
3449
3450 if (netdev_mc_empty(netdev)) {
3451 /* nothing to program, so clear mc list */
3452 igb_update_mc_addr_list(hw, NULL, 0);
3453 igb_restore_vf_multicasts(adapter);
3454 return 0;
3455 }
3456
3457 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3458 if (!mta_list)
3459 return -ENOMEM;
3460
3461 /* The shared function expects a packed array of only addresses. */
3462 i = 0;
3463 netdev_for_each_mc_addr(ha, netdev)
3464 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3465
3466 igb_update_mc_addr_list(hw, mta_list, i);
3467 kfree(mta_list);
3468
3469 return netdev_mc_count(netdev);
3470 }
3471
3472 /**
3473 * igb_write_uc_addr_list - write unicast addresses to RAR table
3474 * @netdev: network interface device structure
3475 *
3476 * Writes unicast address list to the RAR table.
3477 * Returns: -ENOMEM on failure/insufficient address space
3478 * 0 on no addresses written
3479 * X on writing X addresses to the RAR table
3480 **/
3481 static int igb_write_uc_addr_list(struct net_device *netdev)
3482 {
3483 struct igb_adapter *adapter = netdev_priv(netdev);
3484 struct e1000_hw *hw = &adapter->hw;
3485 unsigned int vfn = adapter->vfs_allocated_count;
3486 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3487 int count = 0;
3488
3489 /* return ENOMEM indicating insufficient memory for addresses */
3490 if (netdev_uc_count(netdev) > rar_entries)
3491 return -ENOMEM;
3492
3493 if (!netdev_uc_empty(netdev) && rar_entries) {
3494 struct netdev_hw_addr *ha;
3495
3496 netdev_for_each_uc_addr(ha, netdev) {
3497 if (!rar_entries)
3498 break;
3499 igb_rar_set_qsel(adapter, ha->addr,
3500 rar_entries--,
3501 vfn);
3502 count++;
3503 }
3504 }
3505 /* write the addresses in reverse order to avoid write combining */
3506 for (; rar_entries > 0 ; rar_entries--) {
3507 wr32(E1000_RAH(rar_entries), 0);
3508 wr32(E1000_RAL(rar_entries), 0);
3509 }
3510 wrfl();
3511
3512 return count;
3513 }
3514
3515 /**
3516 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3517 * @netdev: network interface device structure
3518 *
3519 * The set_rx_mode entry point is called whenever the unicast or multicast
3520 * address lists or the network interface flags are updated. This routine is
3521 * responsible for configuring the hardware for proper unicast, multicast,
3522 * promiscuous mode, and all-multi behavior.
3523 **/
3524 static void igb_set_rx_mode(struct net_device *netdev)
3525 {
3526 struct igb_adapter *adapter = netdev_priv(netdev);
3527 struct e1000_hw *hw = &adapter->hw;
3528 unsigned int vfn = adapter->vfs_allocated_count;
3529 u32 rctl, vmolr = 0;
3530 int count;
3531
3532 /* Check for Promiscuous and All Multicast modes */
3533 rctl = rd32(E1000_RCTL);
3534
3535 /* clear the effected bits */
3536 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3537
3538 if (netdev->flags & IFF_PROMISC) {
3539 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3540 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3541 } else {
3542 if (netdev->flags & IFF_ALLMULTI) {
3543 rctl |= E1000_RCTL_MPE;
3544 vmolr |= E1000_VMOLR_MPME;
3545 } else {
3546 /*
3547 * Write addresses to the MTA, if the attempt fails
3548 * then we should just turn on promiscuous mode so
3549 * that we can at least receive multicast traffic
3550 */
3551 count = igb_write_mc_addr_list(netdev);
3552 if (count < 0) {
3553 rctl |= E1000_RCTL_MPE;
3554 vmolr |= E1000_VMOLR_MPME;
3555 } else if (count) {
3556 vmolr |= E1000_VMOLR_ROMPE;
3557 }
3558 }
3559 /*
3560 * Write addresses to available RAR registers, if there is not
3561 * sufficient space to store all the addresses then enable
3562 * unicast promiscuous mode
3563 */
3564 count = igb_write_uc_addr_list(netdev);
3565 if (count < 0) {
3566 rctl |= E1000_RCTL_UPE;
3567 vmolr |= E1000_VMOLR_ROPE;
3568 }
3569 rctl |= E1000_RCTL_VFE;
3570 }
3571 wr32(E1000_RCTL, rctl);
3572
3573 /*
3574 * In order to support SR-IOV and eventually VMDq it is necessary to set
3575 * the VMOLR to enable the appropriate modes. Without this workaround
3576 * we will have issues with VLAN tag stripping not being done for frames
3577 * that are only arriving because we are the default pool
3578 */
3579 if (hw->mac.type < e1000_82576)
3580 return;
3581
3582 vmolr |= rd32(E1000_VMOLR(vfn)) &
3583 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3584 wr32(E1000_VMOLR(vfn), vmolr);
3585 igb_restore_vf_multicasts(adapter);
3586 }
3587
3588 static void igb_check_wvbr(struct igb_adapter *adapter)
3589 {
3590 struct e1000_hw *hw = &adapter->hw;
3591 u32 wvbr = 0;
3592
3593 switch (hw->mac.type) {
3594 case e1000_82576:
3595 case e1000_i350:
3596 if (!(wvbr = rd32(E1000_WVBR)))
3597 return;
3598 break;
3599 default:
3600 break;
3601 }
3602
3603 adapter->wvbr |= wvbr;
3604 }
3605
3606 #define IGB_STAGGERED_QUEUE_OFFSET 8
3607
3608 static void igb_spoof_check(struct igb_adapter *adapter)
3609 {
3610 int j;
3611
3612 if (!adapter->wvbr)
3613 return;
3614
3615 for(j = 0; j < adapter->vfs_allocated_count; j++) {
3616 if (adapter->wvbr & (1 << j) ||
3617 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3618 dev_warn(&adapter->pdev->dev,
3619 "Spoof event(s) detected on VF %d\n", j);
3620 adapter->wvbr &=
3621 ~((1 << j) |
3622 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3623 }
3624 }
3625 }
3626
3627 /* Need to wait a few seconds after link up to get diagnostic information from
3628 * the phy */
3629 static void igb_update_phy_info(unsigned long data)
3630 {
3631 struct igb_adapter *adapter = (struct igb_adapter *) data;
3632 igb_get_phy_info(&adapter->hw);
3633 }
3634
3635 /**
3636 * igb_has_link - check shared code for link and determine up/down
3637 * @adapter: pointer to driver private info
3638 **/
3639 bool igb_has_link(struct igb_adapter *adapter)
3640 {
3641 struct e1000_hw *hw = &adapter->hw;
3642 bool link_active = false;
3643 s32 ret_val = 0;
3644
3645 /* get_link_status is set on LSC (link status) interrupt or
3646 * rx sequence error interrupt. get_link_status will stay
3647 * false until the e1000_check_for_link establishes link
3648 * for copper adapters ONLY
3649 */
3650 switch (hw->phy.media_type) {
3651 case e1000_media_type_copper:
3652 if (hw->mac.get_link_status) {
3653 ret_val = hw->mac.ops.check_for_link(hw);
3654 link_active = !hw->mac.get_link_status;
3655 } else {
3656 link_active = true;
3657 }
3658 break;
3659 case e1000_media_type_internal_serdes:
3660 ret_val = hw->mac.ops.check_for_link(hw);
3661 link_active = hw->mac.serdes_has_link;
3662 break;
3663 default:
3664 case e1000_media_type_unknown:
3665 break;
3666 }
3667
3668 return link_active;
3669 }
3670
3671 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3672 {
3673 bool ret = false;
3674 u32 ctrl_ext, thstat;
3675
3676 /* check for thermal sensor event on i350, copper only */
3677 if (hw->mac.type == e1000_i350) {
3678 thstat = rd32(E1000_THSTAT);
3679 ctrl_ext = rd32(E1000_CTRL_EXT);
3680
3681 if ((hw->phy.media_type == e1000_media_type_copper) &&
3682 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3683 ret = !!(thstat & event);
3684 }
3685 }
3686
3687 return ret;
3688 }
3689
3690 /**
3691 * igb_watchdog - Timer Call-back
3692 * @data: pointer to adapter cast into an unsigned long
3693 **/
3694 static void igb_watchdog(unsigned long data)
3695 {
3696 struct igb_adapter *adapter = (struct igb_adapter *)data;
3697 /* Do the rest outside of interrupt context */
3698 schedule_work(&adapter->watchdog_task);
3699 }
3700
3701 static void igb_watchdog_task(struct work_struct *work)
3702 {
3703 struct igb_adapter *adapter = container_of(work,
3704 struct igb_adapter,
3705 watchdog_task);
3706 struct e1000_hw *hw = &adapter->hw;
3707 struct net_device *netdev = adapter->netdev;
3708 u32 link;
3709 int i;
3710
3711 link = igb_has_link(adapter);
3712 if (link) {
3713 /* Cancel scheduled suspend requests. */
3714 pm_runtime_resume(netdev->dev.parent);
3715
3716 if (!netif_carrier_ok(netdev)) {
3717 u32 ctrl;
3718 hw->mac.ops.get_speed_and_duplex(hw,
3719 &adapter->link_speed,
3720 &adapter->link_duplex);
3721
3722 ctrl = rd32(E1000_CTRL);
3723 /* Links status message must follow this format */
3724 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3725 "Duplex, Flow Control: %s\n",
3726 netdev->name,
3727 adapter->link_speed,
3728 adapter->link_duplex == FULL_DUPLEX ?
3729 "Full" : "Half",
3730 (ctrl & E1000_CTRL_TFCE) &&
3731 (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3732 (ctrl & E1000_CTRL_RFCE) ? "RX" :
3733 (ctrl & E1000_CTRL_TFCE) ? "TX" : "None");
3734
3735 /* check for thermal sensor event */
3736 if (igb_thermal_sensor_event(hw,
3737 E1000_THSTAT_LINK_THROTTLE)) {
3738 netdev_info(netdev, "The network adapter link "
3739 "speed was downshifted because it "
3740 "overheated\n");
3741 }
3742
3743 /* adjust timeout factor according to speed/duplex */
3744 adapter->tx_timeout_factor = 1;
3745 switch (adapter->link_speed) {
3746 case SPEED_10:
3747 adapter->tx_timeout_factor = 14;
3748 break;
3749 case SPEED_100:
3750 /* maybe add some timeout factor ? */
3751 break;
3752 }
3753
3754 netif_carrier_on(netdev);
3755
3756 igb_ping_all_vfs(adapter);
3757 igb_check_vf_rate_limit(adapter);
3758
3759 /* link state has changed, schedule phy info update */
3760 if (!test_bit(__IGB_DOWN, &adapter->state))
3761 mod_timer(&adapter->phy_info_timer,
3762 round_jiffies(jiffies + 2 * HZ));
3763 }
3764 } else {
3765 if (netif_carrier_ok(netdev)) {
3766 adapter->link_speed = 0;
3767 adapter->link_duplex = 0;
3768
3769 /* check for thermal sensor event */
3770 if (igb_thermal_sensor_event(hw,
3771 E1000_THSTAT_PWR_DOWN)) {
3772 netdev_err(netdev, "The network adapter was "
3773 "stopped because it overheated\n");
3774 }
3775
3776 /* Links status message must follow this format */
3777 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3778 netdev->name);
3779 netif_carrier_off(netdev);
3780
3781 igb_ping_all_vfs(adapter);
3782
3783 /* link state has changed, schedule phy info update */
3784 if (!test_bit(__IGB_DOWN, &adapter->state))
3785 mod_timer(&adapter->phy_info_timer,
3786 round_jiffies(jiffies + 2 * HZ));
3787
3788 pm_schedule_suspend(netdev->dev.parent,
3789 MSEC_PER_SEC * 5);
3790 }
3791 }
3792
3793 spin_lock(&adapter->stats64_lock);
3794 igb_update_stats(adapter, &adapter->stats64);
3795 spin_unlock(&adapter->stats64_lock);
3796
3797 for (i = 0; i < adapter->num_tx_queues; i++) {
3798 struct igb_ring *tx_ring = adapter->tx_ring[i];
3799 if (!netif_carrier_ok(netdev)) {
3800 /* We've lost link, so the controller stops DMA,
3801 * but we've got queued Tx work that's never going
3802 * to get done, so reset controller to flush Tx.
3803 * (Do the reset outside of interrupt context). */
3804 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3805 adapter->tx_timeout_count++;
3806 schedule_work(&adapter->reset_task);
3807 /* return immediately since reset is imminent */
3808 return;
3809 }
3810 }
3811
3812 /* Force detection of hung controller every watchdog period */
3813 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3814 }
3815
3816 /* Cause software interrupt to ensure rx ring is cleaned */
3817 if (adapter->msix_entries) {
3818 u32 eics = 0;
3819 for (i = 0; i < adapter->num_q_vectors; i++)
3820 eics |= adapter->q_vector[i]->eims_value;
3821 wr32(E1000_EICS, eics);
3822 } else {
3823 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3824 }
3825
3826 igb_spoof_check(adapter);
3827
3828 /* Reset the timer */
3829 if (!test_bit(__IGB_DOWN, &adapter->state))
3830 mod_timer(&adapter->watchdog_timer,
3831 round_jiffies(jiffies + 2 * HZ));
3832 }
3833
/*
 * Latency classes used by the dynamic ITR code.  The interrupt rates
 * quoted below are the ones igb_set_itr() selects for each class.
 */
enum latency_range {
	lowest_latency  = 0,	/* 70,000 ints/sec */
	low_latency     = 1,	/* 20,000 ints/sec */
	bulk_latency    = 2,	/*  4,000 ints/sec */
	latency_invalid = 255
};
3840
3841 /**
3842 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3843 *
3844 * Stores a new ITR value based on strictly on packet size. This
3845 * algorithm is less sophisticated than that used in igb_update_itr,
3846 * due to the difficulty of synchronizing statistics across multiple
3847 * receive rings. The divisors and thresholds used by this function
3848 * were determined based on theoretical maximum wire speed and testing
3849 * data, in order to minimize response time while increasing bulk
3850 * throughput.
3851 * This functionality is controlled by the InterruptThrottleRate module
3852 * parameter (see igb_param.c)
3853 * NOTE: This function is called only when operating in a multiqueue
3854 * receive environment.
3855 * @q_vector: pointer to q_vector
3856 **/
3857 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3858 {
3859 int new_val = q_vector->itr_val;
3860 int avg_wire_size = 0;
3861 struct igb_adapter *adapter = q_vector->adapter;
3862 unsigned int packets;
3863
3864 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3865 * ints/sec - ITR timer value of 120 ticks.
3866 */
3867 if (adapter->link_speed != SPEED_1000) {
3868 new_val = IGB_4K_ITR;
3869 goto set_itr_val;
3870 }
3871
3872 packets = q_vector->rx.total_packets;
3873 if (packets)
3874 avg_wire_size = q_vector->rx.total_bytes / packets;
3875
3876 packets = q_vector->tx.total_packets;
3877 if (packets)
3878 avg_wire_size = max_t(u32, avg_wire_size,
3879 q_vector->tx.total_bytes / packets);
3880
3881 /* if avg_wire_size isn't set no work was done */
3882 if (!avg_wire_size)
3883 goto clear_counts;
3884
3885 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3886 avg_wire_size += 24;
3887
3888 /* Don't starve jumbo frames */
3889 avg_wire_size = min(avg_wire_size, 3000);
3890
3891 /* Give a little boost to mid-size frames */
3892 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3893 new_val = avg_wire_size / 3;
3894 else
3895 new_val = avg_wire_size / 2;
3896
3897 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3898 if (new_val < IGB_20K_ITR &&
3899 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3900 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3901 new_val = IGB_20K_ITR;
3902
3903 set_itr_val:
3904 if (new_val != q_vector->itr_val) {
3905 q_vector->itr_val = new_val;
3906 q_vector->set_itr = 1;
3907 }
3908 clear_counts:
3909 q_vector->rx.total_bytes = 0;
3910 q_vector->rx.total_packets = 0;
3911 q_vector->tx.total_bytes = 0;
3912 q_vector->tx.total_packets = 0;
3913 }
3914
3915 /**
3916 * igb_update_itr - update the dynamic ITR value based on statistics
3917 * Stores a new ITR value based on packets and byte
3918 * counts during the last interrupt. The advantage of per interrupt
3919 * computation is faster updates and more accurate ITR for the current
3920 * traffic pattern. Constants in this function were computed
3921 * based on theoretical maximum wire speed and thresholds were set based
3922 * on testing data as well as attempting to minimize response time
3923 * while increasing bulk throughput.
3924 * this functionality is controlled by the InterruptThrottleRate module
3925 * parameter (see igb_param.c)
3926 * NOTE: These calculations are only valid when operating in a single-
3927 * queue environment.
3928 * @q_vector: pointer to q_vector
3929 * @ring_container: ring info to update the itr for
3930 **/
3931 static void igb_update_itr(struct igb_q_vector *q_vector,
3932 struct igb_ring_container *ring_container)
3933 {
3934 unsigned int packets = ring_container->total_packets;
3935 unsigned int bytes = ring_container->total_bytes;
3936 u8 itrval = ring_container->itr;
3937
3938 /* no packets, exit with status unchanged */
3939 if (packets == 0)
3940 return;
3941
3942 switch (itrval) {
3943 case lowest_latency:
3944 /* handle TSO and jumbo frames */
3945 if (bytes/packets > 8000)
3946 itrval = bulk_latency;
3947 else if ((packets < 5) && (bytes > 512))
3948 itrval = low_latency;
3949 break;
3950 case low_latency: /* 50 usec aka 20000 ints/s */
3951 if (bytes > 10000) {
3952 /* this if handles the TSO accounting */
3953 if (bytes/packets > 8000) {
3954 itrval = bulk_latency;
3955 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3956 itrval = bulk_latency;
3957 } else if ((packets > 35)) {
3958 itrval = lowest_latency;
3959 }
3960 } else if (bytes/packets > 2000) {
3961 itrval = bulk_latency;
3962 } else if (packets <= 2 && bytes < 512) {
3963 itrval = lowest_latency;
3964 }
3965 break;
3966 case bulk_latency: /* 250 usec aka 4000 ints/s */
3967 if (bytes > 25000) {
3968 if (packets > 35)
3969 itrval = low_latency;
3970 } else if (bytes < 1500) {
3971 itrval = low_latency;
3972 }
3973 break;
3974 }
3975
3976 /* clear work counters since we have the values we need */
3977 ring_container->total_bytes = 0;
3978 ring_container->total_packets = 0;
3979
3980 /* write updated itr to ring container */
3981 ring_container->itr = itrval;
3982 }
3983
3984 static void igb_set_itr(struct igb_q_vector *q_vector)
3985 {
3986 struct igb_adapter *adapter = q_vector->adapter;
3987 u32 new_itr = q_vector->itr_val;
3988 u8 current_itr = 0;
3989
3990 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3991 if (adapter->link_speed != SPEED_1000) {
3992 current_itr = 0;
3993 new_itr = IGB_4K_ITR;
3994 goto set_itr_now;
3995 }
3996
3997 igb_update_itr(q_vector, &q_vector->tx);
3998 igb_update_itr(q_vector, &q_vector->rx);
3999
4000 current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4001
4002 /* conservative mode (itr 3) eliminates the lowest_latency setting */
4003 if (current_itr == lowest_latency &&
4004 ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4005 (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4006 current_itr = low_latency;
4007
4008 switch (current_itr) {
4009 /* counts and packets in update_itr are dependent on these numbers */
4010 case lowest_latency:
4011 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4012 break;
4013 case low_latency:
4014 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4015 break;
4016 case bulk_latency:
4017 new_itr = IGB_4K_ITR; /* 4,000 ints/sec */
4018 break;
4019 default:
4020 break;
4021 }
4022
4023 set_itr_now:
4024 if (new_itr != q_vector->itr_val) {
4025 /* this attempts to bias the interrupt rate towards Bulk
4026 * by adding intermediate steps when interrupt rate is
4027 * increasing */
4028 new_itr = new_itr > q_vector->itr_val ?
4029 max((new_itr * q_vector->itr_val) /
4030 (new_itr + (q_vector->itr_val >> 2)),
4031 new_itr) :
4032 new_itr;
4033 /* Don't write the value here; it resets the adapter's
4034 * internal timer, and causes us to delay far longer than
4035 * we should between interrupts. Instead, we write the ITR
4036 * value at the beginning of the next interrupt so the timing
4037 * ends up being correct.
4038 */
4039 q_vector->itr_val = new_itr;
4040 q_vector->set_itr = 1;
4041 }
4042 }
4043
4044 static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4045 u32 type_tucmd, u32 mss_l4len_idx)
4046 {
4047 struct e1000_adv_tx_context_desc *context_desc;
4048 u16 i = tx_ring->next_to_use;
4049
4050 context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4051
4052 i++;
4053 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4054
4055 /* set bits to identify this as an advanced context descriptor */
4056 type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4057
4058 /* For 82575, context index must be unique per ring. */
4059 if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4060 mss_l4len_idx |= tx_ring->reg_idx << 4;
4061
4062 context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
4063 context_desc->seqnum_seed = 0;
4064 context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
4065 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4066 }
4067
4068 static int igb_tso(struct igb_ring *tx_ring,
4069 struct igb_tx_buffer *first,
4070 u8 *hdr_len)
4071 {
4072 struct sk_buff *skb = first->skb;
4073 u32 vlan_macip_lens, type_tucmd;
4074 u32 mss_l4len_idx, l4len;
4075
4076 if (!skb_is_gso(skb))
4077 return 0;
4078
4079 if (skb_header_cloned(skb)) {
4080 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4081 if (err)
4082 return err;
4083 }
4084
4085 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4086 type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4087
4088 if (first->protocol == __constant_htons(ETH_P_IP)) {
4089 struct iphdr *iph = ip_hdr(skb);
4090 iph->tot_len = 0;
4091 iph->check = 0;
4092 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4093 iph->daddr, 0,
4094 IPPROTO_TCP,
4095 0);
4096 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4097 first->tx_flags |= IGB_TX_FLAGS_TSO |
4098 IGB_TX_FLAGS_CSUM |
4099 IGB_TX_FLAGS_IPV4;
4100 } else if (skb_is_gso_v6(skb)) {
4101 ipv6_hdr(skb)->payload_len = 0;
4102 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4103 &ipv6_hdr(skb)->daddr,
4104 0, IPPROTO_TCP, 0);
4105 first->tx_flags |= IGB_TX_FLAGS_TSO |
4106 IGB_TX_FLAGS_CSUM;
4107 }
4108
4109 /* compute header lengths */
4110 l4len = tcp_hdrlen(skb);
4111 *hdr_len = skb_transport_offset(skb) + l4len;
4112
4113 /* update gso size and bytecount with header size */
4114 first->gso_segs = skb_shinfo(skb)->gso_segs;
4115 first->bytecount += (first->gso_segs - 1) * *hdr_len;
4116
4117 /* MSS L4LEN IDX */
4118 mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4119 mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4120
4121 /* VLAN MACLEN IPLEN */
4122 vlan_macip_lens = skb_network_header_len(skb);
4123 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4124 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4125
4126 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4127
4128 return 1;
4129 }
4130
4131 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4132 {
4133 struct sk_buff *skb = first->skb;
4134 u32 vlan_macip_lens = 0;
4135 u32 mss_l4len_idx = 0;
4136 u32 type_tucmd = 0;
4137
4138 if (skb->ip_summed != CHECKSUM_PARTIAL) {
4139 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4140 return;
4141 } else {
4142 u8 l4_hdr = 0;
4143 switch (first->protocol) {
4144 case __constant_htons(ETH_P_IP):
4145 vlan_macip_lens |= skb_network_header_len(skb);
4146 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4147 l4_hdr = ip_hdr(skb)->protocol;
4148 break;
4149 case __constant_htons(ETH_P_IPV6):
4150 vlan_macip_lens |= skb_network_header_len(skb);
4151 l4_hdr = ipv6_hdr(skb)->nexthdr;
4152 break;
4153 default:
4154 if (unlikely(net_ratelimit())) {
4155 dev_warn(tx_ring->dev,
4156 "partial checksum but proto=%x!\n",
4157 first->protocol);
4158 }
4159 break;
4160 }
4161
4162 switch (l4_hdr) {
4163 case IPPROTO_TCP:
4164 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4165 mss_l4len_idx = tcp_hdrlen(skb) <<
4166 E1000_ADVTXD_L4LEN_SHIFT;
4167 break;
4168 case IPPROTO_SCTP:
4169 type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4170 mss_l4len_idx = sizeof(struct sctphdr) <<
4171 E1000_ADVTXD_L4LEN_SHIFT;
4172 break;
4173 case IPPROTO_UDP:
4174 mss_l4len_idx = sizeof(struct udphdr) <<
4175 E1000_ADVTXD_L4LEN_SHIFT;
4176 break;
4177 default:
4178 if (unlikely(net_ratelimit())) {
4179 dev_warn(tx_ring->dev,
4180 "partial checksum but l4 proto=%x!\n",
4181 l4_hdr);
4182 }
4183 break;
4184 }
4185
4186 /* update TX checksum flag */
4187 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4188 }
4189
4190 vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4191 vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4192
4193 igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4194 }
4195
4196 static __le32 igb_tx_cmd_type(u32 tx_flags)
4197 {
4198 /* set type for advanced descriptor with frame checksum insertion */
4199 __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4200 E1000_ADVTXD_DCMD_IFCS |
4201 E1000_ADVTXD_DCMD_DEXT);
4202
4203 /* set HW vlan bit if vlan is present */
4204 if (tx_flags & IGB_TX_FLAGS_VLAN)
4205 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4206
4207 /* set timestamp bit if present */
4208 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4209 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4210
4211 /* set segmentation bits for TSO */
4212 if (tx_flags & IGB_TX_FLAGS_TSO)
4213 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4214
4215 return cmd_type;
4216 }
4217
4218 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4219 union e1000_adv_tx_desc *tx_desc,
4220 u32 tx_flags, unsigned int paylen)
4221 {
4222 u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4223
4224 /* 82575 requires a unique index per ring if any offload is enabled */
4225 if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4226 test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4227 olinfo_status |= tx_ring->reg_idx << 4;
4228
4229 /* insert L4 checksum */
4230 if (tx_flags & IGB_TX_FLAGS_CSUM) {
4231 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4232
4233 /* insert IPv4 checksum */
4234 if (tx_flags & IGB_TX_FLAGS_IPV4)
4235 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4236 }
4237
4238 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4239 }
4240
/*
 * A single descriptor can describe at most 65535 bytes.  To keep buffer
 * splits on a power-of-two boundary the driver caps each descriptor at
 * 32K (1 << IGB_MAX_TXD_PWR) bytes.
 */
#define IGB_MAX_TXD_PWR		15
#define IGB_MAX_DATA_PER_TXD	(1 << IGB_MAX_TXD_PWR)
4247
4248 static void igb_tx_map(struct igb_ring *tx_ring,
4249 struct igb_tx_buffer *first,
4250 const u8 hdr_len)
4251 {
4252 struct sk_buff *skb = first->skb;
4253 struct igb_tx_buffer *tx_buffer_info;
4254 union e1000_adv_tx_desc *tx_desc;
4255 dma_addr_t dma;
4256 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4257 unsigned int data_len = skb->data_len;
4258 unsigned int size = skb_headlen(skb);
4259 unsigned int paylen = skb->len - hdr_len;
4260 __le32 cmd_type;
4261 u32 tx_flags = first->tx_flags;
4262 u16 i = tx_ring->next_to_use;
4263
4264 tx_desc = IGB_TX_DESC(tx_ring, i);
4265
4266 igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4267 cmd_type = igb_tx_cmd_type(tx_flags);
4268
4269 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4270 if (dma_mapping_error(tx_ring->dev, dma))
4271 goto dma_error;
4272
4273 /* record length, and DMA address */
4274 first->length = size;
4275 first->dma = dma;
4276 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4277
4278 for (;;) {
4279 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4280 tx_desc->read.cmd_type_len =
4281 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4282
4283 i++;
4284 tx_desc++;
4285 if (i == tx_ring->count) {
4286 tx_desc = IGB_TX_DESC(tx_ring, 0);
4287 i = 0;
4288 }
4289
4290 dma += IGB_MAX_DATA_PER_TXD;
4291 size -= IGB_MAX_DATA_PER_TXD;
4292
4293 tx_desc->read.olinfo_status = 0;
4294 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4295 }
4296
4297 if (likely(!data_len))
4298 break;
4299
4300 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4301
4302 i++;
4303 tx_desc++;
4304 if (i == tx_ring->count) {
4305 tx_desc = IGB_TX_DESC(tx_ring, 0);
4306 i = 0;
4307 }
4308
4309 size = skb_frag_size(frag);
4310 data_len -= size;
4311
4312 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4313 size, DMA_TO_DEVICE);
4314 if (dma_mapping_error(tx_ring->dev, dma))
4315 goto dma_error;
4316
4317 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4318 tx_buffer_info->length = size;
4319 tx_buffer_info->dma = dma;
4320
4321 tx_desc->read.olinfo_status = 0;
4322 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4323
4324 frag++;
4325 }
4326
4327 netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
4328
4329 /* write last descriptor with RS and EOP bits */
4330 cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4331 if (unlikely(skb->no_fcs))
4332 cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4333 tx_desc->read.cmd_type_len = cmd_type;
4334
4335 /* set the timestamp */
4336 first->time_stamp = jiffies;
4337
4338 /*
4339 * Force memory writes to complete before letting h/w know there
4340 * are new descriptors to fetch. (Only applicable for weak-ordered
4341 * memory model archs, such as IA-64).
4342 *
4343 * We also need this memory barrier to make certain all of the
4344 * status bits have been updated before next_to_watch is written.
4345 */
4346 wmb();
4347
4348 /* set next_to_watch value indicating a packet is present */
4349 first->next_to_watch = tx_desc;
4350
4351 i++;
4352 if (i == tx_ring->count)
4353 i = 0;
4354
4355 tx_ring->next_to_use = i;
4356
4357 writel(i, tx_ring->tail);
4358
4359 /* we need this if more than one processor can write to our tail
4360 * at a time, it syncronizes IO on IA64/Altix systems */
4361 mmiowb();
4362
4363 return;
4364
4365 dma_error:
4366 dev_err(tx_ring->dev, "TX DMA map failed\n");
4367
4368 /* clear dma mappings for failed tx_buffer_info map */
4369 for (;;) {
4370 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4371 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4372 if (tx_buffer_info == first)
4373 break;
4374 if (i == 0)
4375 i = tx_ring->count;
4376 i--;
4377 }
4378
4379 tx_ring->next_to_use = i;
4380 }
4381
4382 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4383 {
4384 struct net_device *netdev = tx_ring->netdev;
4385
4386 netif_stop_subqueue(netdev, tx_ring->queue_index);
4387
4388 /* Herbert's original patch had:
4389 * smp_mb__after_netif_stop_queue();
4390 * but since that doesn't exist yet, just open code it. */
4391 smp_mb();
4392
4393 /* We need to check again in a case another CPU has just
4394 * made room available. */
4395 if (igb_desc_unused(tx_ring) < size)
4396 return -EBUSY;
4397
4398 /* A reprieve! */
4399 netif_wake_subqueue(netdev, tx_ring->queue_index);
4400
4401 u64_stats_update_begin(&tx_ring->tx_syncp2);
4402 tx_ring->tx_stats.restart_queue2++;
4403 u64_stats_update_end(&tx_ring->tx_syncp2);
4404
4405 return 0;
4406 }
4407
4408 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4409 {
4410 if (igb_desc_unused(tx_ring) >= size)
4411 return 0;
4412 return __igb_maybe_stop_tx(tx_ring, size);
4413 }
4414
4415 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4416 struct igb_ring *tx_ring)
4417 {
4418 struct igb_tx_buffer *first;
4419 int tso;
4420 u32 tx_flags = 0;
4421 __be16 protocol = vlan_get_protocol(skb);
4422 u8 hdr_len = 0;
4423
4424 /* need: 1 descriptor per page,
4425 * + 2 desc gap to keep tail from touching head,
4426 * + 1 desc for skb->data,
4427 * + 1 desc for context descriptor,
4428 * otherwise try next time */
4429 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4430 /* this is a hard error */
4431 return NETDEV_TX_BUSY;
4432 }
4433
4434 /* record the location of the first descriptor for this packet */
4435 first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4436 first->skb = skb;
4437 first->bytecount = skb->len;
4438 first->gso_segs = 1;
4439
4440 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4441 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4442 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4443 }
4444
4445 if (vlan_tx_tag_present(skb)) {
4446 tx_flags |= IGB_TX_FLAGS_VLAN;
4447 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4448 }
4449
4450 /* record initial flags and protocol */
4451 first->tx_flags = tx_flags;
4452 first->protocol = protocol;
4453
4454 tso = igb_tso(tx_ring, first, &hdr_len);
4455 if (tso < 0)
4456 goto out_drop;
4457 else if (!tso)
4458 igb_tx_csum(tx_ring, first);
4459
4460 igb_tx_map(tx_ring, first, hdr_len);
4461
4462 /* Make sure there is space in the ring for the next send. */
4463 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4464
4465 return NETDEV_TX_OK;
4466
4467 out_drop:
4468 igb_unmap_and_free_tx_resource(tx_ring, first);
4469
4470 return NETDEV_TX_OK;
4471 }
4472
4473 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4474 struct sk_buff *skb)
4475 {
4476 unsigned int r_idx = skb->queue_mapping;
4477
4478 if (r_idx >= adapter->num_tx_queues)
4479 r_idx = r_idx % adapter->num_tx_queues;
4480
4481 return adapter->tx_ring[r_idx];
4482 }
4483
4484 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4485 struct net_device *netdev)
4486 {
4487 struct igb_adapter *adapter = netdev_priv(netdev);
4488
4489 if (test_bit(__IGB_DOWN, &adapter->state)) {
4490 dev_kfree_skb_any(skb);
4491 return NETDEV_TX_OK;
4492 }
4493
4494 if (skb->len <= 0) {
4495 dev_kfree_skb_any(skb);
4496 return NETDEV_TX_OK;
4497 }
4498
4499 /*
4500 * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4501 * in order to meet this minimum size requirement.
4502 */
4503 if (skb->len < 17) {
4504 if (skb_padto(skb, 17))
4505 return NETDEV_TX_OK;
4506 skb->len = 17;
4507 }
4508
4509 return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4510 }
4511
4512 /**
4513 * igb_tx_timeout - Respond to a Tx Hang
4514 * @netdev: network interface device structure
4515 **/
4516 static void igb_tx_timeout(struct net_device *netdev)
4517 {
4518 struct igb_adapter *adapter = netdev_priv(netdev);
4519 struct e1000_hw *hw = &adapter->hw;
4520
4521 /* Do the reset outside of interrupt context */
4522 adapter->tx_timeout_count++;
4523
4524 if (hw->mac.type >= e1000_82580)
4525 hw->dev_spec._82575.global_device_reset = true;
4526
4527 schedule_work(&adapter->reset_task);
4528 wr32(E1000_EICS,
4529 (adapter->eims_enable_mask & ~adapter->eims_other));
4530 }
4531
4532 static void igb_reset_task(struct work_struct *work)
4533 {
4534 struct igb_adapter *adapter;
4535 adapter = container_of(work, struct igb_adapter, reset_task);
4536
4537 igb_dump(adapter);
4538 netdev_err(adapter->netdev, "Reset adapter\n");
4539 igb_reinit_locked(adapter);
4540 }
4541
4542 /**
4543 * igb_get_stats64 - Get System Network Statistics
4544 * @netdev: network interface device structure
4545 * @stats: rtnl_link_stats64 pointer
4546 *
4547 **/
4548 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4549 struct rtnl_link_stats64 *stats)
4550 {
4551 struct igb_adapter *adapter = netdev_priv(netdev);
4552
4553 spin_lock(&adapter->stats64_lock);
4554 igb_update_stats(adapter, &adapter->stats64);
4555 memcpy(stats, &adapter->stats64, sizeof(*stats));
4556 spin_unlock(&adapter->stats64_lock);
4557
4558 return stats;
4559 }
4560
4561 /**
4562 * igb_change_mtu - Change the Maximum Transfer Unit
4563 * @netdev: network interface device structure
4564 * @new_mtu: new value for maximum frame size
4565 *
4566 * Returns 0 on success, negative on failure
4567 **/
4568 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4569 {
4570 struct igb_adapter *adapter = netdev_priv(netdev);
4571 struct pci_dev *pdev = adapter->pdev;
4572 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4573
4574 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4575 dev_err(&pdev->dev, "Invalid MTU setting\n");
4576 return -EINVAL;
4577 }
4578
4579 #define MAX_STD_JUMBO_FRAME_SIZE 9238
4580 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4581 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4582 return -EINVAL;
4583 }
4584
4585 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4586 msleep(1);
4587
4588 /* igb_down has a dependency on max_frame_size */
4589 adapter->max_frame_size = max_frame;
4590
4591 if (netif_running(netdev))
4592 igb_down(adapter);
4593
4594 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4595 netdev->mtu, new_mtu);
4596 netdev->mtu = new_mtu;
4597
4598 if (netif_running(netdev))
4599 igb_up(adapter);
4600 else
4601 igb_reset(adapter);
4602
4603 clear_bit(__IGB_RESETTING, &adapter->state);
4604
4605 return 0;
4606 }
4607
4608 /**
4609 * igb_update_stats - Update the board statistics counters
4610 * @adapter: board private structure
4611 **/
4612
4613 void igb_update_stats(struct igb_adapter *adapter,
4614 struct rtnl_link_stats64 *net_stats)
4615 {
4616 struct e1000_hw *hw = &adapter->hw;
4617 struct pci_dev *pdev = adapter->pdev;
4618 u32 reg, mpc;
4619 u16 phy_tmp;
4620 int i;
4621 u64 bytes, packets;
4622 unsigned int start;
4623 u64 _bytes, _packets;
4624
4625 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4626
4627 /*
4628 * Prevent stats update while adapter is being reset, or if the pci
4629 * connection is down.
4630 */
4631 if (adapter->link_speed == 0)
4632 return;
4633 if (pci_channel_offline(pdev))
4634 return;
4635
4636 bytes = 0;
4637 packets = 0;
4638 for (i = 0; i < adapter->num_rx_queues; i++) {
4639 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4640 struct igb_ring *ring = adapter->rx_ring[i];
4641
4642 ring->rx_stats.drops += rqdpc_tmp;
4643 net_stats->rx_fifo_errors += rqdpc_tmp;
4644
4645 do {
4646 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4647 _bytes = ring->rx_stats.bytes;
4648 _packets = ring->rx_stats.packets;
4649 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4650 bytes += _bytes;
4651 packets += _packets;
4652 }
4653
4654 net_stats->rx_bytes = bytes;
4655 net_stats->rx_packets = packets;
4656
4657 bytes = 0;
4658 packets = 0;
4659 for (i = 0; i < adapter->num_tx_queues; i++) {
4660 struct igb_ring *ring = adapter->tx_ring[i];
4661 do {
4662 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4663 _bytes = ring->tx_stats.bytes;
4664 _packets = ring->tx_stats.packets;
4665 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4666 bytes += _bytes;
4667 packets += _packets;
4668 }
4669 net_stats->tx_bytes = bytes;
4670 net_stats->tx_packets = packets;
4671
4672 /* read stats registers */
4673 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4674 adapter->stats.gprc += rd32(E1000_GPRC);
4675 adapter->stats.gorc += rd32(E1000_GORCL);
4676 rd32(E1000_GORCH); /* clear GORCL */
4677 adapter->stats.bprc += rd32(E1000_BPRC);
4678 adapter->stats.mprc += rd32(E1000_MPRC);
4679 adapter->stats.roc += rd32(E1000_ROC);
4680
4681 adapter->stats.prc64 += rd32(E1000_PRC64);
4682 adapter->stats.prc127 += rd32(E1000_PRC127);
4683 adapter->stats.prc255 += rd32(E1000_PRC255);
4684 adapter->stats.prc511 += rd32(E1000_PRC511);
4685 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4686 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4687 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4688 adapter->stats.sec += rd32(E1000_SEC);
4689
4690 mpc = rd32(E1000_MPC);
4691 adapter->stats.mpc += mpc;
4692 net_stats->rx_fifo_errors += mpc;
4693 adapter->stats.scc += rd32(E1000_SCC);
4694 adapter->stats.ecol += rd32(E1000_ECOL);
4695 adapter->stats.mcc += rd32(E1000_MCC);
4696 adapter->stats.latecol += rd32(E1000_LATECOL);
4697 adapter->stats.dc += rd32(E1000_DC);
4698 adapter->stats.rlec += rd32(E1000_RLEC);
4699 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4700 adapter->stats.xontxc += rd32(E1000_XONTXC);
4701 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4702 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4703 adapter->stats.fcruc += rd32(E1000_FCRUC);
4704 adapter->stats.gptc += rd32(E1000_GPTC);
4705 adapter->stats.gotc += rd32(E1000_GOTCL);
4706 rd32(E1000_GOTCH); /* clear GOTCL */
4707 adapter->stats.rnbc += rd32(E1000_RNBC);
4708 adapter->stats.ruc += rd32(E1000_RUC);
4709 adapter->stats.rfc += rd32(E1000_RFC);
4710 adapter->stats.rjc += rd32(E1000_RJC);
4711 adapter->stats.tor += rd32(E1000_TORH);
4712 adapter->stats.tot += rd32(E1000_TOTH);
4713 adapter->stats.tpr += rd32(E1000_TPR);
4714
4715 adapter->stats.ptc64 += rd32(E1000_PTC64);
4716 adapter->stats.ptc127 += rd32(E1000_PTC127);
4717 adapter->stats.ptc255 += rd32(E1000_PTC255);
4718 adapter->stats.ptc511 += rd32(E1000_PTC511);
4719 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4720 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4721
4722 adapter->stats.mptc += rd32(E1000_MPTC);
4723 adapter->stats.bptc += rd32(E1000_BPTC);
4724
4725 adapter->stats.tpt += rd32(E1000_TPT);
4726 adapter->stats.colc += rd32(E1000_COLC);
4727
4728 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4729 /* read internal phy specific stats */
4730 reg = rd32(E1000_CTRL_EXT);
4731 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4732 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4733 adapter->stats.tncrs += rd32(E1000_TNCRS);
4734 }
4735
4736 adapter->stats.tsctc += rd32(E1000_TSCTC);
4737 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4738
4739 adapter->stats.iac += rd32(E1000_IAC);
4740 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4741 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4742 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4743 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4744 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4745 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4746 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4747 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4748
4749 /* Fill out the OS statistics structure */
4750 net_stats->multicast = adapter->stats.mprc;
4751 net_stats->collisions = adapter->stats.colc;
4752
4753 /* Rx Errors */
4754
4755 /* RLEC on some newer hardware can be incorrect so build
4756 * our own version based on RUC and ROC */
4757 net_stats->rx_errors = adapter->stats.rxerrc +
4758 adapter->stats.crcerrs + adapter->stats.algnerrc +
4759 adapter->stats.ruc + adapter->stats.roc +
4760 adapter->stats.cexterr;
4761 net_stats->rx_length_errors = adapter->stats.ruc +
4762 adapter->stats.roc;
4763 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4764 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4765 net_stats->rx_missed_errors = adapter->stats.mpc;
4766
4767 /* Tx Errors */
4768 net_stats->tx_errors = adapter->stats.ecol +
4769 adapter->stats.latecol;
4770 net_stats->tx_aborted_errors = adapter->stats.ecol;
4771 net_stats->tx_window_errors = adapter->stats.latecol;
4772 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4773
4774 /* Tx Dropped needs to be maintained elsewhere */
4775
4776 /* Phy Stats */
4777 if (hw->phy.media_type == e1000_media_type_copper) {
4778 if ((adapter->link_speed == SPEED_1000) &&
4779 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4780 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4781 adapter->phy_stats.idle_errors += phy_tmp;
4782 }
4783 }
4784
4785 /* Management Stats */
4786 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4787 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4788 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4789
4790 /* OS2BMC Stats */
4791 reg = rd32(E1000_MANC);
4792 if (reg & E1000_MANC_EN_BMC2OS) {
4793 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4794 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4795 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4796 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4797 }
4798 }
4799
4800 static irqreturn_t igb_msix_other(int irq, void *data)
4801 {
4802 struct igb_adapter *adapter = data;
4803 struct e1000_hw *hw = &adapter->hw;
4804 u32 icr = rd32(E1000_ICR);
4805 /* reading ICR causes bit 31 of EICR to be cleared */
4806
4807 if (icr & E1000_ICR_DRSTA)
4808 schedule_work(&adapter->reset_task);
4809
4810 if (icr & E1000_ICR_DOUTSYNC) {
4811 /* HW is reporting DMA is out of sync */
4812 adapter->stats.doosync++;
4813 /* The DMA Out of Sync is also indication of a spoof event
4814 * in IOV mode. Check the Wrong VM Behavior register to
4815 * see if it is really a spoof event. */
4816 igb_check_wvbr(adapter);
4817 }
4818
4819 /* Check for a mailbox event */
4820 if (icr & E1000_ICR_VMMB)
4821 igb_msg_task(adapter);
4822
4823 if (icr & E1000_ICR_LSC) {
4824 hw->mac.get_link_status = 1;
4825 /* guard against interrupt when we're going down */
4826 if (!test_bit(__IGB_DOWN, &adapter->state))
4827 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4828 }
4829
4830 wr32(E1000_EIMS, adapter->eims_other);
4831
4832 return IRQ_HANDLED;
4833 }
4834
4835 static void igb_write_itr(struct igb_q_vector *q_vector)
4836 {
4837 struct igb_adapter *adapter = q_vector->adapter;
4838 u32 itr_val = q_vector->itr_val & 0x7FFC;
4839
4840 if (!q_vector->set_itr)
4841 return;
4842
4843 if (!itr_val)
4844 itr_val = 0x4;
4845
4846 if (adapter->hw.mac.type == e1000_82575)
4847 itr_val |= itr_val << 16;
4848 else
4849 itr_val |= E1000_EITR_CNT_IGNR;
4850
4851 writel(itr_val, q_vector->itr_register);
4852 q_vector->set_itr = 0;
4853 }
4854
4855 static irqreturn_t igb_msix_ring(int irq, void *data)
4856 {
4857 struct igb_q_vector *q_vector = data;
4858
4859 /* Write the ITR value calculated from the previous interrupt. */
4860 igb_write_itr(q_vector);
4861
4862 napi_schedule(&q_vector->napi);
4863
4864 return IRQ_HANDLED;
4865 }
4866
4867 #ifdef CONFIG_IGB_DCA
4868 static void igb_update_dca(struct igb_q_vector *q_vector)
4869 {
4870 struct igb_adapter *adapter = q_vector->adapter;
4871 struct e1000_hw *hw = &adapter->hw;
4872 int cpu = get_cpu();
4873
4874 if (q_vector->cpu == cpu)
4875 goto out_no_update;
4876
4877 if (q_vector->tx.ring) {
4878 int q = q_vector->tx.ring->reg_idx;
4879 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4880 if (hw->mac.type == e1000_82575) {
4881 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4882 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4883 } else {
4884 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4885 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4886 E1000_DCA_TXCTRL_CPUID_SHIFT;
4887 }
4888 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4889 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4890 }
4891 if (q_vector->rx.ring) {
4892 int q = q_vector->rx.ring->reg_idx;
4893 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4894 if (hw->mac.type == e1000_82575) {
4895 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4896 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4897 } else {
4898 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4899 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4900 E1000_DCA_RXCTRL_CPUID_SHIFT;
4901 }
4902 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4903 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4904 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4905 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4906 }
4907 q_vector->cpu = cpu;
4908 out_no_update:
4909 put_cpu();
4910 }
4911
4912 static void igb_setup_dca(struct igb_adapter *adapter)
4913 {
4914 struct e1000_hw *hw = &adapter->hw;
4915 int i;
4916
4917 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4918 return;
4919
4920 /* Always use CB2 mode, difference is masked in the CB driver. */
4921 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4922
4923 for (i = 0; i < adapter->num_q_vectors; i++) {
4924 adapter->q_vector[i]->cpu = -1;
4925 igb_update_dca(adapter->q_vector[i]);
4926 }
4927 }
4928
4929 static int __igb_notify_dca(struct device *dev, void *data)
4930 {
4931 struct net_device *netdev = dev_get_drvdata(dev);
4932 struct igb_adapter *adapter = netdev_priv(netdev);
4933 struct pci_dev *pdev = adapter->pdev;
4934 struct e1000_hw *hw = &adapter->hw;
4935 unsigned long event = *(unsigned long *)data;
4936
4937 switch (event) {
4938 case DCA_PROVIDER_ADD:
4939 /* if already enabled, don't do it again */
4940 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4941 break;
4942 if (dca_add_requester(dev) == 0) {
4943 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4944 dev_info(&pdev->dev, "DCA enabled\n");
4945 igb_setup_dca(adapter);
4946 break;
4947 }
4948 /* Fall Through since DCA is disabled. */
4949 case DCA_PROVIDER_REMOVE:
4950 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4951 /* without this a class_device is left
4952 * hanging around in the sysfs model */
4953 dca_remove_requester(dev);
4954 dev_info(&pdev->dev, "DCA disabled\n");
4955 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4956 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4957 }
4958 break;
4959 }
4960
4961 return 0;
4962 }
4963
4964 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4965 void *p)
4966 {
4967 int ret_val;
4968
4969 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4970 __igb_notify_dca);
4971
4972 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4973 }
4974 #endif /* CONFIG_IGB_DCA */
4975
4976 #ifdef CONFIG_PCI_IOV
4977 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4978 {
4979 unsigned char mac_addr[ETH_ALEN];
4980 struct pci_dev *pdev = adapter->pdev;
4981 struct e1000_hw *hw = &adapter->hw;
4982 struct pci_dev *pvfdev;
4983 unsigned int device_id;
4984 u16 thisvf_devfn;
4985
4986 random_ether_addr(mac_addr);
4987 igb_set_vf_mac(adapter, vf, mac_addr);
4988
4989 switch (adapter->hw.mac.type) {
4990 case e1000_82576:
4991 device_id = IGB_82576_VF_DEV_ID;
4992 /* VF Stride for 82576 is 2 */
4993 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
4994 (pdev->devfn & 1);
4995 break;
4996 case e1000_i350:
4997 device_id = IGB_I350_VF_DEV_ID;
4998 /* VF Stride for I350 is 4 */
4999 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
5000 (pdev->devfn & 3);
5001 break;
5002 default:
5003 device_id = 0;
5004 thisvf_devfn = 0;
5005 break;
5006 }
5007
5008 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5009 while (pvfdev) {
5010 if (pvfdev->devfn == thisvf_devfn)
5011 break;
5012 pvfdev = pci_get_device(hw->vendor_id,
5013 device_id, pvfdev);
5014 }
5015
5016 if (pvfdev)
5017 adapter->vf_data[vf].vfdev = pvfdev;
5018 else
5019 dev_err(&pdev->dev,
5020 "Couldn't find pci dev ptr for VF %4.4x\n",
5021 thisvf_devfn);
5022 return pvfdev != NULL;
5023 }
5024
5025 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
5026 {
5027 struct e1000_hw *hw = &adapter->hw;
5028 struct pci_dev *pdev = adapter->pdev;
5029 struct pci_dev *pvfdev;
5030 u16 vf_devfn = 0;
5031 u16 vf_stride;
5032 unsigned int device_id;
5033 int vfs_found = 0;
5034
5035 switch (adapter->hw.mac.type) {
5036 case e1000_82576:
5037 device_id = IGB_82576_VF_DEV_ID;
5038 /* VF Stride for 82576 is 2 */
5039 vf_stride = 2;
5040 break;
5041 case e1000_i350:
5042 device_id = IGB_I350_VF_DEV_ID;
5043 /* VF Stride for I350 is 4 */
5044 vf_stride = 4;
5045 break;
5046 default:
5047 device_id = 0;
5048 vf_stride = 0;
5049 break;
5050 }
5051
5052 vf_devfn = pdev->devfn + 0x80;
5053 pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5054 while (pvfdev) {
5055 if (pvfdev->devfn == vf_devfn &&
5056 (pvfdev->bus->number >= pdev->bus->number))
5057 vfs_found++;
5058 vf_devfn += vf_stride;
5059 pvfdev = pci_get_device(hw->vendor_id,
5060 device_id, pvfdev);
5061 }
5062
5063 return vfs_found;
5064 }
5065
5066 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5067 {
5068 int i;
5069 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5070 if (adapter->vf_data[i].vfdev) {
5071 if (adapter->vf_data[i].vfdev->dev_flags &
5072 PCI_DEV_FLAGS_ASSIGNED)
5073 return true;
5074 }
5075 }
5076 return false;
5077 }
5078
5079 #endif
5080 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5081 {
5082 struct e1000_hw *hw = &adapter->hw;
5083 u32 ping;
5084 int i;
5085
5086 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5087 ping = E1000_PF_CONTROL_MSG;
5088 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5089 ping |= E1000_VT_MSGTYPE_CTS;
5090 igb_write_mbx(hw, &ping, 1, i);
5091 }
5092 }
5093
5094 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5095 {
5096 struct e1000_hw *hw = &adapter->hw;
5097 u32 vmolr = rd32(E1000_VMOLR(vf));
5098 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5099
5100 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5101 IGB_VF_FLAG_MULTI_PROMISC);
5102 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5103
5104 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5105 vmolr |= E1000_VMOLR_MPME;
5106 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5107 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5108 } else {
5109 /*
5110 * if we have hashes and we are clearing a multicast promisc
5111 * flag we need to write the hashes to the MTA as this step
5112 * was previously skipped
5113 */
5114 if (vf_data->num_vf_mc_hashes > 30) {
5115 vmolr |= E1000_VMOLR_MPME;
5116 } else if (vf_data->num_vf_mc_hashes) {
5117 int j;
5118 vmolr |= E1000_VMOLR_ROMPE;
5119 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5120 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5121 }
5122 }
5123
5124 wr32(E1000_VMOLR(vf), vmolr);
5125
5126 /* there are flags left unprocessed, likely not supported */
5127 if (*msgbuf & E1000_VT_MSGINFO_MASK)
5128 return -EINVAL;
5129
5130 return 0;
5131
5132 }
5133
5134 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5135 u32 *msgbuf, u32 vf)
5136 {
5137 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5138 u16 *hash_list = (u16 *)&msgbuf[1];
5139 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5140 int i;
5141
5142 /* salt away the number of multicast addresses assigned
5143 * to this VF for later use to restore when the PF multi cast
5144 * list changes
5145 */
5146 vf_data->num_vf_mc_hashes = n;
5147
5148 /* only up to 30 hash values supported */
5149 if (n > 30)
5150 n = 30;
5151
5152 /* store the hashes for later use */
5153 for (i = 0; i < n; i++)
5154 vf_data->vf_mc_hashes[i] = hash_list[i];
5155
5156 /* Flush and reset the mta with the new values */
5157 igb_set_rx_mode(adapter->netdev);
5158
5159 return 0;
5160 }
5161
5162 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5163 {
5164 struct e1000_hw *hw = &adapter->hw;
5165 struct vf_data_storage *vf_data;
5166 int i, j;
5167
5168 for (i = 0; i < adapter->vfs_allocated_count; i++) {
5169 u32 vmolr = rd32(E1000_VMOLR(i));
5170 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5171
5172 vf_data = &adapter->vf_data[i];
5173
5174 if ((vf_data->num_vf_mc_hashes > 30) ||
5175 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5176 vmolr |= E1000_VMOLR_MPME;
5177 } else if (vf_data->num_vf_mc_hashes) {
5178 vmolr |= E1000_VMOLR_ROMPE;
5179 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5180 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5181 }
5182 wr32(E1000_VMOLR(i), vmolr);
5183 }
5184 }
5185
5186 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5187 {
5188 struct e1000_hw *hw = &adapter->hw;
5189 u32 pool_mask, reg, vid;
5190 int i;
5191
5192 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5193
5194 /* Find the vlan filter for this id */
5195 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5196 reg = rd32(E1000_VLVF(i));
5197
5198 /* remove the vf from the pool */
5199 reg &= ~pool_mask;
5200
5201 /* if pool is empty then remove entry from vfta */
5202 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5203 (reg & E1000_VLVF_VLANID_ENABLE)) {
5204 reg = 0;
5205 vid = reg & E1000_VLVF_VLANID_MASK;
5206 igb_vfta_set(hw, vid, false);
5207 }
5208
5209 wr32(E1000_VLVF(i), reg);
5210 }
5211
5212 adapter->vf_data[vf].vlans_enabled = 0;
5213 }
5214
5215 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5216 {
5217 struct e1000_hw *hw = &adapter->hw;
5218 u32 reg, i;
5219
5220 /* The vlvf table only exists on 82576 hardware and newer */
5221 if (hw->mac.type < e1000_82576)
5222 return -1;
5223
5224 /* we only need to do this if VMDq is enabled */
5225 if (!adapter->vfs_allocated_count)
5226 return -1;
5227
5228 /* Find the vlan filter for this id */
5229 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5230 reg = rd32(E1000_VLVF(i));
5231 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5232 vid == (reg & E1000_VLVF_VLANID_MASK))
5233 break;
5234 }
5235
5236 if (add) {
5237 if (i == E1000_VLVF_ARRAY_SIZE) {
5238 /* Did not find a matching VLAN ID entry that was
5239 * enabled. Search for a free filter entry, i.e.
5240 * one without the enable bit set
5241 */
5242 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5243 reg = rd32(E1000_VLVF(i));
5244 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5245 break;
5246 }
5247 }
5248 if (i < E1000_VLVF_ARRAY_SIZE) {
5249 /* Found an enabled/available entry */
5250 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5251
5252 /* if !enabled we need to set this up in vfta */
5253 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5254 /* add VID to filter table */
5255 igb_vfta_set(hw, vid, true);
5256 reg |= E1000_VLVF_VLANID_ENABLE;
5257 }
5258 reg &= ~E1000_VLVF_VLANID_MASK;
5259 reg |= vid;
5260 wr32(E1000_VLVF(i), reg);
5261
5262 /* do not modify RLPML for PF devices */
5263 if (vf >= adapter->vfs_allocated_count)
5264 return 0;
5265
5266 if (!adapter->vf_data[vf].vlans_enabled) {
5267 u32 size;
5268 reg = rd32(E1000_VMOLR(vf));
5269 size = reg & E1000_VMOLR_RLPML_MASK;
5270 size += 4;
5271 reg &= ~E1000_VMOLR_RLPML_MASK;
5272 reg |= size;
5273 wr32(E1000_VMOLR(vf), reg);
5274 }
5275
5276 adapter->vf_data[vf].vlans_enabled++;
5277 }
5278 } else {
5279 if (i < E1000_VLVF_ARRAY_SIZE) {
5280 /* remove vf from the pool */
5281 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5282 /* if pool is empty then remove entry from vfta */
5283 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5284 reg = 0;
5285 igb_vfta_set(hw, vid, false);
5286 }
5287 wr32(E1000_VLVF(i), reg);
5288
5289 /* do not modify RLPML for PF devices */
5290 if (vf >= adapter->vfs_allocated_count)
5291 return 0;
5292
5293 adapter->vf_data[vf].vlans_enabled--;
5294 if (!adapter->vf_data[vf].vlans_enabled) {
5295 u32 size;
5296 reg = rd32(E1000_VMOLR(vf));
5297 size = reg & E1000_VMOLR_RLPML_MASK;
5298 size -= 4;
5299 reg &= ~E1000_VMOLR_RLPML_MASK;
5300 reg |= size;
5301 wr32(E1000_VMOLR(vf), reg);
5302 }
5303 }
5304 }
5305 return 0;
5306 }
5307
5308 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5309 {
5310 struct e1000_hw *hw = &adapter->hw;
5311
5312 if (vid)
5313 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5314 else
5315 wr32(E1000_VMVIR(vf), 0);
5316 }
5317
5318 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5319 int vf, u16 vlan, u8 qos)
5320 {
5321 int err = 0;
5322 struct igb_adapter *adapter = netdev_priv(netdev);
5323
5324 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5325 return -EINVAL;
5326 if (vlan || qos) {
5327 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5328 if (err)
5329 goto out;
5330 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5331 igb_set_vmolr(adapter, vf, !vlan);
5332 adapter->vf_data[vf].pf_vlan = vlan;
5333 adapter->vf_data[vf].pf_qos = qos;
5334 dev_info(&adapter->pdev->dev,
5335 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5336 if (test_bit(__IGB_DOWN, &adapter->state)) {
5337 dev_warn(&adapter->pdev->dev,
5338 "The VF VLAN has been set,"
5339 " but the PF device is not up.\n");
5340 dev_warn(&adapter->pdev->dev,
5341 "Bring the PF device up before"
5342 " attempting to use the VF device.\n");
5343 }
5344 } else {
5345 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5346 false, vf);
5347 igb_set_vmvir(adapter, vlan, vf);
5348 igb_set_vmolr(adapter, vf, true);
5349 adapter->vf_data[vf].pf_vlan = 0;
5350 adapter->vf_data[vf].pf_qos = 0;
5351 }
5352 out:
5353 return err;
5354 }
5355
5356 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5357 {
5358 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5359 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5360
5361 return igb_vlvf_set(adapter, vid, add, vf);
5362 }
5363
5364 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5365 {
5366 /* clear flags - except flag that indicates PF has set the MAC */
5367 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5368 adapter->vf_data[vf].last_nack = jiffies;
5369
5370 /* reset offloads to defaults */
5371 igb_set_vmolr(adapter, vf, true);
5372
5373 /* reset vlans for device */
5374 igb_clear_vf_vfta(adapter, vf);
5375 if (adapter->vf_data[vf].pf_vlan)
5376 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5377 adapter->vf_data[vf].pf_vlan,
5378 adapter->vf_data[vf].pf_qos);
5379 else
5380 igb_clear_vf_vfta(adapter, vf);
5381
5382 /* reset multicast table array for vf */
5383 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5384
5385 /* Flush and reset the mta with the new values */
5386 igb_set_rx_mode(adapter->netdev);
5387 }
5388
5389 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5390 {
5391 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5392
5393 /* generate a new mac address as we were hotplug removed/added */
5394 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5395 random_ether_addr(vf_mac);
5396
5397 /* process remaining reset events */
5398 igb_vf_reset(adapter, vf);
5399 }
5400
5401 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5402 {
5403 struct e1000_hw *hw = &adapter->hw;
5404 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5405 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5406 u32 reg, msgbuf[3];
5407 u8 *addr = (u8 *)(&msgbuf[1]);
5408
5409 /* process all the same items cleared in a function level reset */
5410 igb_vf_reset(adapter, vf);
5411
5412 /* set vf mac address */
5413 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5414
5415 /* enable transmit and receive for vf */
5416 reg = rd32(E1000_VFTE);
5417 wr32(E1000_VFTE, reg | (1 << vf));
5418 reg = rd32(E1000_VFRE);
5419 wr32(E1000_VFRE, reg | (1 << vf));
5420
5421 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5422
5423 /* reply to reset with ack and vf mac address */
5424 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5425 memcpy(addr, vf_mac, 6);
5426 igb_write_mbx(hw, msgbuf, 3, vf);
5427 }
5428
5429 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5430 {
5431 /*
5432 * The VF MAC Address is stored in a packed array of bytes
5433 * starting at the second 32 bit word of the msg array
5434 */
5435 unsigned char *addr = (char *)&msg[1];
5436 int err = -1;
5437
5438 if (is_valid_ether_addr(addr))
5439 err = igb_set_vf_mac(adapter, vf, addr);
5440
5441 return err;
5442 }
5443
5444 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5445 {
5446 struct e1000_hw *hw = &adapter->hw;
5447 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5448 u32 msg = E1000_VT_MSGTYPE_NACK;
5449
5450 /* if device isn't clear to send it shouldn't be reading either */
5451 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5452 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5453 igb_write_mbx(hw, &msg, 1, vf);
5454 vf_data->last_nack = jiffies;
5455 }
5456 }
5457
5458 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5459 {
5460 struct pci_dev *pdev = adapter->pdev;
5461 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5462 struct e1000_hw *hw = &adapter->hw;
5463 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5464 s32 retval;
5465
5466 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5467
5468 if (retval) {
5469 /* if receive failed revoke VF CTS stats and restart init */
5470 dev_err(&pdev->dev, "Error receiving message from VF\n");
5471 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5472 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5473 return;
5474 goto out;
5475 }
5476
5477 /* this is a message we already processed, do nothing */
5478 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5479 return;
5480
5481 /*
5482 * until the vf completes a reset it should not be
5483 * allowed to start any configuration.
5484 */
5485
5486 if (msgbuf[0] == E1000_VF_RESET) {
5487 igb_vf_reset_msg(adapter, vf);
5488 return;
5489 }
5490
5491 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5492 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5493 return;
5494 retval = -1;
5495 goto out;
5496 }
5497
5498 switch ((msgbuf[0] & 0xFFFF)) {
5499 case E1000_VF_SET_MAC_ADDR:
5500 retval = -EINVAL;
5501 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5502 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5503 else
5504 dev_warn(&pdev->dev,
5505 "VF %d attempted to override administratively "
5506 "set MAC address\nReload the VF driver to "
5507 "resume operations\n", vf);
5508 break;
5509 case E1000_VF_SET_PROMISC:
5510 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5511 break;
5512 case E1000_VF_SET_MULTICAST:
5513 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5514 break;
5515 case E1000_VF_SET_LPE:
5516 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5517 break;
5518 case E1000_VF_SET_VLAN:
5519 retval = -1;
5520 if (vf_data->pf_vlan)
5521 dev_warn(&pdev->dev,
5522 "VF %d attempted to override administratively "
5523 "set VLAN tag\nReload the VF driver to "
5524 "resume operations\n", vf);
5525 else
5526 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5527 break;
5528 default:
5529 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5530 retval = -1;
5531 break;
5532 }
5533
5534 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5535 out:
5536 /* notify the VF of the results of what it sent us */
5537 if (retval)
5538 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5539 else
5540 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5541
5542 igb_write_mbx(hw, msgbuf, 1, vf);
5543 }
5544
5545 static void igb_msg_task(struct igb_adapter *adapter)
5546 {
5547 struct e1000_hw *hw = &adapter->hw;
5548 u32 vf;
5549
5550 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5551 /* process any reset requests */
5552 if (!igb_check_for_rst(hw, vf))
5553 igb_vf_reset_event(adapter, vf);
5554
5555 /* process any messages pending */
5556 if (!igb_check_for_msg(hw, vf))
5557 igb_rcv_msg_from_vf(adapter, vf);
5558
5559 /* process any acks */
5560 if (!igb_check_for_ack(hw, vf))
5561 igb_rcv_ack_from_vf(adapter, vf);
5562 }
5563 }
5564
5565 /**
5566 * igb_set_uta - Set unicast filter table address
5567 * @adapter: board private structure
5568 *
5569 * The unicast table address is a register array of 32-bit registers.
5570 * The table is meant to be used in a way similar to how the MTA is used
5571 * however due to certain limitations in the hardware it is necessary to
5572 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5573 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5574 **/
5575 static void igb_set_uta(struct igb_adapter *adapter)
5576 {
5577 struct e1000_hw *hw = &adapter->hw;
5578 int i;
5579
5580 /* The UTA table only exists on 82576 hardware and newer */
5581 if (hw->mac.type < e1000_82576)
5582 return;
5583
5584 /* we only need to do this if VMDq is enabled */
5585 if (!adapter->vfs_allocated_count)
5586 return;
5587
5588 for (i = 0; i < hw->mac.uta_reg_count; i++)
5589 array_wr32(E1000_UTA, i, ~0);
5590 }
5591
5592 /**
5593 * igb_intr_msi - Interrupt Handler
5594 * @irq: interrupt number
5595 * @data: pointer to a network interface device structure
5596 **/
5597 static irqreturn_t igb_intr_msi(int irq, void *data)
5598 {
5599 struct igb_adapter *adapter = data;
5600 struct igb_q_vector *q_vector = adapter->q_vector[0];
5601 struct e1000_hw *hw = &adapter->hw;
5602 /* read ICR disables interrupts using IAM */
5603 u32 icr = rd32(E1000_ICR);
5604
5605 igb_write_itr(q_vector);
5606
5607 if (icr & E1000_ICR_DRSTA)
5608 schedule_work(&adapter->reset_task);
5609
5610 if (icr & E1000_ICR_DOUTSYNC) {
5611 /* HW is reporting DMA is out of sync */
5612 adapter->stats.doosync++;
5613 }
5614
5615 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5616 hw->mac.get_link_status = 1;
5617 if (!test_bit(__IGB_DOWN, &adapter->state))
5618 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5619 }
5620
5621 napi_schedule(&q_vector->napi);
5622
5623 return IRQ_HANDLED;
5624 }
5625
5626 /**
5627 * igb_intr - Legacy Interrupt Handler
5628 * @irq: interrupt number
5629 * @data: pointer to a network interface device structure
5630 **/
5631 static irqreturn_t igb_intr(int irq, void *data)
5632 {
5633 struct igb_adapter *adapter = data;
5634 struct igb_q_vector *q_vector = adapter->q_vector[0];
5635 struct e1000_hw *hw = &adapter->hw;
5636 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5637 * need for the IMC write */
5638 u32 icr = rd32(E1000_ICR);
5639
5640 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5641 * not set, then the adapter didn't send an interrupt */
5642 if (!(icr & E1000_ICR_INT_ASSERTED))
5643 return IRQ_NONE;
5644
5645 igb_write_itr(q_vector);
5646
5647 if (icr & E1000_ICR_DRSTA)
5648 schedule_work(&adapter->reset_task);
5649
5650 if (icr & E1000_ICR_DOUTSYNC) {
5651 /* HW is reporting DMA is out of sync */
5652 adapter->stats.doosync++;
5653 }
5654
5655 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5656 hw->mac.get_link_status = 1;
5657 /* guard against interrupt when we're going down */
5658 if (!test_bit(__IGB_DOWN, &adapter->state))
5659 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5660 }
5661
5662 napi_schedule(&q_vector->napi);
5663
5664 return IRQ_HANDLED;
5665 }
5666
5667 static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5668 {
5669 struct igb_adapter *adapter = q_vector->adapter;
5670 struct e1000_hw *hw = &adapter->hw;
5671
5672 if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5673 (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5674 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5675 igb_set_itr(q_vector);
5676 else
5677 igb_update_ring_itr(q_vector);
5678 }
5679
5680 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5681 if (adapter->msix_entries)
5682 wr32(E1000_EIMS, q_vector->eims_value);
5683 else
5684 igb_irq_enable(adapter);
5685 }
5686 }
5687
5688 /**
5689 * igb_poll - NAPI Rx polling callback
5690 * @napi: napi polling structure
5691 * @budget: count of how many packets we should handle
5692 **/
5693 static int igb_poll(struct napi_struct *napi, int budget)
5694 {
5695 struct igb_q_vector *q_vector = container_of(napi,
5696 struct igb_q_vector,
5697 napi);
5698 bool clean_complete = true;
5699
5700 #ifdef CONFIG_IGB_DCA
5701 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5702 igb_update_dca(q_vector);
5703 #endif
5704 if (q_vector->tx.ring)
5705 clean_complete = igb_clean_tx_irq(q_vector);
5706
5707 if (q_vector->rx.ring)
5708 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5709
5710 /* If all work not completed, return budget and keep polling */
5711 if (!clean_complete)
5712 return budget;
5713
5714 /* If not enough Rx work done, exit the polling mode */
5715 napi_complete(napi);
5716 igb_ring_irq_enable(q_vector);
5717
5718 return 0;
5719 }
5720
5721 /**
5722 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5723 * @adapter: board private structure
5724 * @shhwtstamps: timestamp structure to update
5725 * @regval: unsigned 64bit system time value.
5726 *
5727 * We need to convert the system time value stored in the RX/TXSTMP registers
5728 * into a hwtstamp which can be used by the upper level timestamping functions
5729 */
5730 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5731 struct skb_shared_hwtstamps *shhwtstamps,
5732 u64 regval)
5733 {
5734 u64 ns;
5735
5736 /*
5737 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5738 * 24 to match clock shift we setup earlier.
5739 */
5740 if (adapter->hw.mac.type >= e1000_82580)
5741 regval <<= IGB_82580_TSYNC_SHIFT;
5742
5743 ns = timecounter_cyc2time(&adapter->clock, regval);
5744 timecompare_update(&adapter->compare, ns);
5745 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5746 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5747 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5748 }
5749
5750 /**
5751 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5752 * @q_vector: pointer to q_vector containing needed info
5753 * @buffer: pointer to igb_tx_buffer structure
5754 *
5755 * If we were asked to do hardware stamping and such a time stamp is
5756 * available, then it must have been for this skb here because we only
5757 * allow only one such packet into the queue.
5758 */
5759 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5760 struct igb_tx_buffer *buffer_info)
5761 {
5762 struct igb_adapter *adapter = q_vector->adapter;
5763 struct e1000_hw *hw = &adapter->hw;
5764 struct skb_shared_hwtstamps shhwtstamps;
5765 u64 regval;
5766
5767 /* if skb does not support hw timestamp or TX stamp not valid exit */
5768 if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5769 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5770 return;
5771
5772 regval = rd32(E1000_TXSTMPL);
5773 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5774
5775 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5776 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5777 }
5778
/**
 * igb_clean_tx_irq - Reclaim resources after transmit completes
 * @q_vector: pointer to q_vector containing needed info
 *
 * Walks the tx ring from next_to_clean, one packet per iteration, for at
 * most q_vector->tx.work_limit packets. For each completed packet the skb
 * is freed, its DMA mappings are released and byte/packet counters are
 * accumulated. Afterwards the ring/vector statistics are updated, an
 * optional hang check is run and a stopped queue may be woken.
 *
 * returns true if the interface is down, if a Tx hang was detected, or if
 * the loop ended with work budget left over (ring completely cleaned);
 * false if the budget was exhausted.
 **/
static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct igb_ring *tx_ring = q_vector->tx.ring;
	struct igb_tx_buffer *tx_buffer;
	union e1000_adv_tx_desc *tx_desc, *eop_desc;
	unsigned int total_bytes = 0, total_packets = 0;
	unsigned int budget = q_vector->tx.work_limit;
	unsigned int i = tx_ring->next_to_clean;

	if (test_bit(__IGB_DOWN, &adapter->state))
		return true;

	tx_buffer = &tx_ring->tx_buffer_info[i];
	tx_desc = IGB_TX_DESC(tx_ring, i);
	/*
	 * Bias the (unsigned) index by -count so that reaching the end of
	 * the ring shows up as i == 0; the bias is removed again after the
	 * loop.
	 */
	i -= tx_ring->count;

	for (; budget; budget--) {
		eop_desc = tx_buffer->next_to_watch;

		/* prevent any other reads prior to eop_desc */
		rmb();

		/* if next_to_watch is not set then there is no work pending */
		if (!eop_desc)
			break;

		/* if DD is not set pending work has not been completed */
		if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
			break;

		/* clear next_to_watch to prevent false hangs */
		tx_buffer->next_to_watch = NULL;

		/* update the statistics for this packet */
		total_bytes += tx_buffer->bytecount;
		total_packets += tx_buffer->gso_segs;

		/* retrieve hardware timestamp */
		igb_tx_hwtstamp(q_vector, tx_buffer);

		/* free the skb */
		dev_kfree_skb_any(tx_buffer->skb);
		tx_buffer->skb = NULL;

		/* unmap skb header data */
		dma_unmap_single(tx_ring->dev,
				 tx_buffer->dma,
				 tx_buffer->length,
				 DMA_TO_DEVICE);

		/* clear last DMA location and unmap remaining buffers */
		while (tx_desc != eop_desc) {
			tx_buffer->dma = 0;

			tx_buffer++;
			tx_desc++;
			i++;
			/* wrapped past the last descriptor: restart at 0 */
			if (unlikely(!i)) {
				i -= tx_ring->count;
				tx_buffer = tx_ring->tx_buffer_info;
				tx_desc = IGB_TX_DESC(tx_ring, 0);
			}

			/* unmap any remaining paged data */
			if (tx_buffer->dma) {
				dma_unmap_page(tx_ring->dev,
					       tx_buffer->dma,
					       tx_buffer->length,
					       DMA_TO_DEVICE);
			}
		}

		/* clear last DMA location */
		tx_buffer->dma = 0;

		/* move us one more past the eop_desc for start of next pkt */
		tx_buffer++;
		tx_desc++;
		i++;
		if (unlikely(!i)) {
			i -= tx_ring->count;
			tx_buffer = tx_ring->tx_buffer_info;
			tx_desc = IGB_TX_DESC(tx_ring, 0);
		}
	}

	netdev_tx_completed_queue(txring_txq(tx_ring),
				  total_packets, total_bytes);
	/* undo the index bias before publishing next_to_clean */
	i += tx_ring->count;
	tx_ring->next_to_clean = i;
	u64_stats_update_begin(&tx_ring->tx_syncp);
	tx_ring->tx_stats.bytes += total_bytes;
	tx_ring->tx_stats.packets += total_packets;
	u64_stats_update_end(&tx_ring->tx_syncp);
	q_vector->tx.total_bytes += total_bytes;
	q_vector->tx.total_packets += total_packets;

	if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
		struct e1000_hw *hw = &adapter->hw;

		/* tx_buffer now refers to the first buffer not yet cleaned */
		eop_desc = tx_buffer->next_to_watch;

		/* Detect a transmit hang in hardware, this serializes the
		 * check with the clearing of time_stamp and movement of i */
		clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
		if (eop_desc &&
		    time_after(jiffies, tx_buffer->time_stamp +
			       (adapter->tx_timeout_factor * HZ)) &&
		    !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {

			/* detected Tx unit hang */
			dev_err(tx_ring->dev,
				"Detected Tx Unit Hang\n"
				" Tx Queue <%d>\n"
				" TDH <%x>\n"
				" TDT <%x>\n"
				" next_to_use <%x>\n"
				" next_to_clean <%x>\n"
				"buffer_info[next_to_clean]\n"
				" time_stamp <%lx>\n"
				" next_to_watch <%p>\n"
				" jiffies <%lx>\n"
				" desc.status <%x>\n",
				tx_ring->queue_index,
				rd32(E1000_TDH(tx_ring->reg_idx)),
				readl(tx_ring->tail),
				tx_ring->next_to_use,
				tx_ring->next_to_clean,
				tx_buffer->time_stamp,
				eop_desc,
				jiffies,
				eop_desc->wb.status);
			netif_stop_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);

			/* we are about to reset, no point in enabling stuff */
			return true;
		}
	}

	/* wake the queue if packets were cleaned and enough room is free */
	if (unlikely(total_packets &&
		     netif_carrier_ok(tx_ring->netdev) &&
		     igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
		/* Make sure that anybody stopping the queue after this
		 * sees the new next_to_clean.
		 */
		smp_mb();
		if (__netif_subqueue_stopped(tx_ring->netdev,
					     tx_ring->queue_index) &&
		    !(test_bit(__IGB_DOWN, &adapter->state))) {
			netif_wake_subqueue(tx_ring->netdev,
					    tx_ring->queue_index);

			u64_stats_update_begin(&tx_ring->tx_syncp);
			tx_ring->tx_stats.restart_queue++;
			u64_stats_update_end(&tx_ring->tx_syncp);
		}
	}

	/* non-zero budget means the loop stopped before running out of work */
	return !!budget;
}
5946
5947 static inline void igb_rx_checksum(struct igb_ring *ring,
5948 union e1000_adv_rx_desc *rx_desc,
5949 struct sk_buff *skb)
5950 {
5951 skb_checksum_none_assert(skb);
5952
5953 /* Ignore Checksum bit is set */
5954 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5955 return;
5956
5957 /* Rx checksum disabled via ethtool */
5958 if (!(ring->netdev->features & NETIF_F_RXCSUM))
5959 return;
5960
5961 /* TCP/UDP checksum error bit is set */
5962 if (igb_test_staterr(rx_desc,
5963 E1000_RXDEXT_STATERR_TCPE |
5964 E1000_RXDEXT_STATERR_IPE)) {
5965 /*
5966 * work around errata with sctp packets where the TCPE aka
5967 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5968 * packets, (aka let the stack check the crc32c)
5969 */
5970 if (!((skb->len == 60) &&
5971 test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5972 u64_stats_update_begin(&ring->rx_syncp);
5973 ring->rx_stats.csum_err++;
5974 u64_stats_update_end(&ring->rx_syncp);
5975 }
5976 /* let the stack verify checksum errors */
5977 return;
5978 }
5979 /* It must be a TCP or UDP packet with a valid checksum */
5980 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5981 E1000_RXD_STAT_UDPCS))
5982 skb->ip_summed = CHECKSUM_UNNECESSARY;
5983
5984 dev_dbg(ring->dev, "cksum success: bits %08X\n",
5985 le32_to_cpu(rx_desc->wb.upper.status_error));
5986 }
5987
5988 static inline void igb_rx_hash(struct igb_ring *ring,
5989 union e1000_adv_rx_desc *rx_desc,
5990 struct sk_buff *skb)
5991 {
5992 if (ring->netdev->features & NETIF_F_RXHASH)
5993 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
5994 }
5995
5996 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
5997 union e1000_adv_rx_desc *rx_desc,
5998 struct sk_buff *skb)
5999 {
6000 struct igb_adapter *adapter = q_vector->adapter;
6001 struct e1000_hw *hw = &adapter->hw;
6002 u64 regval;
6003
6004 if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
6005 E1000_RXDADV_STAT_TS))
6006 return;
6007
6008 /*
6009 * If this bit is set, then the RX registers contain the time stamp. No
6010 * other packet will be time stamped until we read these registers, so
6011 * read the registers to make them available again. Because only one
6012 * packet can be time stamped at a time, we know that the register
6013 * values must belong to this one here and therefore we don't need to
6014 * compare any of the additional attributes stored for it.
6015 *
6016 * If nothing went wrong, then it should have a shared tx_flags that we
6017 * can turn into a skb_shared_hwtstamps.
6018 */
6019 if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
6020 u32 *stamp = (u32 *)skb->data;
6021 regval = le32_to_cpu(*(stamp + 2));
6022 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
6023 skb_pull(skb, IGB_TS_HDR_LEN);
6024 } else {
6025 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
6026 return;
6027
6028 regval = rd32(E1000_RXSTMPL);
6029 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
6030 }
6031
6032 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
6033 }
6034
6035 static void igb_rx_vlan(struct igb_ring *ring,
6036 union e1000_adv_rx_desc *rx_desc,
6037 struct sk_buff *skb)
6038 {
6039 if (igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) {
6040 u16 vid;
6041 if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) &&
6042 test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags))
6043 vid = be16_to_cpu(rx_desc->wb.upper.vlan);
6044 else
6045 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
6046
6047 __vlan_hwaccel_put_tag(skb, vid);
6048 }
6049 }
6050
6051 static inline u16 igb_get_hlen(union e1000_adv_rx_desc *rx_desc)
6052 {
6053 /* HW will not DMA in data larger than the given buffer, even if it
6054 * parses the (NFS, of course) header to be larger. In that case, it
6055 * fills the header buffer and spills the rest into the page.
6056 */
6057 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
6058 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
6059 if (hlen > IGB_RX_HDR_LEN)
6060 hlen = IGB_RX_HDR_LEN;
6061 return hlen;
6062 }
6063
/*
 * igb_clean_rx_irq - process completed receive descriptors
 * @q_vector: vector whose rx ring is cleaned
 * @budget: maximum number of complete packets to pass up the stack
 *
 * Walks the rx ring from next_to_clean while descriptors have the DD bit
 * set. Each descriptor's header buffer and (optional) half-page buffer are
 * attached to the skb; packets spanning several descriptors are carried
 * forward to the next buffer slot until EOP. Complete packets are handed to
 * napi_gro_receive(). Used buffers are given back to hardware in batches.
 *
 * Returns true if budget remains when the loop ends, false if the budget
 * was used up.
 */
static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget)
{
	struct igb_ring *rx_ring = q_vector->rx.ring;
	union e1000_adv_rx_desc *rx_desc;
	const int current_node = numa_node_id();
	unsigned int total_bytes = 0, total_packets = 0;
	u16 cleaned_count = igb_desc_unused(rx_ring);
	u16 i = rx_ring->next_to_clean;

	rx_desc = IGB_RX_DESC(rx_ring, i);

	while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) {
		struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
		struct sk_buff *skb = buffer_info->skb;
		union e1000_adv_rx_desc *next_rxd;

		buffer_info->skb = NULL;
		prefetch(skb->data);

		/* advance the index now; i names the NEXT slot from here on */
		i++;
		if (i == rx_ring->count)
			i = 0;

		next_rxd = IGB_RX_DESC(rx_ring, i);
		prefetch(next_rxd);

		/*
		 * This memory barrier is needed to keep us from reading
		 * any other fields out of the rx_desc until we know the
		 * RXD_STAT_DD bit is set
		 */
		rmb();

		/* a linear skb has not had its header buffer consumed yet */
		if (!skb_is_nonlinear(skb)) {
			__skb_put(skb, igb_get_hlen(rx_desc));
			dma_unmap_single(rx_ring->dev, buffer_info->dma,
					 IGB_RX_HDR_LEN,
					 DMA_FROM_DEVICE);
			buffer_info->dma = 0;
		}

		/* non-zero length: data was also placed in the page buffer */
		if (rx_desc->wb.upper.length) {
			u16 length = le16_to_cpu(rx_desc->wb.upper.length);

			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
					   buffer_info->page,
					   buffer_info->page_offset,
					   length);

			skb->len += length;
			skb->data_len += length;
			skb->truesize += PAGE_SIZE / 2;

			/*
			 * Keep the page for reuse only if nobody else holds
			 * a reference and it is on the current NUMA node;
			 * otherwise forget it.
			 */
			if ((page_count(buffer_info->page) != 1) ||
			    (page_to_nid(buffer_info->page) != current_node))
				buffer_info->page = NULL;
			else
				get_page(buffer_info->page);

			dma_unmap_page(rx_ring->dev, buffer_info->page_dma,
				       PAGE_SIZE / 2, DMA_FROM_DEVICE);
			buffer_info->page_dma = 0;
		}

		/*
		 * Not the last descriptor of the packet: swap skbs with the
		 * next slot so the partially built skb continues there and
		 * the next slot's unused skb/dma stays in this slot.
		 */
		if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP)) {
			struct igb_rx_buffer *next_buffer;
			next_buffer = &rx_ring->rx_buffer_info[i];
			buffer_info->skb = next_buffer->skb;
			buffer_info->dma = next_buffer->dma;
			next_buffer->skb = skb;
			next_buffer->dma = 0;
			goto next_desc;
		}

		/* drop frames with errors unless NETIF_F_RXALL is enabled */
		if (unlikely((igb_test_staterr(rx_desc,
					       E1000_RXDEXT_ERR_FRAME_ERR_MASK))
			     && !(rx_ring->netdev->features & NETIF_F_RXALL))) {
			dev_kfree_skb_any(skb);
			goto next_desc;
		}

		igb_rx_hwtstamp(q_vector, rx_desc, skb);
		igb_rx_hash(rx_ring, rx_desc, skb);
		igb_rx_checksum(rx_ring, rx_desc, skb);
		igb_rx_vlan(rx_ring, rx_desc, skb);

		total_bytes += skb->len;
		total_packets++;

		skb->protocol = eth_type_trans(skb, rx_ring->netdev);

		napi_gro_receive(&q_vector->napi, skb);

		/* only packets delivered to the stack consume budget */
		budget--;
next_desc:
		if (!budget)
			break;

		cleaned_count++;
		/* return some buffers to hardware, one at a time is too slow */
		if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
			igb_alloc_rx_buffers(rx_ring, cleaned_count);
			cleaned_count = 0;
		}

		/* use prefetched values */
		rx_desc = next_rxd;
	}

	rx_ring->next_to_clean = i;
	u64_stats_update_begin(&rx_ring->rx_syncp);
	rx_ring->rx_stats.packets += total_packets;
	rx_ring->rx_stats.bytes += total_bytes;
	u64_stats_update_end(&rx_ring->rx_syncp);
	q_vector->rx.total_packets += total_packets;
	q_vector->rx.total_bytes += total_bytes;

	/* hand any remaining cleaned buffers back to hardware */
	if (cleaned_count)
		igb_alloc_rx_buffers(rx_ring, cleaned_count);

	return !!budget;
}
6186
6187 static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring,
6188 struct igb_rx_buffer *bi)
6189 {
6190 struct sk_buff *skb = bi->skb;
6191 dma_addr_t dma = bi->dma;
6192
6193 if (dma)
6194 return true;
6195
6196 if (likely(!skb)) {
6197 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
6198 IGB_RX_HDR_LEN);
6199 bi->skb = skb;
6200 if (!skb) {
6201 rx_ring->rx_stats.alloc_failed++;
6202 return false;
6203 }
6204
6205 /* initialize skb for ring */
6206 skb_record_rx_queue(skb, rx_ring->queue_index);
6207 }
6208
6209 dma = dma_map_single(rx_ring->dev, skb->data,
6210 IGB_RX_HDR_LEN, DMA_FROM_DEVICE);
6211
6212 if (dma_mapping_error(rx_ring->dev, dma)) {
6213 rx_ring->rx_stats.alloc_failed++;
6214 return false;
6215 }
6216
6217 bi->dma = dma;
6218 return true;
6219 }
6220
6221 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
6222 struct igb_rx_buffer *bi)
6223 {
6224 struct page *page = bi->page;
6225 dma_addr_t page_dma = bi->page_dma;
6226 unsigned int page_offset = bi->page_offset ^ (PAGE_SIZE / 2);
6227
6228 if (page_dma)
6229 return true;
6230
6231 if (!page) {
6232 page = alloc_page(GFP_ATOMIC | __GFP_COLD);
6233 bi->page = page;
6234 if (unlikely(!page)) {
6235 rx_ring->rx_stats.alloc_failed++;
6236 return false;
6237 }
6238 }
6239
6240 page_dma = dma_map_page(rx_ring->dev, page,
6241 page_offset, PAGE_SIZE / 2,
6242 DMA_FROM_DEVICE);
6243
6244 if (dma_mapping_error(rx_ring->dev, page_dma)) {
6245 rx_ring->rx_stats.alloc_failed++;
6246 return false;
6247 }
6248
6249 bi->page_dma = page_dma;
6250 bi->page_offset = page_offset;
6251 return true;
6252 }
6253
6254 /**
6255 * igb_alloc_rx_buffers - Replace used receive buffers; packet split
6256 * @adapter: address of board private structure
6257 **/
6258 void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
6259 {
6260 union e1000_adv_rx_desc *rx_desc;
6261 struct igb_rx_buffer *bi;
6262 u16 i = rx_ring->next_to_use;
6263
6264 rx_desc = IGB_RX_DESC(rx_ring, i);
6265 bi = &rx_ring->rx_buffer_info[i];
6266 i -= rx_ring->count;
6267
6268 while (cleaned_count--) {
6269 if (!igb_alloc_mapped_skb(rx_ring, bi))
6270 break;
6271
6272 /* Refresh the desc even if buffer_addrs didn't change
6273 * because each write-back erases this info. */
6274 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
6275
6276 if (!igb_alloc_mapped_page(rx_ring, bi))
6277 break;
6278
6279 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
6280
6281 rx_desc++;
6282 bi++;
6283 i++;
6284 if (unlikely(!i)) {
6285 rx_desc = IGB_RX_DESC(rx_ring, 0);
6286 bi = rx_ring->rx_buffer_info;
6287 i -= rx_ring->count;
6288 }
6289
6290 /* clear the hdr_addr for the next_to_use descriptor */
6291 rx_desc->read.hdr_addr = 0;
6292 }
6293
6294 i += rx_ring->count;
6295
6296 if (rx_ring->next_to_use != i) {
6297 rx_ring->next_to_use = i;
6298
6299 /* Force memory writes to complete before letting h/w
6300 * know there are new descriptors to fetch. (Only
6301 * applicable for weak-ordered memory model archs,
6302 * such as IA-64). */
6303 wmb();
6304 writel(i, rx_ring->tail);
6305 }
6306 }
6307
6308 /**
6309 * igb_mii_ioctl -
6310 * @netdev:
6311 * @ifreq:
6312 * @cmd:
6313 **/
6314 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6315 {
6316 struct igb_adapter *adapter = netdev_priv(netdev);
6317 struct mii_ioctl_data *data = if_mii(ifr);
6318
6319 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6320 return -EOPNOTSUPP;
6321
6322 switch (cmd) {
6323 case SIOCGMIIPHY:
6324 data->phy_id = adapter->hw.phy.addr;
6325 break;
6326 case SIOCGMIIREG:
6327 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6328 &data->val_out))
6329 return -EIO;
6330 break;
6331 case SIOCSMIIREG:
6332 default:
6333 return -EOPNOTSUPP;
6334 }
6335 return 0;
6336 }
6337
6338 /**
6339 * igb_hwtstamp_ioctl - control hardware time stamping
6340 * @netdev:
6341 * @ifreq:
6342 * @cmd:
6343 *
6344 * Outgoing time stamping can be enabled and disabled. Play nice and
6345 * disable it when requested, although it shouldn't case any overhead
6346 * when no packet needs it. At most one packet in the queue may be
6347 * marked for time stamping, otherwise it would be impossible to tell
6348 * for sure to which packet the hardware time stamp belongs.
6349 *
6350 * Incoming time stamping has to be configured via the hardware
6351 * filters. Not all combinations are supported, in particular event
6352 * type has to be specified. Matching the kind of event packet is
6353 * not supported, with the exception of "all V2 events regardless of
6354 * level 2 or 4".
6355 *
6356 **/
6357 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6358 struct ifreq *ifr, int cmd)
6359 {
6360 struct igb_adapter *adapter = netdev_priv(netdev);
6361 struct e1000_hw *hw = &adapter->hw;
6362 struct hwtstamp_config config;
6363 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6364 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6365 u32 tsync_rx_cfg = 0;
6366 bool is_l4 = false;
6367 bool is_l2 = false;
6368 u32 regval;
6369
6370 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6371 return -EFAULT;
6372
6373 /* reserved for future extensions */
6374 if (config.flags)
6375 return -EINVAL;
6376
6377 switch (config.tx_type) {
6378 case HWTSTAMP_TX_OFF:
6379 tsync_tx_ctl = 0;
6380 case HWTSTAMP_TX_ON:
6381 break;
6382 default:
6383 return -ERANGE;
6384 }
6385
6386 switch (config.rx_filter) {
6387 case HWTSTAMP_FILTER_NONE:
6388 tsync_rx_ctl = 0;
6389 break;
6390 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6391 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6392 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6393 case HWTSTAMP_FILTER_ALL:
6394 /*
6395 * register TSYNCRXCFG must be set, therefore it is not
6396 * possible to time stamp both Sync and Delay_Req messages
6397 * => fall back to time stamping all packets
6398 */
6399 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6400 config.rx_filter = HWTSTAMP_FILTER_ALL;
6401 break;
6402 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6403 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6404 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6405 is_l4 = true;
6406 break;
6407 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6408 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6409 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6410 is_l4 = true;
6411 break;
6412 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6413 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6414 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6415 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6416 is_l2 = true;
6417 is_l4 = true;
6418 config.rx_filter = HWTSTAMP_FILTER_SOME;
6419 break;
6420 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6421 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6422 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6423 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6424 is_l2 = true;
6425 is_l4 = true;
6426 config.rx_filter = HWTSTAMP_FILTER_SOME;
6427 break;
6428 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6429 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6430 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6431 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6432 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6433 is_l2 = true;
6434 is_l4 = true;
6435 break;
6436 default:
6437 return -ERANGE;
6438 }
6439
6440 if (hw->mac.type == e1000_82575) {
6441 if (tsync_rx_ctl | tsync_tx_ctl)
6442 return -EINVAL;
6443 return 0;
6444 }
6445
6446 /*
6447 * Per-packet timestamping only works if all packets are
6448 * timestamped, so enable timestamping in all packets as
6449 * long as one rx filter was configured.
6450 */
6451 if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) {
6452 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6453 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6454 }
6455
6456 /* enable/disable TX */
6457 regval = rd32(E1000_TSYNCTXCTL);
6458 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6459 regval |= tsync_tx_ctl;
6460 wr32(E1000_TSYNCTXCTL, regval);
6461
6462 /* enable/disable RX */
6463 regval = rd32(E1000_TSYNCRXCTL);
6464 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6465 regval |= tsync_rx_ctl;
6466 wr32(E1000_TSYNCRXCTL, regval);
6467
6468 /* define which PTP packets are time stamped */
6469 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6470
6471 /* define ethertype filter for timestamped packets */
6472 if (is_l2)
6473 wr32(E1000_ETQF(3),
6474 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6475 E1000_ETQF_1588 | /* enable timestamping */
6476 ETH_P_1588)); /* 1588 eth protocol type */
6477 else
6478 wr32(E1000_ETQF(3), 0);
6479
6480 #define PTP_PORT 319
6481 /* L4 Queue Filter[3]: filter by destination port and protocol */
6482 if (is_l4) {
6483 u32 ftqf = (IPPROTO_UDP /* UDP */
6484 | E1000_FTQF_VF_BP /* VF not compared */
6485 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6486 | E1000_FTQF_MASK); /* mask all inputs */
6487 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6488
6489 wr32(E1000_IMIR(3), htons(PTP_PORT));
6490 wr32(E1000_IMIREXT(3),
6491 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6492 if (hw->mac.type == e1000_82576) {
6493 /* enable source port check */
6494 wr32(E1000_SPQF(3), htons(PTP_PORT));
6495 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6496 }
6497 wr32(E1000_FTQF(3), ftqf);
6498 } else {
6499 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6500 }
6501 wrfl();
6502
6503 adapter->hwtstamp_config = config;
6504
6505 /* clear TX/RX time stamp registers, just to be sure */
6506 regval = rd32(E1000_TXSTMPH);
6507 regval = rd32(E1000_RXSTMPH);
6508
6509 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6510 -EFAULT : 0;
6511 }
6512
6513 /**
6514 * igb_ioctl -
6515 * @netdev:
6516 * @ifreq:
6517 * @cmd:
6518 **/
6519 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6520 {
6521 switch (cmd) {
6522 case SIOCGMIIPHY:
6523 case SIOCGMIIREG:
6524 case SIOCSMIIREG:
6525 return igb_mii_ioctl(netdev, ifr, cmd);
6526 case SIOCSHWTSTAMP:
6527 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6528 default:
6529 return -EOPNOTSUPP;
6530 }
6531 }
6532
6533 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6534 {
6535 struct igb_adapter *adapter = hw->back;
6536 u16 cap_offset;
6537
6538 cap_offset = adapter->pdev->pcie_cap;
6539 if (!cap_offset)
6540 return -E1000_ERR_CONFIG;
6541
6542 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6543
6544 return 0;
6545 }
6546
6547 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6548 {
6549 struct igb_adapter *adapter = hw->back;
6550 u16 cap_offset;
6551
6552 cap_offset = adapter->pdev->pcie_cap;
6553 if (!cap_offset)
6554 return -E1000_ERR_CONFIG;
6555
6556 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6557
6558 return 0;
6559 }
6560
6561 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
6562 {
6563 struct igb_adapter *adapter = netdev_priv(netdev);
6564 struct e1000_hw *hw = &adapter->hw;
6565 u32 ctrl, rctl;
6566 bool enable = !!(features & NETIF_F_HW_VLAN_RX);
6567
6568 if (enable) {
6569 /* enable VLAN tag insert/strip */
6570 ctrl = rd32(E1000_CTRL);
6571 ctrl |= E1000_CTRL_VME;
6572 wr32(E1000_CTRL, ctrl);
6573
6574 /* Disable CFI check */
6575 rctl = rd32(E1000_RCTL);
6576 rctl &= ~E1000_RCTL_CFIEN;
6577 wr32(E1000_RCTL, rctl);
6578 } else {
6579 /* disable VLAN tag insert/strip */
6580 ctrl = rd32(E1000_CTRL);
6581 ctrl &= ~E1000_CTRL_VME;
6582 wr32(E1000_CTRL, ctrl);
6583 }
6584
6585 igb_rlpml_set(adapter);
6586 }
6587
6588 static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6589 {
6590 struct igb_adapter *adapter = netdev_priv(netdev);
6591 struct e1000_hw *hw = &adapter->hw;
6592 int pf_id = adapter->vfs_allocated_count;
6593
6594 /* attempt to add filter to vlvf array */
6595 igb_vlvf_set(adapter, vid, true, pf_id);
6596
6597 /* add the filter since PF can receive vlans w/o entry in vlvf */
6598 igb_vfta_set(hw, vid, true);
6599
6600 set_bit(vid, adapter->active_vlans);
6601
6602 return 0;
6603 }
6604
6605 static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6606 {
6607 struct igb_adapter *adapter = netdev_priv(netdev);
6608 struct e1000_hw *hw = &adapter->hw;
6609 int pf_id = adapter->vfs_allocated_count;
6610 s32 err;
6611
6612 /* remove vlan from VLVF table array */
6613 err = igb_vlvf_set(adapter, vid, false, pf_id);
6614
6615 /* if vid was not present in VLVF just remove it from table */
6616 if (err)
6617 igb_vfta_set(hw, vid, false);
6618
6619 clear_bit(vid, adapter->active_vlans);
6620
6621 return 0;
6622 }
6623
6624 static void igb_restore_vlan(struct igb_adapter *adapter)
6625 {
6626 u16 vid;
6627
6628 igb_vlan_mode(adapter->netdev, adapter->netdev->features);
6629
6630 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6631 igb_vlan_rx_add_vid(adapter->netdev, vid);
6632 }
6633
6634 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6635 {
6636 struct pci_dev *pdev = adapter->pdev;
6637 struct e1000_mac_info *mac = &adapter->hw.mac;
6638
6639 mac->autoneg = 0;
6640
6641 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6642 * for the switch() below to work */
6643 if ((spd & 1) || (dplx & ~1))
6644 goto err_inval;
6645
6646 /* Fiber NIC's only allow 1000 Gbps Full duplex */
6647 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6648 spd != SPEED_1000 &&
6649 dplx != DUPLEX_FULL)
6650 goto err_inval;
6651
6652 switch (spd + dplx) {
6653 case SPEED_10 + DUPLEX_HALF:
6654 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6655 break;
6656 case SPEED_10 + DUPLEX_FULL:
6657 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6658 break;
6659 case SPEED_100 + DUPLEX_HALF:
6660 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6661 break;
6662 case SPEED_100 + DUPLEX_FULL:
6663 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6664 break;
6665 case SPEED_1000 + DUPLEX_FULL:
6666 mac->autoneg = 1;
6667 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6668 break;
6669 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6670 default:
6671 goto err_inval;
6672 }
6673 return 0;
6674
6675 err_inval:
6676 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6677 return -EINVAL;
6678 }
6679
6680 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake,
6681 bool runtime)
6682 {
6683 struct net_device *netdev = pci_get_drvdata(pdev);
6684 struct igb_adapter *adapter = netdev_priv(netdev);
6685 struct e1000_hw *hw = &adapter->hw;
6686 u32 ctrl, rctl, status;
6687 u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol;
6688 #ifdef CONFIG_PM
6689 int retval = 0;
6690 #endif
6691
6692 netif_device_detach(netdev);
6693
6694 if (netif_running(netdev))
6695 __igb_close(netdev, true);
6696
6697 igb_clear_interrupt_scheme(adapter);
6698
6699 #ifdef CONFIG_PM
6700 retval = pci_save_state(pdev);
6701 if (retval)
6702 return retval;
6703 #endif
6704
6705 status = rd32(E1000_STATUS);
6706 if (status & E1000_STATUS_LU)
6707 wufc &= ~E1000_WUFC_LNKC;
6708
6709 if (wufc) {
6710 igb_setup_rctl(adapter);
6711 igb_set_rx_mode(netdev);
6712
6713 /* turn on all-multi mode if wake on multicast is enabled */
6714 if (wufc & E1000_WUFC_MC) {
6715 rctl = rd32(E1000_RCTL);
6716 rctl |= E1000_RCTL_MPE;
6717 wr32(E1000_RCTL, rctl);
6718 }
6719
6720 ctrl = rd32(E1000_CTRL);
6721 /* advertise wake from D3Cold */
6722 #define E1000_CTRL_ADVD3WUC 0x00100000
6723 /* phy power management enable */
6724 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6725 ctrl |= E1000_CTRL_ADVD3WUC;
6726 wr32(E1000_CTRL, ctrl);
6727
6728 /* Allow time for pending master requests to run */
6729 igb_disable_pcie_master(hw);
6730
6731 wr32(E1000_WUC, E1000_WUC_PME_EN);
6732 wr32(E1000_WUFC, wufc);
6733 } else {
6734 wr32(E1000_WUC, 0);
6735 wr32(E1000_WUFC, 0);
6736 }
6737
6738 *enable_wake = wufc || adapter->en_mng_pt;
6739 if (!*enable_wake)
6740 igb_power_down_link(adapter);
6741 else
6742 igb_power_up_link(adapter);
6743
6744 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6745 * would have already happened in close and is redundant. */
6746 igb_release_hw_control(adapter);
6747
6748 pci_disable_device(pdev);
6749
6750 return 0;
6751 }
6752
6753 #ifdef CONFIG_PM
6754 #ifdef CONFIG_PM_SLEEP
6755 static int igb_suspend(struct device *dev)
6756 {
6757 int retval;
6758 bool wake;
6759 struct pci_dev *pdev = to_pci_dev(dev);
6760
6761 retval = __igb_shutdown(pdev, &wake, 0);
6762 if (retval)
6763 return retval;
6764
6765 if (wake) {
6766 pci_prepare_to_sleep(pdev);
6767 } else {
6768 pci_wake_from_d3(pdev, false);
6769 pci_set_power_state(pdev, PCI_D3hot);
6770 }
6771
6772 return 0;
6773 }
6774 #endif /* CONFIG_PM_SLEEP */
6775
6776 static int igb_resume(struct device *dev)
6777 {
6778 struct pci_dev *pdev = to_pci_dev(dev);
6779 struct net_device *netdev = pci_get_drvdata(pdev);
6780 struct igb_adapter *adapter = netdev_priv(netdev);
6781 struct e1000_hw *hw = &adapter->hw;
6782 u32 err;
6783
6784 pci_set_power_state(pdev, PCI_D0);
6785 pci_restore_state(pdev);
6786 pci_save_state(pdev);
6787
6788 err = pci_enable_device_mem(pdev);
6789 if (err) {
6790 dev_err(&pdev->dev,
6791 "igb: Cannot enable PCI device from suspend\n");
6792 return err;
6793 }
6794 pci_set_master(pdev);
6795
6796 pci_enable_wake(pdev, PCI_D3hot, 0);
6797 pci_enable_wake(pdev, PCI_D3cold, 0);
6798
6799 if (!rtnl_is_locked()) {
6800 /*
6801 * shut up ASSERT_RTNL() warning in
6802 * netif_set_real_num_tx/rx_queues.
6803 */
6804 rtnl_lock();
6805 err = igb_init_interrupt_scheme(adapter);
6806 rtnl_unlock();
6807 } else {
6808 err = igb_init_interrupt_scheme(adapter);
6809 }
6810 if (err) {
6811 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6812 return -ENOMEM;
6813 }
6814
6815 igb_reset(adapter);
6816
6817 /* let the f/w know that the h/w is now under the control of the
6818 * driver. */
6819 igb_get_hw_control(adapter);
6820
6821 wr32(E1000_WUS, ~0);
6822
6823 if (netdev->flags & IFF_UP) {
6824 err = __igb_open(netdev, true);
6825 if (err)
6826 return err;
6827 }
6828
6829 netif_device_attach(netdev);
6830 return 0;
6831 }
6832
6833 #ifdef CONFIG_PM_RUNTIME
6834 static int igb_runtime_idle(struct device *dev)
6835 {
6836 struct pci_dev *pdev = to_pci_dev(dev);
6837 struct net_device *netdev = pci_get_drvdata(pdev);
6838 struct igb_adapter *adapter = netdev_priv(netdev);
6839
6840 if (!igb_has_link(adapter))
6841 pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
6842
6843 return -EBUSY;
6844 }
6845
6846 static int igb_runtime_suspend(struct device *dev)
6847 {
6848 struct pci_dev *pdev = to_pci_dev(dev);
6849 int retval;
6850 bool wake;
6851
6852 retval = __igb_shutdown(pdev, &wake, 1);
6853 if (retval)
6854 return retval;
6855
6856 if (wake) {
6857 pci_prepare_to_sleep(pdev);
6858 } else {
6859 pci_wake_from_d3(pdev, false);
6860 pci_set_power_state(pdev, PCI_D3hot);
6861 }
6862
6863 return 0;
6864 }
6865
/*
 * igb_runtime_resume - runtime PM resume callback
 * @dev: generic device embedded in the PCI device
 *
 * Runtime resume takes the same path as resume from system sleep, so the
 * result of igb_resume() is handed straight back.
 */
static int igb_runtime_resume(struct device *dev)
{
	int retval = igb_resume(dev);

	return retval;
}
6870 #endif /* CONFIG_PM_RUNTIME */
6871 #endif
6872
6873 static void igb_shutdown(struct pci_dev *pdev)
6874 {
6875 bool wake;
6876
6877 __igb_shutdown(pdev, &wake, 0);
6878
6879 if (system_state == SYSTEM_POWER_OFF) {
6880 pci_wake_from_d3(pdev, wake);
6881 pci_set_power_state(pdev, PCI_D3hot);
6882 }
6883 }
6884
6885 #ifdef CONFIG_NET_POLL_CONTROLLER
6886 /*
6887 * Polling 'interrupt' - used by things like netconsole to send skbs
6888 * without having to re-enable interrupts. It's not called while
6889 * the interrupt routine is executing.
6890 */
6891 static void igb_netpoll(struct net_device *netdev)
6892 {
6893 struct igb_adapter *adapter = netdev_priv(netdev);
6894 struct e1000_hw *hw = &adapter->hw;
6895 struct igb_q_vector *q_vector;
6896 int i;
6897
6898 for (i = 0; i < adapter->num_q_vectors; i++) {
6899 q_vector = adapter->q_vector[i];
6900 if (adapter->msix_entries)
6901 wr32(E1000_EIMC, q_vector->eims_value);
6902 else
6903 igb_irq_disable(adapter);
6904 napi_schedule(&q_vector->napi);
6905 }
6906 }
6907 #endif /* CONFIG_NET_POLL_CONTROLLER */
6908
6909 /**
6910 * igb_io_error_detected - called when PCI error is detected
6911 * @pdev: Pointer to PCI device
6912 * @state: The current pci connection state
6913 *
6914 * This function is called after a PCI bus error affecting
6915 * this device has been detected.
6916 */
6917 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6918 pci_channel_state_t state)
6919 {
6920 struct net_device *netdev = pci_get_drvdata(pdev);
6921 struct igb_adapter *adapter = netdev_priv(netdev);
6922
6923 netif_device_detach(netdev);
6924
6925 if (state == pci_channel_io_perm_failure)
6926 return PCI_ERS_RESULT_DISCONNECT;
6927
6928 if (netif_running(netdev))
6929 igb_down(adapter);
6930 pci_disable_device(pdev);
6931
6932 /* Request a slot slot reset. */
6933 return PCI_ERS_RESULT_NEED_RESET;
6934 }
6935
6936 /**
6937 * igb_io_slot_reset - called after the pci bus has been reset.
6938 * @pdev: Pointer to PCI device
6939 *
6940 * Restart the card from scratch, as if from a cold-boot. Implementation
6941 * resembles the first-half of the igb_resume routine.
6942 */
6943 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6944 {
6945 struct net_device *netdev = pci_get_drvdata(pdev);
6946 struct igb_adapter *adapter = netdev_priv(netdev);
6947 struct e1000_hw *hw = &adapter->hw;
6948 pci_ers_result_t result;
6949 int err;
6950
6951 if (pci_enable_device_mem(pdev)) {
6952 dev_err(&pdev->dev,
6953 "Cannot re-enable PCI device after reset.\n");
6954 result = PCI_ERS_RESULT_DISCONNECT;
6955 } else {
6956 pci_set_master(pdev);
6957 pci_restore_state(pdev);
6958 pci_save_state(pdev);
6959
6960 pci_enable_wake(pdev, PCI_D3hot, 0);
6961 pci_enable_wake(pdev, PCI_D3cold, 0);
6962
6963 igb_reset(adapter);
6964 wr32(E1000_WUS, ~0);
6965 result = PCI_ERS_RESULT_RECOVERED;
6966 }
6967
6968 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6969 if (err) {
6970 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6971 "failed 0x%0x\n", err);
6972 /* non-fatal, continue */
6973 }
6974
6975 return result;
6976 }
6977
6978 /**
6979 * igb_io_resume - called when traffic can start flowing again.
6980 * @pdev: Pointer to PCI device
6981 *
6982 * This callback is called when the error recovery driver tells us that
6983 * its OK to resume normal operation. Implementation resembles the
6984 * second-half of the igb_resume routine.
6985 */
6986 static void igb_io_resume(struct pci_dev *pdev)
6987 {
6988 struct net_device *netdev = pci_get_drvdata(pdev);
6989 struct igb_adapter *adapter = netdev_priv(netdev);
6990
6991 if (netif_running(netdev)) {
6992 if (igb_up(adapter)) {
6993 dev_err(&pdev->dev, "igb_up failed after reset\n");
6994 return;
6995 }
6996 }
6997
6998 netif_device_attach(netdev);
6999
7000 /* let the f/w know that the h/w is now under the control of the
7001 * driver. */
7002 igb_get_hw_control(adapter);
7003 }
7004
7005 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
7006 u8 qsel)
7007 {
7008 u32 rar_low, rar_high;
7009 struct e1000_hw *hw = &adapter->hw;
7010
7011 /* HW expects these in little endian so we reverse the byte order
7012 * from network order (big endian) to little endian
7013 */
7014 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
7015 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
7016 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
7017
7018 /* Indicate to hardware the Address is Valid. */
7019 rar_high |= E1000_RAH_AV;
7020
7021 if (hw->mac.type == e1000_82575)
7022 rar_high |= E1000_RAH_POOL_1 * qsel;
7023 else
7024 rar_high |= E1000_RAH_POOL_1 << qsel;
7025
7026 wr32(E1000_RAL(index), rar_low);
7027 wrfl();
7028 wr32(E1000_RAH(index), rar_high);
7029 wrfl();
7030 }
7031
7032 static int igb_set_vf_mac(struct igb_adapter *adapter,
7033 int vf, unsigned char *mac_addr)
7034 {
7035 struct e1000_hw *hw = &adapter->hw;
7036 /* VF MAC addresses start at end of receive addresses and moves
7037 * torwards the first, as a result a collision should not be possible */
7038 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
7039
7040 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
7041
7042 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
7043
7044 return 0;
7045 }
7046
7047 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
7048 {
7049 struct igb_adapter *adapter = netdev_priv(netdev);
7050 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
7051 return -EINVAL;
7052 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
7053 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
7054 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
7055 " change effective.");
7056 if (test_bit(__IGB_DOWN, &adapter->state)) {
7057 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
7058 " but the PF device is not up.\n");
7059 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
7060 " attempting to use the VF device.\n");
7061 }
7062 return igb_set_vf_mac(adapter, vf, mac);
7063 }
7064
7065 static int igb_link_mbps(int internal_link_speed)
7066 {
7067 switch (internal_link_speed) {
7068 case SPEED_100:
7069 return 100;
7070 case SPEED_1000:
7071 return 1000;
7072 default:
7073 return 0;
7074 }
7075 }
7076
7077 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
7078 int link_speed)
7079 {
7080 int rf_dec, rf_int;
7081 u32 bcnrc_val;
7082
7083 if (tx_rate != 0) {
7084 /* Calculate the rate factor values to set */
7085 rf_int = link_speed / tx_rate;
7086 rf_dec = (link_speed - (rf_int * tx_rate));
7087 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
7088
7089 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
7090 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
7091 E1000_RTTBCNRC_RF_INT_MASK);
7092 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
7093 } else {
7094 bcnrc_val = 0;
7095 }
7096
7097 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
7098 wr32(E1000_RTTBCNRC, bcnrc_val);
7099 }
7100
7101 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
7102 {
7103 int actual_link_speed, i;
7104 bool reset_rate = false;
7105
7106 /* VF TX rate limit was not set or not supported */
7107 if ((adapter->vf_rate_link_speed == 0) ||
7108 (adapter->hw.mac.type != e1000_82576))
7109 return;
7110
7111 actual_link_speed = igb_link_mbps(adapter->link_speed);
7112 if (actual_link_speed != adapter->vf_rate_link_speed) {
7113 reset_rate = true;
7114 adapter->vf_rate_link_speed = 0;
7115 dev_info(&adapter->pdev->dev,
7116 "Link speed has been changed. VF Transmit "
7117 "rate is disabled\n");
7118 }
7119
7120 for (i = 0; i < adapter->vfs_allocated_count; i++) {
7121 if (reset_rate)
7122 adapter->vf_data[i].tx_rate = 0;
7123
7124 igb_set_vf_rate_limit(&adapter->hw, i,
7125 adapter->vf_data[i].tx_rate,
7126 actual_link_speed);
7127 }
7128 }
7129
7130 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
7131 {
7132 struct igb_adapter *adapter = netdev_priv(netdev);
7133 struct e1000_hw *hw = &adapter->hw;
7134 int actual_link_speed;
7135
7136 if (hw->mac.type != e1000_82576)
7137 return -EOPNOTSUPP;
7138
7139 actual_link_speed = igb_link_mbps(adapter->link_speed);
7140 if ((vf >= adapter->vfs_allocated_count) ||
7141 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
7142 (tx_rate < 0) || (tx_rate > actual_link_speed))
7143 return -EINVAL;
7144
7145 adapter->vf_rate_link_speed = actual_link_speed;
7146 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
7147 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
7148
7149 return 0;
7150 }
7151
7152 static int igb_ndo_get_vf_config(struct net_device *netdev,
7153 int vf, struct ifla_vf_info *ivi)
7154 {
7155 struct igb_adapter *adapter = netdev_priv(netdev);
7156 if (vf >= adapter->vfs_allocated_count)
7157 return -EINVAL;
7158 ivi->vf = vf;
7159 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
7160 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
7161 ivi->vlan = adapter->vf_data[vf].pf_vlan;
7162 ivi->qos = adapter->vf_data[vf].pf_qos;
7163 return 0;
7164 }
7165
7166 static void igb_vmm_control(struct igb_adapter *adapter)
7167 {
7168 struct e1000_hw *hw = &adapter->hw;
7169 u32 reg;
7170
7171 switch (hw->mac.type) {
7172 case e1000_82575:
7173 default:
7174 /* replication is not supported for 82575 */
7175 return;
7176 case e1000_82576:
7177 /* notify HW that the MAC is adding vlan tags */
7178 reg = rd32(E1000_DTXCTL);
7179 reg |= E1000_DTXCTL_VLAN_ADDED;
7180 wr32(E1000_DTXCTL, reg);
7181 case e1000_82580:
7182 /* enable replication vlan tag stripping */
7183 reg = rd32(E1000_RPLOLR);
7184 reg |= E1000_RPLOLR_STRVLAN;
7185 wr32(E1000_RPLOLR, reg);
7186 case e1000_i350:
7187 /* none of the above registers are supported by i350 */
7188 break;
7189 }
7190
7191 if (adapter->vfs_allocated_count) {
7192 igb_vmdq_set_loopback_pf(hw, true);
7193 igb_vmdq_set_replication_pf(hw, true);
7194 igb_vmdq_set_anti_spoofing_pf(hw, true,
7195 adapter->vfs_allocated_count);
7196 } else {
7197 igb_vmdq_set_loopback_pf(hw, false);
7198 igb_vmdq_set_replication_pf(hw, false);
7199 }
7200 }
7201
7202 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba)
7203 {
7204 struct e1000_hw *hw = &adapter->hw;
7205 u32 dmac_thr;
7206 u16 hwm;
7207
7208 if (hw->mac.type > e1000_82580) {
7209 if (adapter->flags & IGB_FLAG_DMAC) {
7210 u32 reg;
7211
7212 /* force threshold to 0. */
7213 wr32(E1000_DMCTXTH, 0);
7214
7215 /*
7216 * DMA Coalescing high water mark needs to be greater
7217 * than the Rx threshold. Set hwm to PBA - max frame
7218 * size in 16B units, capping it at PBA - 6KB.
7219 */
7220 hwm = 64 * pba - adapter->max_frame_size / 16;
7221 if (hwm < 64 * (pba - 6))
7222 hwm = 64 * (pba - 6);
7223 reg = rd32(E1000_FCRTC);
7224 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
7225 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
7226 & E1000_FCRTC_RTH_COAL_MASK);
7227 wr32(E1000_FCRTC, reg);
7228
7229 /*
7230 * Set the DMA Coalescing Rx threshold to PBA - 2 * max
7231 * frame size, capping it at PBA - 10KB.
7232 */
7233 dmac_thr = pba - adapter->max_frame_size / 512;
7234 if (dmac_thr < pba - 10)
7235 dmac_thr = pba - 10;
7236 reg = rd32(E1000_DMACR);
7237 reg &= ~E1000_DMACR_DMACTHR_MASK;
7238 reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT)
7239 & E1000_DMACR_DMACTHR_MASK);
7240
7241 /* transition to L0x or L1 if available..*/
7242 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
7243
7244 /* watchdog timer= +-1000 usec in 32usec intervals */
7245 reg |= (1000 >> 5);
7246 wr32(E1000_DMACR, reg);
7247
7248 /*
7249 * no lower threshold to disable
7250 * coalescing(smart fifb)-UTRESH=0
7251 */
7252 wr32(E1000_DMCRTRH, 0);
7253
7254 reg = (IGB_DMCTLX_DCFLUSH_DIS | 0x4);
7255
7256 wr32(E1000_DMCTLX, reg);
7257
7258 /*
7259 * free space in tx packet buffer to wake from
7260 * DMA coal
7261 */
7262 wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
7263 (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
7264
7265 /*
7266 * make low power state decision controlled
7267 * by DMA coal
7268 */
7269 reg = rd32(E1000_PCIEMISC);
7270 reg &= ~E1000_PCIEMISC_LX_DECISION;
7271 wr32(E1000_PCIEMISC, reg);
7272 } /* endif adapter->dmac is not disabled */
7273 } else if (hw->mac.type == e1000_82580) {
7274 u32 reg = rd32(E1000_PCIEMISC);
7275 wr32(E1000_PCIEMISC, reg & ~E1000_PCIEMISC_LX_DECISION);
7276 wr32(E1000_DMACR, 0);
7277 }
7278 }
7279
7280 /* igb_main.c */
This page took 0.194084 seconds and 6 git commands to generate.