Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/jkirsher/next...
[deliverable/linux.git] / drivers / net / ethernet / intel / igb / igb_main.c
1 /*******************************************************************************
2
3 Intel(R) Gigabit Ethernet Linux driver
4 Copyright(c) 2007-2011 Intel Corporation.
5
6 This program is free software; you can redistribute it and/or modify it
7 under the terms and conditions of the GNU General Public License,
8 version 2, as published by the Free Software Foundation.
9
10 This program is distributed in the hope it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 more details.
14
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19 The full GNU General Public License is included in this distribution in
20 the file called "COPYING".
21
22 Contact Information:
23 e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24 Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/bitops.h>
32 #include <linux/vmalloc.h>
33 #include <linux/pagemap.h>
34 #include <linux/netdevice.h>
35 #include <linux/ipv6.h>
36 #include <linux/slab.h>
37 #include <net/checksum.h>
38 #include <net/ip6_checksum.h>
39 #include <linux/net_tstamp.h>
40 #include <linux/mii.h>
41 #include <linux/ethtool.h>
42 #include <linux/if_vlan.h>
43 #include <linux/pci.h>
44 #include <linux/pci-aspm.h>
45 #include <linux/delay.h>
46 #include <linux/interrupt.h>
47 #include <linux/if_ether.h>
48 #include <linux/aer.h>
49 #include <linux/prefetch.h>
50 #ifdef CONFIG_IGB_DCA
51 #include <linux/dca.h>
52 #endif
53 #include "igb.h"
54
/* Driver version components; DRV_VERSION stringifies them as "3.0.6-k". */
55 #define MAJ 3
56 #define MIN 0
57 #define BUILD 6
58 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
59 __stringify(BUILD) "-k"
/*
 * Name and version strings with external linkage (not static);
 * presumably shared with other igb source files -- confirm.
 */
60 char igb_driver_name[] = "igb";
61 char igb_driver_version[] = DRV_VERSION;
/* Banner strings printed by igb_init_module() when the module loads. */
62 static const char igb_driver_string[] =
63 "Intel(R) Gigabit Ethernet Network Driver";
64 static const char igb_copyright[] = "Copyright (c) 2007-2011 Intel Corporation.";
65
/*
 * Board-type index -> hardware info structure. Only board_82575 is
 * populated; every PCI ID in igb_pci_tbl uses that index.
 */
66 static const struct e1000_info *igb_info_tbl[] = {
67 [board_82575] = &e1000_82575_info,
68 };
69
/*
 * PCI IDs claimed by this driver (I350, 82580, DH89xxCC, 82576 and 82575
 * variants). Every entry carries board_82575 as its driver data; the table
 * is terminated by an all-zero entry.
 */
70 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
71 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
72 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
73 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
74 { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
75 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
76 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
77 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
78 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
79 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
80 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
81 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
82 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
83 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
84 { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
85 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
86 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
87 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
88 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
89 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
90 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
91 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
92 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
93 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
94 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
95 { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
96 /* required last entry */
97 {0, }
98 };
99
/* Export the ID table in the module's device table (pci type). */
100 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
101
/*
 * Forward declarations. igb_reset() is the only non-static entry here;
 * the static ones are presumably defined later in this file -- several
 * (igb_probe, igb_remove, igb_shutdown, ...) are referenced by the
 * igb_driver / igb_err_handler initializers below.
 */
102 void igb_reset(struct igb_adapter *);
103 static int igb_setup_all_tx_resources(struct igb_adapter *);
104 static int igb_setup_all_rx_resources(struct igb_adapter *);
105 static void igb_free_all_tx_resources(struct igb_adapter *);
106 static void igb_free_all_rx_resources(struct igb_adapter *);
107 static void igb_setup_mrqc(struct igb_adapter *);
108 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
109 static void __devexit igb_remove(struct pci_dev *pdev);
110 static void igb_init_hw_timer(struct igb_adapter *adapter);
111 static int igb_sw_init(struct igb_adapter *);
112 static int igb_open(struct net_device *);
113 static int igb_close(struct net_device *);
114 static void igb_configure_tx(struct igb_adapter *);
115 static void igb_configure_rx(struct igb_adapter *);
116 static void igb_clean_all_tx_rings(struct igb_adapter *);
117 static void igb_clean_all_rx_rings(struct igb_adapter *);
118 static void igb_clean_tx_ring(struct igb_ring *);
119 static void igb_clean_rx_ring(struct igb_ring *);
120 static void igb_set_rx_mode(struct net_device *);
121 static void igb_update_phy_info(unsigned long);
122 static void igb_watchdog(unsigned long);
123 static void igb_watchdog_task(struct work_struct *);
124 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
125 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
126 struct rtnl_link_stats64 *stats);
127 static int igb_change_mtu(struct net_device *, int);
128 static int igb_set_mac(struct net_device *, void *);
129 static void igb_set_uta(struct igb_adapter *adapter);
/* Interrupt handlers: legacy, MSI, and the two MSI-X flavours. */
130 static irqreturn_t igb_intr(int irq, void *);
131 static irqreturn_t igb_intr_msi(int irq, void *);
132 static irqreturn_t igb_msix_other(int irq, void *);
133 static irqreturn_t igb_msix_ring(int irq, void *);
134 #ifdef CONFIG_IGB_DCA
135 static void igb_update_dca(struct igb_q_vector *);
136 static void igb_setup_dca(struct igb_adapter *);
137 #endif /* CONFIG_IGB_DCA */
138 static bool igb_clean_tx_irq(struct igb_q_vector *);
139 static int igb_poll(struct napi_struct *, int);
140 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
141 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
142 static void igb_tx_timeout(struct net_device *);
143 static void igb_reset_task(struct work_struct *);
144 static void igb_vlan_mode(struct net_device *netdev, u32 features);
145 static void igb_vlan_rx_add_vid(struct net_device *, u16);
146 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
147 static void igb_restore_vlan(struct igb_adapter *);
148 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
/* Virtual function (VF) support helpers and ndo callbacks. */
149 static void igb_ping_all_vfs(struct igb_adapter *);
150 static void igb_msg_task(struct igb_adapter *);
151 static void igb_vmm_control(struct igb_adapter *);
152 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
153 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
154 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
155 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
156 int vf, u16 vlan, u8 qos);
157 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
158 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
159 struct ifla_vf_info *ivi);
160 static void igb_check_vf_rate_limit(struct igb_adapter *);
161
/* Power-management callbacks; only declared when CONFIG_PM is set. */
162 #ifdef CONFIG_PM
163 static int igb_suspend(struct pci_dev *, pm_message_t);
164 static int igb_resume(struct pci_dev *);
165 #endif
166 static void igb_shutdown(struct pci_dev *);
167 #ifdef CONFIG_IGB_DCA
/*
 * DCA notifier block; registered in igb_init_module() and unregistered in
 * igb_exit_module(). Its callback is igb_notify_dca().
 */
168 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
169 static struct notifier_block dca_notifier = {
170 .notifier_call = igb_notify_dca,
171 .next = NULL,
172 .priority = 0
173 };
174 #endif
175 #ifdef CONFIG_NET_POLL_CONTROLLER
176 /* for netdump / net console */
177 static void igb_netpoll(struct net_device *);
178 #endif
179 #ifdef CONFIG_PCI_IOV
/* Module parameter "max_vfs" (uint, permission bits 0), default 0. */
180 static unsigned int max_vfs = 0;
181 module_param(max_vfs, uint, 0);
182 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
183 "per physical function");
184 #endif /* CONFIG_PCI_IOV */
185
/* PCI error-recovery callbacks, wired into igb_err_handler below. */
186 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
187 pci_channel_state_t);
188 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
189 static void igb_io_resume(struct pci_dev *);
190
/* Error-handler table referenced by igb_driver.err_handler. */
191 static struct pci_error_handlers igb_err_handler = {
192 .error_detected = igb_io_error_detected,
193 .slot_reset = igb_io_slot_reset,
194 .resume = igb_io_resume,
195 };
196
197
/*
 * PCI driver descriptor handed to pci_register_driver() in
 * igb_init_module(): ID table, probe/remove, optional PM hooks,
 * shutdown and the PCI error handlers.
 */
198 static struct pci_driver igb_driver = {
199 .name = igb_driver_name,
200 .id_table = igb_pci_tbl,
201 .probe = igb_probe,
202 .remove = __devexit_p(igb_remove),
203 #ifdef CONFIG_PM
204 /* Power Management Hooks */
205 .suspend = igb_suspend,
206 .resume = igb_resume,
207 #endif
208 .shutdown = igb_shutdown,
209 .err_handler = &igb_err_handler
210 };
211
/* Module metadata; the version string is DRV_VERSION defined above. */
212 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
213 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
214 MODULE_LICENSE("GPL");
215 MODULE_VERSION(DRV_VERSION);
216
217 struct igb_reg_info {
218 u32 ofs;
219 char *name;
220 };
221
/*
 * Registers printed by igb_dump(). Per-queue registers are listed with
 * their queue-0 offset; igb_regdump() matches on that offset and prints
 * queues 0-3. The table ends with an all-zero entry (name == NULL), which
 * is what the walk in igb_dump() stops on.
 */
222 static const struct igb_reg_info igb_reg_info_tbl[] = {
223
224 /* General Registers */
225 {E1000_CTRL, "CTRL"},
226 {E1000_STATUS, "STATUS"},
227 {E1000_CTRL_EXT, "CTRL_EXT"},
228
229 /* Interrupt Registers */
230 {E1000_ICR, "ICR"},
231
232 /* RX Registers */
233 {E1000_RCTL, "RCTL"},
234 {E1000_RDLEN(0), "RDLEN"},
235 {E1000_RDH(0), "RDH"},
236 {E1000_RDT(0), "RDT"},
237 {E1000_RXDCTL(0), "RXDCTL"},
238 {E1000_RDBAL(0), "RDBAL"},
239 {E1000_RDBAH(0), "RDBAH"},
240
241 /* TX Registers */
242 {E1000_TCTL, "TCTL"},
243 {E1000_TDBAL(0), "TDBAL"},
244 {E1000_TDBAH(0), "TDBAH"},
245 {E1000_TDLEN(0), "TDLEN"},
246 {E1000_TDH(0), "TDH"},
247 {E1000_TDT(0), "TDT"},
248 {E1000_TXDCTL(0), "TXDCTL"},
249 {E1000_TDFH, "TDFH"},
250 {E1000_TDFT, "TDFT"},
251 {E1000_TDFHS, "TDFHS"},
252 {E1000_TDFPC, "TDFPC"},
253
254 /* List Terminator */
255 {}
256 };
257
258 /*
259 * igb_regdump - register printout routine
260 */
261 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
262 {
263 int n = 0;
264 char rname[16];
265 u32 regs[8];
266
267 switch (reginfo->ofs) {
268 case E1000_RDLEN(0):
269 for (n = 0; n < 4; n++)
270 regs[n] = rd32(E1000_RDLEN(n));
271 break;
272 case E1000_RDH(0):
273 for (n = 0; n < 4; n++)
274 regs[n] = rd32(E1000_RDH(n));
275 break;
276 case E1000_RDT(0):
277 for (n = 0; n < 4; n++)
278 regs[n] = rd32(E1000_RDT(n));
279 break;
280 case E1000_RXDCTL(0):
281 for (n = 0; n < 4; n++)
282 regs[n] = rd32(E1000_RXDCTL(n));
283 break;
284 case E1000_RDBAL(0):
285 for (n = 0; n < 4; n++)
286 regs[n] = rd32(E1000_RDBAL(n));
287 break;
288 case E1000_RDBAH(0):
289 for (n = 0; n < 4; n++)
290 regs[n] = rd32(E1000_RDBAH(n));
291 break;
292 case E1000_TDBAL(0):
293 for (n = 0; n < 4; n++)
294 regs[n] = rd32(E1000_RDBAL(n));
295 break;
296 case E1000_TDBAH(0):
297 for (n = 0; n < 4; n++)
298 regs[n] = rd32(E1000_TDBAH(n));
299 break;
300 case E1000_TDLEN(0):
301 for (n = 0; n < 4; n++)
302 regs[n] = rd32(E1000_TDLEN(n));
303 break;
304 case E1000_TDH(0):
305 for (n = 0; n < 4; n++)
306 regs[n] = rd32(E1000_TDH(n));
307 break;
308 case E1000_TDT(0):
309 for (n = 0; n < 4; n++)
310 regs[n] = rd32(E1000_TDT(n));
311 break;
312 case E1000_TXDCTL(0):
313 for (n = 0; n < 4; n++)
314 regs[n] = rd32(E1000_TXDCTL(n));
315 break;
316 default:
317 printk(KERN_INFO "%-15s %08x\n",
318 reginfo->name, rd32(reginfo->ofs));
319 return;
320 }
321
322 snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
323 printk(KERN_INFO "%-15s ", rname);
324 for (n = 0; n < 4; n++)
325 printk(KERN_CONT "%08x ", regs[n]);
326 printk(KERN_CONT "\n");
327 }
328
/*
 * igb_dump - Print registers, tx-rings and rx-rings
 * @adapter: board private structure to dump
 *
 * Output is gated by the adapter's message-level bits:
 *  - nothing at all unless netif_msg_hw() is set;
 *  - netdev info (if a netdev exists) and the register table always follow;
 *  - ring summaries only when the netdev exists and is running;
 *  - full Tx descriptor dump only with netif_msg_tx_done();
 *  - full Rx descriptor dump only with netif_msg_rx_status();
 *  - packet buffer hex dumps only with netif_msg_pktdata().
 */
332 static void igb_dump(struct igb_adapter *adapter)
333 {
334 struct net_device *netdev = adapter->netdev;
335 struct e1000_hw *hw = &adapter->hw;
336 struct igb_reg_info *reginfo;
337 int n = 0;
338 struct igb_ring *tx_ring;
339 union e1000_adv_tx_desc *tx_desc;
/* Overlay used to print any 16-byte descriptor as two raw 64-bit words. */
340 struct my_u0 { u64 a; u64 b; } *u0;
341 struct igb_buffer *buffer_info;
342 struct igb_ring *rx_ring;
343 union e1000_adv_rx_desc *rx_desc;
344 u32 staterr;
345 int i = 0;
346
347 if (!netif_msg_hw(adapter))
348 return;
349
350 /* Print netdevice Info */
351 if (netdev) {
352 dev_info(&adapter->pdev->dev, "Net device Info\n");
353 printk(KERN_INFO "Device Name state "
354 "trans_start last_rx\n");
355 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
356 netdev->name,
357 netdev->state,
358 netdev->trans_start,
359 netdev->last_rx);
360 }
361
362 /* Print Registers */
363 dev_info(&adapter->pdev->dev, "Register Dump\n");
364 printk(KERN_INFO " Register Name Value\n");
/* Walk the const table (cast drops const) until the NULL-name terminator. */
365 for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
366 reginfo->name; reginfo++) {
367 igb_regdump(hw, reginfo);
368 }
369
370 /* Print TX Ring Summary */
/* Ring state is only dumped for an existing, running netdev. */
371 if (!netdev || !netif_running(netdev))
372 goto exit;
373
374 dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
375 printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma ]"
376 " leng ntw timestamp\n");
377 for (n = 0; n < adapter->num_tx_queues; n++) {
378 tx_ring = adapter->tx_ring[n];
/* Summary line reports the buffer at next_to_clean. */
379 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
380 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
381 n, tx_ring->next_to_use, tx_ring->next_to_clean,
382 (u64)buffer_info->dma,
383 buffer_info->length,
384 buffer_info->next_to_watch,
385 (u64)buffer_info->time_stamp);
386 }
387
388 /* Print TX Rings */
389 if (!netif_msg_tx_done(adapter))
390 goto rx_ring_summary;
391
392 dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
393
394 /* Transmit Descriptor Formats
395 *
396 * Advanced Transmit Descriptor
397 * +--------------------------------------------------------------+
398 * 0 | Buffer Address [63:0] |
399 * +--------------------------------------------------------------+
400 * 8 | PAYLEN | PORTS |CC|IDX | STA | DCMD |DTYP|MAC|RSV| DTALEN |
401 * +--------------------------------------------------------------+
402 * 63 46 45 40 39 38 36 35 32 31 24 15 0
403 */
404
405 for (n = 0; n < adapter->num_tx_queues; n++) {
406 tx_ring = adapter->tx_ring[n];
407 printk(KERN_INFO "------------------------------------\n");
408 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
409 printk(KERN_INFO "------------------------------------\n");
410 printk(KERN_INFO "T [desc] [address 63:0 ] "
411 "[PlPOCIStDDM Ln] [bi->dma ] "
412 "leng ntw timestamp bi->skb\n");
413
/* Loop body is skipped entirely when the ring has no descriptor memory. */
414 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
415 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
416 buffer_info = &tx_ring->buffer_info[i];
417 u0 = (struct my_u0 *)tx_desc;
418 printk(KERN_INFO "T [0x%03X] %016llX %016llX %016llX"
419 " %04X %3X %016llX %p", i,
420 le64_to_cpu(u0->a),
421 le64_to_cpu(u0->b),
422 (u64)buffer_info->dma,
423 buffer_info->length,
424 buffer_info->next_to_watch,
425 (u64)buffer_info->time_stamp,
426 buffer_info->skb);
/* Finish the line, tagging the next_to_use / next_to_clean slots. */
427 if (i == tx_ring->next_to_use &&
428 i == tx_ring->next_to_clean)
429 printk(KERN_CONT " NTC/U\n");
430 else if (i == tx_ring->next_to_use)
431 printk(KERN_CONT " NTU\n");
432 else if (i == tx_ring->next_to_clean)
433 printk(KERN_CONT " NTC\n");
434 else
435 printk(KERN_CONT "\n");
436
/*
 * NOTE(review): phys_to_virt() is applied to a DMA address here; this
 * assumes DMA and physical addresses coincide -- confirm.
 */
437 if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
438 print_hex_dump(KERN_INFO, "",
439 DUMP_PREFIX_ADDRESS,
440 16, 1, phys_to_virt(buffer_info->dma),
441 buffer_info->length, true);
442 }
443 }
444
445 /* Print RX Rings Summary */
446 rx_ring_summary:
447 dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
448 printk(KERN_INFO "Queue [NTU] [NTC]\n");
449 for (n = 0; n < adapter->num_rx_queues; n++) {
450 rx_ring = adapter->rx_ring[n];
451 printk(KERN_INFO " %5d %5X %5X\n", n,
452 rx_ring->next_to_use, rx_ring->next_to_clean);
453 }
454
455 /* Print RX Rings */
456 if (!netif_msg_rx_status(adapter))
457 goto exit;
458
459 dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
460
461 /* Advanced Receive Descriptor (Read) Format
462 * 63 1 0
463 * +-----------------------------------------------------+
464 * 0 | Packet Buffer Address [63:1] |A0/NSE|
465 * +----------------------------------------------+------+
466 * 8 | Header Buffer Address [63:1] | DD |
467 * +-----------------------------------------------------+
468 *
469 *
470 * Advanced Receive Descriptor (Write-Back) Format
471 *
472 * 63 48 47 32 31 30 21 20 17 16 4 3 0
473 * +------------------------------------------------------+
474 * 0 | Packet IP |SPH| HDR_LEN | RSV|Packet| RSS |
475 * | Checksum Ident | | | | Type | Type |
476 * +------------------------------------------------------+
477 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
478 * +------------------------------------------------------+
479 * 63 48 47 32 31 20 19 0
480 */
481
482 for (n = 0; n < adapter->num_rx_queues; n++) {
483 rx_ring = adapter->rx_ring[n];
484 printk(KERN_INFO "------------------------------------\n");
485 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
486 printk(KERN_INFO "------------------------------------\n");
487 printk(KERN_INFO "R [desc] [ PktBuf A0] "
488 "[ HeadBuf DD] [bi->dma ] [bi->skb] "
489 "<-- Adv Rx Read format\n");
490 printk(KERN_INFO "RWB[desc] [PcsmIpSHl PtRs] "
491 "[vl er S cks ln] ---------------- [bi->skb] "
492 "<-- Adv Rx Write-Back format\n");
493
494 for (i = 0; i < rx_ring->count; i++) {
495 buffer_info = &rx_ring->buffer_info[i];
496 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
497 u0 = (struct my_u0 *)rx_desc;
498 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
/* DD set: print as a write-back ("RWB") line, without bi->dma. */
499 if (staterr & E1000_RXD_STAT_DD) {
500 /* Descriptor Done */
501 printk(KERN_INFO "RWB[0x%03X] %016llX "
502 "%016llX ---------------- %p", i,
503 le64_to_cpu(u0->a),
504 le64_to_cpu(u0->b),
505 buffer_info->skb);
506 } else {
507 printk(KERN_INFO "R [0x%03X] %016llX "
508 "%016llX %016llX %p", i,
509 le64_to_cpu(u0->a),
510 le64_to_cpu(u0->b),
511 (u64)buffer_info->dma,
512 buffer_info->skb);
513
/*
 * NOTE(review): unlike the Tx path there is no dma != 0 check before
 * phys_to_virt(), and the hex dump is emitted before the line started
 * above is terminated by the KERN_CONT print below -- confirm intended.
 */
514 if (netif_msg_pktdata(adapter)) {
515 print_hex_dump(KERN_INFO, "",
516 DUMP_PREFIX_ADDRESS,
517 16, 1,
518 phys_to_virt(buffer_info->dma),
519 rx_ring->rx_buffer_len, true);
/* Buffers below IGB_RXBUFFER_1024: also dump half a page at page_dma + page_offset. */
520 if (rx_ring->rx_buffer_len
521 < IGB_RXBUFFER_1024)
522 print_hex_dump(KERN_INFO, "",
523 DUMP_PREFIX_ADDRESS,
524 16, 1,
525 phys_to_virt(
526 buffer_info->page_dma +
527 buffer_info->page_offset),
528 PAGE_SIZE/2, true);
529 }
530 }
531
532 if (i == rx_ring->next_to_use)
533 printk(KERN_CONT " NTU\n");
534 else if (i == rx_ring->next_to_clean)
535 printk(KERN_CONT " NTC\n");
536 else
537 printk(KERN_CONT "\n");
538
539 }
540 }
541
542 exit:
543 return;
544 }
545
546
547 /**
548 * igb_read_clock - read raw cycle counter (to be used by time counter)
549 */
550 static cycle_t igb_read_clock(const struct cyclecounter *tc)
551 {
552 struct igb_adapter *adapter =
553 container_of(tc, struct igb_adapter, cycles);
554 struct e1000_hw *hw = &adapter->hw;
555 u64 stamp = 0;
556 int shift = 0;
557
558 /*
559 * The timestamp latches on lowest register read. For the 82580
560 * the lowest register is SYSTIMR instead of SYSTIML. However we never
561 * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
562 */
563 if (hw->mac.type == e1000_82580) {
564 stamp = rd32(E1000_SYSTIMR) >> 8;
565 shift = IGB_82580_TSYNC_SHIFT;
566 }
567
568 stamp |= (u64)rd32(E1000_SYSTIML) << shift;
569 stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
570 return stamp;
571 }
572
573 /**
574 * igb_get_hw_dev - return device
575 * used by hardware layer to print debugging information
576 **/
577 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
578 {
579 struct igb_adapter *adapter = hw->back;
580 return adapter->netdev;
581 }
582
583 /**
584 * igb_init_module - Driver Registration Routine
585 *
586 * igb_init_module is the first routine called when the driver is
587 * loaded. All it does is register with the PCI subsystem.
588 **/
589 static int __init igb_init_module(void)
590 {
591 int ret;
592 printk(KERN_INFO "%s - version %s\n",
593 igb_driver_string, igb_driver_version);
594
595 printk(KERN_INFO "%s\n", igb_copyright);
596
597 #ifdef CONFIG_IGB_DCA
598 dca_register_notify(&dca_notifier);
599 #endif
600 ret = pci_register_driver(&igb_driver);
601 return ret;
602 }
603
604 module_init(igb_init_module);
605
606 /**
607 * igb_exit_module - Driver Exit Cleanup Routine
608 *
609 * igb_exit_module is called just before the driver is removed
610 * from memory.
611 **/
612 static void __exit igb_exit_module(void)
613 {
614 #ifdef CONFIG_IGB_DCA
615 dca_unregister_notify(&dca_notifier);
616 #endif
617 pci_unregister_driver(&igb_driver);
618 }
619
620 module_exit(igb_exit_module);
621
/*
 * Map a ring number onto the 82576 queue numbering used with VFs:
 * 0 -> 0, 1 -> 8, 2 -> 1, 3 -> 9, ... The argument is parenthesized so
 * that expressions with low-precedence operators expand correctly.
 */
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
623 /**
624 * igb_cache_ring_register - Descriptor ring to register mapping
625 * @adapter: board private structure to initialize
626 *
627 * Once we know the feature-set enabled for the device, we'll cache
628 * the register offset the descriptor ring is assigned to.
629 **/
630 static void igb_cache_ring_register(struct igb_adapter *adapter)
631 {
632 int i = 0, j = 0;
633 u32 rbase_offset = adapter->vfs_allocated_count;
634
635 switch (adapter->hw.mac.type) {
636 case e1000_82576:
637 /* The queues are allocated for virtualization such that VF 0
638 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
639 * In order to avoid collision we start at the first free queue
640 * and continue consuming queues in the same sequence
641 */
642 if (adapter->vfs_allocated_count) {
643 for (; i < adapter->rss_queues; i++)
644 adapter->rx_ring[i]->reg_idx = rbase_offset +
645 Q_IDX_82576(i);
646 }
647 case e1000_82575:
648 case e1000_82580:
649 case e1000_i350:
650 default:
651 for (; i < adapter->num_rx_queues; i++)
652 adapter->rx_ring[i]->reg_idx = rbase_offset + i;
653 for (; j < adapter->num_tx_queues; j++)
654 adapter->tx_ring[j]->reg_idx = rbase_offset + j;
655 break;
656 }
657 }
658
659 static void igb_free_queues(struct igb_adapter *adapter)
660 {
661 int i;
662
663 for (i = 0; i < adapter->num_tx_queues; i++) {
664 kfree(adapter->tx_ring[i]);
665 adapter->tx_ring[i] = NULL;
666 }
667 for (i = 0; i < adapter->num_rx_queues; i++) {
668 kfree(adapter->rx_ring[i]);
669 adapter->rx_ring[i] = NULL;
670 }
671 adapter->num_rx_queues = 0;
672 adapter->num_tx_queues = 0;
673 }
674
675 /**
676 * igb_alloc_queues - Allocate memory for all rings
677 * @adapter: board private structure to initialize
678 *
679 * We allocate one ring per queue at run-time since we don't know the
680 * number of queues at compile-time.
681 **/
682 static int igb_alloc_queues(struct igb_adapter *adapter)
683 {
684 struct igb_ring *ring;
685 int i;
686
687 for (i = 0; i < adapter->num_tx_queues; i++) {
688 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
689 if (!ring)
690 goto err;
691 ring->count = adapter->tx_ring_count;
692 ring->queue_index = i;
693 ring->dev = &adapter->pdev->dev;
694 ring->netdev = adapter->netdev;
695 /* For 82575, context index must be unique per ring. */
696 if (adapter->hw.mac.type == e1000_82575)
697 ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
698 adapter->tx_ring[i] = ring;
699 }
700
701 for (i = 0; i < adapter->num_rx_queues; i++) {
702 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
703 if (!ring)
704 goto err;
705 ring->count = adapter->rx_ring_count;
706 ring->queue_index = i;
707 ring->dev = &adapter->pdev->dev;
708 ring->netdev = adapter->netdev;
709 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
710 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
711 /* set flag indicating ring supports SCTP checksum offload */
712 if (adapter->hw.mac.type >= e1000_82576)
713 ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
714 adapter->rx_ring[i] = ring;
715 }
716
717 igb_cache_ring_register(adapter);
718
719 return 0;
720
721 err:
722 igb_free_queues(adapter);
723
724 return -ENOMEM;
725 }
726
727 #define IGB_N0_QUEUE -1
728 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
729 {
730 u32 msixbm = 0;
731 struct igb_adapter *adapter = q_vector->adapter;
732 struct e1000_hw *hw = &adapter->hw;
733 u32 ivar, index;
734 int rx_queue = IGB_N0_QUEUE;
735 int tx_queue = IGB_N0_QUEUE;
736
737 if (q_vector->rx_ring)
738 rx_queue = q_vector->rx_ring->reg_idx;
739 if (q_vector->tx_ring)
740 tx_queue = q_vector->tx_ring->reg_idx;
741
742 switch (hw->mac.type) {
743 case e1000_82575:
744 /* The 82575 assigns vectors using a bitmask, which matches the
745 bitmask for the EICR/EIMS/EIMC registers. To assign one
746 or more queues to a vector, we write the appropriate bits
747 into the MSIXBM register for that vector. */
748 if (rx_queue > IGB_N0_QUEUE)
749 msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
750 if (tx_queue > IGB_N0_QUEUE)
751 msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
752 if (!adapter->msix_entries && msix_vector == 0)
753 msixbm |= E1000_EIMS_OTHER;
754 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
755 q_vector->eims_value = msixbm;
756 break;
757 case e1000_82576:
758 /* 82576 uses a table-based method for assigning vectors.
759 Each queue has a single entry in the table to which we write
760 a vector number along with a "valid" bit. Sadly, the layout
761 of the table is somewhat counterintuitive. */
762 if (rx_queue > IGB_N0_QUEUE) {
763 index = (rx_queue & 0x7);
764 ivar = array_rd32(E1000_IVAR0, index);
765 if (rx_queue < 8) {
766 /* vector goes into low byte of register */
767 ivar = ivar & 0xFFFFFF00;
768 ivar |= msix_vector | E1000_IVAR_VALID;
769 } else {
770 /* vector goes into third byte of register */
771 ivar = ivar & 0xFF00FFFF;
772 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
773 }
774 array_wr32(E1000_IVAR0, index, ivar);
775 }
776 if (tx_queue > IGB_N0_QUEUE) {
777 index = (tx_queue & 0x7);
778 ivar = array_rd32(E1000_IVAR0, index);
779 if (tx_queue < 8) {
780 /* vector goes into second byte of register */
781 ivar = ivar & 0xFFFF00FF;
782 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
783 } else {
784 /* vector goes into high byte of register */
785 ivar = ivar & 0x00FFFFFF;
786 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
787 }
788 array_wr32(E1000_IVAR0, index, ivar);
789 }
790 q_vector->eims_value = 1 << msix_vector;
791 break;
792 case e1000_82580:
793 case e1000_i350:
794 /* 82580 uses the same table-based approach as 82576 but has fewer
795 entries as a result we carry over for queues greater than 4. */
796 if (rx_queue > IGB_N0_QUEUE) {
797 index = (rx_queue >> 1);
798 ivar = array_rd32(E1000_IVAR0, index);
799 if (rx_queue & 0x1) {
800 /* vector goes into third byte of register */
801 ivar = ivar & 0xFF00FFFF;
802 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
803 } else {
804 /* vector goes into low byte of register */
805 ivar = ivar & 0xFFFFFF00;
806 ivar |= msix_vector | E1000_IVAR_VALID;
807 }
808 array_wr32(E1000_IVAR0, index, ivar);
809 }
810 if (tx_queue > IGB_N0_QUEUE) {
811 index = (tx_queue >> 1);
812 ivar = array_rd32(E1000_IVAR0, index);
813 if (tx_queue & 0x1) {
814 /* vector goes into high byte of register */
815 ivar = ivar & 0x00FFFFFF;
816 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
817 } else {
818 /* vector goes into second byte of register */
819 ivar = ivar & 0xFFFF00FF;
820 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
821 }
822 array_wr32(E1000_IVAR0, index, ivar);
823 }
824 q_vector->eims_value = 1 << msix_vector;
825 break;
826 default:
827 BUG();
828 break;
829 }
830
831 /* add q_vector eims value to global eims_enable_mask */
832 adapter->eims_enable_mask |= q_vector->eims_value;
833
834 /* configure q_vector to set itr on first interrupt */
835 q_vector->set_itr = 1;
836 }
837
838 /**
839 * igb_configure_msix - Configure MSI-X hardware
840 *
841 * igb_configure_msix sets up the hardware to properly
842 * generate MSI-X interrupts.
843 **/
844 static void igb_configure_msix(struct igb_adapter *adapter)
845 {
846 u32 tmp;
847 int i, vector = 0;
848 struct e1000_hw *hw = &adapter->hw;
849
850 adapter->eims_enable_mask = 0;
851
852 /* set vector for other causes, i.e. link changes */
853 switch (hw->mac.type) {
854 case e1000_82575:
855 tmp = rd32(E1000_CTRL_EXT);
856 /* enable MSI-X PBA support*/
857 tmp |= E1000_CTRL_EXT_PBA_CLR;
858
859 /* Auto-Mask interrupts upon ICR read. */
860 tmp |= E1000_CTRL_EXT_EIAME;
861 tmp |= E1000_CTRL_EXT_IRCA;
862
863 wr32(E1000_CTRL_EXT, tmp);
864
865 /* enable msix_other interrupt */
866 array_wr32(E1000_MSIXBM(0), vector++,
867 E1000_EIMS_OTHER);
868 adapter->eims_other = E1000_EIMS_OTHER;
869
870 break;
871
872 case e1000_82576:
873 case e1000_82580:
874 case e1000_i350:
875 /* Turn on MSI-X capability first, or our settings
876 * won't stick. And it will take days to debug. */
877 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
878 E1000_GPIE_PBA | E1000_GPIE_EIAME |
879 E1000_GPIE_NSICR);
880
881 /* enable msix_other interrupt */
882 adapter->eims_other = 1 << vector;
883 tmp = (vector++ | E1000_IVAR_VALID) << 8;
884
885 wr32(E1000_IVAR_MISC, tmp);
886 break;
887 default:
888 /* do nothing, since nothing else supports MSI-X */
889 break;
890 } /* switch (hw->mac.type) */
891
892 adapter->eims_enable_mask |= adapter->eims_other;
893
894 for (i = 0; i < adapter->num_q_vectors; i++)
895 igb_assign_vector(adapter->q_vector[i], vector++);
896
897 wrfl();
898 }
899
900 /**
901 * igb_request_msix - Initialize MSI-X interrupts
902 *
903 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
904 * kernel.
905 **/
906 static int igb_request_msix(struct igb_adapter *adapter)
907 {
908 struct net_device *netdev = adapter->netdev;
909 struct e1000_hw *hw = &adapter->hw;
910 int i, err = 0, vector = 0;
911
912 err = request_irq(adapter->msix_entries[vector].vector,
913 igb_msix_other, 0, netdev->name, adapter);
914 if (err)
915 goto out;
916 vector++;
917
918 for (i = 0; i < adapter->num_q_vectors; i++) {
919 struct igb_q_vector *q_vector = adapter->q_vector[i];
920
921 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
922
923 if (q_vector->rx_ring && q_vector->tx_ring)
924 sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
925 q_vector->rx_ring->queue_index);
926 else if (q_vector->tx_ring)
927 sprintf(q_vector->name, "%s-tx-%u", netdev->name,
928 q_vector->tx_ring->queue_index);
929 else if (q_vector->rx_ring)
930 sprintf(q_vector->name, "%s-rx-%u", netdev->name,
931 q_vector->rx_ring->queue_index);
932 else
933 sprintf(q_vector->name, "%s-unused", netdev->name);
934
935 err = request_irq(adapter->msix_entries[vector].vector,
936 igb_msix_ring, 0, q_vector->name,
937 q_vector);
938 if (err)
939 goto out;
940 vector++;
941 }
942
943 igb_configure_msix(adapter);
944 return 0;
945 out:
946 return err;
947 }
948
949 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
950 {
951 if (adapter->msix_entries) {
952 pci_disable_msix(adapter->pdev);
953 kfree(adapter->msix_entries);
954 adapter->msix_entries = NULL;
955 } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
956 pci_disable_msi(adapter->pdev);
957 }
958 }
959
960 /**
961 * igb_free_q_vectors - Free memory allocated for interrupt vectors
962 * @adapter: board private structure to initialize
963 *
964 * This function frees the memory allocated to the q_vectors. In addition if
965 * NAPI is enabled it will delete any references to the NAPI struct prior
966 * to freeing the q_vector.
967 **/
968 static void igb_free_q_vectors(struct igb_adapter *adapter)
969 {
970 int v_idx;
971
972 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
973 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
974 adapter->q_vector[v_idx] = NULL;
975 if (!q_vector)
976 continue;
977 netif_napi_del(&q_vector->napi);
978 kfree(q_vector);
979 }
980 adapter->num_q_vectors = 0;
981 }
982
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * Releases, in order, the queue structures, the q_vectors and finally the
 * MSI-X/MSI capability, leaving the adapter without rx queues, tx queues
 * or message-signalled interrupts.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* rings first, then the vectors that referenced them */
	igb_free_queues(adapter);
	igb_free_q_vectors(adapter);

	/* finally drop MSI-X / MSI itself */
	igb_reset_interrupt_capability(adapter);
}
995
996 /**
997 * igb_set_interrupt_capability - set MSI or MSI-X if supported
998 *
999 * Attempt to configure interrupts using the best available
1000 * capabilities of the hardware and kernel.
1001 **/
1002 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1003 {
1004 int err;
1005 int numvecs, i;
1006
1007 /* Number of supported queues. */
1008 adapter->num_rx_queues = adapter->rss_queues;
1009 if (adapter->vfs_allocated_count)
1010 adapter->num_tx_queues = 1;
1011 else
1012 adapter->num_tx_queues = adapter->rss_queues;
1013
1014 /* start with one vector for every rx queue */
1015 numvecs = adapter->num_rx_queues;
1016
1017 /* if tx handler is separate add 1 for every tx queue */
1018 if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1019 numvecs += adapter->num_tx_queues;
1020
1021 /* store the number of vectors reserved for queues */
1022 adapter->num_q_vectors = numvecs;
1023
1024 /* add 1 vector for link status interrupts */
1025 numvecs++;
1026 adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1027 GFP_KERNEL);
1028 if (!adapter->msix_entries)
1029 goto msi_only;
1030
1031 for (i = 0; i < numvecs; i++)
1032 adapter->msix_entries[i].entry = i;
1033
1034 err = pci_enable_msix(adapter->pdev,
1035 adapter->msix_entries,
1036 numvecs);
1037 if (err == 0)
1038 goto out;
1039
1040 igb_reset_interrupt_capability(adapter);
1041
1042 /* If we can't do MSI-X, try MSI */
1043 msi_only:
1044 #ifdef CONFIG_PCI_IOV
1045 /* disable SR-IOV for non MSI-X configurations */
1046 if (adapter->vf_data) {
1047 struct e1000_hw *hw = &adapter->hw;
1048 /* disable iov and allow time for transactions to clear */
1049 pci_disable_sriov(adapter->pdev);
1050 msleep(500);
1051
1052 kfree(adapter->vf_data);
1053 adapter->vf_data = NULL;
1054 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1055 wrfl();
1056 msleep(100);
1057 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1058 }
1059 #endif
1060 adapter->vfs_allocated_count = 0;
1061 adapter->rss_queues = 1;
1062 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1063 adapter->num_rx_queues = 1;
1064 adapter->num_tx_queues = 1;
1065 adapter->num_q_vectors = 1;
1066 if (!pci_enable_msi(adapter->pdev))
1067 adapter->flags |= IGB_FLAG_HAS_MSI;
1068 out:
1069 /* Notify the stack of the (possibly) reduced queue counts. */
1070 netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1071 return netif_set_real_num_rx_queues(adapter->netdev,
1072 adapter->num_rx_queues);
1073 }
1074
1075 /**
1076 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1077 * @adapter: board private structure to initialize
1078 *
1079 * We allocate one q_vector per queue interrupt. If allocation fails we
1080 * return -ENOMEM.
1081 **/
1082 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1083 {
1084 struct igb_q_vector *q_vector;
1085 struct e1000_hw *hw = &adapter->hw;
1086 int v_idx;
1087
1088 for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1089 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1090 if (!q_vector)
1091 goto err_out;
1092 q_vector->adapter = adapter;
1093 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1094 q_vector->itr_val = IGB_START_ITR;
1095 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1096 adapter->q_vector[v_idx] = q_vector;
1097 }
1098 return 0;
1099
1100 err_out:
1101 igb_free_q_vectors(adapter);
1102 return -ENOMEM;
1103 }
1104
1105 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1106 int ring_idx, int v_idx)
1107 {
1108 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1109
1110 q_vector->rx_ring = adapter->rx_ring[ring_idx];
1111 q_vector->rx_ring->q_vector = q_vector;
1112 q_vector->itr_val = adapter->rx_itr_setting;
1113 if (q_vector->itr_val && q_vector->itr_val <= 3)
1114 q_vector->itr_val = IGB_START_ITR;
1115 }
1116
1117 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1118 int ring_idx, int v_idx)
1119 {
1120 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1121
1122 q_vector->tx_ring = adapter->tx_ring[ring_idx];
1123 q_vector->tx_ring->q_vector = q_vector;
1124 q_vector->itr_val = adapter->tx_itr_setting;
1125 if (q_vector->itr_val && q_vector->itr_val <= 3)
1126 q_vector->itr_val = IGB_START_ITR;
1127 }
1128
1129 /**
1130 * igb_map_ring_to_vector - maps allocated queues to vectors
1131 *
1132 * This function maps the recently allocated queues to vectors.
1133 **/
1134 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1135 {
1136 int i;
1137 int v_idx = 0;
1138
1139 if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1140 (adapter->num_q_vectors < adapter->num_tx_queues))
1141 return -ENOMEM;
1142
1143 if (adapter->num_q_vectors >=
1144 (adapter->num_rx_queues + adapter->num_tx_queues)) {
1145 for (i = 0; i < adapter->num_rx_queues; i++)
1146 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1147 for (i = 0; i < adapter->num_tx_queues; i++)
1148 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1149 } else {
1150 for (i = 0; i < adapter->num_rx_queues; i++) {
1151 if (i < adapter->num_tx_queues)
1152 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1153 igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1154 }
1155 for (; i < adapter->num_tx_queues; i++)
1156 igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1157 }
1158 return 0;
1159 }
1160
1161 /**
1162 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1163 *
1164 * This function initializes the interrupts and allocates all of the queues.
1165 **/
1166 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1167 {
1168 struct pci_dev *pdev = adapter->pdev;
1169 int err;
1170
1171 err = igb_set_interrupt_capability(adapter);
1172 if (err)
1173 return err;
1174
1175 err = igb_alloc_q_vectors(adapter);
1176 if (err) {
1177 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1178 goto err_alloc_q_vectors;
1179 }
1180
1181 err = igb_alloc_queues(adapter);
1182 if (err) {
1183 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1184 goto err_alloc_queues;
1185 }
1186
1187 err = igb_map_ring_to_vector(adapter);
1188 if (err) {
1189 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1190 goto err_map_queues;
1191 }
1192
1193
1194 return 0;
1195 err_map_queues:
1196 igb_free_queues(adapter);
1197 err_alloc_queues:
1198 igb_free_q_vectors(adapter);
1199 err_alloc_q_vectors:
1200 igb_reset_interrupt_capability(adapter);
1201 return err;
1202 }
1203
1204 /**
1205 * igb_request_irq - initialize interrupts
1206 *
1207 * Attempts to configure interrupts using the best available
1208 * capabilities of the hardware and kernel.
1209 **/
1210 static int igb_request_irq(struct igb_adapter *adapter)
1211 {
1212 struct net_device *netdev = adapter->netdev;
1213 struct pci_dev *pdev = adapter->pdev;
1214 int err = 0;
1215
1216 if (adapter->msix_entries) {
1217 err = igb_request_msix(adapter);
1218 if (!err)
1219 goto request_done;
1220 /* fall back to MSI */
1221 igb_clear_interrupt_scheme(adapter);
1222 if (!pci_enable_msi(adapter->pdev))
1223 adapter->flags |= IGB_FLAG_HAS_MSI;
1224 igb_free_all_tx_resources(adapter);
1225 igb_free_all_rx_resources(adapter);
1226 adapter->num_tx_queues = 1;
1227 adapter->num_rx_queues = 1;
1228 adapter->num_q_vectors = 1;
1229 err = igb_alloc_q_vectors(adapter);
1230 if (err) {
1231 dev_err(&pdev->dev,
1232 "Unable to allocate memory for vectors\n");
1233 goto request_done;
1234 }
1235 err = igb_alloc_queues(adapter);
1236 if (err) {
1237 dev_err(&pdev->dev,
1238 "Unable to allocate memory for queues\n");
1239 igb_free_q_vectors(adapter);
1240 goto request_done;
1241 }
1242 igb_setup_all_tx_resources(adapter);
1243 igb_setup_all_rx_resources(adapter);
1244 } else {
1245 igb_assign_vector(adapter->q_vector[0], 0);
1246 }
1247
1248 if (adapter->flags & IGB_FLAG_HAS_MSI) {
1249 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1250 netdev->name, adapter);
1251 if (!err)
1252 goto request_done;
1253
1254 /* fall back to legacy interrupts */
1255 igb_reset_interrupt_capability(adapter);
1256 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1257 }
1258
1259 err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1260 netdev->name, adapter);
1261
1262 if (err)
1263 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1264 err);
1265
1266 request_done:
1267 return err;
1268 }
1269
1270 static void igb_free_irq(struct igb_adapter *adapter)
1271 {
1272 if (adapter->msix_entries) {
1273 int vector = 0, i;
1274
1275 free_irq(adapter->msix_entries[vector++].vector, adapter);
1276
1277 for (i = 0; i < adapter->num_q_vectors; i++) {
1278 struct igb_q_vector *q_vector = adapter->q_vector[i];
1279 free_irq(adapter->msix_entries[vector++].vector,
1280 q_vector);
1281 }
1282 } else {
1283 free_irq(adapter->pdev->irq, adapter);
1284 }
1285 }
1286
1287 /**
1288 * igb_irq_disable - Mask off interrupt generation on the NIC
1289 * @adapter: board private structure
1290 **/
1291 static void igb_irq_disable(struct igb_adapter *adapter)
1292 {
1293 struct e1000_hw *hw = &adapter->hw;
1294
1295 /*
1296 * we need to be careful when disabling interrupts. The VFs are also
1297 * mapped into these registers and so clearing the bits can cause
1298 * issues on the VF drivers so we only need to clear what we set
1299 */
1300 if (adapter->msix_entries) {
1301 u32 regval = rd32(E1000_EIAM);
1302 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1303 wr32(E1000_EIMC, adapter->eims_enable_mask);
1304 regval = rd32(E1000_EIAC);
1305 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1306 }
1307
1308 wr32(E1000_IAM, 0);
1309 wr32(E1000_IMC, ~0);
1310 wrfl();
1311 if (adapter->msix_entries) {
1312 int i;
1313 for (i = 0; i < adapter->num_q_vectors; i++)
1314 synchronize_irq(adapter->msix_entries[i].vector);
1315 } else {
1316 synchronize_irq(adapter->pdev->irq);
1317 }
1318 }
1319
1320 /**
1321 * igb_irq_enable - Enable default interrupt generation settings
1322 * @adapter: board private structure
1323 **/
1324 static void igb_irq_enable(struct igb_adapter *adapter)
1325 {
1326 struct e1000_hw *hw = &adapter->hw;
1327
1328 if (adapter->msix_entries) {
1329 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1330 u32 regval = rd32(E1000_EIAC);
1331 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1332 regval = rd32(E1000_EIAM);
1333 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1334 wr32(E1000_EIMS, adapter->eims_enable_mask);
1335 if (adapter->vfs_allocated_count) {
1336 wr32(E1000_MBVFIMR, 0xFF);
1337 ims |= E1000_IMS_VMMB;
1338 }
1339 if (adapter->hw.mac.type == e1000_82580)
1340 ims |= E1000_IMS_DRSTA;
1341
1342 wr32(E1000_IMS, ims);
1343 } else {
1344 wr32(E1000_IMS, IMS_ENABLE_MASK |
1345 E1000_IMS_DRSTA);
1346 wr32(E1000_IAM, IMS_ENABLE_MASK |
1347 E1000_IMS_DRSTA);
1348 }
1349 }
1350
1351 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1352 {
1353 struct e1000_hw *hw = &adapter->hw;
1354 u16 vid = adapter->hw.mng_cookie.vlan_id;
1355 u16 old_vid = adapter->mng_vlan_id;
1356
1357 if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1358 /* add VID to filter table */
1359 igb_vfta_set(hw, vid, true);
1360 adapter->mng_vlan_id = vid;
1361 } else {
1362 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1363 }
1364
1365 if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1366 (vid != old_vid) &&
1367 !test_bit(old_vid, adapter->active_vlans)) {
1368 /* remove VID from filter table */
1369 igb_vfta_set(hw, old_vid, false);
1370 }
1371 }
1372
1373 /**
1374 * igb_release_hw_control - release control of the h/w to f/w
1375 * @adapter: address of board private structure
1376 *
1377 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1378 * For ASF and Pass Through versions of f/w this means that the
1379 * driver is no longer loaded.
1380 *
1381 **/
1382 static void igb_release_hw_control(struct igb_adapter *adapter)
1383 {
1384 struct e1000_hw *hw = &adapter->hw;
1385 u32 ctrl_ext;
1386
1387 /* Let firmware take over control of h/w */
1388 ctrl_ext = rd32(E1000_CTRL_EXT);
1389 wr32(E1000_CTRL_EXT,
1390 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1391 }
1392
1393 /**
1394 * igb_get_hw_control - get control of the h/w from f/w
1395 * @adapter: address of board private structure
1396 *
1397 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1398 * For ASF and Pass Through versions of f/w this means that
1399 * the driver is loaded.
1400 *
1401 **/
1402 static void igb_get_hw_control(struct igb_adapter *adapter)
1403 {
1404 struct e1000_hw *hw = &adapter->hw;
1405 u32 ctrl_ext;
1406
1407 /* Let firmware know the driver has taken over */
1408 ctrl_ext = rd32(E1000_CTRL_EXT);
1409 wr32(E1000_CTRL_EXT,
1410 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1411 }
1412
1413 /**
1414 * igb_configure - configure the hardware for RX and TX
1415 * @adapter: private board structure
1416 **/
1417 static void igb_configure(struct igb_adapter *adapter)
1418 {
1419 struct net_device *netdev = adapter->netdev;
1420 int i;
1421
1422 igb_get_hw_control(adapter);
1423 igb_set_rx_mode(netdev);
1424
1425 igb_restore_vlan(adapter);
1426
1427 igb_setup_tctl(adapter);
1428 igb_setup_mrqc(adapter);
1429 igb_setup_rctl(adapter);
1430
1431 igb_configure_tx(adapter);
1432 igb_configure_rx(adapter);
1433
1434 igb_rx_fifo_flush_82575(&adapter->hw);
1435
1436 /* call igb_desc_unused which always leaves
1437 * at least 1 descriptor unused to make sure
1438 * next_to_use != next_to_clean */
1439 for (i = 0; i < adapter->num_rx_queues; i++) {
1440 struct igb_ring *ring = adapter->rx_ring[i];
1441 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1442 }
1443 }
1444
1445 /**
1446 * igb_power_up_link - Power up the phy/serdes link
1447 * @adapter: address of board private structure
1448 **/
1449 void igb_power_up_link(struct igb_adapter *adapter)
1450 {
1451 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1452 igb_power_up_phy_copper(&adapter->hw);
1453 else
1454 igb_power_up_serdes_link_82575(&adapter->hw);
1455 }
1456
1457 /**
1458 * igb_power_down_link - Power down the phy/serdes link
1459 * @adapter: address of board private structure
1460 */
1461 static void igb_power_down_link(struct igb_adapter *adapter)
1462 {
1463 if (adapter->hw.phy.media_type == e1000_media_type_copper)
1464 igb_power_down_phy_copper_82575(&adapter->hw);
1465 else
1466 igb_shutdown_serdes_link_82575(&adapter->hw);
1467 }
1468
1469 /**
1470 * igb_up - Open the interface and prepare it to handle traffic
1471 * @adapter: board private structure
1472 **/
1473 int igb_up(struct igb_adapter *adapter)
1474 {
1475 struct e1000_hw *hw = &adapter->hw;
1476 int i;
1477
1478 /* hardware has been reset, we need to reload some things */
1479 igb_configure(adapter);
1480
1481 clear_bit(__IGB_DOWN, &adapter->state);
1482
1483 for (i = 0; i < adapter->num_q_vectors; i++) {
1484 struct igb_q_vector *q_vector = adapter->q_vector[i];
1485 napi_enable(&q_vector->napi);
1486 }
1487 if (adapter->msix_entries)
1488 igb_configure_msix(adapter);
1489 else
1490 igb_assign_vector(adapter->q_vector[0], 0);
1491
1492 /* Clear any pending interrupts. */
1493 rd32(E1000_ICR);
1494 igb_irq_enable(adapter);
1495
1496 /* notify VFs that reset has been completed */
1497 if (adapter->vfs_allocated_count) {
1498 u32 reg_data = rd32(E1000_CTRL_EXT);
1499 reg_data |= E1000_CTRL_EXT_PFRSTD;
1500 wr32(E1000_CTRL_EXT, reg_data);
1501 }
1502
1503 netif_tx_start_all_queues(adapter->netdev);
1504
1505 /* start the watchdog. */
1506 hw->mac.get_link_status = 1;
1507 schedule_work(&adapter->watchdog_task);
1508
1509 return 0;
1510 }
1511
1512 void igb_down(struct igb_adapter *adapter)
1513 {
1514 struct net_device *netdev = adapter->netdev;
1515 struct e1000_hw *hw = &adapter->hw;
1516 u32 tctl, rctl;
1517 int i;
1518
1519 /* signal that we're down so the interrupt handler does not
1520 * reschedule our watchdog timer */
1521 set_bit(__IGB_DOWN, &adapter->state);
1522
1523 /* disable receives in the hardware */
1524 rctl = rd32(E1000_RCTL);
1525 wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1526 /* flush and sleep below */
1527
1528 netif_tx_stop_all_queues(netdev);
1529
1530 /* disable transmits in the hardware */
1531 tctl = rd32(E1000_TCTL);
1532 tctl &= ~E1000_TCTL_EN;
1533 wr32(E1000_TCTL, tctl);
1534 /* flush both disables and wait for them to finish */
1535 wrfl();
1536 msleep(10);
1537
1538 for (i = 0; i < adapter->num_q_vectors; i++) {
1539 struct igb_q_vector *q_vector = adapter->q_vector[i];
1540 napi_disable(&q_vector->napi);
1541 }
1542
1543 igb_irq_disable(adapter);
1544
1545 del_timer_sync(&adapter->watchdog_timer);
1546 del_timer_sync(&adapter->phy_info_timer);
1547
1548 netif_carrier_off(netdev);
1549
1550 /* record the stats before reset*/
1551 spin_lock(&adapter->stats64_lock);
1552 igb_update_stats(adapter, &adapter->stats64);
1553 spin_unlock(&adapter->stats64_lock);
1554
1555 adapter->link_speed = 0;
1556 adapter->link_duplex = 0;
1557
1558 if (!pci_channel_offline(adapter->pdev))
1559 igb_reset(adapter);
1560 igb_clean_all_tx_rings(adapter);
1561 igb_clean_all_rx_rings(adapter);
1562 #ifdef CONFIG_IGB_DCA
1563
1564 /* since we reset the hardware DCA settings were cleared */
1565 igb_setup_dca(adapter);
1566 #endif
1567 }
1568
1569 void igb_reinit_locked(struct igb_adapter *adapter)
1570 {
1571 WARN_ON(in_interrupt());
1572 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1573 msleep(1);
1574 igb_down(adapter);
1575 igb_up(adapter);
1576 clear_bit(__IGB_RESETTING, &adapter->state);
1577 }
1578
1579 void igb_reset(struct igb_adapter *adapter)
1580 {
1581 struct pci_dev *pdev = adapter->pdev;
1582 struct e1000_hw *hw = &adapter->hw;
1583 struct e1000_mac_info *mac = &hw->mac;
1584 struct e1000_fc_info *fc = &hw->fc;
1585 u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1586 u16 hwm;
1587
1588 /* Repartition Pba for greater than 9k mtu
1589 * To take effect CTRL.RST is required.
1590 */
1591 switch (mac->type) {
1592 case e1000_i350:
1593 case e1000_82580:
1594 pba = rd32(E1000_RXPBS);
1595 pba = igb_rxpbs_adjust_82580(pba);
1596 break;
1597 case e1000_82576:
1598 pba = rd32(E1000_RXPBS);
1599 pba &= E1000_RXPBS_SIZE_MASK_82576;
1600 break;
1601 case e1000_82575:
1602 default:
1603 pba = E1000_PBA_34K;
1604 break;
1605 }
1606
1607 if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1608 (mac->type < e1000_82576)) {
1609 /* adjust PBA for jumbo frames */
1610 wr32(E1000_PBA, pba);
1611
1612 /* To maintain wire speed transmits, the Tx FIFO should be
1613 * large enough to accommodate two full transmit packets,
1614 * rounded up to the next 1KB and expressed in KB. Likewise,
1615 * the Rx FIFO should be large enough to accommodate at least
1616 * one full receive packet and is similarly rounded up and
1617 * expressed in KB. */
1618 pba = rd32(E1000_PBA);
1619 /* upper 16 bits has Tx packet buffer allocation size in KB */
1620 tx_space = pba >> 16;
1621 /* lower 16 bits has Rx packet buffer allocation size in KB */
1622 pba &= 0xffff;
1623 /* the tx fifo also stores 16 bytes of information about the tx
1624 * but don't include ethernet FCS because hardware appends it */
1625 min_tx_space = (adapter->max_frame_size +
1626 sizeof(union e1000_adv_tx_desc) -
1627 ETH_FCS_LEN) * 2;
1628 min_tx_space = ALIGN(min_tx_space, 1024);
1629 min_tx_space >>= 10;
1630 /* software strips receive CRC, so leave room for it */
1631 min_rx_space = adapter->max_frame_size;
1632 min_rx_space = ALIGN(min_rx_space, 1024);
1633 min_rx_space >>= 10;
1634
1635 /* If current Tx allocation is less than the min Tx FIFO size,
1636 * and the min Tx FIFO size is less than the current Rx FIFO
1637 * allocation, take space away from current Rx allocation */
1638 if (tx_space < min_tx_space &&
1639 ((min_tx_space - tx_space) < pba)) {
1640 pba = pba - (min_tx_space - tx_space);
1641
1642 /* if short on rx space, rx wins and must trump tx
1643 * adjustment */
1644 if (pba < min_rx_space)
1645 pba = min_rx_space;
1646 }
1647 wr32(E1000_PBA, pba);
1648 }
1649
1650 /* flow control settings */
1651 /* The high water mark must be low enough to fit one full frame
1652 * (or the size used for early receive) above it in the Rx FIFO.
1653 * Set it to the lower of:
1654 * - 90% of the Rx FIFO size, or
1655 * - the full Rx FIFO size minus one full frame */
1656 hwm = min(((pba << 10) * 9 / 10),
1657 ((pba << 10) - 2 * adapter->max_frame_size));
1658
1659 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
1660 fc->low_water = fc->high_water - 16;
1661 fc->pause_time = 0xFFFF;
1662 fc->send_xon = 1;
1663 fc->current_mode = fc->requested_mode;
1664
1665 /* disable receive for all VFs and wait one second */
1666 if (adapter->vfs_allocated_count) {
1667 int i;
1668 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1669 adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1670
1671 /* ping all the active vfs to let them know we are going down */
1672 igb_ping_all_vfs(adapter);
1673
1674 /* disable transmits and receives */
1675 wr32(E1000_VFRE, 0);
1676 wr32(E1000_VFTE, 0);
1677 }
1678
1679 /* Allow time for pending master requests to run */
1680 hw->mac.ops.reset_hw(hw);
1681 wr32(E1000_WUC, 0);
1682
1683 if (hw->mac.ops.init_hw(hw))
1684 dev_err(&pdev->dev, "Hardware Error\n");
1685 if (hw->mac.type > e1000_82580) {
1686 if (adapter->flags & IGB_FLAG_DMAC) {
1687 u32 reg;
1688
1689 /*
1690 * DMA Coalescing high water mark needs to be higher
1691 * than * the * Rx threshold. The Rx threshold is
1692 * currently * pba - 6, so we * should use a high water
1693 * mark of pba * - 4. */
1694 hwm = (pba - 4) << 10;
1695
1696 reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
1697 & E1000_DMACR_DMACTHR_MASK);
1698
1699 /* transition to L0x or L1 if available..*/
1700 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
1701
1702 /* watchdog timer= +-1000 usec in 32usec intervals */
1703 reg |= (1000 >> 5);
1704 wr32(E1000_DMACR, reg);
1705
1706 /* no lower threshold to disable coalescing(smart fifb)
1707 * -UTRESH=0*/
1708 wr32(E1000_DMCRTRH, 0);
1709
1710 /* set hwm to PBA - 2 * max frame size */
1711 wr32(E1000_FCRTC, hwm);
1712
1713 /*
1714 * This sets the time to wait before requesting tran-
1715 * sition to * low power state to number of usecs needed
1716 * to receive 1 512 * byte frame at gigabit line rate
1717 */
1718 reg = rd32(E1000_DMCTLX);
1719 reg |= IGB_DMCTLX_DCFLUSH_DIS;
1720
1721 /* Delay 255 usec before entering Lx state. */
1722 reg |= 0xFF;
1723 wr32(E1000_DMCTLX, reg);
1724
1725 /* free space in Tx packet buffer to wake from DMAC */
1726 wr32(E1000_DMCTXTH,
1727 (IGB_MIN_TXPBSIZE -
1728 (IGB_TX_BUF_4096 + adapter->max_frame_size))
1729 >> 6);
1730
1731 /* make low power state decision controlled by DMAC */
1732 reg = rd32(E1000_PCIEMISC);
1733 reg |= E1000_PCIEMISC_LX_DECISION;
1734 wr32(E1000_PCIEMISC, reg);
1735 } /* end if IGB_FLAG_DMAC set */
1736 }
1737 if (hw->mac.type == e1000_82580) {
1738 u32 reg = rd32(E1000_PCIEMISC);
1739 wr32(E1000_PCIEMISC,
1740 reg & ~E1000_PCIEMISC_LX_DECISION);
1741 }
1742 if (!netif_running(adapter->netdev))
1743 igb_power_down_link(adapter);
1744
1745 igb_update_mng_vlan(adapter);
1746
1747 /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1748 wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1749
1750 igb_get_phy_info(hw);
1751 }
1752
1753 static u32 igb_fix_features(struct net_device *netdev, u32 features)
1754 {
1755 /*
1756 * Since there is no support for separate rx/tx vlan accel
1757 * enable/disable make sure tx flag is always in same state as rx.
1758 */
1759 if (features & NETIF_F_HW_VLAN_RX)
1760 features |= NETIF_F_HW_VLAN_TX;
1761 else
1762 features &= ~NETIF_F_HW_VLAN_TX;
1763
1764 return features;
1765 }
1766
1767 static int igb_set_features(struct net_device *netdev, u32 features)
1768 {
1769 struct igb_adapter *adapter = netdev_priv(netdev);
1770 int i;
1771 u32 changed = netdev->features ^ features;
1772
1773 for (i = 0; i < adapter->num_rx_queues; i++) {
1774 if (features & NETIF_F_RXCSUM)
1775 adapter->rx_ring[i]->flags |= IGB_RING_FLAG_RX_CSUM;
1776 else
1777 adapter->rx_ring[i]->flags &= ~IGB_RING_FLAG_RX_CSUM;
1778 }
1779
1780 if (changed & NETIF_F_HW_VLAN_RX)
1781 igb_vlan_mode(netdev, features);
1782
1783 return 0;
1784 }
1785
1786 static const struct net_device_ops igb_netdev_ops = {
1787 .ndo_open = igb_open,
1788 .ndo_stop = igb_close,
1789 .ndo_start_xmit = igb_xmit_frame_adv,
1790 .ndo_get_stats64 = igb_get_stats64,
1791 .ndo_set_rx_mode = igb_set_rx_mode,
1792 .ndo_set_multicast_list = igb_set_rx_mode,
1793 .ndo_set_mac_address = igb_set_mac,
1794 .ndo_change_mtu = igb_change_mtu,
1795 .ndo_do_ioctl = igb_ioctl,
1796 .ndo_tx_timeout = igb_tx_timeout,
1797 .ndo_validate_addr = eth_validate_addr,
1798 .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid,
1799 .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid,
1800 .ndo_set_vf_mac = igb_ndo_set_vf_mac,
1801 .ndo_set_vf_vlan = igb_ndo_set_vf_vlan,
1802 .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw,
1803 .ndo_get_vf_config = igb_ndo_get_vf_config,
1804 #ifdef CONFIG_NET_POLL_CONTROLLER
1805 .ndo_poll_controller = igb_netpoll,
1806 #endif
1807 .ndo_fix_features = igb_fix_features,
1808 .ndo_set_features = igb_set_features,
1809 };
1810
1811 /**
1812 * igb_probe - Device Initialization Routine
1813 * @pdev: PCI device information struct
1814 * @ent: entry in igb_pci_tbl
1815 *
1816 * Returns 0 on success, negative on failure
1817 *
1818 * igb_probe initializes an adapter identified by a pci_dev structure.
1819 * The OS initialization, configuring of the adapter private structure,
1820 * and a hardware reset occur.
1821 **/
1822 static int __devinit igb_probe(struct pci_dev *pdev,
1823 const struct pci_device_id *ent)
1824 {
1825 struct net_device *netdev;
1826 struct igb_adapter *adapter;
1827 struct e1000_hw *hw;
1828 u16 eeprom_data = 0;
1829 s32 ret_val;
1830 static int global_quad_port_a; /* global quad port a indication */
1831 const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1832 unsigned long mmio_start, mmio_len;
1833 int err, pci_using_dac;
1834 u16 eeprom_apme_mask = IGB_EEPROM_APME;
1835 u8 part_str[E1000_PBANUM_LENGTH];
1836
1837 /* Catch broken hardware that put the wrong VF device ID in
1838 * the PCIe SR-IOV capability.
1839 */
1840 if (pdev->is_virtfn) {
1841 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1842 pci_name(pdev), pdev->vendor, pdev->device);
1843 return -EINVAL;
1844 }
1845
1846 err = pci_enable_device_mem(pdev);
1847 if (err)
1848 return err;
1849
1850 pci_using_dac = 0;
1851 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1852 if (!err) {
1853 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1854 if (!err)
1855 pci_using_dac = 1;
1856 } else {
1857 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1858 if (err) {
1859 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1860 if (err) {
1861 dev_err(&pdev->dev, "No usable DMA "
1862 "configuration, aborting\n");
1863 goto err_dma;
1864 }
1865 }
1866 }
1867
1868 err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1869 IORESOURCE_MEM),
1870 igb_driver_name);
1871 if (err)
1872 goto err_pci_reg;
1873
1874 pci_enable_pcie_error_reporting(pdev);
1875
1876 pci_set_master(pdev);
1877 pci_save_state(pdev);
1878
1879 err = -ENOMEM;
1880 netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1881 IGB_ABS_MAX_TX_QUEUES);
1882 if (!netdev)
1883 goto err_alloc_etherdev;
1884
1885 SET_NETDEV_DEV(netdev, &pdev->dev);
1886
1887 pci_set_drvdata(pdev, netdev);
1888 adapter = netdev_priv(netdev);
1889 adapter->netdev = netdev;
1890 adapter->pdev = pdev;
1891 hw = &adapter->hw;
1892 hw->back = adapter;
1893 adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1894
1895 mmio_start = pci_resource_start(pdev, 0);
1896 mmio_len = pci_resource_len(pdev, 0);
1897
1898 err = -EIO;
1899 hw->hw_addr = ioremap(mmio_start, mmio_len);
1900 if (!hw->hw_addr)
1901 goto err_ioremap;
1902
1903 netdev->netdev_ops = &igb_netdev_ops;
1904 igb_set_ethtool_ops(netdev);
1905 netdev->watchdog_timeo = 5 * HZ;
1906
1907 strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1908
1909 netdev->mem_start = mmio_start;
1910 netdev->mem_end = mmio_start + mmio_len;
1911
1912 /* PCI config space info */
1913 hw->vendor_id = pdev->vendor;
1914 hw->device_id = pdev->device;
1915 hw->revision_id = pdev->revision;
1916 hw->subsystem_vendor_id = pdev->subsystem_vendor;
1917 hw->subsystem_device_id = pdev->subsystem_device;
1918
1919 /* Copy the default MAC, PHY and NVM function pointers */
1920 memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1921 memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1922 memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1923 /* Initialize skew-specific constants */
1924 err = ei->get_invariants(hw);
1925 if (err)
1926 goto err_sw_init;
1927
1928 /* setup the private structure */
1929 err = igb_sw_init(adapter);
1930 if (err)
1931 goto err_sw_init;
1932
1933 igb_get_bus_info_pcie(hw);
1934
1935 hw->phy.autoneg_wait_to_complete = false;
1936
1937 /* Copper options */
1938 if (hw->phy.media_type == e1000_media_type_copper) {
1939 hw->phy.mdix = AUTO_ALL_MODES;
1940 hw->phy.disable_polarity_correction = false;
1941 hw->phy.ms_type = e1000_ms_hw_default;
1942 }
1943
1944 if (igb_check_reset_block(hw))
1945 dev_info(&pdev->dev,
1946 "PHY reset is blocked due to SOL/IDER session.\n");
1947
1948 netdev->hw_features = NETIF_F_SG |
1949 NETIF_F_IP_CSUM |
1950 NETIF_F_IPV6_CSUM |
1951 NETIF_F_TSO |
1952 NETIF_F_TSO6 |
1953 NETIF_F_RXCSUM |
1954 NETIF_F_HW_VLAN_RX;
1955
1956 netdev->features = netdev->hw_features |
1957 NETIF_F_HW_VLAN_TX |
1958 NETIF_F_HW_VLAN_FILTER;
1959
1960 netdev->vlan_features |= NETIF_F_TSO;
1961 netdev->vlan_features |= NETIF_F_TSO6;
1962 netdev->vlan_features |= NETIF_F_IP_CSUM;
1963 netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1964 netdev->vlan_features |= NETIF_F_SG;
1965
1966 if (pci_using_dac) {
1967 netdev->features |= NETIF_F_HIGHDMA;
1968 netdev->vlan_features |= NETIF_F_HIGHDMA;
1969 }
1970
1971 if (hw->mac.type >= e1000_82576) {
1972 netdev->hw_features |= NETIF_F_SCTP_CSUM;
1973 netdev->features |= NETIF_F_SCTP_CSUM;
1974 }
1975
1976 adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1977
1978 /* before reading the NVM, reset the controller to put the device in a
1979 * known good starting state */
1980 hw->mac.ops.reset_hw(hw);
1981
1982 /* make sure the NVM is good */
1983 if (hw->nvm.ops.validate(hw) < 0) {
1984 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1985 err = -EIO;
1986 goto err_eeprom;
1987 }
1988
1989 /* copy the MAC address out of the NVM */
1990 if (hw->mac.ops.read_mac_addr(hw))
1991 dev_err(&pdev->dev, "NVM Read Error\n");
1992
1993 memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1994 memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1995
1996 if (!is_valid_ether_addr(netdev->perm_addr)) {
1997 dev_err(&pdev->dev, "Invalid MAC Address\n");
1998 err = -EIO;
1999 goto err_eeprom;
2000 }
2001
2002 setup_timer(&adapter->watchdog_timer, igb_watchdog,
2003 (unsigned long) adapter);
2004 setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2005 (unsigned long) adapter);
2006
2007 INIT_WORK(&adapter->reset_task, igb_reset_task);
2008 INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2009
2010 /* Initialize link properties that are user-changeable */
2011 adapter->fc_autoneg = true;
2012 hw->mac.autoneg = true;
2013 hw->phy.autoneg_advertised = 0x2f;
2014
2015 hw->fc.requested_mode = e1000_fc_default;
2016 hw->fc.current_mode = e1000_fc_default;
2017
2018 igb_validate_mdi_setting(hw);
2019
2020 /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
2021 * enable the ACPI Magic Packet filter
2022 */
2023
2024 if (hw->bus.func == 0)
2025 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2026 else if (hw->mac.type >= e1000_82580)
2027 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2028 NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2029 &eeprom_data);
2030 else if (hw->bus.func == 1)
2031 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2032
2033 if (eeprom_data & eeprom_apme_mask)
2034 adapter->eeprom_wol |= E1000_WUFC_MAG;
2035
2036 /* now that we have the eeprom settings, apply the special cases where
2037 * the eeprom may be wrong or the board simply won't support wake on
2038 * lan on a particular port */
2039 switch (pdev->device) {
2040 case E1000_DEV_ID_82575GB_QUAD_COPPER:
2041 adapter->eeprom_wol = 0;
2042 break;
2043 case E1000_DEV_ID_82575EB_FIBER_SERDES:
2044 case E1000_DEV_ID_82576_FIBER:
2045 case E1000_DEV_ID_82576_SERDES:
2046 /* Wake events only supported on port A for dual fiber
2047 * regardless of eeprom setting */
2048 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2049 adapter->eeprom_wol = 0;
2050 break;
2051 case E1000_DEV_ID_82576_QUAD_COPPER:
2052 case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2053 /* if quad port adapter, disable WoL on all but port A */
2054 if (global_quad_port_a != 0)
2055 adapter->eeprom_wol = 0;
2056 else
2057 adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2058 /* Reset for multiple quad port adapters */
2059 if (++global_quad_port_a == 4)
2060 global_quad_port_a = 0;
2061 break;
2062 }
2063
2064 /* initialize the wol settings based on the eeprom settings */
2065 adapter->wol = adapter->eeprom_wol;
2066 device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2067
2068 /* reset the hardware with the new settings */
2069 igb_reset(adapter);
2070
2071 /* let the f/w know that the h/w is now under the control of the
2072 * driver. */
2073 igb_get_hw_control(adapter);
2074
2075 strcpy(netdev->name, "eth%d");
2076 err = register_netdev(netdev);
2077 if (err)
2078 goto err_register;
2079
2080 igb_vlan_mode(netdev, netdev->features);
2081
2082 /* carrier off reporting is important to ethtool even BEFORE open */
2083 netif_carrier_off(netdev);
2084
2085 #ifdef CONFIG_IGB_DCA
2086 if (dca_add_requester(&pdev->dev) == 0) {
2087 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2088 dev_info(&pdev->dev, "DCA enabled\n");
2089 igb_setup_dca(adapter);
2090 }
2091
2092 #endif
2093 /* do hw tstamp init after resetting */
2094 igb_init_hw_timer(adapter);
2095
2096 dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2097 /* print bus type/speed/width info */
2098 dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2099 netdev->name,
2100 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2101 (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2102 "unknown"),
2103 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2104 (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2105 (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2106 "unknown"),
2107 netdev->dev_addr);
2108
2109 ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2110 if (ret_val)
2111 strcpy(part_str, "Unknown");
2112 dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2113 dev_info(&pdev->dev,
2114 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2115 adapter->msix_entries ? "MSI-X" :
2116 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2117 adapter->num_rx_queues, adapter->num_tx_queues);
2118 switch (hw->mac.type) {
2119 case e1000_i350:
2120 igb_set_eee_i350(hw);
2121 break;
2122 default:
2123 break;
2124 }
2125 return 0;
2126
2127 err_register:
2128 igb_release_hw_control(adapter);
2129 err_eeprom:
2130 if (!igb_check_reset_block(hw))
2131 igb_reset_phy(hw);
2132
2133 if (hw->flash_address)
2134 iounmap(hw->flash_address);
2135 err_sw_init:
2136 igb_clear_interrupt_scheme(adapter);
2137 iounmap(hw->hw_addr);
2138 err_ioremap:
2139 free_netdev(netdev);
2140 err_alloc_etherdev:
2141 pci_release_selected_regions(pdev,
2142 pci_select_bars(pdev, IORESOURCE_MEM));
2143 err_pci_reg:
2144 err_dma:
2145 pci_disable_device(pdev);
2146 return err;
2147 }
2148
2149 /**
2150 * igb_remove - Device Removal Routine
2151 * @pdev: PCI device information struct
2152 *
2153 * igb_remove is called by the PCI subsystem to alert the driver
2154 * that it should release a PCI device. The could be caused by a
2155 * Hot-Plug event, or because the driver is going to be removed from
2156 * memory.
2157 **/
2158 static void __devexit igb_remove(struct pci_dev *pdev)
2159 {
2160 struct net_device *netdev = pci_get_drvdata(pdev);
2161 struct igb_adapter *adapter = netdev_priv(netdev);
2162 struct e1000_hw *hw = &adapter->hw;
2163
2164 /*
2165 * The watchdog timer may be rescheduled, so explicitly
2166 * disable watchdog from being rescheduled.
2167 */
2168 set_bit(__IGB_DOWN, &adapter->state);
2169 del_timer_sync(&adapter->watchdog_timer);
2170 del_timer_sync(&adapter->phy_info_timer);
2171
2172 cancel_work_sync(&adapter->reset_task);
2173 cancel_work_sync(&adapter->watchdog_task);
2174
2175 #ifdef CONFIG_IGB_DCA
2176 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2177 dev_info(&pdev->dev, "DCA disabled\n");
2178 dca_remove_requester(&pdev->dev);
2179 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2180 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2181 }
2182 #endif
2183
2184 /* Release control of h/w to f/w. If f/w is AMT enabled, this
2185 * would have already happened in close and is redundant. */
2186 igb_release_hw_control(adapter);
2187
2188 unregister_netdev(netdev);
2189
2190 igb_clear_interrupt_scheme(adapter);
2191
2192 #ifdef CONFIG_PCI_IOV
2193 /* reclaim resources allocated to VFs */
2194 if (adapter->vf_data) {
2195 /* disable iov and allow time for transactions to clear */
2196 pci_disable_sriov(pdev);
2197 msleep(500);
2198
2199 kfree(adapter->vf_data);
2200 adapter->vf_data = NULL;
2201 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2202 wrfl();
2203 msleep(100);
2204 dev_info(&pdev->dev, "IOV Disabled\n");
2205 }
2206 #endif
2207
2208 iounmap(hw->hw_addr);
2209 if (hw->flash_address)
2210 iounmap(hw->flash_address);
2211 pci_release_selected_regions(pdev,
2212 pci_select_bars(pdev, IORESOURCE_MEM));
2213
2214 free_netdev(netdev);
2215
2216 pci_disable_pcie_error_reporting(pdev);
2217
2218 pci_disable_device(pdev);
2219 }
2220
2221 /**
2222 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2223 * @adapter: board private structure to initialize
2224 *
2225 * This function initializes the vf specific data storage and then attempts to
2226 * allocate the VFs. The reason for ordering it this way is because it is much
2227 * mor expensive time wise to disable SR-IOV than it is to allocate and free
2228 * the memory for the VFs.
2229 **/
2230 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2231 {
2232 #ifdef CONFIG_PCI_IOV
2233 struct pci_dev *pdev = adapter->pdev;
2234
2235 if (adapter->vfs_allocated_count) {
2236 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2237 sizeof(struct vf_data_storage),
2238 GFP_KERNEL);
2239 /* if allocation failed then we do not support SR-IOV */
2240 if (!adapter->vf_data) {
2241 adapter->vfs_allocated_count = 0;
2242 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2243 "Data Storage\n");
2244 }
2245 }
2246
2247 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2248 kfree(adapter->vf_data);
2249 adapter->vf_data = NULL;
2250 #endif /* CONFIG_PCI_IOV */
2251 adapter->vfs_allocated_count = 0;
2252 #ifdef CONFIG_PCI_IOV
2253 } else {
2254 unsigned char mac_addr[ETH_ALEN];
2255 int i;
2256 dev_info(&pdev->dev, "%d vfs allocated\n",
2257 adapter->vfs_allocated_count);
2258 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2259 random_ether_addr(mac_addr);
2260 igb_set_vf_mac(adapter, i, mac_addr);
2261 }
2262 /* DMA Coalescing is not supported in IOV mode. */
2263 if (adapter->flags & IGB_FLAG_DMAC)
2264 adapter->flags &= ~IGB_FLAG_DMAC;
2265 }
2266 #endif /* CONFIG_PCI_IOV */
2267 }
2268
2269
2270 /**
2271 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2272 * @adapter: board private structure to initialize
2273 *
2274 * igb_init_hw_timer initializes the function pointer and values for the hw
2275 * timer found in hardware.
2276 **/
2277 static void igb_init_hw_timer(struct igb_adapter *adapter)
2278 {
2279 struct e1000_hw *hw = &adapter->hw;
2280
2281 switch (hw->mac.type) {
2282 case e1000_i350:
2283 case e1000_82580:
2284 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2285 adapter->cycles.read = igb_read_clock;
2286 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2287 adapter->cycles.mult = 1;
2288 /*
2289 * The 82580 timesync updates the system timer every 8ns by 8ns
2290 * and the value cannot be shifted. Instead we need to shift
2291 * the registers to generate a 64bit timer value. As a result
2292 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2293 * 24 in order to generate a larger value for synchronization.
2294 */
2295 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2296 /* disable system timer temporarily by setting bit 31 */
2297 wr32(E1000_TSAUXC, 0x80000000);
2298 wrfl();
2299
2300 /* Set registers so that rollover occurs soon to test this. */
2301 wr32(E1000_SYSTIMR, 0x00000000);
2302 wr32(E1000_SYSTIML, 0x80000000);
2303 wr32(E1000_SYSTIMH, 0x000000FF);
2304 wrfl();
2305
2306 /* enable system timer by clearing bit 31 */
2307 wr32(E1000_TSAUXC, 0x0);
2308 wrfl();
2309
2310 timecounter_init(&adapter->clock,
2311 &adapter->cycles,
2312 ktime_to_ns(ktime_get_real()));
2313 /*
2314 * Synchronize our NIC clock against system wall clock. NIC
2315 * time stamp reading requires ~3us per sample, each sample
2316 * was pretty stable even under load => only require 10
2317 * samples for each offset comparison.
2318 */
2319 memset(&adapter->compare, 0, sizeof(adapter->compare));
2320 adapter->compare.source = &adapter->clock;
2321 adapter->compare.target = ktime_get_real;
2322 adapter->compare.num_samples = 10;
2323 timecompare_update(&adapter->compare, 0);
2324 break;
2325 case e1000_82576:
2326 /*
2327 * Initialize hardware timer: we keep it running just in case
2328 * that some program needs it later on.
2329 */
2330 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2331 adapter->cycles.read = igb_read_clock;
2332 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2333 adapter->cycles.mult = 1;
2334 /**
2335 * Scale the NIC clock cycle by a large factor so that
2336 * relatively small clock corrections can be added or
2337 * subtracted at each clock tick. The drawbacks of a large
2338 * factor are a) that the clock register overflows more quickly
2339 * (not such a big deal) and b) that the increment per tick has
2340 * to fit into 24 bits. As a result we need to use a shift of
2341 * 19 so we can fit a value of 16 into the TIMINCA register.
2342 */
2343 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2344 wr32(E1000_TIMINCA,
2345 (1 << E1000_TIMINCA_16NS_SHIFT) |
2346 (16 << IGB_82576_TSYNC_SHIFT));
2347
2348 /* Set registers so that rollover occurs soon to test this. */
2349 wr32(E1000_SYSTIML, 0x00000000);
2350 wr32(E1000_SYSTIMH, 0xFF800000);
2351 wrfl();
2352
2353 timecounter_init(&adapter->clock,
2354 &adapter->cycles,
2355 ktime_to_ns(ktime_get_real()));
2356 /*
2357 * Synchronize our NIC clock against system wall clock. NIC
2358 * time stamp reading requires ~3us per sample, each sample
2359 * was pretty stable even under load => only require 10
2360 * samples for each offset comparison.
2361 */
2362 memset(&adapter->compare, 0, sizeof(adapter->compare));
2363 adapter->compare.source = &adapter->clock;
2364 adapter->compare.target = ktime_get_real;
2365 adapter->compare.num_samples = 10;
2366 timecompare_update(&adapter->compare, 0);
2367 break;
2368 case e1000_82575:
2369 /* 82575 does not support timesync */
2370 default:
2371 break;
2372 }
2373
2374 }
2375
2376 /**
2377 * igb_sw_init - Initialize general software structures (struct igb_adapter)
2378 * @adapter: board private structure to initialize
2379 *
2380 * igb_sw_init initializes the Adapter private data structure.
2381 * Fields are initialized based on PCI device information and
2382 * OS network device settings (MTU size).
2383 **/
2384 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2385 {
2386 struct e1000_hw *hw = &adapter->hw;
2387 struct net_device *netdev = adapter->netdev;
2388 struct pci_dev *pdev = adapter->pdev;
2389
2390 pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2391
2392 adapter->tx_ring_count = IGB_DEFAULT_TXD;
2393 adapter->rx_ring_count = IGB_DEFAULT_RXD;
2394 adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2395 adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2396
2397 adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2398 adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2399
2400 spin_lock_init(&adapter->stats64_lock);
2401 #ifdef CONFIG_PCI_IOV
2402 switch (hw->mac.type) {
2403 case e1000_82576:
2404 case e1000_i350:
2405 if (max_vfs > 7) {
2406 dev_warn(&pdev->dev,
2407 "Maximum of 7 VFs per PF, using max\n");
2408 adapter->vfs_allocated_count = 7;
2409 } else
2410 adapter->vfs_allocated_count = max_vfs;
2411 break;
2412 default:
2413 break;
2414 }
2415 #endif /* CONFIG_PCI_IOV */
2416 adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2417 /* i350 cannot do RSS and SR-IOV at the same time */
2418 if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2419 adapter->rss_queues = 1;
2420
2421 /*
2422 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2423 * then we should combine the queues into a queue pair in order to
2424 * conserve interrupts due to limited supply
2425 */
2426 if ((adapter->rss_queues > 4) ||
2427 ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2428 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2429
2430 /* This call may decrease the number of queues */
2431 if (igb_init_interrupt_scheme(adapter)) {
2432 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2433 return -ENOMEM;
2434 }
2435
2436 igb_probe_vfs(adapter);
2437
2438 /* Explicitly disable IRQ since the NIC can be in any state. */
2439 igb_irq_disable(adapter);
2440
2441 if (hw->mac.type == e1000_i350)
2442 adapter->flags &= ~IGB_FLAG_DMAC;
2443
2444 set_bit(__IGB_DOWN, &adapter->state);
2445 return 0;
2446 }
2447
2448 /**
2449 * igb_open - Called when a network interface is made active
2450 * @netdev: network interface device structure
2451 *
2452 * Returns 0 on success, negative value on failure
2453 *
2454 * The open entry point is called when a network interface is made
2455 * active by the system (IFF_UP). At this point all resources needed
2456 * for transmit and receive operations are allocated, the interrupt
2457 * handler is registered with the OS, the watchdog timer is started,
2458 * and the stack is notified that the interface is ready.
2459 **/
2460 static int igb_open(struct net_device *netdev)
2461 {
2462 struct igb_adapter *adapter = netdev_priv(netdev);
2463 struct e1000_hw *hw = &adapter->hw;
2464 int err;
2465 int i;
2466
2467 /* disallow open during test */
2468 if (test_bit(__IGB_TESTING, &adapter->state))
2469 return -EBUSY;
2470
2471 netif_carrier_off(netdev);
2472
2473 /* allocate transmit descriptors */
2474 err = igb_setup_all_tx_resources(adapter);
2475 if (err)
2476 goto err_setup_tx;
2477
2478 /* allocate receive descriptors */
2479 err = igb_setup_all_rx_resources(adapter);
2480 if (err)
2481 goto err_setup_rx;
2482
2483 igb_power_up_link(adapter);
2484
2485 /* before we allocate an interrupt, we must be ready to handle it.
2486 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2487 * as soon as we call pci_request_irq, so we have to setup our
2488 * clean_rx handler before we do so. */
2489 igb_configure(adapter);
2490
2491 err = igb_request_irq(adapter);
2492 if (err)
2493 goto err_req_irq;
2494
2495 /* From here on the code is the same as igb_up() */
2496 clear_bit(__IGB_DOWN, &adapter->state);
2497
2498 for (i = 0; i < adapter->num_q_vectors; i++) {
2499 struct igb_q_vector *q_vector = adapter->q_vector[i];
2500 napi_enable(&q_vector->napi);
2501 }
2502
2503 /* Clear any pending interrupts. */
2504 rd32(E1000_ICR);
2505
2506 igb_irq_enable(adapter);
2507
2508 /* notify VFs that reset has been completed */
2509 if (adapter->vfs_allocated_count) {
2510 u32 reg_data = rd32(E1000_CTRL_EXT);
2511 reg_data |= E1000_CTRL_EXT_PFRSTD;
2512 wr32(E1000_CTRL_EXT, reg_data);
2513 }
2514
2515 netif_tx_start_all_queues(netdev);
2516
2517 /* start the watchdog. */
2518 hw->mac.get_link_status = 1;
2519 schedule_work(&adapter->watchdog_task);
2520
2521 return 0;
2522
2523 err_req_irq:
2524 igb_release_hw_control(adapter);
2525 igb_power_down_link(adapter);
2526 igb_free_all_rx_resources(adapter);
2527 err_setup_rx:
2528 igb_free_all_tx_resources(adapter);
2529 err_setup_tx:
2530 igb_reset(adapter);
2531
2532 return err;
2533 }
2534
2535 /**
2536 * igb_close - Disables a network interface
2537 * @netdev: network interface device structure
2538 *
2539 * Returns 0, this is not allowed to fail
2540 *
2541 * The close entry point is called when an interface is de-activated
2542 * by the OS. The hardware is still under the driver's control, but
2543 * needs to be disabled. A global MAC reset is issued to stop the
2544 * hardware, and all transmit and receive resources are freed.
2545 **/
2546 static int igb_close(struct net_device *netdev)
2547 {
2548 struct igb_adapter *adapter = netdev_priv(netdev);
2549
2550 WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2551 igb_down(adapter);
2552
2553 igb_free_irq(adapter);
2554
2555 igb_free_all_tx_resources(adapter);
2556 igb_free_all_rx_resources(adapter);
2557
2558 return 0;
2559 }
2560
2561 /**
2562 * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2563 * @tx_ring: tx descriptor ring (for a specific queue) to setup
2564 *
2565 * Return 0 on success, negative on failure
2566 **/
2567 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2568 {
2569 struct device *dev = tx_ring->dev;
2570 int size;
2571
2572 size = sizeof(struct igb_buffer) * tx_ring->count;
2573 tx_ring->buffer_info = vzalloc(size);
2574 if (!tx_ring->buffer_info)
2575 goto err;
2576
2577 /* round up to nearest 4K */
2578 tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2579 tx_ring->size = ALIGN(tx_ring->size, 4096);
2580
2581 tx_ring->desc = dma_alloc_coherent(dev,
2582 tx_ring->size,
2583 &tx_ring->dma,
2584 GFP_KERNEL);
2585
2586 if (!tx_ring->desc)
2587 goto err;
2588
2589 tx_ring->next_to_use = 0;
2590 tx_ring->next_to_clean = 0;
2591 return 0;
2592
2593 err:
2594 vfree(tx_ring->buffer_info);
2595 dev_err(dev,
2596 "Unable to allocate memory for the transmit descriptor ring\n");
2597 return -ENOMEM;
2598 }
2599
2600 /**
2601 * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2602 * (Descriptors) for all queues
2603 * @adapter: board private structure
2604 *
2605 * Return 0 on success, negative on failure
2606 **/
2607 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2608 {
2609 struct pci_dev *pdev = adapter->pdev;
2610 int i, err = 0;
2611
2612 for (i = 0; i < adapter->num_tx_queues; i++) {
2613 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2614 if (err) {
2615 dev_err(&pdev->dev,
2616 "Allocation for Tx Queue %u failed\n", i);
2617 for (i--; i >= 0; i--)
2618 igb_free_tx_resources(adapter->tx_ring[i]);
2619 break;
2620 }
2621 }
2622
2623 for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2624 int r_idx = i % adapter->num_tx_queues;
2625 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2626 }
2627 return err;
2628 }
2629
2630 /**
2631 * igb_setup_tctl - configure the transmit control registers
2632 * @adapter: Board private structure
2633 **/
2634 void igb_setup_tctl(struct igb_adapter *adapter)
2635 {
2636 struct e1000_hw *hw = &adapter->hw;
2637 u32 tctl;
2638
2639 /* disable queue 0 which is enabled by default on 82575 and 82576 */
2640 wr32(E1000_TXDCTL(0), 0);
2641
2642 /* Program the Transmit Control Register */
2643 tctl = rd32(E1000_TCTL);
2644 tctl &= ~E1000_TCTL_CT;
2645 tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2646 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2647
2648 igb_config_collision_dist(hw);
2649
2650 /* Enable transmits */
2651 tctl |= E1000_TCTL_EN;
2652
2653 wr32(E1000_TCTL, tctl);
2654 }
2655
2656 /**
2657 * igb_configure_tx_ring - Configure transmit ring after Reset
2658 * @adapter: board private structure
2659 * @ring: tx ring to configure
2660 *
2661 * Configure a transmit ring after a reset.
2662 **/
2663 void igb_configure_tx_ring(struct igb_adapter *adapter,
2664 struct igb_ring *ring)
2665 {
2666 struct e1000_hw *hw = &adapter->hw;
2667 u32 txdctl;
2668 u64 tdba = ring->dma;
2669 int reg_idx = ring->reg_idx;
2670
2671 /* disable the queue */
2672 txdctl = rd32(E1000_TXDCTL(reg_idx));
2673 wr32(E1000_TXDCTL(reg_idx),
2674 txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2675 wrfl();
2676 mdelay(10);
2677
2678 wr32(E1000_TDLEN(reg_idx),
2679 ring->count * sizeof(union e1000_adv_tx_desc));
2680 wr32(E1000_TDBAL(reg_idx),
2681 tdba & 0x00000000ffffffffULL);
2682 wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2683
2684 ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2685 ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2686 writel(0, ring->head);
2687 writel(0, ring->tail);
2688
2689 txdctl |= IGB_TX_PTHRESH;
2690 txdctl |= IGB_TX_HTHRESH << 8;
2691 txdctl |= IGB_TX_WTHRESH << 16;
2692
2693 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2694 wr32(E1000_TXDCTL(reg_idx), txdctl);
2695 }
2696
2697 /**
2698 * igb_configure_tx - Configure transmit Unit after Reset
2699 * @adapter: board private structure
2700 *
2701 * Configure the Tx unit of the MAC after a reset.
2702 **/
2703 static void igb_configure_tx(struct igb_adapter *adapter)
2704 {
2705 int i;
2706
2707 for (i = 0; i < adapter->num_tx_queues; i++)
2708 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2709 }
2710
2711 /**
2712 * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2713 * @rx_ring: rx descriptor ring (for a specific queue) to setup
2714 *
2715 * Returns 0 on success, negative on failure
2716 **/
2717 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2718 {
2719 struct device *dev = rx_ring->dev;
2720 int size, desc_len;
2721
2722 size = sizeof(struct igb_buffer) * rx_ring->count;
2723 rx_ring->buffer_info = vzalloc(size);
2724 if (!rx_ring->buffer_info)
2725 goto err;
2726
2727 desc_len = sizeof(union e1000_adv_rx_desc);
2728
2729 /* Round up to nearest 4K */
2730 rx_ring->size = rx_ring->count * desc_len;
2731 rx_ring->size = ALIGN(rx_ring->size, 4096);
2732
2733 rx_ring->desc = dma_alloc_coherent(dev,
2734 rx_ring->size,
2735 &rx_ring->dma,
2736 GFP_KERNEL);
2737
2738 if (!rx_ring->desc)
2739 goto err;
2740
2741 rx_ring->next_to_clean = 0;
2742 rx_ring->next_to_use = 0;
2743
2744 return 0;
2745
2746 err:
2747 vfree(rx_ring->buffer_info);
2748 rx_ring->buffer_info = NULL;
2749 dev_err(dev, "Unable to allocate memory for the receive descriptor"
2750 " ring\n");
2751 return -ENOMEM;
2752 }
2753
2754 /**
2755 * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2756 * (Descriptors) for all queues
2757 * @adapter: board private structure
2758 *
2759 * Return 0 on success, negative on failure
2760 **/
2761 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2762 {
2763 struct pci_dev *pdev = adapter->pdev;
2764 int i, err = 0;
2765
2766 for (i = 0; i < adapter->num_rx_queues; i++) {
2767 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2768 if (err) {
2769 dev_err(&pdev->dev,
2770 "Allocation for Rx Queue %u failed\n", i);
2771 for (i--; i >= 0; i--)
2772 igb_free_rx_resources(adapter->rx_ring[i]);
2773 break;
2774 }
2775 }
2776
2777 return err;
2778 }
2779
2780 /**
2781 * igb_setup_mrqc - configure the multiple receive queue control registers
2782 * @adapter: Board private structure
2783 **/
2784 static void igb_setup_mrqc(struct igb_adapter *adapter)
2785 {
2786 struct e1000_hw *hw = &adapter->hw;
2787 u32 mrqc, rxcsum;
2788 u32 j, num_rx_queues, shift = 0, shift2 = 0;
2789 union e1000_reta {
2790 u32 dword;
2791 u8 bytes[4];
2792 } reta;
2793 static const u8 rsshash[40] = {
2794 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2795 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2796 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2797 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2798
2799 /* Fill out hash function seeds */
2800 for (j = 0; j < 10; j++) {
2801 u32 rsskey = rsshash[(j * 4)];
2802 rsskey |= rsshash[(j * 4) + 1] << 8;
2803 rsskey |= rsshash[(j * 4) + 2] << 16;
2804 rsskey |= rsshash[(j * 4) + 3] << 24;
2805 array_wr32(E1000_RSSRK(0), j, rsskey);
2806 }
2807
2808 num_rx_queues = adapter->rss_queues;
2809
2810 if (adapter->vfs_allocated_count) {
2811 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2812 switch (hw->mac.type) {
2813 case e1000_i350:
2814 case e1000_82580:
2815 num_rx_queues = 1;
2816 shift = 0;
2817 break;
2818 case e1000_82576:
2819 shift = 3;
2820 num_rx_queues = 2;
2821 break;
2822 case e1000_82575:
2823 shift = 2;
2824 shift2 = 6;
2825 default:
2826 break;
2827 }
2828 } else {
2829 if (hw->mac.type == e1000_82575)
2830 shift = 6;
2831 }
2832
2833 for (j = 0; j < (32 * 4); j++) {
2834 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2835 if (shift2)
2836 reta.bytes[j & 3] |= num_rx_queues << shift2;
2837 if ((j & 3) == 3)
2838 wr32(E1000_RETA(j >> 2), reta.dword);
2839 }
2840
2841 /*
2842 * Disable raw packet checksumming so that RSS hash is placed in
2843 * descriptor on writeback. No need to enable TCP/UDP/IP checksum
2844 * offloads as they are enabled by default
2845 */
2846 rxcsum = rd32(E1000_RXCSUM);
2847 rxcsum |= E1000_RXCSUM_PCSD;
2848
2849 if (adapter->hw.mac.type >= e1000_82576)
2850 /* Enable Receive Checksum Offload for SCTP */
2851 rxcsum |= E1000_RXCSUM_CRCOFL;
2852
2853 /* Don't need to set TUOFL or IPOFL, they default to 1 */
2854 wr32(E1000_RXCSUM, rxcsum);
2855
2856 /* If VMDq is enabled then we set the appropriate mode for that, else
2857 * we default to RSS so that an RSS hash is calculated per packet even
2858 * if we are only using one queue */
2859 if (adapter->vfs_allocated_count) {
2860 if (hw->mac.type > e1000_82575) {
2861 /* Set the default pool for the PF's first queue */
2862 u32 vtctl = rd32(E1000_VT_CTL);
2863 vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2864 E1000_VT_CTL_DISABLE_DEF_POOL);
2865 vtctl |= adapter->vfs_allocated_count <<
2866 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2867 wr32(E1000_VT_CTL, vtctl);
2868 }
2869 if (adapter->rss_queues > 1)
2870 mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2871 else
2872 mrqc = E1000_MRQC_ENABLE_VMDQ;
2873 } else {
2874 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2875 }
2876 igb_vmm_control(adapter);
2877
2878 /*
2879 * Generate RSS hash based on TCP port numbers and/or
2880 * IPv4/v6 src and dst addresses since UDP cannot be
2881 * hashed reliably due to IP fragmentation
2882 */
2883 mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2884 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2885 E1000_MRQC_RSS_FIELD_IPV6 |
2886 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2887 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2888
2889 wr32(E1000_MRQC, mrqc);
2890 }
2891
2892 /**
2893 * igb_setup_rctl - configure the receive control registers
2894 * @adapter: Board private structure
2895 **/
2896 void igb_setup_rctl(struct igb_adapter *adapter)
2897 {
2898 struct e1000_hw *hw = &adapter->hw;
2899 u32 rctl;
2900
2901 rctl = rd32(E1000_RCTL);
2902
2903 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2904 rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2905
2906 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2907 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2908
2909 /*
2910 * enable stripping of CRC. It's unlikely this will break BMC
2911 * redirection as it did with e1000. Newer features require
2912 * that the HW strips the CRC.
2913 */
2914 rctl |= E1000_RCTL_SECRC;
2915
2916 /* disable store bad packets and clear size bits. */
2917 rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2918
2919 /* enable LPE to prevent packets larger than max_frame_size */
2920 rctl |= E1000_RCTL_LPE;
2921
2922 /* disable queue 0 to prevent tail write w/o re-config */
2923 wr32(E1000_RXDCTL(0), 0);
2924
2925 /* Attention!!! For SR-IOV PF driver operations you must enable
2926 * queue drop for all VF and PF queues to prevent head of line blocking
2927 * if an un-trusted VF does not provide descriptors to hardware.
2928 */
2929 if (adapter->vfs_allocated_count) {
2930 /* set all queue drop enable bits */
2931 wr32(E1000_QDE, ALL_QUEUES);
2932 }
2933
2934 wr32(E1000_RCTL, rctl);
2935 }
2936
2937 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2938 int vfn)
2939 {
2940 struct e1000_hw *hw = &adapter->hw;
2941 u32 vmolr;
2942
2943 /* if it isn't the PF check to see if VFs are enabled and
2944 * increase the size to support vlan tags */
2945 if (vfn < adapter->vfs_allocated_count &&
2946 adapter->vf_data[vfn].vlans_enabled)
2947 size += VLAN_TAG_SIZE;
2948
2949 vmolr = rd32(E1000_VMOLR(vfn));
2950 vmolr &= ~E1000_VMOLR_RLPML_MASK;
2951 vmolr |= size | E1000_VMOLR_LPE;
2952 wr32(E1000_VMOLR(vfn), vmolr);
2953
2954 return 0;
2955 }
2956
2957 /**
2958 * igb_rlpml_set - set maximum receive packet size
2959 * @adapter: board private structure
2960 *
2961 * Configure maximum receivable packet size.
2962 **/
2963 static void igb_rlpml_set(struct igb_adapter *adapter)
2964 {
2965 u32 max_frame_size;
2966 struct e1000_hw *hw = &adapter->hw;
2967 u16 pf_id = adapter->vfs_allocated_count;
2968
2969 max_frame_size = adapter->max_frame_size + VLAN_TAG_SIZE;
2970
2971 /* if vfs are enabled we set RLPML to the largest possible request
2972 * size and set the VMOLR RLPML to the size we need */
2973 if (pf_id) {
2974 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2975 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2976 }
2977
2978 wr32(E1000_RLPML, max_frame_size);
2979 }
2980
2981 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2982 int vfn, bool aupe)
2983 {
2984 struct e1000_hw *hw = &adapter->hw;
2985 u32 vmolr;
2986
2987 /*
2988 * This register exists only on 82576 and newer so if we are older then
2989 * we should exit and do nothing
2990 */
2991 if (hw->mac.type < e1000_82576)
2992 return;
2993
2994 vmolr = rd32(E1000_VMOLR(vfn));
2995 vmolr |= E1000_VMOLR_STRVLAN; /* Strip vlan tags */
2996 if (aupe)
2997 vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */
2998 else
2999 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3000
3001 /* clear all bits that might not be set */
3002 vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3003
3004 if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3005 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3006 /*
3007 * for VMDq only allow the VFs and pool 0 to accept broadcast and
3008 * multicast packets
3009 */
3010 if (vfn <= adapter->vfs_allocated_count)
3011 vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */
3012
3013 wr32(E1000_VMOLR(vfn), vmolr);
3014 }
3015
3016 /**
3017 * igb_configure_rx_ring - Configure a receive ring after Reset
3018 * @adapter: board private structure
3019 * @ring: receive ring to be configured
3020 *
3021 * Configure the Rx unit of the MAC after a reset.
3022 **/
3023 void igb_configure_rx_ring(struct igb_adapter *adapter,
3024 struct igb_ring *ring)
3025 {
3026 struct e1000_hw *hw = &adapter->hw;
3027 u64 rdba = ring->dma;
3028 int reg_idx = ring->reg_idx;
3029 u32 srrctl, rxdctl;
3030
3031 /* disable the queue */
3032 rxdctl = rd32(E1000_RXDCTL(reg_idx));
3033 wr32(E1000_RXDCTL(reg_idx),
3034 rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
3035
3036 /* Set DMA base address registers */
3037 wr32(E1000_RDBAL(reg_idx),
3038 rdba & 0x00000000ffffffffULL);
3039 wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3040 wr32(E1000_RDLEN(reg_idx),
3041 ring->count * sizeof(union e1000_adv_rx_desc));
3042
3043 /* initialize head and tail */
3044 ring->head = hw->hw_addr + E1000_RDH(reg_idx);
3045 ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3046 writel(0, ring->head);
3047 writel(0, ring->tail);
3048
3049 /* set descriptor configuration */
3050 if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
3051 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
3052 E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3053 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3054 srrctl |= IGB_RXBUFFER_16384 >>
3055 E1000_SRRCTL_BSIZEPKT_SHIFT;
3056 #else
3057 srrctl |= (PAGE_SIZE / 2) >>
3058 E1000_SRRCTL_BSIZEPKT_SHIFT;
3059 #endif
3060 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3061 } else {
3062 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
3063 E1000_SRRCTL_BSIZEPKT_SHIFT;
3064 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3065 }
3066 if (hw->mac.type == e1000_82580)
3067 srrctl |= E1000_SRRCTL_TIMESTAMP;
3068 /* Only set Drop Enable if we are supporting multiple queues */
3069 if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3070 srrctl |= E1000_SRRCTL_DROP_EN;
3071
3072 wr32(E1000_SRRCTL(reg_idx), srrctl);
3073
3074 /* set filtering for VMDQ pools */
3075 igb_set_vmolr(adapter, reg_idx & 0x7, true);
3076
3077 /* enable receive descriptor fetching */
3078 rxdctl = rd32(E1000_RXDCTL(reg_idx));
3079 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3080 rxdctl &= 0xFFF00000;
3081 rxdctl |= IGB_RX_PTHRESH;
3082 rxdctl |= IGB_RX_HTHRESH << 8;
3083 rxdctl |= IGB_RX_WTHRESH << 16;
3084 wr32(E1000_RXDCTL(reg_idx), rxdctl);
3085 }
3086
3087 /**
3088 * igb_configure_rx - Configure receive Unit after Reset
3089 * @adapter: board private structure
3090 *
3091 * Configure the Rx unit of the MAC after a reset.
3092 **/
3093 static void igb_configure_rx(struct igb_adapter *adapter)
3094 {
3095 int i;
3096
3097 /* set UTA to appropriate mode */
3098 igb_set_uta(adapter);
3099
3100 /* set the correct pool for the PF default MAC address in entry 0 */
3101 igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3102 adapter->vfs_allocated_count);
3103
3104 /* Setup the HW Rx Head and Tail Descriptor Pointers and
3105 * the Base and Length of the Rx Descriptor Ring */
3106 for (i = 0; i < adapter->num_rx_queues; i++)
3107 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3108 }
3109
3110 /**
3111 * igb_free_tx_resources - Free Tx Resources per Queue
3112 * @tx_ring: Tx descriptor ring for a specific queue
3113 *
3114 * Free all transmit software resources
3115 **/
3116 void igb_free_tx_resources(struct igb_ring *tx_ring)
3117 {
3118 igb_clean_tx_ring(tx_ring);
3119
3120 vfree(tx_ring->buffer_info);
3121 tx_ring->buffer_info = NULL;
3122
3123 /* if not set, then don't free */
3124 if (!tx_ring->desc)
3125 return;
3126
3127 dma_free_coherent(tx_ring->dev, tx_ring->size,
3128 tx_ring->desc, tx_ring->dma);
3129
3130 tx_ring->desc = NULL;
3131 }
3132
3133 /**
3134 * igb_free_all_tx_resources - Free Tx Resources for All Queues
3135 * @adapter: board private structure
3136 *
3137 * Free all transmit software resources
3138 **/
3139 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3140 {
3141 int i;
3142
3143 for (i = 0; i < adapter->num_tx_queues; i++)
3144 igb_free_tx_resources(adapter->tx_ring[i]);
3145 }
3146
3147 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3148 struct igb_buffer *buffer_info)
3149 {
3150 if (buffer_info->dma) {
3151 if (buffer_info->mapped_as_page)
3152 dma_unmap_page(tx_ring->dev,
3153 buffer_info->dma,
3154 buffer_info->length,
3155 DMA_TO_DEVICE);
3156 else
3157 dma_unmap_single(tx_ring->dev,
3158 buffer_info->dma,
3159 buffer_info->length,
3160 DMA_TO_DEVICE);
3161 buffer_info->dma = 0;
3162 }
3163 if (buffer_info->skb) {
3164 dev_kfree_skb_any(buffer_info->skb);
3165 buffer_info->skb = NULL;
3166 }
3167 buffer_info->time_stamp = 0;
3168 buffer_info->length = 0;
3169 buffer_info->next_to_watch = 0;
3170 buffer_info->mapped_as_page = false;
3171 }
3172
3173 /**
3174 * igb_clean_tx_ring - Free Tx Buffers
3175 * @tx_ring: ring to be cleaned
3176 **/
3177 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3178 {
3179 struct igb_buffer *buffer_info;
3180 unsigned long size;
3181 unsigned int i;
3182
3183 if (!tx_ring->buffer_info)
3184 return;
3185 /* Free all the Tx ring sk_buffs */
3186
3187 for (i = 0; i < tx_ring->count; i++) {
3188 buffer_info = &tx_ring->buffer_info[i];
3189 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3190 }
3191
3192 size = sizeof(struct igb_buffer) * tx_ring->count;
3193 memset(tx_ring->buffer_info, 0, size);
3194
3195 /* Zero out the descriptor ring */
3196 memset(tx_ring->desc, 0, tx_ring->size);
3197
3198 tx_ring->next_to_use = 0;
3199 tx_ring->next_to_clean = 0;
3200 }
3201
3202 /**
3203 * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3204 * @adapter: board private structure
3205 **/
3206 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3207 {
3208 int i;
3209
3210 for (i = 0; i < adapter->num_tx_queues; i++)
3211 igb_clean_tx_ring(adapter->tx_ring[i]);
3212 }
3213
3214 /**
3215 * igb_free_rx_resources - Free Rx Resources
3216 * @rx_ring: ring to clean the resources from
3217 *
3218 * Free all receive software resources
3219 **/
3220 void igb_free_rx_resources(struct igb_ring *rx_ring)
3221 {
3222 igb_clean_rx_ring(rx_ring);
3223
3224 vfree(rx_ring->buffer_info);
3225 rx_ring->buffer_info = NULL;
3226
3227 /* if not set, then don't free */
3228 if (!rx_ring->desc)
3229 return;
3230
3231 dma_free_coherent(rx_ring->dev, rx_ring->size,
3232 rx_ring->desc, rx_ring->dma);
3233
3234 rx_ring->desc = NULL;
3235 }
3236
3237 /**
3238 * igb_free_all_rx_resources - Free Rx Resources for All Queues
3239 * @adapter: board private structure
3240 *
3241 * Free all receive software resources
3242 **/
3243 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3244 {
3245 int i;
3246
3247 for (i = 0; i < adapter->num_rx_queues; i++)
3248 igb_free_rx_resources(adapter->rx_ring[i]);
3249 }
3250
3251 /**
3252 * igb_clean_rx_ring - Free Rx Buffers per Queue
3253 * @rx_ring: ring to free buffers from
3254 **/
3255 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3256 {
3257 struct igb_buffer *buffer_info;
3258 unsigned long size;
3259 unsigned int i;
3260
3261 if (!rx_ring->buffer_info)
3262 return;
3263
3264 /* Free all the Rx ring sk_buffs */
3265 for (i = 0; i < rx_ring->count; i++) {
3266 buffer_info = &rx_ring->buffer_info[i];
3267 if (buffer_info->dma) {
3268 dma_unmap_single(rx_ring->dev,
3269 buffer_info->dma,
3270 rx_ring->rx_buffer_len,
3271 DMA_FROM_DEVICE);
3272 buffer_info->dma = 0;
3273 }
3274
3275 if (buffer_info->skb) {
3276 dev_kfree_skb(buffer_info->skb);
3277 buffer_info->skb = NULL;
3278 }
3279 if (buffer_info->page_dma) {
3280 dma_unmap_page(rx_ring->dev,
3281 buffer_info->page_dma,
3282 PAGE_SIZE / 2,
3283 DMA_FROM_DEVICE);
3284 buffer_info->page_dma = 0;
3285 }
3286 if (buffer_info->page) {
3287 put_page(buffer_info->page);
3288 buffer_info->page = NULL;
3289 buffer_info->page_offset = 0;
3290 }
3291 }
3292
3293 size = sizeof(struct igb_buffer) * rx_ring->count;
3294 memset(rx_ring->buffer_info, 0, size);
3295
3296 /* Zero out the descriptor ring */
3297 memset(rx_ring->desc, 0, rx_ring->size);
3298
3299 rx_ring->next_to_clean = 0;
3300 rx_ring->next_to_use = 0;
3301 }
3302
3303 /**
3304 * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3305 * @adapter: board private structure
3306 **/
3307 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3308 {
3309 int i;
3310
3311 for (i = 0; i < adapter->num_rx_queues; i++)
3312 igb_clean_rx_ring(adapter->rx_ring[i]);
3313 }
3314
3315 /**
3316 * igb_set_mac - Change the Ethernet Address of the NIC
3317 * @netdev: network interface device structure
3318 * @p: pointer to an address structure
3319 *
3320 * Returns 0 on success, negative on failure
3321 **/
3322 static int igb_set_mac(struct net_device *netdev, void *p)
3323 {
3324 struct igb_adapter *adapter = netdev_priv(netdev);
3325 struct e1000_hw *hw = &adapter->hw;
3326 struct sockaddr *addr = p;
3327
3328 if (!is_valid_ether_addr(addr->sa_data))
3329 return -EADDRNOTAVAIL;
3330
3331 memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3332 memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3333
3334 /* set the correct pool for the new PF MAC address in entry 0 */
3335 igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3336 adapter->vfs_allocated_count);
3337
3338 return 0;
3339 }
3340
3341 /**
3342 * igb_write_mc_addr_list - write multicast addresses to MTA
3343 * @netdev: network interface device structure
3344 *
3345 * Writes multicast address list to the MTA hash table.
3346 * Returns: -ENOMEM on failure
3347 * 0 on no addresses written
3348 * X on writing X addresses to MTA
3349 **/
3350 static int igb_write_mc_addr_list(struct net_device *netdev)
3351 {
3352 struct igb_adapter *adapter = netdev_priv(netdev);
3353 struct e1000_hw *hw = &adapter->hw;
3354 struct netdev_hw_addr *ha;
3355 u8 *mta_list;
3356 int i;
3357
3358 if (netdev_mc_empty(netdev)) {
3359 /* nothing to program, so clear mc list */
3360 igb_update_mc_addr_list(hw, NULL, 0);
3361 igb_restore_vf_multicasts(adapter);
3362 return 0;
3363 }
3364
3365 mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3366 if (!mta_list)
3367 return -ENOMEM;
3368
3369 /* The shared function expects a packed array of only addresses. */
3370 i = 0;
3371 netdev_for_each_mc_addr(ha, netdev)
3372 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3373
3374 igb_update_mc_addr_list(hw, mta_list, i);
3375 kfree(mta_list);
3376
3377 return netdev_mc_count(netdev);
3378 }
3379
3380 /**
3381 * igb_write_uc_addr_list - write unicast addresses to RAR table
3382 * @netdev: network interface device structure
3383 *
3384 * Writes unicast address list to the RAR table.
3385 * Returns: -ENOMEM on failure/insufficient address space
3386 * 0 on no addresses written
3387 * X on writing X addresses to the RAR table
3388 **/
3389 static int igb_write_uc_addr_list(struct net_device *netdev)
3390 {
3391 struct igb_adapter *adapter = netdev_priv(netdev);
3392 struct e1000_hw *hw = &adapter->hw;
3393 unsigned int vfn = adapter->vfs_allocated_count;
3394 unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3395 int count = 0;
3396
3397 /* return ENOMEM indicating insufficient memory for addresses */
3398 if (netdev_uc_count(netdev) > rar_entries)
3399 return -ENOMEM;
3400
3401 if (!netdev_uc_empty(netdev) && rar_entries) {
3402 struct netdev_hw_addr *ha;
3403
3404 netdev_for_each_uc_addr(ha, netdev) {
3405 if (!rar_entries)
3406 break;
3407 igb_rar_set_qsel(adapter, ha->addr,
3408 rar_entries--,
3409 vfn);
3410 count++;
3411 }
3412 }
3413 /* write the addresses in reverse order to avoid write combining */
3414 for (; rar_entries > 0 ; rar_entries--) {
3415 wr32(E1000_RAH(rar_entries), 0);
3416 wr32(E1000_RAL(rar_entries), 0);
3417 }
3418 wrfl();
3419
3420 return count;
3421 }
3422
3423 /**
3424 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3425 * @netdev: network interface device structure
3426 *
3427 * The set_rx_mode entry point is called whenever the unicast or multicast
3428 * address lists or the network interface flags are updated. This routine is
3429 * responsible for configuring the hardware for proper unicast, multicast,
3430 * promiscuous mode, and all-multi behavior.
3431 **/
3432 static void igb_set_rx_mode(struct net_device *netdev)
3433 {
3434 struct igb_adapter *adapter = netdev_priv(netdev);
3435 struct e1000_hw *hw = &adapter->hw;
3436 unsigned int vfn = adapter->vfs_allocated_count;
3437 u32 rctl, vmolr = 0;
3438 int count;
3439
3440 /* Check for Promiscuous and All Multicast modes */
3441 rctl = rd32(E1000_RCTL);
3442
3443 /* clear the effected bits */
3444 rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3445
3446 if (netdev->flags & IFF_PROMISC) {
3447 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3448 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3449 } else {
3450 if (netdev->flags & IFF_ALLMULTI) {
3451 rctl |= E1000_RCTL_MPE;
3452 vmolr |= E1000_VMOLR_MPME;
3453 } else {
3454 /*
3455 * Write addresses to the MTA, if the attempt fails
3456 * then we should just turn on promiscuous mode so
3457 * that we can at least receive multicast traffic
3458 */
3459 count = igb_write_mc_addr_list(netdev);
3460 if (count < 0) {
3461 rctl |= E1000_RCTL_MPE;
3462 vmolr |= E1000_VMOLR_MPME;
3463 } else if (count) {
3464 vmolr |= E1000_VMOLR_ROMPE;
3465 }
3466 }
3467 /*
3468 * Write addresses to available RAR registers, if there is not
3469 * sufficient space to store all the addresses then enable
3470 * unicast promiscuous mode
3471 */
3472 count = igb_write_uc_addr_list(netdev);
3473 if (count < 0) {
3474 rctl |= E1000_RCTL_UPE;
3475 vmolr |= E1000_VMOLR_ROPE;
3476 }
3477 rctl |= E1000_RCTL_VFE;
3478 }
3479 wr32(E1000_RCTL, rctl);
3480
3481 /*
3482 * In order to support SR-IOV and eventually VMDq it is necessary to set
3483 * the VMOLR to enable the appropriate modes. Without this workaround
3484 * we will have issues with VLAN tag stripping not being done for frames
3485 * that are only arriving because we are the default pool
3486 */
3487 if (hw->mac.type < e1000_82576)
3488 return;
3489
3490 vmolr |= rd32(E1000_VMOLR(vfn)) &
3491 ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3492 wr32(E1000_VMOLR(vfn), vmolr);
3493 igb_restore_vf_multicasts(adapter);
3494 }
3495
3496 static void igb_check_wvbr(struct igb_adapter *adapter)
3497 {
3498 struct e1000_hw *hw = &adapter->hw;
3499 u32 wvbr = 0;
3500
3501 switch (hw->mac.type) {
3502 case e1000_82576:
3503 case e1000_i350:
3504 if (!(wvbr = rd32(E1000_WVBR)))
3505 return;
3506 break;
3507 default:
3508 break;
3509 }
3510
3511 adapter->wvbr |= wvbr;
3512 }
3513
3514 #define IGB_STAGGERED_QUEUE_OFFSET 8
3515
3516 static void igb_spoof_check(struct igb_adapter *adapter)
3517 {
3518 int j;
3519
3520 if (!adapter->wvbr)
3521 return;
3522
3523 for(j = 0; j < adapter->vfs_allocated_count; j++) {
3524 if (adapter->wvbr & (1 << j) ||
3525 adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3526 dev_warn(&adapter->pdev->dev,
3527 "Spoof event(s) detected on VF %d\n", j);
3528 adapter->wvbr &=
3529 ~((1 << j) |
3530 (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3531 }
3532 }
3533 }
3534
3535 /* Need to wait a few seconds after link up to get diagnostic information from
3536 * the phy */
3537 static void igb_update_phy_info(unsigned long data)
3538 {
3539 struct igb_adapter *adapter = (struct igb_adapter *) data;
3540 igb_get_phy_info(&adapter->hw);
3541 }
3542
3543 /**
3544 * igb_has_link - check shared code for link and determine up/down
3545 * @adapter: pointer to driver private info
3546 **/
3547 bool igb_has_link(struct igb_adapter *adapter)
3548 {
3549 struct e1000_hw *hw = &adapter->hw;
3550 bool link_active = false;
3551 s32 ret_val = 0;
3552
3553 /* get_link_status is set on LSC (link status) interrupt or
3554 * rx sequence error interrupt. get_link_status will stay
3555 * false until the e1000_check_for_link establishes link
3556 * for copper adapters ONLY
3557 */
3558 switch (hw->phy.media_type) {
3559 case e1000_media_type_copper:
3560 if (hw->mac.get_link_status) {
3561 ret_val = hw->mac.ops.check_for_link(hw);
3562 link_active = !hw->mac.get_link_status;
3563 } else {
3564 link_active = true;
3565 }
3566 break;
3567 case e1000_media_type_internal_serdes:
3568 ret_val = hw->mac.ops.check_for_link(hw);
3569 link_active = hw->mac.serdes_has_link;
3570 break;
3571 default:
3572 case e1000_media_type_unknown:
3573 break;
3574 }
3575
3576 return link_active;
3577 }
3578
3579 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3580 {
3581 bool ret = false;
3582 u32 ctrl_ext, thstat;
3583
3584 /* check for thermal sensor event on i350, copper only */
3585 if (hw->mac.type == e1000_i350) {
3586 thstat = rd32(E1000_THSTAT);
3587 ctrl_ext = rd32(E1000_CTRL_EXT);
3588
3589 if ((hw->phy.media_type == e1000_media_type_copper) &&
3590 !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3591 ret = !!(thstat & event);
3592 }
3593 }
3594
3595 return ret;
3596 }
3597
3598 /**
3599 * igb_watchdog - Timer Call-back
3600 * @data: pointer to adapter cast into an unsigned long
3601 **/
3602 static void igb_watchdog(unsigned long data)
3603 {
3604 struct igb_adapter *adapter = (struct igb_adapter *)data;
3605 /* Do the rest outside of interrupt context */
3606 schedule_work(&adapter->watchdog_task);
3607 }
3608
3609 static void igb_watchdog_task(struct work_struct *work)
3610 {
3611 struct igb_adapter *adapter = container_of(work,
3612 struct igb_adapter,
3613 watchdog_task);
3614 struct e1000_hw *hw = &adapter->hw;
3615 struct net_device *netdev = adapter->netdev;
3616 u32 link;
3617 int i;
3618
3619 link = igb_has_link(adapter);
3620 if (link) {
3621 if (!netif_carrier_ok(netdev)) {
3622 u32 ctrl;
3623 hw->mac.ops.get_speed_and_duplex(hw,
3624 &adapter->link_speed,
3625 &adapter->link_duplex);
3626
3627 ctrl = rd32(E1000_CTRL);
3628 /* Links status message must follow this format */
3629 printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3630 "Flow Control: %s\n",
3631 netdev->name,
3632 adapter->link_speed,
3633 adapter->link_duplex == FULL_DUPLEX ?
3634 "Full Duplex" : "Half Duplex",
3635 ((ctrl & E1000_CTRL_TFCE) &&
3636 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3637 ((ctrl & E1000_CTRL_RFCE) ? "RX" :
3638 ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None")));
3639
3640 /* check for thermal sensor event */
3641 if (igb_thermal_sensor_event(hw, E1000_THSTAT_LINK_THROTTLE)) {
3642 printk(KERN_INFO "igb: %s The network adapter "
3643 "link speed was downshifted "
3644 "because it overheated.\n",
3645 netdev->name);
3646 }
3647
3648 /* adjust timeout factor according to speed/duplex */
3649 adapter->tx_timeout_factor = 1;
3650 switch (adapter->link_speed) {
3651 case SPEED_10:
3652 adapter->tx_timeout_factor = 14;
3653 break;
3654 case SPEED_100:
3655 /* maybe add some timeout factor ? */
3656 break;
3657 }
3658
3659 netif_carrier_on(netdev);
3660
3661 igb_ping_all_vfs(adapter);
3662 igb_check_vf_rate_limit(adapter);
3663
3664 /* link state has changed, schedule phy info update */
3665 if (!test_bit(__IGB_DOWN, &adapter->state))
3666 mod_timer(&adapter->phy_info_timer,
3667 round_jiffies(jiffies + 2 * HZ));
3668 }
3669 } else {
3670 if (netif_carrier_ok(netdev)) {
3671 adapter->link_speed = 0;
3672 adapter->link_duplex = 0;
3673
3674 /* check for thermal sensor event */
3675 if (igb_thermal_sensor_event(hw, E1000_THSTAT_PWR_DOWN)) {
3676 printk(KERN_ERR "igb: %s The network adapter "
3677 "was stopped because it "
3678 "overheated.\n",
3679 netdev->name);
3680 }
3681
3682 /* Links status message must follow this format */
3683 printk(KERN_INFO "igb: %s NIC Link is Down\n",
3684 netdev->name);
3685 netif_carrier_off(netdev);
3686
3687 igb_ping_all_vfs(adapter);
3688
3689 /* link state has changed, schedule phy info update */
3690 if (!test_bit(__IGB_DOWN, &adapter->state))
3691 mod_timer(&adapter->phy_info_timer,
3692 round_jiffies(jiffies + 2 * HZ));
3693 }
3694 }
3695
3696 spin_lock(&adapter->stats64_lock);
3697 igb_update_stats(adapter, &adapter->stats64);
3698 spin_unlock(&adapter->stats64_lock);
3699
3700 for (i = 0; i < adapter->num_tx_queues; i++) {
3701 struct igb_ring *tx_ring = adapter->tx_ring[i];
3702 if (!netif_carrier_ok(netdev)) {
3703 /* We've lost link, so the controller stops DMA,
3704 * but we've got queued Tx work that's never going
3705 * to get done, so reset controller to flush Tx.
3706 * (Do the reset outside of interrupt context). */
3707 if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3708 adapter->tx_timeout_count++;
3709 schedule_work(&adapter->reset_task);
3710 /* return immediately since reset is imminent */
3711 return;
3712 }
3713 }
3714
3715 /* Force detection of hung controller every watchdog period */
3716 tx_ring->detect_tx_hung = true;
3717 }
3718
3719 /* Cause software interrupt to ensure rx ring is cleaned */
3720 if (adapter->msix_entries) {
3721 u32 eics = 0;
3722 for (i = 0; i < adapter->num_q_vectors; i++) {
3723 struct igb_q_vector *q_vector = adapter->q_vector[i];
3724 eics |= q_vector->eims_value;
3725 }
3726 wr32(E1000_EICS, eics);
3727 } else {
3728 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3729 }
3730
3731 igb_spoof_check(adapter);
3732
3733 /* Reset the timer */
3734 if (!test_bit(__IGB_DOWN, &adapter->state))
3735 mod_timer(&adapter->watchdog_timer,
3736 round_jiffies(jiffies + 2 * HZ));
3737 }
3738
/* ITR classes used by the dynamic interrupt throttling code below */
enum latency_range {
	lowest_latency	= 0,
	low_latency	= 1,
	bulk_latency	= 2,
	latency_invalid	= 255
};
3745
3746 /**
3747 * igb_update_ring_itr - update the dynamic ITR value based on packet size
3748 *
3749 * Stores a new ITR value based on strictly on packet size. This
3750 * algorithm is less sophisticated than that used in igb_update_itr,
3751 * due to the difficulty of synchronizing statistics across multiple
3752 * receive rings. The divisors and thresholds used by this function
3753 * were determined based on theoretical maximum wire speed and testing
3754 * data, in order to minimize response time while increasing bulk
3755 * throughput.
3756 * This functionality is controlled by the InterruptThrottleRate module
3757 * parameter (see igb_param.c)
3758 * NOTE: This function is called only when operating in a multiqueue
3759 * receive environment.
3760 * @q_vector: pointer to q_vector
3761 **/
3762 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3763 {
3764 int new_val = q_vector->itr_val;
3765 int avg_wire_size = 0;
3766 struct igb_adapter *adapter = q_vector->adapter;
3767 struct igb_ring *ring;
3768 unsigned int packets;
3769
3770 /* For non-gigabit speeds, just fix the interrupt rate at 4000
3771 * ints/sec - ITR timer value of 120 ticks.
3772 */
3773 if (adapter->link_speed != SPEED_1000) {
3774 new_val = 976;
3775 goto set_itr_val;
3776 }
3777
3778 ring = q_vector->rx_ring;
3779 if (ring) {
3780 packets = ACCESS_ONCE(ring->total_packets);
3781
3782 if (packets)
3783 avg_wire_size = ring->total_bytes / packets;
3784 }
3785
3786 ring = q_vector->tx_ring;
3787 if (ring) {
3788 packets = ACCESS_ONCE(ring->total_packets);
3789
3790 if (packets)
3791 avg_wire_size = max_t(u32, avg_wire_size,
3792 ring->total_bytes / packets);
3793 }
3794
3795 /* if avg_wire_size isn't set no work was done */
3796 if (!avg_wire_size)
3797 goto clear_counts;
3798
3799 /* Add 24 bytes to size to account for CRC, preamble, and gap */
3800 avg_wire_size += 24;
3801
3802 /* Don't starve jumbo frames */
3803 avg_wire_size = min(avg_wire_size, 3000);
3804
3805 /* Give a little boost to mid-size frames */
3806 if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3807 new_val = avg_wire_size / 3;
3808 else
3809 new_val = avg_wire_size / 2;
3810
3811 /* when in itr mode 3 do not exceed 20K ints/sec */
3812 if (adapter->rx_itr_setting == 3 && new_val < 196)
3813 new_val = 196;
3814
3815 set_itr_val:
3816 if (new_val != q_vector->itr_val) {
3817 q_vector->itr_val = new_val;
3818 q_vector->set_itr = 1;
3819 }
3820 clear_counts:
3821 if (q_vector->rx_ring) {
3822 q_vector->rx_ring->total_bytes = 0;
3823 q_vector->rx_ring->total_packets = 0;
3824 }
3825 if (q_vector->tx_ring) {
3826 q_vector->tx_ring->total_bytes = 0;
3827 q_vector->tx_ring->total_packets = 0;
3828 }
3829 }
3830
3831 /**
3832 * igb_update_itr - update the dynamic ITR value based on statistics
3833 * Stores a new ITR value based on packets and byte
3834 * counts during the last interrupt. The advantage of per interrupt
3835 * computation is faster updates and more accurate ITR for the current
3836 * traffic pattern. Constants in this function were computed
3837 * based on theoretical maximum wire speed and thresholds were set based
3838 * on testing data as well as attempting to minimize response time
3839 * while increasing bulk throughput.
3840 * this functionality is controlled by the InterruptThrottleRate module
3841 * parameter (see igb_param.c)
3842 * NOTE: These calculations are only valid when operating in a single-
3843 * queue environment.
3844 * @adapter: pointer to adapter
3845 * @itr_setting: current q_vector->itr_val
3846 * @packets: the number of packets during this measurement interval
3847 * @bytes: the number of bytes during this measurement interval
3848 **/
3849 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3850 int packets, int bytes)
3851 {
3852 unsigned int retval = itr_setting;
3853
3854 if (packets == 0)
3855 goto update_itr_done;
3856
3857 switch (itr_setting) {
3858 case lowest_latency:
3859 /* handle TSO and jumbo frames */
3860 if (bytes/packets > 8000)
3861 retval = bulk_latency;
3862 else if ((packets < 5) && (bytes > 512))
3863 retval = low_latency;
3864 break;
3865 case low_latency: /* 50 usec aka 20000 ints/s */
3866 if (bytes > 10000) {
3867 /* this if handles the TSO accounting */
3868 if (bytes/packets > 8000) {
3869 retval = bulk_latency;
3870 } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3871 retval = bulk_latency;
3872 } else if ((packets > 35)) {
3873 retval = lowest_latency;
3874 }
3875 } else if (bytes/packets > 2000) {
3876 retval = bulk_latency;
3877 } else if (packets <= 2 && bytes < 512) {
3878 retval = lowest_latency;
3879 }
3880 break;
3881 case bulk_latency: /* 250 usec aka 4000 ints/s */
3882 if (bytes > 25000) {
3883 if (packets > 35)
3884 retval = low_latency;
3885 } else if (bytes < 1500) {
3886 retval = low_latency;
3887 }
3888 break;
3889 }
3890
3891 update_itr_done:
3892 return retval;
3893 }
3894
3895 static void igb_set_itr(struct igb_adapter *adapter)
3896 {
3897 struct igb_q_vector *q_vector = adapter->q_vector[0];
3898 u16 current_itr;
3899 u32 new_itr = q_vector->itr_val;
3900
3901 /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3902 if (adapter->link_speed != SPEED_1000) {
3903 current_itr = 0;
3904 new_itr = 4000;
3905 goto set_itr_now;
3906 }
3907
3908 adapter->rx_itr = igb_update_itr(adapter,
3909 adapter->rx_itr,
3910 q_vector->rx_ring->total_packets,
3911 q_vector->rx_ring->total_bytes);
3912
3913 adapter->tx_itr = igb_update_itr(adapter,
3914 adapter->tx_itr,
3915 q_vector->tx_ring->total_packets,
3916 q_vector->tx_ring->total_bytes);
3917 current_itr = max(adapter->rx_itr, adapter->tx_itr);
3918
3919 /* conservative mode (itr 3) eliminates the lowest_latency setting */
3920 if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3921 current_itr = low_latency;
3922
3923 switch (current_itr) {
3924 /* counts and packets in update_itr are dependent on these numbers */
3925 case lowest_latency:
3926 new_itr = 56; /* aka 70,000 ints/sec */
3927 break;
3928 case low_latency:
3929 new_itr = 196; /* aka 20,000 ints/sec */
3930 break;
3931 case bulk_latency:
3932 new_itr = 980; /* aka 4,000 ints/sec */
3933 break;
3934 default:
3935 break;
3936 }
3937
3938 set_itr_now:
3939 q_vector->rx_ring->total_bytes = 0;
3940 q_vector->rx_ring->total_packets = 0;
3941 q_vector->tx_ring->total_bytes = 0;
3942 q_vector->tx_ring->total_packets = 0;
3943
3944 if (new_itr != q_vector->itr_val) {
3945 /* this attempts to bias the interrupt rate towards Bulk
3946 * by adding intermediate steps when interrupt rate is
3947 * increasing */
3948 new_itr = new_itr > q_vector->itr_val ?
3949 max((new_itr * q_vector->itr_val) /
3950 (new_itr + (q_vector->itr_val >> 2)),
3951 new_itr) :
3952 new_itr;
3953 /* Don't write the value here; it resets the adapter's
3954 * internal timer, and causes us to delay far longer than
3955 * we should between interrupts. Instead, we write the ITR
3956 * value at the beginning of the next interrupt so the timing
3957 * ends up being correct.
3958 */
3959 q_vector->itr_val = new_itr;
3960 q_vector->set_itr = 1;
3961 }
3962 }
3963
/*
 * Per-packet transmit flags.  The low bits are individual feature flags;
 * the bits covered by IGB_TX_FLAGS_VLAN_MASK are copied into the context
 * descriptor's vlan_macip_lens field when IGB_TX_FLAGS_VLAN is set.
 */
#define IGB_TX_FLAGS_CSUM		(1 << 0)
#define IGB_TX_FLAGS_VLAN		(1 << 1)
#define IGB_TX_FLAGS_TSO		(1 << 2)
#define IGB_TX_FLAGS_IPV4		(1 << 3)
#define IGB_TX_FLAGS_TSTAMP		(1 << 4)
#define IGB_TX_FLAGS_VLAN_MASK		0xffff0000
#define IGB_TX_FLAGS_VLAN_SHIFT		16
3971
3972 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3973 struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3974 {
3975 struct e1000_adv_tx_context_desc *context_desc;
3976 unsigned int i;
3977 int err;
3978 struct igb_buffer *buffer_info;
3979 u32 info = 0, tu_cmd = 0;
3980 u32 mss_l4len_idx;
3981 u8 l4len;
3982
3983 if (skb_header_cloned(skb)) {
3984 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3985 if (err)
3986 return err;
3987 }
3988
3989 l4len = tcp_hdrlen(skb);
3990 *hdr_len += l4len;
3991
3992 if (skb->protocol == htons(ETH_P_IP)) {
3993 struct iphdr *iph = ip_hdr(skb);
3994 iph->tot_len = 0;
3995 iph->check = 0;
3996 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3997 iph->daddr, 0,
3998 IPPROTO_TCP,
3999 0);
4000 } else if (skb_is_gso_v6(skb)) {
4001 ipv6_hdr(skb)->payload_len = 0;
4002 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4003 &ipv6_hdr(skb)->daddr,
4004 0, IPPROTO_TCP, 0);
4005 }
4006
4007 i = tx_ring->next_to_use;
4008
4009 buffer_info = &tx_ring->buffer_info[i];
4010 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4011 /* VLAN MACLEN IPLEN */
4012 if (tx_flags & IGB_TX_FLAGS_VLAN)
4013 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4014 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4015 *hdr_len += skb_network_offset(skb);
4016 info |= skb_network_header_len(skb);
4017 *hdr_len += skb_network_header_len(skb);
4018 context_desc->vlan_macip_lens = cpu_to_le32(info);
4019
4020 /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4021 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4022
4023 if (skb->protocol == htons(ETH_P_IP))
4024 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4025 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4026
4027 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4028
4029 /* MSS L4LEN IDX */
4030 mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
4031 mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
4032
4033 /* For 82575, context index must be unique per ring. */
4034 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4035 mss_l4len_idx |= tx_ring->reg_idx << 4;
4036
4037 context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
4038 context_desc->seqnum_seed = 0;
4039
4040 buffer_info->time_stamp = jiffies;
4041 buffer_info->next_to_watch = i;
4042 buffer_info->dma = 0;
4043 i++;
4044 if (i == tx_ring->count)
4045 i = 0;
4046
4047 tx_ring->next_to_use = i;
4048
4049 return true;
4050 }
4051
4052 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
4053 struct sk_buff *skb, u32 tx_flags)
4054 {
4055 struct e1000_adv_tx_context_desc *context_desc;
4056 struct device *dev = tx_ring->dev;
4057 struct igb_buffer *buffer_info;
4058 u32 info = 0, tu_cmd = 0;
4059 unsigned int i;
4060
4061 if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
4062 (tx_flags & IGB_TX_FLAGS_VLAN)) {
4063 i = tx_ring->next_to_use;
4064 buffer_info = &tx_ring->buffer_info[i];
4065 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
4066
4067 if (tx_flags & IGB_TX_FLAGS_VLAN)
4068 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
4069
4070 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
4071 if (skb->ip_summed == CHECKSUM_PARTIAL)
4072 info |= skb_network_header_len(skb);
4073
4074 context_desc->vlan_macip_lens = cpu_to_le32(info);
4075
4076 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
4077
4078 if (skb->ip_summed == CHECKSUM_PARTIAL) {
4079 __be16 protocol;
4080
4081 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
4082 const struct vlan_ethhdr *vhdr =
4083 (const struct vlan_ethhdr*)skb->data;
4084
4085 protocol = vhdr->h_vlan_encapsulated_proto;
4086 } else {
4087 protocol = skb->protocol;
4088 }
4089
4090 switch (protocol) {
4091 case cpu_to_be16(ETH_P_IP):
4092 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
4093 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
4094 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4095 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
4096 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4097 break;
4098 case cpu_to_be16(ETH_P_IPV6):
4099 /* XXX what about other V6 headers?? */
4100 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
4101 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4102 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
4103 tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4104 break;
4105 default:
4106 if (unlikely(net_ratelimit()))
4107 dev_warn(dev,
4108 "partial checksum but proto=%x!\n",
4109 skb->protocol);
4110 break;
4111 }
4112 }
4113
4114 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
4115 context_desc->seqnum_seed = 0;
4116 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
4117 context_desc->mss_l4len_idx =
4118 cpu_to_le32(tx_ring->reg_idx << 4);
4119
4120 buffer_info->time_stamp = jiffies;
4121 buffer_info->next_to_watch = i;
4122 buffer_info->dma = 0;
4123
4124 i++;
4125 if (i == tx_ring->count)
4126 i = 0;
4127 tx_ring->next_to_use = i;
4128
4129 return true;
4130 }
4131 return false;
4132 }
4133
4134 #define IGB_MAX_TXD_PWR 16
4135 #define IGB_MAX_DATA_PER_TXD (1<<IGB_MAX_TXD_PWR)
4136
4137 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
4138 unsigned int first)
4139 {
4140 struct igb_buffer *buffer_info;
4141 struct device *dev = tx_ring->dev;
4142 unsigned int hlen = skb_headlen(skb);
4143 unsigned int count = 0, i;
4144 unsigned int f;
4145 u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
4146
4147 i = tx_ring->next_to_use;
4148
4149 buffer_info = &tx_ring->buffer_info[i];
4150 BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
4151 buffer_info->length = hlen;
4152 /* set time_stamp *before* dma to help avoid a possible race */
4153 buffer_info->time_stamp = jiffies;
4154 buffer_info->next_to_watch = i;
4155 buffer_info->dma = dma_map_single(dev, skb->data, hlen,
4156 DMA_TO_DEVICE);
4157 if (dma_mapping_error(dev, buffer_info->dma))
4158 goto dma_error;
4159
4160 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
4161 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
4162 unsigned int len = frag->size;
4163
4164 count++;
4165 i++;
4166 if (i == tx_ring->count)
4167 i = 0;
4168
4169 buffer_info = &tx_ring->buffer_info[i];
4170 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
4171 buffer_info->length = len;
4172 buffer_info->time_stamp = jiffies;
4173 buffer_info->next_to_watch = i;
4174 buffer_info->mapped_as_page = true;
4175 buffer_info->dma = dma_map_page(dev,
4176 frag->page,
4177 frag->page_offset,
4178 len,
4179 DMA_TO_DEVICE);
4180 if (dma_mapping_error(dev, buffer_info->dma))
4181 goto dma_error;
4182
4183 }
4184
4185 tx_ring->buffer_info[i].skb = skb;
4186 tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
4187 /* multiply data chunks by size of headers */
4188 tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
4189 tx_ring->buffer_info[i].gso_segs = gso_segs;
4190 tx_ring->buffer_info[first].next_to_watch = i;
4191
4192 return ++count;
4193
4194 dma_error:
4195 dev_err(dev, "TX DMA map failed\n");
4196
4197 /* clear timestamp and dma mappings for failed buffer_info mapping */
4198 buffer_info->dma = 0;
4199 buffer_info->time_stamp = 0;
4200 buffer_info->length = 0;
4201 buffer_info->next_to_watch = 0;
4202 buffer_info->mapped_as_page = false;
4203
4204 /* clear timestamp and dma mappings for remaining portion of packet */
4205 while (count--) {
4206 if (i == 0)
4207 i = tx_ring->count;
4208 i--;
4209 buffer_info = &tx_ring->buffer_info[i];
4210 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4211 }
4212
4213 return 0;
4214 }
4215
4216 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
4217 u32 tx_flags, int count, u32 paylen,
4218 u8 hdr_len)
4219 {
4220 union e1000_adv_tx_desc *tx_desc;
4221 struct igb_buffer *buffer_info;
4222 u32 olinfo_status = 0, cmd_type_len;
4223 unsigned int i = tx_ring->next_to_use;
4224
4225 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4226 E1000_ADVTXD_DCMD_DEXT);
4227
4228 if (tx_flags & IGB_TX_FLAGS_VLAN)
4229 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4230
4231 if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4232 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4233
4234 if (tx_flags & IGB_TX_FLAGS_TSO) {
4235 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4236
4237 /* insert tcp checksum */
4238 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4239
4240 /* insert ip checksum */
4241 if (tx_flags & IGB_TX_FLAGS_IPV4)
4242 olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4243
4244 } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4245 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4246 }
4247
4248 if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4249 (tx_flags & (IGB_TX_FLAGS_CSUM |
4250 IGB_TX_FLAGS_TSO |
4251 IGB_TX_FLAGS_VLAN)))
4252 olinfo_status |= tx_ring->reg_idx << 4;
4253
4254 olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4255
4256 do {
4257 buffer_info = &tx_ring->buffer_info[i];
4258 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4259 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4260 tx_desc->read.cmd_type_len =
4261 cpu_to_le32(cmd_type_len | buffer_info->length);
4262 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4263 count--;
4264 i++;
4265 if (i == tx_ring->count)
4266 i = 0;
4267 } while (count > 0);
4268
4269 tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4270 /* Force memory writes to complete before letting h/w
4271 * know there are new descriptors to fetch. (Only
4272 * applicable for weak-ordered memory model archs,
4273 * such as IA-64). */
4274 wmb();
4275
4276 tx_ring->next_to_use = i;
4277 writel(i, tx_ring->tail);
4278 /* we need this if more than one processor can write to our tail
4279 * at a time, it syncronizes IO on IA64/Altix systems */
4280 mmiowb();
4281 }
4282
4283 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4284 {
4285 struct net_device *netdev = tx_ring->netdev;
4286
4287 netif_stop_subqueue(netdev, tx_ring->queue_index);
4288
4289 /* Herbert's original patch had:
4290 * smp_mb__after_netif_stop_queue();
4291 * but since that doesn't exist yet, just open code it. */
4292 smp_mb();
4293
4294 /* We need to check again in a case another CPU has just
4295 * made room available. */
4296 if (igb_desc_unused(tx_ring) < size)
4297 return -EBUSY;
4298
4299 /* A reprieve! */
4300 netif_wake_subqueue(netdev, tx_ring->queue_index);
4301
4302 u64_stats_update_begin(&tx_ring->tx_syncp2);
4303 tx_ring->tx_stats.restart_queue2++;
4304 u64_stats_update_end(&tx_ring->tx_syncp2);
4305
4306 return 0;
4307 }
4308
/**
 * igb_maybe_stop_tx - stop the queue if fewer than @size descriptors are free
 * @tx_ring: ring to check
 * @size: number of free descriptors required
 *
 * Returns 0 when enough descriptors are free, otherwise the result of
 * __igb_maybe_stop_tx().
 **/
static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	/* take the slow path only when the ring is actually short */
	if (igb_desc_unused(tx_ring) < size)
		return __igb_maybe_stop_tx(tx_ring, size);

	return 0;
}
4315
4316 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4317 struct igb_ring *tx_ring)
4318 {
4319 int tso = 0, count;
4320 u32 tx_flags = 0;
4321 u16 first;
4322 u8 hdr_len = 0;
4323
4324 /* need: 1 descriptor per page,
4325 * + 2 desc gap to keep tail from touching head,
4326 * + 1 desc for skb->data,
4327 * + 1 desc for context descriptor,
4328 * otherwise try next time */
4329 if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4330 /* this is a hard error */
4331 return NETDEV_TX_BUSY;
4332 }
4333
4334 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4335 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4336 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4337 }
4338
4339 if (vlan_tx_tag_present(skb)) {
4340 tx_flags |= IGB_TX_FLAGS_VLAN;
4341 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4342 }
4343
4344 if (skb->protocol == htons(ETH_P_IP))
4345 tx_flags |= IGB_TX_FLAGS_IPV4;
4346
4347 first = tx_ring->next_to_use;
4348 if (skb_is_gso(skb)) {
4349 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4350
4351 if (tso < 0) {
4352 dev_kfree_skb_any(skb);
4353 return NETDEV_TX_OK;
4354 }
4355 }
4356
4357 if (tso)
4358 tx_flags |= IGB_TX_FLAGS_TSO;
4359 else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4360 (skb->ip_summed == CHECKSUM_PARTIAL))
4361 tx_flags |= IGB_TX_FLAGS_CSUM;
4362
4363 /*
4364 * count reflects descriptors mapped, if 0 or less then mapping error
4365 * has occurred and we need to rewind the descriptor queue
4366 */
4367 count = igb_tx_map_adv(tx_ring, skb, first);
4368 if (!count) {
4369 dev_kfree_skb_any(skb);
4370 tx_ring->buffer_info[first].time_stamp = 0;
4371 tx_ring->next_to_use = first;
4372 return NETDEV_TX_OK;
4373 }
4374
4375 igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4376
4377 /* Make sure there is space in the ring for the next send. */
4378 igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4379
4380 return NETDEV_TX_OK;
4381 }
4382
4383 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4384 struct net_device *netdev)
4385 {
4386 struct igb_adapter *adapter = netdev_priv(netdev);
4387 struct igb_ring *tx_ring;
4388 int r_idx = 0;
4389
4390 if (test_bit(__IGB_DOWN, &adapter->state)) {
4391 dev_kfree_skb_any(skb);
4392 return NETDEV_TX_OK;
4393 }
4394
4395 if (skb->len <= 0) {
4396 dev_kfree_skb_any(skb);
4397 return NETDEV_TX_OK;
4398 }
4399
4400 r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4401 tx_ring = adapter->multi_tx_table[r_idx];
4402
4403 /* This goes back to the question of how to logically map a tx queue
4404 * to a flow. Right now, performance is impacted slightly negatively
4405 * if using multiple tx queues. If the stack breaks away from a
4406 * single qdisc implementation, we can look at this again. */
4407 return igb_xmit_frame_ring_adv(skb, tx_ring);
4408 }
4409
4410 /**
4411 * igb_tx_timeout - Respond to a Tx Hang
4412 * @netdev: network interface device structure
4413 **/
4414 static void igb_tx_timeout(struct net_device *netdev)
4415 {
4416 struct igb_adapter *adapter = netdev_priv(netdev);
4417 struct e1000_hw *hw = &adapter->hw;
4418
4419 /* Do the reset outside of interrupt context */
4420 adapter->tx_timeout_count++;
4421
4422 if (hw->mac.type == e1000_82580)
4423 hw->dev_spec._82575.global_device_reset = true;
4424
4425 schedule_work(&adapter->reset_task);
4426 wr32(E1000_EICS,
4427 (adapter->eims_enable_mask & ~adapter->eims_other));
4428 }
4429
4430 static void igb_reset_task(struct work_struct *work)
4431 {
4432 struct igb_adapter *adapter;
4433 adapter = container_of(work, struct igb_adapter, reset_task);
4434
4435 igb_dump(adapter);
4436 netdev_err(adapter->netdev, "Reset adapter\n");
4437 igb_reinit_locked(adapter);
4438 }
4439
4440 /**
4441 * igb_get_stats64 - Get System Network Statistics
4442 * @netdev: network interface device structure
4443 * @stats: rtnl_link_stats64 pointer
4444 *
4445 **/
4446 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4447 struct rtnl_link_stats64 *stats)
4448 {
4449 struct igb_adapter *adapter = netdev_priv(netdev);
4450
4451 spin_lock(&adapter->stats64_lock);
4452 igb_update_stats(adapter, &adapter->stats64);
4453 memcpy(stats, &adapter->stats64, sizeof(*stats));
4454 spin_unlock(&adapter->stats64_lock);
4455
4456 return stats;
4457 }
4458
4459 /**
4460 * igb_change_mtu - Change the Maximum Transfer Unit
4461 * @netdev: network interface device structure
4462 * @new_mtu: new value for maximum frame size
4463 *
4464 * Returns 0 on success, negative on failure
4465 **/
4466 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4467 {
4468 struct igb_adapter *adapter = netdev_priv(netdev);
4469 struct pci_dev *pdev = adapter->pdev;
4470 int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4471 u32 rx_buffer_len, i;
4472
4473 if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4474 dev_err(&pdev->dev, "Invalid MTU setting\n");
4475 return -EINVAL;
4476 }
4477
4478 if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4479 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4480 return -EINVAL;
4481 }
4482
4483 while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4484 msleep(1);
4485
4486 /* igb_down has a dependency on max_frame_size */
4487 adapter->max_frame_size = max_frame;
4488
4489 /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4490 * means we reserve 2 more, this pushes us to allocate from the next
4491 * larger slab size.
4492 * i.e. RXBUFFER_2048 --> size-4096 slab
4493 */
4494
4495 if (adapter->hw.mac.type == e1000_82580)
4496 max_frame += IGB_TS_HDR_LEN;
4497
4498 if (max_frame <= IGB_RXBUFFER_1024)
4499 rx_buffer_len = IGB_RXBUFFER_1024;
4500 else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4501 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4502 else
4503 rx_buffer_len = IGB_RXBUFFER_128;
4504
4505 if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4506 (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4507 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4508
4509 if ((adapter->hw.mac.type == e1000_82580) &&
4510 (rx_buffer_len == IGB_RXBUFFER_128))
4511 rx_buffer_len += IGB_RXBUFFER_64;
4512
4513 if (netif_running(netdev))
4514 igb_down(adapter);
4515
4516 dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4517 netdev->mtu, new_mtu);
4518 netdev->mtu = new_mtu;
4519
4520 for (i = 0; i < adapter->num_rx_queues; i++)
4521 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4522
4523 if (netif_running(netdev))
4524 igb_up(adapter);
4525 else
4526 igb_reset(adapter);
4527
4528 clear_bit(__IGB_RESETTING, &adapter->state);
4529
4530 return 0;
4531 }
4532
4533 /**
4534 * igb_update_stats - Update the board statistics counters
4535 * @adapter: board private structure
4536 **/
4537
4538 void igb_update_stats(struct igb_adapter *adapter,
4539 struct rtnl_link_stats64 *net_stats)
4540 {
4541 struct e1000_hw *hw = &adapter->hw;
4542 struct pci_dev *pdev = adapter->pdev;
4543 u32 reg, mpc;
4544 u16 phy_tmp;
4545 int i;
4546 u64 bytes, packets;
4547 unsigned int start;
4548 u64 _bytes, _packets;
4549
4550 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4551
4552 /*
4553 * Prevent stats update while adapter is being reset, or if the pci
4554 * connection is down.
4555 */
4556 if (adapter->link_speed == 0)
4557 return;
4558 if (pci_channel_offline(pdev))
4559 return;
4560
4561 bytes = 0;
4562 packets = 0;
4563 for (i = 0; i < adapter->num_rx_queues; i++) {
4564 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4565 struct igb_ring *ring = adapter->rx_ring[i];
4566
4567 ring->rx_stats.drops += rqdpc_tmp;
4568 net_stats->rx_fifo_errors += rqdpc_tmp;
4569
4570 do {
4571 start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4572 _bytes = ring->rx_stats.bytes;
4573 _packets = ring->rx_stats.packets;
4574 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4575 bytes += _bytes;
4576 packets += _packets;
4577 }
4578
4579 net_stats->rx_bytes = bytes;
4580 net_stats->rx_packets = packets;
4581
4582 bytes = 0;
4583 packets = 0;
4584 for (i = 0; i < adapter->num_tx_queues; i++) {
4585 struct igb_ring *ring = adapter->tx_ring[i];
4586 do {
4587 start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4588 _bytes = ring->tx_stats.bytes;
4589 _packets = ring->tx_stats.packets;
4590 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4591 bytes += _bytes;
4592 packets += _packets;
4593 }
4594 net_stats->tx_bytes = bytes;
4595 net_stats->tx_packets = packets;
4596
4597 /* read stats registers */
4598 adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4599 adapter->stats.gprc += rd32(E1000_GPRC);
4600 adapter->stats.gorc += rd32(E1000_GORCL);
4601 rd32(E1000_GORCH); /* clear GORCL */
4602 adapter->stats.bprc += rd32(E1000_BPRC);
4603 adapter->stats.mprc += rd32(E1000_MPRC);
4604 adapter->stats.roc += rd32(E1000_ROC);
4605
4606 adapter->stats.prc64 += rd32(E1000_PRC64);
4607 adapter->stats.prc127 += rd32(E1000_PRC127);
4608 adapter->stats.prc255 += rd32(E1000_PRC255);
4609 adapter->stats.prc511 += rd32(E1000_PRC511);
4610 adapter->stats.prc1023 += rd32(E1000_PRC1023);
4611 adapter->stats.prc1522 += rd32(E1000_PRC1522);
4612 adapter->stats.symerrs += rd32(E1000_SYMERRS);
4613 adapter->stats.sec += rd32(E1000_SEC);
4614
4615 mpc = rd32(E1000_MPC);
4616 adapter->stats.mpc += mpc;
4617 net_stats->rx_fifo_errors += mpc;
4618 adapter->stats.scc += rd32(E1000_SCC);
4619 adapter->stats.ecol += rd32(E1000_ECOL);
4620 adapter->stats.mcc += rd32(E1000_MCC);
4621 adapter->stats.latecol += rd32(E1000_LATECOL);
4622 adapter->stats.dc += rd32(E1000_DC);
4623 adapter->stats.rlec += rd32(E1000_RLEC);
4624 adapter->stats.xonrxc += rd32(E1000_XONRXC);
4625 adapter->stats.xontxc += rd32(E1000_XONTXC);
4626 adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4627 adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4628 adapter->stats.fcruc += rd32(E1000_FCRUC);
4629 adapter->stats.gptc += rd32(E1000_GPTC);
4630 adapter->stats.gotc += rd32(E1000_GOTCL);
4631 rd32(E1000_GOTCH); /* clear GOTCL */
4632 adapter->stats.rnbc += rd32(E1000_RNBC);
4633 adapter->stats.ruc += rd32(E1000_RUC);
4634 adapter->stats.rfc += rd32(E1000_RFC);
4635 adapter->stats.rjc += rd32(E1000_RJC);
4636 adapter->stats.tor += rd32(E1000_TORH);
4637 adapter->stats.tot += rd32(E1000_TOTH);
4638 adapter->stats.tpr += rd32(E1000_TPR);
4639
4640 adapter->stats.ptc64 += rd32(E1000_PTC64);
4641 adapter->stats.ptc127 += rd32(E1000_PTC127);
4642 adapter->stats.ptc255 += rd32(E1000_PTC255);
4643 adapter->stats.ptc511 += rd32(E1000_PTC511);
4644 adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4645 adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4646
4647 adapter->stats.mptc += rd32(E1000_MPTC);
4648 adapter->stats.bptc += rd32(E1000_BPTC);
4649
4650 adapter->stats.tpt += rd32(E1000_TPT);
4651 adapter->stats.colc += rd32(E1000_COLC);
4652
4653 adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4654 /* read internal phy specific stats */
4655 reg = rd32(E1000_CTRL_EXT);
4656 if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4657 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4658 adapter->stats.tncrs += rd32(E1000_TNCRS);
4659 }
4660
4661 adapter->stats.tsctc += rd32(E1000_TSCTC);
4662 adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4663
4664 adapter->stats.iac += rd32(E1000_IAC);
4665 adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4666 adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4667 adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4668 adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4669 adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4670 adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4671 adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4672 adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4673
4674 /* Fill out the OS statistics structure */
4675 net_stats->multicast = adapter->stats.mprc;
4676 net_stats->collisions = adapter->stats.colc;
4677
4678 /* Rx Errors */
4679
4680 /* RLEC on some newer hardware can be incorrect so build
4681 * our own version based on RUC and ROC */
4682 net_stats->rx_errors = adapter->stats.rxerrc +
4683 adapter->stats.crcerrs + adapter->stats.algnerrc +
4684 adapter->stats.ruc + adapter->stats.roc +
4685 adapter->stats.cexterr;
4686 net_stats->rx_length_errors = adapter->stats.ruc +
4687 adapter->stats.roc;
4688 net_stats->rx_crc_errors = adapter->stats.crcerrs;
4689 net_stats->rx_frame_errors = adapter->stats.algnerrc;
4690 net_stats->rx_missed_errors = adapter->stats.mpc;
4691
4692 /* Tx Errors */
4693 net_stats->tx_errors = adapter->stats.ecol +
4694 adapter->stats.latecol;
4695 net_stats->tx_aborted_errors = adapter->stats.ecol;
4696 net_stats->tx_window_errors = adapter->stats.latecol;
4697 net_stats->tx_carrier_errors = adapter->stats.tncrs;
4698
4699 /* Tx Dropped needs to be maintained elsewhere */
4700
4701 /* Phy Stats */
4702 if (hw->phy.media_type == e1000_media_type_copper) {
4703 if ((adapter->link_speed == SPEED_1000) &&
4704 (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4705 phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4706 adapter->phy_stats.idle_errors += phy_tmp;
4707 }
4708 }
4709
4710 /* Management Stats */
4711 adapter->stats.mgptc += rd32(E1000_MGTPTC);
4712 adapter->stats.mgprc += rd32(E1000_MGTPRC);
4713 adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4714
4715 /* OS2BMC Stats */
4716 reg = rd32(E1000_MANC);
4717 if (reg & E1000_MANC_EN_BMC2OS) {
4718 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4719 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4720 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4721 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4722 }
4723 }
4724
4725 static irqreturn_t igb_msix_other(int irq, void *data)
4726 {
4727 struct igb_adapter *adapter = data;
4728 struct e1000_hw *hw = &adapter->hw;
4729 u32 icr = rd32(E1000_ICR);
4730 /* reading ICR causes bit 31 of EICR to be cleared */
4731
4732 if (icr & E1000_ICR_DRSTA)
4733 schedule_work(&adapter->reset_task);
4734
4735 if (icr & E1000_ICR_DOUTSYNC) {
4736 /* HW is reporting DMA is out of sync */
4737 adapter->stats.doosync++;
4738 /* The DMA Out of Sync is also indication of a spoof event
4739 * in IOV mode. Check the Wrong VM Behavior register to
4740 * see if it is really a spoof event. */
4741 igb_check_wvbr(adapter);
4742 }
4743
4744 /* Check for a mailbox event */
4745 if (icr & E1000_ICR_VMMB)
4746 igb_msg_task(adapter);
4747
4748 if (icr & E1000_ICR_LSC) {
4749 hw->mac.get_link_status = 1;
4750 /* guard against interrupt when we're going down */
4751 if (!test_bit(__IGB_DOWN, &adapter->state))
4752 mod_timer(&adapter->watchdog_timer, jiffies + 1);
4753 }
4754
4755 if (adapter->vfs_allocated_count)
4756 wr32(E1000_IMS, E1000_IMS_LSC |
4757 E1000_IMS_VMMB |
4758 E1000_IMS_DOUTSYNC);
4759 else
4760 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4761 wr32(E1000_EIMS, adapter->eims_other);
4762
4763 return IRQ_HANDLED;
4764 }
4765
4766 static void igb_write_itr(struct igb_q_vector *q_vector)
4767 {
4768 struct igb_adapter *adapter = q_vector->adapter;
4769 u32 itr_val = q_vector->itr_val & 0x7FFC;
4770
4771 if (!q_vector->set_itr)
4772 return;
4773
4774 if (!itr_val)
4775 itr_val = 0x4;
4776
4777 if (adapter->hw.mac.type == e1000_82575)
4778 itr_val |= itr_val << 16;
4779 else
4780 itr_val |= 0x8000000;
4781
4782 writel(itr_val, q_vector->itr_register);
4783 q_vector->set_itr = 0;
4784 }
4785
4786 static irqreturn_t igb_msix_ring(int irq, void *data)
4787 {
4788 struct igb_q_vector *q_vector = data;
4789
4790 /* Write the ITR value calculated from the previous interrupt. */
4791 igb_write_itr(q_vector);
4792
4793 napi_schedule(&q_vector->napi);
4794
4795 return IRQ_HANDLED;
4796 }
4797
4798 #ifdef CONFIG_IGB_DCA
4799 static void igb_update_dca(struct igb_q_vector *q_vector)
4800 {
4801 struct igb_adapter *adapter = q_vector->adapter;
4802 struct e1000_hw *hw = &adapter->hw;
4803 int cpu = get_cpu();
4804
4805 if (q_vector->cpu == cpu)
4806 goto out_no_update;
4807
4808 if (q_vector->tx_ring) {
4809 int q = q_vector->tx_ring->reg_idx;
4810 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4811 if (hw->mac.type == e1000_82575) {
4812 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4813 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4814 } else {
4815 dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4816 dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4817 E1000_DCA_TXCTRL_CPUID_SHIFT;
4818 }
4819 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4820 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4821 }
4822 if (q_vector->rx_ring) {
4823 int q = q_vector->rx_ring->reg_idx;
4824 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4825 if (hw->mac.type == e1000_82575) {
4826 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4827 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4828 } else {
4829 dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4830 dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4831 E1000_DCA_RXCTRL_CPUID_SHIFT;
4832 }
4833 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4834 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4835 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4836 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4837 }
4838 q_vector->cpu = cpu;
4839 out_no_update:
4840 put_cpu();
4841 }
4842
4843 static void igb_setup_dca(struct igb_adapter *adapter)
4844 {
4845 struct e1000_hw *hw = &adapter->hw;
4846 int i;
4847
4848 if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4849 return;
4850
4851 /* Always use CB2 mode, difference is masked in the CB driver. */
4852 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4853
4854 for (i = 0; i < adapter->num_q_vectors; i++) {
4855 adapter->q_vector[i]->cpu = -1;
4856 igb_update_dca(adapter->q_vector[i]);
4857 }
4858 }
4859
4860 static int __igb_notify_dca(struct device *dev, void *data)
4861 {
4862 struct net_device *netdev = dev_get_drvdata(dev);
4863 struct igb_adapter *adapter = netdev_priv(netdev);
4864 struct pci_dev *pdev = adapter->pdev;
4865 struct e1000_hw *hw = &adapter->hw;
4866 unsigned long event = *(unsigned long *)data;
4867
4868 switch (event) {
4869 case DCA_PROVIDER_ADD:
4870 /* if already enabled, don't do it again */
4871 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4872 break;
4873 if (dca_add_requester(dev) == 0) {
4874 adapter->flags |= IGB_FLAG_DCA_ENABLED;
4875 dev_info(&pdev->dev, "DCA enabled\n");
4876 igb_setup_dca(adapter);
4877 break;
4878 }
4879 /* Fall Through since DCA is disabled. */
4880 case DCA_PROVIDER_REMOVE:
4881 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4882 /* without this a class_device is left
4883 * hanging around in the sysfs model */
4884 dca_remove_requester(dev);
4885 dev_info(&pdev->dev, "DCA disabled\n");
4886 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4887 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4888 }
4889 break;
4890 }
4891
4892 return 0;
4893 }
4894
4895 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4896 void *p)
4897 {
4898 int ret_val;
4899
4900 ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4901 __igb_notify_dca);
4902
4903 return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4904 }
4905 #endif /* CONFIG_IGB_DCA */
4906
4907 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4908 {
4909 struct e1000_hw *hw = &adapter->hw;
4910 u32 ping;
4911 int i;
4912
4913 for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4914 ping = E1000_PF_CONTROL_MSG;
4915 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4916 ping |= E1000_VT_MSGTYPE_CTS;
4917 igb_write_mbx(hw, &ping, 1, i);
4918 }
4919 }
4920
4921 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4922 {
4923 struct e1000_hw *hw = &adapter->hw;
4924 u32 vmolr = rd32(E1000_VMOLR(vf));
4925 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4926
4927 vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4928 IGB_VF_FLAG_MULTI_PROMISC);
4929 vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4930
4931 if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4932 vmolr |= E1000_VMOLR_MPME;
4933 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4934 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4935 } else {
4936 /*
4937 * if we have hashes and we are clearing a multicast promisc
4938 * flag we need to write the hashes to the MTA as this step
4939 * was previously skipped
4940 */
4941 if (vf_data->num_vf_mc_hashes > 30) {
4942 vmolr |= E1000_VMOLR_MPME;
4943 } else if (vf_data->num_vf_mc_hashes) {
4944 int j;
4945 vmolr |= E1000_VMOLR_ROMPE;
4946 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4947 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4948 }
4949 }
4950
4951 wr32(E1000_VMOLR(vf), vmolr);
4952
4953 /* there are flags left unprocessed, likely not supported */
4954 if (*msgbuf & E1000_VT_MSGINFO_MASK)
4955 return -EINVAL;
4956
4957 return 0;
4958
4959 }
4960
4961 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4962 u32 *msgbuf, u32 vf)
4963 {
4964 int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4965 u16 *hash_list = (u16 *)&msgbuf[1];
4966 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4967 int i;
4968
4969 /* salt away the number of multicast addresses assigned
4970 * to this VF for later use to restore when the PF multi cast
4971 * list changes
4972 */
4973 vf_data->num_vf_mc_hashes = n;
4974
4975 /* only up to 30 hash values supported */
4976 if (n > 30)
4977 n = 30;
4978
4979 /* store the hashes for later use */
4980 for (i = 0; i < n; i++)
4981 vf_data->vf_mc_hashes[i] = hash_list[i];
4982
4983 /* Flush and reset the mta with the new values */
4984 igb_set_rx_mode(adapter->netdev);
4985
4986 return 0;
4987 }
4988
4989 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4990 {
4991 struct e1000_hw *hw = &adapter->hw;
4992 struct vf_data_storage *vf_data;
4993 int i, j;
4994
4995 for (i = 0; i < adapter->vfs_allocated_count; i++) {
4996 u32 vmolr = rd32(E1000_VMOLR(i));
4997 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4998
4999 vf_data = &adapter->vf_data[i];
5000
5001 if ((vf_data->num_vf_mc_hashes > 30) ||
5002 (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5003 vmolr |= E1000_VMOLR_MPME;
5004 } else if (vf_data->num_vf_mc_hashes) {
5005 vmolr |= E1000_VMOLR_ROMPE;
5006 for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5007 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5008 }
5009 wr32(E1000_VMOLR(i), vmolr);
5010 }
5011 }
5012
5013 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5014 {
5015 struct e1000_hw *hw = &adapter->hw;
5016 u32 pool_mask, reg, vid;
5017 int i;
5018
5019 pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5020
5021 /* Find the vlan filter for this id */
5022 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5023 reg = rd32(E1000_VLVF(i));
5024
5025 /* remove the vf from the pool */
5026 reg &= ~pool_mask;
5027
5028 /* if pool is empty then remove entry from vfta */
5029 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5030 (reg & E1000_VLVF_VLANID_ENABLE)) {
5031 reg = 0;
5032 vid = reg & E1000_VLVF_VLANID_MASK;
5033 igb_vfta_set(hw, vid, false);
5034 }
5035
5036 wr32(E1000_VLVF(i), reg);
5037 }
5038
5039 adapter->vf_data[vf].vlans_enabled = 0;
5040 }
5041
5042 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5043 {
5044 struct e1000_hw *hw = &adapter->hw;
5045 u32 reg, i;
5046
5047 /* The vlvf table only exists on 82576 hardware and newer */
5048 if (hw->mac.type < e1000_82576)
5049 return -1;
5050
5051 /* we only need to do this if VMDq is enabled */
5052 if (!adapter->vfs_allocated_count)
5053 return -1;
5054
5055 /* Find the vlan filter for this id */
5056 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5057 reg = rd32(E1000_VLVF(i));
5058 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5059 vid == (reg & E1000_VLVF_VLANID_MASK))
5060 break;
5061 }
5062
5063 if (add) {
5064 if (i == E1000_VLVF_ARRAY_SIZE) {
5065 /* Did not find a matching VLAN ID entry that was
5066 * enabled. Search for a free filter entry, i.e.
5067 * one without the enable bit set
5068 */
5069 for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5070 reg = rd32(E1000_VLVF(i));
5071 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5072 break;
5073 }
5074 }
5075 if (i < E1000_VLVF_ARRAY_SIZE) {
5076 /* Found an enabled/available entry */
5077 reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5078
5079 /* if !enabled we need to set this up in vfta */
5080 if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5081 /* add VID to filter table */
5082 igb_vfta_set(hw, vid, true);
5083 reg |= E1000_VLVF_VLANID_ENABLE;
5084 }
5085 reg &= ~E1000_VLVF_VLANID_MASK;
5086 reg |= vid;
5087 wr32(E1000_VLVF(i), reg);
5088
5089 /* do not modify RLPML for PF devices */
5090 if (vf >= adapter->vfs_allocated_count)
5091 return 0;
5092
5093 if (!adapter->vf_data[vf].vlans_enabled) {
5094 u32 size;
5095 reg = rd32(E1000_VMOLR(vf));
5096 size = reg & E1000_VMOLR_RLPML_MASK;
5097 size += 4;
5098 reg &= ~E1000_VMOLR_RLPML_MASK;
5099 reg |= size;
5100 wr32(E1000_VMOLR(vf), reg);
5101 }
5102
5103 adapter->vf_data[vf].vlans_enabled++;
5104 return 0;
5105 }
5106 } else {
5107 if (i < E1000_VLVF_ARRAY_SIZE) {
5108 /* remove vf from the pool */
5109 reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5110 /* if pool is empty then remove entry from vfta */
5111 if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5112 reg = 0;
5113 igb_vfta_set(hw, vid, false);
5114 }
5115 wr32(E1000_VLVF(i), reg);
5116
5117 /* do not modify RLPML for PF devices */
5118 if (vf >= adapter->vfs_allocated_count)
5119 return 0;
5120
5121 adapter->vf_data[vf].vlans_enabled--;
5122 if (!adapter->vf_data[vf].vlans_enabled) {
5123 u32 size;
5124 reg = rd32(E1000_VMOLR(vf));
5125 size = reg & E1000_VMOLR_RLPML_MASK;
5126 size -= 4;
5127 reg &= ~E1000_VMOLR_RLPML_MASK;
5128 reg |= size;
5129 wr32(E1000_VMOLR(vf), reg);
5130 }
5131 }
5132 }
5133 return 0;
5134 }
5135
5136 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5137 {
5138 struct e1000_hw *hw = &adapter->hw;
5139
5140 if (vid)
5141 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5142 else
5143 wr32(E1000_VMVIR(vf), 0);
5144 }
5145
5146 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5147 int vf, u16 vlan, u8 qos)
5148 {
5149 int err = 0;
5150 struct igb_adapter *adapter = netdev_priv(netdev);
5151
5152 if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5153 return -EINVAL;
5154 if (vlan || qos) {
5155 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5156 if (err)
5157 goto out;
5158 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5159 igb_set_vmolr(adapter, vf, !vlan);
5160 adapter->vf_data[vf].pf_vlan = vlan;
5161 adapter->vf_data[vf].pf_qos = qos;
5162 dev_info(&adapter->pdev->dev,
5163 "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5164 if (test_bit(__IGB_DOWN, &adapter->state)) {
5165 dev_warn(&adapter->pdev->dev,
5166 "The VF VLAN has been set,"
5167 " but the PF device is not up.\n");
5168 dev_warn(&adapter->pdev->dev,
5169 "Bring the PF device up before"
5170 " attempting to use the VF device.\n");
5171 }
5172 } else {
5173 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5174 false, vf);
5175 igb_set_vmvir(adapter, vlan, vf);
5176 igb_set_vmolr(adapter, vf, true);
5177 adapter->vf_data[vf].pf_vlan = 0;
5178 adapter->vf_data[vf].pf_qos = 0;
5179 }
5180 out:
5181 return err;
5182 }
5183
5184 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5185 {
5186 int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5187 int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5188
5189 return igb_vlvf_set(adapter, vid, add, vf);
5190 }
5191
5192 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5193 {
5194 /* clear flags - except flag that indicates PF has set the MAC */
5195 adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5196 adapter->vf_data[vf].last_nack = jiffies;
5197
5198 /* reset offloads to defaults */
5199 igb_set_vmolr(adapter, vf, true);
5200
5201 /* reset vlans for device */
5202 igb_clear_vf_vfta(adapter, vf);
5203 if (adapter->vf_data[vf].pf_vlan)
5204 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5205 adapter->vf_data[vf].pf_vlan,
5206 adapter->vf_data[vf].pf_qos);
5207 else
5208 igb_clear_vf_vfta(adapter, vf);
5209
5210 /* reset multicast table array for vf */
5211 adapter->vf_data[vf].num_vf_mc_hashes = 0;
5212
5213 /* Flush and reset the mta with the new values */
5214 igb_set_rx_mode(adapter->netdev);
5215 }
5216
5217 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5218 {
5219 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5220
5221 /* generate a new mac address as we were hotplug removed/added */
5222 if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5223 random_ether_addr(vf_mac);
5224
5225 /* process remaining reset events */
5226 igb_vf_reset(adapter, vf);
5227 }
5228
5229 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5230 {
5231 struct e1000_hw *hw = &adapter->hw;
5232 unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5233 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5234 u32 reg, msgbuf[3];
5235 u8 *addr = (u8 *)(&msgbuf[1]);
5236
5237 /* process all the same items cleared in a function level reset */
5238 igb_vf_reset(adapter, vf);
5239
5240 /* set vf mac address */
5241 igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5242
5243 /* enable transmit and receive for vf */
5244 reg = rd32(E1000_VFTE);
5245 wr32(E1000_VFTE, reg | (1 << vf));
5246 reg = rd32(E1000_VFRE);
5247 wr32(E1000_VFRE, reg | (1 << vf));
5248
5249 adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5250
5251 /* reply to reset with ack and vf mac address */
5252 msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5253 memcpy(addr, vf_mac, 6);
5254 igb_write_mbx(hw, msgbuf, 3, vf);
5255 }
5256
5257 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5258 {
5259 /*
5260 * The VF MAC Address is stored in a packed array of bytes
5261 * starting at the second 32 bit word of the msg array
5262 */
5263 unsigned char *addr = (char *)&msg[1];
5264 int err = -1;
5265
5266 if (is_valid_ether_addr(addr))
5267 err = igb_set_vf_mac(adapter, vf, addr);
5268
5269 return err;
5270 }
5271
5272 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5273 {
5274 struct e1000_hw *hw = &adapter->hw;
5275 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5276 u32 msg = E1000_VT_MSGTYPE_NACK;
5277
5278 /* if device isn't clear to send it shouldn't be reading either */
5279 if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5280 time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5281 igb_write_mbx(hw, &msg, 1, vf);
5282 vf_data->last_nack = jiffies;
5283 }
5284 }
5285
5286 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5287 {
5288 struct pci_dev *pdev = adapter->pdev;
5289 u32 msgbuf[E1000_VFMAILBOX_SIZE];
5290 struct e1000_hw *hw = &adapter->hw;
5291 struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5292 s32 retval;
5293
5294 retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5295
5296 if (retval) {
5297 /* if receive failed revoke VF CTS stats and restart init */
5298 dev_err(&pdev->dev, "Error receiving message from VF\n");
5299 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5300 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5301 return;
5302 goto out;
5303 }
5304
5305 /* this is a message we already processed, do nothing */
5306 if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5307 return;
5308
5309 /*
5310 * until the vf completes a reset it should not be
5311 * allowed to start any configuration.
5312 */
5313
5314 if (msgbuf[0] == E1000_VF_RESET) {
5315 igb_vf_reset_msg(adapter, vf);
5316 return;
5317 }
5318
5319 if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5320 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5321 return;
5322 retval = -1;
5323 goto out;
5324 }
5325
5326 switch ((msgbuf[0] & 0xFFFF)) {
5327 case E1000_VF_SET_MAC_ADDR:
5328 retval = -EINVAL;
5329 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5330 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5331 else
5332 dev_warn(&pdev->dev,
5333 "VF %d attempted to override administratively "
5334 "set MAC address\nReload the VF driver to "
5335 "resume operations\n", vf);
5336 break;
5337 case E1000_VF_SET_PROMISC:
5338 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5339 break;
5340 case E1000_VF_SET_MULTICAST:
5341 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5342 break;
5343 case E1000_VF_SET_LPE:
5344 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5345 break;
5346 case E1000_VF_SET_VLAN:
5347 retval = -1;
5348 if (vf_data->pf_vlan)
5349 dev_warn(&pdev->dev,
5350 "VF %d attempted to override administratively "
5351 "set VLAN tag\nReload the VF driver to "
5352 "resume operations\n", vf);
5353 else
5354 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5355 break;
5356 default:
5357 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5358 retval = -1;
5359 break;
5360 }
5361
5362 msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5363 out:
5364 /* notify the VF of the results of what it sent us */
5365 if (retval)
5366 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5367 else
5368 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5369
5370 igb_write_mbx(hw, msgbuf, 1, vf);
5371 }
5372
5373 static void igb_msg_task(struct igb_adapter *adapter)
5374 {
5375 struct e1000_hw *hw = &adapter->hw;
5376 u32 vf;
5377
5378 for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5379 /* process any reset requests */
5380 if (!igb_check_for_rst(hw, vf))
5381 igb_vf_reset_event(adapter, vf);
5382
5383 /* process any messages pending */
5384 if (!igb_check_for_msg(hw, vf))
5385 igb_rcv_msg_from_vf(adapter, vf);
5386
5387 /* process any acks */
5388 if (!igb_check_for_ack(hw, vf))
5389 igb_rcv_ack_from_vf(adapter, vf);
5390 }
5391 }
5392
5393 /**
5394 * igb_set_uta - Set unicast filter table address
5395 * @adapter: board private structure
5396 *
5397 * The unicast table address is a register array of 32-bit registers.
5398 * The table is meant to be used in a way similar to how the MTA is used
5399 * however due to certain limitations in the hardware it is necessary to
5400 * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5401 * enable bit to allow vlan tag stripping when promiscuous mode is enabled
5402 **/
5403 static void igb_set_uta(struct igb_adapter *adapter)
5404 {
5405 struct e1000_hw *hw = &adapter->hw;
5406 int i;
5407
5408 /* The UTA table only exists on 82576 hardware and newer */
5409 if (hw->mac.type < e1000_82576)
5410 return;
5411
5412 /* we only need to do this if VMDq is enabled */
5413 if (!adapter->vfs_allocated_count)
5414 return;
5415
5416 for (i = 0; i < hw->mac.uta_reg_count; i++)
5417 array_wr32(E1000_UTA, i, ~0);
5418 }
5419
5420 /**
5421 * igb_intr_msi - Interrupt Handler
5422 * @irq: interrupt number
5423 * @data: pointer to a network interface device structure
5424 **/
5425 static irqreturn_t igb_intr_msi(int irq, void *data)
5426 {
5427 struct igb_adapter *adapter = data;
5428 struct igb_q_vector *q_vector = adapter->q_vector[0];
5429 struct e1000_hw *hw = &adapter->hw;
5430 /* read ICR disables interrupts using IAM */
5431 u32 icr = rd32(E1000_ICR);
5432
5433 igb_write_itr(q_vector);
5434
5435 if (icr & E1000_ICR_DRSTA)
5436 schedule_work(&adapter->reset_task);
5437
5438 if (icr & E1000_ICR_DOUTSYNC) {
5439 /* HW is reporting DMA is out of sync */
5440 adapter->stats.doosync++;
5441 }
5442
5443 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5444 hw->mac.get_link_status = 1;
5445 if (!test_bit(__IGB_DOWN, &adapter->state))
5446 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5447 }
5448
5449 napi_schedule(&q_vector->napi);
5450
5451 return IRQ_HANDLED;
5452 }
5453
5454 /**
5455 * igb_intr - Legacy Interrupt Handler
5456 * @irq: interrupt number
5457 * @data: pointer to a network interface device structure
5458 **/
5459 static irqreturn_t igb_intr(int irq, void *data)
5460 {
5461 struct igb_adapter *adapter = data;
5462 struct igb_q_vector *q_vector = adapter->q_vector[0];
5463 struct e1000_hw *hw = &adapter->hw;
5464 /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No
5465 * need for the IMC write */
5466 u32 icr = rd32(E1000_ICR);
5467 if (!icr)
5468 return IRQ_NONE; /* Not our interrupt */
5469
5470 igb_write_itr(q_vector);
5471
5472 /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5473 * not set, then the adapter didn't send an interrupt */
5474 if (!(icr & E1000_ICR_INT_ASSERTED))
5475 return IRQ_NONE;
5476
5477 if (icr & E1000_ICR_DRSTA)
5478 schedule_work(&adapter->reset_task);
5479
5480 if (icr & E1000_ICR_DOUTSYNC) {
5481 /* HW is reporting DMA is out of sync */
5482 adapter->stats.doosync++;
5483 }
5484
5485 if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5486 hw->mac.get_link_status = 1;
5487 /* guard against interrupt when we're going down */
5488 if (!test_bit(__IGB_DOWN, &adapter->state))
5489 mod_timer(&adapter->watchdog_timer, jiffies + 1);
5490 }
5491
5492 napi_schedule(&q_vector->napi);
5493
5494 return IRQ_HANDLED;
5495 }
5496
5497 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5498 {
5499 struct igb_adapter *adapter = q_vector->adapter;
5500 struct e1000_hw *hw = &adapter->hw;
5501
5502 if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5503 (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5504 if (!adapter->msix_entries)
5505 igb_set_itr(adapter);
5506 else
5507 igb_update_ring_itr(q_vector);
5508 }
5509
5510 if (!test_bit(__IGB_DOWN, &adapter->state)) {
5511 if (adapter->msix_entries)
5512 wr32(E1000_EIMS, q_vector->eims_value);
5513 else
5514 igb_irq_enable(adapter);
5515 }
5516 }
5517
5518 /**
5519 * igb_poll - NAPI Rx polling callback
5520 * @napi: napi polling structure
5521 * @budget: count of how many packets we should handle
5522 **/
5523 static int igb_poll(struct napi_struct *napi, int budget)
5524 {
5525 struct igb_q_vector *q_vector = container_of(napi,
5526 struct igb_q_vector,
5527 napi);
5528 int tx_clean_complete = 1, work_done = 0;
5529
5530 #ifdef CONFIG_IGB_DCA
5531 if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5532 igb_update_dca(q_vector);
5533 #endif
5534 if (q_vector->tx_ring)
5535 tx_clean_complete = igb_clean_tx_irq(q_vector);
5536
5537 if (q_vector->rx_ring)
5538 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5539
5540 if (!tx_clean_complete)
5541 work_done = budget;
5542
5543 /* If not enough Rx work done, exit the polling mode */
5544 if (work_done < budget) {
5545 napi_complete(napi);
5546 igb_ring_irq_enable(q_vector);
5547 }
5548
5549 return work_done;
5550 }
5551
5552 /**
5553 * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5554 * @adapter: board private structure
5555 * @shhwtstamps: timestamp structure to update
5556 * @regval: unsigned 64bit system time value.
5557 *
5558 * We need to convert the system time value stored in the RX/TXSTMP registers
5559 * into a hwtstamp which can be used by the upper level timestamping functions
5560 */
5561 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5562 struct skb_shared_hwtstamps *shhwtstamps,
5563 u64 regval)
5564 {
5565 u64 ns;
5566
5567 /*
5568 * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5569 * 24 to match clock shift we setup earlier.
5570 */
5571 if (adapter->hw.mac.type == e1000_82580)
5572 regval <<= IGB_82580_TSYNC_SHIFT;
5573
5574 ns = timecounter_cyc2time(&adapter->clock, regval);
5575 timecompare_update(&adapter->compare, ns);
5576 memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5577 shhwtstamps->hwtstamp = ns_to_ktime(ns);
5578 shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5579 }
5580
5581 /**
5582 * igb_tx_hwtstamp - utility function which checks for TX time stamp
5583 * @q_vector: pointer to q_vector containing needed info
5584 * @buffer: pointer to igb_buffer structure
5585 *
5586 * If we were asked to do hardware stamping and such a time stamp is
5587 * available, then it must have been for this skb here because we only
5588 * allow only one such packet into the queue.
5589 */
5590 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5591 {
5592 struct igb_adapter *adapter = q_vector->adapter;
5593 struct e1000_hw *hw = &adapter->hw;
5594 struct skb_shared_hwtstamps shhwtstamps;
5595 u64 regval;
5596
5597 /* if skb does not support hw timestamp or TX stamp not valid exit */
5598 if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5599 !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5600 return;
5601
5602 regval = rd32(E1000_TXSTMPL);
5603 regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5604
5605 igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5606 skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5607 }
5608
5609 /**
5610 * igb_clean_tx_irq - Reclaim resources after transmit completes
5611 * @q_vector: pointer to q_vector containing needed info
5612 * returns true if ring is completely cleaned
5613 **/
5614 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5615 {
5616 struct igb_adapter *adapter = q_vector->adapter;
5617 struct igb_ring *tx_ring = q_vector->tx_ring;
5618 struct net_device *netdev = tx_ring->netdev;
5619 struct e1000_hw *hw = &adapter->hw;
5620 struct igb_buffer *buffer_info;
5621 union e1000_adv_tx_desc *tx_desc, *eop_desc;
5622 unsigned int total_bytes = 0, total_packets = 0;
5623 unsigned int i, eop, count = 0;
5624 bool cleaned = false;
5625
5626 i = tx_ring->next_to_clean;
5627 eop = tx_ring->buffer_info[i].next_to_watch;
5628 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5629
5630 while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5631 (count < tx_ring->count)) {
5632 rmb(); /* read buffer_info after eop_desc status */
5633 for (cleaned = false; !cleaned; count++) {
5634 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5635 buffer_info = &tx_ring->buffer_info[i];
5636 cleaned = (i == eop);
5637
5638 if (buffer_info->skb) {
5639 total_bytes += buffer_info->bytecount;
5640 /* gso_segs is currently only valid for tcp */
5641 total_packets += buffer_info->gso_segs;
5642 igb_tx_hwtstamp(q_vector, buffer_info);
5643 }
5644
5645 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5646 tx_desc->wb.status = 0;
5647
5648 i++;
5649 if (i == tx_ring->count)
5650 i = 0;
5651 }
5652 eop = tx_ring->buffer_info[i].next_to_watch;
5653 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5654 }
5655
5656 tx_ring->next_to_clean = i;
5657
5658 if (unlikely(count &&
5659 netif_carrier_ok(netdev) &&
5660 igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5661 /* Make sure that anybody stopping the queue after this
5662 * sees the new next_to_clean.
5663 */
5664 smp_mb();
5665 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5666 !(test_bit(__IGB_DOWN, &adapter->state))) {
5667 netif_wake_subqueue(netdev, tx_ring->queue_index);
5668
5669 u64_stats_update_begin(&tx_ring->tx_syncp);
5670 tx_ring->tx_stats.restart_queue++;
5671 u64_stats_update_end(&tx_ring->tx_syncp);
5672 }
5673 }
5674
5675 if (tx_ring->detect_tx_hung) {
5676 /* Detect a transmit hang in hardware, this serializes the
5677 * check with the clearing of time_stamp and movement of i */
5678 tx_ring->detect_tx_hung = false;
5679 if (tx_ring->buffer_info[i].time_stamp &&
5680 time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5681 (adapter->tx_timeout_factor * HZ)) &&
5682 !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5683
5684 /* detected Tx unit hang */
5685 dev_err(tx_ring->dev,
5686 "Detected Tx Unit Hang\n"
5687 " Tx Queue <%d>\n"
5688 " TDH <%x>\n"
5689 " TDT <%x>\n"
5690 " next_to_use <%x>\n"
5691 " next_to_clean <%x>\n"
5692 "buffer_info[next_to_clean]\n"
5693 " time_stamp <%lx>\n"
5694 " next_to_watch <%x>\n"
5695 " jiffies <%lx>\n"
5696 " desc.status <%x>\n",
5697 tx_ring->queue_index,
5698 readl(tx_ring->head),
5699 readl(tx_ring->tail),
5700 tx_ring->next_to_use,
5701 tx_ring->next_to_clean,
5702 tx_ring->buffer_info[eop].time_stamp,
5703 eop,
5704 jiffies,
5705 eop_desc->wb.status);
5706 netif_stop_subqueue(netdev, tx_ring->queue_index);
5707 }
5708 }
5709 tx_ring->total_bytes += total_bytes;
5710 tx_ring->total_packets += total_packets;
5711 u64_stats_update_begin(&tx_ring->tx_syncp);
5712 tx_ring->tx_stats.bytes += total_bytes;
5713 tx_ring->tx_stats.packets += total_packets;
5714 u64_stats_update_end(&tx_ring->tx_syncp);
5715 return count < tx_ring->count;
5716 }
5717
5718 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5719 u32 status_err, struct sk_buff *skb)
5720 {
5721 skb_checksum_none_assert(skb);
5722
5723 /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5724 if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5725 (status_err & E1000_RXD_STAT_IXSM))
5726 return;
5727
5728 /* TCP/UDP checksum error bit is set */
5729 if (status_err &
5730 (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5731 /*
5732 * work around errata with sctp packets where the TCPE aka
5733 * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5734 * packets, (aka let the stack check the crc32c)
5735 */
5736 if ((skb->len == 60) &&
5737 (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)) {
5738 u64_stats_update_begin(&ring->rx_syncp);
5739 ring->rx_stats.csum_err++;
5740 u64_stats_update_end(&ring->rx_syncp);
5741 }
5742 /* let the stack verify checksum errors */
5743 return;
5744 }
5745 /* It must be a TCP or UDP packet with a valid checksum */
5746 if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5747 skb->ip_summed = CHECKSUM_UNNECESSARY;
5748
5749 dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5750 }
5751
5752 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5753 struct sk_buff *skb)
5754 {
5755 struct igb_adapter *adapter = q_vector->adapter;
5756 struct e1000_hw *hw = &adapter->hw;
5757 u64 regval;
5758
5759 /*
5760 * If this bit is set, then the RX registers contain the time stamp. No
5761 * other packet will be time stamped until we read these registers, so
5762 * read the registers to make them available again. Because only one
5763 * packet can be time stamped at a time, we know that the register
5764 * values must belong to this one here and therefore we don't need to
5765 * compare any of the additional attributes stored for it.
5766 *
5767 * If nothing went wrong, then it should have a shared tx_flags that we
5768 * can turn into a skb_shared_hwtstamps.
5769 */
5770 if (staterr & E1000_RXDADV_STAT_TSIP) {
5771 u32 *stamp = (u32 *)skb->data;
5772 regval = le32_to_cpu(*(stamp + 2));
5773 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5774 skb_pull(skb, IGB_TS_HDR_LEN);
5775 } else {
5776 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5777 return;
5778
5779 regval = rd32(E1000_RXSTMPL);
5780 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5781 }
5782
5783 igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5784 }
5785 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5786 union e1000_adv_rx_desc *rx_desc)
5787 {
5788 /* HW will not DMA in data larger than the given buffer, even if it
5789 * parses the (NFS, of course) header to be larger. In that case, it
5790 * fills the header buffer and spills the rest into the page.
5791 */
5792 u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5793 E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5794 if (hlen > rx_ring->rx_buffer_len)
5795 hlen = rx_ring->rx_buffer_len;
5796 return hlen;
5797 }
5798
5799 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5800 int *work_done, int budget)
5801 {
5802 struct igb_ring *rx_ring = q_vector->rx_ring;
5803 struct net_device *netdev = rx_ring->netdev;
5804 struct device *dev = rx_ring->dev;
5805 union e1000_adv_rx_desc *rx_desc , *next_rxd;
5806 struct igb_buffer *buffer_info , *next_buffer;
5807 struct sk_buff *skb;
5808 bool cleaned = false;
5809 int cleaned_count = 0;
5810 int current_node = numa_node_id();
5811 unsigned int total_bytes = 0, total_packets = 0;
5812 unsigned int i;
5813 u32 staterr;
5814 u16 length;
5815
5816 i = rx_ring->next_to_clean;
5817 buffer_info = &rx_ring->buffer_info[i];
5818 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5819 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5820
5821 while (staterr & E1000_RXD_STAT_DD) {
5822 if (*work_done >= budget)
5823 break;
5824 (*work_done)++;
5825 rmb(); /* read descriptor and rx_buffer_info after status DD */
5826
5827 skb = buffer_info->skb;
5828 prefetch(skb->data - NET_IP_ALIGN);
5829 buffer_info->skb = NULL;
5830
5831 i++;
5832 if (i == rx_ring->count)
5833 i = 0;
5834
5835 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5836 prefetch(next_rxd);
5837 next_buffer = &rx_ring->buffer_info[i];
5838
5839 length = le16_to_cpu(rx_desc->wb.upper.length);
5840 cleaned = true;
5841 cleaned_count++;
5842
5843 if (buffer_info->dma) {
5844 dma_unmap_single(dev, buffer_info->dma,
5845 rx_ring->rx_buffer_len,
5846 DMA_FROM_DEVICE);
5847 buffer_info->dma = 0;
5848 if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5849 skb_put(skb, length);
5850 goto send_up;
5851 }
5852 skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5853 }
5854
5855 if (length) {
5856 dma_unmap_page(dev, buffer_info->page_dma,
5857 PAGE_SIZE / 2, DMA_FROM_DEVICE);
5858 buffer_info->page_dma = 0;
5859
5860 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5861 buffer_info->page,
5862 buffer_info->page_offset,
5863 length);
5864
5865 if ((page_count(buffer_info->page) != 1) ||
5866 (page_to_nid(buffer_info->page) != current_node))
5867 buffer_info->page = NULL;
5868 else
5869 get_page(buffer_info->page);
5870
5871 skb->len += length;
5872 skb->data_len += length;
5873 skb->truesize += length;
5874 }
5875
5876 if (!(staterr & E1000_RXD_STAT_EOP)) {
5877 buffer_info->skb = next_buffer->skb;
5878 buffer_info->dma = next_buffer->dma;
5879 next_buffer->skb = skb;
5880 next_buffer->dma = 0;
5881 goto next_desc;
5882 }
5883 send_up:
5884 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5885 dev_kfree_skb_irq(skb);
5886 goto next_desc;
5887 }
5888
5889 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5890 igb_rx_hwtstamp(q_vector, staterr, skb);
5891 total_bytes += skb->len;
5892 total_packets++;
5893
5894 igb_rx_checksum_adv(rx_ring, staterr, skb);
5895
5896 skb->protocol = eth_type_trans(skb, netdev);
5897 skb_record_rx_queue(skb, rx_ring->queue_index);
5898
5899 if (staterr & E1000_RXD_STAT_VP) {
5900 u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
5901
5902 __vlan_hwaccel_put_tag(skb, vid);
5903 }
5904 napi_gro_receive(&q_vector->napi, skb);
5905
5906 next_desc:
5907 rx_desc->wb.upper.status_error = 0;
5908
5909 /* return some buffers to hardware, one at a time is too slow */
5910 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5911 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5912 cleaned_count = 0;
5913 }
5914
5915 /* use prefetched values */
5916 rx_desc = next_rxd;
5917 buffer_info = next_buffer;
5918 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5919 }
5920
5921 rx_ring->next_to_clean = i;
5922 cleaned_count = igb_desc_unused(rx_ring);
5923
5924 if (cleaned_count)
5925 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5926
5927 rx_ring->total_packets += total_packets;
5928 rx_ring->total_bytes += total_bytes;
5929 u64_stats_update_begin(&rx_ring->rx_syncp);
5930 rx_ring->rx_stats.packets += total_packets;
5931 rx_ring->rx_stats.bytes += total_bytes;
5932 u64_stats_update_end(&rx_ring->rx_syncp);
5933 return cleaned;
5934 }
5935
5936 /**
5937 * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5938 * @adapter: address of board private structure
5939 **/
5940 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5941 {
5942 struct net_device *netdev = rx_ring->netdev;
5943 union e1000_adv_rx_desc *rx_desc;
5944 struct igb_buffer *buffer_info;
5945 struct sk_buff *skb;
5946 unsigned int i;
5947 int bufsz;
5948
5949 i = rx_ring->next_to_use;
5950 buffer_info = &rx_ring->buffer_info[i];
5951
5952 bufsz = rx_ring->rx_buffer_len;
5953
5954 while (cleaned_count--) {
5955 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5956
5957 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5958 if (!buffer_info->page) {
5959 buffer_info->page = netdev_alloc_page(netdev);
5960 if (unlikely(!buffer_info->page)) {
5961 u64_stats_update_begin(&rx_ring->rx_syncp);
5962 rx_ring->rx_stats.alloc_failed++;
5963 u64_stats_update_end(&rx_ring->rx_syncp);
5964 goto no_buffers;
5965 }
5966 buffer_info->page_offset = 0;
5967 } else {
5968 buffer_info->page_offset ^= PAGE_SIZE / 2;
5969 }
5970 buffer_info->page_dma =
5971 dma_map_page(rx_ring->dev, buffer_info->page,
5972 buffer_info->page_offset,
5973 PAGE_SIZE / 2,
5974 DMA_FROM_DEVICE);
5975 if (dma_mapping_error(rx_ring->dev,
5976 buffer_info->page_dma)) {
5977 buffer_info->page_dma = 0;
5978 u64_stats_update_begin(&rx_ring->rx_syncp);
5979 rx_ring->rx_stats.alloc_failed++;
5980 u64_stats_update_end(&rx_ring->rx_syncp);
5981 goto no_buffers;
5982 }
5983 }
5984
5985 skb = buffer_info->skb;
5986 if (!skb) {
5987 skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5988 if (unlikely(!skb)) {
5989 u64_stats_update_begin(&rx_ring->rx_syncp);
5990 rx_ring->rx_stats.alloc_failed++;
5991 u64_stats_update_end(&rx_ring->rx_syncp);
5992 goto no_buffers;
5993 }
5994
5995 buffer_info->skb = skb;
5996 }
5997 if (!buffer_info->dma) {
5998 buffer_info->dma = dma_map_single(rx_ring->dev,
5999 skb->data,
6000 bufsz,
6001 DMA_FROM_DEVICE);
6002 if (dma_mapping_error(rx_ring->dev,
6003 buffer_info->dma)) {
6004 buffer_info->dma = 0;
6005 u64_stats_update_begin(&rx_ring->rx_syncp);
6006 rx_ring->rx_stats.alloc_failed++;
6007 u64_stats_update_end(&rx_ring->rx_syncp);
6008 goto no_buffers;
6009 }
6010 }
6011 /* Refresh the desc even if buffer_addrs didn't change because
6012 * each write-back erases this info. */
6013 if (bufsz < IGB_RXBUFFER_1024) {
6014 rx_desc->read.pkt_addr =
6015 cpu_to_le64(buffer_info->page_dma);
6016 rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
6017 } else {
6018 rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
6019 rx_desc->read.hdr_addr = 0;
6020 }
6021
6022 i++;
6023 if (i == rx_ring->count)
6024 i = 0;
6025 buffer_info = &rx_ring->buffer_info[i];
6026 }
6027
6028 no_buffers:
6029 if (rx_ring->next_to_use != i) {
6030 rx_ring->next_to_use = i;
6031 if (i == 0)
6032 i = (rx_ring->count - 1);
6033 else
6034 i--;
6035
6036 /* Force memory writes to complete before letting h/w
6037 * know there are new descriptors to fetch. (Only
6038 * applicable for weak-ordered memory model archs,
6039 * such as IA-64). */
6040 wmb();
6041 writel(i, rx_ring->tail);
6042 }
6043 }
6044
6045 /**
6046 * igb_mii_ioctl -
6047 * @netdev:
6048 * @ifreq:
6049 * @cmd:
6050 **/
6051 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6052 {
6053 struct igb_adapter *adapter = netdev_priv(netdev);
6054 struct mii_ioctl_data *data = if_mii(ifr);
6055
6056 if (adapter->hw.phy.media_type != e1000_media_type_copper)
6057 return -EOPNOTSUPP;
6058
6059 switch (cmd) {
6060 case SIOCGMIIPHY:
6061 data->phy_id = adapter->hw.phy.addr;
6062 break;
6063 case SIOCGMIIREG:
6064 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
6065 &data->val_out))
6066 return -EIO;
6067 break;
6068 case SIOCSMIIREG:
6069 default:
6070 return -EOPNOTSUPP;
6071 }
6072 return 0;
6073 }
6074
6075 /**
6076 * igb_hwtstamp_ioctl - control hardware time stamping
6077 * @netdev:
6078 * @ifreq:
6079 * @cmd:
6080 *
6081 * Outgoing time stamping can be enabled and disabled. Play nice and
 * disable it when requested, although it shouldn't cause any overhead
6083 * when no packet needs it. At most one packet in the queue may be
6084 * marked for time stamping, otherwise it would be impossible to tell
6085 * for sure to which packet the hardware time stamp belongs.
6086 *
6087 * Incoming time stamping has to be configured via the hardware
6088 * filters. Not all combinations are supported, in particular event
6089 * type has to be specified. Matching the kind of event packet is
6090 * not supported, with the exception of "all V2 events regardless of
6091 * level 2 or 4".
6092 *
6093 **/
6094 static int igb_hwtstamp_ioctl(struct net_device *netdev,
6095 struct ifreq *ifr, int cmd)
6096 {
6097 struct igb_adapter *adapter = netdev_priv(netdev);
6098 struct e1000_hw *hw = &adapter->hw;
6099 struct hwtstamp_config config;
6100 u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
6101 u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6102 u32 tsync_rx_cfg = 0;
6103 bool is_l4 = false;
6104 bool is_l2 = false;
6105 u32 regval;
6106
6107 if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
6108 return -EFAULT;
6109
6110 /* reserved for future extensions */
6111 if (config.flags)
6112 return -EINVAL;
6113
6114 switch (config.tx_type) {
6115 case HWTSTAMP_TX_OFF:
6116 tsync_tx_ctl = 0;
6117 case HWTSTAMP_TX_ON:
6118 break;
6119 default:
6120 return -ERANGE;
6121 }
6122
6123 switch (config.rx_filter) {
6124 case HWTSTAMP_FILTER_NONE:
6125 tsync_rx_ctl = 0;
6126 break;
6127 case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
6128 case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
6129 case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
6130 case HWTSTAMP_FILTER_ALL:
6131 /*
6132 * register TSYNCRXCFG must be set, therefore it is not
6133 * possible to time stamp both Sync and Delay_Req messages
6134 * => fall back to time stamping all packets
6135 */
6136 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6137 config.rx_filter = HWTSTAMP_FILTER_ALL;
6138 break;
6139 case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
6140 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6141 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
6142 is_l4 = true;
6143 break;
6144 case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
6145 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
6146 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
6147 is_l4 = true;
6148 break;
6149 case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
6150 case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
6151 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6152 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
6153 is_l2 = true;
6154 is_l4 = true;
6155 config.rx_filter = HWTSTAMP_FILTER_SOME;
6156 break;
6157 case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
6158 case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
6159 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
6160 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
6161 is_l2 = true;
6162 is_l4 = true;
6163 config.rx_filter = HWTSTAMP_FILTER_SOME;
6164 break;
6165 case HWTSTAMP_FILTER_PTP_V2_EVENT:
6166 case HWTSTAMP_FILTER_PTP_V2_SYNC:
6167 case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
6168 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
6169 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
6170 is_l2 = true;
6171 break;
6172 default:
6173 return -ERANGE;
6174 }
6175
6176 if (hw->mac.type == e1000_82575) {
6177 if (tsync_rx_ctl | tsync_tx_ctl)
6178 return -EINVAL;
6179 return 0;
6180 }
6181
6182 /*
6183 * Per-packet timestamping only works if all packets are
6184 * timestamped, so enable timestamping in all packets as
6185 * long as one rx filter was configured.
6186 */
6187 if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
6188 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
6189 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
6190 }
6191
6192 /* enable/disable TX */
6193 regval = rd32(E1000_TSYNCTXCTL);
6194 regval &= ~E1000_TSYNCTXCTL_ENABLED;
6195 regval |= tsync_tx_ctl;
6196 wr32(E1000_TSYNCTXCTL, regval);
6197
6198 /* enable/disable RX */
6199 regval = rd32(E1000_TSYNCRXCTL);
6200 regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
6201 regval |= tsync_rx_ctl;
6202 wr32(E1000_TSYNCRXCTL, regval);
6203
6204 /* define which PTP packets are time stamped */
6205 wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
6206
6207 /* define ethertype filter for timestamped packets */
6208 if (is_l2)
6209 wr32(E1000_ETQF(3),
6210 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
6211 E1000_ETQF_1588 | /* enable timestamping */
6212 ETH_P_1588)); /* 1588 eth protocol type */
6213 else
6214 wr32(E1000_ETQF(3), 0);
6215
6216 #define PTP_PORT 319
6217 /* L4 Queue Filter[3]: filter by destination port and protocol */
6218 if (is_l4) {
6219 u32 ftqf = (IPPROTO_UDP /* UDP */
6220 | E1000_FTQF_VF_BP /* VF not compared */
6221 | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
6222 | E1000_FTQF_MASK); /* mask all inputs */
6223 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
6224
6225 wr32(E1000_IMIR(3), htons(PTP_PORT));
6226 wr32(E1000_IMIREXT(3),
6227 (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
6228 if (hw->mac.type == e1000_82576) {
6229 /* enable source port check */
6230 wr32(E1000_SPQF(3), htons(PTP_PORT));
6231 ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
6232 }
6233 wr32(E1000_FTQF(3), ftqf);
6234 } else {
6235 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
6236 }
6237 wrfl();
6238
6239 adapter->hwtstamp_config = config;
6240
6241 /* clear TX/RX time stamp registers, just to be sure */
6242 regval = rd32(E1000_TXSTMPH);
6243 regval = rd32(E1000_RXSTMPH);
6244
6245 return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
6246 -EFAULT : 0;
6247 }
6248
6249 /**
6250 * igb_ioctl -
6251 * @netdev:
6252 * @ifreq:
6253 * @cmd:
6254 **/
6255 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
6256 {
6257 switch (cmd) {
6258 case SIOCGMIIPHY:
6259 case SIOCGMIIREG:
6260 case SIOCSMIIREG:
6261 return igb_mii_ioctl(netdev, ifr, cmd);
6262 case SIOCSHWTSTAMP:
6263 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
6264 default:
6265 return -EOPNOTSUPP;
6266 }
6267 }
6268
6269 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6270 {
6271 struct igb_adapter *adapter = hw->back;
6272 u16 cap_offset;
6273
6274 cap_offset = adapter->pdev->pcie_cap;
6275 if (!cap_offset)
6276 return -E1000_ERR_CONFIG;
6277
6278 pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6279
6280 return 0;
6281 }
6282
6283 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6284 {
6285 struct igb_adapter *adapter = hw->back;
6286 u16 cap_offset;
6287
6288 cap_offset = adapter->pdev->pcie_cap;
6289 if (!cap_offset)
6290 return -E1000_ERR_CONFIG;
6291
6292 pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6293
6294 return 0;
6295 }
6296
6297 static void igb_vlan_mode(struct net_device *netdev, u32 features)
6298 {
6299 struct igb_adapter *adapter = netdev_priv(netdev);
6300 struct e1000_hw *hw = &adapter->hw;
6301 u32 ctrl, rctl;
6302
6303 igb_irq_disable(adapter);
6304
6305 if (features & NETIF_F_HW_VLAN_RX) {
6306 /* enable VLAN tag insert/strip */
6307 ctrl = rd32(E1000_CTRL);
6308 ctrl |= E1000_CTRL_VME;
6309 wr32(E1000_CTRL, ctrl);
6310
6311 /* Disable CFI check */
6312 rctl = rd32(E1000_RCTL);
6313 rctl &= ~E1000_RCTL_CFIEN;
6314 wr32(E1000_RCTL, rctl);
6315 } else {
6316 /* disable VLAN tag insert/strip */
6317 ctrl = rd32(E1000_CTRL);
6318 ctrl &= ~E1000_CTRL_VME;
6319 wr32(E1000_CTRL, ctrl);
6320 }
6321
6322 igb_rlpml_set(adapter);
6323
6324 if (!test_bit(__IGB_DOWN, &adapter->state))
6325 igb_irq_enable(adapter);
6326 }
6327
6328 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6329 {
6330 struct igb_adapter *adapter = netdev_priv(netdev);
6331 struct e1000_hw *hw = &adapter->hw;
6332 int pf_id = adapter->vfs_allocated_count;
6333
6334 /* attempt to add filter to vlvf array */
6335 igb_vlvf_set(adapter, vid, true, pf_id);
6336
6337 /* add the filter since PF can receive vlans w/o entry in vlvf */
6338 igb_vfta_set(hw, vid, true);
6339
6340 set_bit(vid, adapter->active_vlans);
6341 }
6342
6343 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6344 {
6345 struct igb_adapter *adapter = netdev_priv(netdev);
6346 struct e1000_hw *hw = &adapter->hw;
6347 int pf_id = adapter->vfs_allocated_count;
6348 s32 err;
6349
6350 igb_irq_disable(adapter);
6351
6352 if (!test_bit(__IGB_DOWN, &adapter->state))
6353 igb_irq_enable(adapter);
6354
6355 /* remove vlan from VLVF table array */
6356 err = igb_vlvf_set(adapter, vid, false, pf_id);
6357
6358 /* if vid was not present in VLVF just remove it from table */
6359 if (err)
6360 igb_vfta_set(hw, vid, false);
6361
6362 clear_bit(vid, adapter->active_vlans);
6363 }
6364
6365 static void igb_restore_vlan(struct igb_adapter *adapter)
6366 {
6367 u16 vid;
6368
6369 for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
6370 igb_vlan_rx_add_vid(adapter->netdev, vid);
6371 }
6372
6373 int igb_set_spd_dplx(struct igb_adapter *adapter, u32 spd, u8 dplx)
6374 {
6375 struct pci_dev *pdev = adapter->pdev;
6376 struct e1000_mac_info *mac = &adapter->hw.mac;
6377
6378 mac->autoneg = 0;
6379
6380 /* Make sure dplx is at most 1 bit and lsb of speed is not set
6381 * for the switch() below to work */
6382 if ((spd & 1) || (dplx & ~1))
6383 goto err_inval;
6384
6385 /* Fiber NIC's only allow 1000 Gbps Full duplex */
6386 if ((adapter->hw.phy.media_type == e1000_media_type_internal_serdes) &&
6387 spd != SPEED_1000 &&
6388 dplx != DUPLEX_FULL)
6389 goto err_inval;
6390
6391 switch (spd + dplx) {
6392 case SPEED_10 + DUPLEX_HALF:
6393 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6394 break;
6395 case SPEED_10 + DUPLEX_FULL:
6396 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6397 break;
6398 case SPEED_100 + DUPLEX_HALF:
6399 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6400 break;
6401 case SPEED_100 + DUPLEX_FULL:
6402 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6403 break;
6404 case SPEED_1000 + DUPLEX_FULL:
6405 mac->autoneg = 1;
6406 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6407 break;
6408 case SPEED_1000 + DUPLEX_HALF: /* not supported */
6409 default:
6410 goto err_inval;
6411 }
6412 return 0;
6413
6414 err_inval:
6415 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6416 return -EINVAL;
6417 }
6418
6419 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6420 {
6421 struct net_device *netdev = pci_get_drvdata(pdev);
6422 struct igb_adapter *adapter = netdev_priv(netdev);
6423 struct e1000_hw *hw = &adapter->hw;
6424 u32 ctrl, rctl, status;
6425 u32 wufc = adapter->wol;
6426 #ifdef CONFIG_PM
6427 int retval = 0;
6428 #endif
6429
6430 netif_device_detach(netdev);
6431
6432 if (netif_running(netdev))
6433 igb_close(netdev);
6434
6435 igb_clear_interrupt_scheme(adapter);
6436
6437 #ifdef CONFIG_PM
6438 retval = pci_save_state(pdev);
6439 if (retval)
6440 return retval;
6441 #endif
6442
6443 status = rd32(E1000_STATUS);
6444 if (status & E1000_STATUS_LU)
6445 wufc &= ~E1000_WUFC_LNKC;
6446
6447 if (wufc) {
6448 igb_setup_rctl(adapter);
6449 igb_set_rx_mode(netdev);
6450
6451 /* turn on all-multi mode if wake on multicast is enabled */
6452 if (wufc & E1000_WUFC_MC) {
6453 rctl = rd32(E1000_RCTL);
6454 rctl |= E1000_RCTL_MPE;
6455 wr32(E1000_RCTL, rctl);
6456 }
6457
6458 ctrl = rd32(E1000_CTRL);
6459 /* advertise wake from D3Cold */
6460 #define E1000_CTRL_ADVD3WUC 0x00100000
6461 /* phy power management enable */
6462 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6463 ctrl |= E1000_CTRL_ADVD3WUC;
6464 wr32(E1000_CTRL, ctrl);
6465
6466 /* Allow time for pending master requests to run */
6467 igb_disable_pcie_master(hw);
6468
6469 wr32(E1000_WUC, E1000_WUC_PME_EN);
6470 wr32(E1000_WUFC, wufc);
6471 } else {
6472 wr32(E1000_WUC, 0);
6473 wr32(E1000_WUFC, 0);
6474 }
6475
6476 *enable_wake = wufc || adapter->en_mng_pt;
6477 if (!*enable_wake)
6478 igb_power_down_link(adapter);
6479 else
6480 igb_power_up_link(adapter);
6481
6482 /* Release control of h/w to f/w. If f/w is AMT enabled, this
6483 * would have already happened in close and is redundant. */
6484 igb_release_hw_control(adapter);
6485
6486 pci_disable_device(pdev);
6487
6488 return 0;
6489 }
6490
6491 #ifdef CONFIG_PM
6492 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6493 {
6494 int retval;
6495 bool wake;
6496
6497 retval = __igb_shutdown(pdev, &wake);
6498 if (retval)
6499 return retval;
6500
6501 if (wake) {
6502 pci_prepare_to_sleep(pdev);
6503 } else {
6504 pci_wake_from_d3(pdev, false);
6505 pci_set_power_state(pdev, PCI_D3hot);
6506 }
6507
6508 return 0;
6509 }
6510
6511 static int igb_resume(struct pci_dev *pdev)
6512 {
6513 struct net_device *netdev = pci_get_drvdata(pdev);
6514 struct igb_adapter *adapter = netdev_priv(netdev);
6515 struct e1000_hw *hw = &adapter->hw;
6516 u32 err;
6517
6518 pci_set_power_state(pdev, PCI_D0);
6519 pci_restore_state(pdev);
6520 pci_save_state(pdev);
6521
6522 err = pci_enable_device_mem(pdev);
6523 if (err) {
6524 dev_err(&pdev->dev,
6525 "igb: Cannot enable PCI device from suspend\n");
6526 return err;
6527 }
6528 pci_set_master(pdev);
6529
6530 pci_enable_wake(pdev, PCI_D3hot, 0);
6531 pci_enable_wake(pdev, PCI_D3cold, 0);
6532
6533 if (igb_init_interrupt_scheme(adapter)) {
6534 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6535 return -ENOMEM;
6536 }
6537
6538 igb_reset(adapter);
6539
6540 /* let the f/w know that the h/w is now under the control of the
6541 * driver. */
6542 igb_get_hw_control(adapter);
6543
6544 wr32(E1000_WUS, ~0);
6545
6546 if (netif_running(netdev)) {
6547 err = igb_open(netdev);
6548 if (err)
6549 return err;
6550 }
6551
6552 netif_device_attach(netdev);
6553
6554 return 0;
6555 }
6556 #endif
6557
6558 static void igb_shutdown(struct pci_dev *pdev)
6559 {
6560 bool wake;
6561
6562 __igb_shutdown(pdev, &wake);
6563
6564 if (system_state == SYSTEM_POWER_OFF) {
6565 pci_wake_from_d3(pdev, wake);
6566 pci_set_power_state(pdev, PCI_D3hot);
6567 }
6568 }
6569
6570 #ifdef CONFIG_NET_POLL_CONTROLLER
6571 /*
6572 * Polling 'interrupt' - used by things like netconsole to send skbs
6573 * without having to re-enable interrupts. It's not called while
6574 * the interrupt routine is executing.
6575 */
6576 static void igb_netpoll(struct net_device *netdev)
6577 {
6578 struct igb_adapter *adapter = netdev_priv(netdev);
6579 struct e1000_hw *hw = &adapter->hw;
6580 int i;
6581
6582 if (!adapter->msix_entries) {
6583 struct igb_q_vector *q_vector = adapter->q_vector[0];
6584 igb_irq_disable(adapter);
6585 napi_schedule(&q_vector->napi);
6586 return;
6587 }
6588
6589 for (i = 0; i < adapter->num_q_vectors; i++) {
6590 struct igb_q_vector *q_vector = adapter->q_vector[i];
6591 wr32(E1000_EIMC, q_vector->eims_value);
6592 napi_schedule(&q_vector->napi);
6593 }
6594 }
6595 #endif /* CONFIG_NET_POLL_CONTROLLER */
6596
6597 /**
6598 * igb_io_error_detected - called when PCI error is detected
6599 * @pdev: Pointer to PCI device
6600 * @state: The current pci connection state
6601 *
6602 * This function is called after a PCI bus error affecting
6603 * this device has been detected.
6604 */
6605 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6606 pci_channel_state_t state)
6607 {
6608 struct net_device *netdev = pci_get_drvdata(pdev);
6609 struct igb_adapter *adapter = netdev_priv(netdev);
6610
6611 netif_device_detach(netdev);
6612
6613 if (state == pci_channel_io_perm_failure)
6614 return PCI_ERS_RESULT_DISCONNECT;
6615
6616 if (netif_running(netdev))
6617 igb_down(adapter);
6618 pci_disable_device(pdev);
6619
6620 /* Request a slot slot reset. */
6621 return PCI_ERS_RESULT_NEED_RESET;
6622 }
6623
6624 /**
6625 * igb_io_slot_reset - called after the pci bus has been reset.
6626 * @pdev: Pointer to PCI device
6627 *
6628 * Restart the card from scratch, as if from a cold-boot. Implementation
6629 * resembles the first-half of the igb_resume routine.
6630 */
6631 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6632 {
6633 struct net_device *netdev = pci_get_drvdata(pdev);
6634 struct igb_adapter *adapter = netdev_priv(netdev);
6635 struct e1000_hw *hw = &adapter->hw;
6636 pci_ers_result_t result;
6637 int err;
6638
6639 if (pci_enable_device_mem(pdev)) {
6640 dev_err(&pdev->dev,
6641 "Cannot re-enable PCI device after reset.\n");
6642 result = PCI_ERS_RESULT_DISCONNECT;
6643 } else {
6644 pci_set_master(pdev);
6645 pci_restore_state(pdev);
6646 pci_save_state(pdev);
6647
6648 pci_enable_wake(pdev, PCI_D3hot, 0);
6649 pci_enable_wake(pdev, PCI_D3cold, 0);
6650
6651 igb_reset(adapter);
6652 wr32(E1000_WUS, ~0);
6653 result = PCI_ERS_RESULT_RECOVERED;
6654 }
6655
6656 err = pci_cleanup_aer_uncorrect_error_status(pdev);
6657 if (err) {
6658 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6659 "failed 0x%0x\n", err);
6660 /* non-fatal, continue */
6661 }
6662
6663 return result;
6664 }
6665
6666 /**
6667 * igb_io_resume - called when traffic can start flowing again.
6668 * @pdev: Pointer to PCI device
6669 *
6670 * This callback is called when the error recovery driver tells us that
6671 * its OK to resume normal operation. Implementation resembles the
6672 * second-half of the igb_resume routine.
6673 */
6674 static void igb_io_resume(struct pci_dev *pdev)
6675 {
6676 struct net_device *netdev = pci_get_drvdata(pdev);
6677 struct igb_adapter *adapter = netdev_priv(netdev);
6678
6679 if (netif_running(netdev)) {
6680 if (igb_up(adapter)) {
6681 dev_err(&pdev->dev, "igb_up failed after reset\n");
6682 return;
6683 }
6684 }
6685
6686 netif_device_attach(netdev);
6687
6688 /* let the f/w know that the h/w is now under the control of the
6689 * driver. */
6690 igb_get_hw_control(adapter);
6691 }
6692
6693 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6694 u8 qsel)
6695 {
6696 u32 rar_low, rar_high;
6697 struct e1000_hw *hw = &adapter->hw;
6698
6699 /* HW expects these in little endian so we reverse the byte order
6700 * from network order (big endian) to little endian
6701 */
6702 rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6703 ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6704 rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6705
6706 /* Indicate to hardware the Address is Valid. */
6707 rar_high |= E1000_RAH_AV;
6708
6709 if (hw->mac.type == e1000_82575)
6710 rar_high |= E1000_RAH_POOL_1 * qsel;
6711 else
6712 rar_high |= E1000_RAH_POOL_1 << qsel;
6713
6714 wr32(E1000_RAL(index), rar_low);
6715 wrfl();
6716 wr32(E1000_RAH(index), rar_high);
6717 wrfl();
6718 }
6719
6720 static int igb_set_vf_mac(struct igb_adapter *adapter,
6721 int vf, unsigned char *mac_addr)
6722 {
6723 struct e1000_hw *hw = &adapter->hw;
6724 /* VF MAC addresses start at end of receive addresses and moves
6725 * torwards the first, as a result a collision should not be possible */
6726 int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6727
6728 memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6729
6730 igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6731
6732 return 0;
6733 }
6734
6735 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6736 {
6737 struct igb_adapter *adapter = netdev_priv(netdev);
6738 if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6739 return -EINVAL;
6740 adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6741 dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6742 dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6743 " change effective.");
6744 if (test_bit(__IGB_DOWN, &adapter->state)) {
6745 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6746 " but the PF device is not up.\n");
6747 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6748 " attempting to use the VF device.\n");
6749 }
6750 return igb_set_vf_mac(adapter, vf, mac);
6751 }
6752
6753 static int igb_link_mbps(int internal_link_speed)
6754 {
6755 switch (internal_link_speed) {
6756 case SPEED_100:
6757 return 100;
6758 case SPEED_1000:
6759 return 1000;
6760 default:
6761 return 0;
6762 }
6763 }
6764
6765 static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate,
6766 int link_speed)
6767 {
6768 int rf_dec, rf_int;
6769 u32 bcnrc_val;
6770
6771 if (tx_rate != 0) {
6772 /* Calculate the rate factor values to set */
6773 rf_int = link_speed / tx_rate;
6774 rf_dec = (link_speed - (rf_int * tx_rate));
6775 rf_dec = (rf_dec * (1<<E1000_RTTBCNRC_RF_INT_SHIFT)) / tx_rate;
6776
6777 bcnrc_val = E1000_RTTBCNRC_RS_ENA;
6778 bcnrc_val |= ((rf_int<<E1000_RTTBCNRC_RF_INT_SHIFT) &
6779 E1000_RTTBCNRC_RF_INT_MASK);
6780 bcnrc_val |= (rf_dec & E1000_RTTBCNRC_RF_DEC_MASK);
6781 } else {
6782 bcnrc_val = 0;
6783 }
6784
6785 wr32(E1000_RTTDQSEL, vf); /* vf X uses queue X */
6786 wr32(E1000_RTTBCNRC, bcnrc_val);
6787 }
6788
6789 static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
6790 {
6791 int actual_link_speed, i;
6792 bool reset_rate = false;
6793
6794 /* VF TX rate limit was not set or not supported */
6795 if ((adapter->vf_rate_link_speed == 0) ||
6796 (adapter->hw.mac.type != e1000_82576))
6797 return;
6798
6799 actual_link_speed = igb_link_mbps(adapter->link_speed);
6800 if (actual_link_speed != adapter->vf_rate_link_speed) {
6801 reset_rate = true;
6802 adapter->vf_rate_link_speed = 0;
6803 dev_info(&adapter->pdev->dev,
6804 "Link speed has been changed. VF Transmit "
6805 "rate is disabled\n");
6806 }
6807
6808 for (i = 0; i < adapter->vfs_allocated_count; i++) {
6809 if (reset_rate)
6810 adapter->vf_data[i].tx_rate = 0;
6811
6812 igb_set_vf_rate_limit(&adapter->hw, i,
6813 adapter->vf_data[i].tx_rate,
6814 actual_link_speed);
6815 }
6816 }
6817
6818 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6819 {
6820 struct igb_adapter *adapter = netdev_priv(netdev);
6821 struct e1000_hw *hw = &adapter->hw;
6822 int actual_link_speed;
6823
6824 if (hw->mac.type != e1000_82576)
6825 return -EOPNOTSUPP;
6826
6827 actual_link_speed = igb_link_mbps(adapter->link_speed);
6828 if ((vf >= adapter->vfs_allocated_count) ||
6829 (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
6830 (tx_rate < 0) || (tx_rate > actual_link_speed))
6831 return -EINVAL;
6832
6833 adapter->vf_rate_link_speed = actual_link_speed;
6834 adapter->vf_data[vf].tx_rate = (u16)tx_rate;
6835 igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
6836
6837 return 0;
6838 }
6839
6840 static int igb_ndo_get_vf_config(struct net_device *netdev,
6841 int vf, struct ifla_vf_info *ivi)
6842 {
6843 struct igb_adapter *adapter = netdev_priv(netdev);
6844 if (vf >= adapter->vfs_allocated_count)
6845 return -EINVAL;
6846 ivi->vf = vf;
6847 memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6848 ivi->tx_rate = adapter->vf_data[vf].tx_rate;
6849 ivi->vlan = adapter->vf_data[vf].pf_vlan;
6850 ivi->qos = adapter->vf_data[vf].pf_qos;
6851 return 0;
6852 }
6853
6854 static void igb_vmm_control(struct igb_adapter *adapter)
6855 {
6856 struct e1000_hw *hw = &adapter->hw;
6857 u32 reg;
6858
6859 switch (hw->mac.type) {
6860 case e1000_82575:
6861 default:
6862 /* replication is not supported for 82575 */
6863 return;
6864 case e1000_82576:
6865 /* notify HW that the MAC is adding vlan tags */
6866 reg = rd32(E1000_DTXCTL);
6867 reg |= E1000_DTXCTL_VLAN_ADDED;
6868 wr32(E1000_DTXCTL, reg);
6869 case e1000_82580:
6870 /* enable replication vlan tag stripping */
6871 reg = rd32(E1000_RPLOLR);
6872 reg |= E1000_RPLOLR_STRVLAN;
6873 wr32(E1000_RPLOLR, reg);
6874 case e1000_i350:
6875 /* none of the above registers are supported by i350 */
6876 break;
6877 }
6878
6879 if (adapter->vfs_allocated_count) {
6880 igb_vmdq_set_loopback_pf(hw, true);
6881 igb_vmdq_set_replication_pf(hw, true);
6882 igb_vmdq_set_anti_spoofing_pf(hw, true,
6883 adapter->vfs_allocated_count);
6884 } else {
6885 igb_vmdq_set_loopback_pf(hw, false);
6886 igb_vmdq_set_replication_pf(hw, false);
6887 }
6888 }
6889
6890 /* igb_main.c */
This page took 0.170506 seconds and 6 git commands to generate.