net/core/dev.c

   1 /*
   2  *      NET3    Protocol independent device support routines.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  *      Derived from the non IP parts of dev.c 1.0.19
  10  *              Authors:        Ross Biro
  11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *
  14  *      Additional Authors:
  15  *              Florian la Roche <rzsfl@rz.uni-sb.de>
  16  *              Alan Cox <gw4pts@gw4pts.ampr.org>
  17  *              David Hinds <dahinds@users.sourceforge.net>
  18  *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19  *              Adam Sulmicki <adam@cfar.umd.edu>
  20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21  *
  22  *      Changes:
  23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24  *                                      to 2 if register_netdev gets called
  25  *                                      before net_dev_init & also removed a
  26  *                                      few lines of code in the process.
  27  *              Alan Cox        :       device private ioctl copies fields back.
  28  *              Alan Cox        :       Transmit queue code does relevant
  29  *                                      stunts to keep the queue safe.
  30  *              Alan Cox        :       Fixed double lock.
  31  *              Alan Cox        :       Fixed promisc NULL pointer trap
  32  *              ????????        :       Support the full private ioctl range
  33  *              Alan Cox        :       Moved ioctl permission check into
  34  *                                      drivers
  35  *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
  36  *              Alan Cox        :       100 backlog just doesn't cut it when
  37  *                                      you start doing multicast video 8)
  38  *              Alan Cox        :       Rewrote net_bh and list manager.
  39  *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
  40  *              Alan Cox        :       Took out transmit every packet pass
  41  *                                      Saved a few bytes in the ioctl handler
  42  *              Alan Cox        :       Network driver sets packet type before
  43  *                                      calling netif_rx. Saves a function
  44  *                                      call a packet.
  45  *              Alan Cox        :       Hashed net_bh()
  46  *              Richard Kooijman:       Timestamp fixes.
  47  *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
  48  *              Alan Cox        :       Device lock protection.
  49  *              Alan Cox        :       Fixed nasty side effect of device close
  50  *                                      changes.
  51  *              Rudi Cilibrasi  :       Pass the right thing to
  52  *                                      set_mac_address()
  53  *              Dave Miller     :       32bit quantity for the device lock to
  54  *                                      make it work out on a Sparc.
  55  *              Bjorn Ekwall    :       Added KERNELD hack.
  56  *              Alan Cox        :       Cleaned up the backlog initialise.
  57  *              Craig Metz      :       SIOCGIFCONF fix if space for under
  58  *                                      1 device.
  59  *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
  60  *                                      is no device open function.
  61  *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
  62  *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
  63  *              Cyrus Durgin    :       Cleaned for KMOD
  64  *              Adam Sulmicki   :       Bug Fix : Network Device Unload
  65  *                                      A network device unload needs to purge
  66  *                                      the backlog queue.
  67  *      Paul Rusty Russell      :       SIOCSIFNAME
  68  *              Pekka Riikonen  :       Netdev boot-time settings code
  69  *              Andrew Morton   :       Make unregister_netdevice wait
  70  *                                      indefinitely on dev->refcnt
  71  *              J Hadi Salim    :       - Backlog queue sampling
  72  *                                      - netif_rx() feedback
  73  */
  74
  75 #include <asm/uaccess.h>
  76 #include <asm/system.h>
  77 #include <linux/bitops.h>
  78 #include <linux/capability.h>
  79 #include <linux/cpu.h>
  80 #include <linux/types.h>
  81 #include <linux/kernel.h>
  82 #include <linux/sched.h>
  83 #include <linux/mutex.h>
  84 #include <linux/string.h>
  85 #include <linux/mm.h>
  86 #include <linux/socket.h>
  87 #include <linux/sockios.h>
  88 #include <linux/errno.h>
  89 #include <linux/interrupt.h>
  90 #include <linux/if_ether.h>
  91 #include <linux/netdevice.h>
  92 #include <linux/etherdevice.h>
  93 #include <linux/notifier.h>
  94 #include <linux/skbuff.h>
  95 #include <net/sock.h>
  96 #include <linux/rtnetlink.h>
  97 #include <linux/proc_fs.h>
  98 #include <linux/seq_file.h>
  99 #include <linux/stat.h>
 100 #include <linux/if_bridge.h>
 101 #include <linux/if_macvlan.h>
 102 #include <net/dst.h>
 103 #include <net/pkt_sched.h>
 104 #include <net/checksum.h>
 105 #include <linux/highmem.h>
 106 #include <linux/init.h>
 107 #include <linux/kmod.h>
 108 #include <linux/module.h>
 109 #include <linux/kallsyms.h>
 110 #include <linux/netpoll.h>
 111 #include <linux/rcupdate.h>
 112 #include <linux/delay.h>
 113 #include <net/wext.h>
 114 #include <net/iw_handler.h>
 115 #include <asm/current.h>
 116 #include <linux/audit.h>
 117 #include <linux/dmaengine.h>
 118 #include <linux/err.h>
 119 #include <linux/ctype.h>
 120 #include <linux/if_arp.h>
 121
 122 /*
 123  *      The list of packet types we will receive (as opposed to discard)
 124  *      and the routines to invoke.
 125  *
 *      Why 16? Because with 16 buckets the only collisions we get when
 *      hashing on the low nibble of the protocol value are RARP/SNAP/X.25.
 128  *
 129  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 130  *             sure which should go first, but I bet it won't make much
 131  *             difference if we are running VLANs.  The good news is that
 132  *             this protocol won't be in the list unless compiled in, so
 133  *             the average user (w/out VLANs) will not be adversely affected.
 134  *             --BLG
 135  *
 136  *              0800    IP
 137  *              8100    802.1Q VLAN
 138  *              0001    802.3
 139  *              0002    AX.25
 140  *              0004    802.2
 141  *              8035    RARP
 142  *              0005    SNAP
 143  *              0805    X.25
 144  *              0806    ARP
 145  *              8137    IPX
 146  *              0009    Localtalk
 147  *              86DD    IPv6
 148  */
 149
 150 static DEFINE_SPINLOCK(ptype_lock);
 151 static struct list_head ptype_base[16] __read_mostly;   /* 16 way hashed list */
 152 static struct list_head ptype_all __read_mostly;        /* Taps */
 153
 154 #ifdef CONFIG_NET_DMA
 155 struct net_dma {
 156         struct dma_client client;
 157         spinlock_t lock;
 158         cpumask_t channel_mask;
 159         struct dma_chan *channels[NR_CPUS];
 160 };
 161
 162 static enum dma_state_client
 163 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
 164         enum dma_state state);
 165
 166 static struct net_dma net_dma = {
 167         .client = {
 168                 .event_callback = netdev_dma_event,
 169         },
 170 };
 171 #endif
 172
 173 /*
 174  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 175  * semaphore.
 176  *
 177  * Pure readers hold dev_base_lock for reading.
 178  *
 179  * Writers must hold the rtnl semaphore while they loop through the
 180  * dev_base_head list, and hold dev_base_lock for writing when they do the
 181  * actual updates.  This allows pure readers to access the list even
 182  * while a writer is preparing to update it.
 183  *
 184  * To put it another way, dev_base_lock is held for writing only to
 185  * protect against pure readers; the rtnl semaphore provides the
 186  * protection against other writers.
 187  *
 188  * See, for example usages, register_netdevice() and
 189  * unregister_netdevice(), which must be called with the rtnl
 190  * semaphore held.
 191  */
 192 LIST_HEAD(dev_base_head);
 193 DEFINE_RWLOCK(dev_base_lock);
 194
 195 EXPORT_SYMBOL(dev_base_head);
 196 EXPORT_SYMBOL(dev_base_lock);
 197
 198 #define NETDEV_HASHBITS 8
 199 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
 200 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
 201
 202 static inline struct hlist_head *dev_name_hash(const char *name)
 203 {
 204         unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 205         return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
 206 }
 207
 208 static inline struct hlist_head *dev_index_hash(int ifindex)
 209 {
 210         return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
 211 }
 212
 213 /*
 214  *      Our notifier list
 215  */
 216
 217 static RAW_NOTIFIER_HEAD(netdev_chain);
 218
 219 /*
 220  *      Device drivers call our routines to queue packets here. We empty the
 221  *      queue in the local softnet handler.
 222  */
 223
 224 DEFINE_PER_CPU(struct softnet_data, softnet_data);
 225
 226 #ifdef CONFIG_SYSFS
 227 extern int netdev_sysfs_init(void);
 228 extern int netdev_register_sysfs(struct net_device *);
 229 extern void netdev_unregister_sysfs(struct net_device *);
 230 #else
 231 #define netdev_sysfs_init()             (0)
 232 #define netdev_register_sysfs(dev)      (0)
 233 #define netdev_unregister_sysfs(dev)    do { } while(0)
 234 #endif
 235
 236 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 237 /*
 238  * register_netdevice() inits dev->_xmit_lock and sets lockdep class
 239  * according to dev->type
 240  */
 241 static const unsigned short netdev_lock_type[] =
 242         {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
 243          ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
 244          ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
 245          ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
 246          ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
 247          ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
 248          ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
 249          ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
 250          ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
 251          ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
 252          ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
 253          ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
 254          ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
 255          ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
 256          ARPHRD_NONE};
 257
 258 static const char *netdev_lock_name[] =
 259         {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
 260          "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
 261          "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
 262          "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
 263          "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
 264          "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
 265          "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
 266          "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
 267          "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
 268          "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
 269          "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
 270          "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
 271          "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
 272          "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
 273          "_xmit_NONE"};
 274
 275 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
 276
 277 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 278 {
 279         int i;
 280
 281         for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
 282                 if (netdev_lock_type[i] == dev_type)
 283                         return i;
 284         /* the last key is used by default */
 285         return ARRAY_SIZE(netdev_lock_type) - 1;
 286 }
 287
 288 static inline void netdev_set_lockdep_class(spinlock_t *lock,
 289                                             unsigned short dev_type)
 290 {
 291         int i;
 292
 293         i = netdev_lock_pos(dev_type);
 294         lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
 295                                    netdev_lock_name[i]);
 296 }
 297 #else
 298 static inline void netdev_set_lockdep_class(spinlock_t *lock,
 299                                             unsigned short dev_type)
 300 {
 301 }
 302 #endif
 303
 304 /*******************************************************************************
 305
 306                 Protocol management and registration routines
 307
 308 *******************************************************************************/
 309
 310 /*
 311  *      Add a protocol ID to the list. Now that the input handler is
 312  *      smarter we can dispense with all the messy stuff that used to be
 313  *      here.
 314  *
 *      BEWARE!!! Protocol handlers that mangle input packets
 *      MUST BE last in their hash bucket, and the walk over protocol
 *      handlers MUST start from the promiscuous ptype_all chain in net_bh.
 *      This is true now; do not change it.
 *      Explanation: if a packet-mangling handler were first on the
 *      list, it would have no way to tell that the packet is cloned and
 *      must be copied before being written, so it would change it in
 *      place and subsequent readers would get a broken packet.
 *                                                      --ANK (980803)
 324  */
 325
 326 /**
 327  *      dev_add_pack - add packet handler
 328  *      @pt: packet type declaration
 329  *
 330  *      Add a protocol handler to the networking stack. The passed &packet_type
 331  *      is linked into kernel lists and may not be freed until it has been
 332  *      removed from the kernel lists.
 333  *
 334  *      This call does not sleep therefore it can not
 335  *      guarantee all CPU's that are in middle of receiving packets
 336  *      will see the new packet type (until the next received packet).
 337  */
 338
 339 void dev_add_pack(struct packet_type *pt)
 340 {
 341         int hash;
 342
 343         spin_lock_bh(&ptype_lock);
 344         if (pt->type == htons(ETH_P_ALL))
 345                 list_add_rcu(&pt->list, &ptype_all);
 346         else {
 347                 hash = ntohs(pt->type) & 15;
 348                 list_add_rcu(&pt->list, &ptype_base[hash]);
 349         }
 350         spin_unlock_bh(&ptype_lock);
 351 }
 352
 353 /**
 354  *      __dev_remove_pack        - remove packet handler
 355  *      @pt: packet type declaration
 356  *
 357  *      Remove a protocol handler that was previously added to the kernel
 358  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 359  *      from the kernel lists and can be freed or reused once this function
 360  *      returns.
 361  *
 362  *      The packet type might still be in use by receivers
 363  *      and must not be freed until after all the CPU's have gone
 364  *      through a quiescent state.
 365  */
 366 void __dev_remove_pack(struct packet_type *pt)
 367 {
 368         struct list_head *head;
 369         struct packet_type *pt1;
 370
 371         spin_lock_bh(&ptype_lock);
 372
 373         if (pt->type == htons(ETH_P_ALL))
 374                 head = &ptype_all;
 375         else
 376                 head = &ptype_base[ntohs(pt->type) & 15];
 377
 378         list_for_each_entry(pt1, head, list) {
 379                 if (pt == pt1) {
 380                         list_del_rcu(&pt->list);
 381                         goto out;
 382                 }
 383         }
 384
 385         printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 386 out:
 387         spin_unlock_bh(&ptype_lock);
 388 }
 389 /**
 390  *      dev_remove_pack  - remove packet handler
 391  *      @pt: packet type declaration
 392  *
 393  *      Remove a protocol handler that was previously added to the kernel
 394  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 395  *      from the kernel lists and can be freed or reused once this function
 396  *      returns.
 397  *
 398  *      This call sleeps to guarantee that no CPU is looking at the packet
 399  *      type after return.
 400  */
 401 void dev_remove_pack(struct packet_type *pt)
 402 {
 403         __dev_remove_pack(pt);
 404
 405         synchronize_net();
 406 }
 407
 408 /******************************************************************************
 409
 410                       Device Boot-time Settings Routines
 411
 412 *******************************************************************************/
 413
 414 /* Boot time configuration table */
 415 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 416
 417 /**
 418  *      netdev_boot_setup_add   - add new setup entry
 419  *      @name: name of the device
 420  *      @map: configured settings for the device
 421  *
 422  *      Adds new setup entry to the dev_boot_setup list.  The function
 423  *      returns 0 on error and 1 on success.  This is a generic routine to
 424  *      all netdevices.
 425  */
 426 static int netdev_boot_setup_add(char *name, struct ifmap *map)
 427 {
 428         struct netdev_boot_setup *s;
 429         int i;
 430
 431         s = dev_boot_setup;
 432         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 433                 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 434                         memset(s[i].name, 0, sizeof(s[i].name));
 435                         strcpy(s[i].name, name);
 436                         memcpy(&s[i].map, map, sizeof(s[i].map));
 437                         break;
 438                 }
 439         }
 440
 441         return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 442 }
 443
 444 /**
 445  *      netdev_boot_setup_check - check boot time settings
 446  *      @dev: the netdevice
 447  *
 448  *      Check boot time settings for the device.
 449  *      The found settings are set for the device to be used
 450  *      later in the device probing.
 451  *      Returns 0 if no settings found, 1 if they are.
 452  */
 453 int netdev_boot_setup_check(struct net_device *dev)
 454 {
 455         struct netdev_boot_setup *s = dev_boot_setup;
 456         int i;
 457
 458         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 459                 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 460                     !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
 461                         dev->irq        = s[i].map.irq;
 462                         dev->base_addr  = s[i].map.base_addr;
 463                         dev->mem_start  = s[i].map.mem_start;
 464                         dev->mem_end    = s[i].map.mem_end;
 465                         return 1;
 466                 }
 467         }
 468         return 0;
 469 }
 470
 471
 472 /**
 473  *      netdev_boot_base        - get address from boot time settings
 474  *      @prefix: prefix for network device
 475  *      @unit: id for network device
 476  *
 477  *      Check boot time settings for the base address of device.
 478  *      The found settings are set for the device to be used
 479  *      later in the device probing.
 480  *      Returns 0 if no settings found.
 481  */
 482 unsigned long netdev_boot_base(const char *prefix, int unit)
 483 {
 484         const struct netdev_boot_setup *s = dev_boot_setup;
 485         char name[IFNAMSIZ];
 486         int i;
 487
 488         sprintf(name, "%s%d", prefix, unit);
 489
 490         /*
 491          * If device already registered then return base of 1
 492          * to indicate not to probe for this interface
 493          */
 494         if (__dev_get_by_name(name))
 495                 return 1;
 496
 497         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 498                 if (!strcmp(name, s[i].name))
 499                         return s[i].map.base_addr;
 500         return 0;
 501 }
 502
 503 /*
 504  * Saves at boot time configured settings for any netdevice.
 505  */
 506 int __init netdev_boot_setup(char *str)
 507 {
 508         int ints[5];
 509         struct ifmap map;
 510
 511         str = get_options(str, ARRAY_SIZE(ints), ints);
 512         if (!str || !*str)
 513                 return 0;
 514
 515         /* Save settings */
 516         memset(&map, 0, sizeof(map));
 517         if (ints[0] > 0)
 518                 map.irq = ints[1];
 519         if (ints[0] > 1)
 520                 map.base_addr = ints[2];
 521         if (ints[0] > 2)
 522                 map.mem_start = ints[3];
 523         if (ints[0] > 3)
 524                 map.mem_end = ints[4];
 525
 526         /* Add new entry to the list */
 527         return netdev_boot_setup_add(str, &map);
 528 }
 529
 530 __setup("netdev=", netdev_boot_setup);
 531
 532 /*******************************************************************************
 533
 534                             Device Interface Subroutines
 535
 536 *******************************************************************************/
 537
 538 /**
 539  *      __dev_get_by_name       - find a device by its name
 540  *      @name: name to find
 541  *
 542  *      Find an interface by name. Must be called under RTNL semaphore
 543  *      or @dev_base_lock. If the name is found a pointer to the device
 544  *      is returned. If the name is not found then %NULL is returned. The
 545  *      reference counters are not incremented so the caller must be
 546  *      careful with locks.
 547  */
 548
 549 struct net_device *__dev_get_by_name(const char *name)
 550 {
 551         struct hlist_node *p;
 552
 553         hlist_for_each(p, dev_name_hash(name)) {
 554                 struct net_device *dev
 555                         = hlist_entry(p, struct net_device, name_hlist);
 556                 if (!strncmp(dev->name, name, IFNAMSIZ))
 557                         return dev;
 558         }
 559         return NULL;
 560 }
 561
 562 /**
 563  *      dev_get_by_name         - find a device by its name
 564  *      @name: name to find
 565  *
 566  *      Find an interface by name. This can be called from any
 567  *      context and does its own locking. The returned handle has
 568  *      the usage count incremented and the caller must use dev_put() to
 569  *      release it when it is no longer needed. %NULL is returned if no
 570  *      matching device is found.
 571  */
 572
 573 struct net_device *dev_get_by_name(const char *name)
 574 {
 575         struct net_device *dev;
 576
 577         read_lock(&dev_base_lock);
 578         dev = __dev_get_by_name(name);
 579         if (dev)
 580                 dev_hold(dev);
 581         read_unlock(&dev_base_lock);
 582         return dev;
 583 }
 584
 585 /**
 586  *      __dev_get_by_index - find a device by its ifindex
 587  *      @ifindex: index of device
 588  *
 589  *      Search for an interface by index. Returns %NULL if the device
 590  *      is not found or a pointer to the device. The device has not
 591  *      had its reference counter increased so the caller must be careful
 592  *      about locking. The caller must hold either the RTNL semaphore
 593  *      or @dev_base_lock.
 594  */
 595
 596 struct net_device *__dev_get_by_index(int ifindex)
 597 {
 598         struct hlist_node *p;
 599
 600         hlist_for_each(p, dev_index_hash(ifindex)) {
 601                 struct net_device *dev
 602                         = hlist_entry(p, struct net_device, index_hlist);
 603                 if (dev->ifindex == ifindex)
 604                         return dev;
 605         }
 606         return NULL;
 607 }
 608
 609
 610 /**
 611  *      dev_get_by_index - find a device by its ifindex
 612  *      @ifindex: index of device
 613  *
 614  *      Search for an interface by index. Returns NULL if the device
 615  *      is not found or a pointer to the device. The device returned has
 616  *      had a reference added and the pointer is safe until the user calls
 617  *      dev_put to indicate they have finished with it.
 618  */
 619
 620 struct net_device *dev_get_by_index(int ifindex)
 621 {
 622         struct net_device *dev;
 623
 624         read_lock(&dev_base_lock);
 625         dev = __dev_get_by_index(ifindex);
 626         if (dev)
 627                 dev_hold(dev);
 628         read_unlock(&dev_base_lock);
 629         return dev;
 630 }
 631
 632 /**
 633  *      dev_getbyhwaddr - find a device by its hardware address
 634  *      @type: media type of device
 635  *      @ha: hardware address
 636  *
 637  *      Search for an interface by MAC address. Returns NULL if the device
 638  *      is not found or a pointer to the device. The caller must hold the
 639  *      rtnl semaphore. The returned device has not had its ref count increased
 640  *      and the caller must therefore be careful about locking
 641  *
 642  *      BUGS:
 643  *      If the API was consistent this would be __dev_get_by_hwaddr
 644  */
 645
 646 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
 647 {
 648         struct net_device *dev;
 649
 650         ASSERT_RTNL();
 651
 652         for_each_netdev(dev)
 653                 if (dev->type == type &&
 654                     !memcmp(dev->dev_addr, ha, dev->addr_len))
 655                         return dev;
 656
 657         return NULL;
 658 }
 659
 660 EXPORT_SYMBOL(dev_getbyhwaddr);
 661
 662 struct net_device *__dev_getfirstbyhwtype(unsigned short type)
 663 {
 664         struct net_device *dev;
 665
 666         ASSERT_RTNL();
 667         for_each_netdev(dev)
 668                 if (dev->type == type)
 669                         return dev;
 670
 671         return NULL;
 672 }
 673
 674 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 675
 676 struct net_device *dev_getfirstbyhwtype(unsigned short type)
 677 {
 678         struct net_device *dev;
 679
 680         rtnl_lock();
 681         dev = __dev_getfirstbyhwtype(type);
 682         if (dev)
 683                 dev_hold(dev);
 684         rtnl_unlock();
 685         return dev;
 686 }
 687
 688 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 689
 690 /**
 691  *      dev_get_by_flags - find any device with given flags
 692  *      @if_flags: IFF_* values
 693  *      @mask: bitmask of bits in if_flags to check
 694  *
 695  *      Search for any interface with the given flags. Returns NULL if a device
 696  *      is not found or a pointer to the device. The device returned has
 697  *      had a reference added and the pointer is safe until the user calls
 698  *      dev_put to indicate they have finished with it.
 699  */
 700
 701 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
 702 {
 703         struct net_device *dev, *ret;
 704
 705         ret = NULL;
 706         read_lock(&dev_base_lock);
 707         for_each_netdev(dev) {
 708                 if (((dev->flags ^ if_flags) & mask) == 0) {
 709                         dev_hold(dev);
 710                         ret = dev;
 711                         break;
 712                 }
 713         }
 714         read_unlock(&dev_base_lock);
 715         return ret;
 716 }
 717
 718 /**
 719  *      dev_valid_name - check if name is okay for network device
 720  *      @name: name string
 721  *
 722  *      Network device names need to be valid file names to
 723  *      to allow sysfs to work.  We also disallow any kind of
 724  *      whitespace.
 725  */
 726 int dev_valid_name(const char *name)
 727 {
 728         if (*name == '\0')
 729                 return 0;
 730         if (strlen(name) >= IFNAMSIZ)
 731                 return 0;
 732         if (!strcmp(name, ".") || !strcmp(name, ".."))
 733                 return 0;
 734
 735         while (*name) {
 736                 if (*name == '/' || isspace(*name))
 737                         return 0;
 738                 name++;
 739         }
 740         return 1;
 741 }
 742
 743 /**
 744  *      dev_alloc_name - allocate a name for a device
 745  *      @dev: device
 746  *      @name: name format string
 747  *
 748  *      Passed a format string - eg "lt%d" it will try and find a suitable
 749  *      id. It scans list of devices to build up a free map, then chooses
 750  *      the first empty slot. The caller must hold the dev_base or rtnl lock
 751  *      while allocating the name and adding the device in order to avoid
 752  *      duplicates.
 753  *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 754  *      Returns the number of the unit assigned or a negative errno code.
 755  */
 756
 757 int dev_alloc_name(struct net_device *dev, const char *name)
 758 {
             /*
              * Build a device name from the template @name and store it in
              * dev->name.  Returns the unit number used (0 when the template
              * contains no '%'), -EINVAL for a malformed template, -ENOMEM if
              * the bitmap page cannot be allocated and -ENFILE when the
              * generated name is already in use.
              */
 759         int i = 0;
 760         char buf[IFNAMSIZ];
 761         const char *p;
 762         const int max_netdevices = 8*PAGE_SIZE; /* bits in the one-page bitmap */
 763         long *inuse;
 764         struct net_device *d;
 765
 766         p = strnchr(name, IFNAMSIZ-1, '%');
 767         if (p) {
 768                 /*
 769                  * Verify the string as this thing may have come from
 770                  * the user.  There must be exactly one "%d" and no other "%"
 771                  * characters.
 772                  */
 773                 if (p[1] != 'd' || strchr(p + 2, '%'))
 774                         return -EINVAL;
 775
 776                 /* Use one page as a bit array of possible slots */
 777                 inuse = (long *) get_zeroed_page(GFP_ATOMIC);
 778                 if (!inuse)
 779                         return -ENOMEM;
 780
                     /* Mark the unit of every existing device whose name fits the template */
 781                 for_each_netdev(d) {
 782                         if (!sscanf(d->name, name, &i))
 783                                 continue;
 784                         if (i < 0 || i >= max_netdevices)
 785                                 continue;
 786
 787                         /*  avoid cases where sscanf is not exact inverse of printf */
 788                         snprintf(buf, sizeof(buf), name, i);
 789                         if (!strncmp(buf, d->name, IFNAMSIZ))
 790                                 set_bit(i, inuse);
 791                 }
 792
                     /* Take the lowest unit that was not marked, then drop the bitmap */
 793                 i = find_first_zero_bit(inuse, max_netdevices);
 794                 free_page((unsigned long) inuse);
 795         }
 796
             /* Format the candidate name and accept it only if no device owns it yet */
 797         snprintf(buf, sizeof(buf), name, i);
 798         if (!__dev_get_by_name(buf)) {
 799                 strlcpy(dev->name, buf, IFNAMSIZ);
 800                 return i;
 801         }
 802
 803         /* It is possible to run out of possible slots
 804          * when the name is long and there isn't enough space left
 805          * for the digits, or if all bits are used.
 806          */
 807         return -ENFILE;
 808 }
 809
 810
 811 /**
 812  *      dev_change_name - change name of a device
 813  *      @dev: device
 814  *      @newname: name (or format string); the buffer must be at least
      *                IFNAMSIZ bytes because the resolved name is copied back
 815  *
 816  *      Change name of a device, can pass format strings "eth%d"
 817  *      for wildcarding.  The RTNL lock must be held and the device must
      *      be down (-EBUSY otherwise).
      *
      *      Returns a negative errno on failure.  On success the result is 0,
      *      or the unit number picked by dev_alloc_name() when @newname was a
      *      format string.  If device_rename() or a %NETDEV_CHANGENAME
      *      notifier rejects the new name, the old name is restored and that
      *      error is returned.
 818  */
 819 int dev_change_name(struct net_device *dev, char *newname)
 820 {
 821         char oldname[IFNAMSIZ];
 822         int err = 0;
 823         int ret;
 824
 825         ASSERT_RTNL();
 826
 827         if (dev->flags & IFF_UP)
 828                 return -EBUSY;
 829
 830         if (!dev_valid_name(newname))
 831                 return -EINVAL;
 832
             /* Keep the current name so a rejected rename can be undone. */
 833         memcpy(oldname, dev->name, IFNAMSIZ);
 834
 835         if (strchr(newname, '%')) {
                     /* On success err is the allocated unit number (>= 0). */
 836                 err = dev_alloc_name(dev, newname);
 837                 if (err < 0)
 838                         return err;
 839                 strcpy(newname, dev->name);
 840         }
 841         else if (__dev_get_by_name(newname))
 842                 return -EEXIST;
 843         else
 844                 strlcpy(dev->name, newname, IFNAMSIZ);
 845
 846 rollback:
 847         ret = device_rename(&dev->dev, dev->name);
             if (ret && err >= 0) {
                     /*
                      * Forward pass: nothing else has been changed yet, so
                      * restoring dev->name is a complete undo.  On the undo
                      * pass (err < 0) we carry on so that the name hash is
                      * put back in step with dev->name.
                      */
                     memcpy(dev->name, oldname, IFNAMSIZ);
                     return ret;
             }
 848
             /* Re-hash the device under whatever dev->name now holds. */
 849         write_lock_bh(&dev_base_lock);
 850         hlist_del(&dev->name_hlist);
 851         hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
 852         write_unlock_bh(&dev_base_lock);
 853
 854         ret = raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
 855         ret = notifier_to_errno(ret);
 856
 857         if (ret) {
                     /*
                      * err >= 0 on the forward pass (0, or the unit number
                      * from dev_alloc_name()); err < 0 only once it stores
                      * the first errno, i.e. while undoing.  Testing plain
                      * "err" would treat a positive unit number as "already
                      * rolling back" and leave the rejected name in place.
                      */
 858                 if (err < 0) {
 859                         printk(KERN_ERR
 860                                "%s: name change rollback failed: %d.\n",
 861                                dev->name, ret);
 862                 } else {
 863                         err = ret;
 864                         memcpy(dev->name, oldname, IFNAMSIZ);
 865                         goto rollback;
 866                 }
 867         }
 868
 869         return err;
 870 }
 871
 872 /**
 873  *      netdev_features_change - device changes features
 874  *      @dev: device to cause notification
 875  *
 876  *      Called to indicate a device has changed features.  Runs the
      *      netdev_chain notifiers with %NETDEV_FEAT_CHANGE; the value
      *      returned by the chain is not examined.
 877  */
 878 void netdev_features_change(struct net_device *dev)
 879 {
 880         raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
 881 }
 882 EXPORT_SYMBOL(netdev_features_change);
 883
 884 /**
 885  *      netdev_state_change - device changes state
 886  *      @dev: device to cause notification
 887  *
 888  *      Report a state change of @dev.  For an interface that is up the
 889  *      netdev_chain notifiers are run with %NETDEV_CHANGE and an
 890  *      RTM_NEWLINK message is sent through rtmsg_ifinfo().  Nothing is
      *      done for an interface that is down.
 891  */
 892 void netdev_state_change(struct net_device *dev)
 893 {
             /* State changes of a down interface are not reported. */
             if (!(dev->flags & IFF_UP))
                     return;

 895         raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
 897         rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
 899 }
 900
 901 /**
 902  *      dev_load        - load a network module
 903  *      @name: name of interface
 904  *
 905  *      Look @name up under dev_base_lock.  If no such interface exists
 906  *      and the caller has CAP_SYS_MODULE, request_module() is asked for a
 907  *      module named after the interface.  If module loading is not
      *      available in this kernel then it becomes a nop.
 908  */
 909
 910 void dev_load(const char *name)
 911 {
             int present;

             /* Only existence matters, so no reference is kept past the lock. */
 914         read_lock(&dev_base_lock);
             present = __dev_get_by_name(name) != NULL;
 916         read_unlock(&dev_base_lock);

             if (present)
                     return;

             if (capable(CAP_SYS_MODULE))
 919                 request_module("%s", name);
 920 }
 921
 922 static int default_rebuild_header(struct sk_buff *skb)
 923 {
             /*
              * Being called at all is reported as a bug: log the device name
              * (or a placeholder when the skb has no device), free the
              * buffer and return 1.
              */
             const char *ifname = "NULL!!!";

             if (skb->dev)
                     ifname = skb->dev->name;

 924         printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
 925                ifname);
 926         kfree_skb(skb);
 927         return 1;
 928 }
 929
 930 /**
 931  *      dev_open        - prepare an interface for use.
 932  *      @dev:   device to open
 933  *
 934  *      Bring a device from the down to the up state: the device's private
 935  *      open method (if any) is called, %IFF_UP is set, the receive mode is
 936  *      programmed, the transmit side is activated and a %NETDEV_UP event
 937  *      is sent to the netdev notifier chain.
 938  *
 939  *      Opening an interface that is already up is a nop returning 0.
 940  *      -ENODEV is returned for a device that is not present; otherwise a
      *      failure of the device's open method is returned unchanged.
 941  */
 942 int dev_open(struct net_device *dev)
 943 {
             int err;

             /* Already up: nothing to do. */
 950         if (dev->flags & IFF_UP)
 951                 return 0;

             /* A device that is not present cannot be opened. */
 956         if (!netif_device_present(dev))
 957                 return -ENODEV;

             /*
              * Mark the device as started before its open method runs and
              * take the mark back again if that method fails.
              */
 962         set_bit(__LINK_STATE_START, &dev->state);
             if (dev->open) {
                     err = dev->open(dev);
                     if (err) {
                             clear_bit(__LINK_STATE_START, &dev->state);
                             return err;
                     }
             }

             /* Opened fine: flag it up, load rx mode, start tx, announce. */
 977         dev->flags |= IFF_UP;
 982         dev_set_rx_mode(dev);
 987         dev_activate(dev);
 992         raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);

             return 0;
 995 }
 996
 997 /**
 998  *      dev_close - shutdown an interface.
 999  *      @dev: device to shutdown
1000  *
1001  *      This function moves an active device into down state. A
1002  *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1003  *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1004  *      chain.
      *
      *      Calling this on an interface that is not up is a nop.  The
      *      function always returns 0.
1005  */
1006 int dev_close(struct net_device *dev)
1007 {
1008         if (!(dev->flags & IFF_UP))
1009                 return 0;
1010
1011         /*
1012          *      Tell people we are going down, so that they can
1013          *      prepare for the shutdown while the device is still operating.
1014          */
1015         raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
1016
1017         dev_deactivate(dev);
1018
1019         clear_bit(__LINK_STATE_START, &dev->state);
1020
1021         /* Synchronize to scheduled poll. We cannot touch poll list,
1022          * it can be even on different cpu. So just clear netif_running().
1023          *
1024          * dev->stop() will invoke napi_disable() on all of its
1025          * napi_struct instances on this device.
1026          */
1027         smp_mb__after_clear_bit(); /* Commit netif_running(). */
1028
1029         /*
1030          *      Call the device specific close. This cannot fail.
1031          *      Only if device is UP
1032          *
1033          *      We allow it to be called even after a DETACH hot-plug
1034          *      event.
1035          */
1036         if (dev->stop)
1037                 dev->stop(dev);
1038
1039         /*
1040          *      Device is now down.
1041          */
1042
1043         dev->flags &= ~IFF_UP;
1044
1045         /*
1046          * Tell people we are down
1047          */
1048         raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
1049
1050         return 0;
1051 }
1052
1053
1054 /*
1055  *      Device change register/unregister. These are not inline or static
1056  *      as we export them to the world.
1057  */
1058
1059 /**
1060  *      register_netdevice_notifier - register a network notifier block
1061  *      @nb: notifier
1062  *
1063  *      Register a notifier to be called when network device events occur.
1064  *      The notifier passed is linked into the kernel structures and must
1065  *      not be reused until it has been unregistered. A negative errno code
1066  *      is returned on a failure.
1067  *
1068  *      When registered all registration and up events are replayed
1069  *      to the new notifier to allow device to have a race free
1070  *      view of the network device list.
      *
      *      If the notifier rejects a replayed %NETDEV_REGISTER event, the
      *      devices already announced to it are withdrawn again (going-down
      *      and down events for interfaces that are up, then an unregister
      *      event), the notifier is removed from the chain and the error is
      *      returned.
1071  */
1072
1073 int register_netdevice_notifier(struct notifier_block *nb)
1074 {
1075         struct net_device *dev;
1076         struct net_device *last;
1077         int err;
1078
1079         rtnl_lock();
1080         err = raw_notifier_chain_register(&netdev_chain, nb);
1081         if (err)
1082                 goto unlock;
1083
             /* Replay REGISTER (and UP, for running interfaces) to nb only. */
1084         for_each_netdev(dev) {
1085                 err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1086                 err = notifier_to_errno(err);
1087                 if (err)
1088                         goto rollback;
1089
1090                 if (!(dev->flags & IFF_UP))
1091                         continue;
1092
1093                 nb->notifier_call(nb, NETDEV_UP, dev);
1094         }
1095
1096 unlock:
1097         rtnl_unlock();
1098         return err;
1099
1100 rollback:
             /* Undo the replay for every device that precedes the failing one. */
1101         last = dev;
1102         for_each_netdev(dev) {
1103                 if (dev == last)
1104                         break;
1105
1106                 if (dev->flags & IFF_UP) {
1107                         nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1108                         nb->notifier_call(nb, NETDEV_DOWN, dev);
1109                 }
1110                 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1111         }

             /*
              * The caller is told that registration failed, so nb must not
              * stay linked on netdev_chain where it would keep being called.
              */
             raw_notifier_chain_unregister(&netdev_chain, nb);
1112         goto unlock;
1113 }
1114
1115 /**
1116  *      unregister_netdevice_notifier - unregister a network notifier block
1117  *      @nb: notifier
1118  *
1119  *      Unregister a notifier previously registered by
1120  *      register_netdevice_notifier(). The notifier is unlinked from the
1121  *      kernel structures and may then be reused. A negative errno code
1122  *      is returned on a failure.
      *
      *      The removal from netdev_chain is done with the RTNL lock held.
1123  */
1124
1125 int unregister_netdevice_notifier(struct notifier_block *nb)
1126 {
1127         int err;
1128
1129         rtnl_lock();
1130         err = raw_notifier_chain_unregister(&netdev_chain, nb);
1131         rtnl_unlock();
1132         return err;
1133 }
1134
1135 /**
1136  *      call_netdevice_notifiers - call all network notifier blocks
1137  *      @val: value passed unmodified to notifier function
1138  *      @v:   pointer passed unmodified to notifier function
1139  *
1140  *      Call all network notifier blocks.  Parameters and return value
1141  *      are as for raw_notifier_call_chain().
1142  */
1143
1144 int call_netdevice_notifiers(unsigned long val, void *v)
1145 {
             /* Runs netdev_chain, the chain register_netdevice_notifier() adds to. */
1146         return raw_notifier_call_chain(&netdev_chain, val, v);
1147 }
1148
1149 /* When > 0 there are consumers of rx skb time stamps */
1150 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1151
1152 void net_enable_timestamp(void)
1153 {
             /* One more consumer: net_timestamp() stamps skbs while the count is non-zero. */
1154         atomic_inc(&netstamp_needed);
1155 }
1156
1157 void net_disable_timestamp(void)
1158 {
             /* Drops the count taken by net_enable_timestamp(). */
1159         atomic_dec(&netstamp_needed);
1160 }
1161
1162 static inline void net_timestamp(struct sk_buff *skb)
1163 {
             /* With no timestamp consumers the stamp is just cleared. */
             if (!atomic_read(&netstamp_needed)) {
                     skb->tstamp.tv64 = 0;
                     return;
             }

             __net_timestamp(skb);
1168 }
1169
1170 /*
1171  *      Support routine. Sends outgoing frames to any network
1172  *      taps currently in use.
      *
      *      Every matching handler on ptype_all gets its own clone of @skb,
      *      marked PACKET_OUTGOING.  The walk stops at the first clone that
      *      cannot be allocated.
1173  */
1174
1175 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1176 {
1177         struct packet_type *ptype;
1178
1179         net_timestamp(skb);
1180
1181         rcu_read_lock();
1182         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1183                 /* Never send packets back to the socket
1184                  * they originated from - MvS (miquels@drinkel.ow.org)
1185                  */
1186                 if ((ptype->dev == dev || !ptype->dev) &&
1187                     (ptype->af_packet_priv == NULL ||
1188                      (struct sock *)ptype->af_packet_priv != skb->sk)) {
1189                         struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1190                         if (!skb2)
1191                                 break;
1192
1193                         /* skb->nh should be correctly
1194                            set by sender, so that the second statement is
1195                            just protection against buggy protocols.
1196                          */
1197                         skb_reset_mac_header(skb2);
1198
                             /* Network header outside [data, tail]: complain and reset it */
1199                         if (skb_network_header(skb2) < skb2->data ||
1200                             skb2->network_header > skb2->tail) {
1201                                 if (net_ratelimit())
1202                                         printk(KERN_CRIT "protocol %04x is "
1203                                                "buggy, dev %s\n",
1204                                                skb2->protocol, dev->name);
1205                                 skb_reset_network_header(skb2);
1206                         }
1207
1208                         skb2->transport_header = skb2->network_header;
1209                         skb2->pkt_type = PACKET_OUTGOING;
                             /* The handler is given the clone, not the original skb */
1210                         ptype->func(skb2, skb->dev, ptype, skb->dev);
1211                 }
1212         }
1213         rcu_read_unlock();
1214 }
1215
1216
1217 void __netif_schedule(struct net_device *dev)
1218 {
             struct softnet_data *sd;
             unsigned long flags;

             /* The SCHED bit was already set: nothing to do. */
             if (test_and_set_bit(__LINK_STATE_SCHED, &dev->state))
                     return;

             /*
              * With local IRQs off, push dev onto the head of this CPU's
              * output_queue and raise NET_TX_SOFTIRQ.
              */
1223         local_irq_save(flags);
1224         sd = &__get_cpu_var(softnet_data);
1225         dev->next_sched = sd->output_queue;
1226         sd->output_queue = dev;
1227         raise_softirq_irqoff(NET_TX_SOFTIRQ);
1228         local_irq_restore(flags);
1230 }
1231 EXPORT_SYMBOL(__netif_schedule);
1232
1233 void dev_kfree_skb_irq(struct sk_buff *skb)
1234 {
             struct softnet_data *sd;
             unsigned long flags;

             /* Other users remain: dropping our reference was all there was to do. */
             if (!atomic_dec_and_test(&skb->users))
                     return;

             /*
              * Last reference gone: with local IRQs off, chain the skb onto
              * this CPU's completion_queue and raise NET_TX_SOFTIRQ.
              */
1239         local_irq_save(flags);
1240         sd = &__get_cpu_var(softnet_data);
1241         skb->next = sd->completion_queue;
1242         sd->completion_queue = skb;
1243         raise_softirq_irqoff(NET_TX_SOFTIRQ);
1244         local_irq_restore(flags);
1246 }
1247 EXPORT_SYMBOL(dev_kfree_skb_irq);
1248
1249 void dev_kfree_skb_any(struct sk_buff *skb)
1250 {
             /* in_irq() and irqs_disabled() both false: free right here. */
             if (!in_irq() && !irqs_disabled()) {
                     dev_kfree_skb(skb);
                     return;
             }

             /* Otherwise hand the skb to the deferred-free variant. */
             dev_kfree_skb_irq(skb);
1255 }
1256 EXPORT_SYMBOL(dev_kfree_skb_any);
1257
1258
1259 /**
1260  * netif_device_detach - mark device as removed
1261  * @dev: network device
1262  *
1263  * Clear the PRESENT link-state bit of @dev.  If the bit was set before
      * and the device is running, its transmit queue is stopped as well.
1264  */
1265 void netif_device_detach(struct net_device *dev)
1266 {
             /* Act only on the present -> absent transition. */
             if (!test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state))
                     return;

             if (netif_running(dev))
                     netif_stop_queue(dev);
1271 }
1272 EXPORT_SYMBOL(netif_device_detach);
1273
1274 /**
1275  * netif_device_attach - mark device as attached
1276  * @dev: network device
1277  *
1278  * Set the PRESENT link-state bit of @dev.  If the bit was clear before
      * and the device is running, its transmit queue is woken and the
      * watchdog is brought up through __netdev_watchdog_up().
1279  */
1280 void netif_device_attach(struct net_device *dev)
1281 {
             /* Act only on the absent -> present transition. */
             if (test_and_set_bit(__LINK_STATE_PRESENT, &dev->state))
                     return;

             /* A device that is not running has nothing to restart. */
             if (!netif_running(dev))
                     return;

             netif_wake_queue(dev);
             __netdev_watchdog_up(dev);
1287 }
1288 EXPORT_SYMBOL(netif_device_attach);
1289
1290
1291 /*
1292  * Invalidate hardware checksum when packet is to be mangled, and
1293  * complete checksum manually on outgoing path.
      *
      * Returns 0 on success, or the error from pskb_expand_head() when a
      * cloned skb could not be given a private header.  On every successful
      * path skb->ip_summed ends up as CHECKSUM_NONE.
1294  */
1295 int skb_checksum_help(struct sk_buff *skb)
1296 {
1297         __wsum csum;
1298         int ret = 0, offset;
1299
             /* Nothing to compute here: only the summed state is reset */
1300         if (skb->ip_summed == CHECKSUM_COMPLETE)
1301                 goto out_set_summed;
1302
1303         if (unlikely(skb_shinfo(skb)->gso_size)) {
1304                 /* Let GSO fix up the checksum. */
1305                 goto out_set_summed;
1306         }
1307
             /* The checksum field is written below, so a cloned skb is expanded first */
1308         if (skb_cloned(skb)) {
1309                 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1310                 if (ret)
1311                         goto out;
1312         }
1313
             /* csum_start is relative to skb->head; make it relative to skb->data */
1314         offset = skb->csum_start - skb_headroom(skb);
1315         BUG_ON(offset > (int)skb->len);
1316         csum = skb_checksum(skb, offset, skb->len-offset, 0);
1317
             /* offset now counts the linear bytes from the checksum start onwards */
1318         offset = skb_headlen(skb) - offset;
1319         BUG_ON(offset <= 0);
1320         BUG_ON(skb->csum_offset + 2 > offset);
1321
             /* Store the folded sum in the 16-bit field at csum_start + csum_offset */
1322         *(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
1323                 csum_fold(csum);
1324 out_set_summed:
1325         skb->ip_summed = CHECKSUM_NONE;
1326 out:
1327         return ret;
1328 }
1329
1330 /**
1331  *      skb_gso_segment - Perform segmentation on skb.
1332  *      @skb: buffer to segment
1333  *      @features: features for the output path (see dev->features)
1334  *
1335  *      This function segments the given skb and returns a list of segments.
1336  *
1337  *      It may return NULL if the skb requires no segmentation.  This is
1338  *      only possible when GSO is used for verifying header integrity.
      *
      *      An ERR_PTR() is returned on failure; it is -EPROTONOSUPPORT when
      *      no registered packet type with a gso_segment handler matches
      *      skb->protocol.
1339  */
1340 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1341 {
1342         struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1343         struct packet_type *ptype;
1344         __be16 type = skb->protocol;
1345         int err;
1346
1347         BUG_ON(skb_shinfo(skb)->frag_list);
1348
             /* Record the MAC header length and pull it so data starts at the network header */
1349         skb_reset_mac_header(skb);
1350         skb->mac_len = skb->network_header - skb->mac_header;
1351         __skb_pull(skb, skb->mac_len);
1352
             /* Warns when ip_summed is not CHECKSUM_PARTIAL; a cloned header is then made private */
1353         if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1354                 if (skb_header_cloned(skb) &&
1355                     (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1356                         return ERR_PTR(err);
1357         }
1358
1359         rcu_read_lock();
1360         list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
1361                 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1362                         if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1363                                 err = ptype->gso_send_check(skb);
1364                                 segs = ERR_PTR(err);
                                     /* err == 0 makes segs NULL: "no segmentation needed" */
1365                                 if (err || skb_gso_ok(skb, features))
1366                                         break;
1367                                 __skb_push(skb, (skb->data -
1368                                                  skb_network_header(skb)));
1369                         }
1370                         segs = ptype->gso_segment(skb, features);
1371                         break;
1372                 }
1373         }
1374         rcu_read_unlock();
1375
             /* Put the MAC header back in front of the data */
1376         __skb_push(skb, skb->data - skb_mac_header(skb));
1377
1378         return segs;
1379 }
1380
1381 EXPORT_SYMBOL(skb_gso_segment);
1382
1383 /* Take action when hardware reception checksum errors are detected. */
1384 #ifdef CONFIG_BUG
1385 void netdev_rx_csum_fault(struct net_device *dev)
1386 {
             /* @dev may be NULL; a placeholder name is logged in that case. */
             const char *ifname = dev ? dev->name : "<unknown>";

             /* Stay silent whenever net_ratelimit() declines. */
             if (!net_ratelimit())
                     return;

             printk(KERN_ERR "%s: hw csum failure.\n", ifname);
             dump_stack();
1392 }
1393 EXPORT_SYMBOL(netdev_rx_csum_fault);
1394 #endif
1395
1396 /* Actually, we should eliminate this check as soon as we know, that:
1397  * 1. IOMMU is present and allows to map all the memory.
1398  * 2. No high memory really exists on this machine.
1399  */
1400
1401 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1402 {
1403 #ifdef CONFIG_HIGHMEM
             int frag = 0;

             /* A device with NETIF_F_HIGHDMA is never refused. */
             if (dev->features & NETIF_F_HIGHDMA)
                     return 0;

             /* Otherwise a single highmem page fragment is enough to say 1. */
             while (frag < skb_shinfo(skb)->nr_frags) {
                     if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
                             return 1;
                     frag++;
             }

1413 #endif
1414         return 0;
1415 }
1416
1417 struct dev_gso_cb {
             /* skb->destructor as it was before dev_gso_segment() replaced it */
1418         void (*destructor)(struct sk_buff *skb);
1419 };
1420
     /* View the skb's cb[] area as a struct dev_gso_cb */
1421 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1422
1423 static void dev_gso_skb_destructor(struct sk_buff *skb)
1424 {
             /*
              * Installed as skb->destructor by dev_gso_segment().  Frees every
              * segment still chained on skb->next, then calls the destructor
              * that was saved in the skb's control block, if there was one.
              */
1425         struct dev_gso_cb *cb;
1426
             /* The loop body dereferences skb->next before testing it */
1427         do {
1428                 struct sk_buff *nskb = skb->next;
1429
1430                 skb->next = nskb->next;
1431                 nskb->next = NULL;
1432                 kfree_skb(nskb);
1433         } while (skb->next);
1434
1435         cb = DEV_GSO_CB(skb);
1436         if (cb->destructor)
1437                 cb->destructor(skb);
1438 }
1439
1440 /**
1441  *      dev_gso_segment - Perform emulated hardware segmentation on skb.
1442  *      @skb: buffer to segment
1443  *
1444  *      Segment @skb with skb_gso_segment() and hang the resulting list on
1445  *      skb->next.  The skb's destructor is saved in its control block and
      *      replaced by dev_gso_skb_destructor().  Returns 0 on success (also
      *      when no segments were produced) or the error from segmentation.
1446  */
1447 static int dev_gso_segment(struct sk_buff *skb)
1448 {
1449         struct net_device *dev = skb->dev;
1450         struct sk_buff *segs;
             int features = dev->features;

             /* Scatter-gather is masked out when illegal_highdma() objects. */
             if (illegal_highdma(dev, skb))
                     features &= ~NETIF_F_SG;

1454         segs = skb_gso_segment(skb, features);

1460         if (unlikely(IS_ERR(segs)))
1461                 return PTR_ERR(segs);

             /* NULL means the call only verified header integrity. */
             if (segs) {
                     skb->next = segs;
                     DEV_GSO_CB(skb)->destructor = skb->destructor;
                     skb->destructor = dev_gso_skb_destructor;
             }

1467         return 0;
1468 }
1469
1470 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1471 {
             /*
              * Hand @skb to the driver.  An skb without a segment list is
              * shown to the taps, segmented in software if netif_needs_gso()
              * says so, and otherwise passed straight to dev->hard_start_xmit(),
              * whose return value is returned.  An skb that carries a list on
              * skb->next has its segments sent one by one.
              */
1472         if (likely(!skb->next)) {
1473                 if (!list_empty(&ptype_all))
1474                         dev_queue_xmit_nit(skb, dev);
1475
1476                 if (netif_needs_gso(dev, skb)) {
1477                         if (unlikely(dev_gso_segment(skb)))
1478                                 goto out_kfree_skb;
1479                         if (skb->next)
1480                                 goto gso;
1481                 }
1482
1483                 return dev->hard_start_xmit(skb, dev);
1484         }
1485
1486 gso:
1487         do {
1488                 struct sk_buff *nskb = skb->next;
1489                 int rc;
1490
                     /* Unlink the first segment and give it to the driver */
1491                 skb->next = nskb->next;
1492                 nskb->next = NULL;
1493                 rc = dev->hard_start_xmit(nskb, dev);
1494                 if (unlikely(rc)) {
                             /* Rejected: put it back at the head of the list */
1495                         nskb->next = skb->next;
1496                         skb->next = nskb;
1497                         return rc;
1498                 }
                     /* Queue stopped with segments still pending: report busy */
1499                 if (unlikely((netif_queue_stopped(dev) ||
1500                              netif_subqueue_stopped(dev, skb->queue_mapping)) &&
1501                              skb->next))
1502                         return NETDEV_TX_BUSY;
1503         } while (skb->next);
1504
             /* All segments sent: restore the saved destructor before freeing skb */
1505         skb->destructor = DEV_GSO_CB(skb)->destructor;
1506
1507 out_kfree_skb:
1508         kfree_skb(skb);
1509         return 0;
1510 }
1511
1512 #define HARD_TX_LOCK(dev, cpu) {                        \
1513         if ((dev->features & NETIF_F_LLTX) == 0) {      \
1514                 netif_tx_lock(dev);                     \
1515         }                                               \
1516 }
1517
1518 #define HARD_TX_UNLOCK(dev) {                           \
1519         if ((dev->features & NETIF_F_LLTX) == 0) {      \
1520                 netif_tx_unlock(dev);                   \
1521         }                                               \
1522 }
1523
1524 /**
1525  *      dev_queue_xmit - transmit a buffer
1526  *      @skb: buffer to transmit
1527  *
1528  *      Queue a buffer for transmission to a network device. The caller must
1529  *      have set the device and priority and built the buffer before calling
1530  *      this function. The function can be called from an interrupt.
1531  *
1532  *      A negative errno code is returned on a failure. A success does not
1533  *      guarantee the frame will be transmitted as it may be dropped due
1534  *      to congestion or traffic shaping.
1535  *
1536  * -----------------------------------------------------------------------------------
1537  *      I notice this method can also return errors from the queue disciplines,
1538  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1539  *      be positive.
1540  *
1541  *      Regardless of the return value, the skb is consumed, so it is currently
1542  *      difficult to retry a send to this method.  (You can bump the ref count
1543  *      before sending to hold a reference for retry if you are careful.)
1544  *
1545  *      When calling this method, interrupts MUST be enabled.  This is because
1546  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1547  *          --BLG
1548  */
1549
1550 int dev_queue_xmit(struct sk_buff *skb)
1551 {
1552         struct net_device *dev = skb->dev;
1553         struct Qdisc *q;
             /* Result of the early linearize/checksum failures that jump to out_kfree_skb */
1554         int rc = -ENOMEM;
1555
1556         /* GSO will handle the following emulations directly. */
1557         if (netif_needs_gso(dev, skb))
1558                 goto gso;
1559
             /* frag_list skbs are linearized unless the device has NETIF_F_FRAGLIST */
1560         if (skb_shinfo(skb)->frag_list &&
1561             !(dev->features & NETIF_F_FRAGLIST) &&
1562             __skb_linearize(skb))
1563                 goto out_kfree_skb;
1564
1565         /* Fragmented skb is linearized if device does not support SG,
1566          * or if at least one of fragments is in highmem and device
1567          * does not support DMA from it.
1568          */
1569         if (skb_shinfo(skb)->nr_frags &&
1570             (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1571             __skb_linearize(skb))
1572                 goto out_kfree_skb;
1573
1574         /* If packet is not checksummed and device does not support
1575          * checksumming for this protocol, complete checksumming here.
1576          */
1577         if (skb->ip_summed == CHECKSUM_PARTIAL) {
1578                 skb_set_transport_header(skb, skb->csum_start -
1579                                               skb_headroom(skb));
1580
1581                 if (!(dev->features & NETIF_F_GEN_CSUM) &&
1582                     !((dev->features & NETIF_F_IP_CSUM) &&
1583                       skb->protocol == htons(ETH_P_IP)) &&
1584                     !((dev->features & NETIF_F_IPV6_CSUM) &&
1585                       skb->protocol == htons(ETH_P_IPV6)))
1586                         if (skb_checksum_help(skb))
1587                                 goto out_kfree_skb;
1588         }
1589
1590 gso:
1591         spin_lock_prefetch(&dev->queue_lock);
1592
1593         /* Disable soft irqs for various locks below. Also
1594          * stops preemption for RCU.
1595          */
1596         rcu_read_lock_bh();
1597
1598         /* Updates of qdisc are serialized by queue_lock.
1599          * The struct Qdisc which is pointed to by qdisc is now a
1600          * rcu structure - it may be accessed without acquiring
1601          * a lock (but the structure may be stale.) The freeing of the
1602          * qdisc will be deferred until it's known that there are no
1603          * more references to it.
1604          *
1605          * If the qdisc has an enqueue function, we still need to
1606          * hold the queue_lock before calling it, since queue_lock
1607          * also serializes access to the device queue.
1608          */
1609
1610         q = rcu_dereference(dev->qdisc);
1611 #ifdef CONFIG_NET_CLS_ACT
1612         skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1613 #endif
1614         if (q->enqueue) {
1615                 /* Grab device queue */
1616                 spin_lock(&dev->queue_lock);
                     /* Re-read under the lock; the first look was lockless */
1617                 q = dev->qdisc;
1618                 if (q->enqueue) {
1619                         /* reset queue_mapping to zero */
1620                         skb->queue_mapping = 0;
1621                         rc = q->enqueue(skb, q);
1622                         qdisc_run(dev);
1623                         spin_unlock(&dev->queue_lock);
1624
                             /* NET_XMIT_BYPASS is reported to the caller as success */
1625                         rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1626                         goto out;
1627                 }
1628                 spin_unlock(&dev->queue_lock);
1629         }
1630
1631         /* The device has no queue. Common case for software devices:
1632            loopback, all the sorts of tunnels...
1633
1634            Really, it is unlikely that netif_tx_lock protection is necessary
1635            here.  (f.e. loopback and IP tunnels are clean ignoring statistics
1636            counters.)
1637            However, it is possible, that they rely on protection
1638            made by us here.
1639
1640            Check this and shot the lock. It is not prone from deadlocks.
1641            Either shot noqueue qdisc, it is even simpler 8)
1642          */
1643         if (dev->flags & IFF_UP) {
1644                 int cpu = smp_processor_id(); /* ok because BHs are off */
1645
                     /* Same CPU already owns the xmit lock: treated as recursion below */
1646                 if (dev->xmit_lock_owner != cpu) {
1647
1648                         HARD_TX_LOCK(dev, cpu);
1649
1650                         if (!netif_queue_stopped(dev) &&
1651                             !netif_subqueue_stopped(dev, skb->queue_mapping)) {
1652                                 rc = 0;
1653                                 if (!dev_hard_start_xmit(skb, dev)) {
1654                                         HARD_TX_UNLOCK(dev);
1655                                         goto out;
1656                                 }
1657                         }
1658                         HARD_TX_UNLOCK(dev);
1659                         if (net_ratelimit())
1660                                 printk(KERN_CRIT "Virtual device %s asks to "
1661                                        "queue packet!\n", dev->name);
1662                 } else {
1663                         /* Recursion is detected! It is possible,
1664                          * unfortunately */
1665                         if (net_ratelimit())
1666                                 printk(KERN_CRIT "Dead loop on virtual device "
1667                                        "%s, fix it urgently!\n", dev->name);
1668                 }
1669         }
1670
             /* Device down, queue stopped, driver refused or recursion: drop with -ENETDOWN */
1671         rc = -ENETDOWN;
1672         rcu_read_unlock_bh();
1673
             /* Reached without the RCU-bh read lock: never taken or already released */
1674 out_kfree_skb:
1675         kfree_skb(skb);
1676         return rc;
             /* The skb was handed to a qdisc or to the driver, so it is not freed here */
1677 out:
1678         rcu_read_unlock_bh();
1679         return rc;
1680 }
1681
1682
1683 /*=======================================================================
1684                         Receiver routines
1685   =======================================================================*/
1686
1687 int netdev_max_backlog __read_mostly = 1000; /* netif_rx() drops once input_pkt_queue is longer than this */
1688 int netdev_budget __read_mostly = 300;
1689 int weight_p __read_mostly = 64;            /* old backlog weight */
1690
1691 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1692
1693
1694 /**
1695  *      netif_rx        -       post buffer to the network code
1696  *      @skb: buffer to post
1697  *
1698  *      This function receives a packet from a device driver and queues it for
1699  *      the upper (protocol) levels to process.  It always succeeds. The buffer
1700  *      may be dropped during processing for congestion control or by the
1701  *      protocol layers.
1702  *
1703  *      return values:
1704  *      NET_RX_SUCCESS  (no congestion)
1705  *      NET_RX_CN_LOW   (low congestion)
1706  *      NET_RX_CN_MOD   (moderate congestion)
1707  *      NET_RX_CN_HIGH  (high congestion)
1708  *      NET_RX_DROP     (packet was dropped)
1709  *
      *      NOTE(review): as written below, only two values are ever returned:
      *      NET_RX_SUCCESS (skb appended to this CPU's input_pkt_queue) and
      *      NET_RX_DROP (netpoll_rx() claimed the skb, or the queue was already
      *      longer than netdev_max_backlog, in which case the skb is freed here).
      *      None of the NET_RX_CN_* values is produced by this function.
      *
1710  */
1711
1712 int netif_rx(struct sk_buff *skb)
1713 {
1714         struct softnet_data *queue;
1715         unsigned long flags;
1716
1717         /* if netpoll wants it, pretend we never saw it */
1718         if (netpoll_rx(skb))
1719                 return NET_RX_DROP;
1720
             /* Timestamp the packet only if it does not carry a stamp yet. */
1721         if (!skb->tstamp.tv64)
1722                 net_timestamp(skb);
1723
1724         /*
1725          * The code is arranged so that the path is shortest
1726          * when the CPU is congested but still operating.
1727          */
             /* The per-CPU queue and counters are touched with local IRQs off. */
1728         local_irq_save(flags);
1729         queue = &__get_cpu_var(softnet_data);
1730
1731         __get_cpu_var(netdev_rx_stat).total++;
1732         if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1733                 if (queue->input_pkt_queue.qlen) {
1734 enqueue:
                             /*
                              * Pin the device while the skb waits on the queue;
                              * process_backlog() does the matching dev_put().
                              */
1735                         dev_hold(skb->dev);
1736                         __skb_queue_tail(&queue->input_pkt_queue, skb);
1737                         local_irq_restore(flags);
1738                         return NET_RX_SUCCESS;
1739                 }
1740
                     /*
                      * The queue was empty: schedule the backlog NAPI instance
                      * first, then queue the packet via the shared enqueue path.
                      */
1741                 napi_schedule(&queue->backlog);
1742                 goto enqueue;
1743         }
1744
             /* Queue is over the limit: count the drop and free the skb. */
1745         __get_cpu_var(netdev_rx_stat).dropped++;
1746         local_irq_restore(flags);
1747
1748         kfree_skb(skb);
1749         return NET_RX_DROP;
1750 }
1751
1752 int netif_rx_ni(struct sk_buff *skb)
1753 {
             /*
              * Queue skb through netif_rx() with preemption disabled and, if a
              * softirq is pending on this CPU afterwards, run do_softirq() right
              * away.  Returns whatever netif_rx() returned.
              * NOTE(review): presumably intended for callers outside interrupt
              * context, where nothing else would run the softirq soon - confirm.
              */
1754         int err;
1755
1756         preempt_disable();
1757         err = netif_rx(skb);
1758         if (local_softirq_pending())
1759                 do_softirq();
1760         preempt_enable();
1761
1762         return err;
1763 }
1764
1765 EXPORT_SYMBOL(netif_rx_ni);
1766
1767 static inline struct net_device *skb_bond(struct sk_buff *skb)
1768 {
             /*
              * Handle reception on a device that has a master (dev->master set):
              * either free the skb and return NULL when skb_bond_should_drop()
              * says so, or retarget skb->dev to the master.
              *
              * Returns the device the skb arrived on (skb->dev as it was on
              * entry), or NULL if the skb was freed here.
              */
1769         struct net_device *dev = skb->dev;
1770
1771         if (dev->master) {
1772                 if (skb_bond_should_drop(skb)) {
1773                         kfree_skb(skb);
1774                         return NULL;
1775                 }
1776                 skb->dev = dev->master;
1777         }
1778
1779         return dev;
1780 }
1781
1782
1783 static void net_tx_action(struct softirq_action *h)
1784 {
             /*
              * Transmit-side softirq work for the current CPU; the argument h is
              * not used.  Two independent steps:
              *   1. free every skb parked on sd->completion_queue;
              *   2. run the qdisc of every device chained on sd->output_queue.
              * NOTE(review): presumably registered for NET_TX_SOFTIRQ elsewhere
              * in this file - the registration is not visible here.
              */
1785         struct softnet_data *sd = &__get_cpu_var(softnet_data);
1786
1787         if (sd->completion_queue) {
1788                 struct sk_buff *clist;
1789
                     /*
                      * Detach the whole list with local IRQs off, then walk
                      * the private copy with IRQs enabled again.
                      */
1790                 local_irq_disable();
1791                 clist = sd->completion_queue;
1792                 sd->completion_queue = NULL;
1793                 local_irq_enable();
1794
1795                 while (clist) {
1796                         struct sk_buff *skb = clist;
1797                         clist = clist->next;
1798
                             /* skbs on this list are expected to have users == 0 */
1799                         BUG_TRAP(!atomic_read(&skb->users));
1800                         __kfree_skb(skb);
1801                 }
1802         }
1803
1804         if (sd->output_queue) {
1805                 struct net_device *head;
1806
                     /* Same detach-then-walk scheme as for completion_queue. */
1807                 local_irq_disable();
1808                 head = sd->output_queue;
1809                 sd->output_queue = NULL;
1810                 local_irq_enable();
1811
1812                 while (head) {
1813                         struct net_device *dev = head;
                             /* Read the link before SCHED is cleared below. */
1814                         head = head->next_sched;
1815
                             /* Clear the SCHED bit before running the queue. */
1816                         smp_mb__before_clear_bit();
1817                         clear_bit(__LINK_STATE_SCHED, &dev->state);
1818
                             /*
                              * Never spin on queue_lock here: if it is busy,
                              * put the device back on the schedule instead.
                              */
1819                         if (spin_trylock(&dev->queue_lock)) {
1820                                 qdisc_run(dev);
1821                                 spin_unlock(&dev->queue_lock);
1822                         } else {
1823                                 netif_schedule(dev);
1824                         }
1825                 }
1826         }
1827 }
1828
1829 static inline int deliver_skb(struct sk_buff *skb,
1830                               struct packet_type *pt_prev,
1831                               struct net_device *orig_dev)
1832 {
             /*
              * Bump skb->users, then pass the skb to pt_prev's handler together
              * with skb->dev and orig_dev.  Returns the handler's result.
              * NOTE(review): the extra reference is presumably what allows the
              * caller to keep using skb after the handler returns - confirm
              * against the packet_type handler contract.
              */
1833         atomic_inc(&skb->users);
1834         return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1835 }
1836
1837 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
1838 /* These hooks are defined here for ATM */
1839 struct net_bridge;
1840 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1841                                                 unsigned char *addr);
1842 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
1843
1844 /*
1845  * If the bridge module is loaded, call the bridging hook.
1846  * Returns NULL if the packet was consumed.
1847  */
1848 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
1849                                         struct sk_buff *skb) __read_mostly;
     /*
      * handle_bridge - give the bridge code a chance to take the skb.
      *
      * The skb is returned untouched when it is a PACKET_LOOPBACK packet or its
      * device has no br_port.  Otherwise any delivery still pending in *pt_prev
      * is performed first (result stored in *ret, *pt_prev reset to NULL) and
      * the skb is handed to br_handle_frame_hook(), whose return value is passed
      * back to the caller.
      * NOTE(review): the hook pointer is not checked for NULL; this assumes it
      * is always set while any device has a br_port - confirm.
      */
1850 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
1851                                             struct packet_type **pt_prev, int *ret,
1852                                             struct net_device *orig_dev)
1853 {
1854         struct net_bridge_port *port;
1855
1856         if (skb->pkt_type == PACKET_LOOPBACK ||
1857             (port = rcu_dereference(skb->dev->br_port)) == NULL)
1858                 return skb;
1859
1860         if (*pt_prev) {
1861                 *ret = deliver_skb(skb, *pt_prev, orig_dev);
1862                 *pt_prev = NULL;
1863         }
1864
1865         return br_handle_frame_hook(port, skb);
1866 }
1867 #else
     /* Bridging not configured: the skb always passes through unchanged. */
1868 #define handle_bridge(skb, pt_prev, ret, orig_dev)      (skb)
1869 #endif
1870
1871 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
     /* Frame hook called by handle_macvlan(); exported (GPL-only) from here. */
1872 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
1873 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
1874
     /*
      * handle_macvlan - give the macvlan code a chance to take the skb.
      *
      * The skb is returned untouched when its device has no macvlan_port.
      * Otherwise any delivery still pending in *pt_prev is performed first
      * (result stored in *ret, *pt_prev reset to NULL) and the skb is handed to
      * macvlan_handle_frame_hook(), whose return value is passed back.  The
      * caller, netif_receive_skb(), stops processing when NULL comes back.
      * NOTE(review): the hook pointer is not checked for NULL; this assumes it
      * is always set while any device has a macvlan_port - confirm.
      */
1875 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
1876                                              struct packet_type **pt_prev,
1877                                              int *ret,
1878                                              struct net_device *orig_dev)
1879 {
1880         if (skb->dev->macvlan_port == NULL)
1881                 return skb;
1882
1883         if (*pt_prev) {
1884                 *ret = deliver_skb(skb, *pt_prev, orig_dev);
1885                 *pt_prev = NULL;
1886         }
1887         return macvlan_handle_frame_hook(skb);
1888 }
1889 #else
     /* macvlan not configured: the skb always passes through unchanged. */
1890 #define handle_macvlan(skb, pt_prev, ret, orig_dev)     (skb)
1891 #endif
1892
1893 #ifdef CONFIG_NET_CLS_ACT
1894 /* TODO: Maybe we should just force sch_ingress to be compiled in
1895  * when CONFIG_NET_CLS_ACT is?  Otherwise there are some useless instructions
1896  * (a compare and 2 extra stores right now) if we don't have it on
1897  * but do have CONFIG_NET_CLS_ACT.
1898  * NOTE: This doesn't stop any functionality; if you don't have
1899  * the ingress scheduler, you just can't add policies on ingress.
1900  *
      * ing_filter() runs the device's ingress qdisc, if any, on the skb and
      * returns the qdisc's enqueue() result.  It returns TC_ACT_OK when the
      * device has no ingress qdisc and TC_ACT_SHOT when the redirect counter
      * carried in skb->tc_verd shows a redirect loop.
1901  */
1902 static int ing_filter(struct sk_buff *skb)
1903 {
1904         struct Qdisc *q;
1905         struct net_device *dev = skb->dev;
1906         int result = TC_ACT_OK;
1907
1908         if (dev->qdisc_ingress) {
                     /*
                      * Loop guard: compare the redirect count stored in tc_verd
                      * with MAX_RED_LOOP, then store the incremented count back.
                      */
1909                 __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
1910                 if (MAX_RED_LOOP < ttl++) {
1911                         printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
1912                                 skb->iif, skb->dev->ifindex);
1913                         return TC_ACT_SHOT;
1914                 }
1915
1916                 skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
1917
                     /* Mark the skb as being processed at ingress. */
1918                 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
1919
                     /*
                      * qdisc_ingress was tested above without the lock, so it is
                      * read again under ingress_lock before being used.
                      */
1920                 spin_lock(&dev->ingress_lock);
1921                 if ((q = dev->qdisc_ingress) != NULL)
1922                         result = q->enqueue(skb, q);
1923                 spin_unlock(&dev->ingress_lock);
1924
1925         }
1926
1927         return result;
1928 }
1929 #endif
1930
1931 int netif_receive_skb(struct sk_buff *skb)
1932 {
             /*
              * Main receive path for one skb.  In order: netpoll check,
              * timestamp and iif setup, skb_bond(), delivery to the taps on
              * ptype_all, ingress classification (CONFIG_NET_CLS_ACT only),
              * bridge and macvlan hooks, and finally delivery to the handlers
              * registered in ptype_base for skb->protocol.
              *
              * Delivery is deferred by one entry through pt_prev: every handler
              * but the last goes through deliver_skb(), which takes an extra
              * reference, and the last one is called directly with the skb.
              *
              * Returns the last handler's result when one runs.  Returns
              * NET_RX_DROP when netpoll_receive_skb() or skb_bond() stops
              * processing early, or when no handler matched (the skb is freed).
              * NOTE(review): with CONFIG_NET_CLS_ACT, ret is overwritten by
              * ing_filter(), so a TC_ACT_SHOT/TC_ACT_STOLEN verdict is returned
              * as that TC_ACT_* value rather than as NET_RX_DROP.
              */
1933         struct packet_type *ptype, *pt_prev;
1934         struct net_device *orig_dev;
1935         int ret = NET_RX_DROP;
1936         __be16 type;
1937
1938         /* if we've gotten here through NAPI, check netpoll */
1939         if (netpoll_receive_skb(skb))
1940                 return NET_RX_DROP;
1941
1942         if (!skb->tstamp.tv64)
1943                 net_timestamp(skb);
1944
             /* Record the incoming interface index unless one is already set. */
1945         if (!skb->iif)
1946                 skb->iif = skb->dev->ifindex;
1947
             /* May retarget skb->dev to the master, or free the skb (NULL). */
1948         orig_dev = skb_bond(skb);
1949
1950         if (!orig_dev)
1951                 return NET_RX_DROP;
1952
             /*
              * NOTE(review): netif_rx() also increments this counter, so packets
              * that arrive through the backlog queue appear to be counted twice.
              */
1953         __get_cpu_var(netdev_rx_stat).total++;
1954
1955         skb_reset_network_header(skb);
1956         skb_reset_transport_header(skb);
1957         skb->mac_len = skb->network_header - skb->mac_header;
1958
1959         pt_prev = NULL;
1960
1961         rcu_read_lock();
1962
1963 #ifdef CONFIG_NET_CLS_ACT
             /*
              * Packets flagged TC_NCLS skip the taps and the classifier; the
              * flag is cleared on the way.
              */
1964         if (skb->tc_verd & TC_NCLS) {
1965                 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1966                 goto ncls;
1967         }
1968 #endif
1969
             /* Taps: entries not bound to a device, or bound to skb->dev. */
1970         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1971                 if (!ptype->dev || ptype->dev == skb->dev) {
1972                         if (pt_prev)
1973                                 ret = deliver_skb(skb, pt_prev, orig_dev);
1974                         pt_prev = ptype;
1975                 }
1976         }
1977
1978 #ifdef CONFIG_NET_CLS_ACT
             /* Flush the pending tap delivery before the classifier runs. */
1979         if (pt_prev) {
1980                 ret = deliver_skb(skb, pt_prev, orig_dev);
1981                 pt_prev = NULL; /* no one else should process this afterwards */
1982         } else {
1983                 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1984         }
1985
1986         ret = ing_filter(skb);
1987
             /* Both verdicts end processing here; the skb is freed. */
1988         if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
1989                 kfree_skb(skb);
1990                 goto out;
1991         }
1992
1993         skb->tc_verd = 0;
1994 ncls:
1995 #endif
1996
             /* Either hook may consume the skb, signalled by a NULL return. */
1997         skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
1998         if (!skb)
1999                 goto out;
2000         skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2001         if (!skb)
2002                 goto out;
2003
             /* Protocol handlers live in a 16-bucket table indexed by type. */
2004         type = skb->protocol;
2005         list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
2006                 if (ptype->type == type &&
2007                     (!ptype->dev || ptype->dev == skb->dev)) {
2008                         if (pt_prev)
2009                                 ret = deliver_skb(skb, pt_prev, orig_dev);
2010                         pt_prev = ptype;
2011                 }
2012         }
2013
2014         if (pt_prev) {
                     /* Last handler: called directly, no extra reference taken. */
2015                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2016         } else {
2017                 kfree_skb(skb);
2018                 /* Jamal, now you will not able to escape explaining
2019                  * me how you were going to use this. :-)
2020                  */
2021                 ret = NET_RX_DROP;
2022         }
2023
2024 out:
2025         rcu_read_unlock();
2026         return ret;
2027 }
2028
2029 static int process_backlog(struct napi_struct *napi, int quota)
2030 {
             /*
              * NAPI poll routine for the per-CPU backlog: drains the packets that
              * netif_rx() queued on this CPU's input_pkt_queue and feeds each one
              * to netif_receive_skb().
              *
              * The loop ends when the queue is empty (the NAPI instance is then
              * completed), when quota packets have been handled, or as soon as
              * jiffies has advanced since entry.  Returns the number of packets
              * processed.
              */
2031         int work = 0;
2032         struct softnet_data *queue = &__get_cpu_var(softnet_data);
2033         unsigned long start_time = jiffies;
2034
             /* Pick up the current value of the weight_p tunable on every poll. */
2035         napi->weight = weight_p;
2036         do {
2037                 struct sk_buff *skb;
2038                 struct net_device *dev;
2039
                     /* The queue is shared with netif_rx(): touch it with IRQs off. */
2040                 local_irq_disable();
2041                 skb = __skb_dequeue(&queue->input_pkt_queue);
2042                 if (!skb) {
2043                         __napi_complete(napi);
2044                         local_irq_enable();
2045                         break;
2046                 }
2047
2048                 local_irq_enable();
2049
                     /*
                      * Remember the device now: netif_receive_skb() may free the
                      * skb or retarget skb->dev (see skb_bond()), and the
                      * reference taken in netif_rx() was on this device.
                      */
2050                 dev = skb->dev;
2051
2052                 netif_receive_skb(skb);
2053
                     /* Drops the reference taken by dev_hold() in netif_rx(). */
2054                 dev_put(dev);
2055         } while (++work < quota && jiffies == start_time);
2056
2057         return work;
2058 }
2059
2060 /**
2061  * __napi_schedule - schedule for receive
2062  * @n: entry to schedule
2063  *
2064  * The entry's receive function will be scheduled to run.
      *
      * With local interrupts disabled, the entry is appended to the tail of the
      * current CPU's softnet_data poll_list and NET_RX_SOFTIRQ is raised.
2065  */
2066 void fastcall __napi_schedule(struct napi_struct *n)
2067 {
2068         unsigned long flags;
2069
2070         local_irq_save(flags);
2071         list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2072         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2073         local_irq_restore(flags);
2074 }
2075 EXPORT_SYMBOL(__napi_schedule);
2076
2077
2078 static void net_rx_action(struct softirq_action *h)
2079 {
             /*
              * Receive-side softirq work for the current CPU; the argument h is
              * not used.  Polls the NAPI instances on this CPU's poll_list until
              * the list is empty, the budget (netdev_budget) is used up, or
              * jiffies has advanced since entry.  In the last two cases the
              * time_squeeze counter is bumped and NET_RX_SOFTIRQ is raised again
              * so the remaining work is picked up later.
              */
2080         struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2081         unsigned long start_time = jiffies;
2082         int budget = netdev_budget;
2083         void *have;
2084
2085         local_irq_disable();
2086
2087         while (!list_empty(list)) {
2088                 struct napi_struct *n;
2089                 int work, weight;
2090
2091                 /* If softirq window is exhausted then punt.
2092                  *
2093                  * Note that this is a slight policy change from the
2094                  * previous NAPI code, which would allow up to 2
2095                  * jiffies to pass before breaking out.  The test
2096                  * used to be "jiffies - start_time > 1".
2097                  */
2098                 if (unlikely(budget <= 0 || jiffies != start_time))
2099                         goto softnet_break;
2100
2101                 local_irq_enable();
2102
2103                 /* Even though interrupts have been re-enabled, this
2104                  * access is safe because interrupts can only add new
2105                  * entries to the tail of this list, and only ->poll()
2106                  * calls can remove this head entry from the list.
2107                  */
2108                 n = list_entry(list->next, struct napi_struct, poll_list);
2109
                     /* Token returned here is handed back to netpoll_poll_unlock(). */
2110                 have = netpoll_poll_lock(n);
2111
2112                 weight = n->weight;
2113
2114                 work = n->poll(n, weight);
2115
                     /* A poll routine must not report more work than its weight. */
2116                 WARN_ON_ONCE(work > weight);
2117
2118                 budget -= work;
2119
2120                 local_irq_disable();
2121
2122                 /* Drivers must not modify the NAPI state if they
2123                  * consume the entire weight.  In such cases this code
2124                  * still "owns" the NAPI instance and therefore can
2125                  * move the instance around on the list at-will.
2126                  */
2127                 if (unlikely(work == weight))
2128                         list_move_tail(&n->poll_list, list);
2129
2130                 netpoll_poll_unlock(have);
2131         }
2132 out:
             /* Reached with local interrupts disabled on every path. */
2133         local_irq_enable();
2134
2135 #ifdef CONFIG_NET_DMA
2136         /*
2137          * There may not be any more sk_buffs coming right now, so push
2138          * any pending DMA copies to hardware
2139          */
2140         if (!cpus_empty(net_dma.channel_mask)) {
2141                 int chan_idx;
2142                 for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
2143                         struct dma_chan *chan = net_dma.channels[chan_idx];
2144                         if (chan)
2145                                 dma_async_memcpy_issue_pending(chan);
2146                 }
2147         }
2148 #endif
2149
2150         return;
2151
2152 softnet_break:
             /* Out of budget or time: count it and ask for another softirq run. */
2153         __get_cpu_var(netdev_rx_stat).time_squeeze++;
2154         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2155         goto out;
2156 }
2157
2158 static gifconf_func_t * gifconf_list [NPROTO];      /* one handler slot per address family; walked by dev_ifconf() */
2159
2160 /**
2161  *      register_gifconf        -       register a SIOCGIF handler
2162  *      @family: Address family
2163  *      @gifconf: Function handler
2164  *
2165  *      Register protocol dependent address dumping routines. The handler
2166  *      that is passed must not be freed or reused until it has been replaced
2167  *      by another handler.
      *
      *      The handler is stored in gifconf_list[@family], overwriting any
      *      previous entry.  Returns 0 on success, or -EINVAL when @family is
      *      not below NPROTO.
2168  */
2169 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2170 {
2171         if (family >= NPROTO)
2172                 return -EINVAL;
2173         gifconf_list[family] = gifconf;
2174         return 0;
2175 }
2176
2177
2178 /*
2179  *      Map an interface index to its name (SIOCGIFNAME)
      *
      *      Reads a struct ifreq from user space, looks up the device whose
      *      index is ifr_ifindex, stores its name in ifr_name and copies the
      *      structure back.  Returns 0 on success, -EFAULT if either user copy
      *      fails, or -ENODEV if no device has that index.
2180  */
2181
2182 /*
2183  *      We need this ioctl for efficient implementation of the
2184  *      if_indextoname() function required by the IPv6 API.  Without
2185  *      it, we would have to search all the interfaces to find a
2186  *      match.  --pb
2187  */
2188
2189 static int dev_ifname(struct ifreq __user *arg)
2190 {
2191         struct net_device *dev;
2192         struct ifreq ifr;
2193
2194         /*
2195          *      Fetch the caller's info block.
2196          */
2197
2198         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2199                 return -EFAULT;
2200
             /*
              * dev_base_lock is held for reading across both the lookup and the
              * name copy; no reference is taken on the device.
              */
2201         read_lock(&dev_base_lock);
2202         dev = __dev_get_by_index(ifr.ifr_ifindex);
2203         if (!dev) {
2204                 read_unlock(&dev_base_lock);
2205                 return -ENODEV;
2206         }
2207
2208         strcpy(ifr.ifr_name, dev->name);
2209         read_unlock(&dev_base_lock);
2210
2211         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2212                 return -EFAULT;
2213         return 0;
2214 }
2215
2216 /*
2217  *      Perform a SIOCGIFCONF call. This structure will change
2218  *      size eventually, and there is nothing I can do about it.
2219  *      Thus we will need a 'compatibility mode'.
      *
      *      Reads a struct ifconf from user space, calls every registered
      *      gifconf handler for every device, and writes the summed handler
      *      results back in ifc_len.  Returns 0 on success, or -EFAULT if a
      *      user copy fails or a handler returns a negative value.
2220  */
2221
2222 static int dev_ifconf(char __user *arg)
2223 {
2224         struct ifconf ifc;
2225         struct net_device *dev;
2226         char __user *pos;
2227         int len;
2228         int total;
2229         int i;
2230
2231         /*
2232          *      Fetch the caller's info block.
2233          */
2234
2235         if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2236                 return -EFAULT;
2237
2238         pos = ifc.ifc_buf;
2239         len = ifc.ifc_len;
2240
2241         /*
2242          *      Loop over the interfaces, and write an info block for each.
2243          */
2244
2245         total = 0;
2246         for_each_netdev(dev) {
2247                 for (i = 0; i < NPROTO; i++) {
2248                         if (gifconf_list[i]) {
2249                                 int done;
                                     /*
                                      * With no user buffer the handler gets
                                      * NULL and a zero length.
                                      * NOTE(review): presumably it then only
                                      * reports the space it needs - confirm
                                      * against the handlers.
                                      */
2250                                 if (!pos)
2251                                         done = gifconf_list[i](dev, NULL, 0);
2252                                 else
2253                                         done = gifconf_list[i](dev, pos + total,
2254                                                                len - total);
                                     /* Any negative result is reported as -EFAULT. */
2255                                 if (done < 0)
2256                                         return -EFAULT;
2257                                 total += done;
2258                         }
2259                 }
2260         }
2261
2262         /*
2263          *      All done.  Write the updated control block back to the caller.
2264          */
2265         ifc.ifc_len = total;
2266
2267         /*
2268          *      Both BSD and Solaris return 0 here, so we do too.
2269          */
2270         return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2271 }
2272
2273 #ifdef CONFIG_PROC_FS
2274 /*
2275  *      This is invoked by the /proc filesystem handler to display a device
2276  *      in detail.
      *
      *      seq_file start callback: takes dev_base_lock for reading on every
      *      path (dev_seq_stop() releases it).  Position 0 yields
      *      SEQ_START_TOKEN, for which dev_seq_show() prints the header;
      *      position N >= 1 yields the Nth device of the for_each_netdev()
      *      walk, or NULL when there are fewer than N devices.
2277  */
2278 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2279 {
2280         loff_t off;
2281         struct net_device *dev;
2282
2283         read_lock(&dev_base_lock);
2284         if (!*pos)
2285                 return SEQ_START_TOKEN;
2286
2287         off = 1;
2288         for_each_netdev(dev)
2289                 if (off++ == *pos)
2290                         return dev;
2291
2292         return NULL;
2293 }
2294
2295 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2296 {
             /*
              * seq_file next callback: advance *pos and return the element after
              * v - the first device when v is the header token, otherwise the
              * device following v.
              */
2297         ++*pos;
2298         return v == SEQ_START_TOKEN ?
2299                 first_net_device() : next_net_device((struct net_device *)v);
2300 }
2301
2302 void dev_seq_stop(struct seq_file *seq, void *v)
2303 {
             /* seq_file stop callback: drop the read lock taken in dev_seq_start(). */
2304         read_unlock(&dev_base_lock);
2305 }
2306
2307 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2308 {
             /*
              * Print one statistics line for dev, using the counters returned by
              * dev->get_stats().  The columns follow the header printed by
              * dev_seq_show(); three of them are sums of several counters:
              *   receive "drop"     = rx_dropped + rx_missed_errors
              *   receive "frame"    = rx_length_errors + rx_over_errors +
              *                        rx_crc_errors + rx_frame_errors
              *   transmit "carrier" = tx_carrier_errors + tx_aborted_errors +
              *                        tx_window_errors + tx_heartbeat_errors
              */
2309         struct net_device_stats *stats = dev->get_stats(dev);
2310
2311         seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2312                    "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2313                    dev->name, stats->rx_bytes, stats->rx_packets,
2314                    stats->rx_errors,
2315                    stats->rx_dropped + stats->rx_missed_errors,
2316                    stats->rx_fifo_errors,
2317                    stats->rx_length_errors + stats->rx_over_errors +
2318                     stats->rx_crc_errors + stats->rx_frame_errors,
2319                    stats->rx_compressed, stats->multicast,
2320                    stats->tx_bytes, stats->tx_packets,
2321                    stats->tx_errors, stats->tx_dropped,
2322                    stats->tx_fifo_errors, stats->collisions,
2323                    stats->tx_carrier_errors +
2324                     stats->tx_aborted_errors +
2325                     stats->tx_window_errors +
2326                     stats->tx_heartbeat_errors,
2327                    stats->tx_compressed);
2328 }
2329
2330 /*
2331  *      Called from the PROCfs module. This now uses the new arbitrary sized
2332  *      /proc/net interface to create /proc/net/dev
      *
      *      seq_file show callback: prints the two-line column header for the
      *      start token and one statistics line for any other element, which
      *      is passed to dev_seq_printf_stats() as the device.  Always
      *      returns 0.
2333  */
2334 static int dev_seq_show(struct seq_file *seq, void *v)
2335 {
2336         if (v == SEQ_START_TOKEN)
2337                 seq_puts(seq, "Inter-|   Receive                            "
2338                               "                    |  Transmit\n"
2339                               " face |bytes    packets errs drop fifo frame "
2340                               "compressed multicast|bytes    packets errs "
2341                               "drop fifo colls carrier compressed\n");
2342         else
2343                 dev_seq_printf_stats(seq, v);
2344         return 0;
2345 }
2346
2347 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2348 {
             /*
              * Advance *pos to the first online CPU at or after its current value
              * and return that CPU's netdev_rx_stat.  Returns NULL, leaving *pos
              * at NR_CPUS or beyond, when no such CPU exists.
              */
2349         struct netif_rx_stats *rc = NULL;
2350
2351         while (*pos < NR_CPUS)
2352                 if (cpu_online(*pos)) {
2353                         rc = &per_cpu(netdev_rx_stat, *pos);
2354                         break;
2355                 } else
2356                         ++*pos;
2357         return rc;
2358 }
2359
2360 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2361 {
             /* seq_file start callback: stats of the first online CPU at or after *pos. */
2362         return softnet_get_online(pos);
2363 }
2364
2365 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2366 {
             /* seq_file next callback: step past the current CPU, then find the next online one. */
2367         ++*pos;
2368         return softnet_get_online(pos);
2369 }
2370
2371 static void softnet_seq_stop(struct seq_file *seq, void *v)
2372 {
             /* Nothing to release: softnet_seq_start() takes no lock. */
2373 }
2374
2375 static int softnet_seq_show(struct seq_file *seq, void *v)
2376 {
             /*
              * seq_file show callback: print one CPU's counters as nine hex
              * fields - total, dropped, time_squeeze, five zero placeholders,
              * then cpu_collision.  Always returns 0.
              */
2377         struct netif_rx_stats *s = v;
2378
2379         seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2380                    s->total, s->dropped, s->time_squeeze, 0,
2381                    0, 0, 0, 0, /* was fastroute */
2382                    s->cpu_collision );
2383         return 0;
2384 }
2385
2386 static const struct seq_operations dev_seq_ops = {
             /* Iterator callbacks for the "dev" proc entry created in dev_proc_init(). */
2387         .start = dev_seq_start,
2388         .next  = dev_seq_next,
2389         .stop  = dev_seq_stop,
2390         .show  = dev_seq_show,
2391 };
2392
2393 static int dev_seq_open(struct inode *inode, struct file *file)
2394 {
             /* Open handler: attach the dev_seq_ops iterator to the file. */
2395         return seq_open(file, &dev_seq_ops);
2396 }
2397
2398 static const struct file_operations dev_seq_fops = {
             /* Only open is specific to this entry; the rest are seq_file helpers. */
2399         .owner   = THIS_MODULE,
2400         .open    = dev_seq_open,
2401         .read    = seq_read,
2402         .llseek  = seq_lseek,
2403         .release = seq_release,
2404 };
2405
2406 static const struct seq_operations softnet_seq_ops = {
             /* Iterator callbacks for the "softnet_stat" proc entry created in dev_proc_init(). */
2407         .start = softnet_seq_start,
2408         .next  = softnet_seq_next,
2409         .stop  = softnet_seq_stop,
2410         .show  = softnet_seq_show,
2411 };
2412
2413 static int softnet_seq_open(struct inode *inode, struct file *file)
2414 {
             /* Open handler: attach the softnet_seq_ops iterator to the file. */
2415         return seq_open(file, &softnet_seq_ops);
2416 }
2417
2418 static const struct file_operations softnet_seq_fops = {
             /* Only open is specific to this entry; the rest are seq_file helpers. */
2419         .owner   = THIS_MODULE,
2420         .open    = softnet_seq_open,
2421         .read    = seq_read,
2422         .llseek  = seq_lseek,
2423         .release = seq_release,
2424 };
2425
2426 static void *ptype_get_idx(loff_t pos)
2427 {
             /*
              * Return the packet_type at zero-based position pos, counting the
              * entries of ptype_all first and then those of ptype_base[0] through
              * ptype_base[15] in order.  Returns NULL when pos is past the end.
              * The lists are walked with the _rcu iterator; ptype_seq_start()
              * takes rcu_read_lock() before calling this.
              */
2428         struct packet_type *pt = NULL;
2429         loff_t i = 0;
2430         int t;
2431
2432         list_for_each_entry_rcu(pt, &ptype_all, list) {
2433                 if (i == pos)
2434                         return pt;
2435                 ++i;
2436         }
2437
2438         for (t = 0; t < 16; t++) {
2439                 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2440                         if (i == pos)
2441                                 return pt;
2442                         ++i;
2443                 }
2444         }
2445         return NULL;
2446 }
2447
2448 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2449 {
             /*
              * seq_file start callback: enter an RCU read-side section (left in
              * ptype_seq_stop()).  Position 0 is the header token; position N >= 1
              * maps to entry N - 1 of ptype_get_idx().
              */
2450         rcu_read_lock();
2451         return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2452 }
2453
2454 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2455 {
             /*
              * seq_file next callback: advance *pos and return the packet_type
              * that follows v, walking ptype_all first and then the 16 buckets of
              * ptype_base in order.  Returns NULL after the last entry.
              */
2456         struct packet_type *pt;
2457         struct list_head *nxt;
2458         int hash;
2459
2460         ++*pos;
2461         if (v == SEQ_START_TOKEN)
2462                 return ptype_get_idx(0);
2463
2464         pt = v;
2465         nxt = pt->list.next;
             /* An ETH_P_ALL entry is treated as living on the ptype_all list. */
2466         if (pt->type == htons(ETH_P_ALL)) {
2467                 if (nxt != &ptype_all)
2468                         goto found;
                     /* End of ptype_all: continue with the first hash bucket. */
2469                 hash = 0;
2470                 nxt = ptype_base[0].next;
2471         } else
2472                 hash = ntohs(pt->type) & 15;
2473
             /*
              * nxt pointing at a bucket's list head means that bucket is
              * exhausted (or empty): move on to the next one.
              */
2474         while (nxt == &ptype_base[hash]) {
2475                 if (++hash >= 16)
2476                         return NULL;
2477                 nxt = ptype_base[hash].next;
2478         }
2479 found:
2480         return list_entry(nxt, struct packet_type, list);
2481 }
2482
2483 static void ptype_seq_stop(struct seq_file *seq, void *v)
2484 {
             /* seq_file stop callback: leave the RCU section entered in ptype_seq_start(). */
2485         rcu_read_unlock();
2486 }
2487
2488 static void ptype_seq_decode(struct seq_file *seq, void *sym)
2489 {
             /*
              * Print the address sym in readable form.  With CONFIG_KALLSYMS and
              * a successful lookup the output is "symname+0xoffset", prefixed by
              * ":modname:" when the lookup reports a module name.  Otherwise the
              * raw pointer is printed as "[%p]".
              */
2490 #ifdef CONFIG_KALLSYMS
2491         unsigned long offset = 0, symsize;
2492         const char *symname;
2493         char *modname;
2494         char namebuf[128];
2495
2496         symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
2497                                   &modname, namebuf);
2498
2499         if (symname) {
2500                 char *delim = ":";
2501
                     /* No module name: drop both the name and its delimiters. */
2502                 if (!modname)
2503                         modname = delim = "";
2504                 seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
2505                            symname, offset);
2506                 return;
2507         }
2508 #endif
2509
2510         seq_printf(seq, "[%p]", sym);
2511 }
2512
2513 static int ptype_seq_show(struct seq_file *seq, void *v)
2514 {
             /*
              * seq_file show callback: print the column header for the start
              * token, otherwise one line per packet_type made of its type ("ALL "
              * for ETH_P_ALL, else four hex digits), the bound device name (empty
              * when there is none) and the decoded handler function.  Always
              * returns 0.
              */
2515         struct packet_type *pt = v;
2516
2517         if (v == SEQ_START_TOKEN)
2518                 seq_puts(seq, "Type Device      Function\n");
2519         else {
2520                 if (pt->type == htons(ETH_P_ALL))
2521                         seq_puts(seq, "ALL ");
2522                 else
2523                         seq_printf(seq, "%04x", ntohs(pt->type));
2524
2525                 seq_printf(seq, " %-8s ",
2526                            pt->dev ? pt->dev->name : "");
2527                 ptype_seq_decode(seq,  pt->func);
2528                 seq_putc(seq, '\n');
2529         }
2530
2531         return 0;
2532 }
2533
2534 static const struct seq_operations ptype_seq_ops = {
             /* Iterator callbacks for the "ptype" proc entry created in dev_proc_init(). */
2535         .start = ptype_seq_start,
2536         .next  = ptype_seq_next,
2537         .stop  = ptype_seq_stop,
2538         .show  = ptype_seq_show,
2539 };
2540
2541 static int ptype_seq_open(struct inode *inode, struct file *file)
2542 {
             /* Open handler: attach the ptype_seq_ops iterator to the file. */
2543         return seq_open(file, &ptype_seq_ops);
2544 }
2545
2546 static const struct file_operations ptype_seq_fops = {
             /* Only open is specific to this entry; the rest are seq_file helpers. */
2547         .owner   = THIS_MODULE,
2548         .open    = ptype_seq_open,
2549         .read    = seq_read,
2550         .llseek  = seq_lseek,
2551         .release = seq_release,
2552 };
2553
2554
2555 static int __init dev_proc_init(void)
2556 {
             /*
              * Create the "dev", "softnet_stat" and "ptype" proc entries, then
              * call wext_proc_init().  If any step fails, the entries created so
              * far are removed again in reverse order and -ENOMEM is returned;
              * on success the result is 0.
              *
              * Each unwind label removes the entry created just before the step
              * that failed, so the label names lag one step behind what they
              * remove (out_softnet removes "ptype", out_dev2 removes
              * "softnet_stat", out_dev removes "dev").
              */
2557         int rc = -ENOMEM;
2558
2559         if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2560                 goto out;
2561         if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2562                 goto out_dev;
2563         if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
2564                 goto out_dev2;
2565
2566         if (wext_proc_init())
2567                 goto out_softnet;
2568         rc = 0;
2569 out:
2570         return rc;
2571 out_softnet:
2572         proc_net_remove("ptype");
2573 out_dev2:
2574         proc_net_remove("softnet_stat");
2575 out_dev:
2576         proc_net_remove("dev");
2577         goto out;
2578 }
2579 #else
2580 #define dev_proc_init() 0
2581 #endif  /* CONFIG_PROC_FS */
2582
2583
2584 /**
2585  *      netdev_set_master       -       set up master/slave pair
2586  *      @slave: slave device
2587  *      @master: new master device
2588  *
2589  *      Changes the master device of the slave. Pass %NULL to break the
2590  *      bonding. The caller must hold the RTNL semaphore. On a failure
2591  *      a negative errno code is returned. On success the reference counts
2592  *      are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2593  *      function returns zero.
      *
      *      The only failure in this function is -EBUSY, returned when a
      *      non-NULL @master is passed while the slave already has a master.
2594  */
2595 int netdev_set_master(struct net_device *slave, struct net_device *master)
2596 {
2597         struct net_device *old = slave->master;
2598
2599         ASSERT_RTNL();
2600
2601         if (master) {
2602                 if (old)
2603                         return -EBUSY;
                     /* The slave keeps a reference on its master while linked. */
2604                 dev_hold(master);
2605         }
2606
2607         slave->master = master;
2608
             /*
              * NOTE(review): presumably waits for receive paths that may still
              * see the old master pointer (e.g. skb_bond()) before its reference
              * is dropped below - confirm.
              */
2609         synchronize_net();
2610
2611         if (old)
2612                 dev_put(old);
2613
2614         if (master)
2615                 slave->flags |= IFF_SLAVE;
2616         else
2617                 slave->flags &= ~IFF_SLAVE;
2618
2619         rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2620         return 0;
2621 }
2622
2623 static void __dev_set_promiscuity(struct net_device *dev, int inc)
2624 {
             /*
              * Add inc to dev->promiscuity and make IFF_PROMISC follow the
              * result: cleared when the count is zero, set otherwise.  When the
              * device flags actually change, the transition is logged, an audit
              * record is written and the driver's change_rx_flags() callback, if
              * it has one, is invoked.  The caller must hold RTNL.
              *
              * This helper does not call dev_set_rx_mode(); dev_set_promiscuity()
              * does that after calling it.
              */
2625         unsigned short old_flags = dev->flags;
2626
2627         ASSERT_RTNL();
2628
2629         if ((dev->promiscuity += inc) == 0)
2630                 dev->flags &= ~IFF_PROMISC;
2631         else
2632                 dev->flags |= IFF_PROMISC;
2633         if (dev->flags != old_flags) {
2634                 printk(KERN_INFO "device %s %s promiscuous mode\n",
2635                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2636                                                                "left");
2637                 audit_log(current->audit_context, GFP_ATOMIC,
2638                         AUDIT_ANOM_PROMISCUOUS,
2639                         "dev=%s prom=%d old_prom=%d auid=%u",
2640                         dev->name, (dev->flags & IFF_PROMISC),
2641                         (old_flags & IFF_PROMISC),
2642                         audit_get_loginuid(current->audit_context));
2643
2644                 if (dev->change_rx_flags)
2645                         dev->change_rx_flags(dev, IFF_PROMISC);
2646         }
2647 }
2648
2649 /**
2650  *      dev_set_promiscuity     - update promiscuity count on a device
2651  *      @dev: device
2652  *      @inc: modifier
2653  *
2654  *      Add or remove promiscuity from a device. While the count in the device
2655  *      remains above zero the interface remains promiscuous. Once it hits zero
2656  *      the device reverts back to normal filtering operation. A negative inc
2657  *      value is used to drop promiscuity on the device.
2658  */
2659 void dev_set_promiscuity(struct net_device *dev, int inc)
2660 {
2661         unsigned short old_flags = dev->flags;
2662
2663         __dev_set_promiscuity(dev, inc);
2664         if (dev->flags != old_flags)
2665                 dev_set_rx_mode(dev);
2666 }
2667
2668 /**
2669  *      dev_set_allmulti        - update allmulti count on a device
2670  *      @dev: device
2671  *      @inc: modifier
2672  *
2673  *      Add or remove reception of all multicast frames to a device. While the
2674  *      count in the device remains above zero the interface remains listening
2675  *      to all interfaces. Once it hits zero the device reverts back to normal
2676  *      filtering operation. A negative @inc value is used to drop the counter
2677  *      when releasing a resource needing all multicasts.
2678  */
2679
2680 void dev_set_allmulti(struct net_device *dev, int inc)
2681 {
2682         unsigned short old_flags = dev->flags;
2683
2684         ASSERT_RTNL();
2685
2686         dev->flags |= IFF_ALLMULTI;
2687         if ((dev->allmulti += inc) == 0)
2688                 dev->flags &= ~IFF_ALLMULTI;
2689         if (dev->flags ^ old_flags) {
2690                 if (dev->change_rx_flags)
2691                         dev->change_rx_flags(dev, IFF_ALLMULTI);
2692                 dev_set_rx_mode(dev);
2693         }
2694 }
2695
2696 /*
2697  *      Upload unicast and multicast address lists to device and
2698  *      configure RX filtering. When the device doesn't support unicast
2699  *      filtering it is put in promiscous mode while unicast addresses
2700  *      are present.
2701  */
2702 void __dev_set_rx_mode(struct net_device *dev)
2703 {
2704         /* dev_open will call this function so the list will stay sane. */
2705         if (!(dev->flags&IFF_UP))
2706                 return;
2707
2708         if (!netif_device_present(dev))
2709                 return;
2710
2711         if (dev->set_rx_mode)
2712                 dev->set_rx_mode(dev);
2713         else {
2714                 /* Unicast addresses changes may only happen under the rtnl,
2715                  * therefore calling __dev_set_promiscuity here is safe.
2716                  */
2717                 if (dev->uc_count > 0 && !dev->uc_promisc) {
2718                         __dev_set_promiscuity(dev, 1);
2719                         dev->uc_promisc = 1;
2720                 } else if (dev->uc_count == 0 && dev->uc_promisc) {
2721                         __dev_set_promiscuity(dev, -1);
2722                         dev->uc_promisc = 0;
2723                 }
2724
2725                 if (dev->set_multicast_list)
2726                         dev->set_multicast_list(dev);
2727         }
2728 }
2729
/*
 *	Locked variant of __dev_set_rx_mode(): the update runs between
 *	netif_tx_lock_bh() and netif_tx_unlock_bh() on @dev.
 */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_tx_lock_bh(dev);

	__dev_set_rx_mode(dev);

	netif_tx_unlock_bh(dev);
}
2736
2737 int __dev_addr_delete(struct dev_addr_list **list, int *count,
2738                       void *addr, int alen, int glbl)
2739 {
2740         struct dev_addr_list *da;
2741
2742         for (; (da = *list) != NULL; list = &da->next) {
2743                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2744                     alen == da->da_addrlen) {
2745                         if (glbl) {
2746                                 int old_glbl = da->da_gusers;
2747                                 da->da_gusers = 0;
2748                                 if (old_glbl == 0)
2749                                         break;
2750                         }
2751                         if (--da->da_users)
2752                                 return 0;
2753
2754                         *list = da->next;
2755                         kfree(da);
2756                         (*count)--;
2757                         return 0;
2758                 }
2759         }
2760         return -ENOENT;
2761 }
2762
2763 int __dev_addr_add(struct dev_addr_list **list, int *count,
2764                    void *addr, int alen, int glbl)
2765 {
2766         struct dev_addr_list *da;
2767
2768         for (da = *list; da != NULL; da = da->next) {
2769                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2770                     da->da_addrlen == alen) {
2771                         if (glbl) {
2772                                 int old_glbl = da->da_gusers;
2773                                 da->da_gusers = 1;
2774                                 if (old_glbl)
2775                                         return 0;
2776                         }
2777                         da->da_users++;
2778                         return 0;
2779                 }
2780         }
2781
2782         da = kmalloc(sizeof(*da), GFP_ATOMIC);
2783         if (da == NULL)
2784                 return -ENOMEM;
2785         memcpy(da->da_addr, addr, alen);
2786         da->da_addrlen = alen;
2787         da->da_users = 1;
2788         da->da_gusers = glbl ? 1 : 0;
2789         da->next = *list;
2790         *list = da;
2791         (*count)++;
2792         return 0;
2793 }
2794
2795 /**
2796  *      dev_unicast_delete      - Release secondary unicast address.
2797  *      @dev: device
2798  *      @addr: address to delete
2799  *      @alen: length of @addr
2800  *
2801  *      Release reference to a secondary unicast address and remove it
2802  *      from the device if the reference count drops to zero.
2803  *
2804  *      The caller must hold the rtnl_mutex.
2805  */
2806 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
2807 {
2808         int err;
2809
2810         ASSERT_RTNL();
2811
2812         netif_tx_lock_bh(dev);
2813         err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2814         if (!err)
2815                 __dev_set_rx_mode(dev);
2816         netif_tx_unlock_bh(dev);
2817         return err;
2818 }
2819 EXPORT_SYMBOL(dev_unicast_delete);
2820
2821 /**
2822  *      dev_unicast_add         - add a secondary unicast address
2823  *      @dev: device
2824  *      @addr: address to delete
2825  *      @alen: length of @addr
2826  *
2827  *      Add a secondary unicast address to the device or increase
2828  *      the reference count if it already exists.
2829  *
2830  *      The caller must hold the rtnl_mutex.
2831  */
2832 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
2833 {
2834         int err;
2835
2836         ASSERT_RTNL();
2837
2838         netif_tx_lock_bh(dev);
2839         err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2840         if (!err)
2841                 __dev_set_rx_mode(dev);
2842         netif_tx_unlock_bh(dev);
2843         return err;
2844 }
2845 EXPORT_SYMBOL(dev_unicast_add);
2846
2847 static void __dev_addr_discard(struct dev_addr_list **list)
2848 {
2849         struct dev_addr_list *tmp;
2850
2851         while (*list != NULL) {
2852                 tmp = *list;
2853                 *list = tmp->next;
2854                 if (tmp->da_users > tmp->da_gusers)
2855                         printk("__dev_addr_discard: address leakage! "
2856                                "da_users=%d\n", tmp->da_users);
2857                 kfree(tmp);
2858         }
2859 }
2860
2861 static void dev_addr_discard(struct net_device *dev)
2862 {
2863         netif_tx_lock_bh(dev);
2864
2865         __dev_addr_discard(&dev->uc_list);
2866         dev->uc_count = 0;
2867
2868         __dev_addr_discard(&dev->mc_list);
2869         dev->mc_count = 0;
2870
2871         netif_tx_unlock_bh(dev);
2872 }
2873
2874 unsigned dev_get_flags(const struct net_device *dev)
2875 {
2876         unsigned flags;
2877
2878         flags = (dev->flags & ~(IFF_PROMISC |
2879                                 IFF_ALLMULTI |
2880                                 IFF_RUNNING |
2881                                 IFF_LOWER_UP |
2882                                 IFF_DORMANT)) |
2883                 (dev->gflags & (IFF_PROMISC |
2884                                 IFF_ALLMULTI));
2885
2886         if (netif_running(dev)) {
2887                 if (netif_oper_up(dev))
2888                         flags |= IFF_RUNNING;
2889                 if (netif_carrier_ok(dev))
2890                         flags |= IFF_LOWER_UP;
2891                 if (netif_dormant(dev))
2892                         flags |= IFF_DORMANT;
2893         }
2894
2895         return flags;
2896 }
2897
2898 int dev_change_flags(struct net_device *dev, unsigned flags)
2899 {
2900         int ret, changes;
2901         int old_flags = dev->flags;
2902
2903         ASSERT_RTNL();
2904
2905         /*
2906          *      Set the flags on our device.
2907          */
2908
2909         dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2910                                IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2911                                IFF_AUTOMEDIA)) |
2912                      (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2913                                     IFF_ALLMULTI));
2914
2915         /*
2916          *      Load in the correct multicast list now the flags have changed.
2917          */
2918
2919         if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST)
2920                 dev->change_rx_flags(dev, IFF_MULTICAST);
2921
2922         dev_set_rx_mode(dev);
2923
2924         /*
2925          *      Have we downed the interface. We handle IFF_UP ourselves
2926          *      according to user attempts to set it, rather than blindly
2927          *      setting it.
2928          */
2929
2930         ret = 0;
2931         if ((old_flags ^ flags) & IFF_UP) {     /* Bit is different  ? */
2932                 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2933
2934                 if (!ret)
2935                         dev_set_rx_mode(dev);
2936         }
2937
2938         if (dev->flags & IFF_UP &&
2939             ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2940                                           IFF_VOLATILE)))
2941                 raw_notifier_call_chain(&netdev_chain,
2942                                 NETDEV_CHANGE, dev);
2943
2944         if ((flags ^ dev->gflags) & IFF_PROMISC) {
2945                 int inc = (flags & IFF_PROMISC) ? +1 : -1;
2946                 dev->gflags ^= IFF_PROMISC;
2947                 dev_set_promiscuity(dev, inc);
2948         }
2949
2950         /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2951            is important. Some (broken) drivers set IFF_PROMISC, when
2952            IFF_ALLMULTI is requested not asking us and not reporting.
2953          */
2954         if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2955                 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2956                 dev->gflags ^= IFF_ALLMULTI;
2957                 dev_set_allmulti(dev, inc);
2958         }
2959
2960         /* Exclude state transition flags, already notified */
2961         changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
2962         if (changes)
2963                 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
2964
2965         return ret;
2966 }
2967
2968 int dev_set_mtu(struct net_device *dev, int new_mtu)
2969 {
2970         int err;
2971
2972         if (new_mtu == dev->mtu)
2973                 return 0;
2974
2975         /*      MTU must be positive.    */
2976         if (new_mtu < 0)
2977                 return -EINVAL;
2978
2979         if (!netif_device_present(dev))
2980                 return -ENODEV;
2981
2982         err = 0;
2983         if (dev->change_mtu)
2984                 err = dev->change_mtu(dev, new_mtu);
2985         else
2986                 dev->mtu = new_mtu;
2987         if (!err && dev->flags & IFF_UP)
2988                 raw_notifier_call_chain(&netdev_chain,
2989                                 NETDEV_CHANGEMTU, dev);
2990         return err;
2991 }
2992
2993 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2994 {
2995         int err;
2996
2997         if (!dev->set_mac_address)
2998                 return -EOPNOTSUPP;
2999         if (sa->sa_family != dev->type)
3000                 return -EINVAL;
3001         if (!netif_device_present(dev))
3002                 return -ENODEV;
3003         err = dev->set_mac_address(dev, sa);
3004         if (!err)
3005                 raw_notifier_call_chain(&netdev_chain,
3006                                 NETDEV_CHANGEADDR, dev);
3007         return err;
3008 }
3009
3010 /*
3011  *      Perform the SIOCxIFxxx calls.
3012  */
3013 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
3014 {
3015         int err;
3016         struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
3017
3018         if (!dev)
3019                 return -ENODEV;
3020
3021         switch (cmd) {
3022                 case SIOCGIFFLAGS:      /* Get interface flags */
3023                         ifr->ifr_flags = dev_get_flags(dev);
3024                         return 0;
3025
3026                 case SIOCSIFFLAGS:      /* Set interface flags */
3027                         return dev_change_flags(dev, ifr->ifr_flags);
3028
3029                 case SIOCGIFMETRIC:     /* Get the metric on the interface
3030                                            (currently unused) */
3031                         ifr->ifr_metric = 0;
3032                         return 0;
3033
3034                 case SIOCSIFMETRIC:     /* Set the metric on the interface
3035                                            (currently unused) */
3036                         return -EOPNOTSUPP;
3037
3038                 case SIOCGIFMTU:        /* Get the MTU of a device */
3039                         ifr->ifr_mtu = dev->mtu;
3040                         return 0;
3041
3042                 case SIOCSIFMTU:        /* Set the MTU of a device */
3043                         return dev_set_mtu(dev, ifr->ifr_mtu);
3044
3045                 case SIOCGIFHWADDR:
3046                         if (!dev->addr_len)
3047                                 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3048                         else
3049                                 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3050                                        min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3051                         ifr->ifr_hwaddr.sa_family = dev->type;
3052                         return 0;
3053
3054                 case SIOCSIFHWADDR:
3055                         return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3056
3057                 case SIOCSIFHWBROADCAST:
3058                         if (ifr->ifr_hwaddr.sa_family != dev->type)
3059                                 return -EINVAL;
3060                         memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3061                                min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3062                         raw_notifier_call_chain(&netdev_chain,
3063                                             NETDEV_CHANGEADDR, dev);
3064                         return 0;
3065
3066                 case SIOCGIFMAP:
3067                         ifr->ifr_map.mem_start = dev->mem_start;
3068                         ifr->ifr_map.mem_end   = dev->mem_end;
3069                         ifr->ifr_map.base_addr = dev->base_addr;
3070                         ifr->ifr_map.irq       = dev->irq;
3071                         ifr->ifr_map.dma       = dev->dma;
3072                         ifr->ifr_map.port      = dev->if_port;
3073                         return 0;
3074
3075                 case SIOCSIFMAP:
3076                         if (dev->set_config) {
3077                                 if (!netif_device_present(dev))
3078                                         return -ENODEV;
3079                                 return dev->set_config(dev, &ifr->ifr_map);
3080                         }
3081                         return -EOPNOTSUPP;
3082
3083                 case SIOCADDMULTI:
3084                         if (!dev->set_multicast_list ||
3085                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3086                                 return -EINVAL;
3087                         if (!netif_device_present(dev))
3088                                 return -ENODEV;
3089                         return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3090                                           dev->addr_len, 1);
3091
3092                 case SIOCDELMULTI:
3093                         if (!dev->set_multicast_list ||
3094                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3095                                 return -EINVAL;
3096                         if (!netif_device_present(dev))
3097                                 return -ENODEV;
3098                         return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3099                                              dev->addr_len, 1);
3100
3101                 case SIOCGIFINDEX:
3102                         ifr->ifr_ifindex = dev->ifindex;
3103                         return 0;
3104
3105                 case SIOCGIFTXQLEN:
3106                         ifr->ifr_qlen = dev->tx_queue_len;
3107                         return 0;
3108
3109                 case SIOCSIFTXQLEN:
3110                         if (ifr->ifr_qlen < 0)
3111                                 return -EINVAL;
3112                         dev->tx_queue_len = ifr->ifr_qlen;
3113                         return 0;
3114
3115                 case SIOCSIFNAME:
3116                         ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3117                         return dev_change_name(dev, ifr->ifr_newname);
3118
3119                 /*
3120                  *      Unknown or private ioctl
3121                  */
3122
3123                 default:
3124                         if ((cmd >= SIOCDEVPRIVATE &&
3125                             cmd <= SIOCDEVPRIVATE + 15) ||
3126                             cmd == SIOCBONDENSLAVE ||
3127                             cmd == SIOCBONDRELEASE ||
3128                             cmd == SIOCBONDSETHWADDR ||
3129                             cmd == SIOCBONDSLAVEINFOQUERY ||
3130                             cmd == SIOCBONDINFOQUERY ||
3131                             cmd == SIOCBONDCHANGEACTIVE ||
3132                             cmd == SIOCGMIIPHY ||
3133                             cmd == SIOCGMIIREG ||
3134                             cmd == SIOCSMIIREG ||
3135                             cmd == SIOCBRADDIF ||
3136                             cmd == SIOCBRDELIF ||
3137                             cmd == SIOCWANDEV) {
3138                                 err = -EOPNOTSUPP;
3139                                 if (dev->do_ioctl) {
3140                                         if (netif_device_present(dev))
3141                                                 err = dev->do_ioctl(dev, ifr,
3142                                                                     cmd);
3143                                         else
3144                                                 err = -ENODEV;
3145                                 }
3146                         } else
3147                                 err = -EINVAL;
3148
3149         }
3150         return err;
3151 }
3152
3153 /*
3154  *      This function handles all "interface"-type I/O control requests. The actual
3155  *      'doing' part of this is dev_ifsioc above.
3156  */
3157
3158 /**
3159  *      dev_ioctl       -       network device ioctl
3160  *      @cmd: command to issue
3161  *      @arg: pointer to a struct ifreq in user space
3162  *
3163  *      Issue ioctl functions to devices. This is normally called by the
3164  *      user space syscall interfaces but can sometimes be useful for
3165  *      other purposes. The return value is the return from the syscall if
3166  *      positive or a negative errno code on error.
3167  */
3168
3169 int dev_ioctl(unsigned int cmd, void __user *arg)
3170 {
3171         struct ifreq ifr;
3172         int ret;
3173         char *colon;
3174
3175         /* One special case: SIOCGIFCONF takes ifconf argument
3176            and requires shared lock, because it sleeps writing
3177            to user space.
3178          */
3179
3180         if (cmd == SIOCGIFCONF) {
3181                 rtnl_lock();
3182                 ret = dev_ifconf((char __user *) arg);
3183                 rtnl_unlock();
3184                 return ret;
3185         }
3186         if (cmd == SIOCGIFNAME)
3187                 return dev_ifname((struct ifreq __user *)arg);
3188
3189         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3190                 return -EFAULT;
3191
3192         ifr.ifr_name[IFNAMSIZ-1] = 0;
3193
3194         colon = strchr(ifr.ifr_name, ':');
3195         if (colon)
3196                 *colon = 0;
3197
3198         /*
3199          *      See which interface the caller is talking about.
3200          */
3201
3202         switch (cmd) {
3203                 /*
3204                  *      These ioctl calls:
3205                  *      - can be done by all.
3206                  *      - atomic and do not require locking.
3207                  *      - return a value
3208                  */
3209                 case SIOCGIFFLAGS:
3210                 case SIOCGIFMETRIC:
3211                 case SIOCGIFMTU:
3212                 case SIOCGIFHWADDR:
3213                 case SIOCGIFSLAVE:
3214                 case SIOCGIFMAP:
3215                 case SIOCGIFINDEX:
3216                 case SIOCGIFTXQLEN:
3217                         dev_load(ifr.ifr_name);
3218                         read_lock(&dev_base_lock);
3219                         ret = dev_ifsioc(&ifr, cmd);
3220                         read_unlock(&dev_base_lock);
3221                         if (!ret) {
3222                                 if (colon)
3223                                         *colon = ':';
3224                                 if (copy_to_user(arg, &ifr,
3225                                                  sizeof(struct ifreq)))
3226                                         ret = -EFAULT;
3227                         }
3228                         return ret;
3229
3230                 case SIOCETHTOOL:
3231                         dev_load(ifr.ifr_name);
3232                         rtnl_lock();
3233                         ret = dev_ethtool(&ifr);
3234                         rtnl_unlock();
3235                         if (!ret) {
3236                                 if (colon)
3237                                         *colon = ':';
3238                                 if (copy_to_user(arg, &ifr,
3239                                                  sizeof(struct ifreq)))
3240                                         ret = -EFAULT;
3241                         }
3242                         return ret;
3243
3244                 /*
3245                  *      These ioctl calls:
3246                  *      - require superuser power.
3247                  *      - require strict serialization.
3248                  *      - return a value
3249                  */
3250                 case SIOCGMIIPHY:
3251                 case SIOCGMIIREG:
3252                 case SIOCSIFNAME:
3253                         if (!capable(CAP_NET_ADMIN))
3254                                 return -EPERM;
3255                         dev_load(ifr.ifr_name);
3256                         rtnl_lock();
3257                         ret = dev_ifsioc(&ifr, cmd);
3258                         rtnl_unlock();
3259                         if (!ret) {
3260                                 if (colon)
3261                                         *colon = ':';
3262                                 if (copy_to_user(arg, &ifr,
3263                                                  sizeof(struct ifreq)))
3264                                         ret = -EFAULT;
3265                         }
3266                         return ret;
3267
3268                 /*
3269                  *      These ioctl calls:
3270                  *      - require superuser power.
3271                  *      - require strict serialization.
3272                  *      - do not return a value
3273                  */
3274                 case SIOCSIFFLAGS:
3275                 case SIOCSIFMETRIC:
3276                 case SIOCSIFMTU:
3277                 case SIOCSIFMAP:
3278                 case SIOCSIFHWADDR:
3279                 case SIOCSIFSLAVE:
3280                 case SIOCADDMULTI:
3281                 case SIOCDELMULTI:
3282                 case SIOCSIFHWBROADCAST:
3283                 case SIOCSIFTXQLEN:
3284                 case SIOCSMIIREG:
3285                 case SIOCBONDENSLAVE:
3286                 case SIOCBONDRELEASE:
3287                 case SIOCBONDSETHWADDR:
3288                 case SIOCBONDCHANGEACTIVE:
3289                 case SIOCBRADDIF:
3290                 case SIOCBRDELIF:
3291                         if (!capable(CAP_NET_ADMIN))
3292                                 return -EPERM;
3293                         /* fall through */
3294                 case SIOCBONDSLAVEINFOQUERY:
3295                 case SIOCBONDINFOQUERY:
3296                         dev_load(ifr.ifr_name);
3297                         rtnl_lock();
3298                         ret = dev_ifsioc(&ifr, cmd);
3299                         rtnl_unlock();
3300                         return ret;
3301
3302                 case SIOCGIFMEM:
3303                         /* Get the per device memory space. We can add this but
3304                          * currently do not support it */
3305                 case SIOCSIFMEM:
3306                         /* Set the per device memory buffer space.
3307                          * Not applicable in our case */
3308                 case SIOCSIFLINK:
3309                         return -EINVAL;
3310
3311                 /*
3312                  *      Unknown or private ioctl.
3313                  */
3314                 default:
3315                         if (cmd == SIOCWANDEV ||
3316                             (cmd >= SIOCDEVPRIVATE &&
3317                              cmd <= SIOCDEVPRIVATE + 15)) {
3318                                 dev_load(ifr.ifr_name);
3319                                 rtnl_lock();
3320                                 ret = dev_ifsioc(&ifr, cmd);
3321                                 rtnl_unlock();
3322                                 if (!ret && copy_to_user(arg, &ifr,
3323                                                          sizeof(struct ifreq)))
3324                                         ret = -EFAULT;
3325                                 return ret;
3326                         }
3327                         /* Take care of Wireless Extensions */
3328                         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3329                                 return wext_handle_ioctl(&ifr, cmd, arg);
3330                         return -EINVAL;
3331         }
3332 }
3333
3334
3335 /**
3336  *      dev_new_index   -       allocate an ifindex
3337  *
3338  *      Returns a suitable unique value for a new device interface
3339  *      number.  The caller must hold the rtnl semaphore or the
3340  *      dev_base_lock to be sure it remains unique.
3341  */
3342 static int dev_new_index(void)
3343 {
3344         static int ifindex;
3345         for (;;) {
3346                 if (++ifindex <= 0)
3347                         ifindex = 1;
3348                 if (!__dev_get_by_index(ifindex))
3349                         return ifindex;
3350         }
3351 }
3352
3353 static int dev_boot_phase = 1;
3354
3355 /* Delayed registration/unregisteration */
3356 static DEFINE_SPINLOCK(net_todo_list_lock);
3357 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
3358
3359 static void net_set_todo(struct net_device *dev)
3360 {
3361         spin_lock(&net_todo_list_lock);
3362         list_add_tail(&dev->todo_list, &net_todo_list);
3363         spin_unlock(&net_todo_list_lock);
3364 }
3365
3366 /**
3367  *      register_netdevice      - register a network device
3368  *      @dev: device to register
3369  *
3370  *      Take a completed network device structure and add it to the kernel
3371  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3372  *      chain. 0 is returned on success. A negative errno code is returned
3373  *      on a failure to set up the device, or if the name is a duplicate.
3374  *
3375  *      Callers must hold the rtnl semaphore. You may want
3376  *      register_netdev() instead of this.
3377  *
3378  *      BUGS:
3379  *      The locking appears insufficient to guarantee two parallel registers
3380  *      will not get the same name.
3381  */
3382
3383 int register_netdevice(struct net_device *dev)
3384 {
3385         struct hlist_head *head;
3386         struct hlist_node *p;
3387         int ret;
3388
3389         BUG_ON(dev_boot_phase);
3390         ASSERT_RTNL();
3391
3392         might_sleep();
3393
3394         /* When net_device's are persistent, this will be fatal. */
3395         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
3396
3397         spin_lock_init(&dev->queue_lock);
3398         spin_lock_init(&dev->_xmit_lock);
3399         netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
3400         dev->xmit_lock_owner = -1;
3401         spin_lock_init(&dev->ingress_lock);
3402
3403         dev->iflink = -1;
3404
3405         /* Init, if this function is available */
3406         if (dev->init) {
3407                 ret = dev->init(dev);
3408                 if (ret) {
3409                         if (ret > 0)
3410                                 ret = -EIO;
3411                         goto out;
3412                 }
3413         }
3414
3415         if (!dev_valid_name(dev->name)) {
3416                 ret = -EINVAL;
3417                 goto err_uninit;
3418         }
3419
3420         dev->ifindex = dev_new_index();
3421         if (dev->iflink == -1)
3422                 dev->iflink = dev->ifindex;
3423
3424         /* Check for existence of name */
3425         head = dev_name_hash(dev->name);
3426         hlist_for_each(p, head) {
3427                 struct net_device *d
3428                         = hlist_entry(p, struct net_device, name_hlist);
3429                 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3430                         ret = -EEXIST;
3431                         goto err_uninit;
3432                 }
3433         }
3434
3435         /* Fix illegal checksum combinations */
3436         if ((dev->features & NETIF_F_HW_CSUM) &&
3437             (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3438                 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
3439                        dev->name);
3440                 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
3441         }
3442
3443         if ((dev->features & NETIF_F_NO_CSUM) &&
3444             (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3445                 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
3446                        dev->name);
3447                 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
3448         }
3449
3450
3451         /* Fix illegal SG+CSUM combinations. */
3452         if ((dev->features & NETIF_F_SG) &&
3453             !(dev->features & NETIF_F_ALL_CSUM)) {
3454                 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
3455                        dev->name);
3456                 dev->features &= ~NETIF_F_SG;
3457         }
3458
3459         /* TSO requires that SG is present as well. */
3460         if ((dev->features & NETIF_F_TSO) &&
3461             !(dev->features & NETIF_F_SG)) {
3462                 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
3463                        dev->name);
3464                 dev->features &= ~NETIF_F_TSO;
3465         }
3466         if (dev->features & NETIF_F_UFO) {
3467                 if (!(dev->features & NETIF_F_HW_CSUM)) {
3468                         printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3469                                         "NETIF_F_HW_CSUM feature.\n",
3470                                                         dev->name);
3471                         dev->features &= ~NETIF_F_UFO;
3472                 }
3473                 if (!(dev->features & NETIF_F_SG)) {
3474                         printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3475                                         "NETIF_F_SG feature.\n",
3476                                         dev->name);
3477                         dev->features &= ~NETIF_F_UFO;
3478                 }
3479         }
3480
3481         /*
3482          *      nil rebuild_header routine,
3483          *      that should be never called and used as just bug trap.
3484          */
3485
3486         if (!dev->rebuild_header)
3487                 dev->rebuild_header = default_rebuild_header;
3488
3489         ret = netdev_register_sysfs(dev);
3490         if (ret)
3491                 goto err_uninit;
3492         dev->reg_state = NETREG_REGISTERED;
3493
3494         /*
3495          *      Default initial state at registry is that the
3496          *      device is present.
3497          */
3498
3499         set_bit(__LINK_STATE_PRESENT, &dev->state);
3500
3501         dev_init_scheduler(dev);
3502         write_lock_bh(&dev_base_lock);
3503         list_add_tail(&dev->dev_list, &dev_base_head);
3504         hlist_add_head(&dev->name_hlist, head);
3505         hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
3506         dev_hold(dev);
3507         write_unlock_bh(&dev_base_lock);
3508
3509         /* Notify protocols, that a new device appeared. */
3510         ret = raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
3511         ret = notifier_to_errno(ret);
3512         if (ret)
3513                 unregister_netdevice(dev);
3514
3515 out:
3516         return ret;
3517
3518 err_uninit:
3519         if (dev->uninit)
3520                 dev->uninit(dev);
3521         goto out;
3522 }
3523
3524 /**
3525  *      register_netdev - register a network device
3526  *      @dev: device to register
3527  *
3528  *      Take a completed network device structure and add it to the kernel
3529  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3530  *      chain. 0 is returned on success. A negative errno code is returned
3531  *      on a failure to set up the device, or if the name is a duplicate.
3532  *
3533  *      This is a wrapper around register_netdevice that takes the rtnl semaphore
3534  *      and expands the device name if you passed a format string to
3535  *      alloc_netdev.
3536  */
3537 int register_netdev(struct net_device *dev)
3538 {
3539         int err;
3540
3541         rtnl_lock();
3542
3543         /*
3544          * If the name is a format string the caller wants us to do a
3545          * name allocation.
3546          */
3547         if (strchr(dev->name, '%')) {
3548                 err = dev_alloc_name(dev, dev->name);
3549                 if (err < 0)
3550                         goto out;
3551         }
3552
3553         err = register_netdevice(dev);
3554 out:
3555         rtnl_unlock();
3556         return err;
3557 }
3558 EXPORT_SYMBOL(register_netdev);
3559
3560 /*
3561  * netdev_wait_allrefs - wait until all references are gone.
3562  *
3563  * This is called when unregistering network devices.
3564  *
3565  * Any protocol or device that holds a reference should register
3566  * for netdevice notification, and cleanup and put back the
3567  * reference if they receive an UNREGISTER event.
3568  * We can get stuck here if buggy protocols don't correctly
3569  * call dev_put.
3570  */
3571 static void netdev_wait_allrefs(struct net_device *dev)
3572 {
3573         unsigned long rebroadcast_time, warning_time;
3574
3575         rebroadcast_time = warning_time = jiffies;
3576         while (atomic_read(&dev->refcnt) != 0) {
3577                 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
3578                         rtnl_lock();
3579
3580                         /* Rebroadcast unregister notification */
3581                         raw_notifier_call_chain(&netdev_chain,
3582                                             NETDEV_UNREGISTER, dev);
3583
3584                         if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3585                                      &dev->state)) {
3586                                 /* We must not have linkwatch events
3587                                  * pending on unregister. If this
3588                                  * happens, we simply run the queue
3589                                  * unscheduled, resulting in a noop
3590                                  * for this device.
3591                                  */
3592                                 linkwatch_run_queue();
3593                         }
3594
3595                         __rtnl_unlock();
3596
3597                         rebroadcast_time = jiffies;
3598                 }
3599
3600                 msleep(250);
3601
3602                 if (time_after(jiffies, warning_time + 10 * HZ)) {
3603                         printk(KERN_EMERG "unregister_netdevice: "
3604                                "waiting for %s to become free. Usage "
3605                                "count = %d\n",
3606                                dev->name, atomic_read(&dev->refcnt));
3607                         warning_time = jiffies;
3608                 }
3609         }
3610 }
3611
3612 /* The sequence is:
3613  *
3614  *      rtnl_lock();
3615  *      ...
3616  *      register_netdevice(x1);
3617  *      register_netdevice(x2);
3618  *      ...
3619  *      unregister_netdevice(y1);
3620  *      unregister_netdevice(y2);
3621  *      ...
3622  *      rtnl_unlock();
3623  *      free_netdev(y1);
3624  *      free_netdev(y2);
3625  *
3626  * We are invoked by rtnl_unlock() after it drops the semaphore.
3627  * This allows us to deal with problems:
3628  * 1) We can delete sysfs objects which invoke hotplug
3629  *    without deadlocking with linkwatch via keventd.
3630  * 2) Since we run with the RTNL semaphore not held, we can sleep
3631  *    safely in order to wait for the netdev refcnt to drop to zero.
3632  */
3633 static DEFINE_MUTEX(net_todo_run_mutex);
3634 void netdev_run_todo(void)
3635 {
3636         struct list_head list;
3637
3638         /* Need to guard against multiple cpu's getting out of order. */
3639         mutex_lock(&net_todo_run_mutex);
3640
3641         /* Not safe to do outside the semaphore.  We must not return
3642          * until all unregister events invoked by the local processor
3643          * have been completed (either by this todo run, or one on
3644          * another cpu).
3645          */
3646         if (list_empty(&net_todo_list))
3647                 goto out;
3648
3649         /* Snapshot list, allow later requests */
3650         spin_lock(&net_todo_list_lock);
3651         list_replace_init(&net_todo_list, &list);
3652         spin_unlock(&net_todo_list_lock);
3653
3654         while (!list_empty(&list)) {
3655                 struct net_device *dev
3656                         = list_entry(list.next, struct net_device, todo_list);
3657                 list_del(&dev->todo_list);
3658
3659                 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
3660                         printk(KERN_ERR "network todo '%s' but state %d\n",
3661                                dev->name, dev->reg_state);
3662                         dump_stack();
3663                         continue;
3664                 }
3665
3666                 dev->reg_state = NETREG_UNREGISTERED;
3667
3668                 netdev_wait_allrefs(dev);
3669
3670                 /* paranoia */
3671                 BUG_ON(atomic_read(&dev->refcnt));
3672                 BUG_TRAP(!dev->ip_ptr);
3673                 BUG_TRAP(!dev->ip6_ptr);
3674                 BUG_TRAP(!dev->dn_ptr);
3675
3676                 if (dev->destructor)
3677                         dev->destructor(dev);
3678
3679                 /* Free network device */
3680                 kobject_put(&dev->dev.kobj);
3681         }
3682
3683 out:
3684         mutex_unlock(&net_todo_run_mutex);
3685 }
3686
3687 static struct net_device_stats *internal_stats(struct net_device *dev)
3688 {
3689         return &dev->stats;
3690 }
3691
3692 /**
3693  *      alloc_netdev_mq - allocate network device
3694  *      @sizeof_priv:   size of private data to allocate space for
3695  *      @name:          device name format string
3696  *      @setup:         callback to initialize device
3697  *      @queue_count:   the number of subqueues to allocate
3698  *
3699  *      Allocates a struct net_device with private data area for driver use
3700  *      and performs basic initialization.  Also allocates subquue structs
3701  *      for each queue on the device at the end of the netdevice.
3702  */
3703 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
3704                 void (*setup)(struct net_device *), unsigned int queue_count)
3705 {
3706         void *p;
3707         struct net_device *dev;
3708         int alloc_size;
3709
3710         BUG_ON(strlen(name) >= sizeof(dev->name));
3711
3712         /* ensure 32-byte alignment of both the device and private area */
3713         alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
3714                      (sizeof(struct net_device_subqueue) * (queue_count - 1))) &
3715                      ~NETDEV_ALIGN_CONST;
3716         alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3717
3718         p = kzalloc(alloc_size, GFP_KERNEL);
3719         if (!p) {
3720                 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
3721                 return NULL;
3722         }
3723
3724         dev = (struct net_device *)
3725                 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3726         dev->padded = (char *)dev - (char *)p;
3727
3728         if (sizeof_priv) {
3729                 dev->priv = ((char *)dev +
3730                              ((sizeof(struct net_device) +
3731                                (sizeof(struct net_device_subqueue) *
3732                                 (queue_count - 1)) + NETDEV_ALIGN_CONST)
3733                               & ~NETDEV_ALIGN_CONST));
3734         }
3735
3736         dev->egress_subqueue_count = queue_count;
3737
3738         dev->get_stats = internal_stats;
3739         netpoll_netdev_init(dev);
3740         setup(dev);
3741         strcpy(dev->name, name);
3742         return dev;
3743 }
3744 EXPORT_SYMBOL(alloc_netdev_mq);
3745
3746 /**
3747  *      free_netdev - free network device
3748  *      @dev: device
3749  *
3750  *      This function does the last stage of destroying an allocated device
3751  *      interface. The reference to the device object is released.
3752  *      If this is the last reference then it will be freed.
3753  */
3754 void free_netdev(struct net_device *dev)
3755 {
3756 #ifdef CONFIG_SYSFS
3757         /*  Compatibility with error handling in drivers */
3758         if (dev->reg_state == NETREG_UNINITIALIZED) {
3759                 kfree((char *)dev - dev->padded);
3760                 return;
3761         }
3762
3763         BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3764         dev->reg_state = NETREG_RELEASED;
3765
3766         /* will free via device release */
3767         put_device(&dev->dev);
3768 #else
3769         kfree((char *)dev - dev->padded);
3770 #endif
3771 }
3772
/*
 * Synchronize with packet receive processing.
 *
 * May sleep; returns after synchronize_rcu() has completed.
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}
3779
3780 /**
3781  *      unregister_netdevice - remove device from the kernel
3782  *      @dev: device
3783  *
3784  *      This function shuts down a device interface and removes it
3785  *      from the kernel tables. On success 0 is returned, on a failure
3786  *      a negative errno code is returned.
3787  *
3788  *      Callers must hold the rtnl semaphore.  You may want
3789  *      unregister_netdev() instead of this.
3790  */
3791
3792 void unregister_netdevice(struct net_device *dev)
3793 {
3794         BUG_ON(dev_boot_phase);
3795         ASSERT_RTNL();
3796
3797         /* Some devices call without registering for initialization unwind. */
3798         if (dev->reg_state == NETREG_UNINITIALIZED) {
3799                 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3800                                   "was registered\n", dev->name, dev);
3801
3802                 WARN_ON(1);
3803                 return;
3804         }
3805
3806         BUG_ON(dev->reg_state != NETREG_REGISTERED);
3807
3808         /* If device is running, close it first. */
3809         if (dev->flags & IFF_UP)
3810                 dev_close(dev);
3811
3812         /* And unlink it from device chain. */
3813         write_lock_bh(&dev_base_lock);
3814         list_del(&dev->dev_list);
3815         hlist_del(&dev->name_hlist);
3816         hlist_del(&dev->index_hlist);
3817         write_unlock_bh(&dev_base_lock);
3818
3819         dev->reg_state = NETREG_UNREGISTERING;
3820
3821         synchronize_net();
3822
3823         /* Shutdown queueing discipline. */
3824         dev_shutdown(dev);
3825
3826
3827         /* Notify protocols, that we are about to destroy
3828            this device. They should clean all the things.
3829         */
3830         raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3831
3832         /*
3833          *      Flush the unicast and multicast chains
3834          */
3835         dev_addr_discard(dev);
3836
3837         if (dev->uninit)
3838                 dev->uninit(dev);
3839
3840         /* Notifier chain MUST detach us from master device. */
3841         BUG_TRAP(!dev->master);
3842
3843         /* Remove entries from sysfs */
3844         netdev_unregister_sysfs(dev);
3845
3846         /* Finish processing unregister after unlock */
3847         net_set_todo(dev);
3848
3849         synchronize_net();
3850
3851         dev_put(dev);
3852 }
3853
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.  It does not return a value.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore around the call.  In general you want to use
 *	this and not unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
3874
3875 static int dev_cpu_callback(struct notifier_block *nfb,
3876                             unsigned long action,
3877                             void *ocpu)
3878 {
3879         struct sk_buff **list_skb;
3880         struct net_device **list_net;
3881         struct sk_buff *skb;
3882         unsigned int cpu, oldcpu = (unsigned long)ocpu;
3883         struct softnet_data *sd, *oldsd;
3884
3885         if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
3886                 return NOTIFY_OK;
3887
3888         local_irq_disable();
3889         cpu = smp_processor_id();
3890         sd = &per_cpu(softnet_data, cpu);
3891         oldsd = &per_cpu(softnet_data, oldcpu);
3892
3893         /* Find end of our completion_queue. */
3894         list_skb = &sd->completion_queue;
3895         while (*list_skb)
3896                 list_skb = &(*list_skb)->next;
3897         /* Append completion queue from offline CPU. */
3898         *list_skb = oldsd->completion_queue;
3899         oldsd->completion_queue = NULL;
3900
3901         /* Find end of our output_queue. */
3902         list_net = &sd->output_queue;
3903         while (*list_net)
3904                 list_net = &(*list_net)->next_sched;
3905         /* Append output queue from offline CPU. */
3906         *list_net = oldsd->output_queue;
3907         oldsd->output_queue = NULL;
3908
3909         raise_softirq_irqoff(NET_TX_SOFTIRQ);
3910         local_irq_enable();
3911
3912         /* Process offline CPU's input_pkt_queue */
3913         while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3914                 netif_rx(skb);
3915
3916         return NOTIFY_OK;
3917 }
3918
3919 #ifdef CONFIG_NET_DMA
3920 /**
3921  * net_dma_rebalance - try to maintain one DMA channel per CPU
3922  * @net_dma: DMA client and associated data (lock, channels, channel_mask)
3923  *
3924  * This is called when the number of channels allocated to the net_dma client
3925  * changes.  The net_dma client tries to have one DMA channel per CPU.
3926  */
3927
3928 static void net_dma_rebalance(struct net_dma *net_dma)
3929 {
3930         unsigned int cpu, i, n, chan_idx;
3931         struct dma_chan *chan;
3932
3933         if (cpus_empty(net_dma->channel_mask)) {
3934                 for_each_online_cpu(cpu)
3935                         rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
3936                 return;
3937         }
3938
3939         i = 0;
3940         cpu = first_cpu(cpu_online_map);
3941
3942         for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
3943                 chan = net_dma->channels[chan_idx];
3944
3945                 n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
3946                    + (i < (num_online_cpus() %
3947                         cpus_weight(net_dma->channel_mask)) ? 1 : 0));
3948
3949                 while(n) {
3950                         per_cpu(softnet_data, cpu).net_dma = chan;
3951                         cpu = next_cpu(cpu, cpu_online_map);
3952                         n--;
3953                 }
3954                 i++;
3955         }
3956 }
3957
3958 /**
3959  * netdev_dma_event - event callback for the net_dma_client
3960  * @client: should always be net_dma_client
3961  * @chan: DMA channel for the event
3962  * @state: DMA state to be handled
3963  */
3964 static enum dma_state_client
3965 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
3966         enum dma_state state)
3967 {
3968         int i, found = 0, pos = -1;
3969         struct net_dma *net_dma =
3970                 container_of(client, struct net_dma, client);
3971         enum dma_state_client ack = DMA_DUP; /* default: take no action */
3972
3973         spin_lock(&net_dma->lock);
3974         switch (state) {
3975         case DMA_RESOURCE_AVAILABLE:
3976                 for (i = 0; i < NR_CPUS; i++)
3977                         if (net_dma->channels[i] == chan) {
3978                                 found = 1;
3979                                 break;
3980                         } else if (net_dma->channels[i] == NULL && pos < 0)
3981                                 pos = i;
3982
3983                 if (!found && pos >= 0) {
3984                         ack = DMA_ACK;
3985                         net_dma->channels[pos] = chan;
3986                         cpu_set(pos, net_dma->channel_mask);
3987                         net_dma_rebalance(net_dma);
3988                 }
3989                 break;
3990         case DMA_RESOURCE_REMOVED:
3991                 for (i = 0; i < NR_CPUS; i++)
3992                         if (net_dma->channels[i] == chan) {
3993                                 found = 1;
3994                                 pos = i;
3995                                 break;
3996                         }
3997
3998                 if (found) {
3999                         ack = DMA_ACK;
4000                         cpu_clear(pos, net_dma->channel_mask);
4001                         net_dma->channels[i] = NULL;
4002                         net_dma_rebalance(net_dma);
4003                 }
4004                 break;
4005         default:
4006                 break;
4007         }
4008         spin_unlock(&net_dma->lock);
4009
4010         return ack;
4011 }
4012
4013 /**
4014  * netdev_dma_regiser - register the networking subsystem as a DMA client
4015  */
4016 static int __init netdev_dma_register(void)
4017 {
4018         spin_lock_init(&net_dma.lock);
4019         dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4020         dma_async_client_register(&net_dma.client);
4021         dma_async_client_chan_request(&net_dma.client);
4022         return 0;
4023 }
4024
4025 #else
4026 static int __init netdev_dma_register(void) { return -ENODEV; }
4027 #endif /* CONFIG_NET_DMA */
4028
4029 /**
4030  *      netdev_compute_feature - compute conjunction of two feature sets
4031  *      @all: first feature set
4032  *      @one: second feature set
4033  *
4034  *      Computes a new feature set after adding a device with feature set
4035  *      @one to the master device with current feature set @all.  Returns
4036  *      the new feature set.
4037  */
4038 int netdev_compute_features(unsigned long all, unsigned long one)
4039 {
4040         /* if device needs checksumming, downgrade to hw checksumming */
4041         if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4042                 all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
4043
4044         /* if device can't do all checksum, downgrade to ipv4/ipv6 */
4045         if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
4046                 all ^= NETIF_F_HW_CSUM
4047                         | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4048
4049         if (one & NETIF_F_GSO)
4050                 one |= NETIF_F_GSO_SOFTWARE;
4051         one |= NETIF_F_GSO;
4052
4053         /* If even one device supports robust GSO, enable it for all. */
4054         if (one & NETIF_F_GSO_ROBUST)
4055                 all |= NETIF_F_GSO_ROBUST;
4056
4057         all &= one | NETIF_F_LLTX;
4058
4059         if (!(all & NETIF_F_ALL_CSUM))
4060                 all &= ~NETIF_F_SG;
4061         if (!(all & NETIF_F_SG))
4062                 all &= ~NETIF_F_GSO_MASK;
4063
4064         return all;
4065 }
4066 EXPORT_SYMBOL(netdev_compute_features);
4067
4068 /*
4069  *      Initialize the DEV module. At boot time this walks the device list and
4070  *      unhooks any devices that fail to initialise (normally hardware not
4071  *      present) and leaves us with a valid list of present and active devices.
4072  *
4073  */
4074
4075 /*
4076  *       This is called single threaded during boot, so no need
4077  *       to take the rtnl semaphore.
4078  */
4079 static int __init net_dev_init(void)
4080 {
4081         int i, rc = -ENOMEM;
4082
4083         BUG_ON(!dev_boot_phase);
4084
4085         if (dev_proc_init())
4086                 goto out;
4087
4088         if (netdev_sysfs_init())
4089                 goto out;
4090
4091         INIT_LIST_HEAD(&ptype_all);
4092         for (i = 0; i < 16; i++)
4093                 INIT_LIST_HEAD(&ptype_base[i]);
4094
4095         for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
4096                 INIT_HLIST_HEAD(&dev_name_head[i]);
4097
4098         for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
4099                 INIT_HLIST_HEAD(&dev_index_head[i]);
4100
4101         /*
4102          *      Initialise the packet receive queues.
4103          */
4104
4105         for_each_possible_cpu(i) {
4106                 struct softnet_data *queue;
4107
4108                 queue = &per_cpu(softnet_data, i);
4109                 skb_queue_head_init(&queue->input_pkt_queue);
4110                 queue->completion_queue = NULL;
4111                 INIT_LIST_HEAD(&queue->poll_list);
4112
4113                 queue->backlog.poll = process_backlog;
4114                 queue->backlog.weight = weight_p;
4115         }
4116
4117         netdev_dma_register();
4118
4119         dev_boot_phase = 0;
4120
4121         open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
4122         open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
4123
4124         hotcpu_notifier(dev_cpu_callback, 0);
4125         dst_init();
4126         dev_mcast_init();
4127         rc = 0;
4128 out:
4129         return rc;
4130 }
4131
4132 subsys_initcall(net_dev_init);
4133
/* Symbols exported from this file, in alphabetical order. */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_get_flags);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);

/* Bridge hooks, exported only when bridging is built in or modular. */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
#endif

EXPORT_PER_CPU_SYMBOL(softnet_data);