Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
diff --git a/net/core/dev.c b/net/core/dev.c
index 04972a4783e2e9b75c54fd5c8e30bfedcf674ecc..78b5a89b0f40a455e4229fb04b291e87877b491b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
 #include <linux/jhash.h>
 #include <linux/random.h>
 #include <trace/events/napi.h>
+#include <trace/events/net.h>
+#include <trace/events/skb.h>
 #include <linux/pci.h>
 #include <linux/inetdevice.h>
 
@@ -1484,7 +1486,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
        nf_reset(skb);
 
        if (unlikely(!(dev->flags & IFF_UP) ||
-                    (skb->len > (dev->mtu + dev->hard_header_len)))) {
+                    (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) {
                atomic_long_inc(&dev->rx_dropped);
                kfree_skb(skb);
                return NET_RX_DROP;
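
A worked instance of the new length check, with standard Ethernet values assumed (not taken from this hunk):

	/* Assuming dev->mtu = 1500 and dev->hard_header_len = ETH_HLEN (14),
	 * adding VLAN_HLEN = 4 raises the limit to 1500 + 14 + 4 = 1518 bytes,
	 * so a forwarded frame that still carries one 802.1Q tag is no longer
	 * dropped as oversized.
	 */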
@@ -1553,18 +1555,20 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
  */
-void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 {
-       unsigned int real_num = dev->real_num_tx_queues;
+       if (txq < 1 || txq > dev->num_tx_queues)
+               return -EINVAL;
+
+       if (dev->reg_state == NETREG_REGISTERED) {
+               ASSERT_RTNL();
 
-       if (unlikely(txq > dev->num_tx_queues))
-               ;
-       else if (txq > real_num)
-               dev->real_num_tx_queues = txq;
-       else if (txq < real_num) {
-               dev->real_num_tx_queues = txq;
-               qdisc_reset_all_tx_gt(dev, txq);
+               if (txq < dev->real_num_tx_queues)
+                       qdisc_reset_all_tx_gt(dev, txq);
        }
+
+       dev->real_num_tx_queues = txq;
+       return 0;
 }
 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
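
Since netif_set_real_num_tx_queues() now returns an error code, callers are expected to check it; the RX counterpart in the next hunk follows the same pattern. A minimal caller-side sketch (hypothetical driver helper; assumes the rtnl lock is already held, e.g. in an ethtool handler):

	/* Hypothetical example of resizing the active TX queue count at
	 * runtime.  -EINVAL is returned when count is 0 or exceeds
	 * dev->num_tx_queues.
	 */
	static int my_set_tx_channels(struct net_device *dev, unsigned int count)
	{
		ASSERT_RTNL();	/* required once the device is registered */

		return netif_set_real_num_tx_queues(dev, count);
	}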
 
@@ -1583,12 +1587,12 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
 {
        int rc;
 
+       if (rxq < 1 || rxq > dev->num_rx_queues)
+               return -EINVAL;
+
        if (dev->reg_state == NETREG_REGISTERED) {
                ASSERT_RTNL();
 
-               if (rxq > dev->num_rx_queues)
-                       return -EINVAL;
-
                rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
                                                  rxq);
                if (rc)
@@ -1692,7 +1696,12 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 
 static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
 {
-       if (can_checksum_protocol(dev->features, skb->protocol))
+       int features = dev->features;
+
+       if (vlan_tx_tag_present(skb))
+               features &= dev->vlan_features;
+
+       if (can_checksum_protocol(features, skb->protocol))
                return true;
 
        if (skb->protocol == htons(ETH_P_8021Q)) {
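
The vlan_features mask consulted here is populated by the driver; a hedged sketch of typical probe-time setup (the exact flag combination is an assumption, not part of this patch):

	/* Offloads that remain usable while the VLAN tag is carried out of
	 * band are mirrored into dev->vlan_features, so checks such as
	 * dev_can_checksum() keep them enabled for accelerated-VLAN traffic.
	 */
	dev->features      |= NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_HW_VLAN_TX;
	dev->vlan_features |= NETIF_F_SG | NETIF_F_IP_CSUM;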
@@ -1791,6 +1800,16 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
        __be16 type = skb->protocol;
        int err;
 
+       if (type == htons(ETH_P_8021Q)) {
+               struct vlan_ethhdr *veh;
+
+               if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
+                       return ERR_PTR(-EINVAL);
+
+               veh = (struct vlan_ethhdr *)skb->data;
+               type = veh->h_vlan_encapsulated_proto;
+       }
+
        skb_reset_mac_header(skb);
        skb->mac_len = skb->network_header - skb->mac_header;
        __skb_pull(skb, skb->mac_len);
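
For reference, the header layout this relies on (standard 802.1Q framing, not specific to this patch):

	/* struct vlan_ethhdr, VLAN_ETH_HLEN = 18 bytes:
	 *   h_dest[6] | h_source[6] | h_vlan_proto (0x8100) | h_vlan_TCI |
	 *   h_vlan_encapsulated_proto
	 * When the outer type is ETH_P_8021Q, segmentation is keyed off the
	 * encapsulated protocol pulled from this header.
	 */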
@@ -1962,9 +1981,14 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 static inline int skb_needs_linearize(struct sk_buff *skb,
                                      struct net_device *dev)
 {
+       int features = dev->features;
+
+       if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb))
+               features &= dev->vlan_features;
+
        return skb_is_nonlinear(skb) &&
-              ((skb_has_frag_list(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
-               (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
+              ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
+               (skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) ||
                                              illegal_highdma(dev, skb))));
 }
 
@@ -1987,6 +2011,15 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 
                skb_orphan_try(skb);
 
+               if (vlan_tx_tag_present(skb) &&
+                   !(dev->features & NETIF_F_HW_VLAN_TX)) {
+                       skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+                       if (unlikely(!skb))
+                               goto out;
+
+                       skb->vlan_tci = 0;
+               }
+
                if (netif_needs_gso(dev, skb)) {
                        if (unlikely(dev_gso_segment(skb)))
                                goto out_kfree_skb;
@@ -2011,6 +2044,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                }
 
                rc = ops->ndo_start_xmit(skb, dev);
+               trace_net_dev_xmit(skb, rc);
                if (rc == NETDEV_TX_OK)
                        txq_trans_update(txq);
                return rc;
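
For context, the out-of-band tag consumed above is normally stored by the 8021q transmit path before the packet reaches this function; a simplified sketch of the producer side (assumed, not the exact vlan_dev code):

	/* Stash the tag in skb->vlan_tci instead of rewriting the packet
	 * data, then hand the skb to the real device.  dev_hard_start_xmit()
	 * inserts the tag in software only when the device lacks
	 * NETIF_F_HW_VLAN_TX.
	 */
	skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
	skb->dev = real_dev;
	dev_queue_xmit(skb);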
@@ -2031,6 +2065,7 @@ gso:
                        skb_dst_drop(nskb);
 
                rc = ops->ndo_start_xmit(nskb, dev);
+               trace_net_dev_xmit(nskb, rc);
                if (unlikely(rc != NETDEV_TX_OK)) {
                        if (rc & ~NETDEV_TX_MASK)
                                goto out_kfree_gso_skb;
@@ -2048,6 +2083,7 @@ out_kfree_gso_skb:
                skb->destructor = DEV_GSO_CB(skb)->destructor;
 out_kfree_skb:
        kfree_skb(skb);
+out:
        return rc;
 }
 
@@ -2222,6 +2258,7 @@ int dev_queue_xmit(struct sk_buff *skb)
 #ifdef CONFIG_NET_CLS_ACT
        skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
 #endif
+       trace_net_dev_queue(skb);
        if (q->enqueue) {
                rc = __dev_xmit_skb(skb, q, dev, txq);
                goto out;
@@ -2578,6 +2615,7 @@ int netif_rx(struct sk_buff *skb)
        if (netdev_tstamp_prequeue)
                net_timestamp_check(skb);
 
+       trace_netif_rx(skb);
 #ifdef CONFIG_RPS
        {
                struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -2637,6 +2675,7 @@ static void net_tx_action(struct softirq_action *h)
                        clist = clist->next;
 
                        WARN_ON(atomic_read(&skb->users));
+                       trace_kfree_skb(skb, net_tx_action);
                        __kfree_skb(skb);
                }
        }
@@ -2757,33 +2796,6 @@ out:
 }
 #endif
 
-/*
- *     netif_nit_deliver - deliver received packets to network taps
- *     @skb: buffer
- *
- *     This function is used to deliver incoming packets to network
- *     taps. It should be used when the normal netif_receive_skb path
- *     is bypassed, for example because of VLAN acceleration.
- */
-void netif_nit_deliver(struct sk_buff *skb)
-{
-       struct packet_type *ptype;
-
-       if (list_empty(&ptype_all))
-               return;
-
-       skb_reset_network_header(skb);
-       skb_reset_transport_header(skb);
-       skb->mac_len = skb->network_header - skb->mac_header;
-
-       rcu_read_lock();
-       list_for_each_entry_rcu(ptype, &ptype_all, list) {
-               if (!ptype->dev || ptype->dev == skb->dev)
-                       deliver_skb(skb, ptype, skb->dev);
-       }
-       rcu_read_unlock();
-}
-
 /**
  *     netdev_rx_handler_register - register receive handler
  *     @dev: device to register a handler for
@@ -2893,8 +2905,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
        if (!netdev_tstamp_prequeue)
                net_timestamp_check(skb);
 
-       if (vlan_tx_tag_present(skb))
-               vlan_hwaccel_do_receive(skb);
+       trace_netif_receive_skb(skb);
 
        /* if we've gotten here through NAPI, check netpoll */
        if (netpoll_receive_skb(skb))
@@ -2908,8 +2919,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
         * be delivered to pkt handlers that are exact matches.  Also
         * the deliver_no_wcard flag will be set.  If packet handlers
         * are sensitive to duplicate packets these skbs will need to
-        * be dropped at the handler.  The vlan accel path may have
-        * already set the deliver_no_wcard flag.
+        * be dropped at the handler.
         */
        null_or_orig = NULL;
        orig_dev = skb->dev;
@@ -2968,6 +2978,18 @@ ncls:
                        goto out;
        }
 
+       if (vlan_tx_tag_present(skb)) {
+               if (pt_prev) {
+                       ret = deliver_skb(skb, pt_prev, orig_dev);
+                       pt_prev = NULL;
+               }
+               if (vlan_hwaccel_do_receive(&skb)) {
+                       ret = __netif_receive_skb(skb);
+                       goto out;
+               } else if (unlikely(!skb))
+                       goto out;
+       }
+
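
Reading this call site, the contract of vlan_hwaccel_do_receive(&skb) can be summarized as below; the re-targeting to the matching VLAN device is an assumption based on the companion 8021q change, not visible in this hunk:

	/* vlan_hwaccel_do_receive(&skb) as used above:
	 *  - returns true:  skb was re-targeted (presumably to the matching
	 *    VLAN device) and is run through __netif_receive_skb() again;
	 *  - returns false, skb == NULL: the packet was consumed, stop here;
	 *  - returns false, skb != NULL: no VLAN device claimed it, fall
	 *    through and deliver on the original device.
	 */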
        /*
         * Make sure frames received on VLAN interfaces stacked on
         * bonding interfaces still make their way to any base bonding
@@ -3232,6 +3254,7 @@ __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
                unsigned long diffs;
 
                diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
+               diffs |= p->vlan_tci ^ skb->vlan_tci;
                diffs |= compare_ether_header(skb_mac_header(p),
                                              skb_gro_mac_header(skb));
                NAPI_GRO_CB(p)->same_flow = !diffs;
@@ -3287,14 +3310,14 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(napi_gro_receive);
 
-void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
+static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 {
        __skb_pull(skb, skb_headlen(skb));
        skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
+       skb->vlan_tci = 0;
 
        napi->skb = skb;
 }
-EXPORT_SYMBOL(napi_reuse_skb);
 
 struct sk_buff *napi_get_frags(struct napi_struct *napi)
 {
@@ -4928,20 +4951,6 @@ static void rollback_registered(struct net_device *dev)
        rollback_registered_many(&single);
 }
 
-static void __netdev_init_queue_locks_one(struct net_device *dev,
-                                         struct netdev_queue *dev_queue,
-                                         void *_unused)
-{
-       spin_lock_init(&dev_queue->_xmit_lock);
-       netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
-       dev_queue->xmit_lock_owner = -1;
-}
-
-static void netdev_init_queue_locks(struct net_device *dev)
-{
-       netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
-}
-
 unsigned long netdev_fix_features(unsigned long features, const char *name)
 {
        /* Fix illegal SG+CSUM combinations. */
@@ -5013,29 +5022,62 @@ static int netif_alloc_rx_queues(struct net_device *dev)
 {
 #ifdef CONFIG_RPS
        unsigned int i, count = dev->num_rx_queues;
+       struct netdev_rx_queue *rx;
 
-       if (count) {
-               struct netdev_rx_queue *rx;
+       BUG_ON(count < 1);
 
-               rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
-               if (!rx) {
-                       pr_err("netdev: Unable to allocate %u rx queues.\n",
-                              count);
-                       return -ENOMEM;
-               }
-               dev->_rx = rx;
-
-               /*
-                * Set a pointer to first element in the array which holds the
-                * reference count.
-                */
-               for (i = 0; i < count; i++)
-                       rx[i].first = rx;
+       rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
+       if (!rx) {
+               pr_err("netdev: Unable to allocate %u rx queues.\n", count);
+               return -ENOMEM;
        }
+       dev->_rx = rx;
+
+       /*
+        * Set a pointer to first element in the array which holds the
+        * reference count.
+        */
+       for (i = 0; i < count; i++)
+               rx[i].first = rx;
 #endif
        return 0;
 }
 
+static int netif_alloc_netdev_queues(struct net_device *dev)
+{
+       unsigned int count = dev->num_tx_queues;
+       struct netdev_queue *tx;
+
+       BUG_ON(count < 1);
+
+       tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
+       if (!tx) {
+               pr_err("netdev: Unable to allocate %u tx queues.\n",
+                      count);
+               return -ENOMEM;
+       }
+       dev->_tx = tx;
+       return 0;
+}
+
+static void netdev_init_one_queue(struct net_device *dev,
+                                 struct netdev_queue *queue,
+                                 void *_unused)
+{
+       queue->dev = dev;
+
+       /* Initialize queue lock */
+       spin_lock_init(&queue->_xmit_lock);
+       netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+       queue->xmit_lock_owner = -1;
+}
+
+static void netdev_init_queues(struct net_device *dev)
+{
+       netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+       spin_lock_init(&dev->tx_global_lock);
+}
+
 /**
  *     register_netdevice      - register a network device
  *     @dev: device to register
@@ -5069,7 +5111,6 @@ int register_netdevice(struct net_device *dev)
 
        spin_lock_init(&dev->addr_list_lock);
        netdev_set_addr_lockdep_class(dev);
-       netdev_init_queue_locks(dev);
 
        dev->iflink = -1;
 
@@ -5077,6 +5118,12 @@ int register_netdevice(struct net_device *dev)
        if (ret)
                goto out;
 
+       ret = netif_alloc_netdev_queues(dev);
+       if (ret)
+               goto out;
+
+       netdev_init_queues(dev);
+
        /* Init, if this function is available */
        if (dev->netdev_ops->ndo_init) {
                ret = dev->netdev_ops->ndo_init(dev);
@@ -5458,19 +5505,6 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 }
 EXPORT_SYMBOL(dev_get_stats);
 
-static void netdev_init_one_queue(struct net_device *dev,
-                                 struct netdev_queue *queue,
-                                 void *_unused)
-{
-       queue->dev = dev;
-}
-
-static void netdev_init_queues(struct net_device *dev)
-{
-       netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
-       spin_lock_init(&dev->tx_global_lock);
-}
-
 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
 {
        struct netdev_queue *queue = dev_ingress_queue(dev);
@@ -5482,7 +5516,6 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
        if (!queue)
                return NULL;
        netdev_init_one_queue(dev, queue, NULL);
-       __netdev_init_queue_locks_one(dev, queue, NULL);
        queue->qdisc = &noop_qdisc;
        queue->qdisc_sleeping = &noop_qdisc;
        rcu_assign_pointer(dev->ingress_queue, queue);
@@ -5504,13 +5537,18 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
                void (*setup)(struct net_device *), unsigned int queue_count)
 {
-       struct netdev_queue *tx;
        struct net_device *dev;
        size_t alloc_size;
        struct net_device *p;
 
        BUG_ON(strlen(name) >= sizeof(dev->name));
 
+       if (queue_count < 1) {
+               pr_err("alloc_netdev: Unable to allocate device "
+                      "with zero queues.\n");
+               return NULL;
+       }
+
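
Callers hit this check through alloc_netdev_mq() or its alloc_etherdev_mq() wrapper; a minimal hedged usage sketch (hypothetical private struct name):

	/* Hypothetical allocation with four TX queues; queue_count must now
	 * be at least 1, otherwise alloc_netdev_mq() prints the error above
	 * and returns NULL.
	 */
	struct net_device *dev = alloc_etherdev_mq(sizeof(struct my_priv), 4);
	if (!dev)
		return -ENOMEM;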
        alloc_size = sizeof(struct net_device);
        if (sizeof_priv) {
                /* ensure 32-byte alignment of private area */
@@ -5526,20 +5564,12 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
                return NULL;
        }
 
-       tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
-       if (!tx) {
-               printk(KERN_ERR "alloc_netdev: Unable to allocate "
-                      "tx qdiscs.\n");
-               goto free_p;
-       }
-
-
        dev = PTR_ALIGN(p, NETDEV_ALIGN);
        dev->padded = (char *)dev - (char *)p;
 
        dev->pcpu_refcnt = alloc_percpu(int);
        if (!dev->pcpu_refcnt)
-               goto free_tx;
+               goto free_p;
 
        if (dev_addr_init(dev))
                goto free_pcpu;
@@ -5549,7 +5579,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
        dev_net_set(dev, &init_net);
 
-       dev->_tx = tx;
        dev->num_tx_queues = queue_count;
        dev->real_num_tx_queues = queue_count;
 
@@ -5560,8 +5589,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 
        dev->gso_max_size = GSO_MAX_SIZE;
 
-       netdev_init_queues(dev);
-
        INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
        dev->ethtool_ntuple_list.count = 0;
        INIT_LIST_HEAD(&dev->napi_list);
@@ -5572,8 +5599,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
        strcpy(dev->name, name);
        return dev;
 
-free_tx:
-       kfree(tx);
 free_pcpu:
        free_percpu(dev->pcpu_refcnt);
 free_p: