Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6

[net-next-2.6.git] / net / core / dev.c
diff --git a/net/core/dev.c b/net/core/dev.c

index e1c1cdcc2bb0429d65fea408625e9dec0c317a59..7cd5237d98221349b56f2f5ced9305f65c528610 100644 (file)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1902,14 +1902,14 @@ static int dev_gso_segment(struct sk_buff *skb)
  
  /*
   * Try to orphan skb early, right before transmission by the device.
- * We cannot orphan skb if tx timestamp is requested, since
- * drivers need to call skb_tstamp_tx() to send the timestamp.
+ * We cannot orphan skb if tx timestamp is requested or the sk-reference
+ * is needed on driver level for other reasons, e.g. see net/can/raw.c
   */
  static inline void skb_orphan_try(struct sk_buff *skb)
  {
         struct sock *sk = skb->sk;
  
-       if (sk && !skb_tx(skb)->flags) {
+       if (sk && !skb_shinfo(skb)->tx_flags) {
                 /* skb_tx_hash() wont be able to get sk.
                  * We copy sk_hash into skb->rxhash
                  */
@@ -2259,69 +2259,44 @@ static inline void ____napi_schedule(struct softnet_data *sd,
         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
  }
  
-#ifdef CONFIG_RPS
-
-/* One global table that all flow-based protocols share. */
-struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
-EXPORT_SYMBOL(rps_sock_flow_table);
-
  /*
- * get_rps_cpu is called from netif_receive_skb and returns the target
- * CPU from the RPS map of the receiving queue for a given skb.
- * rcu_read_lock must be held on entry.
+ * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
+ * and src/dst port numbers. Returns a non-zero hash number on success
+ * and 0 on failure.
   */
-static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
-                      struct rps_dev_flow **rflowp)
+__u32 __skb_get_rxhash(struct sk_buff *skb)
  {
+       int nhoff, hash = 0, poff;
         struct ipv6hdr *ip6;
         struct iphdr *ip;
-       struct netdev_rx_queue *rxqueue;
-       struct rps_map *map;
-       struct rps_dev_flow_table *flow_table;
-       struct rps_sock_flow_table *sock_flow_table;
-       int cpu = -1;
         u8 ip_proto;
-       u16 tcpu;
         u32 addr1, addr2, ihl;
         union {
                 u32 v32;
                 u16 v16[2];
         } ports;
  
-       if (skb_rx_queue_recorded(skb)) {
-               u16 index = skb_get_rx_queue(skb);
-               if (unlikely(index >= dev->num_rx_queues)) {
-                       WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
-                               "on queue %u, but number of RX queues is %u\n",
-                               dev->name, index, dev->num_rx_queues);
-                       goto done;
-               }
-               rxqueue = dev->_rx + index;
-       } else
-               rxqueue = dev->_rx;
-
-       if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
-               goto done;
-
-       if (skb->rxhash)
-               goto got_hash; /* Skip hash computation on packet header */
+       nhoff = skb_network_offset(skb);
  
         switch (skb->protocol) {
         case __constant_htons(ETH_P_IP):
-               if (!pskb_may_pull(skb, sizeof(*ip)))
+               if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
                         goto done;
  
-               ip = (struct iphdr *) skb->data;
-               ip_proto = ip->protocol;
+               ip = (struct iphdr *) (skb->data + nhoff);
+               if (ip->frag_off & htons(IP_MF | IP_OFFSET))
+                       ip_proto = 0;
+               else
+                       ip_proto = ip->protocol;
                 addr1 = (__force u32) ip->saddr;
                 addr2 = (__force u32) ip->daddr;
                 ihl = ip->ihl;
                 break;
         case __constant_htons(ETH_P_IPV6):
-               if (!pskb_may_pull(skb, sizeof(*ip6)))
+               if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
                         goto done;
  
-               ip6 = (struct ipv6hdr *) skb->data;
+               ip6 = (struct ipv6hdr *) (skb->data + nhoff);
                 ip_proto = ip6->nexthdr;
                 addr1 = (__force u32) ip6->saddr.s6_addr32[3];
                 addr2 = (__force u32) ip6->daddr.s6_addr32[3];
@@ -2330,33 +2305,71 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
         default:
                 goto done;
         }
-       switch (ip_proto) {
-       case IPPROTO_TCP:
-       case IPPROTO_UDP:
-       case IPPROTO_DCCP:
-       case IPPROTO_ESP:
-       case IPPROTO_AH:
-       case IPPROTO_SCTP:
-       case IPPROTO_UDPLITE:
-               if (pskb_may_pull(skb, (ihl * 4) + 4)) {
-                       ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
+
+       ports.v32 = 0;
+       poff = proto_ports_offset(ip_proto);
+       if (poff >= 0) {
+               nhoff += ihl * 4 + poff;
+               if (pskb_may_pull(skb, nhoff + 4)) {
+                       ports.v32 = * (__force u32 *) (skb->data + nhoff);
                         if (ports.v16[1] < ports.v16[0])
                                 swap(ports.v16[0], ports.v16[1]);
-                       break;
                 }
-       default:
-               ports.v32 = 0;
-               break;
         }
  
         /* get a consistent hash (same value on both flow directions) */
         if (addr2 < addr1)
                 swap(addr1, addr2);
-       skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
-       if (!skb->rxhash)
-               skb->rxhash = 1;
  
-got_hash:
+       hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
+       if (!hash)
+               hash = 1;
+
+done:
+       return hash;
+}
+EXPORT_SYMBOL(__skb_get_rxhash);
+
+#ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
+/*
+ * get_rps_cpu is called from netif_receive_skb and returns the target
+ * CPU from the RPS map of the receiving queue for a given skb.
+ * rcu_read_lock must be held on entry.
+ */
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+                      struct rps_dev_flow **rflowp)
+{
+       struct netdev_rx_queue *rxqueue;
+       struct rps_map *map;
+       struct rps_dev_flow_table *flow_table;
+       struct rps_sock_flow_table *sock_flow_table;
+       int cpu = -1;
+       u16 tcpu;
+
+       if (skb_rx_queue_recorded(skb)) {
+               u16 index = skb_get_rx_queue(skb);
+               if (unlikely(index >= dev->num_rx_queues)) {
+                       WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
+                               "on queue %u, but number of RX queues is %u\n",
+                               dev->name, index, dev->num_rx_queues);
+                       goto done;
+               }
+               rxqueue = dev->_rx + index;
+       } else
+               rxqueue = dev->_rx;
+
+       if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
+               goto done;
+
+       skb_reset_network_header(skb);
+       if (!skb_get_rxhash(skb))
+               goto done;
+
         flow_table = rcu_dereference(rxqueue->rps_flow_table);
         sock_flow_table = rcu_dereference(rps_sock_flow_table);
         if (flow_table && sock_flow_table) {
@@ -2517,6 +2530,7 @@ int netif_rx(struct sk_buff *skb)
                 struct rps_dev_flow voidflow, *rflow = &voidflow;
                 int cpu;
  
+               preempt_disable();
                 rcu_read_lock();
  
                 cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -2526,6 +2540,7 @@ int netif_rx(struct sk_buff *skb)
                 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
  
                 rcu_read_unlock();
+               preempt_enable();
         }
  #else
         {
@@ -3072,7 +3087,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
         int mac_len;
         enum gro_result ret;
  
-       if (!(skb->dev->features & NETIF_F_GRO))
+       if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
                 goto normal;
  
         if (skb_is_gso(skb) || skb_has_frags(skb))
@@ -3141,7 +3156,7 @@ pull:
                         put_page(skb_shinfo(skb)->frags[0].page);
                         memmove(skb_shinfo(skb)->frags,
                                 skb_shinfo(skb)->frags + 1,
-                               --skb_shinfo(skb)->nr_frags);
+                               --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
                 }
         }
  
@@ -3159,9 +3174,6 @@ __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
  {
         struct sk_buff *p;
  
-       if (netpoll_rx_on(skb))
-               return GRO_NORMAL;
-
         for (p = napi->gro_list; p; p = p->next) {
                 NAPI_GRO_CB(p)->same_flow =
                         (p->dev == skb->dev) &&