gro: Avoid copying headers of unmerged packets
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5110b359c758c2e8abe7d13e5f63f90269fbbd5e..f9f4065a7e9b2620c40e1fc6b7e60a3b9ee301c4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -73,17 +73,13 @@ static struct kmem_cache *skbuff_fclone_cache __read_mostly;
 static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
                                  struct pipe_buffer *buf)
 {
-       struct sk_buff *skb = (struct sk_buff *) buf->private;
-
-       kfree_skb(skb);
+       put_page(buf->page);
 }
 
 static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
                                struct pipe_buffer *buf)
 {
-       struct sk_buff *skb = (struct sk_buff *) buf->private;
-
-       skb_get(skb);
+       get_page(buf->page);
 }
 
 static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
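With buf->private no longer carrying an skb pointer, each pipe buffer now pins only the page it describes, and the last put frees that page regardless of what happened to the originating skb. A minimal userspace model of that ownership scheme; struct page_ref and its helpers are hypothetical stand-ins for struct page with get_page()/put_page():

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for struct page plus get_page()/put_page(). */
struct page_ref {
	int refcnt;
	char *data;
};

static struct page_ref *page_ref_alloc(size_t size)
{
	struct page_ref *p = malloc(sizeof(*p));

	if (!p)
		return NULL;
	p->data = malloc(size);
	if (!p->data) {
		free(p);
		return NULL;
	}
	p->refcnt = 1;
	return p;
}

static void page_ref_get(struct page_ref *p)
{
	p->refcnt++;			/* a pipe buffer pins only this page */
}

static void page_ref_put(struct page_ref *p)
{
	if (--p->refcnt == 0) {		/* last reference frees the page */
		free(p->data);
		free(p);
	}
}

int main(void)
{
	struct page_ref *p = page_ref_alloc(4096);

	if (!p)
		return 1;
	page_ref_get(p);	/* queued into the pipe */
	page_ref_put(p);	/* pipe consumer is done with it */
	page_ref_put(p);	/* producer drops its own reference */
	printf("page freed once the last reference is gone\n");
	return 0;
}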
@@ -1334,9 +1330,19 @@ fault:
  */
 static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
 {
-       struct sk_buff *skb = (struct sk_buff *) spd->partial[i].private;
+       put_page(spd->pages[i]);
+}
 
-       kfree_skb(skb);
+static inline struct page *linear_to_page(struct page *page, unsigned int len,
+                                         unsigned int offset)
+{
+       struct page *p = alloc_pages(GFP_KERNEL, 0);
+
+       if (!p)
+               return NULL;
+       memcpy(page_address(p) + offset, page_address(page) + offset, len);
+
+       return p;
 }
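linear_to_page() covers the skb's linear area, which lives in kmalloc'd memory whose backing pages may be shared with other slab objects and so cannot simply be pinned for the pipe; the bytes are instead copied into a page the pipe can own outright. A simplified userspace sketch of that copy, with PAGE_SZ and the function name as illustrative assumptions:

#include <stdlib.h>
#include <string.h>

#define PAGE_SZ 4096	/* illustrative page size */

/*
 * Duplicate 'len' bytes found at 'offset' inside the source page into a
 * freshly allocated page, preserving the offset, in the spirit of
 * linear_to_page().
 */
static void *copy_linear_chunk(const void *src_page, unsigned int len,
			       unsigned int offset)
{
	void *p;

	if (offset + len > PAGE_SZ)
		return NULL;	/* a slice never crosses a page boundary */

	p = malloc(PAGE_SZ);
	if (!p)
		return NULL;
	memcpy((char *)p + offset, (const char *)src_page + offset, len);
	return p;
}

int main(void)
{
	char src[PAGE_SZ] = "linear skb data";
	void *copy = copy_linear_chunk(src, 16, 0);

	free(copy);
	return 0;
}

Copying at the same offset keeps the offset later recorded in spd->partial[] valid without extra bookkeeping, at the cost of allocating a full page per slice.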
 
 /*
@@ -1344,16 +1350,23 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
  */
 static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
                                unsigned int len, unsigned int offset,
-                               struct sk_buff *skb)
+                               struct sk_buff *skb, int linear)
 {
        if (unlikely(spd->nr_pages == PIPE_BUFFERS))
                return 1;
 
+       if (linear) {
+               page = linear_to_page(page, len, offset);
+               if (!page)
+                       return 1;
+       } else
+               get_page(page);
+
        spd->pages[spd->nr_pages] = page;
        spd->partial[spd->nr_pages].len = len;
        spd->partial[spd->nr_pages].offset = offset;
-       spd->partial[spd->nr_pages].private = (unsigned long) skb_get(skb);
        spd->nr_pages++;
+
        return 0;
 }
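spd_fill_page() now decides per segment how a page reaches the pipe: linear head bytes are duplicated through linear_to_page(), while frag pages are merely pinned with get_page(). A self-contained sketch of that dispatch; the slot structure, MAX_SLOTS and the field names are illustrative rather than the kernel's:

#include <stdlib.h>
#include <string.h>

#define MAX_SLOTS 16		/* stands in for PIPE_BUFFERS */
#define PAGE_SZ   4096		/* illustrative page size */

struct slot_page {		/* hypothetical stand-in for struct page */
	int refcnt;
	char data[PAGE_SZ];
};

struct spd_slot {		/* hypothetical stand-in for a partial page */
	struct slot_page *page;
	unsigned int len;
	unsigned int offset;
};

/*
 * Mirrors the shape of spd_fill_page(): returns 1 when the descriptor is
 * full or the copy fails, 0 once the slot has been filled.  The caller is
 * assumed to guarantee that offset + len fits within one page.
 */
static int fill_slot(struct spd_slot *slots, unsigned int *nr,
		     struct slot_page *page, unsigned int len,
		     unsigned int offset, int linear)
{
	if (*nr == MAX_SLOTS)
		return 1;

	if (linear) {
		/* duplicate the head bytes into a page the pipe can own */
		struct slot_page *copy = calloc(1, sizeof(*copy));

		if (!copy)
			return 1;
		copy->refcnt = 1;
		memcpy(copy->data + offset, page->data + offset, len);
		page = copy;
	} else {
		page->refcnt++;	/* frag pages just get an extra reference */
	}

	slots[*nr].page = page;
	slots[*nr].len = len;
	slots[*nr].offset = offset;
	(*nr)++;
	return 0;
}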
 
@@ -1369,7 +1382,7 @@ static inline void __segment_seek(struct page **page, unsigned int *poff,
 static inline int __splice_segment(struct page *page, unsigned int poff,
                                   unsigned int plen, unsigned int *off,
                                   unsigned int *len, struct sk_buff *skb,
-                                  struct splice_pipe_desc *spd)
+                                  struct splice_pipe_desc *spd, int linear)
 {
        if (!*len)
                return 1;
@@ -1392,7 +1405,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
                /* the linear region may spread across several pages  */
                flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
 
-               if (spd_fill_page(spd, page, flen, poff, skb))
+               if (spd_fill_page(spd, page, flen, poff, skb, linear))
                        return 1;
 
                __segment_seek(&page, &poff, &plen, flen);
@@ -1419,7 +1432,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
        if (__splice_segment(virt_to_page(skb->data),
                             (unsigned long) skb->data & (PAGE_SIZE - 1),
                             skb_headlen(skb),
-                            offset, len, skb, spd))
+                            offset, len, skb, spd, 1))
                return 1;
 
        /*
@@ -1429,7 +1442,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
                const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
 
                if (__splice_segment(f->page, f->page_offset, f->size,
-                                    offset, len, skb, spd))
+                                    offset, len, skb, spd, 0))
                        return 1;
        }
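__splice_segment() feeds the linear head to spd_fill_page() in page-sized slices, since a kmalloc'd head can straddle page boundaries, while the already page-backed frags are passed with linear=0 and go through whole. A standalone sketch of that per-page walk, with all names and sizes illustrative:

#include <stdio.h>

#define PAGE_SZ 4096	/* illustrative page size */

/*
 * Walk a linear region that starts 'poff' bytes into its first page and
 * emit one (offset, length) slice per page, clamping each slice to
 * PAGE_SZ - poff the way __splice_segment() does for the skb head.
 */
static void walk_linear_region(unsigned int poff, unsigned int plen)
{
	while (plen) {
		unsigned int flen = plen;

		if (flen > PAGE_SZ - poff)
			flen = PAGE_SZ - poff;	/* never cross a page */

		printf("slice: page offset %u, len %u\n", poff, flen);

		plen -= flen;
		poff = 0;	/* later pages are consumed from offset 0 */
	}
}

int main(void)
{
	/* e.g. a 6000-byte head that begins 1500 bytes into its first page */
	walk_linear_region(1500, 6000);
	return 0;
}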
 
@@ -1442,7 +1455,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
  * the frag list, if such a thing exists. We'd probably need to recurse to
  * handle that cleanly.
  */
-int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
+int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
                    struct pipe_inode_info *pipe, unsigned int tlen,
                    unsigned int flags)
 {
@@ -1455,16 +1468,6 @@ int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
                .ops = &sock_pipe_buf_ops,
                .spd_release = sock_spd_release,
        };
-       struct sk_buff *skb;
-
-       /*
-        * I'd love to avoid the clone here, but tcp_read_sock()
-        * ignores reference counts and unconditonally kills the sk_buff
-        * on return from the actor.
-        */
-       skb = skb_clone(__skb, GFP_KERNEL);
-       if (unlikely(!skb))
-               return -ENOMEM;
 
        /*
         * __skb_splice_bits() only fails if the output has no room left,
@@ -1488,15 +1491,9 @@ int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
        }
 
 done:
-       /*
-        * drop our reference to the clone, the pipe consumption will
-        * drop the rest.
-        */
-       kfree_skb(skb);
-
        if (spd.nr_pages) {
+               struct sock *sk = skb->sk;
                int ret;
-               struct sock *sk = __skb->sk;
 
                /*
                 * Drop the socket lock, otherwise we have reverse
@@ -2587,27 +2584,38 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
        struct sk_buff *p = *head;
        struct sk_buff *nskb;
        unsigned int headroom;
-       unsigned int hlen = p->data - skb_mac_header(p);
+       unsigned int len = skb_gro_len(skb);
 
-       if (hlen + p->len + skb->len >= 65536)
+       if (p->len + len >= 65536)
                return -E2BIG;
 
        if (skb_shinfo(p)->frag_list)
                goto merge;
-       else if (!skb_headlen(p) && !skb_headlen(skb) &&
-                skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags <
+       else if (skb_headlen(skb) <= skb_gro_offset(skb) &&
+                skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags <=
                 MAX_SKB_FRAGS) {
+               skb_shinfo(skb)->frags[0].page_offset +=
+                       skb_gro_offset(skb) - skb_headlen(skb);
+               skb_shinfo(skb)->frags[0].size -=
+                       skb_gro_offset(skb) - skb_headlen(skb);
+
                memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags,
                       skb_shinfo(skb)->frags,
                       skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
 
                skb_shinfo(p)->nr_frags += skb_shinfo(skb)->nr_frags;
+               skb_shinfo(skb)->nr_frags = 0;
+
+               skb->truesize -= skb->data_len;
+               skb->len -= skb->data_len;
+               skb->data_len = 0;
+
                NAPI_GRO_CB(skb)->free = 1;
                goto done;
        }
 
        headroom = skb_headroom(p);
-       nskb = netdev_alloc_skb(p->dev, headroom);
+       nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p));
        if (unlikely(!nskb))
                return -ENOMEM;
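In the merge branch above, a packet whose data is entirely page-backed is absorbed by appending its frags to p. GRO has already consumed skb_gro_offset(skb) bytes of headers, but only skb_headlen(skb) of them live in the linear area, so the remainder sits at the front of frags[0] and is trimmed off before the frag array is copied. A worked example of that adjustment, with illustrative numbers:

#include <stdio.h>

/*
 * Worked example of the frags[0] trim in skb_gro_receive().  The arriving
 * packet has had 'gro_offset' bytes of headers parsed by GRO, but only
 * 'headlen' of them live in its linear area; the remaining header bytes
 * sit at the front of frags[0] and must be skipped before the frag is
 * appended to the merged skb.  All numbers are illustrative.
 */
int main(void)
{
	unsigned int gro_offset = 66;	/* MAC + IP + TCP headers consumed */
	unsigned int headlen    = 0;	/* driver placed everything in frags */
	unsigned int frag0_off  = 0;	/* frags[0].page_offset */
	unsigned int frag0_size = 1514;	/* frags[0].size */

	if (headlen <= gro_offset) {
		unsigned int trim = gro_offset - headlen;

		frag0_off  += trim;	/* skip header bytes held in the frag */
		frag0_size -= trim;
	}

	printf("frags[0]: offset %u, %u bytes of payload\n",
	       frag0_off, frag0_size);
	return 0;
}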
 
@@ -2615,12 +2623,15 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
        nskb->mac_len = p->mac_len;
 
        skb_reserve(nskb, headroom);
+       __skb_put(nskb, skb_gro_offset(p));
 
-       skb_set_mac_header(nskb, -hlen);
+       skb_set_mac_header(nskb, skb_mac_header(p) - p->data);
        skb_set_network_header(nskb, skb_network_offset(p));
        skb_set_transport_header(nskb, skb_transport_offset(p));
 
-       memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen);
+       __skb_pull(p, skb_gro_offset(p));
+       memcpy(skb_mac_header(nskb), skb_mac_header(p),
+              p->data - skb_mac_header(p));
 
        *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
        skb_shinfo(nskb)->frag_list = p;
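When the frag arrays cannot be merged, a new chain head (nskb) is built instead: it is sized for the original headroom plus the already-parsed header area, the headroom is reserved, and skb_gro_offset(p) bytes are put so the parsed headers of p can be copied across once. A flat-buffer model of the skb_reserve()/__skb_put() arithmetic; names and sizes are illustrative:

#include <stdio.h>
#include <stdlib.h>

/*
 * Flat-buffer model of the nskb setup: allocate headroom plus the GRO
 * header area, reserve the headroom, then "put" the header area so the
 * parsed headers can be copied in.
 */
int main(void)
{
	unsigned int headroom = 64;	/* illustrative headroom of p */
	unsigned int gro_offset = 52;	/* parsed IP + TCP headers of p */
	unsigned char *head = malloc(headroom + gro_offset);
	unsigned char *data, *tail;
	unsigned int len = 0;

	if (!head)
		return 1;

	data = tail = head + headroom;	/* skb_reserve(nskb, headroom) */

	tail += gro_offset;		/* __skb_put(nskb, skb_gro_offset(p)) */
	len  += gro_offset;		/* room for the headers copied from p */

	printf("data offset %ld, tail offset %ld, len %u\n",
	       (long)(data - head), (long)(tail - head), len);
	free(head);
	return 0;
}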
@@ -2645,9 +2656,9 @@ merge:
 
 done:
        NAPI_GRO_CB(p)->count++;
-       p->data_len += skb->len;
-       p->truesize += skb->len;
-       p->len += skb->len;
+       p->data_len += len;
+       p->truesize += len;
+       p->len += len;
 
        NAPI_GRO_CB(skb)->same_flow = 1;
        return 0;
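The accounting at done: now grows p by len = skb_gro_len(skb), the payload beyond the parsed headers, rather than by skb->len, so header bytes of the merged packet are not added to the held skb's length. A worked example with illustrative numbers:

#include <stdio.h>

/*
 * Worked example of the 'done:' accounting: p grows by skb_gro_len(),
 * the payload past the parsed headers, rather than by the follow-on
 * packet's full length.  Numbers are illustrative.
 */
int main(void)
{
	unsigned int p_len      = 1500;	/* length already held by p */
	unsigned int skb_len    = 1500;	/* length of the new packet */
	unsigned int gro_offset = 52;	/* its parsed IP + TCP headers */
	unsigned int gro_len    = skb_len - gro_offset;	/* skb_gro_len(skb) */

	p_len += gro_len;		/* was: p_len += skb_len */

	printf("merged length: %u (the %u header bytes are not added)\n",
	       p_len, gro_offset);
	return 0;
}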