net - IP_NODEFRAG option for IPv4 socket
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ce231780a2b14ef4e15a33baad4b0c887e96df16..6c40a8c46e7984843275af12bfbac9a241e8e4c6 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -239,7 +239,16 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc)
    sent to multicast group to reach destination designated router.
  */
 struct ip_ra_chain *ip_ra_chain;
-DEFINE_RWLOCK(ip_ra_lock);
+static DEFINE_SPINLOCK(ip_ra_lock);
+
+
+static void ip_ra_destroy_rcu(struct rcu_head *head)
+{
+       struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
+
+       sock_put(ra->saved_sk);
+       kfree(ra);
+}
 
 int ip_ra_control(struct sock *sk, unsigned char on,
                  void (*destructor)(struct sock *))
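
The ip_ra_destroy_rcu() callback above dereferences ra->saved_sk and ra->rcu, so the companion change to struct ip_ra_chain (declared in include/net/ip.h, which this blobdiff does not show) has to add those fields alongside the existing next/sk/destructor members. A minimal sketch of the layout the code in this file assumes; the exact upstream definition may differ (for instance, fields could be overlaid in a union):

struct ip_ra_chain {
	struct ip_ra_chain	*next;		/* published with rcu_assign_pointer() */
	struct sock		*sk;		/* cleared before unlink so readers skip the entry */
	void			(*destructor)(struct sock *);
	struct sock		*saved_sk;	/* reference dropped in ip_ra_destroy_rcu() */
	struct rcu_head		rcu;		/* queued via call_rcu() on removal */
};
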
@@ -251,35 +260,42 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 
        new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
 
-       write_lock_bh(&ip_ra_lock);
+       spin_lock_bh(&ip_ra_lock);
        for (rap = &ip_ra_chain; (ra = *rap) != NULL; rap = &ra->next) {
                if (ra->sk == sk) {
                        if (on) {
-                               write_unlock_bh(&ip_ra_lock);
+                               spin_unlock_bh(&ip_ra_lock);
                                kfree(new_ra);
                                return -EADDRINUSE;
                        }
-                       *rap = ra->next;
-                       write_unlock_bh(&ip_ra_lock);
+                       /* don't let ip_call_ra_chain() use sk again */
+                       ra->sk = NULL;
+                       rcu_assign_pointer(*rap, ra->next);
+                       spin_unlock_bh(&ip_ra_lock);
 
                        if (ra->destructor)
                                ra->destructor(sk);
-                       sock_put(sk);
-                       kfree(ra);
+                       /*
+                        * Delay sock_put(sk) and kfree(ra) until after one RCU
+                        * grace period. This guarantees ip_call_ra_chain()
+                        * does not need to mess with socket refcounts.
+                        */
+                       ra->saved_sk = sk;
+                       call_rcu(&ra->rcu, ip_ra_destroy_rcu);
                        return 0;
                }
        }
        if (new_ra == NULL) {
-               write_unlock_bh(&ip_ra_lock);
+               spin_unlock_bh(&ip_ra_lock);
                return -ENOBUFS;
        }
        new_ra->sk = sk;
        new_ra->destructor = destructor;
 
        new_ra->next = ra;
-       *rap = new_ra;
+       rcu_assign_pointer(*rap, new_ra);
        sock_hold(sk);
-       write_unlock_bh(&ip_ra_lock);
+       spin_unlock_bh(&ip_ra_lock);
 
        return 0;
 }
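
Clearing ra->sk before the rcu_assign_pointer() unlink and deferring sock_put()/kfree() to ip_ra_destroy_rcu() exist for the benefit of the lockless reader, ip_call_ra_chain() in net/ipv4/ip_input.c, which is not part of this file. The sketch below only illustrates the traversal pattern the writer side above is built for; everything beyond ip_ra_chain, ra->sk and raw_rcv() is simplified or assumed:

static int ip_call_ra_chain_sketch(struct sk_buff *skb)
{
	struct ip_ra_chain *ra;
	struct sock *last = NULL;

	/* The RX path already runs under rcu_read_lock(). */
	for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
		struct sock *sk = ra->sk;

		/* Entries being removed have sk == NULL and are skipped;
		 * their memory stays valid until ip_ra_destroy_rcu() runs
		 * after the grace period, so no refcount games are needed.
		 */
		if (!sk)
			continue;

		if (last) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

			if (skb2)
				raw_rcv(last, skb2);
		}
		last = sk;
	}

	if (last) {
		raw_rcv(last, skb);
		return 1;
	}
	return 0;
}
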
@@ -449,7 +465,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
                             (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
                             (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
                             (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
-                            (1<<IP_MINTTL))) ||
+                            (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
            optname == IP_MULTICAST_TTL ||
            optname == IP_MULTICAST_ALL ||
            optname == IP_MULTICAST_LOOP ||
@@ -572,6 +588,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
                }
                inet->hdrincl = val ? 1 : 0;
                break;
+       case IP_NODEFRAG:
+               if (sk->sk_type != SOCK_RAW) {
+                       err = -ENOPROTOOPT;
+                       break;
+               }
+               inet->nodefrag = val ? 1 : 0;
+               break;
        case IP_MTU_DISCOVER:
                if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE)
                        goto e_inval;
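
On the userspace side, the SOCK_RAW check above means IP_NODEFRAG is only accepted on raw sockets and is rejected with ENOPROTOOPT otherwise. A minimal usage sketch, assuming a kernel and headers that carry this patch (the fallback define mirrors the value this series adds to include/linux/in.h; verify it against your headers):

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>

#ifndef IP_NODEFRAG
#define IP_NODEFRAG 22	/* assumed value from this patch series; check linux/in.h */
#endif

int main(void)
{
	int one = 1;
	/* IP_NODEFRAG is only valid on SOCK_RAW sockets (needs CAP_NET_RAW). */
	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_UDP);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	if (setsockopt(fd, IPPROTO_IP, IP_NODEFRAG, &one, sizeof(one)) < 0)
		perror("setsockopt(IP_NODEFRAG)");
	return 0;
}
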