[IPV4]: Add raw drops counter.
diff --git a/include/net/sock.h b/include/net/sock.h
index 20de3fa7ae40001bbe7771bb32915b8604f686f7..f5b6437141315cd745cbe8ddabdc1b64390936cd 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -145,7 +145,8 @@ struct sock_common {
   *    @sk_forward_alloc: space allocated forward
   *    @sk_allocation: allocation mode
   *    @sk_sndbuf: size of send buffer in bytes
-  *    @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, %SO_OOBINLINE settings
+  *    @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
+  *               %SO_OOBINLINE settings
   *    @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets
   *    @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
   *    @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
@@ -153,9 +154,12 @@ struct sock_common {
   *    @sk_backlog: always used with the per-socket spinlock held
   *    @sk_callback_lock: used with the callbacks in the end of this struct
   *    @sk_error_queue: rarely used
-  *    @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance)
+  *    @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt,
+  *                      IPV6_ADDRFORM for instance)
   *    @sk_err: last error
-  *    @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out'
+  *    @sk_err_soft: errors that don't cause failure but are the cause of a
+  *                  persistent failure not just 'timed out'
+  *    @sk_drops: raw drops counter
   *    @sk_ack_backlog: current listen backlog
   *    @sk_max_ack_backlog: listen backlog set in listen()
   *    @sk_priority: %SO_PRIORITY setting
@@ -239,6 +243,7 @@ struct sock {
        rwlock_t                sk_callback_lock;
        int                     sk_err,
                                sk_err_soft;
+       atomic_t                sk_drops;
        unsigned short          sk_ack_backlog;
        unsigned short          sk_max_ack_backlog;
        __u32                   sk_priority;
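For orientation, sk_drops is an atomic_t so protocol code can count dropped packets per socket without taking the socket lock. A minimal usage sketch, assuming drops are counted where queueing to the receive queue fails (the function name is illustrative, not part of this patch):

	/* Illustrative receive helper: bump the per-socket drop counter on overflow. */
	static int example_queue_rcv(struct sock *sk, struct sk_buff *skb)
	{
		if (sock_queue_rcv_skb(sk, skb) < 0) {
			atomic_inc(&sk->sk_drops);	/* lockless per-socket drop count */
			kfree_skb(skb);
			return NET_RX_DROP;
		}
		return NET_RX_SUCCESS;
	}

A /proc show routine can then report the value with atomic_read(&sk->sk_drops).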
@@ -560,6 +565,14 @@ struct proto {
        void                    (*unhash)(struct sock *sk);
        int                     (*get_port)(struct sock *sk, unsigned short snum);
 
+#ifdef CONFIG_SMP
+       /* Keeping track of sockets in use */
+       void                    (*inuse_add)(struct proto *prot, int inc);
+       int                     (*inuse_getval)(const struct proto *prot);
+       int                     *inuse_ptr;
+#else
+       int                     inuse;
+#endif
        /* Memory pressure */
        void                    (*enter_memory_pressure)(void);
        atomic_t                *memory_allocated;      /* Current allocated memory. */
@@ -592,12 +605,38 @@ struct proto {
 #ifdef SOCK_REFCNT_DEBUG
        atomic_t                socks;
 #endif
-       struct {
-               int inuse;
-               u8  __pad[SMP_CACHE_BYTES - sizeof(int)];
-       } stats[NR_CPUS];
 };
 
+/*
+ * Special macros to let protos use a fast version of inuse{get|add}
+ * using a static percpu variable per proto instead of an allocated one,
+ * saving one dereference.
+ * This might be changed if/when dynamic percpu vars become fast.
+ */
+#ifdef CONFIG_SMP
+# define DEFINE_PROTO_INUSE(NAME)                      \
+static DEFINE_PER_CPU(int, NAME##_inuse);              \
+static void NAME##_inuse_add(struct proto *prot, int inc)      \
+{                                                      \
+       __get_cpu_var(NAME##_inuse) += inc;             \
+}                                                      \
+                                                       \
+static int NAME##_inuse_getval(const struct proto *prot)\
+{                                                      \
+       int res = 0, cpu;                               \
+                                                       \
+       for_each_possible_cpu(cpu)                      \
+               res += per_cpu(NAME##_inuse, cpu);      \
+       return res;                                     \
+}
+# define REF_PROTO_INUSE(NAME)                         \
+       .inuse_add = NAME##_inuse_add,                  \
+       .inuse_getval = NAME##_inuse_getval,
+#else
+# define DEFINE_PROTO_INUSE(NAME)
+# define REF_PROTO_INUSE(NAME)
+#endif
+
 extern int proto_register(struct proto *prot, int alloc_slab);
 extern void proto_unregister(struct proto *prot);
 
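As an illustration of how a protocol would consume the macros above, a hypothetical "foo" protocol might be wired up as follows: DEFINE_PROTO_INUSE() emits the static per-CPU counter plus the add/getval helpers, and REF_PROTO_INUSE() fills in the matching struct proto members (both expand to nothing on UP builds):

	DEFINE_PROTO_INUSE(foo)			/* per-CPU foo_inuse counter + helpers */

	struct proto foo_prot = {
		.name		= "FOO",
		.owner		= THIS_MODULE,
		/* ... the usual protocol operations ... */
		REF_PROTO_INUSE(foo)		/* .inuse_add / .inuse_getval (SMP only) */
	};

Note that REF_PROTO_INUSE() already carries its own trailing comma, so it can sit anywhere in the initializer.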
@@ -629,12 +668,29 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
 /* Called with local bh disabled */
 static __inline__ void sock_prot_inc_use(struct proto *prot)
 {
-       prot->stats[smp_processor_id()].inuse++;
+#ifdef CONFIG_SMP
+       prot->inuse_add(prot, 1);
+#else
+       prot->inuse++;
+#endif
 }
 
 static __inline__ void sock_prot_dec_use(struct proto *prot)
 {
-       prot->stats[smp_processor_id()].inuse--;
+#ifdef CONFIG_SMP
+       prot->inuse_add(prot, -1);
+#else
+       prot->inuse--;
+#endif
+}
+
+static __inline__ int sock_prot_inuse(struct proto *proto)
+{
+#ifdef CONFIG_SMP
+       return proto->inuse_getval(proto);
+#else
+       return proto->inuse;
+#endif
 }
 
 /* With per-bucket locks this operation is not-atomic, so that
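The read side is symmetric; for example, a sockstat-style /proc handler could report the count like this (illustrative only, not part of this patch):

	/* Illustrative only: print the current socket count for one protocol. */
	static void example_report_inuse(struct seq_file *seq, struct proto *prot)
	{
		seq_printf(seq, "%s: inuse %d\n", prot->name, sock_prot_inuse(prot));
	}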
@@ -893,7 +949,7 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
                return err;
        
        rcu_read_lock_bh();
-       filter = sk->sk_filter;
+       filter = rcu_dereference(sk->sk_filter);
        if (filter) {
                unsigned int pkt_len = sk_run_filter(skb, filter->insns,
                                filter->len);
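For context, rcu_dereference() on this read side is only safe because the writer is assumed to publish a new filter with rcu_assign_pointer() and to defer freeing the old one past an RCU grace period; a simplified sketch of that assumed writer (name and locking details are illustrative):

	/* Assumed writer side (simplified): readers under rcu_read_lock_bh()
	 * then observe either the old or the new filter, never a torn pointer. */
	static void example_set_filter(struct sock *sk, struct sk_filter *fp)
	{
		rcu_assign_pointer(sk->sk_filter, fp);
		/* The previous filter, if any, must only be freed after an RCU
		 * grace period, e.g. via call_rcu() (cleanup not shown). */
	}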
@@ -1184,14 +1240,19 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
                                                   gfp_t gfp)
 {
        struct sk_buff *skb;
-       int hdr_len;
 
-       hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header);
-       skb = alloc_skb_fclone(size + hdr_len, gfp);
+       /* The TCP header must be at least 32-bit aligned.  */
+       size = ALIGN(size, 4);
+
+       skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
        if (skb) {
                skb->truesize += mem;
                if (sk_stream_wmem_schedule(sk, skb->truesize)) {
-                       skb_reserve(skb, hdr_len);
+                       /*
+                        * Make sure that we have exactly size bytes
+                        * available to the caller, no more, no less.
+                        */
+                       skb_reserve(skb, skb_tailroom(skb) - size);
                        return skb;
                }
                __kfree_skb(skb);
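A worked example of the new reserve logic (numbers illustrative): with sk->sk_prot->max_header = 48 and a request of size = 21, size is first rounded up to 24 and the skb is allocated with at least 24 + 48 = 72 bytes of linear space. Because SKB_DATA_ALIGN() and allocator rounding can make skb_tailroom(skb) larger than 72, reserving skb_tailroom(skb) - 24 leaves exactly 24 bytes of tailroom for the caller and pushes all remaining space, including any slack, into the headroom where the protocol headers are built.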