From 95766fff6b9a78d11fc2d3812dd035381690b55d Mon Sep 17 00:00:00 2001 From: Hideo Aoki Date: Mon, 31 Dec 2007 00:29:24 -0800 Subject: [PATCH] [UDP]: Add memory accounting. Signed-off-by: Takahiro Yasui Signed-off-by: Hideo Aoki Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 27 ++++++++++++ include/net/udp.h | 9 ++++ net/ipv4/af_inet.c | 5 +++ net/ipv4/proc.c | 3 +- net/ipv4/sysctl_net_ipv4.c | 31 ++++++++++++++ net/ipv4/udp.c | 57 +++++++++++++++++++++++++- net/ipv6/udp.c | 32 +++++++++++++-- 7 files changed, 157 insertions(+), 7 deletions(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 6f7872ba1de..17a6e46fbd4 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -446,6 +446,33 @@ tcp_dma_copybreak - INTEGER and CONFIG_NET_DMA is enabled. Default: 4096 +UDP variables: + +udp_mem - vector of 3 INTEGERs: min, pressure, max + Number of pages allowed for queueing by all UDP sockets. + + min: Below this number of pages UDP is not bothered about its + memory appetite. When amount of memory allocated by UDP exceeds + this number, UDP starts to moderate memory usage. + + pressure: This value was introduced to follow format of tcp_mem. + + max: Number of pages allowed for queueing by all UDP sockets. + + Default is calculated at boot time from amount of available memory. + +udp_rmem_min - INTEGER + Minimal size of receive buffer used by UDP sockets in moderation. + Each UDP socket is able to use the size for receiving data, even if + total pages of UDP sockets exceed udp_mem pressure. The unit is byte. + Default: 4096 + +udp_wmem_min - INTEGER + Minimal size of send buffer used by UDP sockets in moderation. + Each UDP socket is able to use the size for sending data, even if + total pages of UDP sockets exceed udp_mem pressure. The unit is byte. + Default: 4096 + CIPSOv4 Variables: cipso_cache_enable - BOOLEAN diff --git a/include/net/udp.h b/include/net/udp.h index 98cb09ca3a2..93796beac8f 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -65,6 +65,13 @@ extern rwlock_t udp_hash_lock; extern struct proto udp_prot; +extern atomic_t udp_memory_allocated; + +/* sysctl variables for udp */ +extern int sysctl_udp_mem[3]; +extern int sysctl_udp_rmem_min; +extern int sysctl_udp_wmem_min; + struct sk_buff; /* @@ -198,4 +205,6 @@ extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo); extern int udp4_proc_init(void); extern void udp4_proc_exit(void); #endif + +extern void udp_init(void); #endif /* _UDP_H */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 03633b7b9b4..0e12cf64607 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -139,6 +139,8 @@ void inet_sock_destruct(struct sock *sk) __skb_queue_purge(&sk->sk_receive_queue); __skb_queue_purge(&sk->sk_error_queue); + sk_mem_reclaim(sk); + if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) { printk("Attempt to release TCP socket in state %d %p\n", sk->sk_state, sk); @@ -1417,6 +1419,9 @@ static int __init inet_init(void) /* Setup TCP slab cache for open requests. */ tcp_init(); + /* Setup UDP memory threshold */ + udp_init(); + /* Add UDP-Lite (RFC 3828) */ udplite4_register(); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 41734db677b..53bc010beef 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -56,7 +56,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) sock_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), atomic_read(&tcp_memory_allocated)); - seq_printf(seq, "UDP: inuse %d\n", sock_prot_inuse(&udp_prot)); + seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse(&udp_prot), + atomic_read(&udp_memory_allocated)); seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse(&udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse(&raw_prot)); seq_printf(seq, "FRAG: inuse %d memory %d\n", diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 844f26fab06..a5a9f8e3bb2 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -812,6 +813,36 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "udp_mem", + .data = &sysctl_udp_mem, + .maxlen = sizeof(sysctl_udp_mem), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "udp_rmem_min", + .data = &sysctl_udp_rmem_min, + .maxlen = sizeof(sysctl_udp_rmem_min), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "udp_wmem_min", + .data = &sysctl_udp_wmem_min, + .maxlen = sizeof(sysctl_udp_wmem_min), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero + }, { .ctl_name = 0 } }; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1ce6b60b7f9..35328436075 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -82,6 +82,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +119,17 @@ EXPORT_SYMBOL(udp_stats_in6); struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); +int sysctl_udp_mem[3] __read_mostly; +int sysctl_udp_rmem_min __read_mostly; +int sysctl_udp_wmem_min __read_mostly; + +EXPORT_SYMBOL(sysctl_udp_mem); +EXPORT_SYMBOL(sysctl_udp_rmem_min); +EXPORT_SYMBOL(sysctl_udp_wmem_min); + +atomic_t udp_memory_allocated; +EXPORT_SYMBOL(udp_memory_allocated); + static inline int __udp_lib_lport_inuse(__u16 num, const struct hlist_head udptable[]) { @@ -901,13 +913,17 @@ try_again: err = ulen; out_free: + lock_sock(sk); skb_free_datagram(sk, skb); + release_sock(sk); out: return err; csum_copy_err: + lock_sock(sk); if (!skb_kill_datagram(sk, skb, flags)) UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + release_sock(sk); if (noblock) return -EAGAIN; @@ -1072,7 +1088,15 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb, skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) { - int ret = udp_queue_rcv_skb(sk, skb1); + int ret = 0; + + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + ret = udp_queue_rcv_skb(sk, skb1); + else + sk_add_backlog(sk, skb1); + bh_unlock_sock(sk); + if (ret > 0) /* we should probably re-process instead * of dropping packets here. */ @@ -1165,7 +1189,13 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], inet_iif(skb), udptable); if (sk != NULL) { - int ret = udp_queue_rcv_skb(sk, skb); + int ret = 0; + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + ret = udp_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); sock_put(sk); /* a return value > 0 means to resubmit the input, but @@ -1460,6 +1490,10 @@ struct proto udp_prot = { .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem = &sysctl_udp_wmem_min, + .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp_sock), #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, @@ -1655,6 +1689,25 @@ void udp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ +void __init udp_init(void) +{ + unsigned long limit; + + /* Set the pressure threshold up by the same strategy of TCP. It is a + * fraction of global memory that is up to 1/2 at 256 MB, decreasing + * toward zero with the amount of memory, with a floor of 128 pages. + */ + limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); + limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + limit = max(limit, 128UL); + sysctl_udp_mem[0] = limit / 4 * 3; + sysctl_udp_mem[1] = limit; + sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2; + + sysctl_udp_rmem_min = SK_MEM_QUANTUM; + sysctl_udp_wmem_min = SK_MEM_QUANTUM; +} + EXPORT_SYMBOL(udp_disconnect); EXPORT_SYMBOL(udp_hash); EXPORT_SYMBOL(udp_hash_lock); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c9a97b40551..bf58acab206 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -204,13 +204,17 @@ try_again: err = ulen; out_free: + lock_sock(sk); skb_free_datagram(sk, skb); + release_sock(sk); out: return err; csum_copy_err: + lock_sock(sk); if (!skb_kill_datagram(sk, skb, flags)) UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + release_sock(sk); if (flags & MSG_DONTWAIT) return -EAGAIN; @@ -366,10 +370,21 @@ static int __udp6_lib_mcast_deliver(struct sk_buff *skb, struct in6_addr *saddr, while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr, uh->source, saddr, dif))) { struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); - if (buff) - udpv6_queue_rcv_skb(sk2, buff); + if (buff) { + bh_lock_sock_nested(sk2); + if (!sock_owned_by_user(sk2)) + udpv6_queue_rcv_skb(sk2, buff); + else + sk_add_backlog(sk2, buff); + bh_unlock_sock(sk2); + } } - udpv6_queue_rcv_skb(sk, skb); + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + udpv6_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); out: read_unlock(&udp_hash_lock); return 0; @@ -482,7 +497,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], /* deliver */ - udpv6_queue_rcv_skb(sk, skb); + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + udpv6_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); sock_put(sk); return 0; @@ -994,6 +1014,10 @@ struct proto udpv6_prot = { .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem = &sysctl_udp_wmem_min, + .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp6_sock), #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, -- 2.39.3